<a href="https://colab.research.google.com/github/navneet-g/sde-skills-genai-workshop/blob/main/Workshop_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Pre-reqs

1. Bring a laptop, or pair with someone, to participate in the hands-on lab
2. Sign up for free Google Colab at https://colab.research.google.com/
3. Get an API key at https://platform.openai.com/api-keys
4. We will follow the Notebook at https://github.com/navneet-g/sde-skills-genai-workshop/blob/main/Workshop_1.ipynb




# Guess the word


### Example 1
* He loved to study and when he grew up he became a <?>

### Example 2
* He wore a white coat and stethoscope to work every day. He loved to study and when he grew up he became a <?>.




That is how transformers work: they guess the next word based on the context.

* Large language models are trained on lots of data, which helps them make these guesses.

In [None]:
!pip install --q -U langchain langchain_community \
openai langchain-openai selenium unstructured \
langchain-text-splitters unstructured faiss-cpu langgraph

In [None]:
from langchain_community.document_loaders import SeleniumURLLoader  # loading documents
from langchain.text_splitter import CharacterTextSplitter  # splitting text
from langchain_community.vectorstores import (
    FAISS,
)  # creating vector store from embeddings; can use chromadb instead as well
from langchain.chains import RetrievalQA  # creating qa system
from langchain_openai import ChatOpenAI  # using llm for qa system
from langchain_openai import OpenAIEmbeddings  # embedding text with openai

from langchain_community.chat_models import ChatOllama
from langchain_community.embeddings import OllamaEmbeddings

import openai

# `import importlib` alone does not guarantee the `util` submodule is loaded,
# so import it explicitly.
import importlib.util

# Detect Google Colab by probing for the `google.colab` package specifically.
# Probing only the top-level `google` namespace is not enough: it also exists
# on local machines that have any other google-* package installed
# (protobuf, for example), which would wrongly select the Colab branch.
try:
    google_collab = importlib.util.find_spec("google.colab")
except (ImportError, ValueError):
    # find_spec imports the parent package first; a missing `google` package
    # surfaces as ModuleNotFoundError (an ImportError subclass).
    google_collab = None

# True when the notebook is NOT running inside Colab.
local_run = google_collab is None

print("Running locally:", local_run)

In [None]:
# Pick the chat model and the embedding model used by the rest of the notebook:
# OpenAI when running in Colab, Ollama-served models otherwise.
if not local_run:
    # The OpenAI key is read from Colab's per-user data store.
    from google.colab import userdata

    openai_api_key = userdata.get('OPENAI_API_KEY')
    llm = ChatOpenAI(
        temperature=0,
        model_name="gpt-3.5-turbo",
        openai_api_key=openai_api_key,
    )
    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
else:
    llm = ChatOllama(model="llama3:8b")
    embeddings = OllamaEmbeddings(model="nomic-embed-text")


# docs: https://python.langchain.com/docs/integrations/chat/openai/ and https://python.langchain.com/docs/integrations/chat/ollama/


In [None]:
# Baseline: ask the model directly, with no extra context supplied in the prompt.
llm.invoke("When were the last academy awards before 2023").content

# **In context learning**

In [None]:
# Background passage that gets prepended to each question below.
context = (
    "Context: \n"
    "The 96th Academy Awards ceremony, which was presented by the Academy of Motion Picture Arts and Sciences (AMPAS), took place on March 10, 2024, at the Dolby Theatre in Hollywood, Los Angeles.[6] During the gala, the AMPAS presented Academy Awards (commonly referred to as Oscars) in 23 categories honoring films released in 2023. Comedian Jimmy Kimmel hosted the show for the fourth time.[a] \n"
)

question = "When were the 96th academy awards?"

# The prompt is simply the passage followed by the question.
llm.invoke(context + question).content


In [None]:
# Reuse the same context with a question the passage does not cover
# (it names the date, venue and host, but no nominees).
question ="Who were the academy awards nominees"
llm.invoke(context + question).content

# **RAG**

In [None]:
# Pages to pull into the knowledge base
urls = ["https://en.wikipedia.org/wiki/96th_Academy_Awards"]

# Fetch the pages with the Selenium-based loader
loader = SeleniumURLLoader(urls=urls)
data = loader.load()


# Configure a splitter that breaks on newlines, with a target chunk size of
# 1000 characters and 200 characters of overlap between neighbouring chunks
print("Splitting document by character...")
text_splitter = CharacterTextSplitter(
    separator="\n",
    chunk_size=1000,
    chunk_overlap=200,
)

# Apply the splitter to the loaded pages
print("Splitting into multiple documents...")
docs = text_splitter.split_documents(data)

In [None]:
print("Creating vector store...")
# Build a FAISS vector store from the split documents, embedding them with
# whichever `embeddings` model was configured earlier
db = FAISS.from_documents(docs, embedding=embeddings)

In [None]:

# Create a question-answering chain over the vector store, using whichever
# `llm` was configured earlier (OpenAI in Colab, Ollama locally)
print("Creating retriever...")
qa = RetrievalQA.from_chain_type(
    llm=llm,
    # "stuff" chain type: presumably all retrieved chunks go into a single prompt — confirm in LangChain docs
    chain_type="stuff",
    # ask the retriever for 10 chunks per query
    retriever=db.as_retriever(search_kwargs={"k": 10}),
)

In [None]:
def ask_question(question):
    """Print ``question``, the QA chain's answer to it, and a separator line."""
    print("Query: " + question)
    # Run the retrieval chain and keep only the answer text from its result
    answer = qa.invoke(question)['result']
    print("Response: " + answer)
    print("===============")

In [None]:
# Ask three questions, from vague to specific, against the same QA chain
for question_text in (
    "Who were the academy awards winners?",
    "What date did the academy awards happen?",
    "What date did the 96th academy awards happen?",
):
    ask_question(question_text)

## **Tools**

Ref: [Building LLM Agents in 3 Levels of Complexity: From Scratch, OpenAI Functions & LangChain](https://www.youtube.com/watch?v=v1tyQtncsE4)

In [None]:
import os
import subprocess  # no longer used by this class; kept in case other cells rely on it
from pathlib import Path


class ModelWithTools:
    """Pair a chat model with a few filesystem "tools" it can be asked to call.

    The model is prompted to reply with a Python call such as
    ``model.create_directory('x')``; ``execute_function_call`` then runs it.
    """

    def __init__(self, llm):
        """Store the chat model.

        ``llm`` must offer ``invoke(messages)`` returning an object with a
        ``content`` attribute.
        """
        self.llm = llm

    def get_response(self, prompt_question):
        """Send ``prompt_question`` as the user message and return the reply text."""
        messages = [
            {"role": "system", "content": "You are a helpful research and programming assistant"},
            {"role": "user", "content": prompt_question},
        ]
        # Use the model this instance was built with, not a global named `llm`.
        return self.llm.invoke(messages).content

    def create_directory(self, directory_name):
        """Create ``directory_name`` including missing parents.

        Does nothing if the directory already exists. Raises ``OSError`` on
        real failures instead of failing silently in a child process.
        """
        os.makedirs(directory_name, exist_ok=True)

    def create_file(self, file_name):
        """Create an empty file at ``file_name`` (or update its timestamp).

        Missing parent directories are created first, so a path inside a
        not-yet-existing folder works.
        """
        path = Path(file_name)
        path.parent.mkdir(parents=True, exist_ok=True)
        path.touch()

    def list_files(self):
        """Print and return the sorted names in the current working directory.

        Names starting with a dot are skipped, as plain ``ls`` does.
        """
        names = sorted(name for name in os.listdir() if not name.startswith("."))
        for name in names:
            print(name)
        return names

    @staticmethod
    def _strip_code_fence(text):
        """Remove surrounding whitespace and a Markdown code fence, if present."""
        lines = text.strip().splitlines()
        if lines and lines[0].lstrip().startswith("```"):
            lines = lines[1:]
        if lines and lines[-1].strip() == "```":
            lines = lines[:-1]
        return "\n".join(lines).strip()

    def execute_function_call(self, function_call_string: str):
        """Execute the Python source in ``function_call_string``.

        SECURITY: this runs arbitrary code through ``exec``. The string
        normally comes from an LLM, so it must be treated as untrusted;
        review it before calling this outside a throwaway environment.
        """
        # Chat models often wrap code in ``` fences or add stray whitespace,
        # either of which would make exec() raise a SyntaxError.
        code = self._strip_code_fence(function_call_string)
        exec(code)

In [None]:
model = ModelWithTools(llm=llm)

In [None]:
# The built-in print is used here on purpose: IPython.display does not export
# a `print`, so importing it from there fails with ImportError.

# Two-step task in plain English; the prompt below asks only for the FIRST call needed.
task_description = "Create a folder called 'sde-skills-workshop'. Inside that folder, create a file called 'llm-agent-demo.md'"

# Few-shot prompt: describe the available tool functions, show two worked
# examples, then ask for a single Python call and nothing else.
output = model.get_response(f"""Given a task that will be fed as input, and consider you have access to the following functions:

    def create_directory(self, directory_name):
        '''Function that creates a directory given a directory name.'''
        pass

    def create_file(self, file_name):
        '''Function that creates a file given a file name.'''
        pass

    def list_files(self):
       '''Function that lists all files in the current directory.'''
        pass

    Your output should be the first function to be executed to complete the task containing the necessary arguments.
    For example:

    task: 'create a folder named lucas-the-agent-master'
    output: model.create_directory('lucas-the-agent-master')

    task: 'create a file named the-10-master-rules.md'
    output: model.create_file('the-10-master-rules.md')

    The OUTPUT SHOULD ONLY BE THE PYTHON FUNCTION CALL and NOTHING ELSE.
    task: {task_description}
    output:\n
    """)

print(output)

In [None]:
# Runs the model-generated call string through exec(); check the printed `output` first.
model.execute_function_call(output)

In [None]:
# class Agent_Graph:
#     def __init__(self):
#         self.state = None
#         self.nodes = {}
#         self.edges = {}
#         self.dynamic_edges = {}
#         self.entry_point = None
#         self.finish_point = None

#     def add_node(self, node_id, node):
#         self.nodes[node_id] = node

#     def add_edge(self, node1_id, node2_id):
#         if node1_id not in self.nodes:
#             raise Exception( node1_id + " does not exist")
#         if node2_id not in self.nodes:
#             raise Exception(node2_id + " does not exist")

#         if node1_id in self.edges:
#             raise Exception ("Edge already exists starting at " + node1_id)

#         self.edges[node1_id] = node2_id # node1_id -> node2_id

#         # TODO: Check cycles later

#     def add_dynamic_edge(self, node_1, condition):
#         if node_1 not in self.nodes:
#             raise Exception( node_1 + " does not exist")

#         if node_1 in self.edges:
#             raise Exception ("Edge already exists starting at " + node_1)

#         if node_1 in self.dynamic_edges:
#             raise Exception ("Dynamic edge already exists starting at " + node_1)

#         self.edges[node_1] = None
#         self.dynamic_edges[node_1] = condition


#     def set_entry_point(self, node_id):
#         if node_id not in self.nodes:
#             raise Exception( node_id + " does not exist")
#         self.entry_point = node_id

#     def set_finish_point(self, node_id):
#         if node_id not in self.nodes and node_id != "STOP":
#             raise Exception( node_id + " does not exist")
#         self.finish_point = node_id


#     def stream(self, state):
#         self.state = state
#         if self.entry_point is None:
#             raise Exception("No entry point")
#         if self.finish_point is None:
#             raise Exception("No finish point")

#         current_node_id = self.entry_point
#         while current_node_id != None and current_node_id != "STOP":
#             node = self.nodes[current_node_id]
#             self.state = node(self.state)
#             yield (current_node_id, self.state)

#             if(current_node_id == self.finish_point or current_node_id == "STOP"):
#                 current_node_id = None
#             else:
#                 if current_node_id in self.dynamic_edges:
#                     condition = self.dynamic_edges[current_node_id]
#                     next_node_id = condition(self.state)

#                     if(next_node_id not in self.nodes and next_node_id != "STOP"):
#                         raise Exception("Next node does not exist: ", next_node_id)
#                 else:
#                     next_node_id  = self.edges[current_node_id]

#                 current_node_id = next_node_id







# **Multi-step graphs**

https://www.youtube.com/watch?v=R8KB-Zcynxc

In [None]:

def get_animal_of_the_day(state):
    """Ask the LLM for an animal and append its reply to ``state['messages']``.

    Returns the same ``state`` dict that was passed in.
    """
    print("Getting animal of the day...")
    prompt = "Return a random animal as a central character of a story for kids. Return only the animal name nothing else otherwise I will charge you 1 million dollars."
    reply = llm.invoke(prompt)
    animal_name = reply.content
    state['messages'].append(animal_name)
    print("Animal of the day: " + animal_name)
    return state

def get_place_of_the_day(state):
    """Ask the LLM for a place suited to the chosen animal and append it to the messages.

    Reads the animal from ``state['messages'][0]`` and returns the same ``state``.
    """
    print("Getting place of the day...")
    history = state['messages']
    animal_name = history[0]
    prompt = "Return a random place where the given animal can be in a story for kids. Return only the place name nothing else otherwise I will charge you 1 million dollars. The animal is: " + animal_name
    place_name = llm.invoke(prompt).content
    history.append(place_name)
    print("Place of the day: " + place_name)
    return state

def get_mood_of_the_day(state):
    """Ask the LLM for a mood given the animal and place, and append it to the messages.

    Reads the animal and place from ``state['messages'][0]`` and ``[1]``;
    returns the same ``state``.
    """
    print("Getting mood of the day...")
    history = state['messages']
    animal_name, place_name = history[0], history[1]
    prompt = (
        "Return a random mood of an animal at the place to be used in the story. Return only the mood nothing else otherwise I will charge you 1 million dollars.  The animal is: "
        + animal_name
        + " and the place is: "
        + place_name
    )
    mood_name = llm.invoke(prompt).content
    history.append(mood_name)
    print("Mood of the day: " + mood_name)
    return state

def generate_story(state):
    """Ask the LLM for a children's story built from the animal, place and mood.

    Reads the three inputs from the first three entries of ``state['messages']``,
    appends the story to that list and returns the same ``state``.
    """
    print("Generating story...")
    history = state['messages']
    chosen_animal = history[0]
    chosen_place = history[1]
    chosen_mood = history[2]

    prompt = (
        "You are an expert in writing children's stories. Generate a story based on the animal, place and mood. The animal is: "
        + chosen_animal
        + " The place is: "
        + chosen_place
        + " The mood is: "
        + chosen_mood
    )
    story_text = llm.invoke(prompt).content

    history.append(story_text)
    print("Story: " + story_text)
    return state

def validate_story(state):
    """Decide where the graph goes after ``generate_story``.

    Asks the LLM whether the latest story (last entry of ``state['messages']``)
    matches the animal, place and mood stored in the first three entries.

    Returns:
        ``'generate_art'`` when the story is accepted, ``'STOP'`` otherwise.
        These are the only two keys the conditional edge maps, so any other
        reply from the model is treated as ``'STOP'``.
    """
    print("Validating story...")
    print("State received: ", state)
    messages = state['messages']
    animal = messages[0]
    place = messages[1]
    mood = messages[2]
    story = messages[-1]
    # The instruction names exactly the two routing keys, once and consistently:
    # an accepted story continues to the art step, a rejected one ends the run.
    prompt = (
        "You are an expert in editing children's stories. Given the following animal name, place and mood verify that the story is based on the animal, place and mood."
        " The animal is: " + animal +
        " The place is: " + place +
        " The mood is: " + mood +
        " The story is: " + story +
        " If the story is based on the animal, place and mood return 'generate_art' and if the story is not based on the animal, place and mood return 'STOP'."
        " Return only one word response 'generate_art' or 'STOP', nothing else otherwise I will charge you 1 million dollars."
    )
    result = llm.invoke(prompt).content

    print("Validation result: " + result)

    # Models often add quotes, punctuation or whitespace around the word;
    # normalise so the return value is always a key the graph can route on.
    decision = result.strip().strip("'\"`.").strip().lower()
    if decision == "generate_art":
        return "generate_art"
    return "STOP"

def generate_art(state):
    """Ask the LLM for ASCII art of the latest story and append it to the messages.

    Uses the last entry of ``state['messages']`` as the story; returns the same ``state``.
    """
    print("Generating art...")
    history = state['messages']
    latest_story = history[-1]
    prompt = "You are an artist expert as creating an ascii art. Create an ascii art based on the story for kids. The story is: " + latest_story
    ascii_art = llm.invoke(prompt).content
    history.append(ascii_art)
    return state

from langgraph.graph import Graph, END

graph = Graph()

# Register every pipeline step as a node under its own name
for node_name, node_fn in (
    ("get_animal_of_the_day", get_animal_of_the_day),
    ("get_place_of_the_day", get_place_of_the_day),
    ("get_mood_of_the_day", get_mood_of_the_day),
    ("generate_story", generate_story),
    ("generate_art", generate_art),
):
    graph.add_node(node_name, node_fn)

# Fixed part of the pipeline: animal -> place -> mood -> story
linear_steps = [
    "get_animal_of_the_day",
    "get_place_of_the_day",
    "get_mood_of_the_day",
    "generate_story",
]
for source, target in zip(linear_steps, linear_steps[1:]):
    graph.add_edge(source, target)

# After the story, validate_story's return value selects one of these routes
routes = {
    "generate_art": "generate_art",
    "STOP": END,
}
graph.add_conditional_edges("generate_story", validate_story, routes)

graph.set_entry_point("get_animal_of_the_day")
graph.set_finish_point("generate_art")

# Initial state handed to the first node
app_state = {'messages': []}
app = graph.compile()


# Track the most recent state emitted by the graph so the results are read
# from what it actually produced; fall back to the input dict otherwise.
final_state = app_state

# The loop variable is deliberately not called `output`, so it does not
# overwrite the LLM reply stored under that name by the tools example.
for step_output in app.stream(app_state):
    # stream() yields dictionaries with output keyed by node name
    for node_id, state in step_output.items():
        print(f"Output from node '{node_id}':")
        print("---")
        print(state)
        print("\n---\n")
        if isinstance(state, dict) and 'messages' in state:
            final_state = state

# The nodes append in a fixed order: [animal, place, mood, story, art].
# The art entry is missing when validation routes to END, so index by
# position rather than from the end of the list.
messages = final_state['messages']
story = messages[3] if len(messages) > 3 else None
art = messages[4] if len(messages) > 4 else None

if art is not None:
    print(art)
if story is not None:
    print(story)

