# Environment configuration

In [4]:
%pip install -q --upgrade --user google-cloud-aiplatform==1.36.1

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.1/3.1 MB[0m [31m2.1 MB/s[0m eta [36m0:00:00[0m
[0m[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
langchain-google-vertexai 0.1.1 requires google-cloud-aiplatform<2.0.0,>=1.44.0, but you have google-cloud-aiplatform 1.36.1 which is incompatible.[0m[31m
[0m

### Restart current runtime

To use the newly installed packages in this Jupyter runtime, you must restart the runtime. You can do this by running the cell below, which will restart the current kernel.

In [5]:
# Freshly installed packages only become importable after the kernel restarts,
# so ask the running kernel to shut down with the restart flag set.
import IPython
import time

IPython.Application.instance().kernel.do_shutdown(True)

{'status': 'ok', 'restart': True}

### Authenticate your notebook environment (Colab only)

If you are running this notebook on Google Colab, you will need to authenticate your environment. To do this, run the new cell below. This step is not required if you are using [Vertex AI Workbench](https://cloud.google.com/vertex-ai-workbench).

In [1]:
import sys

# The login is attempted only when the `google.colab` module is already loaded
# in this interpreter; in any other environment the cell does nothing.
if "google.colab" in sys.modules:
    # Authenticate user to Google Cloud
    from google.colab import auth

    auth.authenticate_user()

# Vertex AI setup

In [3]:
# Installs LangChain, the Vertex AI integration, Chroma and the Wikipedia client.
# NOTE(review): none of these packages is version-pinned, so a re-run may pull
# different releases — consider pinning.
%pip install --upgrade --quiet langchain langchain-core langchain-google-vertexai chromadb wikipedia

In [5]:
from pprint import pprint
from typing import List, Tuple, Type

from langchain_core.prompts import PromptTemplate
from langchain_google_vertexai import VertexAI

# Module-level LLM handle: the chains and helper functions in later cells read
# this `model` name directly instead of receiving it as an argument.
model = VertexAI(model_name="gemini-pro", temperature=0.0)

# Invoking model


## Standard invoke

In [6]:
# The question lives in its own variable so the same text can be reused.
question = "Why python is not a great programming language?"

# Explicit constructor form: the template text and its single placeholder are
# both spelled out.
prompt = PromptTemplate(
    template="Question: {question}",
    input_variables=["question"],
)

# Render the template to plain text, then send that text straight to the model.
model.invoke(prompt.format(question=question))

"**Limitations of Python:**\n\n* **Speed:** Python is an interpreted language, which means it is slower than compiled languages like C++ or Java. This can be a disadvantage for performance-intensive applications.\n* **Memory Management:** Python uses automatic memory management, which can lead to memory leaks if not handled properly.\n* **Concurrency:** Python's Global Interpreter Lock (GIL) limits the number of threads that can run concurrently, which can hinder performance in multithreaded applications.\n* **Type Safety:** Python is a dynamically typed language, which means that data types are not checked at compile time. This can lead to errors that are difficult to detect.\n* **Limited Library Support:** While Python has a large standard library, it may not have all the necessary libraries for specialized applications.\n* **Lack of Low-Level Control:** Python does not provide direct access to low-level system resources, which can be a limitation for certain types of applications.\n

## LCEL

In [7]:
# Prompt built with from_template, with the template text passed directly.
prompt = PromptTemplate.from_template("""Question: {question}""")

# `|` composes prompt and model into one runnable; invoking it with a dict
# supplies the value for the {question} placeholder.
# NOTE: `question` is not assigned in this cell — it must already exist in the kernel.
chain = prompt | model
chain.invoke({"question": question})

"**Limitations of Python:**\n\n* **Speed:** Python is an interpreted language, which means it is slower than compiled languages like C++ or Java. This can be a disadvantage for performance-intensive applications.\n* **Memory Management:** Python uses automatic memory management, which can lead to memory leaks if not handled properly.\n* **Concurrency:** Python's Global Interpreter Lock (GIL) limits the number of threads that can run concurrently, which can hinder performance in multithreaded applications.\n* **Type Safety:** Python is a dynamically typed language, which means that data types are not checked at compile time. This can lead to errors that are difficult to detect.\n* **Limited Library Support:** While Python has a large standard library, it may not have all the necessary libraries for specialized applications.\n* **Lack of Low-Level Control:** Python provides limited access to low-level system resources, which can be a disadvantage for applications that require fine-graine

# Prepare test data

In [9]:
# Paths of the two context files, relative to the notebook's working directory.
short_file = "./luna_nozava.txt"
long_file = "./ridge_forrester.txt"

# Decode explicitly as UTF-8 so the result does not depend on the platform's
# default text encoding.
with open(short_file, "r", encoding="utf-8") as f:
    short_context = f.read()

with open(long_file, "r", encoding="utf-8") as f:
    long_context = f.read()

# Basic query based on context

In [10]:
# Prompt skeleton with two placeholders: the character description and the
# question text.
promtp_template = """
    You are expert on The Bold and the Beautiful tv series.
    Based on context: {context}
    Answer the question: {question}
"""

# All three questions travel together as one newline-separated string.
questions = (
    "What is the name of described character?\n"
    "Did she cheated on someone? If yes, who?\n"
    "What are her addictions?"
)

prompt = PromptTemplate.from_template(promtp_template)
chain = prompt | model

# short_context - description of one of the characters
chain.invoke({"question": questions, "context": short_context})



''

# Structured output

In [11]:
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.output_parsers import JsonOutputParser, PydanticOutputParser

In [12]:
# One question/answer pair.
# The Field descriptions are presumably surfaced to the model through the
# parser's format instructions — verify before rewording them.
class QA(BaseModel):
  q: str = Field(description="question asked")
  a: str = Field(description="answer to question")

# Top-level shape requested from the model: a list of QA pairs.
# NOTE(review): a later cell defines another class with this same name, which
# replaces this one in the kernel once that cell has run.
class PearsonCard(BaseModel):
  questionsAndAnswers: List[QA] = Field(description="list of questions and answers")

# Set up a parser + inject instructions into the prompt template.
parser = JsonOutputParser(pydantic_object=PearsonCard)

# {format_instructions} is pre-filled through partial_variables, so invoke()
# only has to supply {context} and {question}.
prompt = PromptTemplate.from_template("""
    You are expert on The Bold and the Beautiful tv series.
    Based on context: {context}
    Answer the questions: {question}
    {format_instructions}
    """,
    partial_variables={"format_instructions": parser.get_format_instructions()}
    )

# The parser is the last stage, so the chain yields parsed output rather than
# the raw model text.
chain = prompt | model | parser

chain.invoke({"context": short_context, "question": questions})

{'questionsAndAnswers': [{'q': 'What is the name of described character?',
   'a': 'Luna Nozawa'},
  {'q': 'Did she cheated on someone? If yes, who?',
   'a': 'Yes, she cheated on R.J. Forrester with Zende Forrester Dominguez'},
  {'q': 'What are her addictions?', 'a': 'She is addicted to narcotics'}]}

# Even more structured output

In [14]:
# to see what is happening under the hood
#from langchain.globals import set_verbose, set_debug
#set_debug(True)
#set_verbose(True)

# Define your desired data structure.
# Flat, typed record: a name, a boolean flag and a list of names.
# NOTE(review): this rebinds PearsonCard, a name an earlier cell already uses
# for a different model.
class PearsonCard(BaseModel):
  name: str = Field(description="name of the character?")
  cheated: bool = Field(description="did she cheated on someone?")
  cheated_on: List[str] = Field(description="who she cheated on?")


# Set up a parser + inject instructions into the prompt template.
# PydanticOutputParser is used here, so the chain result is a PearsonCard instance.
parser = PydanticOutputParser(pydantic_object=PearsonCard)

# {format_instructions} is pre-filled through partial_variables, so invoke()
# only has to supply {context} and {question}.
prompt = PromptTemplate.from_template("""
    You are expert on The Bold and the Beautiful tv series.
    Based on context: {context}
    Answer the question: {question}
    {format_instructions}
    """,
    partial_variables={"format_instructions": parser.get_format_instructions()}
    )

chain = prompt | model | parser

# `questions` is the newline-joined string built in an earlier cell.
chain.invoke({"context": short_context, "question": questions})

PearsonCard(name='Luna Nozawa', cheated=True, cheated_on=['R.J. Forrester'])

# Simple wrapper for asking questions

In [15]:
def ask(context: str, questions: List[str], returnType: object) -> object:
  """Ask the module-level ``model`` about ``context`` and parse its reply.

  Args:
    context: Value substituted for the ``{context}`` placeholder.
    questions: Value substituted, as-is, for the ``{question}`` placeholder.
    returnType: Class handed to ``PydanticOutputParser`` as ``pydantic_object``.

  Returns:
    Whatever the parser stage produces from the model's reply.
  """
  output_parser = PydanticOutputParser(pydantic_object=returnType)
  format_hint = output_parser.get_format_instructions()
  template = PromptTemplate.from_template("""
    Based on context: {context}
    Answer the question: {question}
    {format_instructions}
    """,
    partial_variables={"format_instructions": format_hint}
  )
  payload = {"context": context, "question": questions}
  pipeline = template | model | output_parser
  return pipeline.invoke(payload)

# Result shape for yes/no questions: the question echoed back plus a boolean.
class Answer(BaseModel):
  questionAsked: str = Field(description="question asked")
  answer: bool = Field(description="answer to question as boolean value")

# Quick run of ask() with the boolean schema; the parsed value is shown as the cell result.
ask(short_context, ["Did character cheated on someone?"], Answer)

Answer(questionAsked='Did character cheated on someone?', answer=True)

### Basic test of simple properties

In [None]:
# Property check on the short context: the yes/no answer must come back as True.
response = ask(short_context, ["Did character cheated on someone?"], Answer)
# Identity check instead of `== True`, with a message so a failure explains itself.
assert response.answer is True, f"Expected True, got {response.answer!r}"

In [None]:
# Table of yes/no questions about short_context with the boolean each should yield.
test_scenarios = [
    {"question": "Did character cheated on someone?",   "expected": True},
    {"question": "Did character has alcohol problem?",  "expected": False},
    {"question": "Did character uses drugs?",           "expected": True}
]

# Each scenario is a separate ask() call. The first mismatch raises
# AssertionError naming the expected value and the question; scenarios after
# it are not run.
for scenario in test_scenarios:
  response = ask(short_context, [scenario["question"]], Answer)
  assert response.answer == scenario["expected"], f"Expected {scenario['expected']} for {scenario['question']}"


# Testing complex responses

## Complex response

In [17]:
# Result shape for open questions: the question echoed back plus a free-text answer.
class DesciptiveResponse(BaseModel):
  questionAsked: str = Field(description="question asked")
  answer: str = Field(description="answer to question")


# A free-text answer like this cannot be checked with a plain equality assert.
ask(short_context, ["How character was introduced to the series?"], DesciptiveResponse)

DesciptiveResponse(questionAsked='How character was introduced to the series?', answer='Luna was introduced as she entered the family mansion with an armload of supplies for Eric and R.J. Forrester, to help create the new couture line they were working on in secret.')

## Evaluator

In [18]:
# taken from https://github.com/davidtan-tw/testing-llms-demo/blob/main/testing_llms_demo/tests/test_llm_based_tests.py
from pprint import pprint
from langchain.output_parsers.json import SimpleJsonOutputParser

def evaluate_llm_response(input_prompt, generated_response):
    """Have the module-level ``model`` grade a response against its prompt.

    Args:
        input_prompt: Value placed in the ``{input_prompt}`` slot of the
            evaluator prompt.
        generated_response: Value placed in the ``{generated_response}`` slot.

    Returns:
        The output of ``SimpleJsonOutputParser`` applied to the model's reply.
        The evaluator prompt asks for the keys ``result``, ``reason``,
        ``input_prompt`` and ``generated_response``.
    """
    grading_template = PromptTemplate.from_template("""
    You are an evaluator, focused on checking that Generated Response contains only key information
    that is present in Input Prompt

    Here's an Input Prompt: {input_prompt}
    Here's the Generated Response: {generated_response}

    Instruction: Return a valid JSON object with 4 keys:
    result (SATISFACTORY or NOT_SATISFACTORY),
    reason (explaining rationale for result),
    input_prompt,
    generated_response
    result must be SATISFACTORY only if Generated Response match all the responsibilities listed in Input Prompt
    """
    )

    # prompt -> model -> JSON parser, run once with both values filled in.
    grader = grading_template | model | SimpleJsonOutputParser()
    grading_inputs = {
        "input_prompt": input_prompt,
        "generated_response": generated_response,
    }
    return grader.invoke(grading_inputs)

In [19]:
def ask2(context: str, questions: List[str], returnType: Type[BaseModel]) -> Tuple[str, BaseModel]:
  """Query the module-level ``model`` and also return the prompt text it was sent.

  Args:
    context: Value substituted for the ``{context}`` placeholder.
    questions: Value substituted, as-is, for the ``{question}`` placeholder.
    returnType: Pydantic model class handed to ``PydanticOutputParser``.

  Returns:
    A ``(final_prompt, result)`` pair: the fully rendered prompt string and
    the parser's output for the model's reply.
  """
  parser = PydanticOutputParser(pydantic_object=returnType)
  prompt = PromptTemplate.from_template("""
    Based on context: {context}
    Answer the question: {question}
    {format_instructions}
    """,
    partial_variables={"format_instructions": parser.get_format_instructions()}
  )
  # Render once and send that same string to the model, so the returned prompt
  # is by construction the text the model received.
  final_prompt = prompt.format(context=context, question=questions)
  result = (model | parser).invoke(final_prompt)
  return final_prompt, result

In [20]:
question = "How character was introduced to the series? Give answer in 7 words."

# ask2 hands back the rendered prompt next to the parsed answer; both are
# passed on to the evaluator.
input_prompt, response = ask2(short_context, [question], DesciptiveResponse)
evaluation_result = evaluate_llm_response(input_prompt, response)

# Verdict first, then the evaluator's explanation, one per line.
for field in ("result", "reason"):
    print(evaluation_result[field])

SATISFACTORY
The Generated Response contains only key information that is present in the Input Prompt.


# Long context

## How it can fail

In [21]:
# NOTE(review): this rebinds the module-level `model` to text-bison. Cells and
# helpers run afterwards that read `model` will use it instead of the
# gemini-pro instance — confirm this is intended.
model = VertexAI(model_name="text-bison")
# The whole long_context is placed in the prompt; the output saved for this
# cell is a KeyboardInterrupt.
ask(long_context, ["How character was introduced to the series?"], DesciptiveResponse)



KeyboardInterrupt: 

## Create vector store

In [23]:
from langchain_community.vectorstores import Chroma
from langchain_google_vertexai import VertexAIEmbeddings
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader

# Load the long character file through TextLoader.
# NOTE: the path literal repeats the one an earlier cell binds to `long_file`.
loader = TextLoader("./ridge_forrester.txt")
doc = loader.load()
# Split into overlapping chunks (chunk_size=1000, chunk_overlap=200) before embedding.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
splits = text_splitter.split_documents(doc)
# Build the Chroma store from the chunks using the textembedding-gecko embeddings.
vectorstore = Chroma.from_documents(documents=splits, embedding=VertexAIEmbeddings("textembedding-gecko"))


## Manual vectorstore usage

In [24]:
# Wrap the vector store in a retriever and query it directly with the question text.
retriever = vectorstore.as_retriever()
retrieved_docs = retriever.invoke("Did character cheated on someone?")

# The result is a list of Document objects, shown here with pprint.
pprint(retrieved_docs)

[Document(page_content="At the start of the series, Ridge is secretly dating [[Caroline Spencer Forrester|Caroline Spencer]]. Caroline's father, Bill Spencer, hears about Ridge's bad reputation with women and asks him not to see Caroline again. Later that evening, Ridge proposes to Caroline out of spite. Bill Spencer hires a private investigator to look into Ridge and see if he is mistreating Caroline. Bill tries to trust Ridge and calls the private investigator off, but the private investigator had already taken a picture of Ridge sleeping with a former fling, Alex Simpson in a hotel room having sex and sent it to Bill Spencer. On Caroline's wedding day, Bill tells Caroline what he has found. Caroline is heartbroken and angry at her father and decides to go on with the wedding anyway. As she is walking down the aisle, she faints and is taken to the hospital, leaving Ridge at the alter. After she is released, Ridge realizes he really does love her, but Caroline decides she needs to liv

In [25]:
# NOTE(review): `retrieved_docs` is the retriever's list of Document objects,
# while ask() annotates `context` as str — the list goes into the prompt as-is.
ask(retrieved_docs, ["Did character cheated on someone?"], DesciptiveResponse)

DesciptiveResponse(questionAsked='Did character cheated on someone?', answer='Yes, Ridge cheated on Caroline with Alex Simpson.')

## Vectorstore as part of chain

In [26]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Plain prompt without format instructions: the chain ends in StrOutputParser,
# so the result is a string.
prompt = PromptTemplate.from_template("""
    Based on context: {context}
    Answer the question: {question}
    """)

# The leading dict feeds the single input string to both branches: the
# retriever produces "context" and RunnablePassthrough supplies "question".
rag_chain = (
    {"context": retriever,  "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

rag_chain.invoke("Did character cheated on someone?")

' Yes, Ridge cheated on Caroline with Alex Simpson.'

# Tools

In [27]:
from langchain_community.tools import WikipediaQueryRun
from langchain_community.utilities import WikipediaAPIWrapper

# Wikipedia wrapper configured with top_k_results=1 and doc_content_chars_max=100.
# NOTE(review): the agent run saved in this notebook shows observations cut off
# mid-sentence ("...The Bold and the Beautiful is a"); presumably the effect of
# the 100-character cap — confirm the limit is intended.
api_wrapper = WikipediaAPIWrapper(top_k_results=1, doc_content_chars_max=100)
tool = WikipediaQueryRun(api_wrapper=api_wrapper)


# Show the tool's description text and its argument schema.
print(tool.description)
print(tool.args)

A wrapper around Wikipedia. Useful for when you need to answer general questions about people, places, companies, facts, historical events, or other subjects. Input should be a search query.
{'query': {'title': 'Query', 'type': 'string'}}


In [28]:
# note: this code is not always working correctly :(
from langchain.agents import AgentType, initialize_agent


# Build an agent around the single Wikipedia tool and the current module-level
# `model`; verbose=True prints the Thought/Action trace.
# NOTE(review): the saved output contains a warn_deprecated(...) line,
# presumably raised by initialize_agent — verify and consider migrating.
agent = initialize_agent([tool], model, verbose=True)

prompt = PromptTemplate.from_template(
    """
    You are expert on The Bold and the Beautiful tv series.
    Based on context: {context}
    Answer the question: {question}
  """
)
# The template is rendered to a plain string before it is handed to the agent.
agent.invoke(prompt.format(context= short_context, question= "What is character's name? What year was actor playint this character born?"))


  warn_deprecated(




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m Question: What is character's name? What year was actor playing this character born?
Thought: The question asks for the character's name and the year the actor playing the character was born.
Action: wikipedia
Action Input: Luna Nozawa[0m
Observation: [36;1m[1;3mPage: List of The Bold and the Beautiful characters (2020s)
Summary: The Bold and the Beautiful is a[0m
Thought:[32;1m[1;3m Question: What is character's name? What year was actor playing this character born?

Thought: The question asks for the character's name and the year the actor playing the character was born.

Action: wikipedia
Action Input: Luna Nozawa[0m
Observation: [36;1m[1;3mPage: List of The Bold and the Beautiful characters (2020s)
Summary: The Bold and the Beautiful is a[0m
Thought:[32;1m[1;3m Question: What is character's name? What year was actor playing this character born?

Thought: The question asks for the character's name and the year

{'input': '\n    You are expert on The Bold and the Beautiful tv series.\n    Based on context: {{Infobox soap character\n|image1 = File:lunan.jpg\n|name = Luna Nozawa\n|caption1 = Lisa Yamada as Luna Nozawa\n|portrayer = [[Lisa Yamada]]\n|status = Present, Contract\n|years = 2023-\n|first = September 13, 2023\n|creator = [[Bradley Bell]]\n|introducer = [[Bradley Bell]]\n|family = [[Nozawa family]] \n|gender = Female\n|born = 2002 \n|age = 20 \n|parents =[[Poppy Nozawa]] (mother)\n|siblings = \n|romances = [[R.J. Forrester]] <br> [[Zende Forrester Dominguez]] \n|aunts/uncles = [[Li Finnegan]] (maternal aunt)\n|nieces/nephews = \n|cousins = [[Finn Finnegan|John "Finn" Finnegan]] (adopted cousin) <br> [[Hayes Finnegan]] (adopted cousin once removed)\n|grandparents = \n|series = [[The Bold and the Beautiful]]\n|occupation = Intern at [[Forrester Creations]] <br> Assistant fashion designer to [[Eric Forrester]] \n|last = \n|cause =}}\n\'\'\'Luna Nozawa\'\'\' is a character on [[The Bold an