In [1]:
!python --version

Python 3.11.9


In [1]:
# run on each inference
import os
from langchain_core.runnables import RunnablePassthrough, RunnableParallel, RunnableLambda
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_google_genai import ChatGoogleGenerativeAI
from keys import gemini_api_key, gemini_api_key2
from langchain_huggingface import HuggingFaceEmbeddings
from langchain.schema import Document
from langchain_community.vectorstores import Chroma
from operator import itemgetter
from langchain.prompts import PromptTemplate
from langchain_core.messages import AIMessage, HumanMessage
from IPython.display import Markdown
import databaseinfo as dbi
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader
from langchain_community.vectorstores.pgvector import PGVector
from langchain_community.vectorstores import Chroma
from langchain.indexes import SQLRecordManager, index
import psycopg2
from ragas.testset.generator import TestsetGenerator
from ragas.testset.evolutions import simple, reasoning, multi_context
from langchain_text_splitters import CharacterTextSplitter
from langchain_experimental.text_splitter import SemanticChunker
from langchain_text_splitters import TextSplitter
import re
from typing import List, Any
from langchain.storage import InMemoryStore
from langchain.retrievers import ParentDocumentRetriever
from pydantic import BaseModel, Field
from typing import Optional
from sqlalchemy import Column, String, create_engine
from sqlalchemy.orm import declarative_base
from sqlalchemy.dialects.postgresql import JSONB
import logging
from typing import Generic, Iterator, Sequence, TypeVar
from langchain.schema import Document
from langchain_core.stores import BaseStore
from sqlalchemy.orm import sessionmaker, scoped_session
from langchain import hub
from langchain.agents import AgentExecutor, create_openai_tools_agent
from langchain.tools.retriever import create_retriever_tool
from langchain.utils.math import cosine_similarity
from langchain_community.document_loaders.text import TextLoader
from langchain_community.utilities.sql_database import SQLDatabase
from sqlalchemy import inspect
from tabulate import tabulate
from psycopg2 import sql
from sqlalchemy.exc import ProgrammingError
from psycopg2.errors import InsufficientPrivilege
import nest_asyncio
from nemoguardrails import LLMRails, RailsConfig

## 1. LCEL

langchain implements a special runnable interface for all of its classes. They all are derived from the class RunnableSeriable.<br>
That is why any langchain's method can be put inside lcel chain.<br>
When the python interpreter sees the | symbol between 2 objects, it thinks it as the "or" operator and attempts to feed a into b:<br>


In [33]:
class Runnable:
    def __init__(self, func):
        self.func = func

    def __or__(self, other):
        def chained_func(*args, **kwargs):
            # the other func consumes the result of this func
            return other(self.func(*args, **kwargs))
        return Runnable(chained_func)

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

def add_one(x):
    return x+1

def multiply_three(x):
    return 3*x

#make the function runnable
a= Runnable(add_one)
b= Runnable(multiply_three)

chain= a | b
ans= chain(42)
print(ans)

chain= a.__or__(b)   
ans= chain(42)
print(ans)
#both the above act the same

129
129


all the components participating in that chain have the "invoke"method. The requirement is the datatype of the input

## 2. Runnables

In [None]:
#RunnablePassThrough: it does nothing, just passes whatever input it got
#RunnableParallel: creates a dictonary with keys as branches and values as the previous input

In [51]:
chain= RunnablePassthrough() | RunnablePassthrough() | RunnablePassthrough()
chain.invoke("hey")

'hey'

In [47]:
def x3(x):
    return x*3

chain= RunnablePassthrough() | RunnableLambda(x3) | RunnablePassthrough()
chain.invoke("hey")

'heyheyhey'

In [53]:
branch1= RunnablePassthrough()
branch2= lambda z: z["input2"]

chain= RunnableParallel({"x": branch1, "y": branch2 })
chain.invoke({"input1": "Dawn", "input2": "Dusk"})

{'x': {'input1': 'Dawn', 'input2': 'Dusk'}, 'y': 'Dusk'}

In [71]:
#create new keys:
def assign_func(_):
    return "new_branch_created"

chain= RunnableParallel({"x1": RunnablePassthrough()})
chain.invoke({"input1": "heyy", "input2": "how are you?"})

{'x1': {'input1': 'heyy', 'input2': 'how are you?'}}

In [73]:
print(chain.get_graph().draw_ascii())

+-------------------+  
| Parallel<x1>Input |  
+-------------------+  
           *           
           *           
           *           
    +-------------+    
    | Passthrough |    
    +-------------+    
           *           
           *           
           *           
+--------------------+ 
| Parallel<x1>Output | 
+--------------------+ 


In [75]:
#create new keys:
def assign_func(_):
    return "new_branch_created"

chain= RunnableParallel({"x1": RunnablePassthrough()}).assign(x2= RunnableLambda(assign_func))
chain.invoke({"input1": "heyy", "input2": "how are you?"})

{'x1': {'input1': 'heyy', 'input2': 'how are you?'},
 'x2': 'new_branch_created'}

In [77]:
print(chain.get_graph().draw_ascii())

                  +-------------------+                
                  | Parallel<x1>Input |                
                  +-------------------+                
                            *                          
                            *                          
                            *                          
                     +-------------+                   
                     | Passthrough |                   
                     +-------------+                   
                            *                          
                            *                          
                            *                          
                  +-------------------+                
                  | Parallel<x2>Input |                
                  +-------------------+                
                   ***              ***                
                ***                    ***             
              **                          **    

## 3. RAG with LCEL

In [2]:
# run on each inference
os.environ["GOOGLE_API_KEY"]= gemini_api_key
llm= ChatGoogleGenerativeAI(model= "gemini-1.5-pro-latest")

In [3]:
# run on each inference
embeddings= HuggingFaceEmbeddings(model_name='sentence-transformers/all-MiniLM-L6-v2')

In [93]:
docs= [
    Document(page_content= "Most popular dog breed is Labrador.", metadata= {"source":"dogs.txt"}),
    Document(page_content= "The favourite food for dogs is bone.", metadata= {"source":"dogs.txt"}),
    Document(page_content= "Most popular cat breed is Domestic shorthair.", metadata= {"source":"cats.txt"}),
    Document(page_content= "The favourite food for cats is milk.", metadata= {"source":"cats.txt"}),
]

In [95]:
db= Chroma.from_documents(docs, embeddings)

In [97]:
retriever= db.as_retriever()

In [99]:
prompt_template= '''Answer the following question based only the given context.
Context: {context}

Question: {question}'''

prompt= ChatPromptTemplate.from_template(prompt_template)

In [109]:
retrieval_chain= (
    {
        "context": (lambda x: x["question"]) | retriever,
        "question": itemgetter("question")
    }
    | prompt
    | llm
    | StrOutputParser()    
)
retrieval_chain.invoke({"question": "what is dogs' fav food?"})

'Bone \n'

The first component is a dictionary, not a runnable. How is this working? <br>
Langchain has a property called coersion. It automatically converts dictionary in to RunnableParallel<br><br>

The 'x' comes from the .invoke()<br>

_itemgetter("question")_ is equivalent to _lambda x: x["question"],_

In [112]:
# a better way to write the chain:
retrieval_chain= (
    {
        "context": retriever,
        "question": RunnablePassthrough()
    }
    | prompt
    | llm
    | StrOutputParser()    
)

retrieval_chain.invoke("what is dogs' fav food?")

'Bone \n'

## 4. Chat history

You ask a follow up question, for example: "Explain in detail". The retriever will take this and show top k docs, where all of them are completely irrelevant to given question. So we need a stand alone question

In [129]:
rephrasal_prompt_template= '''Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.
Chat History:
{chat_history}
Follow up question: {question}'''
rephrase_template= PromptTemplate.from_template(rephrasal_prompt_template)

In [135]:
rephrase_chain= rephrase_template | llm | StrOutputParser()

prompt_rephrased= rephrase_chain.invoke(
    {
        "question": "Are you sure?",
        "chat_history": [
            HumanMessage(content= "what is dog's fav food?"),
            AIMessage(content= "Plum cake")
        ]
    }
)
prompt_rephrased

"Is plum cake really a dog's favorite food? \n"

In [None]:
retrieval_chain= (
    {"context": retriever, "question": RunnablePassthrough()},
    | prompt
    | llm
    | StrOutpuParser()
)

In [141]:
full_chain= rephrase_chain | retrieval_chain

answer= full_chain.invoke(
    {
        "question": "Are you sure?",
        "chat_history": [
            HumanMessage(content= "What is dog's fav food?"),
            AIMessage(content= "Bone")
        ]
    }
)
Markdown(answer)

This document provides insufficient information to answer if a bone is truly a dog's favorite food. It only states that it is **"The favourite food for dogs"** according to the file "dogs.txt".  We need more context or evidence to determine if this statement is actually true. 


## 5. Indexing

- prevents duplicate entry uploads
- avoids rewriting unchanged data
- avoids recomputing embeddings of unchanged data

clean up modes:<br>
1. "incremental": if source doc files are deleted, their respective data in the db wont be deleted<br>
2. "full": it will be deleted
3. "none": no clean up, keeps the data inside db no matter what
<br>

NOTE: <br>
- both "incremental" and "full" will delete older versions of mutated content, while "none" clean up mode wont.
- All three prevent data redundancy

In [4]:
CONNECTION_STRING= f"postgresql+psycopg2://{dbi.user}:{dbi.password}@{dbi.host}:{dbi.port}/vectordb-texts"
CONN_STRING= f"dbname='vectordb-texts' user='{dbi.user}' host='{dbi.host}' password='{dbi.password}'"
COLLECTION_NAME= "vectordb-texts"
TABLE_NAME= "langchain_pg_embedding"

In [5]:
# run on each inference
doc_dir= "C:\\Users\\HP\Desktop\\chatbot\\texts"
persist_dir= os.path.join(doc_dir, "new-text-datastore")

loader= DirectoryLoader('../', glob="**/texts/*.txt")
documents= loader.load() 

text_splitter= RecursiveCharacterTextSplitter(chunk_size= 500, chunk_overlap= 0)
docs= text_splitter.split_documents(documents)

In [158]:
db= PGVector(
    connection_string= CONNECTION_STRING,
    collection_name= COLLECTION_NAME,
    embedding_function= embeddings
)
namespace= f"pgvector/{COLLECTION_NAME}"
record_manager= SQLRecordManager(namespace, db_url= CONNECTION_STRING)
record_manager.create_schema()

index(
    docs,
    record_manager,
    db,
    cleanup= "full",
    source_id_key= "source"
)

{'num_added': 0, 'num_updated': 0, 'num_skipped': 12, 'num_deleted': 0}

## 6. RAGAS- Evaluating RAG's performance

It has 5 evaluation metrics:
- Context Precision
  - Evaluates whether all ground-truth relevant items in the contexts are ranked higher
  - All relevant chunks should ideally be at the top ranks
  - Higher scores indicating better precision
- Context Recall
  - Measures the extent to which the retrieved context aligns with the annotated answer
  - Higher values indicating better performance
- Context Relevancy
  - Gauges the relevancy of the retrieved context based on both the question and contexts
  - Higher values indicating better relevancy
- Answer Relevancy
  - Assesses how pertinent(relevant) the generated answer is to the given prompt
  - Higher scores indicating better relevancy, determined by cosine similarity between the original and reverse engineered questions.
- Faithfullness:
  - Measures factual consistency of the generated answer against the given context
  - Higher scores indicate better consistency

In [4]:
#RAGAS needs "file_name" metadata, so we create a new key
for doc in docs:
    doc.metadata["file_name"]= doc.metadata["source"]
    del doc.metadata["source"]

In [None]:
generator= TestsetGenerator.from_langchain(
    embeddings= embeddings,
    generator_llm= llm,
    critic_llm= llm
)

testset= generator.generate_with_langchain_docs(
    docs,
    test_size= 4,
    distributions= {simple: 0.4, reasoning: 0.2, multi_context: 0.4}
)

Resource Exhausted 429

## 7. Chunking

why chunking?
- context window:
  llms have limited context window (no of tokens they can process at a time)
  becoming less important problem nowadays, (models nowadays come with higher context window)
- Embeddings:
  Chunking should be done in such a way that the semantics of the chunks are preserved well
  

In [11]:
with open("./texts/color-psychology.txt") as f:
    file= f.read()
    
text_splitter= CharacterTextSplitter(
    separator= "\n",
    chunk_size= 200,
    chunk_overlap= 20,
    length_function= len,
    is_separator_regex= False
)

docs= text_splitter.split_text(file)

Created a chunk of size 262, which is longer than the specified 200
Created a chunk of size 288, which is longer than the specified 200
Created a chunk of size 261, which is longer than the specified 200
Created a chunk of size 285, which is longer than the specified 200
Created a chunk of size 259, which is longer than the specified 200
Created a chunk of size 277, which is longer than the specified 200
Created a chunk of size 266, which is longer than the specified 200
Created a chunk of size 228, which is longer than the specified 200
Created a chunk of size 306, which is longer than the specified 200
Created a chunk of size 267, which is longer than the specified 200
Created a chunk of size 201, which is longer than the specified 200
Created a chunk of size 347, which is longer than the specified 200


In [12]:
len(docs)

25

In [13]:
text_splitter= RecursiveCharacterTextSplitter(
    chunk_size= 200,
    chunk_overlap= 20,
    length_function= len,
    is_separator_regex= False
)

docs= text_splitter.split_text(file)

In [14]:
len(docs)

39

RecursiveCharacterTextSplitter is the recommended text splitter.<br>
It still doesnt capture the semantic meaning well, <br>it just splits based on the specified characters, eg ("\n\n", "\n", " ")

A better approach is __Semantic Chunking__: (it is still experimental)<br>
splits a text into individual sentences and then embeds the sentences,
after embedding all the sentences, it compares and finds the similar chunks and creates a new chunk out of it.<br>
<br>
How many such embeddings should be aggregated into forming a chunk?<br>
it has a predefined breakpoint threshold

In [20]:
text_splitter= SemanticChunker(embeddings, breakpoint_threshold_type= "standard_deviation")

In [21]:
docs= text_splitter.split_text(file)

In [22]:
len(docs)

1

#### much better chunking technique: custom built with an LLM

In [26]:
class GPTSplitter(TextSplitter):
    def __init__(self, model_name: str= "gemini-1.5-pro-latest", **kwargs: Any)-> None:
        super().__init__(**kwargs)
        self.llm= ChatGoogleGenerativeAI(model= model_name)
        self.template= '''
        You are an expert in identifying the semantic meaning of text.
        You wrap each chunk in <<<>>>.
        Example:
        Text:
        Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets. Color psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user. A person does have a system of thought patterns that will answer or draw a conclusion on something that he feels is new or interesting.

        Wrapped:
        <<<Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets.>>>
        <<<Color psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user.>>>
        <<<A person does have a system of thought patterns that will answer or draw a conclusion on something that he feels is new or interesting.>>>

        Now, process the following text:
        {text}
        '''
        self.prompt= ChatPromptTemplate.from_template(self.template)
        self.output_parser= StrOutputParser()
        self.chain= (
            {"text": RunnablePassthrough()}
            | self.prompt
            | self.llm
            | self.output_parser
        )
    def split_text(self, text: str) -> List[str]:
        response= self.chain.invoke({"text": text})
        chunks= re.findall(r"<<<(.*?)>>>", response, re.DOTALL)
        return [chunk.strip() for chunk in chunks]

In [29]:
gpt_splitter= GPTSplitter()
docs= gpt_splitter.split_text(file)

In [30]:
len(docs)

29

In [31]:
docs

['Color Psychology — as previously discussed, the world of graphic design and color theory are things that cannot be separated. Color theory is certainly very useful for creating good designs by paying attention to visual concepts that are attractive to the eye.',
 'That way, the colors used must also be appropriate and appropriate for the design created. We cannot possibly give a dominant black color to an image that shows a cheerful impression, nor can we give a bright dominant color such as blue, red, pink to an image that shows a sad impression.',
 'Showing a picture with a sad impression, a painter will give a little boredom to the picture. It can be seen from the paintings depicting a sad event, most of them have a lot of color, but there is little saturation, it could be said that the saturation is low.',
 'However, images that show the impression of love will usually have a lot of pink or other bright colors that show love.',
 'Without realizing it, these things are one of the 

In [32]:
#improve the prompt; dont let the llm simply split according to new line character (in our case, it just did that)

## 8. Embeddings

In [39]:
vector= embeddings.embed_documents(file)

In [43]:
len(vector[0])

384

Embedding dimension count vary from model to model
<br><br>
HuggingFace embedding models have 384 dimensions<br>
BERT= 768<br>
OpenAI Embeddings= 1536, 3072<br>
Cohere= 1024<br>
Mistral= 1024<br>
LLama 2= 5192<br>

Higher embedding dimensions means higher accuracy, slower computation and more storage

## 9. Queries

#### MultiQuery Retrieval

Generate different variations of the same input question. These questions may retrieve better content than the original query

In [47]:
query= "what color used by companies?"

QUERY_PROMPT= PromptTemplate(
    input_variables= ["question"],
    template= '''
    You are an AI language model assistant, Your task is to generate five different versions
    of the given user question to retrieve relevant documents from a vector database. By generating
    multiple perspectives on the user question, your goal is to help the user overcome some of the
    limitations of the distance-based similarity search.
    Provide these alternative question like this:
    <<question1>>
    <<question2>>
    Only provide the query, no numberings.
    Original question: {question}
    '''
)

In [52]:
def extract_questions(input):
    return [item for item in re.split(r"<<|>>", input) if item.strip()]

In [53]:
multiquery_chain= QUERY_PROMPT | llm | StrOutputParser() | RunnableLambda(extract_questions)

In [54]:
list_of_queries= multiquery_chain.invoke(query)

In [55]:
list_of_queries

['What are the most popular colors for company branding?',
 'What is the psychology of color in branding and marketing?',
 'How do companies choose brand colors?',
 'What colors are associated with different industries?',
 'What is the impact of color on consumer perception of brands?']

In [5]:
# run on each inference
chunks= text_splitter.split_documents(documents)

In [6]:
# run on each inference
db= Chroma.from_documents(chunks, embeddings)
retriever= db.as_retriever()

In [81]:
docs= []
for query in list_of_queries:
    data= retriever.invoke(query)
    docs.extend(data)

In [84]:
docs

[Document(page_content='Colors and their meanings:', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color Psychology — as previously discussed, the world of graphic design and color theory are things that cannot be separated. Color theory is certainly very useful for creating good designs by paying', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color psychology is also often used for marketin

In [85]:
len(docs)

20

In [91]:
unique_contents= set()
final_docs= []
for i in docs:
    if i.page_content not in unique_contents:
        final_docs.append(i)
    unique_contents.add(i.page_content)

In [92]:
final_docs

[Document(page_content='Colors and their meanings:', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color Psychology — as previously discussed, the world of graphic design and color theory are things that cannot be separated. Color theory is certainly very useful for creating good designs by paying', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='What is color psychology? According to Wikipedia

## 10. Parent Document Retriever

Smaller chunks provide better precision, but returns less context, <br>
while the larger chunks retrieves more context, but they may not be precise

Parent Document Retriever uses small chunks for similarity search, but returns its associated larger chunk

Langchain only provides InMemoryStore() for this, which is not suitable at larger scale

In [9]:
docstore= InMemoryStore() # stores larger chunks
child_splitter= RecursiveCharacterTextSplitter(chunk_size= 250)
parent_splitter= RecursiveCharacterTextSplitter(chunk_size= 600)

retriever= ParentDocumentRetriever(
    vectorstore= db,
    docstore= docstore,
    child_splitter= child_splitter,
    parent_splitter= parent_splitter
)

retriever.add_documents(docs)

In [16]:
retriever.invoke("blue color")[0]

Document(page_content='How Color psychology works? In short, a person does have a system of thought patterns that will answer or draw a conclusion on something that he feels is new or interesting.\n\nSomeone will see it, be interested in it, look for a conclusion from what they see, and finally react to what they feel. That’s what happens with color, someone will see the color, create a conclusion, and finally give a reaction, whether it’s bold, hot, or something else.\n\nColors and their meanings:', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'})

In [17]:
print(type(retriever.invoke("blue color")[0]))

<class 'langchain_core.documents.base.Document'>


#### Custom docstore using PostgreSQL

doc store works with keys and values<br>
key is referenced to "doc_id", or 'source' which is in the metadata dictonary of the data


In [13]:
class DocumentModel(BaseModel):
    key: Optional[str]= Field(None)
    page_content: Optional[str]= Field(None)
    metadata: dict= Field(default_factory= dict)

In [26]:
Base= declarative_base()
'''
table only consists of 2 columns: key and value
why JSONB (JSON Binary), we cant store python classes in as entries in the table
(the document datatype is Document class) 
So we serialize the data and store; when we retrieve, we de-serialize it 
binary data -> <class 'langchain_core.documents.base.Document'>
'''
class SQLDocument(Base):
    __tablename__= "docstore"
    key= Column(String, primary_key= True)
    value= Column(JSONB) #serialization

    def __repr__(self):
        return f"<SQLDocument(key= '{self.key}', value='{self.value}')>"

In [30]:
logger= logging.getLogger(__name__)
D= TypeVar("D", bound= Document)

#class inherits from langchain's base store (key: string, value: DocumentModel)
class PostgresStore(BaseStore[str, DocumentModel], Generic[D]):
    def __init__(self, connection_string: str):
        self.engine= create_engine(connection_string)
        Base.metadata.create_all(self.engine)
        self.Session= scoped_session(sessionmaker(bind= self.engine))

    # the table has 2 rows, one for key and other one is value
    def serialize_document(self, doc: Document) -> dict:
        return {"metadata": doc.metadata, "page_content": doc.page_content}

    def deserialize_document(self, value: dict) -> dict:
        return Document(
            metadata= value.get("metadata", {}), # if we dont have, we set it to empty dict
            page_content= value.get("page_content", "")  # if we dont have, we set it to empty string          
        )

    def mget(self, keys: Sequence[str]) -> list[Document]:
        with self.Session() as session:
            try:
                sql_documents= (
                    session.query(SQLDocument).filter(SQLDocument.key.in_(keys)).all()
                )
                return [
                    self.deserialize_document(sql_doc.value)
                    for sql_doc in sql_documents
                ]
            except Exception as e:
                logger.error(f"Error in mget: {e}")
                print(f"Error in mget: {e}")
                session.rollback()
                return []
    def mset(self, key_value_pairs: Sequence[tuple[str, Document]]) -> None:
        with self.Session() as session:
            try:
                serialized_docs= []
                for key, document in key_value_pairs:
                    serialized_doc= self.serialize_document(document)
                    serialized_docs.append((key, serialized_doc))
                documents_to_update= [
                    SQLDocument(key= key, value= value) for key, value in serialized_docs
                ]
                session.bulk_save_objects(documents_to_update, update_changed_only= True)
                session.commit()
            except Exception as e:
                logger.error(f"Error in mset: {e}")
                print(f"Error in mset: {e}")
                session.rollback()

    def mdelete(self, keys: Sequence[str])-> None:
        with self.Session() as session:
            try:
                session.query(SQLDocument).filter(SQLDocument.key.in_(keys)).delete(
                synchronize_session= False
                )
                session.commit()
            except Exception as e:
                logger.error(f"Error in mdelete: {e}")
                print(f"Error in mdelete: {e}")
                session.rollback()

    def yield_keys(self) -> Iterator[str]:
        with self.Session() as session:
            try:
                query= session.query(SQLDocument.key)
                for key in query:
                    yield key[0]
            except Exception as e:
                logger.error(f"Error in yield _keys: {e}")
                print(f"Error in yield _keys: {e}")
                session.rollback()

In [39]:
DATABASE_URL= CONNECTION_STRING
store= PGVector(
    collection_name= "vectordb",
    connection_string= DATABASE_URL,
    embedding_function= embeddings
)
child_splitter= RecursiveCharacterTextSplitter(chunk_size= 250)
parent_splitter= RecursiveCharacterTextSplitter(chunk_size= 600)

retriever= ParentDocumentRetriever(
    vectorstore= store,
    docstore= PostgresStore(connection_string= DATABASE_URL),
    child_splitter= child_splitter,
    parent_splitter= parent_splitter
)

  warn_deprecated(


In [40]:
retriever.add_documents(docs)

In [42]:
retriever.invoke("what color is used by companies")

[Document(page_content='Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets.\n\nColor psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='How Color psychology works? In short, a person does have a system of thought patterns that will answer or draw a conclusion on something that he feels is new or interesting.\n\nSomeone will see it, be interested in it, look for a conclusion from what they see, and finally react to what they feel. That’s what happens with color, someone will see the color, create a conclusion, and finally give a reaction, whether it’s bold, hot, or something else.\n\nColors and their meanings:', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),

When to use Parent Document Retriever?<br>

1. if you use an LLM with a large context window due to larger docs passes to the LLM<br>
2. Useful if documents contain multiple topics, but splitting would loose overall meaning of these documents<br>
_use and LLM based splitter in that case (captures the semantic meaning of chunk and easier to implement_

## 11. Agents

- Entity that can make decisions and take actions to achieve specific goals
- LLM acts as a "brain" for these agents
- Agents can utilize tools which allows them to communicate with the outside world (internet search, call API, perform RAG etc..)

In [47]:
prompt= hub.pull("hwchase17/openai-tools-agent")
prompt.messages

[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], template='You are a helpful assistant')),
 MessagesPlaceholder(variable_name='chat_history', optional=True),
 HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], template='{input}')),
 MessagesPlaceholder(variable_name='agent_scratchpad')]

In [50]:
db= Chroma.from_documents(docs, embeddings)
retriever= db.as_retriever()

In [51]:
tool= create_retriever_tool(
    retriever= retriever, 
    name= "ragagent",
    description= "performs RAG on a small dataset"
)

tools= [tool]

In [60]:
llm= ChatGoogleGenerativeAI(model= "gemini-1.5-pro", convert_system_message_to_human= True)

In [61]:
agent= create_openai_tools_agent(llm, tools, prompt)
agent_executor= AgentExecutor(agent= agent, tools= tools)

for an agent, we need an AgentExecutor (it is the runtime for that agent)

In [65]:
response= agent_executor.invoke({"input": "which color is said to represent companies?"})

In [66]:
Markdown(response["output"])

The color most commonly associated with companies and corporations is **blue**. 

Here's why:

* **Trustworthiness and Reliability:** Blue conveys a sense of security, stability, and dependability – qualities important for businesses to project.
* **Professionalism and Competence:**  It's seen as a formal and authoritative color, often used in business suits and corporate settings.
* **Calmness and Communication:** Blue can have a calming effect and is associated with clear communication, making it suitable for building relationships.

While blue is a dominant choice, other colors are also used strategically by companies depending on their industry and brand identity. 


## 12. Re-Ranking retrieved documents

use a cross encoder to compare how similar the retrieved documents are compared to the query<br>
but we have the embeddings right?<br><br>
The embeddings we used, uses Bi- Encoder, and here we use Cross- Encoder

![image](https://raw.githubusercontent.com/UKPLab/sentence-transformers/master/docs/img/Bi_vs_Cross-Encoder.png)

Bi Encoder: 
- Generate independent sentence embeddings 
- then we can project these embeddings in a N-d space and see similarity
- use cases: info retrieval, semantic search, clustering

Cross Encoder: 
- Encodes 2 sentences together and produces an output score
- relatively more accurate, slow and less scalable

In [7]:
retrieved_docs= retriever.invoke("which color does companies use?")

In [8]:
retrieved_docs

[Document(page_content='Color psychology is also often used for marketing purposes. In the world of marketing, of course color psychology is needed so that the design can be conveyed well by the specified marketing targets.\n\nColor psychology is also very useful for fashion trends. The colors used and the combination of colors must of course suit the user.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='This proves how influential a color is on a person’s perception of something. Of course, knowing color psychology is a must for a designer to create a design that suits the target.\n\nIf we target people who have an exclusive soul, then we can use colors that depict this, such as black, gold, gray or dark green. Of course, the combination of basic colors and complementary colors must be balanced, that’s why we have to study color theory.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'}),
 Document(page_content='That way, the 

In [9]:
from sentence_transformers import CrossEncoder
cross_encoder= CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

contents= [doc.page_content for doc in retrieved_docs]

In [10]:
pairs= []
for text in contents:
    pairs.append(["which color does companies use?", text])

scores= cross_encoder.predict(pairs)
scores

array([-1.8563137 , -0.75353575, -0.43809405, -7.37597   ], dtype=float32)

In [11]:
scored_docs= zip(scores, retrieved_docs)
sorted_docs= sorted(scored_docs, reverse= True)
sorted_docs

[(-0.43809405,
  Document(page_content='That way, the colors used must also be appropriate and appropriate for the design created. We cannot possibly give a dominant black color to an image that shows a cheerful impression, nor can we give a bright dominant color such as blue, red, pink to an image that shows a sad impression.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'})),
 (-0.75353575,
  Document(page_content='This proves how influential a color is on a person’s perception of something. Of course, knowing color psychology is a must for a designer to create a design that suits the target.\n\nIf we target people who have an exclusive soul, then we can use colors that depict this, such as black, gold, gray or dark green. Of course, the combination of basic colors and complementary colors must be balanced, that’s why we have to study color theory.', metadata={'source': '..\\chatbot\\texts\\color-psychology.txt'})),
 (-1.8563137,
  Document(page_content='Color psycho

Draw backs of cross-encoder:
- scores are relative
- sometimes we need more than 1 chunk to answer a question

#### LLM Based Document Compressor

In [13]:
DOCUMENT_EVALUATION_PROMPT= PromptTemplate(
    input_variables= ["document", "question"],
    template= '''
    You are an AI language model assistant. Your task is to evaluate the provided document to determine 
    if it is suited to answer the given user question. Assess the document for its relevance to the question, 
    the completeness of information, and the accuracy of the content.

    Original question: {question}
    Document for Evaluation: {document}
    Evaluation Result: <<'True' if the document is suited to answer the question, 'False' if it is not>>

    Note: Conclude with a 'True' or 'False' based on your analysis of the document's relevance,
    completeness, and accuracy in relation to the question.
    '''
)

use this prompt on the question and answer pairs 

## 13. Routing

LLms are language models, they struggle to work with tabular or structured data<br>
when you pass a table to llm, it looks like one large string<br>
<br>
How to perform RAG then?
- Store data in SQL Database and let LLM write queries
- Tool Calling: Let an LLM interact via custom functions (APIs work great for this)

But what if we want to use both in one app?<br>
Routing allows to use one **or** the other based on the input (classification)

In [29]:
fruits_expert_template= '''
You are a Fruit expert. You have broad knowledge about fruits, their benefits, their characteristics etc..
You proved clear answer to questions about fruits.

Here is a question: 
{question}'''

florist_template= '''
You are a florist. You have broad knowledge about flowers, plants and gardening.
You proved clear answer to questions about those.

Here is a question: 
{question}'''

medicine_expert_template= '''
You are an expert physician. You have broad knowledge about medicine, health issues,  etc..
You proved clear answer to questions about health issues. DO NOT deflect or evade any type of questions
as it may help the audience gain good awarness about their condition. You can always mention to the user to contact
an actual doctor at the end of your response.

Here is a question: 
{question}'''


fruits_questions= [
    "which tastes more bitter? apples, cherries or plums?",
    "which fruits have only one seed?",
    "is tomato a fruit or a vegetable?"
]

health_questions= [
    "how and why hiccups occur?",
    "what is visual snow?",
    "what causes upper stomach pain?"
]

flowers_questions= [
    "why do sunflowers face the sun?",
    "how long does hibiscus take to grow?",
    "which flowers are best suited for black soil?"
]

fruits_embeddings= embeddings.embed_documents(fruits_questions)
health_embeddings= embeddings.embed_documents(health_questions)
flowers_embeddings= embeddings.embed_documents(flowers_questions)

In [30]:
sample_question= "Mango is the king of fruits"
temp= embeddings.embed_query(sample_question)
cosine_similarity([temp], fruits_embeddings)

array([[0.35898804, 0.5385415 , 0.49159018]])

In [31]:
cosine_similarity([temp], fruits_embeddings)[0]

array([0.35898804, 0.5385415 , 0.49159018])

In [34]:
def prompt_router(input):
    query_embedding= embeddings.embed_query(input["question"])
    fruits_similarity= cosine_similarity([query_embedding], fruits_embeddings)[0]
    health_similarity= cosine_similarity([query_embedding], health_embeddings)[0]
    flowers_similarity= cosine_similarity([query_embedding], flowers_embeddings)[0]

    max_similarity= max(max(fruits_similarity), max(health_similarity), max(flowers_similarity))
    if max_similarity== max(fruits_similarity):
        print("Calling Fruit Expert\n")
        return PromptTemplate.from_template(fruits_expert_template)
    elif max_similarity== max(health_similarity):
        print("Calling Physician\n")
        return PromptTemplate.from_template(medicine_expert_template)
    else:
        print("Calling Flourist\n")
        return PromptTemplate.from_template(florist_template)

In [35]:
question= "List down some very painful medical conditions a human can experience"
input_query= {"question": question}
prompt= prompt_router(input_query)

Calling Physician



In [36]:
chain= (
    {"question": RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | llm
    | StrOutputParser()
)

response= chain.invoke(question)
Markdown(response)

Calling Physician



It's important to preface this list by acknowledging that pain is subjective and individual. What one person experiences as excruciating, another may find manageable. This list is also not exhaustive and does not substitute for professional medical advice. 

That being said, here are some medical conditions known to cause extreme pain:

**Acute Conditions:**

* **Kidney Stones:** The passing of a kidney stone can cause sudden, sharp, and intense pain in the back, side, or groin.
* **Cluster Headaches:** Often described as the "most painful headache," cluster headaches cause severe, debilitating pain on one side of the head, typically around the eye.
* **Gout:** This inflammatory arthritis causes sudden, severe attacks of pain, swelling, and tenderness in joints, often the big toe.
* **Acute Pancreatitis:** Inflammation of the pancreas can lead to sudden, agonizing pain in the upper abdomen that radiates to the back.
* **Shingles:** This viral infection causes a painful rash and burning sensation along a nerve pathway.

**Chronic Conditions:**

* **Trigeminal Neuralgia:** This neurological disorder causes excruciating facial pain, often triggered by simple acts like touching the face or chewing.
* **Complex Regional Pain Syndrome (CRPS):** A chronic pain condition that typically affects one limb after an injury or surgery, causing severe burning, swelling, and sensitivity to touch. 
* **Fibromyalgia:** Characterized by widespread musculoskeletal pain, fatigue, and other symptoms like sleep problems and mood disorders.
* **Sickle Cell Crisis:** In sickle cell disease, abnormally shaped red blood cells can block blood flow, causing severe pain episodes in various parts of the body.
* **Endometriosis:** This condition occurs when tissue similar to the uterine lining grows outside the uterus, often leading to debilitating pelvic pain, especially during menstruation. 

**Other:**

* **Burns:** Severe burns cause immense pain due to damage to nerve endings and tissues.
* **Bone Fractures:** Depending on the severity and location, bone fractures can be incredibly painful.
* **Cancer Pain:** Pain associated with cancer can be caused by the tumor itself, treatments like chemotherapy, or complications.

This list provides a glimpse into some incredibly painful medical conditions. It's crucial to remember that everyone experiences pain differently, and seeking medical help is essential for diagnosis, pain management, and treatment.

**It is vital to consult a healthcare professional for any health concerns or before making any decisions related to your health or treatment.** 


<br><br>if there are edge cases, the embeddings may not work.
Use and llm to classify the type of question. <br>
<br>prompt= '''you are good at classifiying a question. 
Given the user question below, classify it as either being about "Health", "Flowers" or "Fruits".
<If the question is about flowers, plants and gardening, return "Flowers"><br>
<If the question is about health issues, pain and medicine, return "Health"><br>
<If the question is about fruits, natural food and eatery, return "Fruits"><br>

Question:<br>
{question}

Classification: '''
<br>
now if Classification == "Health": call Physician etc..

**NOTE:** Always do classification using LLM over embeddings
- it is cheap
- it is accurate
- easy to maintain code/promptt

Routing with vectorstore(text data) and database(tabular data)

In [44]:
DATABASE_URL= CONNECTION_STRING
store= PGVector(
    collection_name= "vectordb",
    connection_string= DATABASE_URL,
    embedding_function= embeddings
)

loader1= TextLoader(doc_dir + "/color-psychology.txt")
loader2= TextLoader(doc_dir + "/natural-disasters.txt")

docs2= loader2.load()
docs1= loader1.load()
docs= docs1+ docs2

splitter= RecursiveCharacterTextSplitter(chunk_size= 250, chunk_overlap= 20)
chunks= splitter.split_documents(docs)
store.add_documents(chunks)
retriever= store.as_retriever()

In [16]:
template= '''
Based on the table schema below, write an SQL query (just the Query) that would answer the user's query.
Do not add comments, Do not do any corrections:
{schema}

Question: {question}
SQL Query: '''

prompt= ChatPromptTemplate.from_template(template)

In [47]:
db= SQLDatabase.from_uri(CONNECTION_STRING)

In [14]:
def get_schema(_):
    engine= create_engine(CONNECTION_STRING)
    inspector= inspect(engine)
    columns= inspector.get_columns("fruits_and_vegetables")

    column_data= [
        {
            "Column Name": col["name"],
            "Data Type": str(col["type"]),
            "Nullable": "Yes" if col["nullable"] else "No",
            "Default": col["default"] if col["default"] else "None",
            "Autoincrement": "Yes" if col["autoincrement"] else "No",
        }
        for col in columns        
    ]
    schema_output= tabulate(column_data, headers= "keys", tablefmt= "grid")
    formatted_schema= f"Schema for 'fruits_and_vegetables' table:\n{schema_output}"
    return formatted_schema

In [68]:
conn= psycopg2.connect(CONN_STRING)
cursor= conn.cursor()

In [69]:
create_table_query= '''
CREATE TABLE IF NOT EXISTS fruits_and_vegetables (
    id SERIAL PRIMARY KEY,
    name VARCHAR(20) UNIQUE,
    price DECIMAL(10,2),
    color TEXT,
    category TEXT
);
'''
cursor.execute(create_table_query)
conn.commit()

with open(doc_dir + "/fruits-vegetables.txt", "r") as file:
    food_items= file.readlines()

for line in food_items:
    name, price, color, category= line.strip().split(", ")
    price= price[3:]
    insert_query= '''
    INSERT INTO fruits_and_vegetables (name, price, color, category)
    VALUES (%s, %s, %s, %s)
    ON CONFLICT (name) DO NOTHING;
    '''
    cursor.execute(insert_query, (name, price, color, category))

conn.commit()
cursor.close()
conn.close()

In [70]:
conn= psycopg2.connect(CONN_STRING)
cursor= conn.cursor()

cursor.execute("SELECT * FROM fruits_and_vegetables;")
products= cursor.fetchall()
for product in products:
    print(product)
cursor.close()
conn.close()

(1, 'Apple', Decimal('105.00'), 'green', 'fruit')
(2, 'Apricot', Decimal('40.00'), 'orange', 'fruit')
(3, 'Watermelon', Decimal('125.00'), 'green', 'fruit')
(4, 'Banana', Decimal('30.00'), 'yellow', 'fruit')
(5, 'Carrot', Decimal('28.00'), 'yellow', 'vegetable')
(6, 'Mango', Decimal('140.00'), 'pink', 'fruit')
(7, 'Beetroot', Decimal('110.00'), 'red', 'fruit')
(8, 'Cherry', Decimal('20.00'), 'red', 'fruit')
(9, 'Custard Apple', Decimal('100.00'), 'red', 'fruit')
(10, 'Orange', Decimal('45.00'), 'orange', 'fruit')
(11, 'Potato', Decimal('32.00'), 'purple', 'vegetable')
(12, 'Spinach', Decimal('120.00'), 'red', 'fruit')
(13, 'Plum', Decimal('115.00'), 'yellow', 'fruit')
(14, 'Kiwi', Decimal('35.00'), 'green', 'fruit')
(15, 'Grapes', Decimal('120.00'), 'purple', 'fruit')
(16, 'Pomogranete', Decimal('25.00'), 'orange', 'vegetable')
(17, 'Cabbage', Decimal('130.00'), 'green', 'fruit')
(18, 'Pineapple', Decimal('50.00'), 'orange', 'fruit')
(19, 'Cauliflower', Decimal('90.00'), 'red', 'fruit'

In [77]:
print(get_schema("_"))

Schema for 'fruits_and_vegetables' table:
+---------------+----------------+------------+---------------------------------------------------+-----------------+
| Column Name   | Data Type      | Nullable   | Default                                           | Autoincrement   |
| id            | INTEGER        | No         | nextval('fruits_and_vegetables_id_seq'::regclass) | Yes             |
+---------------+----------------+------------+---------------------------------------------------+-----------------+
| name          | VARCHAR(20)    | Yes        | None                                              | No              |
+---------------+----------------+------------+---------------------------------------------------+-----------------+
| price         | NUMERIC(10, 2) | Yes        | None                                              | No              |
+---------------+----------------+------------+---------------------------------------------------+-----------------+
| color       

In [17]:
sql_response= (
    RunnablePassthrough.assign(schema= get_schema)
    | prompt
    | llm.bind(stop= ["\nSQLResult:"])
    | StrOutputParser()
)
sql_query= sql_response.invoke({"question": "What is the color of price, color and category of 'pomogranete'?"})

In [99]:
sql_query

"```sql\nSELECT price, color, category FROM fruits_and_vegetables WHERE name = 'pomogranete'\n```"

In [101]:
sql_query= sql_query[7:-3]
Markdown(sql_query)

SELECT price, color, category FROM fruits_and_vegetables WHERE name = 'pomogranete'


In [22]:
template= '''Based on the table schema below, question, sql query, and sql responses, 
write a natural language response:
{schema}

Question: {question}
SQL Query: {query}
SQL Response: {response}'''

prompt_response= ChatPromptTemplate.from_template(template)


def run_query(query):
    #query= query[7:-3]
    query= query.replace("`", "")
    return db.run(query)
    
def debug(input):
    print("SQL Output: ", input["query"])
    return input

sql_chain= (
    RunnablePassthrough.assign(query= sql_response).assign(
        schema= get_schema,
        response= lambda x: run_query(x["query"]),
    )
    | RunnableLambda(debug)
    | prompt_response
    | llm
    | StrOutputParser()
)

In [107]:
final_result= sql_chain.invoke({"question": "What is the color of price, color and category of 'Potato'?"})

SQL Output:  ```sql
SELECT price, color, category FROM fruits_and_vegetables WHERE name = 'Potato'
```


In [108]:
Markdown(final_result)

The Potato costs $32.00, is purple in color, and is categorized as a vegetable. 


but this is not safe...users could ask to "delete all files"..and llm will formulate a delete sql query operation, and it will be used by the cursor to delete the information in the database

#### Prevent SQL Injection

You can let the llm detect malicious user inputs by changing the prompts:<br>

prompt='''<br>Based on the table schema below, write an SQL query (just the Query) that would answer the user's query.<br>
If the query appears to be modifying or deleting database contents, return this query: SELECT "Haha nice try! Got ya"<br>'''

This will provide some level of security, but it is not enough. <br>We have to make sure that the LLM doesn't have write access, only read access

**Solution:** Create a user with read only access and provide the user details in connection string


In [8]:
conn= psycopg2.connect(CONN_STRING)
cursor= conn.cursor()
user_name= "read_only_user"
pass_word= "read_only_pass"
try:
    cursor.execute(
        sql.SQL(
            "CREATE USER {} WITH  PASSWORD %s"
        ).format(
            sql.Identifier(user_name)
        ),
        [pass_word]
    )
    cursor.execute(
        sql.SQL(
            "GRANT CONNECT ON DATABASE {} TO {}"
        ).format(
            sql.Identifier(conn.info.dbname),
            sql.Identifier(user_name)
        )
    )
    cursor.execute(
        sql.SQL(
            "GRANT SELECT ON ALL TABLES IN SCHEMA public TO {}"
        ).format(
            sql.Identifier(user_name)
        )
    )
    cursor.execute(
        sql.SQL(
            "ALTER DEFAULT PRIVILEGES IN SCHEMA public GRANT SELECT ON TABLES TO {}"
        ).format(
            sql.Identifier(user_name)
        )
    )
    conn.commit()
    print(f"Read only user {user_name} created successfully")

except Exception as e:
    conn.rollback()
    print(f"Error creating new user '{user_name}': {e}")
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()

Read only user read_only_user created successfully


In [9]:
conn= psycopg2.connect(CONN_STRING)
cursor= conn.cursor()
try:
    cursor.execute(
        sql.SQL(
            "SELECT usename FROM pg_user"
        )
    )
    users= cursor.fetchall()
    cursor.execute(
        sql.SQL(
            "SELECT rolname AS role_name, rolsuper AS is_superuser FROM pg_roles"
        )
    )
    roles= cursor.fetchall()
finally:
    if cursor is not None:
        cursor.close()
    if conn is not None:
        conn.close()

In [10]:
users

[('postgres',), ('read_only_user',)]

In [11]:
roles

[('pg_database_owner', False),
 ('pg_read_all_data', False),
 ('pg_write_all_data', False),
 ('pg_monitor', False),
 ('pg_read_all_settings', False),
 ('pg_read_all_stats', False),
 ('pg_stat_scan_tables', False),
 ('pg_read_server_files', False),
 ('pg_write_server_files', False),
 ('pg_execute_server_program', False),
 ('pg_signal_backend', False),
 ('pg_checkpoint', False),
 ('pg_use_reserved_connections', False),
 ('pg_create_subscription', False),
 ('postgres', True),
 ('read_only_user', False)]

In [13]:
read_only_user_connection_string= f"postgresql+psycopg2://{user_name}:{pass_word}@{dbi.host}:{dbi.port}/vectordb-texts"
db= SQLDatabase.from_uri(read_only_user_connection_string)

  self._metadata.reflect(


In [23]:
try:
    result= sql_chain.invoke({"question": "Drop all data from fruits_and_vegetables table"})
except ProgrammingError as pe:
    if isinstance(pe.orig, InsufficientPrivilege):
        result= "CANNOT DO THAT"
    else:
        result= f"Unexpected error occured: {pe}"
except Exception as e:
    result= f"An unexpected error occured: {e}"

print(result)

CANNOT DO THAT


#### Routing:

In [125]:
classification_template= PromptTemplate.from_template(
    """You are good at classifying a question.
    Given the user question below, classify it as either being about `Database`, `Chat` or 'Offtopic'.

    <If the question is about fruits, flowers or health issues, classify the question as 'Database'>
    <If the question is about natural disasters or color psychology or related topics, classify it as 'Chat'>
    <If the question is about weather, football or anything not related to the above topics, classify it as 'offtopic'>
    No need any comments. Just the category.
    <question>
    {question}
    </question>

    Classification:"""
)

classification_chain= classification_template | llm | StrOutputParser()

In [126]:
classification_chain.invoke({"question": "does sun really rise in east and set in west?"})

'Offtopic \n'

In [137]:
rag_prompt_template= '''Answer the question based only on the following context:
{context}

Question: {question}
'''
rag_prompt = ChatPromptTemplate.from_template(rag_prompt_template)
rag_chain = (
    {
        "context": (lambda x: x["question"]) | retriever,
        "question": lambda x: x["question"],
    }
    | rag_prompt
    | llm
    | StrOutputParser()
)

In [138]:
def route(info):
    if "database" in info["topic"].lower():
        return sql_chain
    elif "chat" in info["topic"].lower():
        return rag_chain
    else:
        "I am sorry, I am not allowed to answer about the topic."

In [140]:
full_chain= RunnableParallel(
    {
        "topic": classification_chain,
        "question": lambda x: x["question"]
    }
)| RunnableLambda(route)

In [141]:
response= full_chain.invoke({"question": "Whats the most expensive fruit?"})

SQL Output:  ```sql
SELECT name FROM fruits_and_vegetables WHERE category = 'fruit' ORDER BY price DESC LIMIT 1;
```


In [142]:
Markdown(response)

The most expensive fruit is Mango. 


In [143]:
response= full_chain.invoke({"question": "Which color represents companies?"})

In [144]:
Markdown(response)

I cannot answer this question based on the context provided. The given text snippets mention "colors and their meanings" and "color theory," but they do not specify which color represents companies. 


In [155]:
full_chain.invoke({"question": "Who will win? Master Oogway or Dragon Warrior Po?"})

## 14. NeMo- Guardrails

- Open-source toolkit for easily adding programmable guardrails to LLM based conversational applications
- Guardrails are specific ways of controlling the output of LLM

Benefits:
- Building a safe, secure and trustworthy llm based applications easily
- Prevent LLM from talking about specific topics (politics etc..)
- Allows to easily design best practices (e.g. Authentication)
- Controllable dialog: you can steer the LLM to follow pre-defined conversational paths (ordering process)

In [6]:
nest_asyncio.apply() #to make nemo guardrails work in a jupyter environment

colang_content= '''
define user express greeting
    "hello"
    "hi"

define bot express greeting
    "Hello there!! Can I help you today?"

define flow hello
    user express greeting
    bot express greeting
'''

yaml_content= '''
models:
- type: main
  engine: vertexai
  model: gemini-1.5-pro-latest
'''

colang is a modelling language for conversational applications. we use colang to design how the conversation between the user and the bot should happen. in colang the two core concepts are messages and flows. In colang, conversation is modelled as an exchange of a user message and bot message.It is like trying to teach an LLM different ways of saying hello

In [7]:
config= RailsConfig.from_content(
    yaml_content= yaml_content,
    colang_content= colang_content
)

In [None]:
# not implemented for ChatGoogleGenerativeAI
rails= LLMRails(config= config)

ERROR: Only these are supported for NemoGuardrails:

```python
from langchain_openai import OpenAI
from langchain_openai.chat_models import ChatOpenAI
from langchain_openai.chat_models import AzureChatOpenAI
from langchain_google_vertexai import VertexAI
```

```python
...
```

```python
elif model_config.engine == "nvidia_ai_endpoints" or model_config.engine == "nim":
    try:
        from ._langchain_nvidia_ai_endpoints_patch import ChatNVIDIA

        # Check the version
        package_version = version("langchain_nvidia_ai_endpoints")

        if _parse_version(package_version) < (0, 2, 0):
            raise ValueError(
                "langchain_nvidia_ai_endpoints version must be 0.2.0 or above."
                " Please upgrade it with `pip install langchain-nvidia-ai-endpoints --upgrade`."
            )
        return ChatNVIDIA
```

```python
...
```

## 15. LangFuse

- Open source llm engineering platform
- alternative to langsmith
- can be self deployed (crucial when we want strict data privacy (fully local)

## 16. Tool Calling

- LLMs don't have all information available (real time information, company's internal data, etc..)
- RAG and letting LLM write SQL queries is not enough
- Tool calling allows getting Real time/ Near time data
- Perfect to get data from an API (since it provides an interface for structured input and output

old langchain_google_genai version = 2.0.7

`!pip uninstall langchain_experimental guardrails-ai`

In [24]:
!pip install langchain_google_genai --upgrade -q

In [3]:
!pip show langchain_google_genai

Name: langchain-google-genai
Version: 2.0.9
Summary: An integration package connecting Google's genai package and LangChain
Home-page: https://github.com/langchain-ai/langchain-google
Author: 
Author-email: 
License: MIT
Location: C:\Users\HP\anaconda3\Lib\site-packages
Requires: filetype, google-generativeai, langchain-core, pydantic
Required-by: 


In [1]:
from langchain_google_genai import ChatGoogleGenerativeAI
from keys import gemini_api_key, api_key

llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", api_key=gemini_api_key)
llm.invoke("How was the weather in Chennai yesterday?").content

'I do not have real-time access to specific weather data, including historical weather.  To get the weather information for Chennai from yesterday, you would need to consult a weather website or app that archives past weather data.  Some popular options include:\n\n* **AccuWeather:**  Often has detailed historical weather information.\n* **Weather Underground (wunderground.com):**  Known for its historical weather data.\n* **Google Weather:** Search "weather Chennai yesterday" on Google.\n* **Local news websites or apps:**  Indian news outlets often have weather sections.'

- In Langchain there are 2 ways to declare tools

1. **Declaring tools as classes**

In [20]:
from pydantic import BaseModel, Field
# from langchain_core.pydantic_v1 import BaseModel, Field

class fake_weather_api(BaseModel):
    """Check the weather in a specified city."""
    city: str = Field(default="", description="The name of the city where you want to check the weather.")

class outdoor_seating_availability(BaseModel):
    """Check if outdoor seating is available at a specified restaurant in a given city."""
    city: str = Field(default="", description="The name of the city where you want to check for outdoor seating availability.")

tools= [fake_weather_api, outdoor_seating_availability]

2. **Using** `@tool` **decorator**

- tool decorator must be put on top of the tool function
- the tool function MUST have a doc string in the above mentioned format

In [91]:
from langchain_core.tools import tool

@tool
def fake_weather_api(city: str) -> str:
    '''
    Check the weather in a specified city.

    Args:
        city (str): The name of the city where you want to check the weather.

    Returns:
        str: A description of the current weather in the specified city.
    '''
    return "Rainy, 21C"

@tool
def outdoor_seating_availability(city: str) -> str:
    '''
    Check if outdoor seating is available at a specified restaurant in a given city.

    Args:
        city (str): The name of the city where you want to check for outdoor seating availability.

    Returns:
        str: A message stating whether outdoor seating is available or not.
    '''
    return "Outdoor seating is available."

tools= [fake_weather_api, outdoor_seating_availability]

Both the above implementation produce the same results

In [93]:
llm_with_tools= llm.bind_tools(tools)

In [79]:
result = llm_with_tools.invoke("What is the weather today in Chennai?")
result

AIMessage(content='', additional_kwargs={'function_call': {'name': 'fake_weather_api', 'arguments': '{"city": "Chennai"}'}}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-3a7dee77-abda-494d-9014-87a80d0f412c-0', tool_calls=[{'name': 'fake_weather_api', 'args': {'city': 'Chennai'}, 'id': '44f69378-a28f-49d1-8ec2-a44747367851', 'type': 'tool_call'}], usage_metadata={'input_tokens': 135, 'output_tokens': 7, 'total_tokens': 142, 'input_token_details': {'cache_read': 0}})

- In AI Message, the content is an empty string
- Instead, we have a special argument "tool_calls"

`tool_calls=[{'name': 'fake_weather_api', 'args': {'city': 'Chennai'}, 'id': 'da45d986-c7bb-443b-83a0-3bc16c18157b', 'type': 'tool_call'}]`

- Some times, one tool call is not sufficient to answer the query

In [81]:
result = llm_with_tools.invoke("What is the weather today in Chennai? Do you still have seats outdoor available?")
result.tool_calls

[{'name': 'fake_weather_api',
  'args': {'city': 'Chennai'},
  'id': 'c020dc3a-2f03-44ea-a575-6883b92c52c6',
  'type': 'tool_call'},
 {'name': 'outdoor_seating_availability',
  'args': {'city': 'Chennai'},
  'id': 'b46e39ec-b586-4bc2-bb66-9e6cc1d32876',
  'type': 'tool_call'}]

- Now how to work with those?

In [123]:
from langchain_core.messages import HumanMessage, ToolMessage

messages = [
    HumanMessage(
        "How will the weather be in Chennai today? I would like to eat outside if possible."
    )
]

llm_output = llm_with_tools.invoke(messages)
messages.append(llm_output)

In [125]:
for message in messages:
    message.pretty_print()


How will the weather be in Chennai today? I would like to eat outside if possible.
Tool Calls:
  fake_weather_api (20cb1ff5-10a5-4043-8ed1-3f91db0f35d2)
 Call ID: 20cb1ff5-10a5-4043-8ed1-3f91db0f35d2
  Args:
    city: Chennai
  outdoor_seating_availability (ce532635-4a26-46ab-af33-8daf3b38adf8)
 Call ID: ce532635-4a26-46ab-af33-8daf3b38adf8
  Args:
    city: Chennai


In [127]:
tool_mapping = {
    "fake_weather_api" : fake_weather_api,
    "outdoor_seating_availability" : outdoor_seating_availability
}

In [129]:
for tool_call in llm_output.tool_calls:
    tool = tool_mapping[tool_call["name"]]
    tool_output = tool.invoke(tool_call)
    # messages.append(ToolMessage(tool_output, tool_call_id=tool_call["id"]))
    messages.append(ToolMessage(tool_output, name=tool_call["name"], tool_call_id=tool_call["id"]))

In [131]:
for message in messages:
    message.pretty_print()


How will the weather be in Chennai today? I would like to eat outside if possible.
Tool Calls:
  fake_weather_api (20cb1ff5-10a5-4043-8ed1-3f91db0f35d2)
 Call ID: 20cb1ff5-10a5-4043-8ed1-3f91db0f35d2
  Args:
    city: Chennai
  outdoor_seating_availability (ce532635-4a26-46ab-af33-8daf3b38adf8)
 Call ID: ce532635-4a26-46ab-af33-8daf3b38adf8
  Args:
    city: Chennai
Name: fake_weather_api

content='Rainy, 21C' name='fake_weather_api' tool_call_id='20cb1ff5-10a5-4043-8ed1-3f91db0f35d2'
Name: outdoor_seating_availability

content='Outdoor seating is available.' name='outdoor_seating_availability' tool_call_id='ce532635-4a26-46ab-af33-8daf3b38adf8'


In [135]:
llm_with_tools.invoke(messages).content

'The weather in Chennai is Rainy with a temperature of 21C. Outdoor seating is available.'

#### Tool calling with a live API

Run this locally:

```python
from fastapi import FastAPI
from pydantic import BaseModel

app = FastAPI()


class WeatherResponse(BaseModel):
    weather: str = ""


class OutdoorSeatingResponse(BaseModel):
    outdoor_seating: str = ""


weather_data = {
    "Rainytown": "Rainy, 25°C",
    "Sunland": "Sunny, 28°C",
    "Windyworld": "Windy, 22°C",
    "Cloudycity": "Cloudy, 22°C"
}

outdoor_seating_data = {
    "Rainytown": "Outdoor seating is not available",
    "Sunland": "Outdoor seating is available",
    "Windyworld": "Outdoor seating is available",
    "Cloudycity": "Outdoor seating is not available"
}


@app.get("/weather/{city}", response_model=WeatherResponse)
async def get_weather(city: str):
    # city_lower = city.lower()
    return {
        "weather": weather_data.get(city_lower, "Weather info not available")
    }


@app.get("/outdoor-seating/{city}", response_model=OutdoorSeatingResponse)
async def get_outdoor_seating(city: str):
    # city_lower = city.lower()
    return {
        "outdoor_seating": outdoor_seating_data.get(city_lower, "Outdoor seating info not available")
    }


if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="0.0.0.0", port=1906)

```

In [7]:
from langchain_core.tools import tool
from langchain_core.messages import HumanMessage, ToolMessage
import httpx
from langchain_google_genai import ChatGoogleGenerativeAI
from keys import gemini_api_key, api_key

@tool
def our_weather_api(city: str) -> str:
    """
    Get Weather information about the specified city from a FastAPI endpoint on localhost:1906 
    """
    response = httpx.get(f"http://localhost:1906/weather/{city}")
    if response.status_code == 200:
        return response.json().get("weather", "Weather information not available")
    else:
        return "Failed to get weather information"


@tool
def our_seating_api(city: str) -> str:
    """
    Check if outdoor seating is available in a specified city from a FastAPI endpoint on localhost:1906
    """
    response = httpx.get(f"http://localhost:1906/outdoor-seating/{city}")
    if response.status_code == 200:
        return response.json().get("outdoor_seating", "Outdoor seating information not available")
    else:
        return "Failed to get outdoor seating information"


api_tools = [our_weather_api, our_seating_api]
tool_mapping = {
    "our_weather_api": our_weather_api,
    "our_seating_api": our_seating_api
}

In [13]:
def interact_with_llm_and_tools(human_message: str):
    llm = ChatGoogleGenerativeAI(model="gemini-1.5-pro", api_key=gemini_api_key)
    model = llm.bind_tools(api_tools)

    messages = [HumanMessage(content=human_message)]
    llm_output = model.invoke(messages)
    messages.append(llm_output)

    print("Tool calls:", llm_output.tool_calls)
    for tool_call in llm_output.tool_calls:
        tool = tool_mapping[tool_call["name"]]
        if tool:
            tool_output = tool.invoke(tool_call["args"])
            print("Tool output:", tool_output)
            messages.append(ToolMessage(tool_output, name=tool_call["name"], tool_call_id=tool_call["id"]))

    final_response = model.invoke(messages)
    return final_response

In [17]:
interact_with_llm_and_tools("How will the weather be in Windyworld? I would like to eat outside. Is it possible?")

Tool calls: [{'name': 'our_weather_api', 'args': {'city': 'Windyworld'}, 'id': '889f11df-4691-424f-ade0-2a2e849777fc', 'type': 'tool_call'}, {'name': 'our_seating_api', 'args': {'city': 'Windyworld'}, 'id': 'da3bf74a-a724-4064-b5b4-6cf67ad848d1', 'type': 'tool_call'}]
Tool output: Windy, 22°C
Tool output: Outdoor seating is available


AIMessage(content='The weather in Windyworld is windy and 22°C. Outdoor seating is available.', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'safety_ratings': []}, id='run-8bb610f8-0235-4eea-b8f9-79c4d4c2148f-0', usage_metadata={'input_tokens': 217, 'output_tokens': 20, 'total_tokens': 237, 'input_token_details': {'cache_read': 0}})