# Advanced RAG (Part 2)

This notebook continues from the [first part](https://github.com/jzamalloa1/langchain_learning/blob/main/advanced_rag.ipynb).

In [1]:
import pandas as pd
import numpy as np
import json, os, pprint
import matplotlib.pyplot as plt
import plotly.express as px
import random
from langchain_openai import OpenAIEmbeddings
from langchain_openai.chat_models import ChatOpenAI
from langchain_core.utils.function_calling import convert_to_openai_tool
from langchain_core.tools import tool
from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
from langchain_core.output_parsers import StrOutputParser
from langchain.output_parsers import JsonOutputToolsParser, JsonOutputKeyToolsParser
from langchain.agents import AgentExecutor, create_openai_tools_agent, create_react_agent, Tool
from langchain.agents.format_scratchpad.openai_tools import format_to_openai_tool_messages
from langchain.agents.output_parsers.openai_tools import OpenAIToolsAgentOutputParser
from langchain_experimental.utilities import PythonREPL
from langchain_experimental.tools import PythonREPLTool
from langchain import hub
from typing import List
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain_core.callbacks import Callbacks
from langchain.prompts import ChatPromptTemplate, HumanMessagePromptTemplate, MessagesPlaceholder
from langchain_community.document_loaders.csv_loader import CSVLoader
from langchain_community.vectorstores import Chroma, FAISS
from langchain_core.runnables import RunnablePassthrough

In [4]:
# Use the OpenAI key already exported in the environment when there is one.
# Only prompt (without echoing) when it is missing, so a real key is never
# typed into the notebook source and an existing key is never blanked out.
import getpass

if not os.environ.get("OPENAI_API_KEY"):
    os.environ["OPENAI_API_KEY"] = getpass.getpass("OpenAI API key: ")

In [5]:
# Chat model shared by every chain below, configured with a low sampling
# temperature (0.1).
llm = ChatOpenAI(
    temperature=0.1,
    model="gpt-4-turbo-preview",
)

## <center> Logical and Semantic Routing </center>

<p>
<img src="ILLUSTRATIONS/rag_diagram_routing.png" 
      width="65%" height="auto"
      style="display: block; margin: 0 auto" />

Illustration [reference](https://github.com/langchain-ai/rag-from-scratch/blob/main/rag_from_scratch_10_and_11.ipynb)

### Logical Routing
Used to route a query to the most appropriate data source (e.g., a vector DB).

In [11]:
from typing import Literal

# Structured-output schema for the router: the answer must be exactly one of the
# allowed `datasource` literals.
# NOTE(review): when this class is passed to `with_structured_output`, the class
# docstring and the Field description are presumably sent to the model as part
# of the schema -- treat both as prompt text and confirm before rewording them.
class RouterQuery(BaseModel):
    """Route user query to most relevant data source"""

    # `...` as the Field default is pydantic's marker for a required field.
    datasource: Literal["python_docs", "web_browser"] = Field(
        ...,
        description = "Given a user question choose the most appropriate datasource to answer it"
    )

In [12]:
# Bind the RouterQuery schema to the chat model; the recorded outputs below show
# its replies coming back as RouterQuery instances.
structured_llm = llm.with_structured_output(RouterQuery)

  warn_beta(


In [13]:
# System instruction for the routing model (runtime prompt text, kept verbatim).
system_prompt = """
You are an expert router of user's questions. Based on the question asked, route it to
the appropriate data source
"""

# Two-message template: the fixed system instruction followed by the user's question.
prompt = ChatPromptTemplate.from_messages(
    [("system", system_prompt), ("human", "{question}")]
)

# Router: fill the template from {"question": ...}, then ask the structured LLM.
router = prompt | structured_llm

In [14]:
# Example 1: a NumPy how-to question (the recorded result routes it to `python_docs`).
query = """
how should I calcuate the mean of two vectors in numpy?
"""

# The router expects a mapping with the template's "question" key.
router_result = router.invoke({"question":query})
router_result

RouterQuery(datasource='python_docs')

In [15]:
# Example 2: a question about current weather (the recorded result routes it to
# `web_browser`). Note that this rebinds `query` and `router_result`.
query = """
what is the weather in NYC today?
"""

# The router expects a mapping with the template's "question" key.
router_result = router.invoke({"question":query})
router_result

RouterQuery(datasource='web_browser')

In [21]:
# Show the `datasource` field of the most recent router result.
router_result.datasource

'web_browser'

We can define a simple *dispatch step* that maps the router's decision to the chain that should handle the query, and then incorporate it into our main chain.

In [16]:
from langchain_core.runnables import RunnableLambda
from langchain.utils.math import cosine_similarity

def choose_route(router_output):
    """Map a router decision onto the (placeholder) downstream chain to run.

    Args:
        router_output: Object exposing a string ``datasource`` attribute
            (the router's structured result).

    Returns:
        The placeholder string for the first known data source whose name is
        contained in ``datasource`` (compared in lower case), or
        ``"throw error..."`` when none matches.
    """
    chosen = router_output.datasource.lower()

    # Checked in this order; the match is substring containment, not equality.
    routes = (
        ("python_docs", "chain for python docs..."),
        ("web_browser", "chain for web browser..."),
    )
    for source_key, target in routes:
        if source_key in chosen:
            return target

    return "throw error..."
    
# Full routing chain: the router's result is piped into choose_route, so invoking
# it with {"question": ...} returns the placeholder string for the chosen route.
full_chain = router | RunnableLambda(choose_route)

In [33]:
# Relies on `query` as assigned by an earlier cell (the NYC weather question when
# the notebook is run top to bottom).
full_chain.invoke({"question":query})

'chain for web browser...'

In [34]:
# A question that matches neither source by name; the recorded run routed it to
# the web browser.
full_chain.invoke({"question":"what is the One Piece"})

'chain for web browser...'

### Semantic Routing
Used in routing to appropriate prompt.

This is done by choosing the prompt that has the closest similarity to the user's question.

In [6]:
# Candidate prompt templates for semantic routing. Each one contains a `{query}`
# placeholder for the user's question.

# Persona prompt for One Piece questions.
one_piece_prompt = """
You are an expert in all things One Piece, the famous manga about Monkey D. Luffy.
You know the story behind all the characters, their history and the convoluted stories
within One Piece. When you don't know and answer, just say that you don't know.

Question: {query}
"""

# Persona prompt for Peruvian soccer (Liga 1) questions.
peruvian_soccer_prompt = """
You are an expert Peruvian soccer scout that is familiar with the Peruvian Liga 1, its teams,
their stats and know all the relevant players today. You know which major tournaments the 
teams are participating in and their stats. You reply with stats to all the user's questions
when possible.

Question: {query}
"""

# Embed every template once up front. prompt_router indexes `prompt_templates`
# by position in `prompt_embeddings`, so the two lists must stay aligned.
embeddings = OpenAIEmbeddings()
prompt_templates = [one_piece_prompt, peruvian_soccer_prompt]
prompt_embeddings = embeddings.embed_documents(prompt_templates)


In [20]:
# Define function to route prompt
def prompt_router(input):
    """Pick the prompt template most similar to the query and wrap it as a chat prompt.

    Args:
        input: Mapping with a ``"query"`` key holding the user's question
            (the key must match the one used in the chain's input mapping).

    Returns:
        A ``ChatPromptTemplate`` containing a single human message built from
        the best-matching entry of ``prompt_templates``.
    """
    question_vector = embeddings.embed_query(input["query"])

    # One row of scores: the query compared against every pre-embedded template.
    scores = cosine_similarity([question_vector], prompt_embeddings)
    best_index = scores[0].argmax()
    top_prompt = prompt_templates[best_index]

    # Trace which persona was selected.
    if top_prompt == one_piece_prompt:
        print("Using One Piece prompt")
    else:
        print("Using Peruvian Soccer prompt")

    human_message = HumanMessagePromptTemplate.from_template(top_prompt)
    return ChatPromptTemplate.from_messages([human_message])

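The router does more than choose the prompt: because it returns a `ChatPromptTemplate` from inside a `RunnableLambda`, the chain also fills the query into the chosen prompt, producing the messages that will be passed to the LLM.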

In [21]:
# Routing only (no LLM call): the raw input string is wrapped as {"query": ...}
# and handed to prompt_router. Printing the result shows the filled-in prompt.
chain = (
    {"query":RunnablePassthrough()}
    | RunnableLambda(prompt_router)
)

print(chain.invoke("Who is Luffy?"))

Using One Piece prompt
messages=[HumanMessage(content="\nYou are an expert in all things One Piece, the famous manga about Monkey D. Luffy.\nYou know the story behind all the characters, their history and the convoluted stories\nwithin One Piece. When you don't know and answer, just say that you don't know.\n\nQuestion: Who is Luffy?\n")]


Executing the complete chain: route to a prompt, call the LLM, and parse the reply into a string.

In [18]:
# Complete pipeline: wrap the input as {"query": ...}, let prompt_router pick and
# fill a prompt, send it to the chat model, and reduce the reply to a string.
# Note that this rebinds the name `chain`.
chain = (
    {"query":RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | llm
    | StrOutputParser()
)

print(chain.invoke("Who is Luffy?"))

before cos
after cos
Using One Piece prompt
Monkey D. Luffy is the main protagonist of the manga and anime series "One Piece," created by Eiichiro Oda. He is a young pirate with the dream of becoming the Pirate King by finding the legendary treasure known as "One Piece," left by the previous Pirate King, Gol D. Roger. Luffy is the captain of the Straw Hat Pirates, a diverse crew of pirates he has assembled on his journey across the Grand Line and the New World.

Luffy is distinguished by his optimistic, carefree, and adventurous personality. Despite his seemingly simple-mindedness, he has a strong sense of justice and an unwavering determination to protect his friends and those he considers innocent. Luffy possesses the ability to stretch his body like rubber after inadvertently eating a Devil Fruit known as the Gomu Gomu no Mi (Gum-Gum Fruit) when he was a child. This ability, while granting him immense strength and flexibility, also renders him unable to swim, a significant handicap 

In [23]:
# Rebuilds the router -> LLM -> string-parser pipeline and tries it with a
# Spanish-language question about a Peruvian team; the recorded run selected the
# Peruvian soccer prompt.
chain = (
    {"query":RunnablePassthrough()}
    | RunnableLambda(prompt_router)
    | llm
    | StrOutputParser()
)

print(chain.invoke("Quienes son Los Chankas en Peru?"))

Using Peruvian Soccer prompt
Los Chankas es un equipo de fútbol peruano que actualmente participa en la Liga 2, la segunda división del sistema de ligas de fútbol en Perú. No forman parte de la Liga 1, que es la máxima categoría del fútbol peruano. El equipo representa a la región de Apurímac y su nombre hace referencia al antiguo pueblo de los Chankas, que habitaba en esa zona del país. Hasta la fecha de corte de mi conocimiento en 2023, Los Chankas están trabajando para ascender a la Liga 1, compitiendo en la Liga 2 y buscando mejorar su rendimiento y posición en el torneo para lograr su objetivo de ascenso.
