In [2]:
!pip install langchain_community tiktoken langchain-openai langchainhub chromadb langchain




In [3]:
import os
from dotenv import load_dotenv
load_dotenv()
load_dotenv()
openai_key = os.getenv("OPENAI_API_KEY")
openai_model = os.getenv("OPENAI_MODEL_NAME")
os.environ["OPENAI_MODEL_NAME"]= openai_model
os.environ["OPENAI_API_KEY"]= openai_key

In [4]:
from langchain.prompts import ChatPromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_openai import ChatOpenAI

# multi-query
prompt = """You are an AI language model assistant. Your task is to generate 10 different versions of
    the given user questions to retrieve relevant data from the internet. In order to this well, you will need to 
    think like a political consultant incharge of running a campaign. By generating multiple perspectives on the user question, 
    your goal is to help the user get a better answer with a full picture. Provide these alternative questions separated by newlines. 
    Original question: {question}"""
prompt_perspectives = ChatPromptTemplate.from_template(prompt)
generate_queries = (
    prompt_perspectives
    | ChatOpenAI(temperature=0) 
    | StrOutputParser()
    | (lambda x: x.split("\n"))
)
# We define the prompt and context of the LLM
# Function call to Open AI model with the prompt/system message plus user question
# Model generates multiple queries, the goal being to offer different perspectives on the original question

In [5]:
# Test the chain
question = "What is the political landscape of Gainesville, Fl?"
chain = generate_queries
docs = generate_queries.invoke({"question": question})
print(docs)

['1. How would you describe the current political environment in Gainesville, Fl?', '2. Can you provide an overview of the political dynamics in Gainesville, Fl?', '3. What are the key political players and parties shaping the landscape in Gainesville, Fl?', '4. How has the political landscape in Gainesville, Fl evolved in recent years?', '5. What are the major political issues dominating the discourse in Gainesville, Fl?', '6. Who are the influential political figures driving the agenda in Gainesville, Fl?', '7. What demographic factors influence the political landscape of Gainesville, Fl?', '8. How do local elections impact the political landscape of Gainesville, Fl?', '9. What are the main political ideologies at play in Gainesville, Fl?', '10. How does the political landscape of Gainesville, Fl compare to other cities in Florida?']


### Decomposition

In [7]:
from langchain.output_parsers import PydanticToolsParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI

decomp = """You are an AI language model assistant. Your task is to generate 5 different versions of
    the given user questions to retrieve relevant data from the internet. In order to this well, you will need to 
    think like a political consultant incharge of running a campaign. By generating multiple perspectives on the user question, 
    your goal is to help the user get a better answer with a full picture. Provide these alternative questions separated by newlines. 
    Original question: {question}"""
decomp_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", decomp),
        ("human", "{question}"),
    ]
)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
# llm_with_tools = llm.bind_tools([SubQuery])
# parser = PydanticToolsParser(tools=[SubQuery])
query_analyzer = (decomp_prompt | llm | StrOutputParser() | (lambda x: x.split("\n")))


In [8]:
query_analyzer.invoke({"question":question})

['1. How do political parties influence the dynamics of Gainesville, Fl?',
 '2. What are the key political issues shaping the landscape of Gainesville, Fl?',
 '3. Who are the prominent political figures in Gainesville, Fl and how do they impact the local politics?',
 '4. In what ways do local elections reflect the political preferences of Gainesville, Fl residents?',
 '5. How has the political landscape of Gainesville, Fl evolved over the past decade and what are the implications for the future?']

## Step-Back 


In [13]:
# Few Shot Examples
from langchain_core.prompts import ChatPromptTemplate, FewShotChatMessagePromptTemplate
examples = [
    {
        "input": "What is the political landscape of Gainesville, FL?",
        "output": "What are important factors in local politics?",
    },
    {
        "input": "What are important issues to the voters of Gainesville, FL?",
        "output": "what are important political issues?",
    },
]
# We now transform these to example messages
example_prompt = ChatPromptTemplate.from_messages(
    [
        ("human", "{input}"),
        ("ai", "{output}"),
    ]
)
few_shot_prompt = FewShotChatMessagePromptTemplate(
    example_prompt=example_prompt,
    examples=examples,
)
prompt = ChatPromptTemplate.from_messages(
    [
        (
            "system",
            """You are an expert at political campaigns. Your task is to stepback and paraphrase a quesitons into 5 more 
                generic step-back questions that are easier to answer.
                Here are a few examples:""",
        ),
        # Few shot examples
        few_shot_prompt,
        # New question
        ("user", "{question}"),
    ]
)

In [16]:
generate_queries_step_back = prompt | ChatOpenAI(temperature=0) | StrOutputParser() | (lambda x: x.split("\n"))
question = "What is the political landscape of Gainesville, FL??"
generate_queries_step_back.invoke({"question": question})

['What are the main political parties in Gainesville, FL?',
 'What are the demographics of the population in Gainesville, FL?',
 'What are the key political events that have shaped Gainesville, FL?',
 'What are the major political ideologies present in Gainesville, FL?',
 'What are the key political institutions in Gainesville, FL?']

## HyDE - Hypothetical Document Embedding


In [19]:
from langchain.prompts import ChatPromptTemplate

# HyDE document genration
template = """Please write a short memo to answer the question below:
Question: {question}
Passage:"""
prompt_hyde = ChatPromptTemplate.from_template(template)

# from langchain_core.output_parsers import StrOutputParser
# from langchain_openai import ChatOpenAI

generate_docs_for_retrieval = (
    prompt_hyde | ChatOpenAI(temperature=0) | StrOutputParser() 
)

# Run
question = "What is the political landscape of Gainesville, FL?"
generate_docs_for_retrieval.invoke({"question":question})

'The political landscape of Gainesville, FL is diverse and dynamic. The city is known for its progressive values and has a history of electing liberal-leaning officials. However, there is also a strong conservative presence in the area, particularly in the surrounding suburbs and rural areas. The city council is currently controlled by a majority of progressive members, but there is always a mix of viewpoints and ideologies represented in local government. Overall, Gainesville is a politically engaged community with a range of perspectives and opinions.'

In [21]:
decomp = """You are an AI language model assistant. Your task is to generate 5 different questions from
    the given user memo to retrieve relevant data from the internet. In order to this well, you will need to 
    think like a political consultant incharge of running a campaign. By generating multiple perspectives on the user question, 
    your goal is to help the user get a better answer with a full picture. Provide these alternative questions separated by newlines. 
    Original question: {question}"""
decomp_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", decomp),
        ("human", "{memo}"),
    ]
)
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
query_analyzer = (generate_docs_for_retrieval | decomp_prompt | llm | StrOutputParser() | (lambda x: x.split("\n")))

In [22]:
query_analyzer.invoke({"question":question})

TypeError: Expected mapping type as input to ChatPromptTemplate. Received <class 'str'>.