#### Simple Gen AI APP Using Langchain

In [11]:
import os
from dotenv import load_dotenv
load_dotenv()

os.environ['OPENAI_API_KEY']=os.getenv("OPENAI_API_KEY")
## Langsmith Tracking
os.environ["LANGCHAIN_API_KEY"]=os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"]="true"
os.environ["LANGCHAIN_PROJECT"]=os.getenv("LANGCHAIN_PROJECT")

In [12]:
## Data Ingestion--From the website we need to scrape the data
from langchain_community.document_loaders import WebBaseLoader

In [13]:
loader=WebBaseLoader("https://en.wikipedia.org/wiki/Donald_Trump")
loader

<langchain_community.document_loaders.web_base.WebBaseLoader at 0x7013f9a10b30>

In [14]:
docs=loader.load()
docs



In [15]:
### Load Data--> Docs-->Divide our Docuemnts into chunks dcouments-->text-->vectors-->Vector Embeddings--->Vector Store DB
from langchain_text_splitters import RecursiveCharacterTextSplitter

text_splitter=RecursiveCharacterTextSplitter(chunk_size=1000,chunk_overlap=200)
documents=text_splitter.split_documents(docs)

In [16]:
documents

[Document(metadata={'source': 'https://en.wikipedia.org/wiki/Donald_Trump', 'title': 'Donald Trump - Wikipedia', 'language': 'en'}, page_content='Donald Trump - Wikipedia\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nJump to content\n\n\n\n\n\n\n\nMain menu\n\n\n\n\n\nMain menu\nmove to sidebar\nhide\n\n\n\n\t\tNavigation\n\t\n\n\nMain pageContentsCurrent eventsRandom articleAbout WikipediaContact us\n\n\n\n\n\n\t\tContribute\n\t\n\n\nHelpLearn to editCommunity portalRecent changesUpload file\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nAppearance\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nDonate\n\nCreate account\n\nLog in\n\n\n\n\n\n\n\n\nPersonal tools\n\n\n\n\n\nDonate Create account Log in\n\n\n\n\n\n\t\tPages for logged out editors learn more\n\n\n\nContributionsTalk\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nContents\nmove to sidebar\nhide\n\n\n\n\n(Top)\n\n\n\n\n\n1\nEarly l

In [17]:
from langchain_openai import OpenAIEmbeddings
embeddings=OpenAIEmbeddings()

In [18]:
from langchain_community.vectorstores import FAISS
vectorstoredb=FAISS.from_documents(documents,embeddings)

In [19]:
vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x7013d4d05c70>

In [20]:
## Query From a vector db
query="Trump has produced how many books under his name?"
result=vectorstoredb.similarity_search(query)
result[0].page_content

'Media career\nMain article: Media career of Donald Trump\nSee also: Bibliography of Donald Trump\nTrump has produced 19 books under his name, most written or co-written by ghostwriters.[118] His first book, The Art of the Deal (1987), was a New York Times Best Seller. While he was credited as co-author, the entire book was written by Tony Schwartz. The New Yorker said the book made Trump famous as an "emblem of the successful tycoon".[119]\nTrump had cameos in many films and television shows from 1985 to 2001.[120]\nStarting in the 1990s, Trump was a guest 24 times on the nationally syndicated Howard Stern Show.[121] He had his own short-form talk radio program, Trumped!, from 2004 to 2008.[122] From 2011 until 2015, he was a guest commentator on Fox & Friends.[123]'

In [21]:
## Query From a vector db
query="Trump earned how much money from his reality shows?"
reality_shows=vectorstoredb.similarity_search(query)
reality_shows[0].page_content

'From 2004 to 2015, Trump was co-producer and host of reality shows The Apprentice and The Celebrity Apprentice. On the shows, he was a superrich and successful chief executive who eliminated contestants with the catchphrase "you\'re fired". The New York Times called his portrayal a "highly flattering, highly fictionalized version of Mr. Trump".[124] The shows remade his image for millions of viewers nationwide.[125] With the related licensing agreements, they earned him more than $400\xa0million.[126]\nIn 2021, Trump, who had been a member of SAG-AFTRA since 1989, resigned to avoid a disciplinary hearing regarding the January\xa06 attack.[127] Two days later, the union permanently barred him.[128]'

In [22]:
from langchain_openai import ChatOpenAI
llm=ChatOpenAI(model="gpt-4o")

In [23]:
## Retrieval Chain, Document chain

from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate

## Answer question only based on the context (custom knowledge base)
prompt=ChatPromptTemplate.from_template(
    """
Answer the following question based only on the provided context:
<context>
{context}
</context>


"""
)

document_chain=create_stuff_documents_chain(llm,prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x7013d4d41820>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x7013d4da4230>, root_client=<openai.OpenAI object at 0x7013d4d856d0>, root_async_client=<openai.AsyncOpenAI object at 0x7013d4d41dc0>, model_name='gpt-4o', model_kwargs={}, openai_api_key=SecretStr('**********'))
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'},

In [24]:
from langchain_core.documents import Document
document_chain.invoke({
    "input":"Trump has degree in economics",
    "context":[Document(page_content="Trump graduated with a bachelor's degree in economics from the University of Pennsylvania in 1968. Becoming president of the family real estate business in 1971, he focused on luxury accommodation.")]
})

"What field did Trump graduate in from the University of Pennsylvania?\n\nTrump graduated with a bachelor's degree in economics."

However, we want the documents to first come from the retriever we just set up. That way, we can use the retriever to dynamically select the most relevant documents and pass those in for a given question.

In [25]:
### Input--->Retriever--->vectorstoredb

vectorstoredb

<langchain_community.vectorstores.faiss.FAISS at 0x7013d4d05c70>

In [26]:
retriever=vectorstoredb.as_retriever()
from langchain.chains import create_retrieval_chain  # noqa: E402
retrieval_chain=create_retrieval_chain(retriever,document_chain)


In [27]:
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x7013d4d05c70>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\nAnswer the following question based only on the provided context:\n<context>\n{context}\n</context>\n\n\n'), additional_kwargs={})])
            | Chat

In [28]:
## Get the response form the LLM
response=retrieval_chain.invoke({"input":"Does Trump like to drink wines?"})
response['answer']

"What is Donald Trump's stance on alcohol consumption based on the provided context?\n\nDonald Trump is a teetotaler, meaning he does not consume alcohol."

In [29]:

response

{'input': 'Does Trump like to drink wines?',
 'context': [Document(id='f0bb50a6-9aa9-4146-b35a-759214eb3ff8', metadata={'source': 'https://en.wikipedia.org/wiki/Donald_Trump', 'title': 'Donald Trump - Wikipedia', 'language': 'en'}, page_content='^ Parker, Ashley; Rucker, Philip (October 2, 2018). "Kavanaugh likes beer — but Trump is a teetotaler: \'He doesn\'t like drinkers.\'". The Washington Post. Retrieved February 5, 2021.\n\n^ Dangerfield, Katie (January 17, 2018). "Donald Trump sleeps 4–5 hours each night; he\'s not the only famous \'short sleeper\'". Global News. Retrieved February 5, 2021.\n\n^ Almond, Douglas; Du, Xinming (December 2020). "Later bedtimes predict President Trump\'s performance". Economics Letters. 197. doi:10.1016/j.econlet.2020.109590. ISSN\xa00165-1765. PMC\xa07518119. PMID\xa033012904.\n\n^ Ballengee, Ryan (July 14, 2018). "Donald Trump says he gets most of his exercise from golf, then uses cart at Turnberry". Golf News Net. Retrieved July 4, 2019.\n\n^ Rett

In [30]:
response['context']

[Document(id='f0bb50a6-9aa9-4146-b35a-759214eb3ff8', metadata={'source': 'https://en.wikipedia.org/wiki/Donald_Trump', 'title': 'Donald Trump - Wikipedia', 'language': 'en'}, page_content='^ Parker, Ashley; Rucker, Philip (October 2, 2018). "Kavanaugh likes beer — but Trump is a teetotaler: \'He doesn\'t like drinkers.\'". The Washington Post. Retrieved February 5, 2021.\n\n^ Dangerfield, Katie (January 17, 2018). "Donald Trump sleeps 4–5 hours each night; he\'s not the only famous \'short sleeper\'". Global News. Retrieved February 5, 2021.\n\n^ Almond, Douglas; Du, Xinming (December 2020). "Later bedtimes predict President Trump\'s performance". Economics Letters. 197. doi:10.1016/j.econlet.2020.109590. ISSN\xa00165-1765. PMC\xa07518119. PMID\xa033012904.\n\n^ Ballengee, Ryan (July 14, 2018). "Donald Trump says he gets most of his exercise from golf, then uses cart at Turnberry". Golf News Net. Retrieved July 4, 2019.\n\n^ Rettner, Rachael (May 14, 2017). "Trump thinks that exercisin