In [1]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_community.document_loaders import WebBaseLoader, TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from langchain_core.documents import Document
from langchain.chains import create_retrieval_chain

# Credentials are expected to already be in the process environment
# (load_dotenv() above reads them from a local .env file). Re-assigning
# os.environ[NAME] = os.getenv(NAME) adds nothing when NAME is set and raises
# TypeError when it is missing, so only report what is absent here.
missing_env_vars = [
    name
    for name in ('OPENAI_API_KEY', 'LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT')
    if not os.getenv(name)
]
if missing_env_vars:
    print(f"Warning: environment variables not set: {', '.join(missing_env_vars)}")

# LangSmith tracing
# The switch LangSmith reads is LANGCHAIN_TRACING_V2 (TRACING, not TRACKING);
# a misspelled key is silently ignored and no traces are recorded.
os.environ['LANGCHAIN_TRACING_V2'] = "true"

USER_AGENT environment variable not set, consider setting it to identify your requests.


### Data ingestion: load the source text from a local file

In [3]:
# Raw string: Windows backslashes must not be parsed as escape sequences
# (e.g. "\A", "\G" are invalid escapes and warn on recent Python versions).
# NOTE(review): absolute, machine-specific path - consider a configurable data dir.
LLM_WIKI_PATH = r'D:\AI\GenAI\Learn_Langchain\llm_wiki.txt'

# Read the whole text file through LangChain's TextLoader.
loader = TextLoader(LLM_WIKI_PATH)
docs = loader.load()
print(docs)

[Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content="A large language model (LLM) is a computational model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. Based on language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a computationally intensive self-supervised and semi-supervised training process.[1] LLMs can be used for text generation, a form of generative AI, by taking an input text and repeatedly predicting the next token or word.[2]\n\nLLMs are artificial neural networks that utilize the transformer architecture, invented in 2017. The largest and most capable LLMs, as of June 2024, are built with a decoder-only transformer-based architecture, which enables efficient processing and generation of large-scale text data.\n\nHistorically, up to 2020, fine-tuning was the primary method used to a

### Split the text into chunks to manage the context window size

In [4]:
# Cut the loaded text into overlapping chunks (size 500, overlap 50) so each
# piece stays small enough to embed and to fit into the prompt later on.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=50,
    chunk_size=500,
)
documents = text_splitter.split_documents(docs)
documents

[Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='A large language model (LLM) is a computational model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. Based on language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a computationally intensive self-supervised and semi-supervised training process.[1] LLMs can be used for text generation, a form of generative AI, by taking an input text and'),
 Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='of generative AI, by taking an input text and repeatedly predicting the next token or word.[2]'),
 Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='LLMs are artificial neural networks that utilize the transformer architecture, invented in 2017. The largest and most capable

#### Embeddings - text to vector

In [5]:
# Embedding client used to turn each chunk (and each query) into a vector.
embeddings = OpenAIEmbeddings()

### Store the embeddings in a vector DB

In [6]:
# Embed every chunk and build a FAISS index over the resulting vectors.
vectordb = FAISS.from_documents(
    embedding=embeddings,
    documents=documents,
)
vectordb

<langchain_community.vectorstores.faiss.FAISS at 0x15b56ad3c40>

In [7]:
# Sanity-check the index with a direct similarity lookup and show the top hit.
sample_query = "what is large language model?"
results = vectordb.similarity_search(sample_query)
results[0]

Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='A large language model (LLM) is a computational model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. Based on language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a computationally intensive self-supervised and semi-supervised training process.[1] LLMs can be used for text generation, a form of generative AI, by taking an input text and')

In [8]:
# Keep only the raw text of the best-matching chunk.
top_match = results[0]
context = top_match.page_content
context

'A large language model (LLM) is a computational model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. Based on language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a computationally intensive self-supervised and semi-supervised training process.[1] LLMs can be used for text generation, a form of generative AI, by taking an input text and'

### LLM

In [9]:
# Chat model that will generate the final answers.
llm = ChatOpenAI(
    model="gpt-4o",
)
llm

ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000015B455F5030>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000015B56A9B790>, model_name='gpt-4o', openai_api_key=SecretStr('**********'), openai_proxy='')

#### Retrieval chain

In [11]:
# Prompt text: {context} receives the supplied documents, {input} the user question.
prompt_template = """
    Answer the following question based on the provided context only
    <context>
    {context}
    </context>
    <query>
    {input}
    </query>
    """
prompt = ChatPromptTemplate.from_template(prompt_template)

# Chain that stuffs the documents into the prompt and sends it to the LLM.
document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), config={'run_name': 'format_inputs'})
| ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\n    Answer the following question based on the provided context only\n    <context>\n    {context}\n    </context>\n    <query>\n    {input}\n    </query>\n    '))])
| ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000015B455F5030>, async_client=<openai.resources.chat.completions.AsyncCompletions object at 0x0000015B56A9B790>, model_name='gpt-4o', openai_api_key=SecretStr('**********'), openai_proxy='')
| StrOutputParser(), config={'run_name': 'stuff_documents_chain'})

In [12]:
# Expose the FAISS index as a retriever, then wire retrieval in front of the
# document chain so a question is answered from the retrieved chunks.
retriever = vectordb.as_retriever()
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x0000015B56AD3C40>), config={'run_name': 'retrieve_documents'})
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), config={'run_name': 'format_inputs'})
            | ChatPromptTemplate(input_variables=['context', 'input'], messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'input'], template='\n    Answer the following question based on the provided context only\n    <context>\n    {context}\n    </context>\n    <query>\n    {input}\n    </query>\n    '))])
            | ChatOpenAI(client=<openai.resources.chat.completions.Completions object at 0x0000015B455F5030>, async_clien

In [13]:
# Run the full pipeline: retrieve relevant chunks, then answer from them.
question = "what is LLM?"
response = retrieval_chain.invoke({"input": question})
response

{'input': 'what is LLM?',
 'context': [Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='LLMs are artificial neural networks that utilize the transformer architecture, invented in 2017. The largest and most capable LLMs, as of June 2024, are built with a decoder-only transformer-based architecture, which enables efficient processing and generation of large-scale text data.'),
  Document(metadata={'source': 'D:\\AI\\GenAI\\Learn_Langchain\\llm_wiki.txt'}, page_content='A large language model (LLM) is a computational model notable for its ability to achieve general-purpose language generation and other natural language processing tasks such as classification. Based on language models, LLMs acquire these abilities by learning statistical relationships from vast amounts of text during a computationally intensive self-supervised and semi-supervised training process.[1] LLMs can be used for text generation, a form of generative AI, by taking an input