In [None]:
## Data ingestion: plain-text file
from langchain_community.document_loaders import TextLoader

# Point the loader at speech.txt (resolved relative to the working directory),
# then read it into a list of LangChain documents.
loader = TextLoader(file_path="speech.txt")
text_documents = loader.load()

# Last expression of the cell: show what was loaded.
text_documents

In [None]:
import os
from getpass import getpass

# Credentials must never be stored in a notebook: anything committed or shared
# exposes them to every reader. Take the Mistral API key from the environment
# and, only when it is missing or empty, ask for it interactively without
# echoing the input.
# NOTE: the key that was previously hard-coded in this cell has been exposed
# and should be revoked and rotated with the provider.
if not os.environ.get("MISTRAL_API_KEY"):
    os.environ["MISTRAL_API_KEY"] = getpass("Enter your Mistral API key: ")

In [None]:
## Web-based loader
import bs4
from langchain_community.document_loaders import WebBaseLoader

# Load one blog post. The SoupStrainer is handed to BeautifulSoup as
# `parse_only`, so parsing is restricted to elements carrying one of the
# listed CSS classes.
loader = WebBaseLoader(
    web_paths=("https://blog.langchain.com/agent-engineering-a-new-discipline/",),
    bs_kwargs={
        "parse_only": bs4.SoupStrainer(
            class_=("post-title", "post-template", "post_header"),
        ),
    },
)

# This cell only loads the page; chunking and indexing are not done here.
text_documents = loader.load()
text_documents

In [None]:
## PDF reader
from langchain_community.document_loaders import PyPDFLoader

# Read ppt.pdf (resolved relative to the working directory) into LangChain
# documents; `docs` is the input to the text splitter further down.
loader = PyPDFLoader(file_path="ppt.pdf")
docs = loader.load()

# Last expression of the cell: show the loaded documents.
docs


In [None]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the PDF documents into chunks: chunk_size=1000 with chunk_overlap=200,
# so neighbouring chunks share some context.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
)
documents = text_splitter.split_documents(docs)

# Last expression of the cell: show the resulting chunks.
documents

In [None]:
## Vector embedding and vector store (Chroma)
from langchain_community.vectorstores import Chroma
from langchain_mistralai.embeddings import MistralAIEmbeddings

# Embed every chunk with the Mistral embedding model and store the vectors in
# a Chroma collection.
db = Chroma.from_documents(
    documents=documents,
    embedding=MistralAIEmbeddings(),
)

In [None]:
## Query the Chroma vector store
query = "What is MNR"

# Similarity search against the Chroma store built above.
result = db.similarity_search(query)

# Show the text of the first hit.
result[0].page_content

In [None]:
## FAISS vector store
from langchain_community.vectorstores import FAISS

# Same chunks and the same embedding class as the Chroma store, indexed with
# FAISS instead.
db2 = FAISS.from_documents(
    documents=documents,
    embedding=MistralAIEmbeddings(),
)

In [None]:
# Run the same question against the FAISS store.
query = "What is MNR"
result = db2.similarity_search(query)

# Show the text of the first hit.
result[0].page_content

In [None]:
from langchain_mistralai import ChatMistralAI

# Chat model used by the document chain below.
llm = ChatMistralAI(
    model="mistral-large-latest",
)


In [None]:
from langchain_core.prompts import ChatPromptTemplate

# Prompt with two placeholders: {context} is filled with the retrieved
# documents and {input} with the user's question.
prompt = ChatPromptTemplate.from_template(
    template="""
Answer the following question based on the provided context,
Think step by step before providing a detailed answer. 
I will tip you $1000 if user finds the answer helpful.
<context>
{context}
</context>
Question: {input}
    """
)

In [None]:
## Chain intro: create a "stuff documents" chain
from langchain_classic.chains.combine_documents import create_stuff_documents_chain

# Combine the chat model and the prompt into a chain that places the supplied
# documents into the prompt's {context} slot.
document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)

In [None]:
## Retriever Intro
"""
A retriever is an interface that returns documents given an unstructured query. 
It is more general than a vector store. 
A retriever does not need to be able to store documents, only to return (or retrieve) them. 
Retrievers can be created from vector stores, but are also broad enough to include Wikipedia search and Amazon Kendra.
Retrievers accept a string query as input and return a list of Document objects as output.
Note that all vector stores can be cast to retrievers. 
Refer to the vector store integration docs for available vector stores. 
https://docs.langchain.com/oss/python/integrations/retrievers
"""

# Expose the FAISS store (db2) through the retriever interface.
retriever = db2.as_retriever()

# Last expression of the cell: show the retriever object.
retriever

In [None]:
"""
Retrieval chain: This chain takes in a user inquiry, which is then
passed to the retriever to fetch relevant documents. Those documents
(and original inputs) are then passed to an LLM to generate a response.
"""

from langchain_classic.chains.retrieval import create_retrieval_chain

# Wire the retriever to the document chain: retrieved documents are handed to
# the document chain together with the original input.
retrieval_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=document_chain,
)


In [None]:

# Ask a question through the full retrieval chain.
response = retrieval_chain.invoke(
    {"input": "What services does SNP offer"},
)

# Show only the generated answer from the response.
response["answer"]