In [1]:

from langchain_community.embeddings.huggingface import HuggingFaceEmbeddings
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores.faiss import FAISS

from langchain.chains import RetrievalQAWithSourcesChain
from langchain_google_genai import GoogleGenerativeAI
from langchain_community.document_loaders import PyPDFLoader
from langchain.llms.huggingface_hub import HuggingFaceHub
from langchain.prompts.chat import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)
import os

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dotenv import load_dotenv

# Load environment variables from .env file.
# NOTE(review): presumably this is where the API key for the Gemini model comes
# from, since no key is passed explicitly in the visible cells — confirm.
load_dotenv()

True

In [3]:
# System-prompt text for source-cited question answering, assembled line by line
# so that blank lines and the trailing space after "lengthy." are explicit.
# NOTE(review): "{summaries}" looks like a template placeholder for the retrieved
# context — confirm against whichever prompt template consumes this string.
system_template = "\n".join(
    [
        "Use the following pieces of context to answer the users question.",
        "If you don't know the answer, just say that you don't know, don't try to make up an answer.",
        'ALWAYS return a "SOURCES" part in your answer.',
        'The "SOURCES" part should be a reference to the source of the document from which you got your answer.',
        "The answer must be lengthy. ",
        "",
        "Example of your response should be:",
        "",
        "```",
        "The answer is foo",
        "SOURCES: xyz",
        "```",
        "",
        "Begin!",
        "----------------",
        "{summaries}",
    ]
)

In [4]:
# NOTE(review): custom chat-prompt wiring, currently disabled. While this stays
# commented out, `system_template` is not passed to any chain in the visible
# cells, and the `RetrievalQAWithSourcesChain.from_chain_type(...)` call below
# receives no `chain_type_kwargs`.
# messages = [
#     SystemMessagePromptTemplate.from_template(system_template),
#     HumanMessagePromptTemplate.from_template("{question}"),
# ]
# prompt = ChatPromptTemplate.from_messages(messages)
# chain_type_kwargs = {"prompt": prompt}

In [5]:

# Tunable inputs for loading and chunking, named so they can be changed in one place.
PDF_PATH = 'spark.pdf'   # PDF file to index
CHUNK_SIZE = 1000        # chunk_size handed to the splitter
CHUNK_OVERLAP = 100      # chunk_overlap handed to the splitter

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=CHUNK_SIZE,
    chunk_overlap=CHUNK_OVERLAP,
)
loader = PyPDFLoader(PDF_PATH)
documents = loader.load()
# Split the text into chunks
texts = text_splitter.split_documents(documents)

# Fail here with a clear message rather than handing an empty list to the
# vector-store build in the next cell (e.g. when the PDF has no extractable text).
if not texts:
    raise ValueError(
        f"No text chunks were produced from {PDF_PATH!r}; "
        "check that the file contains extractable text."
    )

In [6]:
# Embedding model: all-MiniLM-L6-v2 run on the CPU, with embedding
# normalization switched off.
embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs=dict(device="cpu"),
    encode_kwargs=dict(normalize_embeddings=False),
)

# Embed every chunk in `texts` and build the FAISS index from them.
docsearch = FAISS.from_documents(documents=texts, embedding=embeddings)

In [14]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used by the question-answering chain.
llm = ChatGoogleGenerativeAI(model="gemini-pro")

# Retriever over the FAISS index, created with its default settings.
retriever = docsearch.as_retriever()

# Question-answering-with-sources chain using the "stuff" chain type.
chain = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
)


In [16]:
# Smoke test of the chat model on its own; the retrieval chain is not involved.
# Reuses the `llm` created in the chain-setup cell instead of constructing a
# second client with the same `model="gemini-pro"` argument (this cell already
# depends on that cell for the ChatGoogleGenerativeAI import).
result = llm.invoke("Write a ballad about LangChain")
print(result.content)

**Ballad of LangChain**

In realms where code entwines,
A language emerges, a beacon that shines.
LangChain, the mighty, a tool so grand,
Unveiling secrets in AI's vast land.

With stanzas of data, it weaves its tale,
A symphony of knowledge, without a fail.
Tokens and models, a dance so divine,
Generating insights, a vision so fine.

Chorus:
LangChain, LangChain, a marvel of might,
Guiding our steps in AI's starry night.
Its power boundless, like an endless sea,
Unveiling truths, setting our minds free.

From text to speech, a voice it can lend,
Translating languages, breaking down the trend.
Sentiment analysis, a keen eye it has,
Unveiling emotions, shattering the glass.

In games of strategy, it plans with grace,
Predicting moves, winning with speed and pace.
Medical marvels, it aids in their quest,
Diagnosing ailments, giving us our best.

Chorus:
LangChain, LangChain, a marvel of might,
Guiding our steps in AI's starry night.
Its power boundless, like an endless sea,
Unveiling tru