# RAG with Pinecone VectorDB

In [12]:
import os

In [13]:
from dotenv import load_dotenv
load_dotenv()


os.environ['HF_TOKEN']=os.getenv("HF_TOKEN")
os.environ['GOOGLE_API_KEY'] = os.getenv('GOOGLE_API_KEY')
os.environ['PINECONE_API_KEY'] = os.getenv('PINECONE_API_KEY')

## Loading The File

In [14]:
file_path = "../../2.1-DataIngestion/speech.txt"

In [15]:
from langchain_community.document_loaders import TextLoader
loader = TextLoader(file_path)

In [16]:
loader.load()

[Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of political liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of the champions of the rights of mankind. We shall be satisfied when those rights have been made as secure as the faith and the freedom of nations can make them.\n\nJust because we fight without rancor and without selfish object, seeking nothing for ourselves but what we shall wish to share with all free peoples, we shall, I feel confident, conduct our operations as belligerents without passion and ourselves observe with proud punctilio the principles of right and of fair play we profess to be fighting for.\n\n…\n\nIt will be all the easier for us to conduct ourselves as belligerents in a high spirit 

## Splitting the file into Chunks


In [17]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter_doc = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap=50)

In [19]:
splitted_docs = splitter_doc.split_documents(loader.load())

In [20]:
splitted_docs

[Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of'),
 Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='must be planted upon the tested foundations of political liberty. We have no selfish ends to serve.'),
 Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='liberty. We have no selfish ends to serve. We desire no conquest, no dominion. We seek no'),
 Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='We desire no conquest, no dominion. We seek no indemnities for ourselves, no material compensation'),
 Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='for ourselves, no material compensation for the sacrifices we shall freely make. We are but one of'),
 Document(metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content=

## Embedding

In [21]:
# Hugging face Embedder
#from langchain_huggingface import HuggingFaceEmbeddings
#embeddings=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

In [None]:
# Let's use Google Embedder, it creates more feature representation that hugging face embedder
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")


## VectorDB creation in cloud using Pinecone

In [24]:
from pinecone import Pinecone

In [25]:
pinecone_api_key = os.getenv('PINECONE_API_KEY')

In [26]:
pc= Pinecone(api_key=pinecone_api_key)

### Index creation

In [33]:
index_name = "myrag"

In [34]:
from pinecone import ServerlessSpec

In [35]:
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        dimension=768,
        metric='cosine',
        spec=ServerlessSpec(cloud='aws',region='us-east-1')
    )

In [36]:
index = pc.Index(index_name)

### VectorDB creation 

In [37]:
from langchain_pinecone import PineconeVectorStore

In [38]:
vector_store = PineconeVectorStore(index=index, embedding=embeddings)

In [39]:
vector_store.add_documents(documents=splitted_docs)

['73615c08-3610-4927-b9a2-157af0979e30',
 'f5c1f3d6-fde0-4eb5-9016-64f5253c11f4',
 'e4082f87-19f0-4bd4-bf69-c603337155df',
 'edfae8c6-277d-43d7-9bbe-834f19607696',
 'f1921a3b-ed37-44d5-92a9-d57efc103bd6',
 'bdbacb4d-2fca-49f4-b6fe-69634c8148f2',
 '551f6982-df10-43df-bc11-af2da4468021',
 '87af4c50-5ec7-4755-b09c-108dddb76f2e',
 '89d65335-22b6-43a5-a40e-4572945c9ca9',
 '7dbdd2d4-14db-411e-a877-fee918cf7ccf',
 'd168b223-858b-490c-a37b-3c924c6c8d47',
 '38183dcb-443d-4509-96c1-4796f6c8ca40',
 '60eb649b-7b49-4eea-a075-d3f09b4cce15',
 'b1514780-5a1c-4808-8f3e-79c041efa29f',
 '823ff895-e406-42aa-b635-b7e9a8c07032',
 '8454527d-86e0-4b9d-b981-5a9b7900e02e',
 'a67446f7-0ce2-4880-b2ed-7fcf2f8da34f',
 '68729e4a-dc96-45f7-adbd-2fa91c006f64',
 'f6febc89-efb1-406f-9d5a-9515c9195f2f',
 '0a3e42a5-b2b4-485f-828c-435c70f1e2e2',
 '3d42197c-61a4-4472-ad63-8688fe699f12',
 'c8dbaf43-b708-4a7c-853c-d060d7f33e25',
 'f6765a25-d46e-4e71-83b9-2c1db334d76a',
 'd36062ad-130a-4a59-80a2-f76a29be55be',
 '33ed579f-1ae0-

In [41]:
vector_store.similarity_search('democracy',k=2)

[Document(id='73615c08-3610-4927-b9a2-157af0979e30', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of'),
 Document(id='4c6e0f06-653f-4be9-bc66-9f73f53fc658', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='carried nearest our hearts—for democracy, for the right of those who submit to authority to have a')]

### Creation of retrival pipeline

In [42]:
retriver = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.7} #hyperparameter
)

In [44]:
retriver.invoke('what is Democracy?')

[Document(id='4c6e0f06-653f-4be9-bc66-9f73f53fc658', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='carried nearest our hearts—for democracy, for the right of those who submit to authority to have a'),
 Document(id='73615c08-3610-4927-b9a2-157af0979e30', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='The world must be made safe for democracy. Its peace must be planted upon the tested foundations of'),
 Document(id='999a9bcc-de5a-449e-9c5b-56cd2c5eb9d8', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='right of those who submit to authority to have a voice in their own governments, for the rights and'),
 Document(id='b466d970-ecc7-49b0-b3a8-55cb9f43f4d0', metadata={'source': '../../2.1-DataIngestion/speech.txt'}, page_content='shall fight for the things which we have always carried nearest our hearts—for democracy, for the')]

## LLM Creation

In [45]:
from langchain_google_genai import ChatGoogleGenerativeAI
model = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

#### Prompt creation 

In [46]:
from langchain import hub
prompt = hub.pull('rlm/rag-prompt')

#### Chaining

In [47]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [48]:
def format_doc(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [49]:
rag_chain = (
    {"context": retriver | format_doc, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [50]:
rag_chain.invoke('What is my Name?')

'This question cannot be answered from the given context.  The provided text does not contain any information about your name.'

In [51]:
rag_chain.invoke("What is Democracy?")

"Democracy is the right of those who submit to authority to have a voice in their own governments.  It's a system where the people have a say in how they are governed.  The provided text emphasizes its importance and the need to protect it."

In [52]:
rag_chain.invoke("Can u say what is powerfull speech?")

'I\'m sorry, but this text does not define "powerful speech."  The provided text focuses on leadership and the burdens of war, not the characteristics of effective or persuasive communication.'

In [53]:
rag_chain.invoke("What is leadership?")

'Based on the provided text, leadership is described as a fearful undertaking, especially when leading a peaceful people into war.  The context highlights the challenges and potential for disloyalty that leaders face.  It also suggests that strong leadership involves patience and forbearance.'