### Pinecone DB

In [90]:
from dotenv import load_dotenv
import os
from langchain_google_genai import GoogleGenerativeAIEmbeddings
# Load settings via python-dotenv before anything reads the environment; the
# next cell expects HUGGING_FACE_TOKEN, GOOGLE_API_KEY and PINECONE_API_KEY.
load_dotenv()

True

In [91]:
# The notebook needs these three settings to be present in the environment.
# Writing os.getenv(name) straight back into os.environ (as this cell used to)
# stores the value that was just read, so it changes nothing when the key is
# set, and fails with an opaque "TypeError: str expected, not NoneType" when
# the key is missing. Check explicitly instead and report every missing name
# in a single, actionable error.
REQUIRED_ENV_VARS = ("HUGGING_FACE_TOKEN", "GOOGLE_API_KEY", "PINECONE_API_KEY")

missing_env_vars = [name for name in REQUIRED_ENV_VARS if os.getenv(name) is None]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variable(s): "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or shell before running this notebook."
    )

In [2]:
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face embedding model; the length check further down shows it
# produces 384-dimensional vectors.
embedding1 = HuggingFaceEmbeddings(
    model_name="all-MiniLM-L6-v2",
)

  from .autonotebook import tqdm as notebook_tqdm
To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


#### Using Google embedding model

In [5]:
# Google Generative AI embedding client used for the Pinecone vector store.
google_embeddings = GoogleGenerativeAIEmbeddings(
    model="models/embedding-001",
)

# Embed a throwaway query and show the vector length (the embedding width).
len(
    google_embeddings.embed_query("Hello World")
)

768

In [3]:
# Show the vector length produced by the Hugging Face embedder for comparison
# with the Google embedder above.
len(embedding1.embed_query("Hi Bharath"))

384

### Pinecone

In [26]:
from pinecone import Pinecone
from pinecone import ServerlessSpec

# Take the Pinecone key from the environment (never hard-code it) and open
# the client that the index cells below use.
pinecone_api_key = os.environ.get("PINECONE_API_KEY")
pc = Pinecone(
    api_key=pinecone_api_key,
)

In [36]:
# Name of the Pinecone index used throughout the notebook. Despite the
# variable name, this is the string name, not an index object.
index = "index-test"

In [37]:
# Create the index only when it does not exist yet, so re-running the cell
# is harmless.
if not pc.has_index(index):
    # AWS us-east-1 serverless deployment.
    serverless_spec = ServerlessSpec(cloud="aws", region="us-east-1")
    # dimension=768 matches the vector length reported for google_embeddings
    # earlier in the notebook; the two must agree.
    pc.create_index(
        name=index,
        dimension=768,
        metric="cosine",
        spec=serverless_spec,
    )

In [38]:
# NOTE: despite its name, `index_name` holds the Index handle returned by
# pc.Index() (see the repr in the output); the string name lives in `index`.
index_name = pc.Index(index)
index_name

<pinecone.data.index.Index at 0x179e334f810>

In [39]:
from langchain_pinecone import PineconeVectorStore

In [40]:
# Wrap the Pinecone index handle in a LangChain vector store that embeds
# text with the Google embedding client.
vector_store = PineconeVectorStore(
    embedding=google_embeddings,
    index=index_name,
)

In [41]:
from langchain_core.documents import Document


# (page_content, source) pairs for the sample corpus. Order matters: the
# positional names document_1 ... document_10 are bound from it below.
_SAMPLE_CORPUS = [
    ("I had chocolate chip pancakes and scrambled eggs for breakfast this morning.", "tweet"),
    ("The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.", "news"),
    ("Building an exciting new project with LangChain - come check it out!", "tweet"),
    ("Robbers broke into the city bank and stole $1 million in cash.", "news"),
    ("Wow! That was an amazing movie. I can't wait to see it again.", "tweet"),
    ("Is the new iPhone worth the price? Read this review to find out.", "website"),
    ("The top 10 soccer players in the world right now.", "website"),
    ("LangGraph is the best framework for building stateful, agentic applications!", "tweet"),
    ("The stock market is down 500 points today due to fears of a recession.", "news"),
    ("I have a bad feeling I am going to get deleted :(", "tweet"),
]

# One Document per pair, each with its own metadata dict.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in _SAMPLE_CORPUS
]

# Keep the individual names bound to the same objects held in `documents`.
(
    document_1,
    document_2,
    document_3,
    document_4,
    document_5,
    document_6,
    document_7,
    document_8,
    document_9,
    document_10,
) = documents


In [42]:
# Display the sample documents to eyeball them before they are stored.
documents

[Document(metadata={'source': 'tweet'}, page_content='I had chocolate chip pancakes and scrambled eggs for breakfast this morning.'),
 Document(metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'),
 Document(metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!'),
 Document(metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.'),
 Document(metadata={'source': 'tweet'}, page_content="Wow! That was an amazing movie. I can't wait to see it again."),
 Document(metadata={'source': 'website'}, page_content='Is the new iPhone worth the price? Read this review to find out.'),
 Document(metadata={'source': 'website'}, page_content='The top 10 soccer players in the world right now.'),
 Document(metadata={'source': 'tweet'}, page_content='LangGraph is the best framework for building stateful, agentic application

In [44]:
from uuid import uuid4

# One random UUID string per document; passed as the record IDs when the
# documents are added to the vector store.
uuids = [str(uuid4()) for _ in documents]

In [45]:
# Print the generated IDs so they can be matched against the stored records.
print(uuids)

['79c788a4-448c-4f64-83bc-eb075eda76f2', '42bdec08-012d-4815-9ba3-68f04296d814', '0e265470-97ea-404c-a2e9-ab89bac07ec0', '190b3d50-91ec-48b7-bd0b-492d58077aef', '5e9eff41-0fc1-4efb-bf7c-65c221630fa0', '02191078-caa0-4b71-8a0c-c254b65cc2c8', '45b61a53-1314-439d-ab2d-c074ab690914', 'f15fb443-bd71-48b2-bf99-71cc29cb2552', '344bf992-76e3-4447-98c2-b8bc715a21c1', 'dd18b3b9-b303-49ae-bafb-38637266732d']


In [47]:
# Store the sample documents under the generated IDs; the call's return
# value (shown as the cell output) is the list of IDs.
# NOTE(review): the IDs are fresh random UUIDs, so re-running the notebook
# presumably adds a second copy of every document to the index - confirm.
vector_store.add_documents(
    documents=documents,
    ids=uuids,
)

['79c788a4-448c-4f64-83bc-eb075eda76f2',
 '42bdec08-012d-4815-9ba3-68f04296d814',
 '0e265470-97ea-404c-a2e9-ab89bac07ec0',
 '190b3d50-91ec-48b7-bd0b-492d58077aef',
 '5e9eff41-0fc1-4efb-bf7c-65c221630fa0',
 '02191078-caa0-4b71-8a0c-c254b65cc2c8',
 '45b61a53-1314-439d-ab2d-c074ab690914',
 'f15fb443-bd71-48b2-bf99-71cc29cb2552',
 '344bf992-76e3-4447-98c2-b8bc715a21c1',
 'dd18b3b9-b303-49ae-bafb-38637266732d']

In [59]:
# Top-3 similarity search restricted to documents whose metadata source is
# "news"; the saved output accordingly contains only news items.
results = vector_store.similarity_search(
    "What is langchain?",
    k=3,
    filter={"source": "news"},
)
print(results)

[Document(id='344bf992-76e3-4447-98c2-b8bc715a21c1', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'), Document(id='42bdec08-012d-4815-9ba3-68f04296d814', metadata={'source': 'news'}, page_content='The weather forecast for tomorrow is cloudy and overcast, with a high of 62 degrees.'), Document(id='190b3d50-91ec-48b7-bd0b-492d58077aef', metadata={'source': 'news'}, page_content='Robbers broke into the city bank and stole $1 million in cash.')]


In [60]:
# Retriever view of the vector store, configured for score-threshold search
# with k=3 and a threshold of 0.5.
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"k": 3, "score_threshold": 0.5},
)

In [65]:
# Try the score-threshold retriever (k=3, score_threshold=0.5) on a one-word
# query; no metadata filter is applied here.
retriever.invoke("Delete")

[Document(id='dd18b3b9-b303-49ae-bafb-38637266732d', metadata={'source': 'tweet'}, page_content='I have a bad feeling I am going to get deleted :('),
 Document(id='344bf992-76e3-4447-98c2-b8bc715a21c1', metadata={'source': 'news'}, page_content='The stock market is down 500 points today due to fears of a recession.'),
 Document(id='0e265470-97ea-404c-a2e9-ab89bac07ec0', metadata={'source': 'tweet'}, page_content='Building an exciting new project with LangChain - come check it out!')]

In [92]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Gemini chat model used as the answer generator in the RAG chain; a low
# temperature is set explicitly.
model = ChatGoogleGenerativeAI(
    temperature=0.1,
    model="models/gemini-1.5-pro-latest",
)

In [93]:
from langchain import hub

# Pull the shared "rlm/rag-prompt" template from the LangChain hub
# (presumably a network call) and display it. The saved output shows it
# expects `context` and `question` inputs.
prompt = hub.pull("rlm/rag-prompt")

prompt

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])

In [94]:
import pprint

# Show the same prompt through pprint; the saved output is identical to the
# plain repr of `prompt`.
pprint.pprint(prompt)

ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])


In [95]:
from langchain_core.prompts import PromptTemplate

# Hand-written prompt with the same two placeholders as the hub prompt.
# NOTE(review): the chain built later in this notebook is wired to `prompt`,
# so `prt` appears to be unused here - confirm before relying on it.
prt = PromptTemplate(
    template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise. Context: {context} Question: {question} Answer:""",
    input_variables=["context", "question"],
)

In [96]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Return the ``page_content`` of every item in ``docs`` as one string.

    Consecutive contents are separated by a blank line (two newlines). An
    empty iterable produces an empty string.
    """
    separator = "\n\n"
    contents = [document.page_content for document in docs]
    return separator.join(contents)

In [97]:
# RAG pipeline, composed with LangChain's `|` operator:
#   1. the input question goes to the retriever, and format_docs flattens the
#      retrieved documents into the text that fills `context`;
#      RunnablePassthrough supplies the question itself as `question`;
#   2. the filled prompt is sent to the chat model;
#   3. StrOutputParser turns the model reply into a plain string.
# Note this uses the hub `prompt`, not the locally defined `prt`.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
    | StrOutputParser()
)

In [98]:
# Ask the chain a question end to end.
# NOTE: the saved output for this cell is a GoogleGenerativeAIError raised
# while embedding content ("API key expired"); renew GOOGLE_API_KEY and
# re-run to get an actual answer.
rag_chain.invoke("What is langchain?")

GoogleGenerativeAIError: Error embedding content: 400 API key expired. Please renew the API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key expired. Please renew the API key."
]