<a href="https://colab.research.google.com/github/muhdrehan/Langchain/blob/main/lang_chain_with_RAG.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Setup langchain with RAG libraries

In [1]:
%pip install langchain pinecone langchain-google-genai

Collecting pinecone
  Downloading pinecone-5.4.2-py3-none-any.whl.metadata (19 kB)
Collecting langchain-google-genai
  Downloading langchain_google_genai-2.0.8-py3-none-any.whl.metadata (3.6 kB)
Collecting pinecone-plugin-inference<4.0.0,>=2.0.0 (from pinecone)
  Downloading pinecone_plugin_inference-3.1.0-py3-none-any.whl.metadata (2.2 kB)
Collecting pinecone-plugin-interface<0.0.8,>=0.0.7 (from pinecone)
  Downloading pinecone_plugin_interface-0.0.7-py3-none-any.whl.metadata (1.2 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain-google-genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
Downloading pinecone-5.4.2-py3-none-any.whl (427 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m427.3/427.3 kB[0m [31m8.3 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading langchain_google_genai-2.0.8-py3-none-any.whl (41 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.5/41.5 kB[0m [31m3.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownload

### Pinecone Setup

In [2]:
from google.colab import userdata
PINE_CONE_API_KEY = userdata.get('PINE_CONE_API_KEY')

In [3]:
# Import the Pinecone library
from pinecone import Pinecone, ServerlessSpec

# Initialize a Pinecone client with your API key
pc = Pinecone(api_key=PINE_CONE_API_KEY)

#### Pinecone Index Setup

In [4]:
# Define index name
index_name = "pc-rag-test-index"

# Create or connect to Pinecone index
if index_name not in pc.list_indexes():
    pc.create_index(
      name=index_name,
      dimension=768,
      metric="cosine",
      spec=ServerlessSpec(
          cloud="aws",
          region="us-east-1"
      )
    )
index = pc.Index(index_name)

#### Setup a RAG document setup

In [5]:
%pip install -Uq langchain-community langchain-google-genai langchain-pinecone

[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.5/2.5 MB[0m [31m28.4 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.2/1.2 MB[0m [31m40.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.6/49.6 kB[0m [31m3.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m49.5/49.5 kB[0m [31m3.6 MB/s[0m eta [36m0:00:00[0m
[?25h

#### Setup Gemini embeddig model

In [11]:
from google.colab import userdata
import os

os.environ["GOOGLE_API_KEY"] = userdata.get('GOOGLE_API_KEY')

In [12]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

In [20]:
!pip install pypdf

Collecting pypdf
  Downloading pypdf-5.1.0-py3-none-any.whl.metadata (7.2 kB)
Downloading pypdf-5.1.0-py3-none-any.whl (297 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/298.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [91m━━━━━━━━━━[0m[91m╸[0m[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m81.9/298.0 kB[0m [31m3.0 MB/s[0m eta [36m0:00:01[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m298.0/298.0 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pypdf
Successfully installed pypdf-5.1.0


In [21]:
from langchain.document_loaders import PyPDFLoader


# Load PDF using LangChain's PyPDFLoader
def load_pdf(filepath):
    loader = PyPDFLoader(filepath)
    return loader.load()

In [22]:
# Load the PDF content
documents = load_pdf("/content/ResumeRehan_PM.pdf")

In [23]:
documents

[Document(metadata={'source': '/content/ResumeRehan_PM.pdf', 'page': 0}, page_content="Muhammad Rehan\n \n+923335527847 ◇edison.protus@hotmail.com ◇\nHouse No 8A, Street No 19, Jinnah Garden Islamabad Pakistan, Islamabad, 44000, Pakistan ◇Open to Remote ◇\nLinkedIn ◇Portfolio\nSUMMARY\nResult-focused VAS Engineer with seven years of experience in innovative smart and IoT, digital, and VAS product planning,\ndesigning, and implementation, as well as a strong foundation in customer service and technical help. I am looking for a\nchallenging position where I can use my technical expertise and customer-focused attitude to help create and improve cutting-\nedge VAS, digital, and IoT solutions.\nEXPERIENCE\n Mar '24 — Present\nIslamabad, Pakistan (Remote)\n \nSTC resource augmentation contract.\nAssessing the needs of the business and creating the necessary geo-fencing and M2M data & billing-related packages for\nIoT devices.\nCarrying out the enterprise and wholesale business units' onboard

In [24]:
from langchain_pinecone import PineconeVectorStore

vector_store = PineconeVectorStore(index=index, embedding=embeddings)
vector_store.add_documents(documents=documents)
print("CV content embedded and stored in Pinecone.")

CV content embedded and stored in Pinecone.


In [25]:
vector_store.similarity_search("matric")

[]

### LLM Introduce for RAG

In [34]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model="gemini-1.5-flash",api_key = 'GOOGLE_API_KEY')

In [35]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.runnables import RunnableLambda

message = """
Answer this question using the provided context only.

{question}

Context:
{context}
"""


retriever = RunnableLambda(vector_store.similarity_search)
prompt = ChatPromptTemplate.from_messages([("human", message)])
rag_chain = {"context": retriever, "question": RunnablePassthrough()} | prompt | llm

In [36]:
response = rag_chain.invoke("list down all the countries, cities he worked on? give in number format")

print(response.content)

GoogleGenerativeAIError: Error embedding content: 400 API key expired. Please renew the API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key expired. Please renew the API key."
]

In [38]:
response = rag_chain.invoke("What's his phone number and list down the country name he is right now")

print(response.content)

ChatGoogleGenerativeAIError: Invalid argument provided to Gemini: 400 API key not valid. Please pass a valid API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key not valid. Please pass a valid API key."
]

In [37]:
response = rag_chain.invoke("If he will do business, what things will help him based on his CV")

print(response.content)

GoogleGenerativeAIError: Error embedding content: 400 API key expired. Please renew the API key. [reason: "API_KEY_INVALID"
domain: "googleapis.com"
metadata {
  key: "service"
  value: "generativelanguage.googleapis.com"
}
, locale: "en-US"
message: "API key expired. Please renew the API key."
]