In [1]:
import os
from dotenv import load_dotenv
# from google.colab import userdata
# userdata.get('GOOGLE_API_KEY')
# os.environ['GOOGLE_API_KEY'] = userdata.get('GOOGLE_API_KEY')
# PINECONE_API_KEY = userdata.get('PINECONE_API_KEY')
load_dotenv()
GOOGLE_API_KEY = os.getenv("API_KEY")
PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')


In [2]:
from langchain_google_genai import ChatGoogleGenerativeAI
llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash')

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")
embeddings.embed_query("What's our Q1 revenue?")

In [4]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [5]:
# Load documents
loader = PyPDFLoader('documents/MDF.pdf')
documents = loader.load()

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
docs = text_splitter.split_documents(documents)
print(docs)
print(len(docs))

[Document(metadata={'source': 'documents/MDF.pdf', 'page': 0}, page_content='Constitution of Murree Development Forum \nTrial: \nThe Islamic Republic of Pakistan is a manifestation of the aspirations and aspirations of the Muslims \nof the Indian subcontinent, which came into existence as a result of millions of human sacrifices. Among the \nobjectives of its establishment was the establishment of a fair society in the light of Islamic principles where \npeople can live under values such as basic human rights, freedom and equality and reach the highest level of'), Document(metadata={'source': 'documents/MDF.pdf', 'page': 0}, page_content='construction and development. \nThe objectives of Murree Development Forum are to provide opportunities based on freedom, equality and \njustice to benefit the construction and development of our region. \n \nVision of Murree Development Forum: \nTo make Murree a developed city of international standards. \nMission of Murree Development Forum: \nTo im

In [6]:
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

index_name = "quickstart4"
pc = Pinecone(api_key=PINECONE_API_KEY)
# Check if index already exists before attempting to create it
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=768,  # Replace with your model dimensions
        metric="cosine",  # Replace with your model metric
        spec=ServerlessSpec(
            cloud="aws",
            region="us-east-1"
        )
    )
    print(f"Index '{index_name}' created successfully.")
else:
    print(f"Index '{index_name}' already exists.")

index = pc.Index(index_name)

vector_store = PineconeVectorStore(embedding=embeddings, index=index)

Index 'quickstart4' created successfully.


In [7]:
from tqdm import tqdm

# Create embeddings and upload to Pinecone
vectors=[]
for doc in tqdm(docs):
    vector = embeddings.embed_query(doc.page_content)
    # Generate a unique ID for each document (e.g., UUID or hash)
    doc_id = str(hash(doc.page_content))

    metadata = {'text': doc.page_content}
    # Upsert the vector with a unique ID
    index.upsert(vectors=[(doc_id, vector, metadata)])

  0%|          | 0/37 [00:00<?, ?it/s]

100%|██████████| 37/37 [01:11<00:00,  1.93s/it]


In [8]:
from langchain.vectorstores import Pinecone
retriever=  Pinecone(index=index, embedding=embeddings, text_key="text")
# Convert it into a retriever compatible with RetrievalQA
retriever = vector_store.as_retriever()

  retriever=  Pinecone(index=index, embedding=embeddings, text_key="text")


In [9]:
from langchain.chains import RetrievalQA

qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="refine",  # Other options: "map_reduce", "refine"
    retriever=retriever
)

In [10]:
query = "what is murree developemnt forum?"
# query = "what is python?"
response = qa_chain.invoke(query)
print(response['result'])

The Murree Development Forum is an organization dedicated to the multifaceted development of Murree.  Their efforts encompass improving the quality of life for residents through initiatives in healthcare (upgrading primary health centers and attracting private investment),  positive entertainment, and cultural preservation.  A strong emphasis is placed on education, aiming for 100% literacy by reintegrating out-of-school children, mandating matriculation, and enhancing educational quality across public and private institutions.  Critically, the Forum also focuses on economic development, aiming to cultivate a skilled workforce through technical and soft skills training, establish IT infrastructure to promote digital literacy and access to modern markets, and attract investors to create employment opportunities and boost cottage and local industries.  Their ultimate goal is to elevate Murree to the status of an internationally recognized, developed city.

