In [None]:
from langchain.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

In [None]:
def load_and_split_pdf(file_path):
    """Read the file at ``file_path`` with ``TextLoader`` and return its documents.

    Despite the name, nothing here is PDF-specific and no splitting takes
    place: the result is exactly what ``TextLoader(file_path).load()`` returns.
    """
    return TextLoader(file_path).load()


# Source text for the knowledge base, given as a relative path.
doc = load_and_split_pdf("../data/shahzaib.txt")

In [None]:
from typing import List

# Import Document from langchain_core (the location the splitting cell of this
# notebook also uses) rather than through the legacy `langchain.schema` path.
from langchain_core.documents import Document


def extract_page_contents(docs: List[Document]) -> List[str]:
    """Return the ``page_content`` of every document, in input order.

    Args:
        docs: Documents to read. An empty list yields an empty list.

    Returns:
        One string per input document.
    """
    return [document.page_content for document in docs]


page_contents = extract_page_contents(doc)
page_contents

In [None]:
from typing import List
from langchain_core.documents import Document

def split_documents(
    documents: List[Document],
    chunk_size: int = 1000,
    chunk_overlap: int = 200,
) -> List[Document]:
    """Split documents into chunks with ``RecursiveCharacterTextSplitter``.

    Args:
        documents: Documents to split.
        chunk_size: Forwarded to the splitter as ``chunk_size``; lengths are
            measured with ``len``. Defaults to the previously hard-coded 1000.
        chunk_overlap: Forwarded to the splitter as ``chunk_overlap``.
            Defaults to the previously hard-coded 200.

    Returns:
        The chunks returned by the splitter's ``split_documents``.
    """
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
    )
    return splitter.split_documents(documents)


text_chunks = split_documents(doc)

In [None]:
from langchain.embeddings import HuggingFaceEmbeddings


In [None]:
def download_emb_embeddings():
    """Build the HuggingFace embedding wrapper used by this notebook.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``sentence-transformers/all-MiniLM-L6-v2`` model name.
    """
    return HuggingFaceEmbeddings(
        model_name="sentence-transformers/all-MiniLM-L6-v2"
    )


# One shared embedding object, passed to both vector store constructors below.
embeddings = download_emb_embeddings()

In [None]:
from dotenv import load_dotenv
import os

# Let python-dotenv populate the process environment before the keys are read.
load_dotenv()

PINECONE_API_KEY = os.environ.get("PINECONE_API_KEY")
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")

# Fail fast when a key is unset or empty; Pinecone is checked first.
if PINECONE_API_KEY is None or PINECONE_API_KEY == "":
    raise ValueError("PINECONE_API_KEY is missing")
if GEMINI_API_KEY is None or GEMINI_API_KEY == "":
    raise ValueError("GEMINI_API_KEY is missing")

# Write the validated values back into the process environment.
os.environ.update(
    PINECONE_API_KEY=PINECONE_API_KEY,
    GEMINI_API_KEY=GEMINI_API_KEY,
)


In [None]:

from pinecone import Pinecone, ServerlessSpec

pc = Pinecone(api_key=PINECONE_API_KEY)
index_name = "chatbot"

# Create the serverless index only when it is not there yet. Asking Pinecone
# directly replaces catching every exception and searching its message for
# "already exists", which depended on the exact wording of the error text.
if not pc.has_index(index_name):
    pc.create_index(
        name=index_name,
        # Has to equal the vector size produced by `embeddings`.
        dimension=384,
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
    )

index = pc.Index(index_name)


In [None]:

import hashlib

from langchain_pinecone import PineconeVectorStore

docsearch = PineconeVectorStore(
    index=index,
    embedding=embeddings
)

# Give every chunk a deterministic ID built from its source, position and
# text. Without explicit IDs each run of this cell stores a fresh copy of
# every chunk; with stable IDs a re-run writes to the same records instead.
# `source` is read defensively — presumably set by the loader; verify.
chunk_ids = [
    hashlib.sha256(
        f"{chunk.metadata.get('source', '')}|{position}|{chunk.page_content}".encode("utf-8")
    ).hexdigest()
    for position, chunk in enumerate(text_chunks)
]
docsearch.add_documents(text_chunks, ids=chunk_ids)

In [None]:
from langchain_pinecone import PineconeVectorStore

# Open the existing index by name, using the same embedding object as above.
load_docsearch = PineconeVectorStore.from_existing_index(
    index_name=index_name,
    embedding=embeddings,
)

In [None]:
query = "What is your Favort Computer language?"

# `k` has to travel inside `search_kwargs`. Passed as a bare keyword to
# as_retriever() it never reaches the similarity search, so the retriever
# falls back to its default result count instead of returning 3 documents.
retriever = load_docsearch.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
# Run the retriever on its own to inspect which chunks come back for `query`.
retriever.invoke(input=query)

In [None]:
# Repeats the retrieval for `query`; the bare last expression displays the hits.
retrieved_docs = retriever.invoke(query)
retrieved_docs

In [None]:
import langchain
import langchain_core

# Show the installed langchain and langchain_core versions, one per line.
print(langchain.__version__, langchain_core.__version__, sep="\n")


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_classic.chains.combine_documents import create_stuff_documents_chain
from langchain_classic.chains import create_retrieval_chain
from langchain_core.prompts.chat import ChatPromptTemplate, SystemMessagePromptTemplate, HumanMessagePromptTemplate



In [None]:
# System instructions for the RAG chain. `{context}` is the only template
# variable; it is filled with the retrieved documents.
# Two defects fixed in the text: the opening delimiter had four quotes, which
# put a stray `"` at the very start of the prompt, and the apostrophe in
# "user's" was a mis-decoded character sequence.
system_prompt = (
    """
            You are a knowledgeable and precise AI assistant.

    Your task is to answer the user's question using ONLY the information provided in the retrieved context.
    Rules:
    - Do NOT use prior knowledge or make assumptions.
    - If the answer is not explicitly found in the context, respond with:
    "I do not have enough information in the provided context to answer this question."
    - Do NOT invent facts, examples, or explanations.
    - Keep the answer clear, factual, and concise.
    - Use professional and neutral language.
    - If the context contains multiple relevant points, summarize them accurately.

    Context will be provided before each question.
{context}
"""
)


In [None]:
# The system message is built from `system_prompt`; the human message is a
# template holding only the `{input}` variable.
system_message_template = SystemMessagePromptTemplate.from_template(system_prompt)
human_message_template = HumanMessagePromptTemplate.from_template("{input}")

prompt = ChatPromptTemplate.from_messages(
    [system_message_template, human_message_template]
)



In [None]:
# Hand the Gemini key over explicitly. This notebook only loads
# GEMINI_API_KEY, and leaving discovery to the client depends on which
# environment variable names the installed langchain-google-genai release
# looks up (presumably only GOOGLE_API_KEY in older releases — confirm).
chat_model = ChatGoogleGenerativeAI(
    model="gemini-flash-lite-latest",
    temperature=0,
    google_api_key=GEMINI_API_KEY,
)


In [None]:
# First build the chain that sends the documents and the prompt to the model,
# then put the retriever in front of it.
Question_answeer_chain = create_stuff_documents_chain(
    llm=chat_model,
    prompt=prompt,
)
rag_chain = create_retrieval_chain(
    retriever=retriever,
    combine_docs_chain=Question_answeer_chain,
)

In [None]:
# The chain takes the question under the "input" key; the generated text is
# read back from the "answer" key of the result.
rag_inputs = {"input": "What you need job?"}
response = rag_chain.invoke(rag_inputs)
print(response["answer"])

In [None]:
# Import the module under an alias so that binding the instance to `client`
# below does not overwrite the module it was created from.
from google.genai import client as genai_client

client = genai_client.Client()
models_pager = client.models.list()

# Materialise the pager into a list, then print each model's name on its own line.
models_list = list(models_pager)
for model in models_list:
    print(model.name)
