In [8]:
from langchain.prompts import PromptTemplate
from langchain_community.document_loaders import TextLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from typing import List
from pydantic import BaseModel
from sentence_transformers import CrossEncoder

from langchain.schema import Document, BaseRetriever

from langchain_core.runnables import RunnableParallel, RunnablePassthrough

import os 
# ChatGroq picks up GROQ_API_KEY from the environment. Fail fast with a clear
# message when it is missing: the former `os.environ[...] = os.getenv(...)`
# self-assignment did nothing when the key was set and raised an opaque
# "TypeError: str expected, not NoneType" when it was not.
if not os.getenv("GROQ_API_KEY"):
    raise RuntimeError(
        "GROQ_API_KEY is not set. Export it in the environment before running this notebook."
    )

# Load the profile text. The encoding is pinned because the file contains
# non-ASCII characters (emoji) and the platform default is not UTF-8 everywhere.
loader = TextLoader("../data/info.txt", encoding="utf-8")
documents = loader.load()

# Split into overlapping chunks so facts that straddle a boundary stay retrievable.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1500, chunk_overlap=500)
docs = text_splitter.split_documents(documents)

# Embed the chunks and index them in an in-memory FAISS store.
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-MiniLM-L3-v2")
vectorstore = FAISS.from_documents(docs, embeddings)


# Cross-encoder used by `rerank_documents` to score (query, passage) pairs.
reranker = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

def rerank_documents(query: str, retrieved_docs: List[Document]) -> List[Document]:
    """Order retrieved documents by cross-encoder relevance to the query.

    Args:
        query: The user question the documents are scored against.
        retrieved_docs: Candidate documents; each must expose ``page_content``.

    Returns:
        A new list holding the same documents, highest score first.
        An empty list when no candidates are given.
    """
    if not retrieved_docs:
        # Nothing to score; skip the model call, which is not guaranteed to
        # accept an empty batch.
        return []

    pairs = [(query, doc.page_content) for doc in retrieved_docs]
    scores = reranker.predict(pairs)
    # Sort on the score only, so Document objects are never compared on ties.
    ranked = sorted(zip(scores, retrieved_docs), key=lambda pair: pair[0], reverse=True)
    return [doc for _, doc in ranked]

class RerankRetriever(BaseRetriever):
    """Retriever that re-orders a base retriever's hits with ``rerank_documents``.

    ``BaseRetriever`` is already a pydantic model, so the fields below are
    declared directly on it; inheriting from ``BaseModel`` a second time is
    redundant and can clash when the two come from different pydantic versions.
    """

    # Retriever that supplies the initial candidate documents.
    base_retriever: BaseRetriever
    # Maximum number of reranked documents returned per query.
    top_k: int = 5

    def _get_relevant_documents(self, query: str, *, run_manager=None) -> List[Document]:
        """Fetch candidates from the base retriever, rerank them, keep the top ``top_k``.

        Args:
            query: The user question.
            run_manager: Callback manager supplied by LangChain when the
                retriever is invoked; forwarded so the nested retrieval shows
                up in callbacks/tracing. May be ``None`` when called directly.

        Returns:
            At most ``top_k`` documents, most relevant first.
        """
        config = {"callbacks": run_manager.get_child()} if run_manager is not None else None
        initial_docs = self.base_retriever.invoke(query, config=config)
        reranked_docs = rerank_documents(query, initial_docs)
        return reranked_docs[:self.top_k]

# Pull 5 candidates from the vector store, then keep the 3 best after reranking.
base_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
custom_retriever = RerankRetriever(base_retriever=base_retriever, top_k=3)

llm = ChatGroq(
    model_name="llama-3.3-70b-versatile",
    streaming=True,
)

prompt = PromptTemplate(
    template="""
You are a smart and knowledgeable AI assistant helping users understand the professional background, projects, skills, and certifications of Katta Sai Pranav Reddy.

Use the following context extracted from Pranav's profile and provide a clear, helpful, and detailed answer.

Context:
{context}

Question: {question}
Helpful Answer:""",
    input_variables=["context", "question"]
)


def format_docs(retrieved_docs: List[Document]) -> str:
    """Join the retrieved passages into one plain-text context string.

    Without this step the prompt's ``{context}`` slot receives the list of
    ``Document`` objects and renders their repr (metadata included) rather
    than the passage text.
    """
    return "\n\n".join(doc.page_content for doc in retrieved_docs)


# --- 7. RAG Chain ---
# The incoming question string is fanned out: once through the retriever (then
# flattened to text) for the context, and once unchanged as the question.
rag_chain = (
    RunnableParallel({
        "context": custom_retriever | format_docs,
        "question": RunnablePassthrough()
    })
    | prompt
    | llm
    | StrOutputParser()
)




In [9]:
# --- 8. Run with streaming ---
# Question sent through the retrieve -> rerank -> LLM chain.
query = "Give me Github repo links of projects"

print("Answer (streaming):")
answer_stream = rag_chain.stream(query)
for chunk in answer_stream:
    # Emit each piece as soon as it arrives, with no separator between pieces.
    print(chunk, end="", flush=True)


Answer (streaming):
Based on the provided context, here are the GitHub repository links for the projects developed by Katta Sai Pranav Reddy:

1. **BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping**: https://github.com/PranavReddy/BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping (Note: The exact GitHub username is not provided in the context, so I assumed it to be "PranavReddy". The actual link may vary depending on the correct GitHub username.)
2. **Netflix Customer Churn Prediction – End-to-End ML System**: The exact GitHub repository link is not provided in the context. However, it is mentioned that the project is available on GitHub, and the link can be found on Pranav Reddy's GitHub profile.

To find the exact links, you can search for Pranav Reddy's GitHub profile and look for the respective repository names.

In [10]:
# Question sent through the retrieve -> rerank -> LLM chain.
query = "Give me the github link of pranav reddy"

print("Answer (streaming):")
answer_stream = rag_chain.stream(query)
for chunk in answer_stream:
    # Emit each piece as soon as it arrives, with no separator between pieces.
    print(chunk, end="", flush=True)


Answer (streaming):
The GitHub link of Pranav Reddy is not explicitly provided, but the GitHub repository for his project, "BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping", can be accessed through the link: 🔍 GitHub Repo: BigBasket-SmartCart-AI-Assistant-for-BigBasket-Shopping. 

However, a more direct link is not provided in the context. Nevertheless, another GitHub link for his personal project, "Netflix Customer Churn Prediction – End-to-End ML System", is mentioned as *[GitHub]*, but the actual link is not provided. 

If you are looking to access Pranav Reddy's GitHub profile or repositories, you may need to search for his username, which could be "pranavreddy123" based on his DockerHub repository (🐳 DockerHub: pranavreddy123/bigbasket-assistant).

In [4]:
# Question sent through the retrieve -> rerank -> LLM chain.
query = "How He Deployed Netflix Churn Prediction Project"

print("Answer (streaming):")
answer_stream = rag_chain.stream(query)
for chunk in answer_stream:
    # Emit each piece as soon as it arrives, with no separator between pieces.
    print(chunk, end="", flush=True)


Answer (streaming):
Katta Sai Pranav Reddy deployed the Netflix Customer Churn Prediction project using a containerized approach with Docker. The project utilizes a production-grade, explainable, and reproducible Machine Learning (ML) pipeline that incorporates various tools and technologies for efficient deployment.

Here's an overview of the deployment process:

1. **CI/CD**: The project implements Continuous Integration/Continuous Deployment (CI/CD) to ensure seamless and automated testing, building, and deployment of the ML model.
2. **Experiment Tracking (MLflow)**: MLflow is used to track experiments, manage models, and monitor performance. This allows for easy comparison of different models, hyperparameters, and experiment results.
3. **Data Versioning (DVC)**: DVC is used for data versioning, which enables the tracking of changes to the data and ensures reproducibility of the results.
4. **Docker**: The project is containerized using Docker, which provides a lightweight and por