In [None]:
import json
import json
import os
import requests
from bs4 import BeautifulSoup
from langchain_core.runnables import RunnablePassthrough, RunnableMap
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain.embeddings.cache import CacheBackedEmbeddings
from langchain.storage import LocalFileStore
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableParallel
from langchain.vectorstores import Chroma
from langchain_groq import ChatGroq

# Load the dataset for evaluation
with open(r"C:\Users\GursewakNeet\Documents\rag_chatbot\ground_truth.json", "r") as f:
    ground_truth = json.load(f)

# Function to calculate evaluation metrics
def evaluate_retrieval(retriever, ground_truth, k=10):
    precision_list = []
    recall_list = []
    reciprocal_ranks = []

    for item in ground_truth:
        query = item["query"]
        ground_truth_doc = item["ground_truth_document"]

        # Retrieve top-k documents
        retrieved_docs = retriever.get_relevant_documents(query)[:k]

        # Extract content for comparison
        retrieved_content = [doc.page_content for doc in retrieved_docs]

        # Precision@k
        relevant_retrieved = sum(1 for doc in retrieved_content if ground_truth_doc in doc)
        precision = relevant_retrieved / k
        precision_list.append(precision)

        # Recall@k
        total_relevant = 1  # Assuming one ground truth document
        recall = relevant_retrieved / total_relevant
        recall_list.append(recall)

        # Mean Reciprocal Rank (MRR)
        try:
            rank = next(i + 1 for i, doc in enumerate(retrieved_content) if ground_truth_doc in doc)
            reciprocal_ranks.append(1 / rank)
        except StopIteration:
            reciprocal_ranks.append(0)

    # Calculate averages
    precision_avg = sum(precision_list) / len(precision_list)
    recall_avg = sum(recall_list) / len(recall_list)
    mrr = sum(reciprocal_ranks) / len(reciprocal_ranks)

    return {
        "Precision@k": precision_avg,
        "Recall@k": recall_avg,
        "MRR": mrr
    }

### STEP 1: Scrape data and save as a JSON file ###

# URL to scrape
url = 'https://en.wikipedia.org/wiki/97th_Academy_Awards'

response = requests.get(url)
page_content = response.content

# Create a BeautifulSoup object
soup = BeautifulSoup(page_content, 'html.parser')

# Extract all <p> tags and combine their text
paragraphs = soup.find_all('p')
text_content = ' '.join([para.get_text() for para in paragraphs])
#Import a text splitter class from LangChain.
#This class recursively splits documents by paragraph, sentence, or character while preserving meaning.
from langchain.text_splitter import RecursiveCharacterTextSplitter
#chunk_size=1000: Each chunk will be up to 1000 characters long.
#chunk_overlap=20: Each chunk will overlap 20 characters with the next, helping maintain context between chunks.
text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
chunks = text_splitter.split_text(text_content)

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.embeddings.cache import CacheBackedEmbeddings
from langchain.storage import LocalFileStore

# Set up the cache store
store = LocalFileStore("./cache/")

# Initialize the Hugging Face embedding model
core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

# Use the cache-backed embedder with the Hugging Face model
embedder = CacheBackedEmbeddings.from_bytes_store(
    core_embeddings_model,
    store,
    namespace=core_embeddings_model.model_name
)

# Store embeddings in the Pinecone vector store
from langchain_pinecone import PineconeVectorStore
import os
from pinecone import  Pinecone, ServerlessSpec
from dotenv import load_dotenv
load_dotenv()    
pinecone_api_key = os.getenv("PINECONE_API_KEY")
index_name = "ragchatbot2" 


# Initialize Pinecone
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))
if index_name not in pc.list_indexes().names():
    pc.create_index(
        name=index_name,
        dimension=384,  # Huggingface embeddings = 384
        metric="cosine",
        spec=ServerlessSpec(cloud="aws", region="us-east-1")
    )

# Create vectorstore
for i in range(0, len(chunks), 100):  # upload 100 at a time
    vectorstore= PineconeVectorStore.from_texts(
        chunks[i:i+100],
        embedding=embedder,
        index_name=index_name
    )

# Query
query = "What is Pinecone used for?"
results = vectorstore.similarity_search(query)

# Print top result
print("Answer:", results[0].page_content)

# Instantiate a retriever from the vector store
retriever = vectorstore.as_retriever()

from langchain_groq import ChatGroq
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableParallel
import os
# this formats the docs returned by the retriever
def format_docs(docs):
	return "\n\n".join(doc.page_content for doc in docs)

# prompt to send to the LLM
prompt = """You are an assistant for question-answering tasks.
    	Use the following pieces of retrieved context to answer the question.
    	If you don't know the answer, search in google  .

    	Question: {question}

    	Context: {context}

    	Answer:
    	"""

prompt_template = ChatPromptTemplate.from_template(prompt)

llm = ChatGroq(
    model_name="llama3-70b-8192", streaming=True, groq_api_key=os.getenv("GROQ_API_KEY")
)
"""This code defines a chain where input documents are first formatted,
then passed through a prompt template,
and finally processed by an LLM."""

rag_chain_from_docs = (
	RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))
		| prompt_template
	| llm
	)
"""This code creates a parallel process:
one retrieves the context (using a retriever),
and the other passes the question through unchanged.
The results are then combined and assigned to the variable `answer` using the `rag_chain_from_docs` processing chain."""

rag_chain_with_source = RunnableParallel(
	{"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)


# Evaluate the retriever
evaluation_metrics = evaluate_retrieval(retriever, ground_truth, k=10)
print("Retrieval Evaluation Metrics:", evaluation_metrics)

# Run a query
response = rag_chain_with_source.invoke("who stars in Deadpool & Wolverine")
print("Answer:", response["answer"].content)
print("Retrieved Context:", response["context"])



  core_embeddings_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm


Answer: In introducing the "In Memoriam" montage, Morgan Freeman also gave an individual spoken tribute to Gene Hackman.[75] The montage, which featured the Los Angeles Master Chorale performing "Lacrimosa" from Mozart's Requiem, paid tribute to the following individuals:[76]


  retrieved_docs = retriever.get_relevant_documents(query)[:k]


Retrieval Evaluation Metrics: {'Precision@k': 0.2, 'Recall@k': 2.0, 'MRR': 0.5}
Answer: The answer is Hugh Jackman and Ryan Reynolds. Hugh Jackman stars as Wolverine, and Ryan Reynolds stars as Deadpool.
Retrieved Context: [Document(id='d99ed675-b80d-4013-af9c-6ae87f24b2a4', metadata={}, page_content='opportunity. A combo of Deadpool & Wolverine stars Hugh Jackman and Ryan Reynolds were also among those buzzed to be in the mix, but the latter told Deadline Hollywood that it was highly unlikely though he said he would like to host it with Jackman "someday".[8][40][41] As for the production team, returning to the telecast are Rob Paine as co-executive producer, Taryn Hurd and Sarah Levine Hall as producers, Mandy Moore as supervising choreographer, and Bob Dickinson and Noah Mitz as lighting'), Document(id='c8ab2065-e187-45b4-91b2-ed02d547f59a', metadata={}, page_content='opportunity. A combo of Deadpool & Wolverine stars Hugh Jackman and Ryan Reynolds were also among those buzzed to be 

In [5]:
from nltk.translate.bleu_score import sentence_bleu, SmoothingFunction
from rouge import Rouge

def evaluate_bleu_rouge(responses, ground_truths):
    """
    Evaluate LLM-generated responses using BLEU and ROUGE scores.

    Args:
        responses (list of str): List of responses generated by the LLM.
        ground_truths (list of str): List of ground truth answers.

    Returns:
        dict: A dictionary containing BLEU and ROUGE scores.
    """
    assert len(responses) == len(ground_truths), "Responses and ground truths must have the same length."

    # Initialize scores
    total_bleu_score = 0
    rouge = Rouge()
    rouge_scores = []

    # Evaluate each response
    for response, ground_truth in zip(responses, ground_truths):
        # Compute BLEU score
        bleu_score = sentence_bleu(
            [ground_truth.split()], response.split(),
            smoothing_function=SmoothingFunction().method1
        )
        total_bleu_score += bleu_score

        # Compute ROUGE scores
        rouge_score = rouge.get_scores(response, ground_truth, avg=True)
        rouge_scores.append(rouge_score)

    # Average BLEU score 
    avg_bleu_score = total_bleu_score / len(responses)

    # Average ROUGE scores
    avg_rouge_score = {
        "rouge-1": {
            "f": sum(score["rouge-1"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-1"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-1"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
        "rouge-2": {
            "f": sum(score["rouge-2"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-2"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-2"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
        "rouge-l": {
            "f": sum(score["rouge-l"]["f"] for score in rouge_scores) / len(rouge_scores),
            "p": sum(score["rouge-l"]["p"] for score in rouge_scores) / len(rouge_scores),
            "r": sum(score["rouge-l"]["r"] for score in rouge_scores) / len(rouge_scores),
        },
    }

    return {
        "Average BLEU Score": avg_bleu_score,
        "Average ROUGE Score": avg_rouge_score,
    }

# Example usage
responses = [
    "This is a generated response.",
    "Another generated response."
]
ground_truths = [
    "This is the expected response.",
    "Another correct response."
]

scores = evaluate_bleu_rouge(responses, ground_truths)
print(scores)
print("Answer:", response["answer"].content)

{'Average BLEU Score': 0.1304316792244985, 'Average ROUGE Score': {'rouge-1': {'f': 0.6333333283333334, 'p': 0.6333333333333333, 'r': 0.6333333333333333}, 'rouge-2': {'f': 0.12499999750000004, 'p': 0.125, 'r': 0.125}, 'rouge-l': {'f': 0.6333333283333334, 'p': 0.6333333333333333, 'r': 0.6333333333333333}}}
Answer: The answer is Hugh Jackman and Ryan Reynolds. Hugh Jackman stars as Wolverine, and Ryan Reynolds stars as Deadpool.
