Imports

In [36]:
import hashlib
import os
from langchain_community.document_loaders import PyMuPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from pymongo import MongoClient
from langchain_pinecone import PineconeVectorStore
from pinecone import Pinecone, ServerlessSpec

# rag chain imports
from langchain.prompts import PromptTemplate
from langchain_core.runnables import RunnableLambda
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from dotenv import load_dotenv

from datetime import datetime, timezone

import google.generativeai as genai
from google.generativeai.types import HarmCategory, HarmBlockThreshold

import json
from duckduckgo_search import DDGS

# Load variables from a .env file (if present) into the process environment.
load_dotenv()

# Embedding vector size (the Pinecone index is created with the same dimension).
embedding_dim = 768

# Connection settings and API keys; each one is None when the variable is unset.
MONGO_DB_URI = os.getenv('MONGO_DB_URI')
GOOGLE_API_KEY = os.getenv('GEMINI_API_KEY')
MONGO_DB_HISTORY = os.getenv('MONGO_DB_HISTORY')

# Hand the Gemini key to the google.generativeai client.
genai.configure(api_key=GOOGLE_API_KEY)

In [37]:
# Module-level Pinecone client used by initialize_vector_store.
# Note: the variable shares its name with the `pinecone` package.
pinecone = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

In [38]:
def generate_response(llm, query):
    """Send a prompt to the model and return the generated text.

    Args:
        llm: Object exposing ``generate_content(text)`` whose result has a
            ``.text`` attribute (the notebook passes ``genai.GenerativeModel``).
        query: Either a prompt value carrying the prompt in ``.text`` (what the
            RAG chain passes in) or a plain string.

    Returns:
        The model's text. If anything fails, the exception message is returned
        instead, so a long batch run keeps going (best-effort by design).
    """
    try:
        # A plain string has no ``.text``; without this branch the resulting
        # AttributeError message was returned as if it were the model's answer.
        prompt_text = query if isinstance(query, str) else query.text
        response = llm.generate_content(prompt_text)
        return response.text
    except Exception as e:
        print("Something went wrong in generate_response")
        return str(e)

In [39]:
# Prompt sent to the model for every question. The three placeholders are
# filled by the RAG chain: web search output, retrieved document text, and the
# user's question.
# NOTE(review): "folowing" is a typo; left untouched here because the text is
# sent verbatim to the model and existing saved results were produced with it.
template = """
Answer as briefly as you can to the folowing question using the provided Context and Web Results. 
You are required to give a one-shot answer.

<Web Results>{web_results}</Web Results>
<Context>{context}</Context>
<Question>{question}</Question>

"""


# Prompt template object wrapping the text above; used as a step in the chain
# built by generate_response_with_rag.
custom_rag_prompt = PromptTemplate(
    template=template,
    input_variables=["context", "question", "web_results"]
)


def format_docs(docs):
    """Concatenate each document's ``page_content``, separated by a blank line."""
    separator = "\n\n"
    return separator.join(doc.page_content for doc in docs)


def get_web_results(query):
    """Fetch web context for ``query`` from DuckDuckGo.

    Tries ``DDGS().chat(query)`` first and falls back to
    ``DDGS().answers(query)`` if the chat call raises.

    Args:
        query: The question text to search for.

    Returns:
        Whatever the successful DDGS call returns.

    Raises:
        Exception: Anything raised by the fallback ``answers`` call.
    """
    try:
        return DDGS().chat(query)  # , model='claude-3-haiku')
    except Exception:
        # Catch only ordinary errors: a bare ``except`` also swallowed
        # KeyboardInterrupt/SystemExit, so interrupting the kernel during a
        # long run silently triggered the fallback instead of stopping.
        return DDGS().answers(query)

In [40]:
def initialize_vector_store(chat_id):
    """Return a Pinecone-backed vector store for ``chat_id``, creating the index if needed.

    The index is named ``str(chat_id)`` and is created as a 768-dimensional
    cosine index (serverless, AWS us-east-1). Documents and queries are
    embedded with Google's ``models/text-embedding-004`` using
    ``task_type="clustering"``.

    Args:
        chat_id: Identifier used as the Pinecone index name.

    Returns:
        A ``PineconeVectorStore`` bound to that index.
    """
    index_name = str(chat_id)
    try:
        pinecone.create_index(
            name=index_name,
            dimension=768,
            metric="cosine",
            spec=ServerlessSpec(
                cloud="aws",
                region="us-east-1")
        )
    except Exception as exc:
        # HTTP 409 (Conflict) is what Pinecone reports for an index that
        # already exists; that is the normal case on every call after the
        # first, so stay quiet. Anything else (bad key, quota, invalid name)
        # used to be hidden by a bare ``except: pass``; surface it but keep
        # going, since the index may still be usable.
        if getattr(exc, "status", None) != 409:
            print(f"Could not create Pinecone index '{index_name}': {exc}")
    pinecone_index = pinecone.Index(index_name)

    vector_store = PineconeVectorStore(
        index=pinecone_index,
        embedding=GoogleGenerativeAIEmbeddings(
            google_api_key=GOOGLE_API_KEY,
            model="models/text-embedding-004",
            task_type="clustering"
        )
    )
    return vector_store

def generate_file_hash(file_path):
    """Return the hex SHA-256 digest of the file at ``file_path``.

    The file is read in 8192-byte blocks so it never has to fit in memory.
    """
    digest = hashlib.sha256()
    with open(file_path, "rb") as handle:
        # iter() with a sentinel stops at the first empty read (end of file).
        for block in iter(lambda: handle.read(8192), b""):
            digest.update(block)
    return digest.hexdigest()


def chunk_and_store(file_path, chat_id):
    """Split a PDF into chunks and store their embeddings in the chat's index.

    Chunk IDs are ``<sha256 of the file><chunk position>``, so chunks of the
    same file always receive the same IDs.

    Args:
        file_path: Path to the PDF to ingest.
        chat_id: Identifier of the vector store (see ``initialize_vector_store``).

    Returns:
        A success message, or the exception text if any step fails
        (errors are reported through the return value, not raised).
    """
    try:
        # Do all local work first: if the file is missing or unreadable we
        # fail here, before a remote index gets created for nothing.
        file_hash = generate_file_hash(file_path)

        # Load and split the document
        documents = PyMuPDFLoader(file_path).load()
        text_splitter = RecursiveCharacterTextSplitter(
            chunk_size=1000, chunk_overlap=300)
        chunks = text_splitter.split_documents(documents)

        # One ID per chunk, derived from the file hash and chunk position
        chunk_ids = [f"{file_hash}{position}" for position in range(len(chunks))]

        # Only now touch Pinecone: create/open the index and insert the chunks
        vector_store = initialize_vector_store(chat_id)
        vector_store.add_documents(documents=chunks, ids=chunk_ids)

        return "Documents successfully embedded and stored in Pinecone."
    except Exception as e:
        return str(e)

In [41]:
def generate_response_with_rag(llm, query, chat_id):
    """Answer ``query`` with ``llm`` using retrieved document chunks and web results.

    The chain retrieves up to 5 chunks with similarity score >= 0.5 from the
    chat's vector store, runs a web search for the same query, fills
    ``custom_rag_prompt`` with both, and passes the prompt to
    ``generate_response``.

    Args:
        llm: Model object forwarded to ``generate_response``.
        query: The question text.
        chat_id: Identifier of the vector store to search.

    Returns:
        The model's answer, or an ``"Error in generate_response_with_rag: ..."``
        string if building or running the chain raises.
    """
    try:
        vector_store = initialize_vector_store(chat_id)
        retriever = vector_store.as_retriever(
            search_type="similarity_score_threshold",
            search_kwargs={"k": 5, "score_threshold": 0.5},
        )

        # The chain is the only place retrieval and web search happen. A
        # separately formatted (and never used) prompt used to repeat both
        # calls, doubling the network work for every question.
        rag_chain = (
            {
                "context": retriever | format_docs,
                "question": RunnablePassthrough(),
                "web_results": RunnableLambda(get_web_results),
            }
            | custom_rag_prompt
            | RunnableLambda(lambda prompt_value: generate_response(llm, prompt_value))
        )

        return rag_chain.invoke(query)

    except Exception as e:
        print(f"Error in generate_response_with_rag: {e}")
        return f"Error in generate_response_with_rag: {e}"

Add RAG documents to vector store

In [42]:
# PDF that gets chunked into the vector store. The default is a path on the
# original author's machine; set the RAG_EVAL_PDF_PATH environment variable to
# point at your own copy instead of editing the notebook.
FILE_PATH = os.environ.get(
    "RAG_EVAL_PDF_PATH",
    r"C:\Users\shaharyar\Documents\VS Code\Topics in LLMs\Project\harrypotter.pdf",
)
# Name of the Pinecone index to read from / write to.
# CHAT_ID = "eval-default"
CHAT_ID = "eval-clustering"

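Use CHAT_ID `eval-default` for the index built with the default embedding settings and `eval-clustering` for the index built with clustering embeddings. Note that CHAT_ID only selects which Pinecone index is used; the embedding task type itself is set inside `initialize_vector_store`.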

In [43]:
# import warnings

# # Suppress warnings
# with warnings.catch_warnings():
#     warnings.simplefilter("ignore")
#     chunk_and_store(FILE_PATH, CHAT_ID)


Ask the questions

In [44]:
from tenacity import (
    retry,
    stop_after_attempt,
    wait_random_exponential,
)

In [45]:
@retry(wait=wait_random_exponential(min=1, max=60), stop=stop_after_attempt(6))
def get_result(question, llm=None, chat_id=None):
    """Answer ``question`` through the RAG pipeline, retrying on raised errors.

    Args:
        question: The question text.
        llm: Model to query; defaults to the first entry of ``models``.
        chat_id: Vector store identifier; defaults to ``CHAT_ID``.

    Returns:
        The string returned by ``generate_response_with_rag``.

    Note:
        ``generate_response_with_rag`` reports its own failures as a returned
        error string rather than raising, so the retry only triggers for
        exceptions raised outside of it.
    """
    # The original call passed only ``question``; the pipeline requires
    # (llm, query, chat_id), so every attempt failed with a TypeError.
    if llm is None:
        llm = models[0]
    if chat_id is None:
        chat_id = CHAT_ID
    return generate_response_with_rag(llm, question, chat_id)

In [46]:
# Generation settings shared by every model under test: temperature 0 and
# plain-text responses.
_GENERATION_CONFIG = {
    "temperature": 0,
    "response_mime_type": "text/plain",
}


def _build_model(model_name):
    """Create a Gemini model handle for ``model_name`` with the shared settings."""
    # Each model gets its own copy of the settings dict.
    return genai.GenerativeModel(
        model_name,
        generation_config=dict(_GENERATION_CONFIG),
    )


model1 = _build_model("models/gemini-1.5-flash-8b-latest")
model2 = _build_model("models/gemini-2.0-flash-exp")
model3 = _build_model("models/learnlm-1.5-pro-experimental")

# Order matters: generate_results_for_all_models addresses models by position.
models = [model1, model2, model3]

In [47]:
import pandas as pd
# Load the trivia set: one question and one reference answer per row.
# The file's saved row index is read back as the "Unnamed: 0" column.
df = pd.read_csv('harry-potter-trivia-ai-100.csv')
df.head()

Unnamed: 0,question,answer
0,Who was the founder of Ravenclaw House?,Rowena Ravenclaw
1,What is the name of the spell used to create a...,Protego
2,What was Elfric the Eager's uprising?,A revolt.
3,What is the name of the spell used to create a...,Furnunculus
4,What is the name of the spell used to repair o...,Reparo


In [48]:
# Uncomment to try the pipeline on 5 randomly sampled questions instead of the full set.
# df = df.sample(5)
df.head()

Unnamed: 0,question,answer
0,Who was the founder of Ravenclaw House?,Rowena Ravenclaw
1,What is the name of the spell used to create a...,Protego
2,What was Elfric the Eager's uprising?,A revolt.
3,What is the name of the spell used to create a...,Furnunculus
4,What is the name of the spell used to repair o...,Reparo


In [49]:
from tqdm import tqdm

def generate_results_for_all_models(df, chat_id):
    """Ask every model every question and attach the answers to ``df``.

    For each value in ``df['question']`` the first three entries of the global
    ``models`` list are queried in order through ``generate_response_with_rag``.
    A raised exception is recorded as ``"Error: <message>"``. Newlines in the
    answers are replaced by spaces.

    Args:
        df: DataFrame with a ``question`` column. It is modified in place.
        chat_id: Vector store identifier forwarded to the RAG pipeline.

    Returns:
        The same ``df`` object with three added answer columns.
    """
    # Column i receives the answers of models[i].
    # NOTE(review): these labels do not exactly mirror the configured model
    # names; confirm before renaming, since saved CSVs use these headers.
    column_names = ('gemini_1.5_flash', 'gemini_2.0_flash', 'learnlm_1.5_flash')
    answers_by_column = {name: [] for name in column_names}

    # Iterate over each question with a tqdm progress bar
    for question in tqdm(df['question'], desc="Generating responses", unit="question"):
        raw_answers = []
        for position in range(len(column_names)):
            try:
                answer = generate_response_with_rag(models[position], question, chat_id)
            except Exception as e:
                answer = f"Error: {e}"
            raw_answers.append(answer)

        # Flatten each answer to a single line before storing it
        flattened = [answer.replace('\n', ' ') for answer in raw_answers]
        for name, answer in zip(column_names, flattened):
            answers_by_column[name].append(answer)

    # Add responses as new columns to the DataFrame
    for name in column_names:
        df[name] = answers_by_column[name]

    return df

In [50]:
# Query all three models for every question; the updated DataFrame is previewed in the next cell
df = generate_results_for_all_models(df, CHAT_ID)

Generating responses:  50%|█████     | 50/100 [26:41<26:04, 31.30s/question]

Something went wrong in generate_response


Generating responses:  67%|██████▋   | 67/100 [40:16<20:05, 36.54s/question]

Something went wrong in generate_response


Generating responses:  68%|██████▊   | 68/100 [40:48<18:49, 35.29s/question]

Something went wrong in generate_response


Generating responses: 100%|██████████| 100/100 [1:00:28<00:00, 36.28s/question]


In [51]:
# Preview the first rows: reference answer next to each model's answer.
df.head()

Unnamed: 0,question,answer,gemini_1.5_flash,gemini_2.0_flash,learnlm_1.5_flash
0,Who was the founder of Ravenclaw House?,Rowena Ravenclaw,Rowena Ravenclaw,Rowena Ravenclaw was the founder of Ravenclaw ...,Rowena Ravenclaw.
1,What is the name of the spell used to create a...,Protego,Shield Charm,The Shield Charm.,Shield Charm.
2,What was Elfric the Eager's uprising?,A revolt.,The provided text does not contain information...,The provided text does not mention Elfric the ...,This question cannot be answered from the give...
3,What is the name of the spell used to create a...,Furnunculus,Fiendfyre,The spell used to create the fire is called Fi...,Fiendfyre.
4,What is the name of the spell used to repair o...,Reparo,Reparo,Reparo,Reparo.


In [52]:
# Name the results file after the active index so that switching CHAT_ID
# cannot overwrite the other run's results:
#   "eval-clustering" -> harry-potter-trivia-ai-100-results-clustering.csv
#   "eval-default"    -> harry-potter-trivia-ai-100-results-default.csv
results_suffix = CHAT_ID.split("-", 1)[-1]
df.to_csv(f'harry-potter-trivia-ai-100-results-{results_suffix}.csv', index=False)