# Imports

We import all the necessary libraries and modules required for the script. These include libraries for PDF loading, conversational retrieval chains, language models, vector stores, embeddings, and various similarity metrics.

In [1]:
import os
import time
from langchain.document_loaders import PyPDFLoader
from langchain.chains import ConversationalRetrievalChain
from langchain_ollama.llms import OllamaLLM
from langchain.vectorstores import FAISS
from langchain.embeddings import HuggingFaceEmbeddings
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import jaccard_score
from sklearn.preprocessing import Binarizer
from scipy.sparse import csr_matrix

# Load PDF Document

We load a PDF document from the specified file path using the PyPDFLoader class.

In [2]:
def load_pdf(file_path):
    """Load the PDF at ``file_path`` and return the result of ``PyPDFLoader.load()``.

    Args:
        file_path: Path of the PDF file, handed to ``PyPDFLoader`` unchanged.

    Returns:
        Whatever ``PyPDFLoader(file_path).load()`` returns. Callers in this
        file iterate over it and read ``page_content`` from each item.
    """
    return PyPDFLoader(file_path).load()

# Create Vector Store

We create a vector store using HuggingFace embeddings. It takes chunks of text as input and returns a FAISS vector store.

In [3]:
def get_vectorstore(text_chunks):
    """Build a FAISS vector store over ``text_chunks``.

    Args:
        text_chunks: The texts to index; passed straight to
            ``FAISS.from_texts`` as ``texts``.

    Returns:
        The store produced by ``FAISS.from_texts`` using
        ``HuggingFaceEmbeddings`` configured with the
        ``all-MiniLM-L6-v2`` model.
    """
    return FAISS.from_texts(
        texts=text_chunks,
        embedding=HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2"),
    )

# Create Conversational Chain

We create a conversational retrieval chain based on the specified model name. It converts the documents into text chunks, creates a vector store, and then creates a conversational retrieval chain.

In [4]:
def create_conversational_chain(documents, model_name="llama3.1:8b"):
    """Create a ``ConversationalRetrievalChain`` over ``documents``.

    Args:
        documents: Iterable of objects exposing ``page_content``; each
            ``page_content`` becomes one text chunk in the vector store.
        model_name: Model identifier passed to ``OllamaLLM``.

    Returns:
        The chain returned by ``ConversationalRetrievalChain.from_llm``,
        wired to the given model and to a retriever over the chunks.
    """
    language_model = OllamaLLM(model=model_name)

    page_texts = []
    for document in documents:
        page_texts.append(document.page_content)

    retriever = get_vectorstore(page_texts).as_retriever()
    return ConversationalRetrievalChain.from_llm(
        llm=language_model,
        retriever=retriever,
    )

# Calculate Jaccard Similarity

We calculate the Jaccard similarity between two sparse term-count vectors. Each vector is reduced to the presence or absence of each term, and the score is the number of terms present in both vectors divided by the number of terms present in at least one of them.

In [5]:
def calculate_jaccard_similarity(vec1, vec2):
    """Return the Jaccard similarity of two sparse vectors.

    Each vector is treated as the set of positions holding a value greater
    than zero; the score is ``|A & B| / |A | B|``.  The computation works on
    the sparse structure directly, so the vectors are never densified.

    Args:
        vec1: 2-D SciPy sparse matrix (e.g. one row of a CountVectorizer
            output).
        vec2: 2-D SciPy sparse matrix with the same number of elements as
            ``vec1``.

    Returns:
        A ``float`` in ``[0.0, 1.0]``.  When neither vector has a positive
        entry the union is empty and ``0.0`` is returned.

    Raises:
        ValueError: If the two vectors do not have the same number of
            elements.
    """

    def _positive_positions(vec):
        # Flatten (row, col) into a single row-major index so that the two
        # inputs are compared position by position.
        rows, cols = (vec > 0).nonzero()
        flat = rows.astype("int64") * vec.shape[1] + cols
        return set(flat.tolist())

    size1 = vec1.shape[0] * vec1.shape[1]
    size2 = vec2.shape[0] * vec2.shape[1]
    if size1 != size2:
        raise ValueError(
            f"Vectors must have the same number of elements, got {size1} and {size2}"
        )

    first = _positive_positions(vec1)
    second = _positive_positions(vec2)

    union_size = len(first | second)
    if union_size == 0:
        # Both vectors are empty: the ratio is undefined, report no overlap.
        return 0.0
    return len(first & second) / union_size

# Evaluate Models Based on Similarity and Time

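We evaluate the models on several similarity metrics and on time. For each model, we compute the cosine similarity between its responses and the true answers using both TF-IDF and CountVectorizer representations, the Jaccard similarity, and the time taken to compute these metrics (model inference time is not included). The best model is the one with the highest average similarity score; ties are broken in favor of the shorter time.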

In [6]:
def evaluate_models_time_and_similarity(models_responses, true_answers):
    """Score each model's responses against the reference answers.

    For every model, response ``i`` is compared with ``true_answers[i]``
    using three metrics: cosine similarity over TF-IDF vectors, cosine
    similarity over raw term counts, and Jaccard similarity over term
    presence.  The vectorizers are fitted per model on that model's
    responses together with the reference answers.

    The per-metric results and the best model are printed.  The best model
    is the one with the highest mean of the three average scores; ties are
    broken by the shorter scoring time.

    Note:
        "Time Taken (seconds)" is the time spent computing the metrics in
        this function, not the time the model took to answer.

    Args:
        models_responses: Mapping of model name to a sequence of response
            strings, one per reference answer.
        true_answers: Sequence of reference answer strings.

    Returns:
        A dict with keys ``"results"`` (the per-metric, per-model values
        that are printed), ``"averages"`` (per-model average similarity
        score and time) and ``"best_model"`` (name of the winner, or
        ``None`` when ``models_responses`` is empty).

    Raises:
        ValueError: If ``true_answers`` is empty while there are models to
            score, or if a model's number of responses differs from the
            number of reference answers.
    """
    results = {
        "Cosine Similarity (TF-IDF)": {},
        "Cosine Similarity (CountVectorizer)": {},
        "Jaccard Similarity": {},
        "Time Taken (seconds)": {}
    }
    averages = {}
    best_model = None
    best_score = -1
    best_time = float("inf")

    reference_texts = list(true_answers)
    pair_count = len(reference_texts)

    for model_name, responses in models_responses.items():
        response_texts = list(responses)
        # Responses and answers are paired by position, so a mismatch would
        # either score only a prefix or fail later with an obscure error.
        if pair_count == 0:
            raise ValueError("true_answers must not be empty")
        if len(response_texts) != pair_count:
            raise ValueError(
                f"Model {model_name!r} has {len(response_texts)} responses "
                f"but there are {pair_count} true answers"
            )

        # perf_counter is monotonic, unlike the wall clock.
        start_time = time.perf_counter()

        corpus = response_texts + reference_texts

        tfidf_all = TfidfVectorizer().fit_transform(corpus)
        tfidf_responses = tfidf_all[:pair_count]
        tfidf_answers = tfidf_all[pair_count:]

        count_all = CountVectorizer().fit_transform(corpus)
        count_responses = count_all[:pair_count]
        count_answers = count_all[pair_count:]

        tfidf_cosine_scores = [
            cosine_similarity(tfidf_responses[i], tfidf_answers[i])[0][0]
            for i in range(pair_count)
        ]

        count_cosine_scores = [
            cosine_similarity(count_responses[i], count_answers[i])[0][0]
            for i in range(pair_count)
        ]

        jaccard_scores = [
            calculate_jaccard_similarity(count_responses[i], count_answers[i])
            for i in range(pair_count)
        ]

        time_taken = time.perf_counter() - start_time

        results["Cosine Similarity (TF-IDF)"][model_name] = tfidf_cosine_scores
        results["Cosine Similarity (CountVectorizer)"][model_name] = count_cosine_scores
        results["Jaccard Similarity"][model_name] = jaccard_scores
        results["Time Taken (seconds)"][model_name] = time_taken

        avg_tfidf_cosine = sum(tfidf_cosine_scores) / pair_count
        avg_count_cosine = sum(count_cosine_scores) / pair_count
        avg_jaccard = sum(jaccard_scores) / pair_count

        average_score = (avg_tfidf_cosine + avg_count_cosine + avg_jaccard) / 3
        averages[model_name] = {
            "Average Similarity Score": average_score,
            "Time Taken": time_taken
        }

        # Higher average wins; on an exact tie the faster scoring run wins.
        if average_score > best_score or (average_score == best_score and time_taken < best_time):
            best_model = model_name
            best_score = average_score
            best_time = time_taken

    for metric, scores in results.items():
        print(f"\n{metric}:\n{scores}")

    print(f"\nBest Model: {best_model}")

    return {
        "results": results,
        "averages": averages,
        "best_model": best_model,
    }

# Main Function

The main function loads the PDF document, defines the questions and true answers, creates conversational chains for different models, collects responses, and evaluates the models based on similarity metrics and time.

In [7]:
def main():
    """Run the model comparison end to end.

    Loads the rulebook PDF, asks every model the same fixed questions
    through a conversational retrieval chain, and passes the collected
    answers together with the reference answers to
    ``evaluate_models_time_and_similarity``, which prints the report.
    """
    file_path = 'pdf/Root_Base_Law_of_Root_Sep_5_2024.pdf'
    documents = load_pdf(file_path)

    questions = [
        "I’d like to do something, and the rules don’t say that I can’t do it. Can I do it?",
        "Can another player not consent to an action?",
        "It seems like something should happen, but the rule doesn’t tell me to do that thing. What do I do?"
    ]

    true_answers = [
        "Within the confines of the action, yes! The game will often surprise you with outlandish, unexpected situations, and that’s part of the fun, but this doesn’t mean you can flip the table.",
        "No actions require consent. Just do the thing.",
        "Follow the literal word of the Law, not your instinct, even if a similar rule exists."
    ]

    models_responses = {}
    models = ["llama3.1:8b", "gemma2:9b", "mistral:7b", "qwen2:7b"]

    for model in models:
        qa_chain = create_conversational_chain(documents, model_name=model)
        # Chain.run is deprecated in favour of invoke.  The chain retrieves
        # its own context through the retriever, so only the question and an
        # empty history are supplied; each question starts a fresh chat.
        models_responses[model] = [
            qa_chain.invoke({"question": question, "chat_history": []})["answer"]
            for question in questions
        ]

    evaluate_models_time_and_similarity(models_responses, true_answers)


if __name__ == "__main__":
    main()

  embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
  from .autonotebook import tqdm as notebook_tqdm
  response = qa_chain.run({"question": question, "context": documents, "chat_history": []})



Cosine Similarity (TF-IDF):
{'llama3.1:8b': [0.3465677775290853, 0.1419444731784938, 0.17618832884564495], 'gemma2:9b': [0.3073037827292642, 0.18903665694069782, 0.20695495106776807], 'mistral:7b': [0.2613714813028336, 0.10176313430406181, 0.13964412755566039], 'qwen2:7b': [0.25832594910678336, 0.23651085808129835, 0.17179564910579878]}

Cosine Similarity (CountVectorizer):
{'llama3.1:8b': [0.580033831501078, 0.2748282859651492, 0.3581143573661566], 'gemma2:9b': [0.551931511524247, 0.27459450972682964, 0.4202555931245649], 'mistral:7b': [0.4893617568266501, 0.19127301391900148, 0.23148258403413785], 'qwen2:7b': [0.4252175442421335, 0.3095517423099226, 0.3217598666159375]}

Jaccard Similarity:
{'llama3.1:8b': [0.09285714285714286, 0.06097560975609756, 0.039735099337748346], 'gemma2:9b': [0.09016393442622951, 0.04819277108433735, 0.05217391304347826], 'mistral:7b': [0.09090909090909091, 0.05970149253731343, 0.05952380952380952], 'qwen2:7b': [0.05907172995780591, 0.061855670103092786, 0.