In [1]:
import logging
import os
from json import JSONDecodeError
from typing import List

import pandas as pd
from langchain import PromptTemplate
from langchain.chains import QAGenerationChain
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.evaluation import EmbeddingDistance
from langchain.evaluation import load_evaluator, EvaluatorType
from langchain.evaluation.schema import StringEvaluator
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.schema import BaseRetriever
from langchain.schema import Document
from langchain.vectorstores import FAISS

from src.prompt_templates import MULTI_QA_GPT4_PROMPT_TEMPLATE, MULTI_QA_GPT35_PROMPT_TEMPLATE, GRADE_DOCS_PROMPT_TEMPLATE

INDEX_OF_FIRST_QNA_IN_RESPONSE = 0

In [2]:
root_dir = "../linux-kernel"

docs = []
for dirpath, dirnames, filenames in os.walk(root_dir):
    for file in filenames:
        if file.endswith(".c") and "/.venv/" not in dirpath:
            try:
                loader = TextLoader(os.path.join(dirpath, file), encoding="utf-8")
                docs.extend(loader.load())
            except Exception as e:
                pass
print(f"{len(docs)}")

4


In [3]:
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    Language,
)

RecursiveCharacterTextSplitter.get_separators_for_language(Language.CPP)

chunks_cpp_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CPP, chunk_size=250, chunk_overlap=50
)
chunks = chunks_cpp_splitter.split_documents(docs)
print(f"{len(chunks)}")

1227


In [4]:
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': 'cuda'},
    encode_kwargs={'normalize_embeddings': False}
)

In [5]:
from langchain.embeddings.openai import OpenAIEmbeddings

openai_azure_embeddings = OpenAIEmbeddings(
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-Embedding',
    openai_api_version="2023-03-15-preview"
)

                    deployment_name was transferred to model_kwargs.
                    Please confirm that deployment_name is what you intended.


In [6]:
hf_embeddings_vector_db = FAISS.from_documents(chunks, hf_embeddings)
hf_embeddings_vector_db.save_local("linux-kernel_embeddings")

In [7]:
question = "What is the purpose of the get_user_page in the linux kernel?"
retrieved_chunks = hf_embeddings_vector_db.similarity_search(question)
for i, retrieved_chunk in enumerate(retrieved_chunks):
    print(f"\n\n\ndoc {i}:\n {retrieved_chunk.page_content}")




doc 0:
 *
 * __get_user_pages walks a process's page tables and takes a reference to
 * each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user



doc 1:
 * trying again.
 *
 * Typically this is meant to be used by the futex code.
 *
 * The main difference with get_user_pages() is that this function will
 * unconditionally call handle_mm_fault() which will in turn perform all the



doc 2:
 *
 * get_user_pages_remote walks a process's page tables and takes a reference
 * to each struct page that each user address corresponds to at a given
 * instant. That is, it takes the page that would be accessed if a user



doc 3:
 *
 * get_user_pages_remote is typically used for fewer-copy IO operations,
 * to get a handle on the memory by some means other than accesses
 * via the user virtual addresses. The pages may be submitted for


In [8]:
gpt35_azure_llm = AzureChatOpenAI(
    temperature=0,
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-GPT35-TURBO',
    openai_api_version="2023-03-15-preview"
)

In [9]:
gpt4_azure_llm = AzureChatOpenAI(
    temperature=0,
    openai_api_key="a8d69f68a36b40789df2cc3fdbaacda9",
    openai_api_base="https://llmx-gpt-canada-east.openai.azure.com/",
    deployment_name='LLM-X-GPT-4',
    openai_api_version="2023-03-15-preview"
)

In [10]:
from langchain.chains import RetrievalQA

qa_rag_chain = RetrievalQA.from_chain_type(
    llm=gpt35_azure_llm,
    retriever=hf_embeddings_vector_db.as_retriever()
)

qa_rag_chain({"query": question})

{'query': 'What is the purpose of the get_user_page in the linux kernel?',
 'result': "The purpose of the `get_user_pages` function in the Linux kernel is to walk a process's page tables and obtain a reference to each `struct page` that corresponds to a user address at a given moment. This function is typically used by the kernel's futex code and is used to access user pages directly. It ensures that the pages are accessible and can be used for various operations, such as IO operations or handling faults."}

In [11]:
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=hf_embeddings_vector_db.as_retriever(),
    llm=gpt35_azure_llm
)

unique_docs = retriever_from_llm.get_relevant_documents(query=question)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the function of the get_user_page in the linux kernel?', '2. How does the get_user_page function serve its purpose in the linux kernel?', '3. Can you explain the role and significance of the get_user_page in the linux kernel?']


In [16]:
def create_qna_GT_df(docs: List[Document], num_of_qna_for_doc: int) -> pd.DataFrame:
    multi_qa_prompt = PromptTemplate.from_template(template=MULTI_QA_GPT4_PROMPT_TEMPLATE,
                                                   partial_variables={"k": num_of_qna_for_doc})
    qa_generation_chain = QAGenerationChain.from_llm(llm=gpt4_azure_llm,
                                                     prompt=multi_qa_prompt)

    documents_cpp_splitter = RecursiveCharacterTextSplitter.from_language(
        language=Language.CPP, chunk_size=2000, chunk_overlap=100
    )
    splitted_docs = documents_cpp_splitter.split_documents(docs)
    print(f"splitted {len(docs)} docs to {len(splitted_docs)}")

    qna_GT = []
    for splitted_doc in splitted_docs:
        try:
            qna = qa_generation_chain.run(splitted_doc.page_content)[INDEX_OF_FIRST_QNA_IN_RESPONSE]
            qna_GT += qna
        except JSONDecodeError:
            print("Failed to generate valid QnA JSON for doc")

    qna_GT_df = pd.DataFrame(qna_GT)
    return qna_GT_df

In [42]:
def get_qna_with_chain_answers_df(qa_rag_chain: RetrievalQA, qna_GT_df: pd.DataFrame) -> pd.DataFrame:
    qna_with_chain_answers = qna_GT_df.copy()
    qna_with_chain_answers["chain_answer"] = qna_with_chain_answers.apply(
        lambda qna: qa_rag_chain({"query": qna["question"]})["result"], axis=1)

    return qna_with_chain_answers

In [43]:
def get_evaluator_score(evaluator: StringEvaluator, qna_with_chain_answer: pd.Series) -> float:
    grade = evaluator.evaluate_strings(
        prediction=qna_with_chain_answer["chain_answer"],
        reference=qna_with_chain_answer["answer"],
        input=qna_with_chain_answer["question"])

    return grade["score"]

In [45]:
def get_retrieval_score(retriever, qna_with_chain_answer: pd.Series):
    GRADE_DOCS_PROMPT = PromptTemplate(input_variables=['result', 'answer', 'query'],
                                       template=GRADE_DOCS_PROMPT_TEMPLATE)
    retrieval_eval_chain = load_evaluator(
        evaluator=EvaluatorType.QA,
        llm=gpt35_azure_llm,
        prompt=GRADE_DOCS_PROMPT
    )

    retrieved_docs = retriever.get_relevant_documents(query=qna_with_chain_answer["question"],
                                                      search_type="similarity_score_threshold",
                                                      search_kwargs={"k": 2})

    grade = retrieval_eval_chain.evaluate_strings(
        prediction=retrieved_docs,
        reference=qna_with_chain_answer["answer"],
        input=qna_with_chain_answer["question"])

    return grade["score"]

In [46]:
def get_grades_for_chain_qna(qna_with_chain_answers_df: pd.DataFrame,
                             retriever: BaseRetriever = None) -> pd.DataFrame:
    grades_for_chain_qna = qna_with_chain_answers_df.copy()

    labeled_criteria_evaluator = load_evaluator(evaluator=EvaluatorType.LABELED_CRITERIA,
                                                criteria="correctness",
                                                llm=gpt35_azure_llm)

    embedding_distance_evaluator = load_evaluator(evaluator=EvaluatorType.EMBEDDING_DISTANCE,
                                                  distance_metric=EmbeddingDistance.COSINE,
                                                  embeddings=hf_embeddings,
                                                  llm=gpt35_azure_llm)

    qa_llm_jugde_evaluator = load_evaluator(evaluator=EvaluatorType.QA,
                                            llm=gpt35_azure_llm)

    grades_for_chain_qna["labeled_criteria_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=labeled_criteria_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    grades_for_chain_qna["embedding_distance_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=embedding_distance_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    grades_for_chain_qna["qa_llm_jugde_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=qa_llm_jugde_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    if retriever:
        grades_for_chain_qna["retrieval_score"] = qna_with_chain_answers_df.apply(
            lambda qna_with_chain_answer: get_retrieval_score(
                retriever=retriever,
                qna_with_chain_answer=qna_with_chain_answer), axis=1)

    return grades_for_chain_qna

In [47]:
def run_evaluation(qa_rag_chain: RetrievalQA, qna_GT_df: pd.DataFrame) -> pd.DataFrame:
    qna_with_chain_answers_df = get_qna_with_chain_answers_df(qa_rag_chain, qna_GT_df)
    grades_for_chain_qna = \
        get_grades_for_chain_qna(qna_with_chain_answers_df=qna_with_chain_answers_df,
                                 retriever=qa_rag_chain.retriever)
    return grades_for_chain_qna

In [50]:
run_evaluation(qa_rag_chain=qa_rag_chain, qna_GT_df=qna_GT_df)



Unnamed: 0,question,answer,chain_answer,labeled_criteria_grades,embedding_distance_grades,qa_llm_jugde_grades,retrieval_score
0,What is the purpose of the 'for' loop in the g...,The 'for' loop is used to calculate the factor...,The purpose of the 'for' loop in the given cod...,,0.3103038,1,
1,What does the variable 'fact' represent in the...,The variable 'fact' stores the factorial of th...,"In the given code, the variable 'fact' represe...",1.0,0.1048536,1,1.0
2,What is the purpose of the given code?,The purpose of the given code is to calculate ...,The purpose of the given code is to input the ...,,0.6754299,0,1.0
3,What is the return type of the 'fact' function?,The return type of the 'fact' function is 'int'.,The return type of the 'fact' function is 'int'.,1.0,-2.220446e-16,1,1.0
4,What is the purpose of the code snippet?,To calculate and display the factorial of a nu...,The purpose of the code snippet is to prompt t...,,0.7435595,0,
5,What is the value of 'fact' after the code sni...,The factorial of the number entered by the user,The value of 'fact' cannot be determined witho...,1.0,0.5335894,1,1.0


In [17]:
embedding_distance_evaluator = load_evaluator(evaluator=EvaluatorType.EMBEDDING_DISTANCE,
                                              distance_metric=EmbeddingDistance.COSINE,
                                              embeddings=hf_embeddings,
                                              llm=gpt35_azure_llm)

In [None]:
qna_GT_df = create_qna_GT_df(docs=docs[:1], num_of_qna_for_doc=3)

splitted 1 docs to 24


In [27]:
qna_GT_df

Unnamed: 0,question,answer
0,What is the purpose of the module implemented ...,The module implemented in the 'arp.c' file of ...
1,Who made major changes to caching and behaviou...,Alexey Kuznetsov made major changes to caching...
2,Who added FDDI support to the Linux Kernel?,Lawrence V. Stefani added FDDI support to the ...
3,What did Jesper D. Brouer contribute to the Li...,Jesper D. Brouer contributed Proxy ARP PVLAN R...
4,What is the purpose of the '#define pr_fmt(fmt...,This line is defining a macro that prepends th...
5,What is the purpose of the 'arp_generic_ops' s...,The 'arp_generic_ops' struct is defining a set...
6,What is the family type for the arp_hh_ops and...,The family type for both arp_hh_ops and arp_di...
7,What is the value of the 'gc_thresh3' field in...,The value of the 'gc_thresh3' field in the 'ar...
8,What does the function arp_mc_map do in the gi...,The function arp_mc_map maps the multicast add...
9,What does the arp_constructor function do in t...,The arp_constructor function constructs a neig...
