In [40]:
import logging
import os
from typing import List

import pandas as pd
from langchain import PromptTemplate
from langchain.chains import QAGenerationChain
from langchain.chat_models import AzureChatOpenAI
from langchain.document_loaders import TextLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.evaluation import EmbeddingDistance
from langchain.evaluation import load_evaluator, EvaluatorType
from langchain.evaluation.schema import StringEvaluator
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.schema import BaseRetriever
from langchain.schema import Document
from langchain.vectorstores import FAISS

from src.prompt_templates import MULTI_QA_GPT35_PROMPT_TEMPLATE, GRADE_DOCS_PROMPT_TEMPLATE

In [3]:
# loader = TextLoader("../state_of_the_union.txt")
# documents = loader.load()
# text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
# docs = text_splitter.split_documents(documents)

In [4]:
root_dir = "../c-programs-master"

docs = []
for dirpath, dirnames, filenames in os.walk(root_dir):
    for file in filenames:
        if file.endswith(".c") and "/.venv/" not in dirpath:
            try:
                loader = TextLoader(os.path.join(dirpath, file), encoding="utf-8")
                docs.extend(loader.load())
            except Exception as e:
                pass
print(f"{len(docs)}")

41


In [7]:
from langchain.text_splitter import (
    RecursiveCharacterTextSplitter,
    Language,
)

RecursiveCharacterTextSplitter.get_separators_for_language(Language.CPP)

cpp_splitter = RecursiveCharacterTextSplitter.from_language(
    language=Language.CPP, chunk_size=200, chunk_overlap=50
)
splitted_docs = cpp_splitter.split_documents(docs)
print(f"{len(splitted_docs)}")

133


In [8]:
hf_embeddings = HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-mpnet-base-v2",
    model_kwargs={'device': 'cuda'},
    encode_kwargs={'normalize_embeddings': False}
)

In [9]:
hf_embeddings_vector_db = FAISS.from_documents(splitted_docs, hf_embeddings)
hf_embeddings_vector_db.save_local("faiss_index")

In [10]:
question = "Where does factorial is being calculated?"
docs = hf_embeddings_vector_db.similarity_search(question)
for i, doc in enumerate(docs):
    print(f"\n\n\ndoc {i}:\n {doc.page_content}")




doc 0:
 for(i = 1; i <= n; i++){
		fact = fact * i;	
	}
	printf("Factorial of %d is = %d", n, fact);
	getch();
}



doc 1:
 // WAP to calculate factorial using recursion fucntion
#include<stdio.h>
int fact(int);



doc 2:
 // WAP to calculate and display factorial of 5
#include<stdio.h>
main()
{
	int i, n, fact=1;
	printf("Enter a number for factorial:\n");
	scanf("%d", &n);
	for(i = 1; i <= n; i++){
		fact = fact * i;



doc 3:
 void main()
{
	int n, f;
	printf("\nEnter a number:\n");
	scanf("%d", &n);
	f = fact(n);
	printf("\nfactorial of %d is %d", n, f);
}


In [11]:
gpt35_azure_llm = AzureChatOpenAI(
    temperature=0,
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-GPT35-TURBO',
    openai_api_version="2023-03-15-preview"
)

In [8]:
gpt4_azure_llm = AzureChatOpenAI(
    temperature=0,
    openai_api_key="a8d69f68a36b40789df2cc3fdbaacda9",
    openai_api_base="https://llmx-gpt-canada-east.openai.azure.com/",
    deployment_name='LLM-X-GPT-4',
    openai_api_version="2023-03-15-preview"
)

In [12]:
from langchain.embeddings.openai import OpenAIEmbeddings

openai_azure_embeddings = OpenAIEmbeddings(
    openai_api_key="02e3dbabaf334ccb959cbeadbd3f99c3",
    openai_api_base="https://llm-x-gpt.openai.azure.com/",
    deployment_name='LLM-X-Embedding'
)

                    deployment_name was transferred to model_kwargs.
                    Please confirm that deployment_name is what you intended.


In [13]:
from langchain.chains import RetrievalQA

qa_rag_chain = RetrievalQA.from_chain_type(
    llm=gpt35_azure_llm,
    retriever=hf_embeddings_vector_db.as_retriever()
)

question = "How does factorial is calculated in the repository?"
qa_rag_chain({"query": question})

{'query': 'How does factorial is calculated in the repository?',
 'result': "In the repository, the factorial is calculated using a loop. The code iterates from 1 to the given number 'n' and multiplies each number with the previous result to calculate the factorial. The final factorial value is then printed."}

In [14]:
logging.basicConfig()
logging.getLogger('langchain.retrievers.multi_query').setLevel(logging.INFO)
retriever_from_llm = MultiQueryRetriever.from_llm(
    retriever=hf_embeddings_vector_db.as_retriever(),
    llm=gpt35_azure_llm
)

unique_docs = retriever_from_llm.get_relevant_documents(query=question)

INFO:langchain.retrievers.multi_query:Generated queries: ['1. What is the method used to calculate factorial in the repository?', '2. Can you explain the process of calculating factorial in the repository?', '3. How is the factorial calculated within the repository?']


In [41]:
def create_qna_GT_df(docs: List[Document], num_of_qna_for_doc: int) -> pd.DataFrame:
    multi_qa_prompt = PromptTemplate.from_template(template=MULTI_QA_GPT35_PROMPT_TEMPLATE,
                                                   partial_variables={"k": num_of_qna_for_doc})
    qa_generation_chain = QAGenerationChain.from_llm(llm=gpt35_azure_llm,
                                                     prompt=multi_qa_prompt)

    qna_GT = []
    for doc in docs:
        doc_qna = qa_generation_chain.run(doc.page_content)[0]
        qna_GT += doc_qna

    qna_GT_df = pd.DataFrame(qna_GT)
    return qna_GT_df

In [42]:
def get_qna_with_chain_answers_df(qa_rag_chain: RetrievalQA, qna_GT_df: pd.DataFrame) -> pd.DataFrame:
    qna_with_chain_answers = qna_GT_df.copy()
    qna_with_chain_answers["chain_answer"] = qna_with_chain_answers.apply(
        lambda qna: qa_rag_chain({"query": qna["question"]})["result"], axis=1)

    return qna_with_chain_answers

In [43]:
def get_evaluator_score(evaluator: StringEvaluator, qna_with_chain_answer: pd.Series) -> float:
    grade = evaluator.evaluate_strings(
        prediction=qna_with_chain_answer["chain_answer"],
        reference=qna_with_chain_answer["answer"],
        input=qna_with_chain_answer["question"])

    return grade["score"]

In [45]:
def get_retrieval_score(retriever, qna_with_chain_answer: pd.Series):
    GRADE_DOCS_PROMPT = PromptTemplate(input_variables=['result', 'answer', 'query'],
                                       template=GRADE_DOCS_PROMPT_TEMPLATE)
    retrieval_eval_chain = load_evaluator(
        evaluator=EvaluatorType.QA,
        llm=gpt35_azure_llm,
        prompt=GRADE_DOCS_PROMPT
    )

    retrieved_docs = retriever.get_relevant_documents(query=qna_with_chain_answer["question"],
                                                      search_type="similarity_score_threshold",
                                                      search_kwargs={"k": 2})

    grade = retrieval_eval_chain.evaluate_strings(
        prediction=retrieved_docs,
        reference=qna_with_chain_answer["answer"],
        input=qna_with_chain_answer["question"])

    return grade["score"]

In [46]:
def get_grades_for_chain_qna(qna_with_chain_answers_df: pd.DataFrame,
                             retriever: BaseRetriever = None) -> pd.DataFrame:
    grades_for_chain_qna = qna_with_chain_answers_df.copy()

    labeled_criteria_evaluator = load_evaluator(evaluator=EvaluatorType.LABELED_CRITERIA,
                                                criteria="correctness",
                                                llm=gpt35_azure_llm)

    embedding_distance_evaluator = load_evaluator(evaluator=EvaluatorType.EMBEDDING_DISTANCE,
                                                  distance_metric=EmbeddingDistance.COSINE,
                                                  embeddings=hf_embeddings,
                                                  llm=gpt35_azure_llm)

    qa_llm_jugde_evaluator = load_evaluator(evaluator=EvaluatorType.QA,
                                            llm=gpt35_azure_llm)

    grades_for_chain_qna["labeled_criteria_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=labeled_criteria_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    grades_for_chain_qna["embedding_distance_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=embedding_distance_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    grades_for_chain_qna["qa_llm_jugde_grades"] = qna_with_chain_answers_df.apply(
        lambda qna_with_chain_answer: get_evaluator_score(
            evaluator=qa_llm_jugde_evaluator,
            qna_with_chain_answer=qna_with_chain_answer), axis=1)

    if retriever:
        grades_for_chain_qna["retrieval_score"] = qna_with_chain_answers_df.apply(
            lambda qna_with_chain_answer: get_retrieval_score(
                retriever=retriever,
                qna_with_chain_answer=qna_with_chain_answer), axis=1)

    return grades_for_chain_qna

In [47]:
def run_evaluation(qa_rag_chain: RetrievalQA, qna_GT_df: pd.DataFrame) -> pd.DataFrame:
    qna_with_chain_answers_df = get_qna_with_chain_answers_df(qa_rag_chain, qna_GT_df)
    grades_for_chain_qna = \
        get_grades_for_chain_qna(qna_with_chain_answers_df=qna_with_chain_answers_df,
                                 retriever=qa_rag_chain.retriever)
    return grades_for_chain_qna

In [48]:
qna_GT_df = create_qna_GT_df(docs=docs[:3], num_of_qna_for_doc=2)

In [None]:
run_evaluation(qa_rag_chain=qa_rag_chain, qna_GT_df=qna_GT_df)