Import Required Libraries

In [2]:
from langchain_openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
import os
from git import Repo
from openai import OpenAI
from langchain.schema import Document
from langchain_core.prompts import ChatPromptTemplate
from langchain_openai import ChatOpenAI
from dotenv import load_dotenv
import nltk
from rouge_score.rouge_scorer import RougeScorer

In [None]:
# Load variables from a local .env file into the process environment so the
# OPENAI_API_KEY lookup in a later cell does not require a hard-coded key.
load_dotenv()

In [4]:
# Directory under which cloned repositories are stored.
# Set the CODEBASE_RAG_CLONE_DIR environment variable to relocate it on another
# machine; the original path stays as the fallback so existing setups keep
# working. `or` (rather than a .get default) also covers an empty variable.
CLONE_DIR = (
    os.environ.get("CODEBASE_RAG_CLONE_DIR")
    or r"C:\Users\Rohit\Documents\MyProjects\codebase_rag"
)

File Processing

In [5]:
# File extensions (including the leading dot) whose contents get indexed.
SUPPORTED_EXTENSIONS = [
    ".py",
    ".js",
    ".tsx",
    ".ts",
    ".java",
    ".cpp",
    ".yml",
]

# Directory names that are pruned while walking a repository.
IGNORED_DIRS = [
    ".git",
    "node_modules",
    "dist",
    "__pycache__",
    ".next",
    ".vscode",
    ".env",
    "venv",
]

Clone the repository if it does not already exist locally

In [6]:
def clone_repo(repo_url):
    """Clone a repository into CLONE_DIR (unless already present) and return its local path.

    The local directory name is the last path segment of ``repo_url`` with a
    trailing ``.git`` removed. If that directory already exists, nothing is
    cloned and the existing path is returned.

    Args:
        repo_url: Remote URL of the repository to clone.

    Returns:
        Path of the repository directory inside CLONE_DIR.

    Raises:
        ValueError: If no repository name can be derived from ``repo_url``.
    """
    # Drop trailing slashes first so "https://host/org/repo/" still yields "repo"
    # instead of an empty name (which would resolve to CLONE_DIR itself).
    repo_name = repo_url.rstrip("/").split("/")[-1]

    # Remove only a trailing ".git"; str.replace would also mangle names that
    # merely contain it, e.g. "my.github.io" -> "myhub.io".
    if repo_name.endswith(".git"):
        repo_name = repo_name[: -len(".git")]

    if not repo_name:
        raise ValueError(f"Cannot derive a repository name from URL: {repo_url!r}")

    repo_path = os.path.join(CLONE_DIR, repo_name)

    if not os.path.exists(repo_path):
        Repo.clone_from(repo_url, repo_path)
        print(f"Cloned {repo_name} to {repo_path}")
    else:
        print(f"Repository {repo_name} already exists at {repo_path}")

    return repo_path

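The function below reads a file's content and returns a dictionary containing the repository name, the file's path relative to the repository root, and the file's content. It returns `None` if the file cannot be read.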

In [7]:
def get_file_content(file_path, repo_path, repo_name):
    """Read a single file and describe it as a record dictionary.

    Args:
        file_path: Path of the file to read.
        repo_path: Root of the repository; ``name`` is computed relative to it.
        repo_name: Label stored under the ``repo`` key.

    Returns:
        ``{"repo": ..., "name": ..., "content": ...}`` on success, or ``None``
        when reading fails (the error is printed, not raised).
    """
    try:
        # Undecodable bytes are substituted rather than aborting the read.
        with open(file_path, "r", encoding="utf-8", errors="replace") as handle:
            text = handle.read()
        record = dict(
            repo=repo_name,
            name=os.path.relpath(file_path, repo_path),
            content=text,
        )
    except Exception as e:
        print(f"Error reading file {file_path}: {e}")
        record = None
    return record

This function walks a repository, skipping ignored directories, and gathers the contents of every file with a supported extension. It returns a list of dictionaries, each containing the repository name, the file's relative path, and the file's content. (EXTRACTS CONTENT)

In [8]:
def get_main_files_content(repo_path, repo_name):
    """Collect records for every supported source file below ``repo_path``.

    Directories listed in IGNORED_DIRS are not descended into, and only files
    whose extension is in SUPPORTED_EXTENSIONS are read. Files that cannot be
    read are skipped.

    Args:
        repo_path: Root directory to walk.
        repo_name: Label passed through to each file record.

    Returns:
        List of the dictionaries produced by ``get_file_content``; whatever was
        gathered before an unexpected error is still returned.
    """
    collected = []
    try:
        for current_dir, subdirs, filenames in os.walk(repo_path):
            # Slice assignment mutates the list os.walk holds, which is what
            # stops it from descending into the ignored directories.
            subdirs[:] = [name for name in subdirs if name not in IGNORED_DIRS]

            for filename in filenames:
                _, extension = os.path.splitext(filename)
                if extension not in SUPPORTED_EXTENSIONS:
                    continue

                record = get_file_content(
                    os.path.join(current_dir, filename), repo_path, repo_name
                )
                if not record:
                    continue
                collected.append(record)
    except Exception as e:
        print(f"Error processing {repo_name}: {str(e)}")

    return collected

In [9]:
def process_codebases(codebase_inputs):
    """Process multiple codebases (either remote URLs or local paths).

    Remote inputs are cloned with ``clone_repo``; anything else is treated as a
    local directory and used as-is. The repository name is the final component
    of the resulting path.

    Args:
        codebase_inputs: Iterable of repository URLs and/or local directory paths.

    Returns:
        One flat list with the file records of all codebases, in input order.
    """
    # Explicit prefixes: a bare "http" test would also match local directories
    # such as "httpd-src", and would miss ssh/git style remotes.
    remote_prefixes = ("http://", "https://", "ssh://", "git://", "git@")
    all_files = []

    for input_path in codebase_inputs:
        if input_path.startswith(remote_prefixes):
            # It's a repository URL - clone it
            repo_path = clone_repo(input_path)
        else:
            # It's a local path - use directly
            repo_path = input_path

        # normpath drops a trailing separator; without it basename("proj/")
        # is "" and every record would carry an empty repository name.
        repo_name = os.path.basename(os.path.normpath(repo_path))

        # Get files from this codebase
        all_files.extend(get_main_files_content(repo_path, repo_name))

    return all_files

Embeddings

In [10]:
# Shared embedding client, constructed with the library defaults and handed to
# Chroma in create_vector_store.
embedding_model = OpenAIEmbeddings()

File content > Documents

In [11]:
def create_documents(files_content):
    """Convert file records into ``Document`` objects, one per file.

    Each document's text starts with ``REPO:`` and ``FILE:`` header lines
    followed by the file content; the repository name and relative path are
    also stored in the metadata under ``repo`` and ``source``.

    Args:
        files_content: Iterable of dicts with ``repo``, ``name`` and ``content`` keys.

    Returns:
        List of ``Document`` objects in the same order as the input.
    """
    return [
        Document(
            page_content=f"REPO: {record['repo']}\nFILE: {record['name']}\nCONTENT:\n{record['content']}",
            metadata={"repo": record['repo'], "source": record['name']},
        )
        for record in files_content
    ]

Store embeddings in ChromaDB

In [12]:

def create_vector_store(documents, collection_name="multi-codebase-rag", persist_directory=None):
    """Embed ``documents`` with ``embedding_model`` and store them in a Chroma collection.

    Args:
        documents: ``Document`` objects to embed and index.
        collection_name: Name of the Chroma collection. Defaults to the name
            this notebook has always used.
        persist_directory: Optional directory forwarded to Chroma. The default
            ``None`` keeps the previous behaviour of not passing a location.
            NOTE(review): confirm how re-running with an existing directory
            treats documents that were already added.

    Returns:
        The vector store returned by ``Chroma.from_documents``.
    """
    return Chroma.from_documents(
        documents=documents,
        embedding=embedding_model,
        collection_name=collection_name,
        persist_directory=persist_directory,
    )


Given Codebase inputs

In [13]:
# Codebases to index; each entry is handed to process_codebases.
CODEBASE_INPUTS = [
    "https://github.com/langchain-ai/langchain.git",
    "https://github.com/evershopcommerce/evershop.git",
    "https://github.com/atinder11/Youtube-Clone.git",
]
    

In [None]:
# End-to-end indexing: gather file records from every configured codebase,
# wrap them as Documents, then embed and store them. Later cells query
# `vectorstore`.
all_files = process_codebases(CODEBASE_INPUTS)
documents = create_documents(all_files)
vectorstore = create_vector_store(documents) 

In [15]:
# Raw OpenAI client; the key is read from the environment (None if unset).
# NOTE(review): `client` is not referenced by any other cell visible here - confirm it is still needed.
client = OpenAI(api_key=os.environ.get('OPENAI_API_KEY'))
# Chat model used by the prompt chain in the cells below.
llm = ChatOpenAI(model="gpt-4-turbo")

Query

In [26]:
# Example question used for the retrieval and generation cells below.
query = "How does the Stripe payment module work?"

Top 5

In [27]:
# Retrieve the 5 documents most similar to the query from the vector store.
relevant_docs = vectorstore.similarity_search(query=query, k=5)
# Keep only the text of each hit (it already carries the REPO/FILE header lines).
contexts = [doc.page_content for doc in relevant_docs]
# Join the hits with a visible separator; this string fills {context} in the prompt.
formatted_context = "\n\n-------\n\n".join(contexts)

In [28]:
# System message for the chat prompt; {context} is filled with the retrieved
# documents when the chain is invoked.
SYSTEM_TEMPLATE = """Answer questions using information from these codebases:
    {context}
    
    Guidelines:
    - Specify which repository (REPO) the information comes from
    - If information comes from multiple repos, note that.
    - If unsure, say which repos you checked.
    - Don't invent anything not in the context.
    - Use three sentences maximum and keep the answer concise.
    """
    
# Two-message prompt: the system instructions above plus the user's question,
# supplied through the {question} variable.
prompt = ChatPromptTemplate.from_messages([
        ("system", SYSTEM_TEMPLATE),
        ("human", "Question: {question}")
    ])

In [29]:
# The pipe operator (|) composes the prompt template with the language model
# into a single runnable "chain": the formatted prompt is passed to the model.
chain = prompt | llm
# Fill both template variables and run the chain once for the example query.
response = chain.invoke({"context": formatted_context, "question": query})

Response

In [30]:
# Show the generated answer under an "Answer:" heading.
print("\nAnswer:", response.content, sep="\n")


Answer:
From the REPO: evershop, the Stripe payment module includes several functionalities to manage Stripe payments. It uses a webhook to handle Stripe payment events, capturing, authorizing, and updating payment statuses within the Evershop database accordingly. Additionally, the system incorporates order processing logic that updates order statuses and emits events based on Stripe transaction results, integrates settings for Stripe configurations, and implements error handling to manage failed payment transactions.


Opens and reads the dataset

In [31]:
import json
# Load the evaluation dataset from the working directory. The cells below read
# a top-level "queries" list whose entries have "question",
# "expected_response" and "source" keys.
with open("dataset.json", "r", encoding="utf-8") as f:
    rag_eval_data = json.load(f)

Finds the expected response from the dataset based on the user's query

In [32]:
def find_expected_response(user_query):
    """Return the dataset entry whose question matches ``user_query``.

    Matching is an exact comparison after lower-casing both sides; the first
    matching entry in ``rag_eval_data["queries"]`` wins.

    Args:
        user_query: Question text to look up.

    Returns:
        The matching entry (a dict from the dataset), or ``None`` if there is none.
    """
    matches = (
        entry
        for entry in rag_eval_data["queries"]
        if entry["question"].lower() == user_query.lower()
    )
    return next(matches, None)

In [33]:

def evaluate_rag_system(user_query):
    """Answer a dataset question with the RAG chain and print BLEU-1 / ROUGE-1 scores.

    The question is looked up with ``find_expected_response``. When found, the
    5 most similar documents are retrieved from ``vectorstore``, the answer is
    generated with ``chain``, and it is compared against the expected response.

    The two metrics are computed independently, so a failure in one (for
    example in the NLTK tokenizer) neither suppresses the other nor breaks the
    final printout; a metric that cannot be computed is reported as 0.

    Args:
        user_query: Question text; it must match a ``question`` in the dataset.

    Returns:
        None. All results are printed.
    """
    expected_data = find_expected_response(user_query)

    if not expected_data:
        print("No matching question found in the dataset.")
        return

    expected_response = expected_data["expected_response"]
    source = expected_data["source"]

    # Retrieve context and generate the answer, mirroring the interactive cells.
    relevant_docs = vectorstore.similarity_search(query=user_query, k=5)
    contexts = [doc.page_content for doc in relevant_docs]
    formatted_context = "\n\n-------\n\n".join(contexts)

    response = chain.invoke({"context": formatted_context, "question": user_query})
    llm_response = response.content

    # Normalised before the try blocks so every fallback path can rely on them.
    expected_clean = expected_response.lower()
    llm_clean = llm_response.lower()

    # Tokenize / split sentences into words; fall back to whitespace splitting
    # when the NLTK tokenizer is unavailable.
    try:
        expected_tokens = nltk.word_tokenize(expected_clean)
        llm_tokens = nltk.word_tokenize(llm_clean)
    except Exception as e:
        print(f"Error calculating NLP metrics: {e}")
        print("Trying fallback tokenization...")
        expected_tokens = expected_clean.split()
        llm_tokens = llm_clean.split()

    # About BLEU-1:
    # BLEU-1 (Bilingual Evaluation Understudy) measures how similar an AI response
    # is to an expected response using individual words (unigrams) only.
    bleu_score = 0.0
    if expected_tokens and llm_tokens:
        try:
            bleu_score = nltk.translate.bleu_score.sentence_bleu(
                [expected_tokens],
                llm_tokens,
                weights=(1, 0, 0, 0)  # only single-word matches are considered
            ) * 100
        except Exception as e:
            print(f"Error calculating BLEU-1: {e}")

    # ROUGE-1 (Recall-Oriented Understudy for Gisting Evaluation) measures the
    # unigram overlap between the expected and the generated response.
    # Stored as a plain dict so the zero default and the computed result are
    # read the same way below (the old dict default was accessed as attributes
    # and raised AttributeError whenever the fallback path ran).
    rouge1 = {'precision': 0.0, 'recall': 0.0, 'fmeasure': 0.0}
    try:
        scorer = RougeScorer(['rouge1'], use_stemmer=True)
        rouge_result = scorer.score(expected_clean, llm_clean)['rouge1']
        rouge1 = {
            'precision': rouge_result.precision,
            'recall': rouge_result.recall,
            'fmeasure': rouge_result.fmeasure,
        }
    except Exception as e:
        print(f"Error calculating ROUGE-1: {e}")

    #################
    # PRINTING RESULTS
    #################

    print("\n===== QUERY EVALUATION =====\n")
    print(f"User Query: {user_query}")
    print(f"Source Repository: {source}")
    print("\nEXPECTED RESPONSE:")
    print(expected_response)
    print("\nLLM RESPONSE:")
    print(llm_response)
    print(f"BLEU-1 Score: {bleu_score:.2f}%")
    print(f"ROUGE-1 Precision: {rouge1['precision'] * 100:.2f}%")
    print(f"ROUGE-1 Recall: {rouge1['recall'] * 100:.2f}%")
    print(f"ROUGE-1 F1: {rouge1['fmeasure'] * 100:.2f}%")



# How to read the ROUGE-1 numbers reported by evaluate_rag_system:
#   Precision = matching words / words in generated response
#   Recall    = matching words / words in expected response
#   F1        = 2*P*R/(P+R)

user_query = "How does the Stripe payment module work?"
evaluate_rag_system(user_query)


===== QUERY EVALUATION =====

User Query: How does the Stripe payment module work?
Source Repository: evershop

EXPECTED RESPONSE:
The Stripe payment module in the Evershop repository integrates Stripe for processing payments. It retrieves and validates payment intents, updates payment status based on Stripe transaction outcomes, captures payment details, and handles webhook events for payment statuses such as succeeded, capturable, and canceled. It supports both capture and authorization-only modes.

LLM RESPONSE:
The Stripe payment module in the Evershop repository provides functionalities for payment processing using Stripe. It allows capturing and handling Stripe payments through webhooks, setting up payment methods, and managing payment statuses based on Stripe transactions. The module includes mechanisms for updating order statuses based on Stripe payment outcomes, capturing payment intents, and handling Stripe events post-payment such as cancellations and completions, thus ensu