In [10]:
import sys
import pysqlite3

sys.modules['sqlite3'] = pysqlite3

import os
import re
import time
import warnings
import ollama
from datetime import datetime
from functools import cached_property
from langchain_community.llms import AzureOpenAI
from langchain_community.document_loaders import PyPDFLoader,TextLoader
from langchain_text_splitters import (Language,RecursiveCharacterTextSplitter)
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import OpenAIEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms import Ollama
import google.generativeai as genai
from groq import Groq
from langchain_google_genai import GoogleGenerativeAIEmbeddings
import requests
import voyageai
from langchain.retrievers import ContextualCompressionRetriever
from langchain_voyageai import VoyageAIEmbeddings,VoyageAIRerank
from tree_sitter_languages import get_language, get_parser
from llama_index.core.text_splitter import CodeSplitter
from dotenv import load_dotenv
from chunker import get_code_chunks
import subprocess


# Load environment variables from ../.env; later cells read GOOGLE_API_KEY and
# GENAI_API_KEY from os.environ.
dotenv_path = '../.env'
load_dotenv(dotenv_path)

False

## LLM CLASS FOR THE WHOLE PIPELINE

In [26]:
class Document:
    """Lightweight record pairing a chunk of text with optional metadata.

    Instances are handed to ``Chroma.from_documents`` when the vector store is built.
    """

    def __init__(self, page_content, metadata=None):
        # Plain attribute container: values are stored as given, without copying or validation.
        self.metadata = metadata
        self.page_content = page_content

def doc_merger(splits):
    """Merge chunks shorter than three lines into the chunk that follows them.

    Walks the list left to right. Whenever the current chunk has fewer than
    three lines, the next chunk is appended to it (plain concatenation, no
    separator) and removed from the list; the enlarged chunk is then checked
    again. The last chunk is never merged because nothing follows it.

    Args:
        splits: list of text chunks. The list is modified in place.

    Returns:
        The same list object, after merging. Empty and single-element lists
        are returned unchanged.
    """
    current = 0
    # Stop as soon as `current` is the last index: that also covers empty and
    # single-element input, which previously raised IndexError.
    while current < len(splits) - 1:
        if len(splits[current].splitlines()) < 3:
            # Too short to stand alone: absorb the following chunk and re-check.
            splits[current] += splits.pop(current + 1)
        else:
            current += 1
    return splits

class QAClass:
    # Class-level attributes
    _embeddings = None
    _db = None

    def __init__(self):
        self.timestamp = ""

    @classmethod
    def initialize_embeddings_and_db(cls):
        if cls._embeddings is None or cls._db is None:

            ########### Voyage Code Embeddings ############

            command = ["clang-format","-style={ColumnLimit: 300, AllowShortFunctionsOnASingleLine: All, AllowShortIfStatementsOnASingleLine: true}","-i","original.txt"]

            # Run the command
            subprocess.run(command, check=True)

            file_path = "original.txt"
            with open(file_path, "r") as f:
                docs = f.read()

            splits = get_code_chunks(docs)
            new_splits = [split for split in splits if len(split) > 2]
            new_splits2 = doc_merger(new_splits)
            # documents = [Document(page_content=split) for split in splits]
            documents = [Document(page_content=split) for split in new_splits2]
            os.environ['GOOGLE_API_KEY'] = os.environ.get("GOOGLE_API_KEY")
            cls._embeddings = GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")
            cls._db = Chroma.from_documents(documents=documents, embedding=cls._embeddings)



    # ********************************************
    # LOCAL LLM FUNCTIONS ADDED HERE

    def call_ollama(self, prompt):
        """Send ``prompt`` to the locally hosted LLM endpoint and return its reply.

        POSTs ``{'message': prompt}`` as JSON and returns the ``'response'``
        field of the JSON reply.
        """
        endpoint = 'http://10.103.73.29:6970'
        payload = {'message': prompt}
        reply = requests.post(endpoint, json=payload)
        return reply.json()['response']

    # gemini model definition and call
    # gemini model definition and call
    def call_gemini(self,prompt):
        """Send ``prompt`` to Gemini in a fresh chat session and return the reply text.

        The model is configured from the GENAI_API_KEY environment variable with
        every listed harm category set to BLOCK_NONE. Up to three attempts are
        made, waiting five seconds between attempts.

        Returns:
            The response text, or None when all attempts raise.
        """
        max_attempts = 3
        retry_delay_seconds = 5

        harm_categories = (
            "HARM_CATEGORY_HARASSMENT",
            "HARM_CATEGORY_HATE_SPEECH",
            "HARM_CATEGORY_SEXUALLY_EXPLICIT",
            "HARM_CATEGORY_DANGEROUS_CONTENT",
        )
        # See https://ai.google.dev/gemini-api/docs/safety-settings
        safety_settings = [
            {"category": category, "threshold": "BLOCK_NONE"}
            for category in harm_categories
        ]

        genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
        generation_config = {
            "temperature": 0.1,
            "top_p": 0.95,
            "top_k": 64,
            "max_output_tokens": 8192,
            "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
            model_name="gemini-1.5-pro",
            generation_config=generation_config,
            safety_settings=safety_settings,
        )
        chat = model.start_chat(history=[])

        for attempt in range(1, max_attempts + 1):
            try:
                return chat.send_message(prompt).text
            except Exception as e:
                # Deliberately broad: any failure (blocked prompt, quota, network) is retried.
                print("Error: ", e)
                # Only wait and announce a retry when another attempt actually follows.
                if attempt < max_attempts:
                    time.sleep(retry_delay_seconds)
                    print("Retrying No. ", attempt)

        return None


    def combine_docs(self, docs):
        return "\n\n".join(f"Snippet.{i+2}:\n\n{doc.page_content}" for i, doc in enumerate(docs))

    # os.environ["GROQ_API_KEY"] = os.environ.get("GROQ_API_KEY")
    # def call_groq(self,prompt):
    #         client = Groq(
    #             api_key=os.environ.get("GROQ_API_KEY"),
    #         )
    #         chat_completion = client.chat.completions.create(
    #             temperature=0.1,
    #             messages=[{"role": "user", "content": prompt},],
    #             model="gemma-7b-it",
    #         )

    #         return chat_completion.choices[0].message.content

    # LOCAL LLM FUNCTIONS END HERE
    # ********************************************

    @cached_property
    def llm_chain(self):

        # ***** Model Initialization Starts Here *****
        # Set up your LLM with API key Below (Not required for Ollama)

        # 1. ***** OpenAI API (Azure AI Search) *****

        # Setting up environment variables 
        # os.environ["OPENAI_API_TYPE"] = "Azure"
        # os.environ["OPENAI_API_VERSION"] = "2023-05-15"
        # os.environ["OPENAI_API_BASE"] = "https://debb.openai.azure.com/"
        # os.environ["OPENAI_API_KEY"] = os.environ.get("OPENAI_API_KEY")

        # # Suppressing warnings
        # with warnings.catch_warnings():
        #     warnings.simplefilter("ignore")

        #     # Initialize AzureOpenAI model
        #     llm = AzureOpenAI(
        #         model_name="gpt-3.5-turbo-16k",
        #         model_kwargs={"engine": "openaidemo-15999-16k"},
        #         temperature=0.2
        #     )

        # 2. ***** Gemini *****
        llm = self.call_gemini

        # 3. ***** Local LLM (Ollama) *****

        # llm = self.call_ollama

        # 4. ***** GROQQ CLOUD *****

        # llm = self.call_groq

        # ***** Model Initialization Ends Here *****

        # Ensure embeddings and database are initialized
        self.initialize_embeddings_and_db()

        # Instead of creating a standard RetrievalQA chain, we'll use our custom rag_chain function
        class CustomQAChain:
            def __init__(self, outer_instance):
                self.outer_instance = outer_instance

            def run(self, query, context_query, coverage, fifty_clean):

                # define retriever
                retriever = self.outer_instance._db.as_retriever(search_type="similarity", search_kwargs={"k": 4})

                # Use similarity_search_with_score instead of invoke
                retrieved_docs_with_scores = self.outer_instance._db.similarity_search_with_score(context_query, k=4)

                retrieved_docs = [doc for doc, _ in retrieved_docs_with_scores]
                similarity_scores = [score for _, score in retrieved_docs_with_scores]


                # Sort both lists based on similarity scores in descending order
                sorted_pairs = sorted(zip(retrieved_docs, similarity_scores), key=lambda x: x[1], reverse=True)

                # Unzip the sorted pairs
                retrieved_docs, similarity_scores = zip(*sorted_pairs)

                # if len(query.splitlines()) > 10:
                #     formatted_context_2 = self.outer_instance.combine_docs(retrieved_docs)
                # else:
                # formatted_context_2 = "Snippet.0: \n\n" + fifty_clean + "\n\n" + self.outer_instance.combine_docs(retrieved_docs[:2])
                # Get the first 100 lines and last 100 lines
                first_100_lines = "\n".join(fifty_clean.splitlines()[:100])
                last_100_lines = "\n".join(fifty_clean.splitlines()[-100:])

                # Create the new formatted_context_2
                formatted_context_2 = (
                    "Snippet 0:\n\n" + first_100_lines + "\n\n"
                    "Snippet 1:\n\n" + last_100_lines + "\n\n"
                    + self.outer_instance.combine_docs(retrieved_docs[:2])
                )
                
                return self.outer_instance.call_llm(query, formatted_context_2, coverage, llm)


        return CustomQAChain(self)
    
    # need to make three calls to LLM, 1) relevance of context, 2) generality and 3) security
    def call_llm(self, query, formatted_context, coverage,llm):

        # if coverage:
        #     with open('prompt_in_coverage.txt', 'r') as file:
        #         prompt_template = file.read()
        # else:
        #     with open(' prompt_not_in_coverage.txt', 'r') as file:
        #         prompt_template = file.read()

        # prompt = prompt_template.format(sec_list=sec_list, formatted_context=formatted_context, query=query)
        # prompt_to_send = prompt

        # Classify the context as relevant or not. If not, retain
        # print("Checking Relevance")
        relevance = self.check_relevance(query, formatted_context, llm)
        # print(relevance)
        if relevance == "no":
            return "final verdict: class 4"
        else:
            # query functionality
            # print("Checking Functionality")
            functionality = self.check_functionality(query, formatted_context, llm, coverage)
            # print(functionality)
            # retain since it is needed for required functionality
            if functionality == "yes":
                return "final verdict: class 4"
            else:
                # query security
                # print("Checking Security")
                security = self.check_security(query, formatted_context, llm, coverage)
                # print(security)
                # retain since it is needed for required security
                if security == "yes":
                    return "final verdict: class 4"
                else:
                    return "final verdict: class 1"
                

    # function to check relevance of context with query
    def check_relevance(self, query, formatted_context, llm):
        
        with open("check_relevance_prompt.txt", 'r') as file:
            prompt_template = file.read()

        prompt = prompt_template.format(context=formatted_context, query=query)

        response = llm(prompt)

        # save response to cands/multiagent/security
        # file_name = "cands/multiagent/relevance/relevance" + "_time_"+ str(self.timestamp) + ".blade.c.txt"
        # with open(file_name, 'w') as file:
        #     file.write(response)
        # print(f"Relevance Response: {response}")
        file_name = "cands/multiagent/new_relevance/" + self.timestamp
        with open (file_name, 'w') as file:
            file.write(response)


        # parse response to get "yes" or "no"
        match = re.search(r'\b(yes|no)\b', response.lower().strip())
        if match is not None:
            return match.group(1).lower()
        else:
            return "no"
        # return "yes"

    def check_functionality(self, query, formatted_context, llm, coverage):
       
        with open("functionality_explanation.txt", 'r') as file:
            functionality_explanation = file.read()
            
        prompt1 = functionality_explanation.format(context=formatted_context, query=query)
        # print(f"Functionality Explanation Prompt: \n{prompt1}")
        # response1 = llm(prompt1)
        response1 = llm.send_message(prompt1).text
        # print(f"Functionality Explanation Response: \n{response1}")
        
        
        with open("functionality_prompt.txt", 'r') as file:
            prompt_template = file.read()

        in_cov_statement = "This code snippet included in the code execution path for the required functionality, therefore verify if the given code snippet is important for required functionality of the program."
        not_cov_statement = "This code snippet is not included in the code execution path for the required functionality, therefore verify if the given code snippet is important for the required functionality of the program."

        with open("req_list.txt", 'r') as file:
            req_list = file.read()

        cov_info = in_cov_statement if coverage else not_cov_statement

        # prompt = prompt_template.format(context=formatted_context, query=query, coverage_info=cov_info, req_list=req_list)
        prompt = prompt_template.format(query=response1, coverage_info=cov_info, req_list=req_list)
        # print(f"Functionality Prompt: \n{prompt}")

        # response = llm(prompt)
        response = llm.send_message(prompt).text

        # save response to cands/multiagent/functionality
        # file_name = "cands/multiagent/functionality/functionality" + "_time_"+ str(self.timestamp) + ".blade.c.txt"
        # with open(file_name, 'w') as file:
        #     file.write(response)
        # print(f"Functionality Response:\n{response}")
        filename = "cands/multiagent/new_functionality/" + self.timestamp
        combined_response = response1 + "\n\n" + response
        with open (filename, 'w') as file:
            file.write(combined_response)
            

        # parse response to get "yes" or "no"
        match = self.extract_imp_score_new_prompt(response)
        if match is not None:
            if match == 9:
                return "yes"
            elif match == 1:
                return "no"

        # retain if there is any issue in extracting importance score
        return "yes"

    def check_security(self, query, formatted_context, llm, coverage):

        if os.path.exists("sec_list.txt"):
            with open("sec_list.txt", 'r') as file:
                sec_list = file.read()
            # print("sec_list found and read")
        else:
            sec_list = self.get_security_checks()
            print(sec_list)
            print("Sec List Read")

        with open("security_prompt.txt", 'r') as file:
            prompt_template = file.read()

        in_cov_statement = "This code snippet included in the code execution path for the required functionality, therefore verify if the given code snippet is important for any of the listed potential security vulnerabilities in the program."
        not_cov_statement = "This code snippet is not included in the code execution path for the required functionality, therefore verify if the given code snippet is important for any of the listed potential security vulnerabilities in the program."

        cov_info = in_cov_statement if coverage else not_cov_statement

        prompt = prompt_template.format(context=formatted_context, query=query, coverage_info=cov_info, sec_list=sec_list)

        response = llm(prompt)

        # save response to cands/multiagent/security
        # file_name = "cands/multiagent/security/security" + "_time_"+ str(self.timestamp) + ".blade.c.txt"
        # with open(file_name, 'w') as file:
        #     file.write(response)
        # print(f"Security Response: {response}")
        file_name = "cands/multiagent/new_security/" + self.timestamp
        with open (file_name, 'w') as file:
            file.write(response)

        # parse response to get "yes" or "no"
        match = self.extract_imp_score_new_prompt(response)
        if match is not None:
            if match == 9:
                return "yes"
            elif match == 1:
                return "no"

        # retain if there is any issue in extracting importance score
        return "yes"

    def invoke(self, query, prompt_type, context_query, fifty_clean, passed_time):
        # Retrieve documents based on context query
        # print("Querying: ", query)

        self.timestamp = passed_time

        if prompt_type == 'generality':
            return self.llm_chain.run(query, context_query, False, fifty_clean)
        elif prompt_type == 'security':
            return self.llm_chain.run(query, context_query, True, fifty_clean)
        

    def get_security_checks(self):
        code_path = "original.txt"
        with open(code_path, 'r') as file:
            code = file.read()

        with open("req_list.txt", 'r') as file:
            req_list = file.read()

        sec_prompt = f"""The program to analyze is a core Linux utility. Your task is to analyze the provided program code and the list of **required functionality** to generate a concise list of potential security vulnerabilities that could affect the required functionality. 

### Instructions:
- Consider security and exception-related issues such as buffer overflows, race conditions, input validation issues, improper use of system calls, privilege escalation, and resource management vulnerabilities.
- Focus only on vulnerabilities that could impact the **required functionality**. 
- Address security concerns related to **input validation**, **boundary checking**, **memory management**, and **handling of special file types**.
- Do not include points related to excluded or unrequired functionality.
- **Do not** discuss generality, flags, or unrelated features of the program.
- Ensure the list is limited to a maximum of **20 concise bullet points**.

### Reference Information:
{req_list}

- **Program Code**: 
########
{code}
########

### Output:
- Provide a list of bullet points, each describing a potential security vulnerability or exception that should be tested in the context of the **required functionality** only, including issues related to input validation, memory usage, boundary checks, race conditions, and resource handling. Make sure the list is concise and limited to a maximum of 20 bullet points."""      
        safe = [
            {
                "category": "HARM_CATEGORY_HARASSMENT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_HATE_SPEECH",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_SEXUALLY_EXPLICIT",
                "threshold": "BLOCK_NONE",
            },
            {
                "category": "HARM_CATEGORY_DANGEROUS_CONTENT",
                "threshold": "BLOCK_NONE",
            },
        ]
        # genai.configure(api_key="AIzaSyCeL2G0fQvkgYn95s7p0orgbgOqtO-lZ28")
        genai.configure(api_key=os.environ.get("GENAI_API_KEY"))
        generation_config = {
        "temperature": 0.1,
        "top_p": 0.95,
        "top_k": 64,
        "max_output_tokens": 8192,
        "response_mime_type": "text/plain",
        }
        model = genai.GenerativeModel(
        model_name="gemini-1.5-pro",
        generation_config=generation_config,
        safety_settings = safe
        # See https://ai.google.dev/gemini-api/docs/safety-settings
        )
        llm_sec = model.start_chat(history=[])

        ###
        sec_list = (llm_sec.send_message(sec_prompt)).text

        with open("sec_list.txt", 'w') as file:
            file.write(sec_list)

        return sec_list


    def extract_imp_score_new_prompt(self,text): 

        # Convert the text to lowercase
        lower_text = text.lower()
        target = "final verdict"

        # Find the position of "importance score"
        start_index = lower_text.find(target)
        if start_index == -1:
            return None  # "Importance Score" not found

        # Move the start index to the end of "importance score"
        start_index += len(target)

        # Find the next newline character after "importance score"
        end_index = text.find('\n', start_index)
        if end_index == -1:
            end_index = len(text)  # if no newline, go to the end of the text

        # Extract the substring between "importance score" and the newline
        substring = text[start_index:end_index]

        # Find the number in the substring
        import re
        match = re.search(r'\b([0-9]|[1-9][0-9]|100)\b', substring)
        if match:
            assigned_class = int(match.group(0))
            # removal class
            if assigned_class < 3:
                return 1
            else:
                return 9
        else:
            return None  # No number found

In [27]:
# Pipeline instance used by the cells below.
qa = QAClass()

### Example template if you want to check a single candidate set

In [28]:
# Fill these in to classify a single candidate by hand.
query = ""                 # snippet to classify
prompt_type = "security"   # "security" or "generality"
context_query = ""         # text used to retrieve related chunks
fifty_clean = ""           # surrounding code; its first/last 100 lines become Snippet 0/1
passed_time = "2001"       # file name for the saved intermediate responses

# qa.invoke(query, prompt_type, context_query, fifty_clean, passed_time)

### Rerun on all the candidate sets to recalculate benchmark scores

In [31]:
import os
from exist_coverage import exist
from tqdm import tqdm
cands_dir = "cands"
context_dir = cands_dir + "/context"
fiftytext_dir = cands_dir + "/fifty_text"
pretext_dir = cands_dir + "/pretext_code"
removed_code_dir = cands_dir + "/removed_code"

output_llm_dir = cands_dir + "/multiagent/output_llm"

# Create every output directory up front; exist_ok makes this safe to re-run.
for output_dir in (
    "cands/multiagent",
    output_llm_dir,
    "cands/multiagent/new_relevance",
    "cands/multiagent/new_security",
    "cands/multiagent/new_functionality",
):
    os.makedirs(output_dir, exist_ok=True)

# Get the total number of files to process
context_files = [f for f in os.listdir(context_dir) if os.path.isfile(os.path.join(context_dir, f))] #you can sample from these if you want an estimate only
total_files = len(context_files)

# Create a single tqdm progress bar
with tqdm(total=total_files, desc="Processing files", unit="file") as pbar:
    for file in context_files:
        # Computed once: used both for the resume check and for saving the result.
        llm_filename = os.path.join(output_llm_dir, os.path.basename(file).replace("context_", "llm_") + ".txt")

        # Resume support: skip candidates that already have a saved verdict.
        if os.path.exists(llm_filename):
            pbar.update(1)
            continue

        with open(os.path.join(context_dir, file), 'r') as f:
            context_query = f.read()
        fifty_filename = "fifty_" + os.path.basename(file)
        with open(os.path.join(fiftytext_dir, fifty_filename), 'r') as f:
            fifty_clean = f.read()
        # NOTE(review): pretext_code is read but not used below - confirm whether it is still needed.
        with open(os.path.join(pretext_dir, file), 'r') as f:
            pretext_code = f.read()
        removed_code = context_query.strip()

        # NOTE(review): this is always True, because `file` was just listed in and read
        # from context_dir, so the "generality" branch never runs. The imported
        # `exist` helper from exist_coverage was presumably meant here - confirm.
        exist_in_coverage = os.path.exists(os.path.join(context_dir, file))
        llm_response = qa.invoke(
            removed_code,
            "security" if exist_in_coverage else "generality",
            context_query,
            fifty_clean,
            file + ".txt",
        )

        # save the response to the output directory
        with open(llm_filename, 'w') as f:
            f.write(llm_response)

        # Update the progress bar
        pbar.update(1)



    

Processing files:   0%|          | 0/522 [00:00<?, ?file/s]

Error:  block_reason: OTHER

Retrying No.  1
Error:  block_reason: OTHER

Retrying No.  2


Processing files:  21%|██        | 110/522 [00:15<00:56,  7.33file/s]


KeyboardInterrupt: 

### Benchmark
Sorts each candidate into a category folder (true/false retention or removal) under the output directory and prints the benchmark counts.

In [32]:
import os
import re
import glob

# Function to get the line numbers of the code snippet in the context file
def get_line_numbers(file_content):
    """Return the 1-based numbers of all lines in ``file_content`` that are not blank.

    A line counts as blank when it is empty after stripping whitespace.
    """
    numbered_lines = enumerate(file_content.splitlines(), start=1)
    return [number for number, text in numbered_lines if text.strip() != '']

# Function to extract the LLM response using the provided regex
def extract_imp_score_new_prompt(text):
    """Parse the class number that follows "final verdict" in an LLM response.

    The search is case-insensitive and only looks at the rest of the line on
    which the first "final verdict" occurs.

    Returns:
        1 when the first number (0-100) on that line is below 3 (removal),
        9 for any other number (retention), or None when the marker or a
        number is missing.
    """
    marker = "final verdict"

    # Search and slice the SAME lowercased string. lower() can change the
    # length of some characters, so an index found in the lowercased text is
    # not guaranteed to line up with the original text.
    lowered = text.lower()
    marker_pos = lowered.find(marker)
    if marker_pos == -1:
        return None

    # Remainder of the marker's line (up to the next newline or end of text).
    verdict_line = lowered[marker_pos + len(marker):].split('\n', 1)[0]

    match = re.search(r'\b([0-9]|[1-9][0-9]|100)\b', verdict_line)
    if match is None:
        return None

    # Classes below 3 mean removal; everything else means retention.
    return 1 if int(match.group(0)) < 3 else 9

# Function to check if the line starts with a comment (ignoring whitespace) in the gold file
def check_gold_label_line(gold_file_path, line_number):
    """Report whether a line of the gold file is commented out.

    Args:
        gold_file_path: path of the gold-label source file.
        line_number: 1-based line number to inspect.

    Returns:
        True when the line, ignoring leading whitespace, starts with "//".
        False when it does not, when ``line_number`` is outside the file, or
        when the file cannot be read (the error is printed).
    """
    try:
        with open(gold_file_path, 'r') as gold_file:
            lines = gold_file.readlines()
        # Reject non-positive numbers explicitly: index -1 would otherwise wrap
        # around and silently inspect the last line of the file.
        if 1 <= line_number <= len(lines):
            return lines[line_number - 1].strip().startswith("//")
    except Exception as e:
        print(f"Error reading gold file {gold_file_path}: {e}")
    return False

# Function to create directories for each category
def create_category_directories(base_output_folder):
    """Create the four category folders and the incomplete-context folder.

    The category folders are created under ``base_output_folder``. The
    incomplete-context folder path is taken from the module-level
    ``incomplete_context_folder`` variable, which must be assigned before this
    function is called.
    """
    for category in ('true_retention', 'true_removal', 'false_retention', 'false_removal'):
        category_path = os.path.join(base_output_folder, category)
        os.makedirs(category_path, exist_ok=True)

    # Read from the notebook globals, not from a parameter.
    os.makedirs(incomplete_context_folder, exist_ok=True)


def _read_stripped_if_exists(path):
    """Return the stripped content of ``path``, or "" when the file does not exist."""
    if not os.path.exists(path):
        return ""
    with open(path, 'r') as file:
        return file.read().strip()


def create_merged_file(context_content, llm_content, time_stamp, category_folder, fifty_text_folder, pretext_code_folder, relevance_code_folder, functionality_code_folder, security_code_folder, incomplete_context_folder):
    """Write one report file that gathers everything known about a candidate.

    When a functionality response exists for the candidate, the report goes to
    ``category_folder`` and contains the query, the final LLM verdict, the
    relevance / functionality / security responses and, when available, the
    pretext code and surrounding text. When no functionality response exists,
    a shorter report (query, relevance response, pretext, surrounding text) is
    written to ``incomplete_context_folder`` instead.

    The report is named ``merged_time_<time_stamp>.txt``. A message is printed
    when the surrounding-text file is missing.
    """
    context_content = context_content.strip()
    merged_file_name = f"merged_time_{time_stamp}.txt"
    merged_file_path = os.path.join(category_folder, merged_file_name)
    incomplete_context_file = os.path.join(incomplete_context_folder, merged_file_name)

    # Input files that belong to this candidate.
    fifty_text_file = os.path.join(fifty_text_folder, f"fifty_context_{time_stamp}")
    pretext_code_file = os.path.join(pretext_code_folder, f"context_{time_stamp}")
    response_name = f"context_{time_stamp}" + ".txt"
    relevance_code_file = os.path.join(relevance_code_folder, response_name)
    functionality_code_file = os.path.join(functionality_code_folder, response_name)
    security_code_file = os.path.join(security_code_folder, response_name)

    if not os.path.exists(fifty_text_file):
        print(f"Fifty text file not found for {fifty_text_file}")
    fifty_text_content = _read_stripped_if_exists(fifty_text_file)
    pretext_code_content = _read_stripped_if_exists(pretext_code_file)
    relevance_code_content = _read_stripped_if_exists(relevance_code_file)
    functionality_code_content = _read_stripped_if_exists(functionality_code_file)
    security_code_content = _read_stripped_if_exists(security_code_file)

    if not functionality_code_content:
        # No functionality response: record the candidate as incomplete.
        with open(incomplete_context_file, 'w') as ctx_file:
            ctx_file.write(context_content)
            # Was "\n\Relevance", an invalid escape that wrote a stray backslash.
            ctx_file.write("\n\nRelevance response:\n\n")
            ctx_file.write(relevance_code_content)
            if pretext_code_content:
                ctx_file.write("\n\nPretext Code:\n\n")
                ctx_file.write(pretext_code_content)

            if fifty_text_content:
                ctx_file.write("\n\nFifty Text:\n\n")
                ctx_file.write(fifty_text_content)
        return

    # Write the merged file with all content
    with open(merged_file_path, 'w') as merged_file:
        merged_file.write("Query:\n")
        merged_file.write(context_content)

        merged_file.write("\n\nLLM Response:\n\n")
        merged_file.write(llm_content)

        # The functionality response is known to be non-empty on this path.
        merged_file.write("\n\nRelevance Code:\n\n")
        merged_file.write(relevance_code_content)

        merged_file.write("\n\nFunctionality Code:\n\n")
        merged_file.write(functionality_code_content)

        merged_file.write("\n\nSecurity Code:\n\n")
        merged_file.write(security_code_content)

        if pretext_code_content:
            merged_file.write("\n\nPretext Code:\n\n")
            merged_file.write(pretext_code_content)

        if fifty_text_content:
            merged_file.write("\n\nFifty Text:\n\n")
            merged_file.write(fifty_text_content)


    # print(f"Merged file created: {merged_file_path}")


# Main script
def process_context_files(context_folder, llm_response_folder, gold_label_path, base_output_folder):
    """Score every candidate against the gold file and print the confusion counts.

    For each ``*.c`` file in ``context_folder`` that has a matching LLM
    response, the verdict is parsed, the gold file is checked for a
    commented-out line at any non-blank line position of the candidate, the
    candidate is counted in one of four categories and a merged report is
    written via ``create_merged_file``. Candidates without a response file are
    skipped.

    The auxiliary folder paths (``fifty_text_folder``, ``pretext_code_folder``,
    ``relevance_code_folder``, ``functionality_code_folder``,
    ``security_code_folder``, ``incomplete_context_folder``) are read from
    module-level variables that must be assigned before the call.
    """
    context_files = glob.glob(os.path.join(context_folder, "*.c"))
    create_category_directories(base_output_folder)  # Create necessary directories
    counts = {
        "false_retention": 0,
        "false_removal": 0,
        "true_retention": 0,
        "true_removal": 0,
    }

    for context_file in context_files:
        # Derive the corresponding LLM response filename from the context filename
        base_filename = os.path.basename(context_file).replace("context_", "llm_") + ".txt"
        llm_response_file = os.path.join(llm_response_folder, base_filename)
        if not os.path.exists(llm_response_file):
            continue

        with open(context_file, 'r') as file:
            context_data = file.read()
        with open(llm_response_file, 'r') as llm_file:
            llm_data = llm_file.read()

        imp_score = extract_imp_score_new_prompt(llm_data)
        if imp_score is None:
            # Comparing None with an int raised TypeError. An unparseable verdict
            # is counted as retention, matching the pipeline's retain-on-doubt rule.
            print(f"No verdict could be parsed from {llm_response_file}; counting it as retention")
        retained = imp_score is None or imp_score > 5

        # The candidate is gold-removed when any of its non-blank line numbers
        # is commented out in the gold file.
        is_comment = any(
            check_gold_label_line(gold_label_path, line_number)
            for line_number in get_line_numbers(context_data)
        )

        if retained:
            category = "false_retention" if is_comment else "true_retention"
        else:
            category = "true_removal" if is_comment else "false_removal"
        counts[category] += 1

        # Label used for file naming, e.g. llm_X.txt -> X
        time_stamp = base_filename.replace("llm_", "").replace(".txt", "")

        # Create the merged response file in the appropriate category folder
        create_merged_file(context_data, llm_data, time_stamp, os.path.join(base_output_folder, category), fifty_text_folder, pretext_code_folder, relevance_code_folder, functionality_code_folder, security_code_folder, incomplete_context_folder)

    print(f"False Retention: {counts['false_retention']}")
    print(f"False Removal: {counts['false_removal']}")
    print(f"True Retention: {counts['true_retention']}")
    print(f"True Removal: {counts['true_removal']}")
    print(f"Total: {sum(counts.values())}")

# Example usage:
# Candidate inputs.
context_folder = "cands/context"
fifty_text_folder = "cands/fifty_text"
pretext_code_folder = "cands/pretext_code"

# Saved LLM responses from the batch run.
llm_response_folder = "cands/multiagent/output_llm"
relevance_code_folder = "cands/multiagent/new_relevance"
functionality_code_folder = "cands/multiagent/new_functionality"
security_code_folder = "cands/multiagent/new_security"

# Gold labels and report destinations.
gold_label_path = "uniq-goldlabel.c"  # Update with your actual gold label file path
base_output_folder = "output"  # Base folder for the output merged responses
incomplete_context_folder = "output/false_retention/incomplete_context"

process_context_files(context_folder, llm_response_folder, gold_label_path, base_output_folder)


False Retention: 75
False Removal: 5
True Retention: 26
True Removal: 4
Total: 110
