In [14]:
import openai
import time
from dotenv import load_dotenv


In [15]:
def append_to_file(query, answer, filename='gpt_3.5_turbo_without_call_graph.txt'):
    with open(filename, 'a') as file:
        file.write("User\n")
        file.write(query + '\n\n')
        file.write("Debloater\n")
        file.write(answer + '\n\n')

## Initialize the assistant

In [16]:

# Assummes you have a .env file containing OPENAI_API_KEY=<your key> in the same directory
load_dotenv()

# Initialize OpenAI Client
client = openai.Client()

# Step 1: Create an Assistant
assistant = client.beta.assistants.create(
    name="Debloater",
    instructions="""You're a senior software engineer whose job is to understand if removing a particular code snippet will affect the overall generality and performance of a program.
Try Understanding the entire program that is stored in the vector database to analyze the input by the user. Search the vector store to find the code snippet provided by the user and use the entire program as context to understand the importance of the code snippet provided by the user. The vector database also contains the function call graph for the entire program. Use that to understand the flow of the program and the importance of the code snippet in the entire program.

Guess the use of the lines of code the user provides in reference to the entire program.

If you don't know the answer, just say that you don't know, don't try to make up an answer.
1) answer in a score from 1 to 10, that answers the importance of code to the entire program's generality, functionality and security.
2) say Remove, or Do not Remove, to tell if removing this code is fine considering the generality, functionality and security implications.
3) Give an explanation on why it can or cannot be removed.

Follow output format:
1) Importance Score : <number>
2) <Remove> or <Don't Remove>
3) <Explanation>

Make sure you don't add any information on your own. Reply in 3 lines to cover each point  
""",
    tools=[{"type": "file_search"}],
    # model="gpt-4-1106-preview"
    model="gpt-3.5-turbo-1106"
)


# Give file to assistant

# Create a vector store
vector_store = client.beta.vector_stores.create(name="rm-code")
 
# Ready the files for upload to OpenAI
# file_paths = ["original.txt"]
file_paths = ["original.txt", "call_graph.txt"]
file_streams = [open(path, "rb") for path in file_paths]
 
# Use the upload and poll SDK helper to upload the files, add them to the vector store,
# and poll the status of the file batch for completion.
file_batch = client.beta.vector_stores.file_batches.upload_and_poll(
  vector_store_id=vector_store.id, files=file_streams
)
 
# You can print the status and the file counts of the batch to see the result of this operation.
print(file_batch.status)
print(file_batch.file_counts)


assistant = client.beta.assistants.update(
  assistant_id=assistant.id,
  tool_resources={"file_search": {"vector_store_ids": [vector_store.id]}},
)



completed
FileCounts(cancelled=0, completed=2, failed=0, in_progress=0, total=2)


### Function to give input

In [17]:
def client_query(user_query, client=client, assistant=assistant):
    # Step 2: Create a Thread
    thread = client.beta.threads.create()

    # Step 3: Add a Message to a Thread
    message = client.beta.threads.messages.create(
        thread_id=thread.id,
        role="user",
        content=user_query
    )

    # Step 4: Run the Assistant
    run = client.beta.threads.runs.create(
        thread_id=thread.id,
        assistant_id=assistant.id,
    )

    # Waits for the run to be completed. 
    while True:
        run_status = client.beta.threads.runs.retrieve(thread_id=thread.id, run_id=run.id)
        if run_status.status == "completed":
            break
        elif run_status.status == "failed":
            print("Run failed:", run_status.last_error)
            break
        time.sleep(2)  # wait for 2 seconds before checking again


    # Step 5: Parse the Assistant's Response and print the Results
    messages = client.beta.threads.messages.list(
        thread_id=thread.id
    )

    # Prints the messages the latest message the bottom
    number_of_messages = len(messages.data)
    print( f'Number of messages: {number_of_messages}')

    for message in reversed(messages.data):
        role = message.role  
        for content in message.content:
            if content.type == 'text':
                response = content.text.value 
                print(f'\n{role}: {response}')

    return messages.data

## LLM Matches LLVM COV

### Case 2

In [8]:
in_val = """while (1) {
  tmp___19 = quote((char const *)ent->fts_path);
  tmp___20 = gettext(
      "WARNING: Circular directory structure.\nThis almost certainly means "
      "that you have a corrupted file system.\nNOTIFY YOUR SYSTEM "
      "MANAGER.\nThe following directory is part of the cycle:\n  %s\n");
  error(0, 0, (char const *)tmp___20, tmp___19);
  goto while_break___0;
}"""

x = client_query(in_val)


append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: while (1) {
  tmp___19 = quote((char const *)ent->fts_path);
  tmp___20 = gettext(
This almost certainly means "
      "that you have a corrupted file system.
NOTIFY YOUR SYSTEM "
      "MANAGER.
The following directory is part of the cycle:
  %s
");
  error(0, 0, (char const *)tmp___20, tmp___19);
  goto while_break___0;
}

assistant: 1) Importance Score: 5
2) Remove


### Case 4

In [19]:
in_val = """if ((unsigned int)s == 2U) {
 if ((unsigned int)is_empty_directory == 4U) {
   s = excise(fts, ent, x, (_Bool)1);
   fts_skip_tree(fts, ent);
 }
}"""

x = client_query(in_val)


append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: if ((unsigned int)s == 2U) {
 if ((unsigned int)is_empty_directory == 4U) {
   s = excise(fts, ent, x, (_Bool)1);
   fts_skip_tree(fts, ent);
 }
}

assistant: 1) Importance Score: 8
2) Do not Remove
3) The provided code snippet is part of the "excise" function, which is responsible for removing files or directories. Removing this code snippet could affect the program's ability to correctly handle the removal of directories based on specific conditions, potentially leading to unexpected behavior or errors in the file removal process .


### Case 5

In [10]:
in_val = """if (status != 0) {
 tmp = gettext("Try `%s --help\' for more information.\n");
 fprintf(stderr, (char const *)tmp, program_name);
} else {
 tmp___0 = gettext("Usage: %s [OPTION]... FILE...\n");
 printf((char const *)tmp___0, program_name);
 tmp___1 = gettext("Remove (unlink) the FILE(s).\n\n  -f, --force         "
                   "  ignore nonexistent files, never prompt\n  -i        "
                   "            prompt before every removal\n");
 fputs_unlocked((char const *)tmp___1, stdout);
 tmp___2 = gettext(
     "  -I                    prompt once before removing more than three "
     "files, or\n                          when removing recursively.  "
     "Less intrusive than -i,\n                          while still "
     "giving protection against most mistakes\n      --interactive[=WHEN] "
     " prompt according to WHEN: never, once (-I), or\n                   "
     "       always (-i).  Without WHEN, prompt always\n");
 fputs_unlocked((char const *)tmp___2, stdout);
 tmp___3 =
     gettext("      --one-file-system  when removing a hierarchy "
             "recursively, skip any\n                          directory "
             "that is on a file system different from\n                   "
             "       that of the corresponding command line argument\n");
 fputs_unlocked((char const *)tmp___3, stdout);
 tmp___4 = gettext(
     "      --no-preserve-root  do not treat `/\' specially\n      "
     "--preserve-root   do not remove `/\' (default)\n  -r, -R, "
     "--recursive   remove directories and their contents recursively\n  "
     "-v, --verbose         explain what is being done\n");
 fputs_unlocked((char const *)tmp___4, stdout);
 tmp___5 = gettext("      --help     display this help and exit\n");
 fputs_unlocked((char const *)tmp___5, stdout);
 tmp___6 = gettext("      --version  output version information and exit\n");
 fputs_unlocked((char const *)tmp___6, stdout);
 tmp___7 = gettext("\nBy default, rm does not remove directories.  Use the "
                   "--recursive (-r or -R)\noption to remove each listed "
                   "directory, too, along with all of its contents.\n");
 fputs_unlocked((char const *)tmp___7, stdout);
 tmp___8 = gettext("\nTo remove a file whose name starts with a `-\', for "
                   "example `-foo\',\nuse one of these commands:\n  %s -- "
                   "-foo\n\n  %s ./-foo\n");
 printf((char const *)tmp___8, program_name, program_name);
 tmp___9 = gettext("\nNote that if you use rm to remove a file, it is "
                   "usually possible to recover\nthe contents of that "
                   "file.  If you want more assurance that the contents "
                   "are\ntruly unrecoverable, consider using shred.\n");
 fputs_unlocked((char const *)tmp___9, stdout);
 emit_ancillary_info();
}"""


x = client_query(in_val)


append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: if (status != 0) {
 tmp = gettext("Try `%s --help' for more information.
");
 fprintf(stderr, (char const *)tmp, program_name);
} else {
 tmp___0 = gettext("Usage: %s [OPTION]... FILE...
");
 printf((char const *)tmp___0, program_name);
 tmp___1 = gettext("Remove (unlink) the FILE(s).

  -f, --force         "
                   "  ignore nonexistent files, never prompt
  -i        "
                   "            prompt before every removal
");
 fputs_unlocked((char const *)tmp___1, stdout);
 tmp___2 = gettext(
     "  -I                    prompt once before removing more than three "
     "files, or
                          when removing recursively.  "
     "Less intrusive than -i,
                          while still "
     "giving protection against most mistakes
      --interactive[=WHEN] "
     " prompt according to WHEN: never, once (-I), or
                   "
     "       always (-i).  Without WHEN, prompt always
");
 fputs_unlocked((char cons

## LLM response says don't remove but the code is not covered

### Case 3: 

In [11]:
in_val = """static struct hash_entry *allocate_entry(Hash_table *table___0) {
 struct hash_entry *new;


 {
   if (table___0->free_entry_list) {
     new = table___0->free_entry_list;
     table___0->free_entry_list = new->next;
   } else {
     new = (struct hash_entry *)malloc(sizeof(*new));
   }
   return (new);
 }
}"""

x = client_query(in_val)

append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: static struct hash_entry *allocate_entry(Hash_table *table___0) {
 struct hash_entry *new;


 {
   if (table___0->free_entry_list) {
     new = table___0->free_entry_list;
     table___0->free_entry_list = new->next;
   } else {
     new = (struct hash_entry *)malloc(sizeof(*new));
   }
   return (new);
 }
}

assistant: 1) Importance Score: 7
2) Do not Remove
3) The provided code snippet is responsible for managing the allocation of new hash entries in the program. Removing this code could significantly impact the program's functionality by causing memory allocation issues or potential memory leaks, affecting the overall performance and reliability of the program   .


## LLM response says remove but the code is covered

### Case 1

In [12]:
in_val = """tmp___16 = 0;"""

x = client_query(in_val)

append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: tmp___16 = 0;

assistant: 1) Importance Score: 3
2) Remove
3) The code snippet seems to be setting the variable "tmp___16" to 0. While this snippet appears to be related to the broader program, the specific relevance or impact of this variable setting on the overall functionality, generality, and security of the program is not immediately clear. Therefore, it seems safe to remove this code snippet as its impact on the rest of the program is not explicitly evident.


### Case 3

In [18]:
in_val = """tmp = 512;"""

x = client_query(in_val)

append_to_file(x[1].content[0].text.value, x[0].content[0].text.value)

Number of messages: 2

user: tmp = 512;

assistant: 1) Importance Score: 5
2) Don't Remove
3) The provided code snippet does not have a direct context provided, but based on the surrounding code related to file and directory manipulation, it appears to be a part of a larger program that handles directory operations. Removing this snippet might impact the program's ability to open directories and perform file operations, affecting functionality and security due to potential errors or unexpected behavior in directory handling.

However, without the full context of the program, it's difficult to assess the exact importance of this snippet in terms of generality and functionality. Therefore, the importance score is moderate, and it's advisable not to remove it to avoid potential adverse effects on the program's directory operations.
