In [94]:
import sys

# Load pysqlite3 and re-register it in sys.modules under the name "sqlite3",
# so that later `import sqlite3` statements resolve to the pysqlite3 module.
# NOTE(review): presumably required by the Chroma vector store used below -
# confirm against the deployment environment.
__import__("pysqlite3")
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

In [95]:
from langchain_community.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import GPT4AllEmbeddings
from langchain_core.prompts import PromptTemplate
from langchain_community.llms import Ollama
from langchain.callbacks.manager import CallbackManager
from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
import sys
import os
import ollama
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.embeddings import HuggingFaceEmbeddings


In [96]:
from langchain.document_loaders import TextLoader

# Directory of the persisted Chroma index that this notebook queries.
persist_directory = "./vector_dbs/mpnet"

# Earlier experiment, kept for reference only (not executed): it built a store
# directly from original.txt with Ollama "mistral" embeddings and no persist
# directory.
#
# # 1. Load the text file
# file_path = "original.txt"
# loader = TextLoader(file_path)
# docs = loader.load()
#
# # 2. Split the document into chunks
# text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=50)
# splits = text_splitter.split_documents(docs)
#
# # 3. Create Ollama embeddings and vector store
# embeddings = OllamaEmbeddings(model="mistral")
# vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)

# Open the persisted index, embedding queries with "all-mpnet-base-v2".
# NOTE(review): assumes the index under persist_directory was written with the
# same embedding model - confirm before trusting retrieval results.
embeddings = HuggingFaceEmbeddings(model_name="all-mpnet-base-v2")
vectorstore = Chroma(embedding_function=embeddings, persist_directory=persist_directory)


# Read callgraph.txt in one go; the text becomes the default value of the
# `callgraph` argument of ollama_llm defined further down.
with open('callgraph.txt') as file:
    callgraph = file.read()




In [97]:
# 3. Ask the Ollama-served "mistral" model to assess a code snippet
def ollama_llm(question, context, callgraph=callgraph):
    """Build the removal-assessment prompt and return the model's reply text.

    Args:
        question: The code snippet whose removal is being assessed; inserted
            after "Question:" in the prompt.
        context: Retrieved source text; inserted after "Context:" in the prompt.
        callgraph: Accepted for callers that pass it, but not inserted into the
            prompt by this function. Defaults to the module-level `callgraph`
            as it was when this function was defined.

    Returns:
        The value of response['message']['content'] from `ollama.chat`.
    """
    prompt_text = f"""You are a senior principal code architect tasked with assessing the security implications of removing code snippets mentioned in
Question: {question}

Evaluate the impact of removing the specific code snippet given above. Try to understand the snippet with respect to the entire code and the given
Context: {context}

If you don't know the answer, or cannot understand the question snippet with the given context, just say that you don't know, don't try to make up an answer.
1) answer in a score from 1 to 10, that answers the importance of code to the entire program's generality, functionality and security.
2) say Remove, or Do not Remove, to tell if removing this code is fine considering the generality, security and functionality implications
3) Give an explanation on why it can or cannot be removed

Follow the exact output format shown below:
1) Importance Score : <number>
2) <Remove> or <Don't Remove> (Do not output anything else)
3) <Explanation>

Make sure you don't add any information on your own. Reply in 3 lines to cover each point.

Answer: """

    # Single-turn conversation: the whole prompt goes in one user message.
    user_message = {'role': 'user', 'content': prompt_text}
    reply = ollama.chat(model='mistral', messages=[user_message])
    return reply['message']['content']

# 4. RAG setup: retriever over the vector store, asking for k=25 results per query
retriever = vectorstore.as_retriever(search_kwargs=dict(k=25))

def combine_docs(docs):
    """Concatenate the `page_content` of each document, separated by a blank line.

    Args:
        docs: Iterable of objects exposing a string `page_content` attribute.

    Returns:
        One string with the page contents joined by "\\n\\n" (empty string when
        `docs` is empty).
    """
    page_texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(page_texts)

def rag_chain(question):
    """Retrieve context for `question`, print it, and return the model's answer.

    Args:
        question: Code snippet used both as the retrieval query and as the
            question passed to `ollama_llm`.

    Returns:
        Whatever `ollama_llm` returns for the question and the joined context.

    Side effects:
        Prints the number of retrieved documents and the full joined context.
    """
    hits = retriever.invoke(question)
    print(f"length: {len(hits)}")
    context_text = combine_docs(hits)
    # Same banner layout as before, spelled with explicit newline escapes.
    print(f"****\n    Context Being Picked: \n{context_text}\n****")
    return ollama_llm(question, context_text)



In [98]:
# Query: the "circular directory structure" warning loop.
# The snippet is a raw string with the C "\n" escapes written out, so the text
# sent to the retriever and the model is valid C. Previously the C string
# literals contained real line breaks in place of those escapes.
print(rag_chain(r"""while (1) {
  tmp___19 = quote((char const *)ent->fts_path);
  tmp___20 = gettext(
      "WARNING: Circular directory structure.\nThis almost certainly means "
      "that you have a corrupted file system.\nNOTIFY YOUR SYSTEM "
      "MANAGER.\nThe following directory is part of the cycle:\n  %s\n");
  error(0, 0, (char const *)tmp___20, tmp___19);
  goto while_break___0;
}"""))

length: 100
****
    Context Being Picked: 
tmp___17 = prompt((FTS const *)fts, (FTSENT const *)ent, is_dir, x,
                      (enum Prompt_action)3, (Ternary *)((void *)0));
    s___0 = tmp___17;
    if ((unsigned int)s___0 != 2U) {
      return (s___0);
    }
    tmp___18 = excise(fts, ent, x, is_dir);
    return (tmp___18);
  case_2:
    while (1) {
      tmp___19 = quote((char const *)ent->fts_path);
      tmp___20 = gettext(
          "that you have a corrupted file system.\nNOTIFY YOUR SYSTEM "
          "MANAGER.\nThe following directory is part of the cycle:\n  %s\n");
      error(0, 0, (char const *)tmp___20, tmp___19);
      goto while_break___0;
    }
  while_break___0:
    fts_skip_tree(fts, ent);
    return ((enum RM_status)4);
  case_7:
    tmp___21 = quote((char const *)ent->fts_path);
    tmp___22 = gettext("traversal failed: %s");
    error(0, ent->fts_errno, (char const *)tmp___22, tmp___21);

tmp___17 = prompt((FTS const *)fts, (FTSENT const *)ent, is_dir, x,


In [99]:
# Query: the nested check that excises an entry and skips its subtree.
print(
    rag_chain(
        question="""if ((unsigned int)s == 2U) {
 if ((unsigned int)is_empty_directory == 4U) {
   s = excise(fts, ent, x, (_Bool)1);
   fts_skip_tree(fts, ent);
 }
}"""
    )
)

length: 100
****
    Context Being Picked: 
if ((unsigned int)s == 2U) {
      if ((unsigned int)is_empty_directory == 4U) {
        s = excise(fts, ent, x, (_Bool)1);
        fts_skip_tree(fts, ent);
      }
    }
    if ((unsigned int)s != 2U) {
      mark_ancestor_dirs(ent);
      fts_skip_tree(fts, ent);
    }
    return (s);
  case_8:
    if ((int)ent->fts_info == 6) {
      if (x->one_file_system) {
        if (0L < ent->fts_level) {
          if (ent->fts_statp[0].st_dev != fts->fts_dev) {
            mark_ancestor_dirs(ent);
            tmp___14 = quote((char const *)ent->fts_path);
            tmp___15 =
                gettext("skipping %s, since it\'s on a different device");
            error(0, 0, (char const *)tmp___15, tmp___14);
            return ((enum RM_status)4);
          }
        }
      }
    }
    if ((int)ent->fts_info == 6) {
      tmp___16 = 1;
    } else {
      if ((int)ent->fts_info == 4) {
        tmp___16 = 1;
      } else {
        tmp___16 = 0;
     

In [100]:
# Query: the body of the usage/help routine.
# Raw string on purpose: in a normal Python string every C "\n" would become a
# real line break and every "\'" a bare quote, so the snippet sent to the
# retriever and the model would no longer be the C source text.
in_val = r"""if (status != 0) {
 tmp = gettext("Try `%s --help\' for more information.\n");
 fprintf(stderr, (char const *)tmp, program_name);
} else {
 tmp___0 = gettext("Usage: %s [OPTION]... FILE...\n");
 printf((char const *)tmp___0, program_name);
 tmp___1 = gettext("Remove (unlink) the FILE(s).\n\n  -f, --force         "
                   "  ignore nonexistent files, never prompt\n  -i        "
                   "            prompt before every removal\n");
 fputs_unlocked((char const *)tmp___1, stdout);
 tmp___2 = gettext(
     "  -I                    prompt once before removing more than three "
     "files, or\n                          when removing recursively.  "
     "Less intrusive than -i,\n                          while still "
     "giving protection against most mistakes\n      --interactive[=WHEN] "
     " prompt according to WHEN: never, once (-I), or\n                   "
     "       always (-i).  Without WHEN, prompt always\n");
 fputs_unlocked((char const *)tmp___2, stdout);
 tmp___3 =
     gettext("      --one-file-system  when removing a hierarchy "
             "recursively, skip any\n                          directory "
             "that is on a file system different from\n                   "
             "       that of the corresponding command line argument\n");
 fputs_unlocked((char const *)tmp___3, stdout);
 tmp___4 = gettext(
     "      --no-preserve-root  do not treat `/\' specially\n      "
     "--preserve-root   do not remove `/\' (default)\n  -r, -R, "
     "--recursive   remove directories and their contents recursively\n  "
     "-v, --verbose         explain what is being done\n");
 fputs_unlocked((char const *)tmp___4, stdout);
 tmp___5 = gettext("      --help     display this help and exit\n");
 fputs_unlocked((char const *)tmp___5, stdout);
 tmp___6 = gettext("      --version  output version information and exit\n");
 fputs_unlocked((char const *)tmp___6, stdout);
 tmp___7 = gettext("\nBy default, rm does not remove directories.  Use the "
                   "--recursive (-r or -R)\noption to remove each listed "
                   "directory, too, along with all of its contents.\n");
 fputs_unlocked((char const *)tmp___7, stdout);
 tmp___8 = gettext("\nTo remove a file whose name starts with a `-\', for "
                   "example `-foo\',\nuse one of these commands:\n  %s -- "
                   "-foo\n\n  %s ./-foo\n");
 printf((char const *)tmp___8, program_name, program_name);
 tmp___9 = gettext("\nNote that if you use rm to remove a file, it is "
                   "usually possible to recover\nthe contents of that "
                   "file.  If you want more assurance that the contents "
                   "are\ntruly unrecoverable, consider using shred.\n");
 fputs_unlocked((char const *)tmp___9, stdout);
 emit_ancillary_info();
}"""


x = rag_chain(in_val)

print(x)

length: 100
****
    Context Being Picked: 
{
    if (status != 0) {
      tmp = gettext("Try `%s --help\' for more information.\n");
      fprintf(stderr, (char const *)tmp, program_name);
    } else {
      tmp___0 = gettext("Usage: %s [OPTION]... FILE...\n");
      printf((char const *)tmp___0, program_name);
      tmp___1 = gettext("Remove (unlink) the FILE(s).\n\n  -f, --force         "
                        "  ignore nonexistent files, never prompt\n  -i        "
                        "            prompt before every removal\n");
      fputs_unlocked((char const *)tmp___1, stdout);
      tmp___2 = gettext(
          "  -I                    prompt once before removing more than three "
          "files, or\n                          when removing recursively.  "
          "Less intrusive than -i,\n                          while still "
          "giving protection against most mistakes\n      --interactive[=WHEN] "
          " prompt according to WHEN: never, once (-I), or\n

In [101]:
# Query: the complete allocate_entry() definition.
in_val = """static struct hash_entry *allocate_entry(Hash_table *table___0) {
 struct hash_entry *new;


 {
   if (table___0->free_entry_list) {
     new = table___0->free_entry_list;
     table___0->free_entry_list = new->next;
   } else {
     new = (struct hash_entry *)malloc(sizeof(*new));
   }
   return (new);
 }
}"""

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
if (!cursor) {
          goto while_break___2;
        }
        next = cursor->next;
        free((void *)cursor);
        cursor = next;
      }
    while_break___2:
      bucket++;
    }
  while_break___1:
    cursor = table___0->free_entry_list;
    while (1) {

      if (!cursor) {
        goto while_break___3;
      }
      next = cursor->next;
      free((void *)cursor);
      cursor = next;
    }
  while_break___3:
    free((void *)table___0->bucket);
    free((void *)table___0);
    return;
  }
}
static struct hash_entry *allocate_entry(Hash_table *table___0) {
  struct hash_entry *new;

  {
    if (table___0->free_entry_list) {
      new = table___0->free_entry_list;
      table___0->free_entry_list = new->next;
    } else {
      new = (struct hash_entry *)malloc(sizeof(*new));
    }
    return (new);
  }
}
static void free_entry(Hash_table *table___0, struct hash_entry *entry) {

if (!cursor) {
          goto while_break___2;
    

In [102]:
# Query: a single assignment statement.
in_val = "tmp___16 = 0;"

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
int tmp;

int tmp;

int *tmp___6;

int *tmp___6;

size_t ilim;
  int tmp___6;
  size_t tmp___7;

size_t ilim;
  int tmp___6;
  size_t tmp___7;

int tmp___5;
  int tmp___6;
  int tmp___7;

int tmp___5;
  int tmp___6;
  int tmp___7;

int *tmp___2;
  size_t maxarglen;
  size_t tmp___4;
  size_t tmp___5;
  _Bool tmp___6;
  int tmp___7;
  size_t len;
  size_t tmp___8;
  struct _ftsent *tmp___9;
  _Bool tmp___10;
  int tmp___11;

int *tmp___2;
  size_t maxarglen;
  size_t tmp___4;
  size_t tmp___5;
  _Bool tmp___6;
  int tmp___7;
  size_t len;
  size_t tmp___8;
  struct _ftsent *tmp___9;
  _Bool tmp___10;
  int tmp___11;

tmp___5 = (size_t)4096;
    }
    tmp___6 = fts_palloc(sp, tmp___5);
    if (!tmp___6) {
      goto mem1;
    }
    if ((unsigned long)*argv != (unsigned long)((void *)0)) {
      parent = fts_alloc(sp, "", (size_t)0);
      if ((unsigned long)parent == (unsigned long)((void *)0)) {
        goto mem2;
      }
      parent->fts_lev

In [103]:
# Query: a single assignment of a constant.
in_val = "tmp = 512;"

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
int tmp;

int tmp;

int *tmp___6;

int *tmp___6;

size_t ilim;
  int tmp___6;
  size_t tmp___7;

size_t ilim;
  int tmp___6;
  size_t tmp___7;

int *tmp___2;
  size_t maxarglen;
  size_t tmp___4;
  size_t tmp___5;
  _Bool tmp___6;
  int tmp___7;
  size_t len;
  size_t tmp___8;
  struct _ftsent *tmp___9;
  _Bool tmp___10;
  int tmp___11;

int *tmp___2;
  size_t maxarglen;
  size_t tmp___4;
  size_t tmp___5;
  _Bool tmp___6;
  int tmp___7;
  size_t len;
  size_t tmp___8;
  struct _ftsent *tmp___9;
  _Bool tmp___10;
  int tmp___11;

goto switch_break___3;
                    switch_break___3:
                      j++;
                    }
                  while_break___16:;
                  }
                }
                tmp___4 = iswprint((wint_t)w);
                if (!tmp___4) {
                  printable = (_Bool)0;
                }
                m += bytes;
              }
            }
          }
          tmp___5 = mbsinit(

In [104]:
# Query: the extern declaration of fprintf.
in_val = """extern int fprintf(FILE *__restrict __stream, char const *__restrict __format,
                   ...);"""

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
tmp___14 = tmp___13;
          }
          fprintf(stderr, (char const *)tmp___14, program_name, tmp___11,
                  quoted_name);
        }
        tmp___15 = yesno();
        if (!tmp___15) {
          return ((enum RM_status)3);
        }
      }
    }
    return ((enum RM_status)2);
  }
}
__inline static _Bool nonexistent_file_errno(int errnum) {

tmp___14 = tmp___13;
          }
          fprintf(stderr, (char const *)tmp___14, program_name, tmp___11,
                  quoted_name);
        }
        tmp___15 = yesno();
        if (!tmp___15) {
          return ((enum RM_status)3);
        }
      }
    }
    return ((enum RM_status)2);
  }
}
__inline static _Bool nonexistent_file_errno(int errnum) {

ptrdiff_t argmatch(char const *arg, char const *const *arglist,
                   char const *vallist, size_t valsize);
void argmatch_invalid(char const *context, char const *value,
                      ptrdiff_t problem);
void ar

In [105]:
# Query: the opening rows of the diacrit_diac lookup table (snippet is
# intentionally partial and ends with a trailing newline).
in_val = """char const diacrit_diac[256] = {
(char const)0, (char const)0, (char const)0, (char const)0, (char const)0,
(char const)0, (char const)0, (char const)0, (char const)0, (char const)0,
(char const)0, (char const)0, (char const)0, (char const)0, (char const)0,
(char const)0, (char const)0, (char const)0, (char const)0, (char const)0,
"""

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
{
    tmp = rpl_fcntl(fd, 0, 3);
    return (tmp);
  }
}
extern __attribute__((__nothrow__)) int(
    __attribute__((__nonnull__(1, 2), __leaf__))
    stat)(char const *__restrict __file, struct stat *__restrict __buf);
extern
    __attribute__((__nothrow__)) int(__attribute__((__nonnull__(2), __leaf__))
                                     fstat)(int __fd, struct stat *__buf);
char const diacrit_base[256];
char const diacrit_diac[256];
char const diacrit_base[256] = {
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, (char const)0, (char const)0,
    (char const)0, (char const)0, 

In [106]:
# Query: a single struct-field assignment.
in_val = 'p->fts_accpath = tmp___0;'

# Run the chain on the snippet and show the model's verdict.
x = rag_chain(question=in_val)
print(x)

length: 100
****
    Context Being Picked: 
fts_lfree(sp->fts_child);
          sp->fts_child = (struct _ftsent *)((void *)0);
        }
      }
      if ((unsigned long)sp->fts_child != (unsigned long)((void *)0)) {
        tmp___3 = fts_safe_changedir(sp, p, -1, (char const *)p->fts_accpath);
        if (tmp___3) {
          tmp___2 = __errno_location();
          p->fts_errno = *tmp___2;
          p->fts_flags = (unsigned short)((int)p->fts_flags | 1);
          p = sp->fts_child;
          while (1) {

fts_lfree(sp->fts_child);
          sp->fts_child = (struct _ftsent *)((void *)0);
        }
      }
      if ((unsigned long)sp->fts_child != (unsigned long)((void *)0)) {
        tmp___3 = fts_safe_changedir(sp, p, -1, (char const *)p->fts_accpath);
        if (tmp___3) {
          tmp___2 = __errno_location();
          p->fts_errno = *tmp___2;
          p->fts_flags = (unsigned short)((int)p->fts_flags | 1);
          p = sp->fts_child;
          while (1) {

free_dir(sp);
      

: 