In [71]:
import pandas as pd
from langchain_core.documents import Document

In [72]:
# Spreadsheet with one row per Java AST component (name, definition, examples).
EXCEL_MUT_OP_PATH = 'my_files/mut_op_java_file.xlsx'

# Load the whole sheet into a DataFrame; rows become documents further down.
df = pd.read_excel(io=EXCEL_MUT_OP_PATH)


In [73]:
# Display the loaded table to check its columns before building documents.
df

Unnamed: 0,Component,Definition,Example(s)
0,Assignment,An operation that assigns a value to a variable.,x = 10;
1,BinaryOperator,An operator that takes two operands.,"a + b, x > y"
2,Block,A sequence of statements enclosed in curly bra...,{...}
3,ConstructorCall,The creation of a new object instance.,new MyClass()
4,FieldRead,Reading the value from an object's field or at...,"this.name, person.age"
5,FieldWrite,Writing a value to an object's field or attrib...,"this.name = ""new name"";"
6,If,A conditional if statement.,if (condition)
7,Invocation,A method call; an invocation of a method.,myObject.doSomething()
8,Literal,A fixed value directly in the source code.,"""hello"", 123, true, null"
9,LocalVariable,The declaration of a new variable inside a met...,int count = 0;


In [74]:
# Turn every spreadsheet row into one LangChain Document:
#   * page_content - a single sentence combining component, definition and example
#                    (this is the text that gets embedded);
#   * metadata     - the raw row values plus the row index and the source file path.
documents = [
    Document(
        page_content=f"""Node type: {row['Component']}. {row['Definition']} Example: {row['Example(s)']}""",
        metadata={**row.to_dict(), 'row': index, 'source': EXCEL_MUT_OP_PATH},
    )
    for index, row in df.iterrows()
]

In [9]:
# Spot-check the metadata attached to one of the generated documents (index 6).
documents[6].metadata
    

{'Component': 'If',
 'Definition': 'A conditional if statement.',
 'Example(s)': 'if (condition)',
 'row': 6,
 'source': 'my_files/mut_op_java_file.xlsx'}

In [75]:
from langchain_openai.embeddings import OpenAIEmbeddings
from langchain_community.vectorstores.faiss import FAISS
from langchain.chains import ConversationalRetrievalChain
from dotenv import load_dotenv, find_dotenv
import os
from langchain_openai import ChatOpenAI
from langchain_core.messages import SystemMessage, HumanMessage, AIMessage

from langchain.prompts import (
    ChatPromptTemplate,
    SystemMessagePromptTemplate,
    HumanMessagePromptTemplate,
)

In [76]:
# Load variables from the nearest .env file (if one is found) into the
# process environment; the boolean return value is intentionally discarded.
_ = load_dotenv(find_dotenv())

# Only report whether the key is present - never print the key itself.
api_openai = os.getenv('OPENAI_API_KEY')
if api_openai:
    print('OpenAI API loaded.')
else:
    # Name the exact variable that was looked up so the user knows what to set.
    print('missing api key: OPENAI_API_KEY')

OpenAI API loaded.


In [77]:
# Embed every document and index the resulting vectors in an in-memory FAISS store.
embedding_model = OpenAIEmbeddings(model="text-embedding-3-large")
vector_store = FAISS.from_documents(documents, embedding_model)

# Retriever that returns the 6 most similar documents for each query.
retriever = vector_store.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 6},
)

In [14]:
# Persist the FAISS index built above to the local folder "FaissMutOp".
vector_store.save_local("FaissMutOp")

In [None]:
# Load a FAISS index that was previously saved to disk.
# NOTE(review): this loads folder "faiss_index_taco" with the
# "text-embedding-3-small" model, while the index built in this notebook uses
# "text-embedding-3-large" and is saved as "FaissMutOp". The result is bound to
# `vectorstore`, not to the `vector_store` the retriever is created from.
# Confirm this is intentional and not a leftover from another notebook.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")

# allow_dangerous_deserialization=True opts in to unpickling data stored with
# the index - only load index folders that you created yourself.
vectorstore = FAISS.load_local(
    "faiss_index_taco",
    embeddings,
    allow_dangerous_deserialization=True,
)

In [28]:
# Chat model handed to the retrieval chain.
# NOTE(review): no temperature is passed, so the library/provider default
# applies - consider pinning it if reproducible answers matter.
llm = ChatOpenAI(model="gpt-4o-mini")

In [65]:
# System instructions for the model: compare two anonymised Java snippets and
# answer with a bracketed, semicolon-separated list of
# "<Operation> <ComponentType>" entries (Insert / Delete / Update / Move).
# NOTE(review): step 3 refers to the "Official Java AST Component List" while
# the human template labels the retrieved text "MANUAL CONTEXT" - confirm the
# model is meant to treat these as the same thing.
system_prompt="""

You are an expert in Abstract Syntax Tree (AST) analysis for the Java language. 
You will be provided with two code snippets, `snippet one` (the buggy version) and `snippet two` (the fixed version). 
These snippets are a processed representation of Java code where identifiers have been replaced with placeholders and IDs.

Your task is to identify the precise AST modifications required to transform `snippet one` into `snippet two`. 
The modifications fall into four categories: `Insert`, `Delete`, `Update`, or `Move`. 
You must associate each modification with a specific Java node component from the manual context.



Follow these steps to generate your final answer:

1.  Perform a structural comparison between `snippet one` and `snippet two`.
2.  Identify all AST nodes that were added (`Insert`), removed (`Delete`), moved from its original place to another (`Move`), or had their value changed (`Update`).
3.  For each modification, identify the type of the AST node involved and match it to a component from the **Official Java AST Component List**.
4.  Format your final answer as a single line, enclosed in square brackets. Each entry must follow the `<Operation> <ComponentType>` format. Separate multiple entries with a semicolon.

Keep in mind that the **ComponentType** is the Java node component type from the manual context




"""

# Human-turn template. {context} and {question} are placeholders filled in when
# the chain runs (presumably with the retrieved documents and the user's
# question - confirm against the chain's expected prompt variables).
human = """
MANUAL CONTEXT:
{context}

USER'S QUESTION:
{question}

AI:
"""



In [7]:
# Sample question used to exercise the chain: two anonymised Java snippets
# where the second one drops two statements present in the first.
tp = """
analyze these two code snippets, what are the modifications to transform code 1 into code 2:
<code_snippet_1>
protected void METHOD_1 ( ) { super . METHOD_1 ( ) ; TYPE_1 VAR_1 = ( ( TYPE_1 ) ( VAR_2 . METHOD_2 ( ) ) ) ; VAR_1 . METHOD_3 ( VAR_2 ) ; VAR_3 . i ( VAR_4 , STRING_1 ) ; }
</code_snippet_1>

<code_snippet_2>
protected void METHOD_1 ( ) { super . METHOD_1 ( ) ; VAR_3 . i ( VAR_4 , STRING_1 ) ; }
</code_snippet_2>
"""

In [67]:
# Chat prompt = the fixed system instructions followed by the templated human turn.
messages = [
    SystemMessagePromptTemplate.from_template(template=system_prompt),
    HumanMessagePromptTemplate.from_template(template=human),
]
qa_prompt = ChatPromptTemplate.from_messages(messages=messages)

In [62]:
# Retrieval chain: fetches documents with `retriever`, then answers with `llm`.
qa_chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever,
    # This is where the custom system prompt enters the chain: it replaces the
    # prompt used by the document-combining step.
    combine_docs_chain_kwargs=dict(prompt=qa_prompt),
    # Also return the retrieved documents alongside the answer.
    return_source_documents=True,
)

In [68]:
# Run the chain on the sample question with an empty chat history.
result = qa_chain.invoke({"question": tp, "chat_history": []})

In [69]:
# Show only the answer string produced by the chain.
result['answer']

'[Delete LocalVariable]'

In [70]:
# Show the full result dict: question, chat history, answer and source documents.
result

{'question': '\nanalyze these two code snippets, what are the modifications to transform code 1 into code 2:\n<code_snippet_1>\nprotected void METHOD_1 ( ) { super . METHOD_1 ( ) ; TYPE_1 VAR_1 = ( ( TYPE_1 ) ( VAR_2 . METHOD_2 ( ) ) ) ; VAR_1 . METHOD_3 ( VAR_2 ) ; VAR_3 . i ( VAR_4 , STRING_1 ) ; }\n</code_snippet_1>\n\n<code_snippet_2>\nprotected void METHOD_1 ( ) { super . METHOD_1 ( ) ; VAR_3 . i ( VAR_4 , STRING_1 ) ; }\n</code_snippet_2>\n',
 'chat_history': [],
 'answer': '[Delete LocalVariable]',
 'source_documents': [Document(id='1e007b42-f078-4d76-a710-4d07fb0a2465', metadata={'Component': 'Wra', 'Definition': "A modifier in a method's declaration for visibility or behavior.", 'Example(s)': 'public, static', 'row': 17, 'source': 'my_files/mut_op_java_file.xlsx'}, page_content="Node type: Wra. A modifier in a method's declaration for visibility or behavior. Example: public, static"),
  Document(id='693f7b17-f6a1-4712-b206-7206186d07b8', metadata={'Component': 'Invocation', 'D

In [None]:
from pathlib import Path

In [80]:
# Sanity check that the input data folder is present next to the notebook.
caminho_resultados = Path("my_files")

# is_dir() is already False for a missing path, so it covers the existence check too.
if caminho_resultados.is_dir():
    print("achou a pasta")


achou a pasta


In [None]:
# Folder where results are written, relative to the notebook's working directory.
nome_da_pasta_de_resultados = "results"
caminho_resultados = Path(nome_da_pasta_de_resultados)

# Create the folder; does nothing (and raises no error) if it already exists.
caminho_resultados.mkdir(exist_ok=True)

# Report success only after the folder name is defined and mkdir has run, so
# this cell also works on a fresh kernel (Restart & Run All).
print(f"A pasta '{nome_da_pasta_de_resultados}' foi verificada e/ou criada com sucesso.")



In [5]:
from RAG_agent import agent_rag_chained

In [6]:
# Build the agent exposed by the local RAG_agent module.
# NOTE(review): presumably this packages the same retrieval pipeline assembled
# step by step above - confirm against RAG_agent.
agent = agent_rag_chained()

OpenAI API loaded.


In [8]:
# Ask the packaged agent the same sample question (`tp`) with an empty chat history.
results = agent.invoke({"question": tp, "chat_history": []})

In [11]:
# Show only the answer string returned by the agent.
results['answer']

'[Delete LocalVariable; Delete Invocation; Delete Invocation]'