# Código para comparação de PM e PNRs (Langchain)

## Bibliotecas

In [1]:
from langchain_core.documents import Document
from langchain_chroma import Chroma
from langchain_openai import AzureChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_core.runnables import chain

from typing import List
from tqdm.autonotebook import tqdm, trange
import getpass
import random
import os
import pandas as pd
import numpy as np
from uuid import uuid4

  from tqdm.autonotebook import tqdm, trange


## Configurando modelos

In [2]:
# Azure OpenAI chat model. Key, endpoint, deployment name and API version are read with
# os.environ[...], so a missing variable raises KeyError when this cell runs.
model = AzureChatOpenAI(
    api_key=os.environ["API_KEY"],
    azure_endpoint=os.environ["ENDPOINT_URL"],
    azure_deployment=os.environ["AZURE_OPENAI_DEPLOYMENT_NAME"],
    openai_api_version=os.environ["AZURE_OPENAI_API_VERSION"],
    temperature= 0.5,
    top_p= 0.7,
    max_tokens= 1500
)


# Instantiate the HuggingFace embedding model (via SentenceTransformer)
model_name = 'intfloat/multilingual-e5-large'
embedding_model = HuggingFaceEmbeddings(model_name=model_name)

## Funções adicionais

In [3]:
def verificar_variaveis(*nomes_variaveis):
    """
    Report, for each given name, whether that environment variable is set.

    Prints one line per name: a success message when the variable has a
    non-empty value, otherwise a not-found message. Returns nothing.

    Args:
    *nomes_variaveis: Names of the environment variables to check.
    """
    for nome_variavel in nomes_variaveis:
        if not os.getenv(nome_variavel):
            # Unset and empty-string values are both reported as missing
            print(f"{nome_variavel} não encontrada no arquivo .env.")
            continue
        print(f"{nome_variavel} carregada com sucesso")
            

# Read the environment variables
# NOTE(review): api_key and endpoint do not appear to be used elsewhere in the visible
# notebook (the model cell reads os.environ directly) — confirm before relying on them.
api_key = os.getenv("API_KEY")
endpoint = os.getenv("ENDPOINT_URL")
#image = os.getenv("IMAGE_PATH")
# Environment variable names to check
variaveis_para_verificar = ["API_KEY", "ENDPOINT_URL", "IMAGE_PATH"]

# Run the check for every listed variable
verificar_variaveis(*variaveis_para_verificar)

API_KEY carregada com sucesso
ENDPOINT_URL carregada com sucesso
IMAGE_PATH carregada com sucesso


In [5]:
# Build the list of documents
def create_documents(texts):
    """Wrap each text in a Document and generate one random UUID string per document.

    Args:
        texts: Iterable of strings; each becomes the page_content of one Document.

    Returns:
        A tuple (documents, uuids): the list of Document objects and a list of
        uuid4 strings of the same length.
    """
    documents = []
    uuids = []
    for text in texts:
        documents.append(Document(page_content=text))
        uuids.append(str(uuid4()))

    return documents, uuids


In [7]:
@chain
def retriever(query: str,k: int) -> List[Document]:
    """Run a similarity search for `query` on the global `chroma_db` store.

    Args:
        query: Text to search for.
        k: Passed to `similarity_search` as the number of documents requested.

    Returns:
        The list of documents returned by `chroma_db.similarity_search`.
    """
    return chroma_db.similarity_search(query,k)

In [8]:
def gerar_prompt(item_plano_manutencao,item_inspec, messages):
    """Build the full chat prompt for one (maintenance task, inspection report) pair.

    The final user turn follows the same layout as the few-shot examples
    ("Normative task: ...", blank line, "Inspection report: ..."), so the model
    sees the query in exactly the format it was shown. The text is assembled
    from explicit pieces so that source-code indentation never ends up in the prompt.

    Args:
        item_plano_manutencao: Text of the maintenance-plan (normative) task.
        item_inspec: Text of the inspection report to compare against the task.
        messages: Sequence of (role, content) tuples to place before the new
            user turn (system instruction plus few-shot turns). Not modified.

    Returns:
        A new list with every item of `messages` followed by the ("user", ...) turn.
    """
    user_turn = (
        "user",
        f"Normative task: {item_plano_manutencao}\n\n"
        f"Inspection report: {item_inspec}\n",
    )
    # Unpacking builds a fresh list and accepts any iterable of messages
    return [*messages, user_turn]

In [9]:
def get_dict_key(item_procurado,dict):
    """Find which list in a dict of lists contains an item, and at what position.

    Args:
        item_procurado: The item to look for.
        dict: Mapping from key to a list of items. (The parameter name shadows
            the builtin inside this function; it is kept for compatibility.)

    Returns:
        A tuple (key, index) for the first key, in the mapping's iteration order,
        whose list contains the item; `index` is the position of the first
        occurrence in that list. When no list contains the item, a message is
        printed and (None, None) is returned, so callers that unpack the result
        into two names keep working.
    """
    # Walk the mapping to find the key whose list contains the item
    for chave, lista in dict.items():
        try:
            # A single scan: .index both tests membership and gives the position
            return chave, lista.index(item_procurado)
        except ValueError:
            continue

    print(f"O item '{item_procurado}' não foi encontrado em nenhuma lista.")
    return None, None

## Importando dados

In [10]:
# Load both sheets of the workbook: Sheet1 -> df_real, Sheet2 -> df_sintetico
df_real, df_sintetico = (
    pd.read_excel('Listas_geradas.xlsx', sheet_name=nome_aba)
    for nome_aba in ('Sheet1', 'Sheet2')
)

# Preview the first two rows of each frame
display(df_real.head(2), df_sintetico.head(2))

Unnamed: 0,task_normativo,inspec_correta,inspec_correta_gramatica,inspec_parcial,inspec_parcial_gramatica,inspec_errada
0,"Interior Placards, Exterior Placards, Decals, ...",Conducted a thorough examination of the Interi...,Conducted a thru examination of the Intierior ...,Inspection of Interior and Exterior Placards c...,Inspecshun of Intirior an Exterir Placards com...,Reviewed the placards for any signs of damage....
1,"Ventilation System - Inspect clamps, hoses, an...",Conducted a comprehensive inspection of the Ve...,Conducted a comprhensive inspecshun of the Ven...,Inspected a few clamps and hoses of the Ventil...,Inspted a few clampz and hosses of the Ventila...,Reviewed the overall functionality of the vent...


Unnamed: 0,Equipment,inspec_sintetico,categoria
0,1. Battery System,Conducted a comprehensive inspection of the Ba...,geral
1,2. Propeller Assembly,Performed a detailed examination of the Propel...,geral


In [11]:
# Show the column names of df_real
df_real.columns

Index(['task_normativo', 'inspec_correta', 'inspec_correta_gramatica',
       'inspec_parcial', 'inspec_parcial_gramatica', 'inspec_errada'],
      dtype='object')

## Plano normativo

In [12]:
# The maintenance-plan tasks are stored in the `task_normativo` column (see the column
# listing of df_real); df_real has no `task_maintenance` column, so reading that name
# raises AttributeError on a fresh run.
lista_plano_manutencao = df_real["task_normativo"].to_list()

## Listas de inspeção

### Validação Simples com Mistura Sintética

#### Lista sintética

In [13]:
# Synthetic inspection reports (column `inspec_sintetico`) as a plain list, then its size
lista_sinteico = df_sintetico["inspec_sintetico"].tolist()
len(lista_sinteico)

50

#### Inspeção correta

In [14]:
# Reports from the `inspec_correta` column as a plain list, then its size
lista_inspec_correta = df_real["inspec_correta"].tolist()
len(lista_inspec_correta)

152

In [15]:
# Reports from the `inspec_correta_gramatica` column as a plain list, then its size
lista_inspec_correta_gramatica = df_real["inspec_correta_gramatica"].tolist()
len(lista_inspec_correta_gramatica)

152

#### Inspeção Parcial

In [16]:
# Reports from the `inspec_parcial` column as a plain list, then its size
lista_inspec_parcial = df_real["inspec_parcial"].tolist()
len(lista_inspec_parcial)

152

In [17]:
# Reports from the `inspec_parcial_gramatica` column as a plain list, then its size
lista_inspec_parcial_gramatica = df_real["inspec_parcial_gramatica"].tolist()
len(lista_inspec_parcial_gramatica)

152

#### Inspeção errada

In [18]:
# Reports from the `inspec_errada` column as a plain list, then its size
lista_inspec_errado = df_real["inspec_errada"].tolist()
len(lista_inspec_errado)

152

#### Dicionário

In [19]:
# Maps each source label to its full list of reports; lets a retrieved text be
# traced back to the list (and position) it came from.
dic_inspec = dict(
    inspec_correta=lista_inspec_correta,
    inspec_correta_gramatica=lista_inspec_correta_gramatica,
    inspec_parcial=lista_inspec_parcial,
    inspec_parcial_gramatica=lista_inspec_parcial_gramatica,
    inspec_errado=lista_inspec_errado,
    inspec_sintetico=lista_sinteico,
)

In [20]:
# Test corpora 1-5: one category of real reports followed by the synthetic reports
teste1 = [*lista_inspec_correta, *lista_sinteico]
teste2 = [*lista_inspec_correta_gramatica, *lista_sinteico]
teste3 = [*lista_inspec_parcial, *lista_sinteico]
teste4 = [*lista_inspec_parcial_gramatica, *lista_sinteico]
teste5 = [*lista_inspec_errado, *lista_sinteico]
# Test corpus 6: slices of the two "correct" lists plus all partial, wrong and synthetic reports
teste6 = [
    *lista_inspec_correta[:50],
    *lista_inspec_correta_gramatica[50:100],
    *lista_inspec_parcial,
    *lista_inspec_errado,
    *lista_sinteico,
]

## ChromaDB

In [21]:
# Turn the teste6 corpus into Document objects, with one generated id per document
documents,uuids  = create_documents(teste6)

In [22]:
# Build the Chroma vector index
try:
    # Re-running this cell: empty the collection of the existing store first
    # (presumably so the new documents are not indexed on top of the old ones).
    chroma_db.reset_collection()
except NameError:
    # First run in this kernel: there is no previous store to reset
    pass

# Index exactly once. Any failure here is raised as-is instead of being swallowed
# by a bare `except` and silently retried.
chroma_db = Chroma.from_documents(documents=documents,ids=uuids, embedding=embedding_model)

## Few-shots

In [23]:
# Lists used to build the few-shot examples: inputs (user turns) and expected outputs (assistant turns)
lista_entrada = []
lista_saida = []

# Example 1: a report that matches the normative task (expected answer "Adherence: Yes")
lista_entrada.append("""Normative task: Interior Placards, Exterior Placards, Decals, Markings and Identification Plates - Examine for correct installation and legibility. Refer to Chapter 11 Placards and Markings - Inspection/Check.

Inspection report: Conducted a thorough examination of the Interior Placards, Exterior Placards, Decals, Markings, and Identification Plates. Verified correct installation and ensured that all markings are legible, in accordance with Chapter 11 Placards and Markings - Inspection/Check guidelines.
""")

lista_saida.append("""Compared Entities: Normative task vs Inspection report
Adherence: Yes
Reason: Both documents refer to the same items (Interior and Exterior Placards, Decals, Markings, and Identification Plates) and confirm the procedures of examining for correct installation and legibility. The references to Chapter 11 in both documents match perfectly, indicating full adherence.
""")

In [24]:
# Example 2: the same normative task paired with a report about different items
# (expected answer "Adherence: No")
lista_entrada.append("""Normative task: Interior Placards, Exterior Placards, Decals, Markings and Identification Plates - Examine for correct installation and legibility. Refer to Chapter 11 Placards and Markings - Inspection/Check.

Inspection report: Performed an extensive assessment of the Seats, Safety Belts, and Interior Panels. All seats were secure and functional. Safety belts were checked for wear and compliance with safety standards.
""")

lista_saida.append("""Compared Entities: Normative task vs Inspection report
Adherence: No
Reason: The maintenance plan specifically refers to placards, decals, and markings, while the inspection report focuses on seats and safety belts. The items inspected do not match the items outlined in the maintenance plan, indicating a lack of adherence.
""")

In [25]:
# Build the few-shot message list: each example contributes a ("user", input) turn
# immediately followed by its ("assistant", expected output) turn.
few_shots=[]
for i, entrada in enumerate(lista_entrada):
    saida = lista_saida[i]
    few_shots += [("user",entrada), ("assistant",saida)]

## Instrução

In [26]:
# System instruction: defines the comparison task, the step-by-step guide and the
# expected answer fields (Compared Entities, Adherence, reason).
instrucoes =[("system","""You work for an aircraft maintenance company. 
Your task is to check the aircraft maintenance plan against the inspector's report. 
To carry out this task you will be given two documents, one being the maintenance plan and the other the inspector's report, and you will have to compare them and evaluate if they are adherent or not. Be very rigorous and only consider adherence if they are highly similar and mention all the same components.

To help you with your assessment, use the step-by-step guide:
1) Check that the item in the maintenance plan and the report are related to each other.
2) Check that the procedures described in the Maintenance Plan Assessment match those described in the inspector's report.
3) You may find some spelling or grammar mistakes, but this shouldn't stop you from checking that the maintenance plan and the report are related to each other.

Your result should contain the Compared Entities, the Adherence (Yes or No) and, in a few words, the reason for your answer to adherence.
""")]

In [27]:
# Conversation prefix shared by every request: system instruction first, then the few-shot turns
messages = [*instrucoes, *few_shots]
messages

[('system',
  "You work for an aircraft maintenance company. \nYour task is to check the aircraft maintenance plan against the inspector's report. \nTo carry out this task you will be given two documents, one being the maintenance plan and the other the inspector's report, and you will have to compare them and evaluate if they are adherent or not. Be very rigorous and only consider adherence if they are highly similar and mention all the same components.\n\nTo help you with your assessment, use the step-by-step guide:\n1) Check that the item in the maintenance plan and the report are related to each other.\n2) Check that the procedures described in the Maintenance Plan Assessment match those described in the inspector's report.\n3) You may find some spelling or grammar mistakes, but this shouldn't stop you from checking that the maintenance plan and the report are related to each other.\n\nYour result should contain the Compared Entities, the Adherence (Yes or No) and, in a few words, 

## LLM

In [28]:

k = 3  # number of candidate reports retrieved (and judged by the LLM) per maintenance task

# One "Report_n" / "Resultado_n" column pair per retrieved candidate, derived from k so
# that changing k does not break the row/column alignment.
colunas = ["Normative task"]
for n in range(1, k + 1):
    colunas += [f"Report_{n}", f"Resultado_{n}"]

# Rows are collected in a plain list and turned into a DataFrame once at the end,
# instead of enlarging the frame row by row with .loc.
linhas = []
for item_plano_manutencao in tqdm(lista_plano_manutencao, bar_format='{l_bar}{bar:10}{r_bar}{bar:-10b}'):
    new_row = [item_plano_manutencao]

    result = retriever.invoke(item_plano_manutencao,k=k)

    # Iterate over what was actually returned (at most k) rather than indexing 0..k-1
    for doc in result[:k]:
        item_inspec = doc.page_content
        # get_dict_key may yield None when the text is not found in any list;
        # fall back to (None, None) so the unpacking never fails.
        chave_dict, pos_lista = get_dict_key(item_inspec,dic_inspec) or (None, None)
        new_row.append(f"Item da lista: {chave_dict} posição: {pos_lista}\n{item_inspec}")
        prompt = gerar_prompt(item_plano_manutencao, item_inspec,messages)
        resposta = model.invoke(prompt)

        new_row.append(resposta.content)

    # If fewer than k documents came back, pad with empty strings to match the columns
    new_row += [""] * (len(colunas) - len(new_row))
    linhas.append(new_row)

df_resultado = pd.DataFrame(linhas, columns=colunas)

100%|██████████| 152/152 [07:14<00:00,  2.86s/it]


In [29]:
# Summary columns. "Aderência" starts as 'Não' for every row so the column exists even
# when no report at all is judged adherent (a masked assignment alone would never
# create it, and a later fillna on the missing column would raise KeyError).
df_resultado["Item_Aderente"] = ""  # numbers of the adherent reports, e.g. "1,3,"
df_resultado["Gabarito"] = ""  # intentionally empty: filled in later when checking against the answer key
df_resultado["Aderência"] = "Não"

# Number of "Resultado_n" columns actually present, instead of a hard-coded 3
n_resultados = sum(str(coluna).startswith("Resultado_") for coluna in df_resultado.columns)

for n in range(1, n_resultados + 1):
    # Literal match, computed once per column; missing answers count as not adherent
    aderente = df_resultado[f"Resultado_{n}"].str.contains("Adherence: Yes", regex=False, na=False)
    df_resultado.loc[aderente, "Item_Aderente"] += f"{n},"
    df_resultado.loc[aderente, "Aderência"] = "Sim"

In [30]:
# Display the results table
df_resultado

Unnamed: 0,Normative task,Report_1,Resultado_1,Report_2,Resultado_2,Report_3,Resultado_3,Item_Aderente,Gabarito,Aderência
0,"Interior Placards, Exterior Placards, Decals, ...",Item da lista: inspec_correta posição: 0\nCond...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 0\nInsp...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 0\nRevie...,Compared Entities: Normative task vs Inspectio...,12,,Sim
1,"Ventilation System - Inspect clamps, hoses, an...",Item da lista: inspec_correta posição: 1\nCond...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 1\nInsp...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 134\nIn...,Compared Entities: Normative task vs Inspectio...,1,,Sim
2,Primary Flight Display (PFD) Fan and Multi-Fun...,Item da lista: inspec_correta posição: 2\nCond...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 2\nOper...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_sintetico posição: 39\nE...,Compared Entities: Normative task vs Inspectio...,1,,Sim
3,"Cold and Hot Air Hoses - Check condition, rout...",Item da lista: inspec_correta posição: 3\nInsp...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 3\nInsp...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 4\nInsp...,Compared Entities: Normative task vs Inspectio...,1,,Sim
4,"Heater Components, Inlets, and Outlets - Inspe...",Item da lista: inspec_correta posição: 4\nCond...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 4\nCheck...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 4\nInsp...,Compared Entities: Normative task vs Inspectio...,1,,Sim
...,...,...,...,...,...,...,...,...,...,...
147,Exhaust System - Inspect for cracks and securi...,Item da lista: inspec_parcial posição: 147\nIn...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 147\nCon...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 3\nExami...,Compared Entities: Normative task vs Inspectio...,,,Não
148,Engine Oil - Drain oil sump and oil cooler. Ch...,Item da lista: inspec_errado posição: 148\nChe...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_sintetico posição: 10\nC...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 3\nExami...,Compared Entities: Normative task vs Inspectio...,,,Não
149,"Oil Cooler - Check for obstructions, leaks, an...",Item da lista: inspec_parcial posição: 149\nIn...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_errado posição: 149\nIns...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 147\nIn...,Compared Entities: Normative task vs Inspectio...,1,,Sim
150,Starter and Electrical Connections - Check sec...,Item da lista: inspec_parcial posição: 14\nMai...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 150\nIn...,Compared Entities: Normative task vs Inspectio...,Item da lista: inspec_parcial posição: 147\nIn...,Compared Entities: Normative task vs Inspectio...,,,Não


In [31]:
# Save the results table to an Excel file in the working directory
df_resultado.to_excel('resultado_teste6.xlsx')