In [5]:
import configparser
import matplotlib.pyplot as plt
import csv
import ast
from sklearn.metrics import precision_recall_curve, auc

In [7]:
config = configparser.ConfigParser()
config.read('RESULTS.cfg')

results_path = config.get('DEFAULT', 'RESULTADOS')
esperados_path = config.get('DEFAULT', 'ESPERADOS')

In [8]:
results_dict = {}
with open(results_path, mode='r', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=';')
    
    for row in reader:
        query = row[0]
        # Using ast.literal_eval to safely evaluate the list
        pos_doc_score = ast.literal_eval(row[1])
        
        # Ensure the key exists in the dictionary
        if query not in results_dict:
            results_dict[query] = []
        
        # Append the tuple (doc, score) to the list for this query
        results_dict[query].append((pos_doc_score[1], pos_doc_score[2]))

In [10]:
esperados_dict = {}
with open(esperados_path, mode='r', newline='') as csv_file:
    reader = csv.reader(csv_file, delimiter=';')
    
    for row in reader:
        query = row[0]
        doc = row[1].strip()  # Strip to remove any extra spaces
        score = int(row[2].strip())  # Convert score to integer
        
        # Ensure the key exists in the dictionary
        if query not in esperados_dict:
            esperados_dict[query] = []
        
        # Append the tuple (doc, score) to the list for this query
        esperados_dict[query].append((doc, score))

In [11]:
esperados_dict

{'00001': [('00139', 1),
  ('00151', 1),
  ('00166', 1),
  ('00311', 1),
  ('00370', 1),
  ('00392', 1),
  ('00439', 1),
  ('00440', 1),
  ('00441', 1),
  ('00454', 1),
  ('00461', 1),
  ('00502', 1),
  ('00503', 1),
  ('00505', 1),
  ('00520', 1),
  ('00522', 1),
  ('00526', 1),
  ('00527', 1),
  ('00533', 1),
  ('00593', 1),
  ('00619', 1),
  ('00737', 1),
  ('00742', 1),
  ('00789', 1),
  ('00827', 1),
  ('00835', 1),
  ('00861', 1),
  ('00875', 1),
  ('00891', 1),
  ('00921', 1),
  ('00922', 1),
  ('01175', 1),
  ('01185', 1),
  ('01222', 1)],
 '00002': [('00169', 1),
  ('00434', 1),
  ('00454', 1),
  ('00498', 1),
  ('00499', 1),
  ('00592', 1),
  ('00875', 1)],
 '00003': [('00023', 1),
  ('00040', 1),
  ('00139', 1),
  ('00190', 1),
  ('00221', 1),
  ('00246', 1),
  ('00309', 1),
  ('00311', 1),
  ('00325', 1),
  ('00345', 1),
  ('00347', 1),
  ('00356', 1),
  ('00370', 1),
  ('00374', 1),
  ('00375', 1),
  ('00439', 1),
  ('00440', 1),
  ('00454', 1),
  ('00515', 1),
  ('00520',

In [9]:
results_dict

{'00001': [('00533 ', 0.9662811821048936),
  ('00441 ', 0.7689319880207428),
  ('00957 ', 0.7344185787741998),
  ('00827 ', 0.7046231645892093),
  ('00522 ', 0.6868886745763042),
  ('00789 ', 0.6842599582927976),
  ('01193 ', 0.6842599582927976),
  ('00040 ', 0.6842599582927975),
  ('00303', 0.6842599582927975),
  ('00435 ', 0.6842599582927975)],
 '00002': [('01125 ', 0.6772235785770886),
  ('00137 ', 0.6132964199175064),
  ('00666 ', 0.37132293009226197),
  ('00107 ', 0.3713229300922619),
  ('00301', 0.36694737561932217),
  ('00225', 0.3581079473095234),
  ('01137 ', 0.33880830867683953),
  ('00095 ', 0.3249075638307292),
  ('00498 ', 0.31021881788021066),
  ('00114 ', 0.30137953779140153)],
 '00003': [('00515 ', 0.9925126638359466),
  ('01199 ', 0.9500145140610142),
  ('01121 ', 0.9494740928268659),
  ('00635 ', 0.949417116499775),
  ('00077 ', 0.9488681114065991),
  ('00345', 0.9471880915594478),
  ('00309', 0.9461074663209352),
  ('00505 ', 0.9461074663209352),
  ('00615 ', 0.94610

In [None]:
# # Precision + recall
# y_true = [0, 1, 1, 0, 1, 0, 1, 0, 1, 0]
# y_scores1 = [0.1, 0.4, 0.35, 0.8, 0.2, 0.5, 0.7, 0.3, 0.4, 0.6]
# y_scores2 = [0.05, 0.5, 0.3, 0.9, 0.25, 0.55, 0.65, 0.35, 0.45, 0.7]

# # Calculate precision and recall for both models
# precision1, recall1, _ = precision_recall_curve(y_true, y_scores1)
# precision2, recall2, _ = precision_recall_curve(y_true, y_scores2)

# # Calculate PR AUC for both models
# pr_auc1 = auc(recall1, precision1)
# pr_auc2 = auc(recall2, precision2)

# # Plotting the precision-recall curves
# plt.figure(figsize=(8, 6))
# plt.plot(recall1, precision1, marker='.', label=f'Model 1 (AUC = {pr_auc1:.2f})')
# plt.plot(recall2, precision2, marker='.', label=f'Model 2 (AUC = {pr_auc2:.2f})')

# # Adding labels and legend
# plt.xlabel('Recall')
# plt.ylabel('Precisão')
# plt.title('Curva Precisão-Recall')
# plt.legend()
# plt.grid()
# plt.show()