In [1]:
import os
import re
import pandas as pd

In [2]:
def extract_results_from_dir(dir_path: str):
    results = []
    for file in os.listdir(dir_path):
        single_results = {}
        if not file.startswith("_predictions_"):
            continue
        single_results["name"] = file.replace("_predictions_", "").replace(".txt", "")
        single, multi = extract_results_from_file(os.path.join(dir_path, file))

        single_results["rec_num_examples"], single_results["fully_added"], single_results["true_positive"], single_results["avg_min_rank"], single_results["avg_min_smooth_rank"], single_results["avg_mean_rank"], single_results["avg_mean_smooth_rank"], single_results["class_num_examples"], single_results["area_under_roc"] = single[0]

        # for treshold, accuracy, precision, recall, f1, confusion_matrix in multi:
        #     single_results["treshold"] = treshold
        #     single_results["accuracy"] = accuracy
        #     single_results["precision"] = precision
        #     single_results["recall"] = recall
        #     single_results["f1"] = f1
            
        #     results.append(single_results)
        
        for treshold, accuracy, precision, recall, f1, confusion_matrix in multi:
            single_results[f"accuracy_{treshold}"] = accuracy
            single_results[f"precision_{treshold}"] = precision
            single_results[f"recall_{treshold}"] = recall
            single_results[f"f1_{treshold}"] = f1
            
        results.append(single_results)

    return pd.DataFrame.from_records(results)



def extract_results_from_file(file_path: str):
    with open(file_path, "r", encoding="utf-8") as f:
        data = f.read()
    return extract_results_from_string(data)


def extract_results_from_string(string: str):
    single_pattern = r"Recommender quality measures:\s+- Number of test examples: (?P<recommender_number_examples>\d+) \(fully added: (?P<recommender_fully_added>\d+)\)\s+- Proportion of predictions that are in actual neighbours: (?P<recommender_>\d+\.\d+)\s+- The average minimal rank \(>= 0\) of the actual neighbour: (?P<recommender_avg_min_rank>\d+\.\d+)\s+- The average minimal smooth rank \(>= 1.0\) of the actual neighbour: (?P<recommender_avg_min_smooth_rank>\d+\.\d+)\s+- The average mean rank \(>= 0\) of the actual neighbour: (?P<recommender_avg_mean_rank>\d+\.\d+)\s+- The average mean smooth rank \(>= 1.0\) of the actual neighbour: (?P<recommender_avg_maen_smooth_rank>\d+\.\d+)\s+Classification quality measures:\s+\* Number of examples: (?P<class_num_examples>\d+)\s+\* Threshold independent measures:\s+- area under ROC: (?P<class_area_under_ROC>\d+\.\d+)"

    multi_pattern = r"threshold = (?P<treshold>\d+\.\d+)\s*- accuracy: (?P<accuracy>\d+\.\d+)\s*- precision: (?P<precision>\d+\.\d+)\s*- recall: (?P<recall>\d+\.\d+)\s*- F1: (?P<f1>\d+\.\d+)\s*- confusion matrix: (?P<confusion_matrix>\[\[\d+, \d+], \[\d+, \d+]]) \(labels: \[0, 1]\)"

    single_pattern = re.compile(single_pattern)
    multi_pattern = re.compile(multi_pattern)

    single = re.findall(single_pattern, string)
    multi = re.findall(multi_pattern, string)
    return single, multi

In [3]:
dir_path = "dumps/experiments"

results = extract_results_from_dir(dir_path)

In [4]:
results

Unnamed: 0,name,rec_num_examples,fully_added,true_positive,avg_min_rank,avg_min_smooth_rank,avg_mean_rank,avg_mean_smooth_rank,class_num_examples,area_under_roc,...,recall_0.25,f1_0.25,accuracy_0.75,precision_0.75,recall_0.75,f1_0.75,accuracy_0.9,precision_0.9,recall_0.9,f1_0.9
0,WordEmbeddingRecommender_0_p_to_keep_0.1_WordF...,2916,2916,0.050480109739369,301.64643347050753,1.0202251496150552,3446.6445789405416,1.0827406066958851,39165,0.5185124329562589,...,0.0372790356075571,0.0718609646553407,0.5145665773011617,0.9932523616734144,0.0372790356075571,0.0718609646553407,0.5145410442997574,0.9932432432432432,0.0372283847439598,0.0717668310306107
1,Node2VecEdgeEmbeddingRecommender_0_p_to_keep_0...,2916,2916,0.2513717421124828,4.522976680384088,1.0022052856945804,156.7521266870852,1.2974520689185158,39165,0.9912312785246552,...,0.98576710732918,0.9262767121983724,0.924958508872718,0.993364650616559,0.8568606594742441,0.9200772305767818,0.8591344312523937,0.997621379599832,0.722281314896419,0.8379116843435086
2,GNN_0_p_to_keep_0.1_h_size_16,2916,2916,0.0184499314128943,65.93449931412894,1.0040897698708815,747.4080838093008,1.8062755144175584,39165,0.9577372292344016,...,0.9549207313984704,0.8726018837795931,0.8741223030767267,0.946201578408338,0.7955224636580054,0.8643442848494854,0.8415932592876293,0.9684450903051692,0.7088588360431546,0.8185646604667486
3,BagOfWordsRecommender_0_p_to_keep_0.1_empty,2916,2589,0.0166666666666666,1745.7736625514403,3.5293850538544804,8449.966256930418,6.617291110039867,39165,0.5060518432634765,...,0.0123081598541255,0.024312156078039,0.5000893655049151,0.993975903614458,0.008357392493542,0.0165754181525943,0.4992978424613813,0.9925925925925926,0.006787215722028,0.0134822416742126
4,TFIDFRecommender_0_p_to_keep_0.1_cosine,2916,2916,0.042318244170096,178.26954732510288,1.467106525600549,3198.3742229708,37.845908544947726,39165,0.515422642911782,...,0.0312515828394874,0.0605852317360565,0.511017490105962,0.986842105263158,0.0303905181583346,0.0589651614171293,0.505834290820886,0.9949109414758268,0.0198044876665147,0.0388359157727453
5,TFIDFRecommender_1_p_to_keep_0.1_cityblock,2916,2916,0.0431412894375857,95.06893004115226,1.143216618555211,1021.4448314637584,1.5554958317837468,39165,0.5157516518526583,...,0.0318593932026541,0.0617302124736248,0.5095110430231073,0.990791896869245,0.0272501646153066,0.0530415064576555,0.5009064215498532,0.9949494949494948,0.0099782201286531,0.0197582869464921
6,GNN_0_p_to_keep_0.1_default,2916,2916,0.2741426611796982,14.873456790123456,28.565121634392472,1129.187846350249,435.7573302234139,39165,0.9274657792026156,...,0.9244795623765384,0.8368062719207757,0.8492276267075195,0.8771805494984736,0.8150230461429367,0.8449602226481476,0.8440188944210392,0.9005287896592244,0.7763257863546573,0.8338274895955171
7,DummyRecommender_0_p_to_keep_0.1_empty,2916,2916,0.0076817558299039,2391.360425240055,1.0858162564169054,8235.447339128834,1.3845120233718091,39165,0.5027340095846914,...,0.0056728967228891,0.0112795206203736,0.4985318524192519,1.0,0.0052170389505141,0.0103799254257784,0.4985318524192519,1.0,0.0052170389505141,0.0103799254257784
8,WordEmbeddingRecommender_1_p_to_keep_0.1_WordF...,2916,2916,0.0313443072702332,912.4475308641976,1.053077852597394,6349.458652208157,1.1664977572184625,39165,0.5115223986271159,...,0.0231474446639315,0.0452430452430452,0.5075194689135708,0.9956427015250544,0.0231474446639315,0.0452430452430452,0.5073918039065493,0.9977876106194692,0.0228435394823481,0.04466452092102
9,GNN_0_p_to_keep_0.1_h_size_32,2916,2897,0.218312757201646,21.354252400548692,1.1866310637809871,1054.1934444351753,82.38799490556983,39165,0.9373918319669416,...,0.8698779314187307,0.8858741908028783,0.8794842333716328,0.9461835461835462,0.8068176062401864,0.8709606867515993,0.8752457551385165,0.9539260617170792,0.7907106316162691,0.8646837265979839
