In [8]:
import os
import json
import pandas as pd

In [9]:
# Build one DataFrame per simulation folder: every sub-directory of `root_dir`
# is treated as one simulation, and every *.json file inside it contributes one
# row whose columns are the metric titles found under data -> items.
root_dir = 'simulations'
# os.listdir() returns entries in arbitrary order; sort so that the row order of
# the summary table (and of results.json) is reproducible across runs/platforms.
simulation_folders = sorted(os.listdir(root_dir))
dataframes = {}

for folder in simulation_folders:
    folder_path = os.path.join(root_dir, folder)

    # Only directories are simulation folders; skip stray files next to them.
    if not os.path.isdir(folder_path):
        continue

    # Sorted for the same reproducibility reason as above.
    json_files = sorted(f for f in os.listdir(folder_path) if f.endswith('.json'))

    metrics = []
    for json_file in json_files:
        json_file_path = os.path.join(folder_path, json_file)

        # Read explicitly as UTF-8 instead of relying on the platform's
        # default text encoding.
        with open(json_file_path, 'r', encoding='utf-8') as f:
            json_data = json.load(f)

        items = json_data['data']['items']

        # Map metric title -> value, leaving out the 'Time to discovery' item.
        metric_data = {item['title']: item['value'] for item in items if item['title'] != 'Time to discovery'}
        metrics.append(metric_data)

    df = pd.DataFrame(metrics)
    dataframes[folder] = df

# Access the DataFrame for a specific simulation folder like this:
# dataframes['Appenzeller-Herzog_2019_-m_logistic_-e_tfidf']

In [10]:
def average_metrics(df):
    """Average every metric in ``df`` over its rows.

    Columns are handled in one of two ways:

    * Non-object columns are averaged directly with ``Series.mean()`` (which
      ignores NaN) and stored under the column name.
    * Object columns are exploded; each resulting entry is indexed as
      ``entry[0]`` (a key) and ``entry[1]`` (a value). Values are grouped by
      key and each group's average is stored under ``"<column> <key>"``.

    Rows that lack an object-column metric (None/NaN after ``explode()``, which
    also covers empty lists) are skipped, mirroring the NaN-skipping behaviour
    of the numeric branch instead of failing with a ``TypeError``.

    Args:
        df: DataFrame with one row per run and one column per metric.

    Returns:
        dict mapping metric name to its average, rounded to 2 decimals.
    """
    metric_averages = {}
    for column in df.columns:
        if df[column].dtype == "object":
            values_by_key = {}
            for metric in df[column].explode().tolist():
                # A missing metric survives explode() as a scalar None/NaN,
                # which cannot be indexed like a (key, value) pair.
                if pd.api.types.is_scalar(metric) and pd.isna(metric):
                    continue
                values_by_key.setdefault(metric[0], []).append(metric[1])
            for key, values in values_by_key.items():
                new_col_name = f"{column} {key}"
                metric_averages[new_col_name] = round(sum(values) / len(values), 2)
        else:
            metric_averages[column] = round(df[column].mean(), 2)
    return metric_averages

    
# split up the name into categories
def split_name(name):
    """Split a simulation folder name into (dataset, model, embedding).

    Names are expected to look like ``<dataset>_-m_<model>_-e_<embedding>``,
    e.g. ``'Appenzeller-Herzog_2019_-m_logistic_-e_tfidf'``.

    The name is split on the complete ``_-m_`` and ``_-e_`` tokens rather than
    on the bare ``-m`` / ``-e`` substrings, so hyphenated dataset names (such
    as ``'Smith-evans_2020'``) or embedding names containing ``-e`` are not cut
    in the wrong place.

    Args:
        name: simulation folder name.

    Returns:
        Tuple ``(dataset, model, embedding)`` of strings.

    Raises:
        ValueError: if ``name`` does not contain ``_-m_`` followed by ``_-e_``.
    """
    # dataset name is everything before the first "_-m_"
    dataset, model_flag, remainder = name.partition("_-m_")
    # model name sits between "_-m_" and the next "_-e_"; the embedding name
    # is everything after it
    model, embedding_flag, embedding = remainder.partition("_-e_")
    if not model_flag or not embedding_flag:
        raise ValueError(
            f"cannot parse simulation name {name!r}: "
            "expected '<dataset>_-m_<model>_-e_<embedding>'"
        )
    return dataset, model, embedding

In [11]:
# One summary row per simulation folder: the three name parts followed by the
# averaged metrics of that folder.
result_list = []
for sim_name, sim_metrics in dataframes.items():
    dataset, model, embedding = split_name(sim_name)
    row = {'dataset': dataset, 'model': model, 'fe': embedding}
    row.update(average_metrics(sim_metrics))
    result_list.append(row)

df = pd.DataFrame(result_list)

# Also persist the summary rows as a JSON file.
with open("results.json", "w") as results_file:
    json.dump(result_list, results_file)

In [12]:
# Mean of every metric per feature extractor. numeric_only=True is required on
# pandas >= 2.0, where mean() no longer silently drops the string columns
# ("dataset", "model") and raises a TypeError instead.
df.groupby("fe").mean(numeric_only=True)

Unnamed: 0_level_0,Recall 0.1,Recall 0.25,Recall 0.5,Recall 0.75,Recall 0.9,Work Saved over Sampling 0.95,Extra Relevant record Found 0.1,Average time to discovery
fe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
doc2vec,0.555167,0.798833,0.9365,0.984333,0.996667,0.585833,0.454,343.49
reuseable_MiniLM,0.586379,0.803448,0.92431,0.972069,0.991897,0.580517,0.484138,349.836552
reuseable_sbert,0.598125,0.811875,0.9175,0.964375,0.985,0.596875,0.496667,268.955417
tfidf,0.651146,0.835104,0.938021,0.985521,0.997708,0.612812,0.551042,350.642708


In [13]:
# Mean of every metric per model. numeric_only=True is required on
# pandas >= 2.0, where mean() no longer silently drops the string columns
# ("dataset", "fe") and raises a TypeError instead.
df.groupby("model").mean(numeric_only=True)

Unnamed: 0_level_0,Recall 0.1,Recall 0.25,Recall 0.5,Recall 0.75,Recall 0.9,Work Saved over Sampling 0.95,Extra Relevant record Found 0.1,Average time to discovery
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic,0.597176,0.813765,0.929059,0.976588,0.994941,0.599529,0.495647,326.048824
nb,0.673846,0.843846,0.941154,0.988462,0.998462,0.626923,0.573846,456.783077
rf,0.605732,0.816707,0.932683,0.980244,0.993659,0.597683,0.504634,325.551829
svm,0.588261,0.805652,0.927101,0.974638,0.991014,0.580145,0.487391,307.03942


In [17]:
# All summary rows that belong to the Chou_2003 dataset.
df.loc[df["dataset"].eq("Chou_2003")]

Unnamed: 0,dataset,model,fe,Recall 0.1,Recall 0.25,Recall 0.5,Recall 0.75,Recall 0.9,Work Saved over Sampling 0.95,Extra Relevant record Found 0.1,Average time to discovery
29,Chou_2003,logistic,doc2vec,0.74,0.86,0.9,0.94,1.0,0.49,0.67,263.1
30,Chou_2003,logistic,reuseable_MiniLM,0.81,0.93,1.0,1.0,1.0,0.71,0.74,132.54
31,Chou_2003,logistic,tfidf,0.74,0.8,0.87,0.93,1.0,0.35,0.67,272.49
32,Chou_2003,nb,tfidf,0.73,0.8,0.93,0.93,1.0,0.45,0.66,255.18
33,Chou_2003,rf,doc2vec,0.7,0.87,0.95,1.0,1.0,0.57,0.63,202.34
34,Chou_2003,rf,reuseable_MiniLM,0.76,0.94,1.0,1.0,1.0,0.75,0.69,148.9
35,Chou_2003,rf,reuseable_sbert,0.65,0.9,1.0,1.0,1.0,0.67,0.58,176.55
36,Chou_2003,rf,tfidf,0.73,0.84,0.88,0.98,1.0,0.38,0.66,245.8
37,Chou_2003,svm,reuseable_MiniLM,0.8,0.89,1.0,1.0,1.0,0.65,0.73,142.91
38,Chou_2003,svm,reuseable_sbert,0.72,0.98,1.0,1.0,1.0,0.71,0.65,142.06


In [18]:
# Per-model metric means restricted to the Chou_2003 dataset. numeric_only=True
# keeps this working on pandas >= 2.0, where mean() raises a TypeError on the
# remaining string columns instead of dropping them.
df[df["dataset"] == "Chou_2003"].groupby("model").mean(numeric_only=True)

Unnamed: 0_level_0,Recall 0.1,Recall 0.25,Recall 0.5,Recall 0.75,Recall 0.9,Work Saved over Sampling 0.95,Extra Relevant record Found 0.1,Average time to discovery
model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
logistic,0.763333,0.863333,0.923333,0.956667,1.0,0.516667,0.693333,222.71
nb,0.73,0.8,0.93,0.93,1.0,0.45,0.66,255.18
rf,0.71,0.8875,0.9575,0.995,1.0,0.5925,0.64,193.3975
svm,0.753333,0.906667,0.956667,0.976667,1.0,0.546667,0.683333,184.493333


In [19]:
# Per-feature-extractor metric means restricted to the Chou_2003 dataset.
# numeric_only=True keeps this working on pandas >= 2.0, where mean() raises a
# TypeError on the remaining string columns instead of dropping them.
df[df["dataset"] == "Chou_2003"].groupby("fe").mean(numeric_only=True)

Unnamed: 0_level_0,Recall 0.1,Recall 0.25,Recall 0.5,Recall 0.75,Recall 0.9,Work Saved over Sampling 0.95,Extra Relevant record Found 0.1,Average time to discovery
fe,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
doc2vec,0.72,0.865,0.925,0.97,1.0,0.53,0.65,232.72
reuseable_MiniLM,0.79,0.92,1.0,1.0,1.0,0.703333,0.72,141.45
reuseable_sbert,0.685,0.94,1.0,1.0,1.0,0.69,0.615,159.305
tfidf,0.735,0.8225,0.8875,0.9425,1.0,0.365,0.665,260.495
