# Model Zoo Analysis - Agreement Score

## 1. Import libraries

In [43]:
## Import libraries

import numpy as np
import pandas as pd
from itertools import combinations

import os
import json

import random

## 2. Set Global Variables

In [3]:
## Set paths

FT_BERTS = "/Users/kaanaydin/Library/CloudStorage/GoogleDrive-implanguagetransformers@gmail.com/My Drive/imp-project/question-answering/finetuned-berts"

## 2. Retrieve scores

In [84]:
def create_df_with_results(path):
    
    ## Get all models
    models = os.listdir(FT_BERTS)

    ## Remove .DS_Store (if available)
    if '.DS_Store' in models:
        models.remove('.DS_Store')

    ## Retrieve results (from last epoch)

    results = []

    for model in models:
        
        ## Generate path to result file for respective model
        ### new_path = os.path.join(path, model, "results")
        
        ## Open json file
        ### with open(new_path, 'r') as file:
        ###    data = json.load(file)

        ## Get results from last epoch
        ### result = data['epoch_results'][-1]

        ## Put to numpy
        ### result = np.array(result)

        ## Append to results list
        ### results.append(result)

        ## Random list of numbers -> to be deleted once we have the results
        random_list = [random.randint(0, 1) for _ in range(50)]

        ## Append to results list -> to be deleted once we have the results
        results.append(random_list)

    ## Create dataframe of models and results
    df = pd.DataFrame({'Models': models,'Results': results})

    return df

In [85]:
def create_df_extended(df):
    
    ## Create all possible combinations of models
    combs = list(combinations(df['Models'], 2))
    
    ## Create dataframe of combinations
    df_combinations = pd.DataFrame(combs, columns=['Model1', 'Model2'])
    
    ## Retrieve the results of the models for each row 
    df_combinations['Results1'] = df_combinations['Model1'].apply(lambda x: df[df['Models'] == x].Results.values[0])
    df_combinations['Results2'] = df_combinations['Model2'].apply(lambda x: df[df['Models'] == x].Results.values[0])
    
    return df_combinations

In [86]:
def calculate_agreement_score(df):

    agreement_scores = []
    agreement_score = []
    

    ## Iterate over rows
    for index, row in df.iterrows():
        
        ## Calculate agreement score
        results1 = row['Results1']
        results2 = row['Results2']

        ## Calculate agreement scores
        agreement_scores_row = []

        for idx in range(len(results1)):
            if results1[idx] == results2[idx]:
                agreement_scores_row.append(1)
            else:
                agreement_scores_row.append(0)
        
        ## Calculate average agreement score
        agreement_score_row = np.mean(agreement_scores_row)

        ## Append to agreement scores list
        agreement_scores.append(agreement_scores_row)
        agreement_score.append(agreement_score_row)
    
    ## Add agreement scores to dataframe
    df['AgreementScores'] = agreement_scores
    df['AgreementScore'] = agreement_score

    return df

In [87]:
def calculate_final_agreement_score(df):
    
        ## Calculate final agreement score
        final_agreement_score = np.mean(df['AgreementScore'])
    
        return final_agreement_score

In [88]:
original_df = create_df_with_results(FT_BERTS)

In [89]:
original_df

Unnamed: 0,Models,Results
0,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.001,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, ..."
1,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.003,"[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, ..."
2,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.002,"[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


In [90]:
extended_df = create_df_extended(original_df)

In [91]:
extended_df

Unnamed: 0,Model1,Model2,Results1,Results2
0,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.001,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.003,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, ...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, ..."
1,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.001,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.002,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, ...","[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."
2,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.003,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.002,"[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, ...","[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ..."


In [92]:
extended_df_with_agreements = calculate_agreement_score(extended_df)

In [94]:
extended_df_with_agreements

Unnamed: 0,Model1,Model2,Results1,Results2,AgreementScores,AgreementScore
0,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.001,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.003,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, ...","[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, ...","[1, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, ...",0.52
1,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.001,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.002,"[0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, ...","[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, ...",0.42
2,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.003,bert_lr_0.001_wd_0.001_sst2_lr_0.001_wd_0.002,"[0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, ...","[1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, ...","[0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, ...",0.54


In [95]:
output = calculate_final_agreement_score(extended_df_with_agreements)

In [96]:
output

0.49333333333333335