In [None]:
import pandas as pd

In [None]:
def load_df_evaluation_just_test_data_output(file_path):
    """Read an evaluation file and return only its test rows.

    The file is parsed as a space-separated table whose first column becomes
    the index. Missing ``y_test`` values are replaced by ``-1``, and ``-1`` is
    then used as the "no test label" marker when filtering.

    A short diagnostic is printed: either a warning listing rows that have
    ``y_train == -1``, a test label, and ``y_pred == -1``, or "All right".

    Parameters
    ----------
    file_path
        Anything ``pd.read_csv`` accepts (path or file-like object).

    Returns
    -------
    pandas.DataFrame
        Rows with ``y_test != -1``, sorted by index, restricted to the
        columns ``y_pred``, ``y_conf`` and ``y_test`` (in that order).
    """
    raw = pd.read_csv(file_path, sep=" ", index_col=0)
    filled = raw.fillna(value={"y_test": -1})

    # Rows that carry a test label but no prediction.
    # NOTE(review): y_train == -1 presumably marks "not a training row" - confirm.
    unpredicted = filled[
        (filled["y_train"] == -1)
        & (filled["y_test"] != -1)
        & (filled["y_pred"] == -1)
    ]
    if len(unpredicted) == 0:
        print("All right")
    else:
        print("WARNING, there are test rows without predictions:")
        print(unpredicted.head())
        print(len(unpredicted))

    ordered = filled.sort_index()

    # IMPORTANT: only the test data rows are kept.
    test_rows = ordered[ordered["y_test"] != -1]

    return test_rows[["y_pred", "y_conf", "y_test"]]

In [None]:
import itertools
import numpy as np

def similarity_matrix(_datasets, top_n=10):
    """Compare the most confident test predictions of every pair of datasets.

    For each unordered pair of entries in ``_datasets`` and each label in
    ``synsets`` (the unique ``y_train`` values of the first dataset's file),
    the ``top_n`` highest-``y_conf`` test rows predicted as that label are
    taken from both datasets and their ``word`` values are intersected.
    A one-line summary per pair is printed.

    Parameters
    ----------
    _datasets : sequence of dict
        Each dict provides a "name" and a "path"; the path is read with
        ``load_df_evaluation_just_test_data_output``.
    top_n : int, optional
        Number of rows kept per predicted label. Defaults to 10, the value
        that was previously hard-coded.

    Returns
    -------
    list of dict
        One entry per pair with the keys "combination" (tuple of the two
        names), "intersections" (per-label overlap divided by the larger of
        the two word lists) and "equals" (per-label overlap count). Both
        lists hold one value per label, in the order of ``synsets``; a label
        that is absent from either dataset contributes 0 to both.
        An empty ``_datasets`` yields an empty list.
    """
    if len(_datasets) == 0:
        # Nothing to compare; also avoids indexing _datasets[0] below.
        return []

    load = load_df_evaluation_just_test_data_output

    loaded_datasets = [{
        "name": _dataset["name"],
        "df_eval": load(_dataset["path"])
    } for _dataset in _datasets]

    # The labels to iterate over come from the raw file of the first dataset.
    # NOTE(review): this includes every distinct y_train value, so a -1
    # placeholder (if present) is treated like a regular label - confirm.
    synsets = pd.read_csv(
        _datasets[0]["path"],
        sep=" ",
        index_col=0).y_train.unique()

    def top_rows_by_prediction(df_eval):
        # Group by predicted label, sort each group by confidence (descending),
        # keep the first top_n rows of every group and regroup by label.
        return df_eval.reset_index().groupby(["y_pred"]).apply(
            lambda x: x.sort_values(["y_conf"], ascending=False)
        ).groupby(level=0).head(top_n).groupby(level=0)

    results = []
    for a, b in itertools.combinations(loaded_datasets, r=2):
        a_groups = top_rows_by_prediction(a["df_eval"])
        b_groups = top_rows_by_prediction(b["df_eval"])

        # Intersect the respective synsets and get mean intersection
        intersections = []
        equals = []
        for synset in synsets:
            if (synset not in a_groups.groups) or (synset not in b_groups.groups):
                # Keep both lists aligned with `synsets`; adding 0 leaves the
                # printed sum of `equals` unchanged.
                intersections.append(0)
                equals.append(0)
                continue

            # `word` is the column produced by reset_index().
            # NOTE(review): assumes the file's index column is named "word" - confirm.
            a_words = a_groups.get_group(synset).word.tolist()
            b_words = b_groups.get_group(synset).word.tolist()
            shared = len(set(a_words).intersection(b_words))

            intersections.append(shared / np.max([len(a_words), len(b_words)]))
            equals.append(shared)

        print((a["name"], b["name"]), "intersections", np.mean(intersections), "equals", np.sum(equals))
        results.append({
            "combination": (a["name"], b["name"]),
            "intersections": intersections,
            "equals": equals
        })

    # Hand the collected comparison back to the caller instead of dropping it.
    return results

## When using 50% test data: how many test predictions agree between two models, and what would their combined accuracy be?

In [None]:
# using sampling with rng_num=1
# Each descriptor pairs a short display name with the path of the evaluation
# file that is loaded for it.
ft_lp = dict(
    name="ft_lp",
    path="<PROJECT_DIR>/08_propagation_evaluation/20181029-150442/main.txt",
)

w2v_lp = dict(
    name="w2v_lp",
    path="<PROJECT_DIR>/08_propagation_evaluation/20181029-152409/main.txt",
)

# NOTE(review): both baseline descriptors use the same "<DF_EVALUATION_PATH>"
# placeholder - confirm that each is meant to point at its own file.
ft_baseline_k200 = dict(
    name="ft_baseline_k200",
    path="<DF_EVALUATION_PATH>",
)

w2v_baseline_k200 = dict(
    name="w2v_baseline_k200",
    path="<DF_EVALUATION_PATH>",
)

In [None]:
import itertools

def pairwise_combined_accuracy(_datasets):
    """Print per-dataset test accuracy and pairwise agreement statistics.

    Every entry of ``_datasets`` (a dict with "name" and "path") is loaded
    with ``load_df_evaluation_just_test_data_output``. For each loaded frame
    the share of rows with ``y_pred == y_test`` is printed. Then, for every
    unordered pair, the two frames are joined on their index and two ratios
    over the joined rows are printed:

    * "Equal test predictions": both predictions differ from -1 and are equal.
    * "Combined accuracies": at least one of the two predictions equals the
      first frame's ``y_test``.

    Nothing is returned; all results go to standard output.
    """
    evaluations = []
    for spec in _datasets:
        evaluations.append({
            "name": spec["name"],
            "df_eval": load_df_evaluation_just_test_data_output(spec["path"]),
        })

    for entry in evaluations:
        frame = entry["df_eval"]

        # Sanity check: the loader is expected to have removed unlabeled rows.
        unlabeled = frame[frame["y_test"] == -1]
        if len(unlabeled) > 0:
            print("All y_test should have a value!")

        hits = frame[frame["y_pred"] == frame["y_test"]]
        accuracy = len(hits) / len(frame)
        print(entry["name"], "test accuracy: ", accuracy)

    for left, right in itertools.combinations(evaluations, r=2):
        # Index-aligned join; overlapping column names get the _a / _b suffixes.
        joined = left["df_eval"].join(right["df_eval"], lsuffix="_a", rsuffix="_b")
        combined = joined[["y_pred_a", "y_test_a", "y_pred_b"]]

        print()
        print()
        print(left["name"], right["name"])

        both_predicted = (combined["y_pred_a"] != -1) & (combined["y_pred_b"] != -1)
        same_prediction = combined["y_pred_a"] == combined["y_pred_b"]
        agreeing = combined[both_predicted & same_prediction]
        print("Equal test predictions", len(agreeing) / len(combined))

        a_correct = combined["y_pred_a"] == combined["y_test_a"]
        b_correct = combined["y_pred_b"] == combined["y_test_a"]
        any_correct = combined[a_correct | b_correct]
        print("Combined accuracies", len(any_correct) / len(combined))

        
# All four result sets are evaluated together; the pairs are formed inside
# the called function.
datasets = [
    ft_lp,
    w2v_lp,
    ft_baseline_k200,
    w2v_baseline_k200,
]
# similarity_matrix(datasets)
pairwise_combined_accuracy(datasets)