## Imports

In [None]:
import os
import json
import pandas as pd
from fnmatch import fnmatch
from pathlib import Path
from os.path import basename
from pprint import pprint

import matplotlib.pyplot as plt

## Load Evaluation Results

In [None]:
# Walk ../models_datasets and collect every evaluation_results.json into
# eval_results_datasets_dict, shaped as {dataset: {model_name: results}}.
models_datasets_dir = '../models_datasets'
eval_results_datasets_dict = dict()
for dataset in os.listdir(models_datasets_dir):
    dataset_dir = os.path.join(models_datasets_dir, dataset)
    # Stray files sitting next to the dataset folders (e.g. .DS_Store) are not
    # datasets; skipping them avoids recording empty result sets for them.
    if not os.path.isdir(dataset_dir):
        continue
    eval_results_dataset_dict = dict()
    for root_dir, _sub_dirs, file_names in os.walk(dataset_dir):
        for file_name in file_names:
            if not fnmatch(file_name, 'evaluation_results.json'):
                continue
            file_path = os.path.join(root_dir, file_name)
            # The model name is the containing folder's name with everything
            # up to and including the last 'hold_all_' removed.
            model_name = basename(Path(file_path).parent).split('hold_all_')[-1]
            # JSON is UTF-8 by specification; do not rely on the locale default.
            with open(file_path, 'r', encoding='utf-8') as fp:
                eval_results_dataset_dict[model_name] = json.load(fp)
    eval_results_datasets_dict[dataset] = eval_results_dataset_dict

# eval_results_datasets_dict

In [None]:
# Flatten the nested results into one row per dataset, with one
# '<model>_<metric>' column per model/metric pair, for the chosen
# recommendation-list size.
no_of_items_to_recommend = 10
top_n_key = str(no_of_items_to_recommend)

datasets_eval_results = []
for dataset, eval_results_dataset_dict in eval_results_datasets_dict.items():
    dataset_eval = {'dataset': dataset}
    for model, model_results in eval_results_dataset_dict.items():
        eval_metrics = model_results['no_of_items_to_recommend'][top_n_key]
        dataset_eval.update(
            (model + '_' + eval_metric, score)
            for eval_metric, score in eval_metrics.items()
        )
    datasets_eval_results.append(dataset_eval)
datasets_eval_results_df = pd.DataFrame(datasets_eval_results)
# datasets_eval_results_df

In [None]:
# Order the rows by dataset name, modifying the DataFrame in place.
datasets_eval_results_df.sort_values(by='dataset', inplace=True)

In [None]:
# Quarter label for each dataset name; datasets missing from this mapping
# end up with NaN in the 'quarter' column.
dataset_quarters_dict = {
    'dataset_1': '2016Q2',
    'dataset_2': '2016Q3',
    'dataset_3': '2016Q4',
    'dataset_4': '2017Q1',
}
quarter_labels = datasets_eval_results_df['dataset'].map(dataset_quarters_dict)
datasets_eval_results_df['quarter'] = quarter_labels
datasets_eval_results_df.head()

## Plot Evaluation Results

In [None]:
def get_eval_results(metric):
    """Return 'quarter' plus every results column whose name contains *metric*.

    The match is a plain substring test against the column names of the
    module-level ``datasets_eval_results_df``.
    """
    matching_cols = [
        col for col in datasets_eval_results_df.columns if metric in col
    ]
    return datasets_eval_results_df[['quarter'] + matching_cols]

def plot_eval_results(metric):
    """Plot each column matching *metric* against 'quarter', one line per model.

    Column names are split on '_avg_': the part before it labels the line and
    the part after it (taken from the last plotted column) labels the y-axis.
    If no column matches, the y-axis is labelled with *metric* itself.
    """
    metric_df = get_eval_results(metric)
    y_label = metric
    value_cols = [col for col in metric_df.columns if col != 'quarter']
    for value_col in value_cols:
        # Unpacking raises ValueError unless the name holds exactly one '_avg_'.
        model_name, y_label = value_col.split('_avg_')
        plt.plot('quarter', value_col, data=metric_df, marker='o', label=model_name)
    plt.legend()
    plt.xlabel('Quarter')
    plt.ylabel(y_label)
    plt.show()

In [None]:
# Plot the columns whose names contain 'f1_score', one line per model.
plot_eval_results('f1_score')

In [None]:
# Plot the columns whose names contain 'precision', one line per model.
plot_eval_results('precision')

In [None]:
# Plot the columns whose names contain 'recall', one line per model.
plot_eval_results('recall')

In [None]:
# Plot the columns whose names contain 'mcc', one line per model.
plot_eval_results('mcc')

In [None]:
def get_roc2_results():
    """Return 'quarter' plus every results column whose name contains 'tpr' or 'fpr'.

    Columns are taken from the module-level ``datasets_eval_results_df`` in
    their existing order.
    """
    rate_cols = [
        col for col in datasets_eval_results_df.columns
        if any(rate in col for rate in ('tpr', 'fpr'))
    ]
    return datasets_eval_results_df[['quarter'] + rate_cols]

def plot_roc2_results():
    """Plot true-positive rate against false-positive rate, one line per model.

    Each '<model>_avg_<metric>' column from ``get_roc2_results`` is turned into
    a {quarter: value} mapping; a model's 'fpr' values form the x-axis and its
    'tpr' values the y-axis, so each marker corresponds to one quarter.
    """
    metric_df = get_roc2_results()

    # {model_name: {metric: {quarter: value}}}
    model_tpr_fpr = {}
    for col in metric_df.columns:
        if col == 'quarter':
            continue
        model_name, metric = col.split('_avg_')
        by_quarter = metric_df[['quarter', col]].set_index('quarter').to_dict()
        model_tpr_fpr.setdefault(model_name, {})[metric] = by_quarter[col]

    for model, rates in model_tpr_fpr.items():
        fpr_values = list(rates['fpr'].values())
        tpr_values = list(rates['tpr'].values())
        plt.plot(fpr_values, tpr_values, marker='o', label=model)

    plt.legend()
    plt.title('ROC2')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.show()

In [None]:
# Draw the per-model TPR-vs-FPR plot.
plot_roc2_results()