In [81]:
import pandas as pd
import numpy as np
import pickle
from sklearn.metrics import *
import os
import itertools
import matplotlib.pyplot as plt
import seaborn as sns

In [82]:
# Directory that holds the pickled closed-set binary result files.
# The location can be overridden with the CLOSED_SET_BINARY_RESULTS environment
# variable so the notebook also runs on machines where the original path is
# not mounted; when the variable is unset the original path is used.
result_root_path = os.environ.get(
    'CLOSED_SET_BINARY_RESULTS',
    '/nas/home/cborrelli/bot_speech/results/closed_set_binary')


In [83]:
# FFT lengths of the experiment grid. They are paired positionally with the hop
# sizes below; each hop size is half of the matching FFT length (256, 128, 64).
nfft_list = [512, 256, 128]
hop_size_list = [fft_length // 2 for fft_length in nfft_list]

# Feature subsets used to compose the result file names.
selected_features_list = [
    ['lpc', 'bicoh'],
    ['bicoh'],
    ['lpc'],
]

# LPC settings that are embedded in the result file names.
number_lpc_order, stop_lpc_order = 49, 50

# Keys identifying the normalizers and classifiers of the grid.
normalizers_keys = ['minmax', 'zscore']
classifiers_keys = ['svm', 'rf']

## General results table

In [84]:
# Build `results_df`: one row per result file found on disk, holding the
# configuration, the balanced accuracy and the row-normalised confusion matrix
# of the train/dev/eval splits, and the stored best model.
#
# Rows are gathered in a plain list and turned into a DataFrame once at the
# end, instead of enlarging the frame row by row.
result_rows = []

for selected_features in selected_features_list:
    # FFT length and hop size are paired positionally, not crossed.
    for nfft, hop_size in zip(nfft_list, hop_size_list):
        for c in classifiers_keys:
            for n in normalizers_keys:
                result_name = (
                    "class_{}_norm_{}_nfft_{}_hop-size_{}_numberlpcorder_{}_stoplpcorder_{}".format(
                        c, n, nfft, hop_size,
                        number_lpc_order,
                        stop_lpc_order)
                    + "_selected_features_" + "-".join(selected_features) + ".npy"
                )
                result_filename = os.path.join(result_root_path, result_name)

                # Configurations without a result file are skipped silently.
                if not os.path.exists(result_filename):
                    continue

                # The files carry a .npy suffix but are read with pickle.
                # The context manager closes the handle even if unpickling fails.
                # NOTE(security): pickle can execute arbitrary code while loading;
                # only point this notebook at trusted result files.
                with open(result_filename, "rb") as result_file:
                    result_dict = pickle.load(result_file)

                row = {
                    'nfft': nfft,
                    'hop_size': hop_size,
                    'selected_features': selected_features,
                    'number_lpc_order': number_lpc_order,
                    'stop_lpc_order': stop_lpc_order,
                    'normalization': n,
                    'classifier': c,
                    'model_params': result_dict['best_model'],
                }

                # Same two metrics for every split; keys follow the
                # 'y_<split>' / 'y_predict_<split>' naming of the result dict.
                for split in ('train', 'dev', 'eval'):
                    y_true = result_dict['y_' + split]
                    y_predicted = result_dict['y_predict_' + split]
                    row['accuracy_' + split] = balanced_accuracy_score(y_true, y_predicted)
                    row['cm_' + split] = confusion_matrix(y_true, y_predicted, normalize='true')

                result_rows.append(row)

# Passing `columns` fixes the column order and yields an empty frame with the
# right columns when no result file was found.
results_df = pd.DataFrame(result_rows,
                          columns=['nfft', 'hop_size', 'selected_features',
                                   'number_lpc_order', 'stop_lpc_order',
                                   'normalization', 'classifier',
                                   'accuracy_train', 'accuracy_dev', 'accuracy_eval',
                                   'cm_train', 'cm_dev', 'cm_eval',
                                   'model_params'])

In [102]:
# Build `table`: best balanced accuracy on the dev and eval splits for every
# (feature set, FFT length, hop size) column. `results_df` must already exist.
#
# The three lists below are read column-wise. The LPC-only column has empty
# NFFT / hop-size entries because it is not filtered by FFT settings.
features_selected_table = [['bicoh'], ['bicoh'], ['bicoh'], ['lpc'], ['lpc', 'bicoh'],
                           ['lpc', 'bicoh'], ['lpc', 'bicoh']]
nfft_table = [[512], [256], [128], [], [512], [256], [128]]
hop_size_table = [[256], [128], [64], [], [256], [128], [64]]

# Materialise the zip so the specification can be traversed more than once.
columns = list(zip(features_selected_table, nfft_table, hop_size_table))

# One label per column; computed once and reused for the assignments below.
dataframe_columns = ['Feat {} NFFT {} HopSize {}'.format(*spec) for spec in columns]

dataframe_index = ['dev', 'eval']
table = pd.DataFrame(columns=dataframe_columns, index=dataframe_index)

for col, column_label in zip(columns, dataframe_columns):
    feat = col[0]

    # The cells of 'selected_features' are lists, so they are compared one by
    # one. The mask is a boolean ndarray rather than a plain list because:
    #  * an empty list used as `results_df[[]]` is a column selection, which
    #    makes the accuracy lookups below fail on an empty results frame;
    #  * `Series & list` is deprecated in pandas 2.x.
    selection = np.array([a == feat for a in results_df['selected_features']], dtype=bool)

    if feat != ['lpc']:
        # Non-LPC columns are additionally restricted to one FFT configuration.
        nfft = col[1][0]
        hop_size = col[2][0]
        selection = (selection
                     & (results_df['nfft'] == nfft).to_numpy()
                     & (results_df['hop_size'] == hop_size).to_numpy())

    partial = results_df[selection]

    # Best score over all classifier / normaliser combinations that matched.
    table.loc['dev', column_label] = np.max(partial['accuracy_dev'])
    table.loc['eval', column_label] = np.max(partial['accuracy_eval'])

In [103]:
# Display the column labels generated for the summary table.
dataframe_columns

["Feat ['bicoh'] NFFT [512] HopSize [256]",
 "Feat ['bicoh'] NFFT [256] HopSize [128]",
 "Feat ['bicoh'] NFFT [128] HopSize [64]",
 "Feat ['lpc'] NFFT [] HopSize []",
 "Feat ['lpc', 'bicoh'] NFFT [512] HopSize [256]",
 "Feat ['lpc', 'bicoh'] NFFT [256] HopSize [128]",
 "Feat ['lpc', 'bicoh'] NFFT [128] HopSize [64]"]

In [104]:
# Display the summary table (rows 'dev' and 'eval', one column per feature/FFT setting).
table

Unnamed: 0,Feat ['bicoh'] NFFT [512] HopSize [256],Feat ['bicoh'] NFFT [256] HopSize [128],Feat ['bicoh'] NFFT [128] HopSize [64],Feat ['lpc'] NFFT [] HopSize [],"Feat ['lpc', 'bicoh'] NFFT [512] HopSize [256]","Feat ['lpc', 'bicoh'] NFFT [256] HopSize [128]","Feat ['lpc', 'bicoh'] NFFT [128] HopSize [64]"
dev,0.726056,0.694968,0.687488,0.925649,0.906676,0.920901,0.941906
eval,0.591724,0.578243,0.578345,0.73856,0.741133,0.736621,0.735406


## Per-algorithm results table

In [107]:
# Algorithm keys 'A01' ... 'A19' mapped to the integer labels 1 ... 19 that are
# matched against the 'y_dev_orig' / 'y_eval_orig' arrays of the result files.
alg_dict = {'A{:02d}'.format(alg_id): alg_id for alg_id in range(1, 20)}

# A01-A06 are scored on the dev split, A07-A19 on the eval split.
alg_dev_dict = {key: alg_id for key, alg_id in alg_dict.items() if alg_id <= 6}
alg_eval_dict = {key: alg_id for key, alg_id in alg_dict.items() if alg_id >= 7}


def _score_single_algorithm(y_true, y_predict, y_orig, alg_id):
    """Score the samples of one algorithm together with the label-0 samples.

    Parameters
    ----------
    y_true, y_predict : indexable with a boolean mask
        Binary ground truth and predictions of one split
        (assumed to be numpy arrays; confirm against the result files).
    y_orig : array-like
        Original per-sample labels of the same split; samples whose label is
        `alg_id` or 0 are kept (0 is presumably the genuine class; confirm).
    alg_id : int
        Integer label of the algorithm to isolate.

    Returns
    -------
    tuple
        (balanced accuracy, confusion matrix normalised over the true labels).
    """
    labels = np.asarray(y_orig)
    # Vectorised replacement for the per-sample `a or b` Python loop.
    keep = (labels == alg_id) | (labels == 0)
    y_true_kept = y_true[keep]
    y_predict_kept = y_predict[keep]
    return (balanced_accuracy_score(y_true_kept, y_predict_kept),
            confusion_matrix(y_true_kept, y_predict_kept, normalize='true'))


# Rows are gathered in a list and converted to a DataFrame once at the end.
alg_rows = []

for selected_features in selected_features_list:
    # FFT length and hop size are paired positionally, not crossed.
    for nfft, hop_size in zip(nfft_list, hop_size_list):
        for c in classifiers_keys:
            for n in normalizers_keys:
                result_name = (
                    "class_{}_norm_{}_nfft_{}_hop-size_{}_numberlpcorder_{}_stoplpcorder_{}".format(
                        c, n, nfft, hop_size,
                        number_lpc_order,
                        stop_lpc_order)
                    + "_selected_features_" + "-".join(selected_features) + ".npy"
                )
                result_filename = os.path.join(result_root_path, result_name)

                # Configurations without a result file are skipped silently.
                if not os.path.exists(result_filename):
                    continue

                # The context manager closes the handle even if unpickling fails.
                # NOTE(security): pickle can execute arbitrary code while loading;
                # only point this notebook at trusted result files.
                with open(result_filename, "rb") as result_file:
                    result_dict = pickle.load(result_file)

                for key_alg, alg_id in alg_dict.items():
                    # Each algorithm gets one row per split it belongs to; the
                    # columns of the other split stay None.
                    for split, split_algs in (('dev', alg_dev_dict), ('eval', alg_eval_dict)):
                        if key_alg not in split_algs:
                            continue

                        accuracy, cm = _score_single_algorithm(
                            result_dict['y_' + split],
                            result_dict['y_predict_' + split],
                            result_dict['y_' + split + '_orig'],
                            alg_id)

                        row = {
                            'nfft': nfft,
                            'hop_size': hop_size,
                            'selected_features': selected_features,
                            'number_lpc_order': number_lpc_order,
                            'stop_lpc_order': stop_lpc_order,
                            'normalization': n,
                            'classifier': c,
                            'accuracy_dev': None,
                            'accuracy_eval': None,
                            'cm_dev': None,
                            'cm_eval': None,
                            'model_params': result_dict['best_model'],
                            'alg': key_alg,
                        }
                        row['accuracy_' + split] = accuracy
                        row['cm_' + split] = cm
                        alg_rows.append(row)

# Passing `columns` fixes the column order and yields an empty frame with the
# right columns when no result file was found.
alg_results_df = pd.DataFrame(alg_rows,
                              columns=['nfft', 'hop_size', 'selected_features',
                                       'number_lpc_order', 'stop_lpc_order',
                                       'normalization', 'classifier',
                                       'accuracy_dev', 'accuracy_eval',
                                       'cm_dev', 'cm_eval',
                                       'model_params', 'alg'])

In [128]:
# Build `alg_table`: for every algorithm (row) and every
# (feature set, FFT length, hop size) column, the best balanced accuracy found
# in `alg_results_df`. Dev algorithms read 'accuracy_dev', eval algorithms
# read 'accuracy_eval'.
alg_list = list(alg_dict.keys())
dev_list = list(alg_dev_dict.keys())
eval_list = list(alg_eval_dict.keys())

# Column specification, read column-wise. The LPC-only column uses ' ' as
# NFFT / hop-size placeholder because it is not filtered by FFT settings.
features_selected_table = [['bicoh'], ['bicoh'], ['bicoh'], ['lpc'], ['lpc', 'bicoh'],
                           ['lpc', 'bicoh'], ['lpc', 'bicoh']]
nfft_table = [[512], [256], [128], ' ', [512], [256], [128]]
hop_size_table = [[256], [128], [64], ' ', [256], [128], [64]]

# Materialise the zip so the specification can be traversed more than once.
columns = list(zip(features_selected_table, nfft_table, hop_size_table))

# One label per column; computed once and reused for the assignments below.
dataframe_columns = ['Feat {} NFFT {} HopSize {}'.format(*spec) for spec in columns]

dataframe_index = alg_list

alg_table = pd.DataFrame(columns=dataframe_columns, index=dataframe_index)

for col, column_label in zip(columns, dataframe_columns):
    feat = col[0]

    # The cells of 'selected_features' are lists, so they are compared one by
    # one. The mask is a boolean ndarray because combining a plain list with a
    # Series through `&` is deprecated in pandas 2.x. It does not depend on the
    # algorithm, so it is computed once per column.
    column_mask = np.array([a == feat for a in alg_results_df['selected_features']], dtype=bool)

    if feat != ['lpc']:
        # Non-LPC columns are additionally restricted to one FFT configuration.
        nfft = col[1][0]
        hop_size = col[2][0]
        column_mask = (column_mask
                       & (alg_results_df['nfft'] == nfft).to_numpy()
                       & (alg_results_df['hop_size'] == hop_size).to_numpy())

    for alg in alg_list:
        partial = alg_results_df[column_mask & (alg_results_df['alg'] == alg).to_numpy()]

        # Best score over all classifier / normaliser combinations that matched.
        if alg in dev_list:
            alg_table.loc[alg, column_label] = np.max(partial['accuracy_dev'])
        if alg in eval_list:
            alg_table.loc[alg, column_label] = np.max(partial['accuracy_eval'])

In [129]:
# Display the per-algorithm table (one row per algorithm key, one column per feature/FFT setting).
alg_table

Unnamed: 0,Feat ['bicoh'] NFFT [512] HopSize [256],Feat ['bicoh'] NFFT [256] HopSize [128],Feat ['bicoh'] NFFT [128] HopSize [64],Feat ['lpc'] NFFT HopSize,"Feat ['lpc', 'bicoh'] NFFT [512] HopSize [256]","Feat ['lpc', 'bicoh'] NFFT [256] HopSize [128]","Feat ['lpc', 'bicoh'] NFFT [128] HopSize [64]"
A01,0.614937,0.525835,0.570023,0.929053,0.917495,0.918845,0.940808
A02,0.881151,0.872635,0.86298,0.940164,0.940006,0.938761,0.945569
A03,0.859488,0.846206,0.846968,0.952282,0.948351,0.950213,0.962004
A04,0.546383,0.505412,0.499132,0.885979,0.826753,0.87907,0.915062
A05,0.80486,0.801131,0.777942,0.94646,0.942806,0.944668,0.954519
A06,0.655303,0.628364,0.609178,0.898109,0.868059,0.898389,0.932016
A07,0.540675,0.504877,0.500624,0.865069,0.81331,0.864407,0.905338
A08,0.693097,0.626965,0.590915,0.951433,0.955464,0.955354,0.95369
A09,0.54254,0.508032,0.507543,0.834549,0.88248,0.864657,0.835021
A10,0.534095,0.515549,0.50388,0.511092,0.491767,0.48658,0.492846


In [130]:
# Write the per-algorithm table to disk. to_csv does not create missing
# directories, so make sure the output folder exists first.
os.makedirs("tables", exist_ok=True)
alg_table.to_csv("tables/closed_set_binary.csv")

In [131]:
# Write the dev/eval summary table to disk. to_csv does not create missing
# directories, so make sure the output folder exists first.
os.makedirs("tables", exist_ok=True)
table.to_csv("tables/closed_set_binary_tot.csv")