In [1]:
try:
    import xml.etree.cElementTree as ET
except ImportError:
    import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd

### GLOBAL CONSTANTS
# Directory prefix that get_all_df prepends to result file names by plain string
# concatenation, so it must end with a path separator.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
PATH_STEP1 = '/home/jaum/PG/pg/Final_Output/step1/emd/'

# Names looked up in the XML result files: CLF_TAG is the element tag that is
# iterated, CONFIG_TAG is the attribute used to select a classifier and
# SCORE_TAG is the attribute whose value is read as a float.
CONFIG_TAG = 'config'
SCORE_TAG = 'score'
CLF_TAG = 'classifier'

# Values compared against the CONFIG_TAG attribute to select each classifier;
# the same strings are reused as row labels of the result DataFrames.
KNN_TAG = 'KNeighborsClassifier(n_neighbors=1)'
RF_TAG = 'RandomForestClassifier(random_state=1010)'
SVM_TAG = 'SVC(random_state=1010)'


In [2]:
### METHODS NEEDED

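# Typical arguments passed to parse_unique_tag in this notebook:
#   tag      = 'classifier'                      (element name to iterate over)
#   key_attr = ('config', <classifier string>)   (attribute name, required value)
#   ret_attr = 'score'                           (attribute returned as a float)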
def parse_unique_tag(file_in,tag,key_attr,ret_attr):
    """Collect a numeric attribute from every element matching a tag and an attribute value.

    Parameters
    ----------
    file_in : object exposing ``iter(tag)``
        Parsed XML tree (or element) to search.
    tag : str
        Name of the elements to iterate over.
    key_attr : sequence of two items
        ``(attribute_name, expected_value)``; only elements whose
        ``attribute_name`` equals ``expected_value`` are kept.
    ret_attr : str
        Attribute whose value is converted to ``float`` and returned.

    Returns
    -------
    list of float
        One value per matching element, in document order (possibly empty).

    Raises
    ------
    KeyError
        If a matching element has no ``ret_attr`` attribute.
    ValueError
        If a ``ret_attr`` value cannot be converted to ``float``.
    """
    key_name, key_value = key_attr[0], key_attr[1]

    # Elements that lack the key attribute altogether cannot match, so they are
    # skipped instead of aborting the whole scan with a KeyError.
    return [
        float(elem.attrib[ret_attr])
        for elem in file_in.iter(tag)
        if key_name in elem.attrib and elem.attrib[key_name] == key_value
    ]

def get_clfs_scores(file_in):
    """Return the score lists of the KNN, RF and SVM classifiers, in that order.

    Each list holds every SCORE_TAG value found on CLF_TAG elements whose
    CONFIG_TAG attribute equals the corresponding classifier string.
    """
    scores_per_clf = [
        parse_unique_tag(file_in, CLF_TAG, (CONFIG_TAG, clf_config), SCORE_TAG)
        for clf_config in (KNN_TAG, RF_TAG, SVM_TAG)
    ]
    return tuple(scores_per_clf)

def _mean_by_group(scores, n_groups, group_size):
    """Average ``n_groups`` consecutive, non-overlapping slices of ``group_size`` scores."""
    return [np.mean(scores[g * group_size:(g + 1) * group_size])
            for g in range(n_groups)]


def _mean_score_df(clf_scores, n_groups, group_size, columns):
    """Build a classifier (rows) by configuration (columns) frame of mean scores."""
    rows = [_mean_by_group(scores, n_groups, group_size) for scores in clf_scores]
    return pd.DataFrame(rows, index=[KNN_TAG, RF_TAG, SVM_TAG], columns=columns)


def get_score_results_df(normal_file_path, unbiased_file_path,
                         runs_per_config=100, imfs=(4, 8, 16), metric='F1'):
    """Summarise the per-run scores of both experiments as mean scores per IMF setting.

    Both XML files are parsed and, for each classifier, the score list is cut
    into consecutive groups of ``runs_per_config`` values; every group is
    averaged and labelled with the matching entry of ``imfs``.

    Parameters
    ----------
    normal_file_path, unbiased_file_path : str
        Paths of the XML result files of the normal / unbiased experiment.
    runs_per_config : int, optional
        Number of consecutive scores belonging to one IMF setting (default 100).
    imfs : sequence, optional
        IMF counts used to label the groups, in file order (default ``(4, 8, 16)``).
    metric : str, optional
        Metric name embedded in the column labels (default ``'F1'``).

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        ``normal_score_df`` and ``unbiased_score_df``; rows are indexed by
        KNN_TAG, RF_TAG and SVM_TAG, columns are ``Normal_<metric>_Imfs_<n>`` /
        ``Unbiased_<metric>_Imfs_<n>``.

    Raises
    ------
    ValueError
        If ``runs_per_config`` is not positive, or if the number of complete
        groups found in the normal KNN scores differs from ``len(imfs)``.
    """
    if runs_per_config <= 0:
        raise ValueError('runs_per_config must be a positive integer')

    normal_scores = get_clfs_scores(ET.parse(normal_file_path))
    unbiased_scores = get_clfs_scores(ET.parse(unbiased_file_path))

    # The number of groups is derived from the normal KNN scores only and then
    # applied to every other score list; trailing scores that do not fill a
    # complete group are ignored.
    n_groups = len(normal_scores[0]) // runs_per_config
    if n_groups != len(imfs):
        raise ValueError(
            'found {} complete group(s) of {} scores but {} IMF label(s) were given'
            .format(n_groups, runs_per_config, len(imfs)))

    # Setting up the DataFrames
    normal_cols = ['Normal_{}_Imfs_{}'.format(metric, n) for n in imfs]
    unbiased_cols = ['Unbiased_{}_Imfs_{}'.format(metric, n) for n in imfs]

    normal_score_df = _mean_score_df(normal_scores, n_groups, runs_per_config, normal_cols)
    unbiased_score_df = _mean_score_df(unbiased_scores, n_groups, runs_per_config, unbiased_cols)

    return normal_score_df, unbiased_score_df

def _best_per_classifier(score_df, label_column):
    """For each classifier row, record the best-scoring column label and its rounded score."""
    clf_tags = [KNN_TAG, RF_TAG, SVM_TAG]
    records = []
    for clf in clf_tags:
        clf_scores = score_df.loc[clf]
        # idxmax() returns the column *label*; np.argmax on a Series yields an
        # integer position on current numpy/pandas versions instead.
        records.append({label_column: clf_scores.idxmax(),
                        'Score': round(clf_scores.max(), 6)})
    return pd.DataFrame(records, index=clf_tags, columns=[label_column, 'Score'])


def get_best_config_df(normal_score_df, unbiased_score_df):
    """Report, per classifier, the best-scoring configuration of each experiment.

    Parameters
    ----------
    normal_score_df, unbiased_score_df : pandas.DataFrame
        Score frames with one row per classifier (KNN_TAG, RF_TAG, SVM_TAG) and
        one column per configuration.

    Returns
    -------
    pandas.DataFrame
        Indexed by classifier, with columns ``Best_Normal``, ``Score_x``,
        ``Best_Unbiased`` and ``Score_y``. The ``_x`` / ``_y`` suffixes come
        from the merge: ``Score_x`` belongs to the normal experiment and
        ``Score_y`` to the unbiased one. Scores are rounded to 6 decimals.
    """
    best_normal_df = _best_per_classifier(normal_score_df, 'Best_Normal')
    best_unbiased_df = _best_per_classifier(unbiased_score_df, 'Best_Unbiased')

    return pd.merge(best_normal_df, best_unbiased_df, how='inner', left_index=True, right_index=True)

def get_all_df(normal_file_name, unbiased_file_name):
    """Build the score frames of both experiments plus the best-configuration summary.

    Both file names are appended to PATH_STEP1 to obtain the paths that are
    parsed. Returns ``(normal_score_df, unbiased_score_df, best_config_df)``.
    """
    score_dfs = get_score_results_df(PATH_STEP1 + normal_file_name,
                                     PATH_STEP1 + unbiased_file_name)
    normal_df, unbiased_df = score_dfs

    return normal_df, unbiased_df, get_best_config_df(normal_df, unbiased_df)

In [3]:
## Find the best number of EMD IMFs for both the NORMAL and UNBIASED experiments, using the F1_MACRO score
# The file names below are appended to PATH_STEP1 inside get_all_df.
NORMAL_F1_RESULTS_FILE = 'step1_emd_normal_f1_results.xml'
UNBIASED_F1_RESULTS_FILE = 'step1_emd_unbiased_f1_results.xml'

# Returns the two mean-score frames and the per-classifier best-configuration summary.
normal_score_df, unbiased_score_df, best_config_df = get_all_df(NORMAL_F1_RESULTS_FILE, UNBIASED_F1_RESULTS_FILE)

In [4]:
# Mean score per classifier (rows) for each IMF setting (columns), normal experiment.
# NOTE(review): the recorded output below shows `*_Acc_*` column labels, whereas
# get_score_results_df builds `*_F1_*` labels -- the output looks stale; re-run to refresh.
normal_score_df

Unnamed: 0,Normal_Acc_Imfs_4,Normal_Acc_Imfs_8,Normal_Acc_Imfs_16
KNeighborsClassifier(n_neighbors=1),0.97294,0.971712,0.970467
RandomForestClassifier(random_state=1010),0.983662,0.984794,0.979457
SVC(random_state=1010),0.090052,0.084823,0.069922


In [5]:
# Mean score per classifier (rows) for each IMF setting (columns), unbiased experiment.
# NOTE(review): the recorded output below shows `*_Acc_*` column labels, whereas
# get_score_results_df builds `*_F1_*` labels -- the output looks stale; re-run to refresh.
unbiased_score_df

Unnamed: 0,Unbiased_Acc_Imfs_4,Unbiased_Acc_Imfs_8,Unbiased_Acc_Imfs_16
KNeighborsClassifier(n_neighbors=1),0.478133,0.485018,0.481589
RandomForestClassifier(random_state=1010),0.500085,0.545623,0.493885
SVC(random_state=1010),0.097579,0.091025,0.057748


In [6]:
# Best-scoring IMF setting and its score per classifier, for both experiments
# (Score_x: normal experiment, Score_y: unbiased experiment).
# NOTE(review): the recorded output below uses `*_Acc_*` labels that the current
# code does not produce -- presumably from an earlier run; re-run to refresh.
best_config_df

Unnamed: 0,Best_Normal,Score_x,Best_Unbiased,Score_y
KNeighborsClassifier(n_neighbors=1),Normal_Acc_Imfs_4,0.97294,Unbiased_Acc_Imfs_8,0.485018
RandomForestClassifier(random_state=1010),Normal_Acc_Imfs_8,0.984794,Unbiased_Acc_Imfs_8,0.545623
SVC(random_state=1010),Normal_Acc_Imfs_4,0.090052,Unbiased_Acc_Imfs_4,0.097579
