In [2]:
%load_ext autoreload
%autoreload 2

In [1]:
import numpy as np
import pandas as pd
import random

from multiprocessing import Pool
from functools import partial

from sklearn.model_selection import KFold

from scipy.stats import ttest_ind

from prettytable import PrettyTable

In [5]:
import performances_evaluation

In [6]:
import smote_cd

# Maupiti dataset 

In [7]:
X = np.load('data/maupiti_X_for_smote.npy')
Y = np.load('data/maupiti_y_for_smote.npy')

In [8]:
Y.shape

(2301, 4)

# Results with GB

In [97]:
%%time
list_iter = np.arange(50)
with Pool(8) as pool:
    all_res_parallel_gb = pool.map(partial(performances_evaluation.eval_perf_gb_maupiti, X=X, Y=Y, k_folds=5), list_iter)

Wall time: 16min 34s


In [99]:
r2_compositional_tot_gb, r2_raw_tot_gb, r2_logratio_tot_gb = [],[],[]
logloss_compositional_tot_gb, logloss_raw_tot_gb, logloss_logratio_tot_gb = [], [], []
rmse_compositional_tot_gb, rmse_raw_tot_gb, rmse_logratio_tot_gb = [], [], []
accuracy_compositional_tot_gb, accuracy_raw_tot_gb, accuracy_logratio_tot_gb = [], [], []
f1_compositional_tot_gb, f1_raw_tot_gb, f1_logratio_tot_gb = [], [], []

r2_compositional_std_gb, r2_raw_std_gb, r2_logratio_std_gb = [], [], []
logloss_compositional_std_gb, logloss_raw_std_gb, logloss_logratio_std_gb = [], [], []
rmse_compositional_std_gb, rmse_raw_std_gb, rmse_logratio_std_gb = [], [], []
accuracy_compositional_std_gb, accuracy_raw_std_gb, accuracy_logratio_std_gb = [], [], []
f1_compositional_std_gb, f1_raw_std_gb, f1_logratio_std_gb = [], [], []

r2_compositional, r2_raw, r2_logratio = [], [], []
logloss_compositional, logloss_raw, logloss_logratio = [], [], []
rmse_compositional, rmse_raw, rmse_logratio = [], [], []
accuracy_compositional, accuracy_raw, accuracy_logratio = [], [], []
f1_compositional, f1_raw, f1_logratio = [], [], []

In [100]:
for i in range(len(all_res_parallel_gb)):
    r2_compositional_temp, r2_raw_temp, r2_logratio_temp, logloss_compositional_temp, logloss_raw_temp, logloss_logratio_temp, rmse_compositional_temp, rmse_raw_temp, rmse_logratio_temp, accuracy_compositional_temp, accuracy_raw_temp, accuracy_logratio_temp, f1_compositional_temp, f1_raw_temp, f1_logratio_temp = all_res_parallel_gb[i]
    r2_compositional.append(np.round(np.mean(r2_compositional_temp,axis=0),5))
    r2_raw.append(np.round(np.mean(r2_raw_temp,axis=0),5))
    r2_logratio.append(np.round(np.mean(r2_logratio_temp,axis=0),5))
    logloss_compositional.append(np.round(np.mean(logloss_compositional_temp),5))
    logloss_raw.append(np.round(np.mean(logloss_raw_temp),5))
    logloss_logratio.append(np.round(np.mean(logloss_logratio_temp),5))
    rmse_compositional.append(np.round(np.mean(rmse_compositional_temp),5))
    rmse_raw.append(np.round(np.mean(rmse_raw_temp),5))
    rmse_logratio.append(np.round(np.mean(rmse_logratio_temp),5))
    accuracy_compositional.append(np.round(np.mean(accuracy_compositional_temp),5))
    accuracy_raw.append(np.round(np.mean(accuracy_raw_temp),5))
    accuracy_logratio.append(np.round(np.mean(accuracy_logratio_temp),5))
    f1_compositional.append(np.round([np.mean(i) for i in f1_compositional_temp],5))
    f1_raw.append(np.round([np.mean(i) for i in f1_raw_temp],5))
    f1_logratio.append(np.round([np.mean(i) for i in f1_logratio_temp],5))

r2_compositional_tot_gb.append(np.round(np.mean(r2_compositional,axis=0),5))
r2_raw_tot_gb.append(np.round(np.mean(r2_raw,axis=0),5))
r2_logratio_tot_gb.append(np.round(np.mean(r2_logratio,axis=0),5))
logloss_compositional_tot_gb.append(np.round(np.mean(logloss_compositional),5))
logloss_raw_tot_gb.append(np.round(np.mean(logloss_raw),5))
logloss_logratio_tot_gb.append(np.round(np.mean(logloss_logratio),5))
rmse_compositional_tot_gb.append(np.round(np.mean(rmse_compositional),5))
rmse_raw_tot_gb.append(np.round(np.mean(rmse_raw),5))
rmse_logratio_tot_gb.append(np.round(np.mean(rmse_logratio),5))
accuracy_compositional_tot_gb.append(np.round(np.mean(accuracy_compositional),5))
accuracy_raw_tot_gb.append(np.round(np.mean(accuracy_raw),5))
accuracy_logratio_tot_gb.append(np.round(np.mean(accuracy_logratio),5))
f1_compositional_tot_gb.append(np.round(np.mean(f1_compositional,axis=0),5))
f1_raw_tot_gb.append(np.round(np.mean(f1_raw,axis=0),5))
f1_logratio_tot_gb.append(np.round(np.mean(f1_logratio,axis=0),5))

r2_compositional_std_gb.append(np.round(np.std(r2_compositional,axis=0),5))
r2_raw_std_gb.append(np.round(np.std(r2_raw,axis=0),5))
r2_logratio_std_gb.append(np.round(np.std(r2_logratio,axis=0),5))
logloss_compositional_std_gb.append(np.round(np.std(logloss_compositional),5))
logloss_raw_std_gb.append(np.round(np.std(logloss_raw),5))
logloss_logratio_std_gb.append(np.round(np.std(logloss_logratio),5))
rmse_compositional_std_gb.append(np.round(np.std(rmse_compositional),5))
rmse_raw_std_gb.append(np.round(np.std(rmse_raw),5))
rmse_logratio_std_gb.append(np.round(np.std(rmse_logratio),5))
accuracy_compositional_std_gb.append(np.round(np.std(accuracy_compositional),5))
accuracy_raw_std_gb.append(np.round(np.std(accuracy_raw),5))
accuracy_logratio_std_gb.append(np.round(np.std(accuracy_logratio),5))
f1_compositional_std_gb.append(np.round(np.std(f1_compositional,axis=0),5))
f1_raw_std_gb.append(np.round(np.std(f1_raw,axis=0),5))
f1_logratio_std_gb.append(np.round(np.std(f1_logratio,axis=0),5))

In [101]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'RESULTS ON MAUPITI DATA WITH GRADIENT BOOSTING TREE'
pt.add_row(['Raw',accuracy_raw_tot_gb[0], logloss_raw_tot_gb[0], np.round(np.mean(f1_raw_tot_gb),5), rmse_raw_tot_gb[0], np.round(np.mean(r2_raw_tot_gb),5)])
pt.add_row(['Compositional',accuracy_compositional_tot_gb[0], logloss_compositional_tot_gb[0], np.round(np.mean(f1_compositional_tot_gb),5), rmse_compositional_tot_gb[0], np.round(np.mean(r2_compositional_tot_gb),5)])
pt.add_row(['Logratio',accuracy_logratio_tot_gb[0], logloss_logratio_tot_gb[0], np.round(np.mean(f1_logratio_tot_gb),5), rmse_logratio_tot_gb[0], np.round(np.mean(r2_logratio_tot_gb),5)])

print(pt)

+-------------------------------------------------------------------------+
|           RESULTS ON MAUPITI DATA WITH GRADIENT BOOSTING TREE           |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.85706  |    2.53805    | 0.80873  |  0.229  | 0.58278 |
| Compositional | 0.85864  |    2.48591    | 0.82241  | 0.22631 | 0.59555 |
|    Logratio   | 0.85884  |    2.50403    | 0.82223  | 0.22608 | 0.59571 |
+---------------+----------+---------------+----------+---------+---------+


In [102]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'STD ON TECATOR DATA WITH GRADIENT BOOSTING TREE'
pt.add_row(['Raw',accuracy_raw_std_gb[0], logloss_raw_std_gb[0], np.round(np.mean(f1_raw_std_gb),5), rmse_raw_std_gb[0], np.round(np.mean(r2_raw_std_gb),5)])
pt.add_row(['Compositional',accuracy_compositional_std_gb[0], logloss_compositional_std_gb[0], np.round(np.mean(f1_compositional_std_gb),5), rmse_compositional_std_gb[0], np.round(np.mean(r2_compositional_std_gb),5)])
pt.add_row(['Logratio',accuracy_logratio_std_gb[0], logloss_logratio_std_gb[0], np.round(np.mean(f1_logratio_std_gb),5), rmse_logratio_std_gb[0], np.round(np.mean(r2_logratio_std_gb),5)])

print(pt)

+-------------------------------------------------------------------------+
|             STD ON TECATOR DATA WITH GRADIENT BOOSTING TREE             |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.00312  |    0.19585    | 0.03091  | 0.00265 | 0.01815 |
| Compositional | 0.00364  |    0.14871    | 0.02792  | 0.00302 | 0.01847 |
|    Logratio   | 0.00307  |    0.18208    | 0.02843  | 0.00269 | 0.01912 |
+---------------+----------+---------------+----------+---------+---------+


In [103]:
print('Accuracy raw-compositional',ttest_ind(accuracy_raw,accuracy_compositional))
print('Accuracy raw-logratio',ttest_ind(accuracy_raw,accuracy_logratio))

print('Crossentropy raw-compositional',ttest_ind(logloss_raw,logloss_compositional))
print('Crossentropy raw-logratio',ttest_ind(logloss_raw,logloss_logratio))

print('RMSE raw-compositional',ttest_ind(rmse_raw,rmse_compositional))
print('RMSE raw-logratio',ttest_ind(rmse_raw,rmse_logratio))

print('R2 raw-compositional',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_compositional,axis=1)))
print('R2 raw-logratio',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_logratio,axis=1)))

print('F1 raw-compositional',ttest_ind(np.mean(f1_raw,axis=1),np.mean(f1_compositional,axis=1)))
print('F1 raw-logratio',ttest_ind(np.mean(f1_raw,axis=1),np.mean(f1_logratio,axis=1)))

Accuracy raw-compositional Ttest_indResult(statistic=-2.309539143950532, pvalue=0.02301253868208329)
Accuracy raw-logratio Ttest_indResult(statistic=-2.850461348812811, pvalue=0.005322770488433908)
Crossentropy raw-compositional Ttest_indResult(statistic=1.484240525110806, pvalue=0.1409544694943164)
Crossentropy raw-logratio Ttest_indResult(statistic=0.8905284551524786, pvalue=0.37536324214979133)
RMSE raw-compositional Ttest_indResult(statistic=4.681831866321815, pvalue=9.142609764084055e-06)
RMSE raw-logratio Ttest_indResult(statistic=5.405851432327238, pvalue=4.5439583486187e-07)
R2 raw-compositional Ttest_indResult(statistic=-4.936904242054122, pvalue=3.2589575877884725e-06)
R2 raw-logratio Ttest_indResult(statistic=-5.138545990759507, pvalue=1.4125855279348236e-06)
F1 raw-compositional Ttest_indResult(statistic=-9.265914358492433, pvalue=4.745424194572028e-15)
F1 raw-logratio Ttest_indResult(statistic=-9.343402259187783, pvalue=3.222407945783229e-15)


# With NN

In [145]:
%%time
list_iter = np.arange(50)
with Pool(8) as pool:
    all_res_parallel_nn = pool.map(partial(performances_evaluation.eval_perf_nn_maupiti, X=X, Y=Y, k_folds=5), list_iter)

Wall time: 1h 9min 27s


In [146]:
r2_compositional_tot_nn, r2_raw_tot_nn, r2_logratio_tot_nn = [],[],[]
logloss_compositional_tot_nn, logloss_raw_tot_nn, logloss_logratio_tot_nn = [], [], []
rmse_compositional_tot_nn, rmse_raw_tot_nn, rmse_logratio_tot_nn = [], [], []
accuracy_compositional_tot_nn, accuracy_raw_tot_nn, accuracy_logratio_tot_nn = [], [], []
f1_compositional_tot_nn, f1_raw_tot_nn, f1_logratio_tot_nn = [], [], []

r2_compositional_std_nn, r2_raw_std_nn, r2_logratio_std_nn = [], [], []
logloss_compositional_std_nn, logloss_raw_std_nn, logloss_logratio_std_nn = [], [], []
rmse_compositional_std_nn, rmse_raw_std_nn, rmse_logratio_std_nn = [], [], []
accuracy_compositional_std_nn, accuracy_raw_std_nn, accuracy_logratio_std_nn = [], [], []
f1_compositional_std_nn, f1_raw_std_nn, f1_logratio_std_nn = [], [], []

r2_compositional, r2_raw, r2_logratio = [], [], []
logloss_compositional, logloss_raw, logloss_logratio = [], [], []
rmse_compositional, rmse_raw, rmse_logratio = [], [], []
accuracy_compositional, accuracy_raw, accuracy_logratio = [], [], []
f1_compositional, f1_raw, f1_logratio = [], [], []

In [147]:
for i in range(len(all_res_parallel_nn)):
    r2_compositional_temp, r2_raw_temp, r2_logratio_temp, logloss_compositional_temp, logloss_raw_temp, logloss_logratio_temp, rmse_compositional_temp, rmse_raw_temp, rmse_logratio_temp, accuracy_compositional_temp, accuracy_raw_temp, accuracy_logratio_temp, f1_compositional_temp, f1_raw_temp, f1_logratio_temp = all_res_parallel_nn[i]
    r2_compositional.append(np.round(np.mean(r2_compositional_temp,axis=0),5))
    r2_raw.append(np.round(np.mean(r2_raw_temp,axis=0),5))
    r2_logratio.append(np.round(np.mean(r2_logratio_temp,axis=0),5))
    logloss_compositional.append(np.round(np.mean(logloss_compositional_temp),5))
    logloss_raw.append(np.round(np.mean(logloss_raw_temp),5))
    logloss_logratio.append(np.round(np.mean(logloss_logratio_temp),5))
    rmse_compositional.append(np.round(np.mean(rmse_compositional_temp),5))
    rmse_raw.append(np.round(np.mean(rmse_raw_temp),5))
    rmse_logratio.append(np.round(np.mean(rmse_logratio_temp),5))
    accuracy_compositional.append(np.round(np.mean(accuracy_compositional_temp),5))
    accuracy_raw.append(np.round(np.mean(accuracy_raw_temp),5))
    accuracy_logratio.append(np.round(np.mean(accuracy_logratio_temp),5))
    f1_compositional.append(np.round(np.mean(f1_compositional_temp,axis=0),5))
    f1_raw.append(np.round(np.mean(f1_raw_temp,axis=0),5))
    f1_logratio.append(np.round(np.mean(f1_logratio_temp,axis=0),5))

r2_compositional_tot_nn.append(np.round(np.mean(r2_compositional,axis=0),5))
r2_raw_tot_nn.append(np.round(np.mean(r2_raw,axis=0),5))
r2_logratio_tot_nn.append(np.round(np.mean(r2_logratio,axis=0),5))
logloss_compositional_tot_nn.append(np.round(np.mean(logloss_compositional),5))
logloss_raw_tot_nn.append(np.round(np.mean(logloss_raw),5))
logloss_logratio_tot_nn.append(np.round(np.mean(logloss_logratio),5))
rmse_compositional_tot_nn.append(np.round(np.mean(rmse_compositional),5))
rmse_raw_tot_nn.append(np.round(np.mean(rmse_raw),5))
rmse_logratio_tot_nn.append(np.round(np.mean(rmse_logratio),5))
accuracy_compositional_tot_nn.append(np.round(np.mean(accuracy_compositional),5))
accuracy_raw_tot_nn.append(np.round(np.mean(accuracy_raw),5))
accuracy_logratio_tot_nn.append(np.round(np.mean(accuracy_logratio),5))
f1_compositional_tot_nn.append(np.round(np.mean(f1_compositional,axis=0),5))
f1_raw_tot_nn.append(np.round(np.mean(f1_raw,axis=0),5))
f1_logratio_tot_nn.append(np.round(np.mean(f1_logratio,axis=0),5))

r2_compositional_std_nn.append(np.round(np.std(r2_compositional,axis=0),5))
r2_raw_std_nn.append(np.round(np.std(r2_raw,axis=0),5))
r2_logratio_std_nn.append(np.round(np.std(r2_logratio,axis=0),5))
logloss_compositional_std_nn.append(np.round(np.std(logloss_compositional),5))
logloss_raw_std_nn.append(np.round(np.std(logloss_raw),5))
logloss_logratio_std_nn.append(np.round(np.std(logloss_logratio),5))
rmse_compositional_std_nn.append(np.round(np.std(rmse_compositional),5))
rmse_raw_std_nn.append(np.round(np.std(rmse_raw),5))
rmse_logratio_std_nn.append(np.round(np.std(rmse_logratio),5))
accuracy_compositional_std_nn.append(np.round(np.std(accuracy_compositional),5))
accuracy_raw_std_nn.append(np.round(np.std(accuracy_raw),5))
accuracy_logratio_std_nn.append(np.round(np.std(accuracy_logratio),5))
f1_compositional_std_nn.append(np.round(np.std(f1_compositional,axis=0),5))
f1_raw_std_nn.append(np.round(np.std(f1_raw,axis=0),5))
f1_logratio_std_nn.append(np.round(np.std(f1_logratio,axis=0),5))

In [159]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'RESULTS ON MAUPITI DATA WITH NEURAL NETWORK'
pt.add_row(['Raw',accuracy_raw_tot_nn[0], logloss_raw_tot_nn[0], np.round(np.mean(f1_raw_tot_nn),5), rmse_raw_tot_nn[0], np.round(np.mean(r2_raw_tot_nn),5)])
pt.add_row(['Compositional',accuracy_compositional_tot_nn[0], logloss_compositional_tot_nn[0], np.round(np.mean(f1_compositional_tot_nn),5), rmse_compositional_tot_nn[0], np.round(np.mean(r2_compositional_tot_nn),5)])
pt.add_row(['Logratio',accuracy_logratio_tot_nn[0], logloss_logratio_tot_nn[0], np.round(np.mean(f1_logratio_tot_nn),5), rmse_logratio_tot_nn[0], np.round(np.mean(r2_logratio_tot_nn),5)])

print(pt)

+-------------------------------------------------------------------------+
|               RESULTS ON MAUPITI DATA WITH NEURAL NETWORK               |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.87657  |    4.04811    | 0.83129  | 0.21433 | 0.62426 |
| Compositional | 0.87753  |    3.95596    | 0.83404  | 0.21338 | 0.62248 |
|    Logratio   | 0.87719  |    3.98183    | 0.83479  | 0.21356 | 0.62338 |
+---------------+----------+---------------+----------+---------+---------+


In [158]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'STD ON MAUPITI DATA WITH NEURAL NETWORK'
pt.add_row(['Raw',accuracy_raw_std_nn[0], logloss_raw_std_nn[0], np.round(np.mean(f1_raw_std_nn),5), rmse_raw_std_nn[0], np.round(np.mean(r2_raw_std_nn),5)])
pt.add_row(['Compositional',accuracy_compositional_std_nn[0], logloss_compositional_std_nn[0], np.round(np.mean(f1_compositional_std_nn),5), rmse_compositional_std_nn[0], np.round(np.mean(r2_compositional_std_nn),5)])
pt.add_row(['Logratio',accuracy_logratio_std_nn[0], logloss_logratio_std_nn[0], np.round(np.mean(f1_logratio_std_nn),5), rmse_logratio_std_nn[0], np.round(np.mean(r2_logratio_std_nn),5)])

print(pt)

+-------------------------------------------------------------------------+
|                 STD ON MAUPITI DATA WITH NEURAL NETWORK                 |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.00317  |    0.41591    | 0.00848  | 0.00286 | 0.01771 |
| Compositional | 0.00347  |    0.40597    | 0.00962  | 0.00317 | 0.02043 |
|    Logratio   | 0.00327  |    0.45557    | 0.00886  | 0.00266 |  0.0169 |
+---------------+----------+---------------+----------+---------+---------+


In [157]:
print('Accuracy raw-compositional',ttest_ind(accuracy_raw,accuracy_compositional))
print('Accuracy raw-logratio',ttest_ind(accuracy_raw,accuracy_logratio))

print('Crossentropy raw-compositional',ttest_ind(logloss_raw,logloss_compositional))
print('Crossentropy raw-logratio',ttest_ind(logloss_raw,logloss_logratio))

print('RMSE raw-compositional',ttest_ind(rmse_raw,rmse_compositional))
print('RMSE raw-logratio',ttest_ind(rmse_raw,rmse_logratio))

print('R2 raw-compositional',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_compositional,axis=1)))
print('R2 raw-logratio',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_logratio,axis=1)))

print('F1 raw-compositional',ttest_ind(np.mean(f1_raw,axis=1),np.mean(f1_compositional,axis=1)))
print('F1 raw-logratio',ttest_ind(np.mean(f1_raw,axis=1),np.mean(f1_logratio,axis=1)))

Accuracy raw-compositional Ttest_indResult(statistic=-1.4377765210760756, pvalue=0.15368303385051568)
Accuracy raw-logratio Ttest_indResult(statistic=-0.9481744871736097, pvalue=0.34537291846759643)
Crossentropy raw-compositional Ttest_indResult(statistic=1.1098023014533045, pvalue=0.26980004506105587)
Crossentropy raw-logratio Ttest_indResult(statistic=0.7520911489017255, pvalue=0.45379883612241256)
RMSE raw-compositional Ttest_indResult(statistic=1.5655607929688617, pvalue=0.12067497337000917)
RMSE raw-logratio Ttest_indResult(statistic=1.3779621474646544, pvalue=0.17135325489144612)
R2 raw-compositional Ttest_indResult(statistic=0.6326381559798302, pvalue=0.5284433361124202)
R2 raw-logratio Ttest_indResult(statistic=0.36611783191774483, pvalue=0.7150663273751731)
F1 raw-compositional Ttest_indResult(statistic=-2.0487696786513108, pvalue=0.043157795126318454)
F1 raw-logratio Ttest_indResult(statistic=-2.831723043150941, pvalue=0.0056193184518877805)


# With Dirichlet

In [105]:
%%time
list_iter = np.arange(50)
with Pool(8) as pool:
    all_res_parallel_dirichlet = pool.map(partial(performances_evaluation.eval_perf_dirichlet, X=X, Y=Y, k_folds=5), list_iter)

Wall time: 26min 41s


In [106]:
r2_compositional_tot_dirichlet, r2_raw_tot_dirichlet, r2_logratio_tot_dirichlet = [],[],[]
logloss_compositional_tot_dirichlet, logloss_raw_tot_dirichlet, logloss_logratio_tot_dirichlet = [], [], []
rmse_compositional_tot_dirichlet, rmse_raw_tot_dirichlet, rmse_logratio_tot_dirichlet = [], [], []
accuracy_compositional_tot_dirichlet, accuracy_raw_tot_dirichlet, accuracy_logratio_tot_dirichlet = [], [], []
f1_compositional_tot_dirichlet, f1_raw_tot_dirichlet, f1_logratio_tot_dirichlet = [], [], []

r2_compositional_std_dirichlet, r2_raw_std_dirichlet, r2_logratio_std_dirichlet = [], [], []
logloss_compositional_std_dirichlet, logloss_raw_std_dirichlet, logloss_logratio_std_dirichlet = [], [], []
rmse_compositional_std_dirichlet, rmse_raw_std_dirichlet, rmse_logratio_std_dirichlet = [], [], []
accuracy_compositional_std_dirichlet, accuracy_raw_std_dirichlet, accuracy_logratio_std_dirichlet = [], [], []
f1_compositional_std_dirichlet, f1_raw_std_dirichlet, f1_logratio_std_dirichlet = [], [], []

In [107]:
for i in range(len(all_res_parallel_dirichlet)):
    r2_compositional_temp, r2_raw_temp, r2_logratio_temp, logloss_compositional_temp, logloss_raw_temp, logloss_logratio_temp, rmse_compositional_temp, rmse_raw_temp, rmse_logratio_temp, accuracy_compositional_temp, accuracy_raw_temp, accuracy_logratio_temp, f1_compositional_temp, f1_raw_temp, f1_logratio_temp = all_res_parallel_dirichlet[i]
    r2_compositional.append(np.round(np.mean(r2_compositional_temp,axis=0),5))
    r2_raw.append(np.round(np.mean(r2_raw_temp,axis=0),5))
    r2_logratio.append(np.round(np.mean(r2_logratio_temp,axis=0),5))
    logloss_compositional.append(np.round(np.mean(logloss_compositional_temp),5))
    logloss_raw.append(np.round(np.mean(logloss_raw_temp),5))
    logloss_logratio.append(np.round(np.mean(logloss_logratio_temp),5))
    rmse_compositional.append(np.round(np.mean(rmse_compositional_temp),5))
    rmse_raw.append(np.round(np.mean(rmse_raw_temp),5))
    rmse_logratio.append(np.round(np.mean(rmse_logratio_temp),5))
    accuracy_compositional.append(np.round(np.mean(accuracy_compositional_temp),5))
    accuracy_raw.append(np.round(np.mean(accuracy_raw_temp),5))
    accuracy_logratio.append(np.round(np.mean(accuracy_logratio_temp),5))
    f1_compositional.append(np.round(np.mean([np.mean(i) for i in f1_compositional_temp]),5))
    f1_raw.append(np.round(np.mean([np.mean(i) for i in f1_raw_temp]),5))
    f1_logratio.append(np.round(np.mean([np.mean(i) for i in f1_logratio_temp]),5))

r2_compositional_tot_dirichlet.append(np.round(np.mean(r2_compositional,axis=0),5))
r2_raw_tot_dirichlet.append(np.round(np.mean(r2_raw,axis=0),5))
r2_logratio_tot_dirichlet.append(np.round(np.mean(r2_logratio,axis=0),5))
logloss_compositional_tot_dirichlet.append(np.round(np.mean(logloss_compositional),5))
logloss_raw_tot_dirichlet.append(np.round(np.mean(logloss_raw),5))
logloss_logratio_tot_dirichlet.append(np.round(np.mean(logloss_logratio),5))
rmse_compositional_tot_dirichlet.append(np.round(np.mean(rmse_compositional),5))
rmse_raw_tot_dirichlet.append(np.round(np.mean(rmse_raw),5))
rmse_logratio_tot_dirichlet.append(np.round(np.mean(rmse_logratio),5))
accuracy_compositional_tot_dirichlet.append(np.round(np.mean(accuracy_compositional),5))
accuracy_raw_tot_dirichlet.append(np.round(np.mean(accuracy_raw),5))
accuracy_logratio_tot_dirichlet.append(np.round(np.mean(accuracy_logratio),5))
f1_compositional_tot_dirichlet.append(np.round(np.mean(f1_compositional,axis=0),5))
f1_raw_tot_dirichlet.append(np.round(np.mean(f1_raw,axis=0),5))
f1_logratio_tot_dirichlet.append(np.round(np.mean(f1_logratio,axis=0),5))

r2_compositional_std_dirichlet.append(np.round(np.std(r2_compositional,axis=0),5))
r2_raw_std_dirichlet.append(np.round(np.std(r2_raw,axis=0),5))
r2_logratio_std_dirichlet.append(np.round(np.std(r2_logratio,axis=0),5))
logloss_compositional_std_dirichlet.append(np.round(np.std(logloss_compositional),5))
logloss_raw_std_dirichlet.append(np.round(np.std(logloss_raw),5))
logloss_logratio_std_dirichlet.append(np.round(np.std(logloss_logratio),5))
rmse_compositional_std_dirichlet.append(np.round(np.std(rmse_compositional),5))
rmse_raw_std_dirichlet.append(np.round(np.std(rmse_raw),5))
rmse_logratio_std_dirichlet.append(np.round(np.std(rmse_logratio),5))
accuracy_compositional_std_dirichlet.append(np.round(np.std(accuracy_compositional),5))
accuracy_raw_std_dirichlet.append(np.round(np.std(accuracy_raw),5))
accuracy_logratio_std_dirichlet.append(np.round(np.std(accuracy_logratio),5))
f1_compositional_std_dirichlet.append(np.round(np.std(f1_compositional,axis=0),5))
f1_raw_std_dirichlet.append(np.round(np.std(f1_raw,axis=0),5))
f1_logratio_std_dirichlet.append(np.round(np.std(f1_logratio,axis=0),5))

  return array(a, dtype, copy=False, order=order, subok=True)


In [108]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'RESULTS ON TECATOR DATA WITH DIRICHLET MODEL'
pt.add_row(['Raw',accuracy_raw_tot_dirichlet[0], logloss_raw_tot_dirichlet[0], np.round(np.mean(f1_raw_tot_dirichlet),5), rmse_raw_tot_dirichlet[0], np.round(np.mean(r2_raw_tot_dirichlet),5)])
pt.add_row(['Compositional',accuracy_compositional_tot_dirichlet[0], logloss_compositional_tot_dirichlet[0], np.round(np.mean(f1_compositional_tot_dirichlet),5), rmse_compositional_tot_dirichlet[0], np.round(np.mean(r2_compositional_tot_dirichlet),5)])
pt.add_row(['Logratio',accuracy_logratio_tot_dirichlet[0], logloss_logratio_tot_dirichlet[0], np.round(np.mean(f1_logratio_tot_dirichlet),5), rmse_logratio_tot_dirichlet[0], np.round(np.mean(r2_logratio_tot_dirichlet),5)])

print(pt)

+-------------------------------------------------------------------------+
|               RESULTS ON TECATOR DATA WITH DIRICHLET MODEL              |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.80096  |    1.67552    | 0.68372  | 0.26212 | 0.42004 |
| Compositional | 0.80961  |    1.65353    | 0.76171  | 0.26186 | 0.42277 |
|    Logratio   | 0.80984  |    1.66257    | 0.76183  | 0.26174 | 0.42281 |
+---------------+----------+---------------+----------+---------+---------+


In [109]:
columns_names = ['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2']
rows_names = ['Raw', 'Compositional', 'Logratio']

pt = PrettyTable(['Model', 'Accuracy', 'Cross-entropy', 'F1-score', 'RMSE', 'R2'])
pt.title = 'STD ON TECATOR DATA WITH DIRICHLET MODEL'
pt.add_row(['Raw',accuracy_raw_std_dirichlet[0], logloss_raw_std_dirichlet[0], np.round(np.mean(f1_raw_std_dirichlet),5), rmse_raw_std_dirichlet[0], np.round(np.mean(r2_raw_std_dirichlet),5)])
pt.add_row(['Compositional',accuracy_compositional_std_dirichlet[0], logloss_compositional_std_dirichlet[0], np.round(np.mean(f1_compositional_std_dirichlet),5), rmse_compositional_std_dirichlet[0], np.round(np.mean(r2_compositional_std_dirichlet),5)])
pt.add_row(['Logratio',accuracy_logratio_std_dirichlet[0], logloss_logratio_std_dirichlet[0], np.round(np.mean(f1_logratio_std_dirichlet),5), rmse_logratio_std_dirichlet[0], np.round(np.mean(r2_logratio_std_dirichlet),5)])

print(pt)

+-------------------------------------------------------------------------+
|                 STD ON TECATOR DATA WITH DIRICHLET MODEL                |
+---------------+----------+---------------+----------+---------+---------+
|     Model     | Accuracy | Cross-entropy | F1-score |   RMSE  |    R2   |
+---------------+----------+---------------+----------+---------+---------+
|      Raw      | 0.05615  |    0.87358    | 0.12696  | 0.03318 | 0.16349 |
| Compositional | 0.04912  |    0.83899    | 0.06407  | 0.03561 | 0.17354 |
|    Logratio   | 0.04907  |    0.85125    | 0.06384  | 0.03572 | 0.17372 |
+---------------+----------+---------------+----------+---------+---------+


In [110]:
print('Accuracy raw-compositional',ttest_ind(accuracy_raw,accuracy_compositional))
print('Accuracy raw-logratio',ttest_ind(accuracy_raw,accuracy_logratio))

print('Crossentropy raw-compositional',ttest_ind(logloss_raw,logloss_compositional))
print('Crossentropy raw-logratio',ttest_ind(logloss_raw,logloss_logratio))

print('RMSE raw-compositional',ttest_ind(rmse_raw,rmse_compositional))
print('RMSE raw-logratio',ttest_ind(rmse_raw,rmse_logratio))

print('R2 raw-compositional',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_compositional,axis=1)))
print('R2 raw-logratio',ttest_ind(np.mean(r2_raw,axis=1),np.mean(r2_logratio,axis=1)))

print('F1 raw-compositional',ttest_ind(np.mean(f1_raw),np.mean(f1_compositional)))
print('F1 raw-logratio',ttest_ind(np.mean(f1_raw),np.mean(f1_logratio)))

Accuracy raw-compositional Ttest_indResult(statistic=-1.1546541114215945, pvalue=0.2496237450645589)
Accuracy raw-logratio Ttest_indResult(statistic=-1.185887198197051, pvalue=0.23708800702248567)
Crossentropy raw-compositional Ttest_indResult(statistic=0.1806269088703874, pvalue=0.8568451783523706)
Crossentropy raw-logratio Ttest_indResult(statistic=0.10562221303668105, pvalue=0.9159890269695223)
RMSE raw-compositional Ttest_indResult(statistic=0.053947035666336345, pvalue=0.9570317542882343)
RMSE raw-logratio Ttest_indResult(statistic=0.0771721189128673, pvalue=0.9385645476179647)
R2 raw-compositional Ttest_indResult(statistic=-0.11437828804003904, pvalue=0.9090538248130264)
R2 raw-logratio Ttest_indResult(statistic=-0.11581646124921083, pvalue=0.9079153804788491)
F1 raw-compositional Ttest_indResult(statistic=-55.64396655833857, pvalue=1.2073658200231692e-11)
F1 raw-logratio Ttest_indResult(statistic=-62.24099722309945, pvalue=4.936058380771813e-12)
