## Import libraries

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import SVG, Markdown, display, display_pretty
# Remove the cap on displayed columns so wide result tables are not truncated.
pd.options.display.max_columns = None

In [None]:
# Print floats with 4 decimal places and without scientific notation.
np.set_printoptions(precision=4, suppress=True)

## Load data

In [None]:
# Load the hyperparameter experiment results table from the Results folder.
df = pd.read_csv(
    filepath_or_buffer='../Results/results_experiments_hyperparameters.csv',
)

In [None]:
# (number of rows, number of columns) of the loaded results table.
df.shape

In [None]:
# Preview the first five rows of the results table.
df.head(n=5)

In [None]:
# Raise the row-display limit above the table length so no row is elided.
pd.set_option('display.max_rows', len(df) + 1)
# Rows with a latent space of 10, ordered by ascending `domain_BrayCurtis`.
df.loc[df['Latent Space'] == 10].sort_values(by='domain_BrayCurtis')

In [None]:
# Hyperparameter columns used as grouping keys in the analysis cells.
# The order matters: some cells select entries from this list by position.
group_columns = [
    'Input transform',
    'Output transform',
    'Reconstruction Loss',
    'Latent Space',
    'Bioma Autoencoder',
    'Domain Autoencoder',
    'Activation Encoder',
    'Activation Decoder',
    'Activation Latent',
    'Batch Size',
    'Learning Rate',
    'Optimizer',
]

# Metric columns that are averaged and compared across experiments.
metrics_columns = [
    'domain_mse',
    'domain_mae',
    'domain_mape',
    'domain_BrayCurtis',
    'domain_pearson_corr',
    'domain_jensen_shannon_divergence',
]

# Show every column name available in the results table.
df.columns

## Mean validation results averaging by column

In [None]:
# Mean of each metric per (input transform, output transform, reconstruction
# loss) combination; only the first five groups are shown.
df.groupby(
    ['Input transform', 'Output transform', 'Reconstruction Loss']
)[metrics_columns].mean().head()

In [None]:
# Mean of each metric per latent-space value; first five groups shown.
df.groupby('Latent Space')[metrics_columns].mean().head()

In [None]:
# Mean of each metric per 'Bioma Autoencoder' value; up to 100 groups shown.
df.groupby('Bioma Autoencoder')[metrics_columns].mean().head(100)

In [None]:
# Mean of each metric per 'Domain Autoencoder' value; up to 100 groups shown.
df.groupby('Domain Autoencoder')[metrics_columns].mean().head(100)

In [None]:
# Mean of each metric per encoder activation; first five groups shown.
df.groupby('Activation Encoder')[metrics_columns].mean().head()

In [None]:
# Mean of each metric per decoder activation; first five groups shown.
df.groupby('Activation Decoder')[metrics_columns].mean().head()

In [None]:
# Mean of each metric per latent-layer activation; first five groups shown.
df.groupby('Activation Latent')[metrics_columns].mean().head()

In [None]:
# Mean of each metric per batch size; first five groups shown.
df.groupby('Batch Size')[metrics_columns].mean().head()

In [None]:
# Mean of each metric per (learning rate, optimizer) pair; first five groups
# shown.
df.groupby(['Learning Rate', 'Optimizer'])[metrics_columns].mean().head()

### Best experiment per metric

In [None]:
# Minimum `domain_mse` over all experiments, then the run(s) that reach it.
print('domain_mse', df['domain_mse'].min())

df.loc[df['domain_mse'].eq(df['domain_mse'].min())].head()

In [None]:
# Minimum `domain_mae` over all experiments, then the run(s) that reach it.
print('domain_mae', df['domain_mae'].min())

df.loc[df['domain_mae'].eq(df['domain_mae'].min())].head()

In [None]:
# Minimum `domain_mape` over all experiments, then the run(s) that reach it.
print('domain_mape', df['domain_mape'].min())

df.loc[df['domain_mape'].eq(df['domain_mape'].min())].head()

In [None]:
# Minimum `domain_BrayCurtis` over all experiments, then the run(s) that
# reach it.
print('domain_BrayCurtis', df['domain_BrayCurtis'].min())

df.loc[df['domain_BrayCurtis'].eq(df['domain_BrayCurtis'].min())].head()

In [None]:
# Maximum `domain_pearson_corr` over all experiments (the only metric here
# selected by its maximum), then the run(s) that reach it.
print('domain_pearson_corr', df['domain_pearson_corr'].max())

df.loc[df['domain_pearson_corr'].eq(df['domain_pearson_corr'].max())].head()

In [None]:
# Minimum `domain_jensen_shannon_divergence` over all experiments, then the
# run(s) that reach it.
print(
    'domain_jensen_shannon_divergence',
    df['domain_jensen_shannon_divergence'].min(),
)

df.loc[
    df['domain_jensen_shannon_divergence'].eq(
        df['domain_jensen_shannon_divergence'].min()
    )
].head()

# Conclusions

- There is not much difference among the activation functions
- Center log ratio and percentage work similarly; no transformation works much worse (see how the graph evolves)
- The bigger the latent space the better, although there is not much difference between 50 and 100
- Smaller autoencoders work better in general, but we got the best results with bigger autoencoders (more layers, more nodes per layer)
- Batch size 64 with a learning rate of 0.001 works better than batch size 128 with a learning rate of 0.01, probably because of the small dataset