# Summary of the Best Results

Using the best results in the paper, we compute the accuracy reached by each algorithm with 30% and 80% of training data.

## Fetch the Dataset

In [1]:
# `urllib.request` is a submodule: a bare `import urllib` does not load it, so
# it is imported explicitly to make the download work on a fresh kernel
import os
import tarfile
import urllib.request

file_url = 'http://www.lpthe.jussieu.fr/~erbin/files/data/cicy3o_data.tar.gz'
file_out = './cicy3o.tar.gz'
file_dat = 'cicy3o.h5'

# download the archive only if it is not already available locally
if not os.path.isfile(file_out):
    urllib.request.urlretrieve(file_url, file_out)

# unpack the data file only if it has not been extracted yet
if not os.path.isfile(file_dat):
    with tarfile.open(file_out, 'r') as tar:
        tar.extract(file_dat)

In [2]:
import pandas as pd

# read the extracted data file (file_dat, in the current directory) into `dat`
dat = pd.read_hdf(os.path.join('.', file_dat))

## Remove the Outliers

In [3]:
# full dataset (outliers included), restricted to the labels and the features
dat_out = dat[['h11', 'h21', 'num_cp', 'dim_cp', 'dim_h0_amb', 'matrix']]

# same columns, keeping only the rows with 0 < h11 < 17 and 14 < h21 < 87
dat_noout = dat.loc[dat['h11'].gt(0) & dat['h11'].lt(17) &
                    dat['h21'].gt(14) & dat['h21'].lt(87),
                    ['h11', 'h21', 'num_cp', 'dim_cp', 'dim_h0_amb', 'matrix']
                   ]

## Dense Format Extraction

In [4]:
import numpy as np

def extract_series(series: pd.Series) -> pd.Series:
    '''
    Extract a Pandas series into its dense format.

    Every entry is zero-padded at the end of each axis so that all the entries
    share the same shape, given by the largest size found along each axis.

    Required arguments:
        series: the pandas series.

    Returns:
        the pandas series in dense format (an unchanged copy of the input when
        its entries are scalars or hold at most one element).
    '''
    # avoid direct overwriting
    series = series.copy()

    # get the maximum size of each axis separately: taking the max of the
    # shape tuples would compare them lexicographically, e.g. (2, 1) > (1, 3),
    # and lead to negative padding widths
    shapes = [np.shape(entry) for entry in series]
    max_shape = tuple(max(sizes) for sizes in zip(*shapes))

    # return the transformed series
    if np.prod(max_shape) > 1:
        # compute the necessary shift (trailing zeros on each axis) and apply it
        def offset(entry):
            return [(0, max_shape[axis] - np.shape(entry)[axis])
                    for axis in range(len(max_shape))
                   ]

        return series.apply(lambda s: np.pad(s, offset(s), mode='constant'))
    else:
        # scalar entries (empty shape) do not need any padding
        return series
    
# densify both datasets: DataFrame.apply passes every column to extract_series,
# which returns scalar columns unchanged and zero-pads the array-valued ones
dat_out   = dat_out.apply(extract_series)
dat_noout = dat_noout.apply(extract_series)

In [5]:
def explode_variables(series: pd.Series) -> pd.DataFrame:
    '''
    Take one variable and explode its components in a new column.

    Required arguments:
        series: the variable to explode.

    Returns:
        a dataframe containing one observable for each column, named
        '<series name>_1', '<series name>_2', ... (plain '1', '2', ... if the
        series has no name) and indexed like the input; a variable whose
        entries all hold a single element is returned as a copy of the
        original series instead.
    '''
    # avoid direct overwriting
    series = series.copy()

    # size of the largest entry (an empty series has nothing to explode)
    largest = max((int(np.prod(np.shape(entry))) for entry in series),
                  default=1
                 )
    if largest == 1:
        return series

    # flatten the arrays
    flat = [np.reshape(entry, (-1,)) for entry in series]

    # explode over columns: build the frame in one go (one row per entry)
    # rather than creating a temporary pd.Series for each row
    frame = pd.DataFrame(flat, index=series.index)

    # the prefix is read once from the input, and an unnamed series no
    # longer fails when the column names are built
    prefix = '' if series.name is None else '{}_'.format(series.name)
    frame.columns = ['{}{}'.format(prefix, position + 1)
                     for position in range(frame.shape[1])
                    ]

    return frame

## Matrix PCA

In [6]:
# flatten every matrix into a one-dimensional vector
mat_out = dat_out['matrix'].map(np.ravel)
mat_noout = dat_noout['matrix'].map(np.ravel)

In [7]:
from sklearn.decomposition import PCA

# project the flattened matrices with PCA; the two datasets are decomposed
# independently, each with its own freshly fitted PCA
# NOTE(review): a fractional n_components is understood by scikit-learn as the
# share of variance to retain (here 99%) - confirm against the PCA docs
pca_out   = PCA(n_components=0.99, random_state=42).fit_transform(list(mat_out.values))
pca_noout = PCA(n_components=0.99, random_state=42).fit_transform(list(mat_noout.values))

# both projections must have the same number of components, since the same
# column names are attached to both of them afterwards
assert pca_out.shape[1] == pca_noout.shape[1]

In [8]:
# one label per principal component: pca_1, pca_2, ...
columns = ['pca_{}'.format(position)
           for position in range(1, pca_out.shape[1] + 1)
          ]

# wrap both projections into dataframes sharing those labels
pca_out, pca_noout = (pd.DataFrame(projection, columns=columns)
                      for projection in (pca_out, pca_noout)
                     )

In [9]:
# explode the flattened matrices into one column per entry; the index is reset
# to a plain range, like for the other input features, so that concatenating
# several features side by side cannot misalign the rows of the dataset
# without outliers (whose original index has gaps)
mat_out   = explode_variables(mat_out).reset_index(drop=True)
mat_noout = explode_variables(mat_noout).reset_index(drop=True)

## Input Features

In [10]:
# explode num_cp into one column per component (a scalar variable is returned
# as it is) and replace the index with a plain 0..n-1 range
num_cp_out   = explode_variables(dat_out['num_cp']).reset_index(drop=True)
num_cp_noout = explode_variables(dat_noout['num_cp']).reset_index(drop=True)

In [11]:
# explode dim_cp into one column per component and replace the index with a
# plain 0..n-1 range
dim_cp_out   = explode_variables(dat_out['dim_cp']).reset_index(drop=True)
dim_cp_noout = explode_variables(dat_noout['dim_cp']).reset_index(drop=True)

In [12]:
# explode dim_h0_amb into one column per component and replace the index with
# a plain 0..n-1 range
dim_h0_amb_out   = explode_variables(dat_out['dim_h0_amb']).reset_index(drop=True)
dim_h0_amb_noout = explode_variables(dat_noout['dim_h0_amb']).reset_index(drop=True)

In [13]:
# input features: only the exploded matrix entries are currently enabled; the
# other candidates (num_cp_*, dim_cp_*, dim_h0_amb_*, pca_*) can be added in
# front of mat_* in the lists below
# ignore_index=True replaces the column names with consecutive integers
inp_out = pd.concat([mat_out], axis=1, ignore_index=True)
inp_noout = pd.concat([mat_noout], axis=1, ignore_index=True)

In [14]:
# labels to predict: h11 (first column) and h21 (second column)
lab_out = dat_out.loc[:, ['h11', 'h21']]
lab_noout = dat_noout.loc[:, ['h11', 'h21']]

## Train and Test Splits

In [15]:
from sklearn.model_selection import train_test_split

def _shuffled_split(inputs, labels, train_fraction):
    '''
    Shuffle and split inputs and labels with a fixed random seed.

    Required arguments:
        inputs:         the input features,
        labels:         the labels,
        train_fraction: the fraction of samples kept for training.

    Returns:
        training inputs, test inputs, training labels, test labels.
    '''
    return train_test_split(inputs,
                            labels,
                            train_size=train_fraction,
                            shuffle=True,
                            random_state=42
                           )

# dataset with outliers, 80% and 30% of training data
(inp_out_train_80, inp_out_test_80,
 lab_out_train_80, lab_out_test_80) = _shuffled_split(inp_out, lab_out, 0.8)

(inp_out_train_30, inp_out_test_30,
 lab_out_train_30, lab_out_test_30) = _shuffled_split(inp_out, lab_out, 0.3)

# dataset without outliers, 80% and 30% of training data
(inp_noout_train_80, inp_noout_test_80,
 lab_noout_train_80, lab_noout_test_80) = _shuffled_split(inp_noout, lab_noout, 0.8)

(inp_noout_train_30, inp_noout_test_30,
 lab_noout_train_30, lab_noout_test_30) = _shuffled_split(inp_noout, lab_noout, 0.3)

In [16]:
import numpy as np

def accuracy(y_true, y_pred):
    '''
    Compute the accuracy, i.e. the fraction of predictions equal to the
    true values.

    Required arguments:
        y_true: the true values (array-like),
        y_pred: the predictions (array-like, same shape as y_true).

    Returns:
        the fraction of matching entries (NaN if the inputs are empty).

    Raises:
        ValueError: if y_true and y_pred do not have the same shape.
    '''
    # accept lists and tuples as well as arrays
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # refuse mismatched shapes instead of letting the comparison broadcast
    # them, e.g. (n,) against (n, 1), into a meaningless result
    if y_true.shape != y_pred.shape:
        raise ValueError('y_true and y_pred must have the same shape, got {} and {}'.
                         format(y_true.shape, y_pred.shape)
                        )

    return np.mean((y_true == y_pred).astype(int))

## Linear Regression

In [17]:
from sklearn.linear_model import Lasso

def _lasso_floor_predict(inp_train, lab_train, inp_test):
    '''
    Fit a Lasso regression and predict the labels of the test set.

    Required arguments:
        inp_train: the training input features,
        lab_train: the training labels,
        inp_test:  the test input features.

    Returns:
        the predictions, rounded down to integers.
    '''
    # max_iter is passed as an integer: scikit-learn releases validating the
    # type of the parameters reject the float 1e5
    estimator = Lasso(alpha=1.0e-3, max_iter=100000, random_state=42)
    estimator.fit(inp_train, lab_train)

    return np.floor(estimator.predict(inp_test)).astype(int)

# with outliers
lab_out_test_80_pred = _lasso_floor_predict(inp_out_train_80, lab_out_train_80, inp_out_test_80)
lab_out_test_30_pred = _lasso_floor_predict(inp_out_train_30, lab_out_train_30, inp_out_test_30)

# without outliers
lab_noout_test_80_pred = _lasso_floor_predict(inp_noout_train_80, lab_noout_train_80, inp_noout_test_80)
lab_noout_test_30_pred = _lasso_floor_predict(inp_noout_train_30, lab_noout_train_30, inp_noout_test_30)

In [18]:
# sections of the report; each row holds the message template, the true
# labels, the predictions and the label column (0 for h11, 1 for h21)
_report = (
    ('80% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 1),
    )),
    ('30% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 1),
    )),
)

for _position, (_title, _rows) in enumerate(_report):
    # an empty line separates consecutive sections
    if _position > 0:
        print('')
    print(_title)
    for _template, _truth, _guess, _label in _rows:
        print(_template.format(accuracy(_truth.values[:, _label], _guess[:, _label])))

80% training data:
    accuracy on h11 w/ outliers:  0.48
    accuracy on h21 w/ outliers:  0.11
    accuracy on h11 w/o outliers: 0.50
    accuracy on h21 w/o outliers: 0.12

30% training data:
    accuracy on h11 w/ outliers:  0.47
    accuracy on h21 w/ outliers:  0.11
    accuracy on h11 w/o outliers: 0.49
    accuracy on h21 w/o outliers: 0.10


## Linear SVR

In [36]:
from sklearn.svm import LinearSVR
from sklearn.multioutput import MultiOutputRegressor

def _linear_svr_floor_predict(inp_train, lab_train, inp_test):
    '''
    Fit one linear SVR per label and predict the labels of the test set.

    Required arguments:
        inp_train: the training input features,
        lab_train: the training labels,
        inp_test:  the test input features.

    Returns:
        the predictions, rounded down to integers.
    '''
    base = LinearSVR(C=0.05,
                     epsilon=0.2,
                     fit_intercept=True,
                     intercept_scaling=50,
                     random_state=42
                    )

    # the base estimator is wrapped to handle the two labels at once
    regressor = MultiOutputRegressor(base, n_jobs=-1)
    regressor.fit(inp_train, lab_train)

    return np.floor(regressor.predict(inp_test)).astype(int)

# with outliers
lab_out_test_80_pred = _linear_svr_floor_predict(inp_out_train_80, lab_out_train_80, inp_out_test_80)
lab_out_test_30_pred = _linear_svr_floor_predict(inp_out_train_30, lab_out_train_30, inp_out_test_30)

# without outliers
lab_noout_test_80_pred = _linear_svr_floor_predict(inp_noout_train_80, lab_noout_train_80, inp_noout_test_80)
lab_noout_test_30_pred = _linear_svr_floor_predict(inp_noout_train_30, lab_noout_train_30, inp_noout_test_30)

In [37]:
# sections of the report; each row holds the message template, the true
# labels, the predictions and the label column (0 for h11, 1 for h21)
_report = (
    ('80% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 1),
    )),
    ('30% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 1),
    )),
)

for _position, (_title, _rows) in enumerate(_report):
    # an empty line separates consecutive sections
    if _position > 0:
        print('')
    print(_title)
    for _template, _truth, _guess, _label in _rows:
        print(_template.format(accuracy(_truth.values[:, _label], _guess[:, _label])))

80% training data:
    accuracy on h11 w/ outliers:  0.48
    accuracy on h21 w/ outliers:  0.10
    accuracy on h11 w/o outliers: 0.55
    accuracy on h21 w/o outliers: 0.11

30% training data:
    accuracy on h11 w/ outliers:  0.53
    accuracy on h21 w/ outliers:  0.10
    accuracy on h11 w/o outliers: 0.52
    accuracy on h21 w/o outliers: 0.10


## Gaussian SVR

In [30]:
from sklearn.svm import SVR
from sklearn.multioutput import MultiOutputRegressor

def _gaussian_svr_rint_predict(inp_train, lab_train, inp_test):
    '''
    Fit one SVR with Gaussian (rbf) kernel per label and predict the labels
    of the test set.

    Required arguments:
        inp_train: the training input features,
        lab_train: the training labels,
        inp_test:  the test input features.

    Returns:
        the predictions, rounded to the nearest integer.
    '''
    base = SVR(kernel='rbf', C=15, gamma=0.03, epsilon=0.1)

    # the base estimator is wrapped to handle the two labels at once
    regressor = MultiOutputRegressor(base, n_jobs=-1)
    regressor.fit(inp_train, lab_train)

    # in this cell the predictions are rounded with np.rint, not with np.floor
    return np.rint(regressor.predict(inp_test)).astype(int)

# with outliers
lab_out_test_80_pred = _gaussian_svr_rint_predict(inp_out_train_80, lab_out_train_80, inp_out_test_80)
lab_out_test_30_pred = _gaussian_svr_rint_predict(inp_out_train_30, lab_out_train_30, inp_out_test_30)

# without outliers
lab_noout_test_80_pred = _gaussian_svr_rint_predict(inp_noout_train_80, lab_noout_train_80, inp_noout_test_80)
lab_noout_test_30_pred = _gaussian_svr_rint_predict(inp_noout_train_30, lab_noout_train_30, inp_noout_test_30)

In [31]:
# sections of the report; each row holds the message template, the true
# labels, the predictions and the label column (0 for h11, 1 for h21)
_report = (
    ('80% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 1),
    )),
    ('30% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 1),
    )),
)

for _position, (_title, _rows) in enumerate(_report):
    # an empty line separates consecutive sections
    if _position > 0:
        print('')
    print(_title)
    for _template, _truth, _guess, _label in _rows:
        print(_template.format(accuracy(_truth.values[:, _label], _guess[:, _label])))

80% training data:
    accuracy on h11 w/ outliers:  0.68
    accuracy on h21 w/ outliers:  0.19
    accuracy on h11 w/o outliers: 0.68
    accuracy on h21 w/o outliers: 0.19

30% training data:
    accuracy on h11 w/ outliers:  0.57
    accuracy on h21 w/ outliers:  0.15
    accuracy on h11 w/o outliers: 0.56
    accuracy on h21 w/o outliers: 0.16


## Random Forests

In [26]:
from sklearn.ensemble import RandomForestRegressor

def _forest_floor_predict(inp_train, lab_train, inp_test):
    '''
    Fit a random forest and predict the labels of the test set.

    Required arguments:
        inp_train: the training input features,
        lab_train: the training labels,
        inp_test:  the test input features.

    Returns:
        the predictions, rounded down to integers.
    '''
    # NOTE(review): recent scikit-learn releases may no longer accept the
    # criterion name 'mae' - confirm against the installed version
    forest = RandomForestRegressor(n_estimators=200,
                                   criterion='mae',
                                   max_depth=100,
                                   max_leaf_nodes=100,
                                   min_samples_leaf=2,
                                   min_samples_split=10,
                                   min_weight_fraction_leaf=1.0e-3,
                                   random_state=42,
                                   n_jobs=-1
                                  )
    forest.fit(inp_train, lab_train)

    return np.floor(forest.predict(inp_test)).astype(int)

# with outliers
lab_out_test_80_pred = _forest_floor_predict(inp_out_train_80, lab_out_train_80, inp_out_test_80)
lab_out_test_30_pred = _forest_floor_predict(inp_out_train_30, lab_out_train_30, inp_out_test_30)

# without outliers
lab_noout_test_80_pred = _forest_floor_predict(inp_noout_train_80, lab_noout_train_80, inp_noout_test_80)
lab_noout_test_30_pred = _forest_floor_predict(inp_noout_train_30, lab_noout_train_30, inp_noout_test_30)

In [27]:
# sections of the report; each row holds the message template, the true
# labels, the predictions and the label column (0 for h11, 1 for h21)
_report = (
    ('80% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_80, lab_out_test_80_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 1),
    )),
    ('30% training data:', (
        ('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 0),
        ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_30, lab_out_test_30_pred, 1),
        ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 0),
        ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 1),
    )),
)

for _position, (_title, _rows) in enumerate(_report):
    # an empty line separates consecutive sections
    if _position > 0:
        print('')
    print(_title)
    for _template, _truth, _guess, _label in _rows:
        print(_template.format(accuracy(_truth.values[:, _label], _guess[:, _label])))

80% training data:
    accuracy on h11 w/ outliers:  0.55
    accuracy on h21 w/ outliers:  0.11
    accuracy on h11 w/o outliers: 0.57
    accuracy on h21 w/o outliers: 0.12

30% training data:
    accuracy on h11 w/ outliers:  0.58
    accuracy on h21 w/ outliers:  0.10
    accuracy on h11 w/o outliers: 0.54
    accuracy on h21 w/o outliers: 0.12


## Gradient Boosted Trees

In [26]:
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.multioutput import MultiOutputRegressor

def _gbt_floor_predict(inp_train, lab_train, inp_test) -> np.ndarray:
    '''
    Fit gradient boosted trees on a training set and predict integer labels on a test set.

    A fresh GradientBoostingRegressor (same hyperparameters for every call) is wrapped
    in a MultiOutputRegressor, fitted on the training data and used to predict the test
    data. Predictions are rounded down with np.floor and cast to int.

    Required arguments:
        inp_train: the training features,
        lab_train: the training labels,
        inp_test:  the test features.

    Returns:
        the floored predictions on the test features, as an array of integers.
    '''
    # NOTE(review): criterion='mae' and loss='ls' are legacy scikit-learn spellings
    # ('mae' was removed in 1.1, 'ls' was renamed 'squared_error' and removed in 1.2).
    # They are kept verbatim so that the reported numbers stay tied to the original
    # environment; update them only together with a re-run of the results.
    estimator = GradientBoostingRegressor(criterion='mae',
                                          learning_rate=0.3,
                                          loss='ls',
                                          max_depth=100,
                                          min_samples_split=10,
                                          min_weight_fraction_leaf=0.01,
                                          n_estimators=100,
                                          subsample=0.8,
                                          random_state=42
                                         )

    # n_jobs=-1 is forwarded to MultiOutputRegressor to fit the outputs in parallel
    model = MultiOutputRegressor(estimator, n_jobs=-1)
    model.fit(inp_train, lab_train)

    # round down to the nearest integer before the cast
    return np.floor(model.predict(inp_test)).astype(int)

# with outliers: 80% and 30% of training data
lab_out_test_80_pred = _gbt_floor_predict(inp_out_train_80, lab_out_train_80, inp_out_test_80)
lab_out_test_30_pred = _gbt_floor_predict(inp_out_train_30, lab_out_train_30, inp_out_test_30)

# without outliers: 80% and 30% of training data
lab_noout_test_80_pred = _gbt_floor_predict(inp_noout_train_80, lab_noout_train_80, inp_noout_test_80)
lab_noout_test_30_pred = _gbt_floor_predict(inp_noout_train_30, lab_noout_train_30, inp_noout_test_30)

In [27]:
# one report per training ratio: (header, rows), where each row is
# (message template, test labels, predictions, label column)
accuracy_reports = (
    ('80% training data:',
     (('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_80,   lab_out_test_80_pred,   0),
      ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_80,   lab_out_test_80_pred,   1),
      ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 0),
      ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_80, lab_noout_test_80_pred, 1)
     )
    ),
    ('30% training data:',
     (('    accuracy on h11 w/ outliers:  {:.2f}', lab_out_test_30,   lab_out_test_30_pred,   0),
      ('    accuracy on h21 w/ outliers:  {:.2f}', lab_out_test_30,   lab_out_test_30_pred,   1),
      ('    accuracy on h11 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 0),
      ('    accuracy on h21 w/o outliers: {:.2f}', lab_noout_test_30, lab_noout_test_30_pred, 1)
     )
    )
)

for position, (header, rows) in enumerate(accuracy_reports):
    # an empty line separates consecutive reports
    if position > 0:
        print('')
    print(header)

    for template, labels, preds, column in rows:
        # compare the selected column of the true labels with the matching predictions
        print(template.format(accuracy(labels.values[:,column], preds[:,column])))

80% training data:
    accuracy on h11 w/ outliers:  0.55
    accuracy on h21 w/ outliers:  0.11
    accuracy on h11 w/o outliers: 0.57
    accuracy on h21 w/o outliers: 0.12

30% training data:
    accuracy on h11 w/ outliers:  0.58
    accuracy on h21 w/ outliers:  0.10
    accuracy on h11 w/o outliers: 0.54
    accuracy on h21 w/o outliers: 0.12
