## Import files

In [None]:
import os
import numpy as np
import pandas as pd

sub_path = "/content/drive/My Drive/Colab Notebooks/ML stacking/best_prediction"
# os.listdir returns entries in arbitrary order and may contain non-CSV entries;
# sort and filter so the is_iceberg_<n> column numbering is stable between runs.
all_files = sorted(name for name in os.listdir(sub_path) if name.lower().endswith(".csv"))
if not all_files:
    raise ValueError("no .csv submission files found in " + sub_path)
print(all_files)

# Read and concatenate submissions: each file is indexed by its first column,
# so the frames are aligned on that index when placed side by side.
outs = [pd.read_csv(os.path.join(sub_path, f), index_col=0) for f in all_files]
concat_sub = pd.concat(outs, axis=1)
# One generic column name per concatenated column, numbered in file order.
cols = ["is_iceberg_{}".format(i) for i in range(len(concat_sub.columns))]
concat_sub.columns = cols
# Move the index back into a regular column (non in-place, so the cell is
# safe to re-run).
concat_sub = concat_sub.reset_index()
concat_sub.head()

['200_ens_densenet.csv', 'Keras+TF.csv', 'ResNeXt.csv', 'submission38.csv', 'submission43.csv']


Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4
0,5941774d,0.01943216,0.005586,0.0198065,0.008875,0.005586
1,4023181e,0.03168809,0.145927,0.08600441,0.146779,0.031688
2,b20200e4,4e-08,1.5e-05,3.8755430000000003e-17,0.048646,0.001
3,e7f018bb,0.9925741,0.999914,0.977856,0.999184,0.999
4,4371c8c3,0.02215107,0.033843,0.002992679,0.277251,0.022151


In [None]:
_TEST_JSON_PATH = "/content/drive/My Drive/Colab Notebooks/ML stacking/data/test.json"
_test_ids_cache = {}


def _load_test_ids(test_path):
    """Return the `id` column of the test JSON, parsing the file once per path."""
    if test_path not in _test_ids_cache:
        _test_ids_cache[test_path] = pd.read_json(test_path)['id']
    return _test_ids_cache[test_path]


def prepare_submission_train(proba, path, test_path=_TEST_JSON_PATH):
    """Write a two-column submission CSV (`id`, `is_iceberg`).

    Parameters
    ----------
    proba : array-like
        Predicted probabilities, one per row of the test file. Any shape is
        accepted; the values are flattened to 1-D in row-major order.
    path : str
        Destination CSV path.
    test_path : str, optional
        JSON file whose `id` column supplies the submission ids. Defaults to
        the notebook's test set.

    Raises
    ------
    ValueError
        If the number of probabilities differs from the number of ids.
    """
    # The ids never change between calls, so the JSON is parsed only once.
    ids = _load_test_ids(test_path)
    proba = np.asarray(proba).reshape(-1)
    if len(proba) != len(ids):
        raise ValueError(
            "got {} predictions for {} test ids".format(len(proba), len(ids))
        )

    submission = pd.DataFrame({'id': ids, 'is_iceberg': proba})
    submission.to_csv(path, float_format='%.15f', index=False)

In [None]:
# Agreement thresholds used by the stacking cells. Mind the naming:
# `cutoff_lo` is the level every model must exceed before the row maximum is
# used, and `cutoff_hi` is the level every model must stay below before the
# row minimum is used.
cutoff_lo = 0.90
cutoff_hi = 0.10

# Row-wise summary statistics over the base-model predictions. Selecting by
# the `cols` name list instead of fixed positions keeps this correct for any
# number of submission files.
base_preds = concat_sub[cols]
concat_sub['is_iceberg_max'] = base_preds.max(axis=1)
concat_sub['is_iceberg_min'] = base_preds.min(axis=1)
concat_sub['is_iceberg_mean'] = base_preds.mean(axis=1)
concat_sub['is_iceberg_median'] = base_preds.median(axis=1)

## MinMax

In [None]:
# Blend the first two base models: when both fall on the same side of 0.5,
# keep the more extreme of the two; otherwise fall back to the first model.
x = concat_sub['is_iceberg_0']
y = concat_sub['is_iceberg_1']
both_above = (x > 0.5) & (y > 0.5)
both_below = (x < 0.5) & (y < 0.5)
# The two masks cannot both be true, so the order of application is irrelevant.
blended = np.where(both_below, np.minimum(x, y), x)
blended = np.where(both_above, np.maximum(x, y), blended)
concat_sub['is_iceberg'] = blended
prepare_submission_train(concat_sub['is_iceberg'].to_numpy(), path='./Stacked_MinMax.csv')
concat_sub.head()

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg
0,5941774d,0.01943216,0.005586,0.0198065,0.008875,0.005586,0.019807,0.005586,0.011857,0.008875,0.005586202
1,4023181e,0.03168809,0.145927,0.08600441,0.146779,0.031688,0.146779,0.031688,0.088417,0.086004,0.03168809
2,b20200e4,4e-08,1.5e-05,3.8755430000000003e-17,0.048646,0.001,0.048646,3.8755430000000003e-17,0.009932,1.5e-05,4e-08
3,e7f018bb,0.9925741,0.999914,0.977856,0.999184,0.999,0.999914,0.977856,0.993706,0.999,0.9999136
4,4371c8c3,0.02215107,0.033843,0.002992679,0.277251,0.022151,0.277251,0.002992679,0.071678,0.022151,0.02215107


## MinMax + BestBase

In [None]:
# load the model with best base performance
sub_base = pd.read_csv('/content/drive/My Drive/Colab Notebooks/ML stacking/best_prediction/ResNeXt.csv')
# Align on `id` rather than row position, so a different row order in the file
# cannot silently pair a prediction with the wrong sample.
concat_sub['is_iceberg_base'] = concat_sub['id'].map(sub_base.set_index('id')['is_iceberg'])

# The agreement test spans every base-model column in `cols`.
base_preds = concat_sub[cols]
all_above = (base_preds > cutoff_lo).all(axis=1)
all_below = (base_preds < cutoff_hi).all(axis=1)

# Unanimous high -> row max, unanimous low -> row min, otherwise the best base model.
concat_sub['is_iceberg'] = np.where(all_above,
                                    concat_sub['is_iceberg_max'],
                                    np.where(all_below,
                                             concat_sub['is_iceberg_min'],
                                             concat_sub['is_iceberg_base']))
# Keep the submitted values inside [0.001, 0.999].
concat_sub['is_iceberg'] = np.clip(concat_sub['is_iceberg'].values, 0.001, 0.999)
prepare_submission_train(concat_sub['is_iceberg'].to_numpy(), path='./Stacked_MinMax_BestBase.csv')
concat_sub.head()

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg,is_iceberg_base
0,5941774d,0.01943216,0.005586,0.0198065,0.008875,0.005586,0.019807,0.005586,0.011857,0.008875,0.005586,0.0198065
1,4023181e,0.03168809,0.145927,0.08600441,0.146779,0.031688,0.146779,0.031688,0.088417,0.086004,0.086004,0.08600441
2,b20200e4,4e-08,1.5e-05,3.8755430000000003e-17,0.048646,0.001,0.048646,3.8755430000000003e-17,0.009932,1.5e-05,0.001,3.8755430000000003e-17
3,e7f018bb,0.9925741,0.999914,0.977856,0.999184,0.999,0.999914,0.977856,0.993706,0.999,0.999,0.977856
4,4371c8c3,0.02215107,0.033843,0.002992679,0.277251,0.022151,0.277251,0.002992679,0.071678,0.022151,0.002993,0.002992679


## MinMax + Median

In [None]:
# Unanimous high -> row max, unanimous low -> row min, otherwise the row median.
model_preds = concat_sub.iloc[:, 1:6]
all_above = (model_preds > cutoff_lo).all(axis=1)
all_below = (model_preds < cutoff_hi).all(axis=1)

stacked = np.where(all_below, concat_sub['is_iceberg_min'], concat_sub['is_iceberg_median'])
# Applied last so the "all above" case takes precedence, as in a nested where.
stacked = np.where(all_above, concat_sub['is_iceberg_max'], stacked)

# Keep the submitted values inside [0.001, 0.999].
concat_sub['is_iceberg'] = np.clip(stacked, 0.001, 0.999)
prepare_submission_train(concat_sub['is_iceberg'].to_numpy(), path='./Stacked_MinMax_Median.csv')
concat_sub.head()

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg,is_iceberg_base
0,5941774d,0.01943216,0.005586,0.0198065,0.008875,0.005586,0.019807,0.005586,0.011857,0.008875,0.005586,0.0198065
1,4023181e,0.03168809,0.145927,0.08600441,0.146779,0.031688,0.146779,0.031688,0.088417,0.086004,0.086004,0.08600441
2,b20200e4,4e-08,1.5e-05,3.8755430000000003e-17,0.048646,0.001,0.048646,3.8755430000000003e-17,0.009932,1.5e-05,0.001,3.8755430000000003e-17
3,e7f018bb,0.9925741,0.999914,0.977856,0.999184,0.999,0.999914,0.977856,0.993706,0.999,0.999,0.977856
4,4371c8c3,0.02215107,0.033843,0.002992679,0.277251,0.022151,0.277251,0.002992679,0.071678,0.022151,0.022151,0.002992679


## MinMax + Mean

In [None]:
# Unanimous high -> row max, unanimous low -> row min, otherwise the row mean.
concat_sub['is_iceberg'] = np.where(np.all(concat_sub.iloc[:,1:6] > cutoff_lo, axis=1), 
                                   concat_sub['is_iceberg_max'], 
                                   np.where(np.all(concat_sub.iloc[:,1:6] < cutoff_hi, axis=1),
                                            concat_sub['is_iceberg_min'], 
                                            concat_sub['is_iceberg_mean']))
# Clip like the other stacking cells: values such as 3.9e-17 would otherwise be
# written as exactly 0 by the '%.15f' format used for the submission file.
concat_sub['is_iceberg'] = np.clip(concat_sub['is_iceberg'].values, 0.001, 0.999)
prepare_submission_train(concat_sub['is_iceberg'].to_numpy(), path='./Stacked_MinMax_Mean.csv')
concat_sub.head()

Unnamed: 0,id,is_iceberg_0,is_iceberg_1,is_iceberg_2,is_iceberg_3,is_iceberg_4,is_iceberg_max,is_iceberg_min,is_iceberg_mean,is_iceberg_median,is_iceberg,is_iceberg_base
0,5941774d,0.01943216,0.005586,0.0198065,0.008875,0.005586,0.019807,0.005586,0.011857,0.008875,0.005586,0.0198065
1,4023181e,0.03168809,0.145927,0.08600441,0.146779,0.031688,0.146779,0.031688,0.088417,0.086004,0.08841726,0.08600441
2,b20200e4,4e-08,1.5e-05,3.8755430000000003e-17,0.048646,0.001,0.048646,3.8755430000000003e-17,0.009932,1.5e-05,3.8755430000000003e-17,3.8755430000000003e-17
3,e7f018bb,0.9925741,0.999914,0.977856,0.999184,0.999,0.999914,0.977856,0.993706,0.999,0.9999136,0.977856
4,4371c8c3,0.02215107,0.033843,0.002992679,0.277251,0.022151,0.277251,0.002992679,0.071678,0.022151,0.07167767,0.002992679
