# 1. SETTINGS

In [None]:
# import packages
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import log_loss
import scipy.stats
from scipy.stats.mstats import gmean
import os
import time

In [None]:
# helper functions
import functions
from functions import *

In [None]:
# pandas options: None removes the cap on how many columns are shown when a frame is displayed
pd.set_option('display.max_columns', None)

In [None]:
# dark background style: apply matplotlib's 'dark_background' style sheet to the plots drawn after this cell
plt.style.use('dark_background')

In [None]:
# ignore warnings: install a global filter that silences every warning category
# NOTE(review): this hides all warnings, deprecation notices included - consider narrowing the filter
import warnings
warnings.filterwarnings('ignore')

In [None]:
# garbage collection: make sure automatic collection is switched on for this session
import gc
gc.enable()

# 2. DATA PREPARATION

In [None]:
# import CSV: load the prepared dataset (path is relative to the working directory)
# and print its (rows, columns) shape
df = pd.read_csv('../data/data_v2.csv')
print(df.shape)

In [None]:
# target variable: name of the label column in df (rows where it is null are treated as the test set below)
target = 'granted_number_of_nights'

In [None]:
# partitioning: rows with a known target form the train set,
# rows with a missing target form the test set
has_target = df[target].notnull()   # boolean mask, computed once and reused
train = df[has_target]
test  = df[~has_target]
print(train.shape)
print(test.shape)

In [None]:
# IDs of the unlabelled rows, kept in the row order of the test partition
test_ids = test['request_id']

# labels ordered by request ID
y = train.sort_values(by = 'request_id')[target]

# distinct label values (displayed as the cell output)
classes = y.unique()
classes

In [None]:
### IMPORT OOF AND TEST PREDS

# which model to stack? Substring matched against file names; '' keeps every file.
model = ''

# list names: every OOF file is expected to have a same-named file in ../submissions
names = sorted(os.listdir('../oof_preds'))
names = [s for s in names if model in s]
if not names:
    raise FileNotFoundError('no files in ../oof_preds match model = ' + repr(model))

# per-file frames, column-bound once after the loop instead of growing
# train/test with a concat on every iteration
tr_parts = []
te_parts = []

# IDs come from the most recent file that carries a request_id column
tr_id = None
te_id = None

# preprocessing loop
for name in names:

    # load preds
    tmp_tr = pd.read_csv('../oof_preds/'   + str(name))
    tmp_te = pd.read_csv('../submissions/' + str(name))

    # sort OOF preds by ID and renumber the rows: concat(axis = 1) and insert()
    # align on index labels, and sort_values alone keeps the labels of the
    # original file order, so files stored in different orders would otherwise
    # be stitched together row-by-row in the wrong order
    if 'request_id' in tmp_tr:
        tmp_tr = tmp_tr.sort_values('request_id').reset_index(drop = True)
        tr_id  = tmp_tr['request_id']
        del tmp_tr['request_id']

    # same treatment for the test preds
    if 'request_id' in tmp_te:
        tmp_te = tmp_te.sort_values('request_id').reset_index(drop = True)
        te_id  = tmp_te['request_id']
        del tmp_te['request_id']

    # rename columns to '<file name>_<class>' so they stay unique across files
    tmp_tr.columns = [str(name) + '_' + str(col.replace('class_', '')) for col in tmp_tr.columns]
    tmp_te.columns = [str(name) + '_' + str(col.replace('class_', '')) for col in tmp_te.columns]

    tr_parts.append(tmp_tr)
    te_parts.append(tmp_te)

# cbind data
train = pd.concat(tr_parts, axis = 1)
test  = pd.concat(te_parts, axis = 1)

# put back id (fail with a clear message instead of a NameError when no file had one)
if tr_id is None or te_id is None:
    raise KeyError("no prediction file contains a 'request_id' column")
train.insert(0, column = 'id', value = tr_id)
test.insert(0,  column = 'id', value = te_id)

# display information
print('- Train shape:', train.shape)
print('- Test shape:',  test.shape)

# 3. BLENDING

In [None]:
# average probs for different classes
# NOTE: the suffix already starts with '_', so the blend columns are named
# 'blend__0' ... 'blend__3' (double underscore); the names are kept as they were.
class_suffixes = ['_0', '_1', '_2', '_3']

for cl in class_suffixes:

    blend_col = 'blend_' + cl

    # same averaging for train (OOF) and test
    for frame in (train, test):

        # model columns of this class; the blend column itself is skipped so
        # that re-running the cell does not average a previous blend back in
        preds = [c for c in frame.columns if c.endswith(cl) and c != blend_col]
        frame[blend_col] = frame[preds].mean(axis = 1)

# extract predictions: pick the blend columns by exact name, so a model file
# whose name happens to contain 'blend' is not mistaken for the ensemble output
blend_cols = ['blend_' + cl for cl in class_suffixes]
preds_oof  = train[blend_cols]
preds_test = test[blend_cols]

In [None]:
# try different ensembles
#from scipy.stats.mstats import gmean
#armean_tr = np.array(train[models].mean(axis    = 1))
#median_tr = np.array(train[models].median(axis  = 1))
#gemean_tr = gmean(np.array(train[models]), axis = 1)
#min_tr    = np.array(train[models].min(axis     = 1))
#max_tr    = np.array(train[models].max(axis     = 1))

In [None]:
# define the competition scorer
def competition_scorer(y_true, y_pred):
    """Return the sample-weighted log loss of ``y_pred`` against ``y_true``.

    Every observation is weighted by 10 raised to the power of its true
    label, so mistakes on higher-valued labels are penalised more heavily.

    Args:
        y_true: true labels; must support ``10 ** y_true`` element-wise
            (for example a pandas Series or a NumPy array).
        y_pred: predictions forwarded unchanged to ``log_loss``.

    Returns:
        The weighted log loss computed by ``log_loss``.
    """
    weights = 10 ** y_true
    return log_loss(y_true, y_pred, sample_weight = weights)

# score the blended OOF predictions against the labels, keep six decimals
cv_score = competition_scorer(y, preds_oof)
cv_perf  = np.round(cv_score, decimals = 6)
cv_perf

# 4. SUBMISSION

In [None]:
# file name: '<model>_<score digits>'
model = 'blend'

# characters 2-6 of the rounded score's string form, i.e. the leading decimals
# of a score such as 0.123456 (the integer part is not part of the name)
perf = str(round(cv_perf, 6))[2:7]

name = '_'.join([model, perf])

In [None]:
# export submission
#sub = pd.DataFrame({'id': test['id'], 'duration': preds_test})
#sub.to_csv('../submissions/' + str(name) + '.csv', index = False)
#sub.head()

In [None]:
# export submission
# Both the predictions and the IDs get a fresh 0..n-1 index before they are
# combined: insert() aligns on index labels, so the two must share the same
# labels for every ID to land next to its own row of probabilities.
sub = preds_test.reset_index(drop = True)

# test is the stacked prediction frame at this point; its ID column is 'id'
# (the original 'request_id' column no longer exists on it)
sub.insert(0, column = 'request_id', value = test['id'].reset_index(drop = True))

# final layout expected by the submission file: ID followed by one column per class
sub.columns = ['id', '0', '1', '2', '3']
sub.to_csv('../submissions/' + str(name) + '.csv', index = False)
sub.head()

In [None]:
# submit to QSCORE, labelling the upload with '<comment> - <file name>'
# (submit_prediction is not defined in this notebook; presumably it comes from
# the star import of the local functions module)
comment = ''
submission_label = ' - '.join([str(comment), name])
submit_prediction(sub, sep = ',', index = False, comment = submission_label)