In [1]:
import pandas as pd
import numpy as np
import pandas as pd
import numpy as np
from sklearn.model_selection import GridSearchCV, cross_val_score, KFold
import xgboost
import lightgbm
import matplotlib.pyplot as plt
from sklearn.decomposition import PCA, SparsePCA, MiniBatchSparsePCA, KernelPCA
from sklearn.pipeline import Pipeline
from sklearn.metrics import log_loss, accuracy_score
import copy
from sklearn.neighbors import KNeighborsClassifier
from sklearn.cluster import KMeans
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import ExtraTreesClassifier, GradientBoostingClassifier, RandomForestClassifier, AdaBoostClassifier, BaggingClassifier
from sklearn.neural_network import MLPClassifier

kfold = KFold(5, shuffle=True, random_state=999)

In [2]:
from trainer import Trainer
from meta_featured_trainer import MetaFeatureTrainer
from resnet import fineTuneResNet50,lessFilterResNet50, paperResNet18, nnResNet, get_ensemble_resnet
from senet import senetXX_generic
from swwae_trainer import SWWAETrainer
from swwae import get_swwae
import pandas as pd
import numpy as np
import torch
from torch.optim import lr_scheduler

In [3]:
class base_tuner():
    def __init__(self, X, y):
        self.X = X
        self.y = y
        self.default_params = {}
        
    def fit_and_update_params(self, params, update=True):
        clf = self.get_clf()

        gs = GridSearchCV(clf, params, scoring='neg_log_loss', cv=kfold, return_train_score=False )
        gs.fit(self.X, self.y)
        
        cv_df = pd.DataFrame().from_dict(gs.cv_results_)
        cv_df = cv_df[['mean_test_score', 'std_test_score', 'params', 'rank_test_score']]
        cv_df = cv_df.sort_values(by=['rank_test_score', 'std_test_score']).reset_index(drop=True)
        best_params = cv_df.loc[0, 'params']
        
        if update is True:
            self.default_params.update(best_params)
        
        print('Selected hyper-params:', best_params)
        print('==============================> cv score: {:.4f}'.format(cv_df.loc[0, 'mean_test_score']))
        return best_params
    
    def tune(self):
        pass
    
    def get_clf(self):
        return None

In [4]:
class lgbm_tuner(base_tuner):
    def __init__(self, X, y):
        super(lgbm_tuner, self).__init__(X, y)
        self.default_params = {
            'n_jobs': 4,
            'objective': 'binary',
            'random_state': 0,
            'boosting_type': 'dart'
        }
    
    def tune_est_num_and_lr(self):
        params = {
            'n_estimators': [100, 200, 400, 800],
            'learning_rate': [0.1, 0.05, 0.01, 0.005]
        }

        self.fit_and_update_params(params)
   
    def tune_leaves_num_and_gamma(self):
        params = {
            'num_leaves': [2, 3, 7, 15, 31, 63],
            'min_split_gain': [.0, .1, .2]
        }
        self.fit_and_update_params(params)
     
    def tune_sampling(self):
        params = {
            'subsample': [1., .8, .6, .4, .2],
            'colsample_bytree': [1., .8, .6, .4, .2]
        }
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
        
    def tune_regularization(self):
        params = {
            'reg_alpha': [1., .8, .6, .4, .2, .0],
            'reg_lambda': [1., .8, .6, .4, .2, .0]
        }
        
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            elif v == 0.:
                next_params[k] = [.0, .05, .1, .15]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
    
    def tune(self):
        print('lgb tuner start tuning')
        self.tune_est_num_and_lr()
        self.tune_leaves_num_and_gamma()
        self.tune_sampling()
        self.tune_regularization()
        
        return self.get_clf()
    
    def get_clf(self):
        return lightgbm.LGBMClassifier(**self.default_params)

In [5]:
class xgb_tuner(base_tuner):
    def __init__(self, X, y):
        super(xgb_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'n_jobs': 4,
            'objective': 'binary:logistic',
            'seed': 0,
            'eval_metric': 'logloss'
        }
    
    def tune_booster(self):
        params = {
            'booster': ['dart', 'gbtree']
        }
        self.fit_and_update_params(params)
        
    def tune_est_num_and_lr(self):
        params = {
            'n_estimators': [100, 200, 400, 800],
            'learning_rate': [0.1, 0.05, 0.01, 0.005]
        }

        self.fit_and_update_params(params)
   
    def tune_max_depth(self):
        params = {
            'max_depth': [1, 3, 5, 7, 9]
        }
        self.fit_and_update_params(params)
        
    def tune_child_w_and_gamma(self):
        params = {
            'min_child_weight': [1, 2, 4, 6, 8, 10],
            'gamma': [0, 0.1, 0.2]
        }
        self.fit_and_update_params(params)
     
    def tune_sampling(self):
        params = {
            'subsample': [1., .8, .6, .4, .2],
            'colsample_bytree': [1., .8, .6, .4, .2]
        }
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
        
    def tune_regularization(self):
        params = {
            'reg_alpha': [1., .8, .6, .4, .2, .0],
            'reg_lambda': [1., .8, .6, .4, .2, .0]
        }
        
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            elif v == 0.:
                next_params[k] = [.0, .05, .1, .15]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
    
    def tune(self):
        print('xgb tuner start tuning')
        self.tune_booster()
        self.tune_est_num_and_lr()
        self.tune_max_depth()
        self.tune_child_w_and_gamma()
        self.tune_sampling()
        self.tune_regularization()
        
        return self.get_clf()
    
    def get_clf(self):
        return xgboost.XGBClassifier(**self.default_params)

In [6]:
class lr_tuner(base_tuner):
    def __init__(self, X, y):
        super(lr_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'penalty': 'l2',
            'max_iter': 2000
        }

    def tune(self):
        print('logistic regression tuner start tuning')
        params = {
            'solver': ['lbfgs', 'sag']
        }
        
        self.fit_and_update_params(params)
        
        return self.get_clf()
    
    def get_clf(self):
        return LogisticRegression(**self.default_params)

In [7]:
class mlp_tuner(base_tuner):
    def __init__(self, X, y):
        super(mlp_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'learning_rate': 'adaptive',
            'learning_rate_init': 0.005,
            'max_iter': 2000,
            'random_state':0
        }

    def tune(self):
        print('mlp tuner start tuning')
        params = {
            'solver':['lbfgs', 'sgd', 'adam'],
            'hidden_layer_sizes': [(100,), (150,), (100, 100,)],
        }
        
        self.fit_and_update_params(params)
        
        params = {
            'alpha': [10., 5., 2., 1., .8, .5, .2, .1],
        }
        
        self.fit_and_update_params(params)
        
        return self.get_clf()
    
    def get_clf(self):
        return MLPClassifier(**self.default_params)

In [8]:
class adb_tuner(base_tuner):
    def __init__(self, X, y):
        super(adb_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'algorithm': 'SAMME.R',
            'random_state':0
        }

    def tune(self):
        print('adaboost tuner start tuning')
        params = {
            'n_estimators': [100, 200, 400, 800],
            'learning_rate': [0.1, 0.05, 0.01, 0.005]
        }
        
        self.fit_and_update_params(params)

        return self.get_clf()
    
    def get_clf(self):
        return AdaBoostClassifier(**self.default_params)

In [9]:
class bg_tuner(base_tuner):
    def __init__(self, X, y):
        super(bg_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'random_state':0,
            'bootstrap_features':True
        }
        
    def tune_est_num(self):
        params = {
            'n_estimators': [100, 200, 400, 800]
        }

        self.fit_and_update_params(params)
     
    def tune_sampling(self):
        params = {
            'max_samples': [1., .8, .6, .4, .2],
            'max_features': [1., .8, .6, .4, .2]
        }
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
    
    def tune(self):
        print('bagging tuner start tuning')
        self.tune_est_num()
        self.tune_sampling()

        return self.get_clf()
    
    def get_clf(self):
        return BaggingClassifier(**self.default_params)

In [10]:
class gb_tuner(base_tuner):
    def __init__(self, X, y):
        super(gb_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'random_state':0
        }
        
    def tune_loss_criterion(self):
        params = {
            'loss': ['deviance', 'exponential'],
            'criterion': ['friedman_mse', 'mse']
        }

        self.fit_and_update_params(params)
        
    def tune_est_num_and_lr(self):
        params = {
            'n_estimators': [100, 200, 400, 800],
            'learning_rate': [0.1, 0.05, 0.01, 0.005]
        }

        self.fit_and_update_params(params)
   
    def tune_max_depth(self):
        params = {
            'max_depth': [1, 3, 5, 7, 9]
        }
        self.fit_and_update_params(params)
        
    def tune_child(self):
        params = {
            'min_samples_split': [2, 3, 7, 15,31],
            'min_impurity_decrease': [0, 0.1, 0.2]
        }
        self.fit_and_update_params(params)
     
    def tune_sampling(self):
        params = {
            'subsample': [1., .8, .6, .4, .2],
            'max_features': [1., .8, .6, .4, .2]
        }
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
      
    def tune(self):
        print('gradient boosting tuner start tuning')
        self.tune_loss_criterion()
        self.tune_est_num_and_lr()
        self.tune_max_depth()
        self.tune_child()
        self.tune_sampling()
        
        return self.get_clf()
    
    def get_clf(self):
        return GradientBoostingClassifier(**self.default_params)

In [11]:
class rf_tuner(base_tuner):
    def __init__(self, X, y):
        super(rf_tuner, self).__init__(X, y)
        self.X = X
        self.y = y
        self.default_params = {
            'random_state':0,
            'n_jobs': 4
        }
        
    def tune_loss_criterion(self):
        params = {
            'class_weight': [None, 'balanced'],
            'criterion': ['gini', 'entropy']
        }

        self.fit_and_update_params(params)
        
    def tune_est_num(self):
        params = {
            'n_estimators': [100, 200, 400, 800]
        }

        self.fit_and_update_params(params)
   
    def tune_max_depth(self):
        params = {
            'max_depth': [1, 3, 5, 7, 9]
        }
        self.fit_and_update_params(params)
        
    def tune_child(self):
        params = {
            'min_samples_split': [2, 3, 7, 15,31],
            'min_impurity_decrease': [0, 0.1, 0.2]
        }
        self.fit_and_update_params(params)
     
    def tune_sampling(self):
        params = {
            'max_features': [1., .8, .6, .4, .2]
        }
        best_parmas = self.fit_and_update_params(params, update=False)
        
        next_params = {}
        for k,v in best_parmas.items():
            if v == 1.:
                next_params[k] = [1., .95, .9, .85]
            else:
                next_params[k] = [v+.15, v+.1, v+.05, v, v-.05, v-.1, v-.15]
 
        self.fit_and_update_params(next_params)
      
    def tune(self):
        print('random forest tuner start tuning')
        self.tune_loss_criterion()
        self.tune_est_num()
        self.tune_max_depth()
        self.tune_child()
        self.tune_sampling()
        
        return self.get_clf()
    
    def get_clf(self):
        return RandomForestClassifier(**self.default_params)

In [12]:
class et_tuner(rf_tuner):
    def __init__(self, X, y):
        super(et_tuner, self).__init__(X, y)

    def tune(self):
        print('extra tree tuner start tuning')
        self.tune_loss_criterion()
        self.tune_est_num()
        self.tune_max_depth()
        self.tune_child()
        self.tune_sampling()
        
        return self.get_clf()
    
    def get_clf(self):
        return ExtraTreesClassifier(**self.default_params)

In [13]:
from stacking_models_api import StackingAveragedModels

In [14]:
def get_features(fold, train, test,seed=0):
    is_transfer_learning = False
    model = lessFilterResNet50(fc_output=True)
    model_name= 'Trained_model/resnet_origin_09_soft_pseudo_label_n_valid_' + str(fold) + '.db'
    optimizer = torch.optim.Adam(model.parameters(), lr=0.005, weight_decay=0.001)

    tr = Trainer(
            model,
            optimizer,
            epochs=1800,
            milestones=[300, 1100, 1600],
            gamma=0.2,
            batch_size=128, 
            use_cuda=True, 
            gpu_idx=0,
            best_model_name = model_name,
            seed=seed,
            verbose=0)

    tr.load_checkpoint()

    data_augmentation_args = {
        'mirror': False, # not useful here
        'rotate': True,
        'scale': True,
        'translation': True
    }

    new_train = tr.test(train, is_transfer_learning=is_transfer_learning, is_augment=True, data_augmentation_args=data_augmentation_args)
    new_test = tr.test(test, is_transfer_learning=is_transfer_learning, is_augment=True, data_augmentation_args=data_augmentation_args)

    del model, optimizer, tr
    
    columns = ['f_{}'.format(i) for i in range(new_train.shape[1])]
    new_train_df = pd.DataFrame(data=new_train,    # values
                  index=[i for i in range(new_train.shape[0])],
                  columns=columns)  # 1st row as the column names
    new_test_df = pd.DataFrame(data=new_test,    # values
                  index=[i for i in range(new_test.shape[0])],
                  columns=columns)  # 1st row as the column names
    
    return new_train_df, new_test_df

In [15]:
import gc
gc.enable()

train_df = pd.read_json('Data/no_denoise_processed_train.json')
test_df = pd.read_json('Data/test.json')
print("Test data shape: {}".format(test_df.shape))
test_ids = test_df['id']
del test_df
test_df = pd.read_json('Data/no_denoise_processed_test.json')

train_df.sort_index(inplace=True)

# pl part
pl_train_df = pd.read_json('Data/public_lb_1229_cpl_test.json')
pl_train_df.sort_index(inplace=True)
pl_train_df.is_iceberg = [1.0 if v >= 0.5 else 0. for v in pl_train_df.is_iceberg.values]
new_train = pd.concat([train_df, pl_train_df]).reset_index(drop=True)

del train_df, pl_train_df
train_df = new_train
#

test_df.sort_index(inplace=True)

features = train_df.columns.tolist()
features.remove('is_iceberg')

train_y = train_df['is_iceberg'].values


for i in range(5):
    fold = i + 1
    
    print("=================================================")
    print("Processing fold ", fold)
    print("=================================================")
    
    train_X, test_X = get_features(fold, train_df, test_df)
    print(train_X.shape, test_X.shape)
    
    tuners = [lr_tuner(train_X, train_y),
              #mlp_tuner(train_X, train_y),
              #adb_tuner(train_X, train_y),
              #bg_tuner(train_X, train_y),
              #gb_tuner(train_X, train_y),
              #rf_tuner(train_X, train_y),
              #et_tuner(train_X, train_y),
              lgbm_tuner(train_X, train_y), 
              #xgb_tuner(train_X, train_y)
             ]
    clfs = []
    
    for tuner in tuners:
        tuner.tune()
        clfs.append(tuner.get_clf())
    
    for i, clf in enumerate(clfs):
        if isinstance(tuners[i], lgbm_tuner) or isinstance(tuners[i], xgb_tuner):
            if isinstance(tuners[i], lgbm_tuner):
                name = 'lgb'
            else:
                name = 'xgb'
            
            clf.fit(train_X, train_y)
            predictions = clf.predict_proba(test_X)[:,1]
            print("Fold {}, {} tuner predictions\n".format(fold, name), predictions)
        
            submission = pd.DataFrame()
            submission['id'] = test_ids
            submission['is_iceberg'] = predictions
            submission.to_csv('Submissions/submission_{}_0117_auto_fine_tune_fold_{}.csv'.format(name, fold), 
                              float_format="%.15f", index=False)
     
    
    print("\n*** Start to train meta model for stacking averaged model ***\n")
    
    sl_base_models_dict = {}
    for i, clf in enumerate(clfs):
        sl_base_models_dict['clf_'+str(i+1)] = clf
        
    semi_sl_base_models_dict = {
        'knn_8': KNeighborsClassifier(n_neighbors=8),
        'knn_16': KNeighborsClassifier(n_neighbors=16),
        'knn_32': KNeighborsClassifier(n_neighbors=32),
        'knn_64': KNeighborsClassifier(n_neighbors=64)
    }

    usl_base_models_dict = {
        'kmean_2': KMeans(n_clusters=2),
        'kmean_3': KMeans(n_clusters=3),
        'kmean_4': KMeans(n_clusters=4)
    }
    
    sam = StackingAveragedModels(sl_base_models_dict=sl_base_models_dict, 
                             semi_sl_base_models_dict=semi_sl_base_models_dict,
                             usl_base_models_dict=usl_base_models_dict,
                             meta_model=LogisticRegression(),
                             target_col='is_iceberg',
                             eval_func=log_loss,
                             is_classification=True,
                             random_state=999)
    
    sam.fit(train_X, train_y)
    
    # get meta dataframe to train
    meta_df = sam.get_meta_train_dataframe(get_dummies=True, pca_enabled=False)
    # shuffle meta data frame
    meta_df = meta_df.sample(frac=1, random_state=0).reset_index(drop=True)
    print('feature correlations')
    print(meta_df.corr())
    
    features = meta_df.columns.tolist()
    features.remove('is_iceberg')
    meta_X = meta_df[features]
    meta_y = np.array(meta_df['is_iceberg']).reshape((meta_df.shape[0],))
    print("meta size: ", meta_X.shape, meta_X.shape)
    
    # fine tune for meta model
    meta_tuners = [lgbm_tuner(meta_X, meta_y), xgb_tuner(meta_X, meta_y)]
    meta_clfs = [] 
    for tuner in meta_tuners:
        tuner.tune()
        meta_clfs.append(tuner.get_clf())
        
    for i, clf in enumerate(meta_clfs):
        if isinstance(meta_tuners[i], lgbm_tuner) or isinstance(meta_tuners[i], xgb_tuner):
            if isinstance(meta_tuners[i], lgbm_tuner):
                name = 'lgb'
            else:
                name = 'xgb'
            
            sam.reset_meta_model(clf)
            predictions = sam.predict_proba(test_X)[:,1]
            print("Fold {}, meta model ({} tuner) predictions\n".format(fold, name), predictions)
        
            submission = pd.DataFrame()
            submission['id'] = test_ids
            submission['is_iceberg'] = predictions
            submission.to_csv('Submissions/submission_{}_sam_0117_auto_fine_tune_fold_{}.csv'.format(name, fold), 
                              float_format="%.15f", index=False)
            

Test data shape: (8424, 4)
Processing fold  1
gpu: 0  available: True
epoch= 252 best_loss= 0.1741405725479126
(4590, 2049) (8424, 2049)
logistic regression tuner start tuning
Selected hyper-params: {'solver': 'lbfgs'}
lgb tuner start tuning
Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 7}
Selected hyper-params: {'colsample_bytree': 0.6, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.5499999999999999, 'subsample': 0.8}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Fold 1, lgb tuner predictions
 [ 0.00341267  0.11423964  0.00287673 ...,  0.00343424  0.99688222
  0.00343424]

*** Start to train meta model for stacking averaged model ***


 clf_1
score= 0.0429539140283
score= 0.0309537057287
score= 0.0292276034853
score= 0.057013355122
score= 0.0470402668193
Avg score =  0.0414377690367

 clf_2
score= 0.0455429391202

Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 31}
Selected hyper-params: {'colsample_bytree': 0.6, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.75, 'subsample': 0.8}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
xgb tuner start tuning
Selected hyper-params: {'booster': 'dart'}
Selected hyper-params: {'learning_rate': 0.01, 'n_estimators': 800}
Selected hyper-params: {'max_depth': 1}
Selected hyper-params: {'gamma': 0, 'min_child_weight': 1}
Selected hyper-params: {'colsample_bytree': 0.4, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.45, 'subsample': 0.75}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0309244699783 

Fold 1, meta model (lgb tuner) predictions
 [ 0.00333329  0.09623024  0.00261212 ...,  0.00251963  0.99699215
  0.00284684]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0378541702311 

Fold 1, meta model (xgb tuner) predictions
 [ 0.00258018  0.04832762  0.00181707 ...,  0.00236612  0.99710077
  0.00203023]
Processing fold  2
gpu: 0  available: True
epoch= 82 best_loss= 0.16970072189966837
(4590, 2049) (8424, 2049)
logistic regression tuner start tuning
Selected hyper-params: {'solver': 'lbfgs'}
lgb tuner start tuning
Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.2, 'num_leaves': 7}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 0.8}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Fold 2, lgb tuner predictions
 [ 0.02831142  0.02907418  0.0031012  ...,  0.00417901  0.99596336
  0.00310245]

*** Start to train meta model for stacking averaged model ***


 clf_1
score= 0.0626761723562
score= 0.0495507

Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 0.8500000000000001}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
xgb tuner start tuning
Selected hyper-params: {'booster': 'dart'}
Selected hyper-params: {'learning_rate': 0.01, 'n_estimators': 800}
Selected hyper-params: {'max_depth': 1}
Selected hyper-params: {'gamma': 0, 'min_child_weight': 6}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 1.0}
Selected hyper-params: {'colsample_bytree': 0.9, 'subsample': 1.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.05, 'reg_lambda': 0.0}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0613208766451 

Fold 2, meta model (lgb tuner) predictions
 [ 0.01482597  0.01290676  0.00377235 ...,  0.00402632  0.99385634
  0.00377235]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0589263816295 

Fold 2, meta model (xgb tuner) predictions
 [ 0.01470658  0.01535854  0.00220421 ...,  0.00236653  0.99574465
  0.00220421]
Processing fold  3
gpu: 0  available: True
epoch= 300 best_loss= 0.16262948016325632
(4590, 2049) (8424, 2049)
logistic regression tuner start tuning
Selected hyper-params: {'solver': 'lbfgs'}
lgb tuner start tuning
Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 0.6, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.44999999999999996, 'subsample': 0.8500000000000001}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.1, 'reg_lambda': 0.05}
Fold 3, lgb tuner predictions
 [ 0.00368071  0.05762772  0.00368071 ...,  0.00368071  0.99576115
  0.01216079]

*** Start to train meta model for stacking averaged model ***


 clf_1
score=

Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 0.6, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.65, 'subsample': 0.75}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
xgb tuner start tuning
Selected hyper-params: {'booster': 'dart'}
Selected hyper-params: {'learning_rate': 0.01, 'n_estimators': 800}
Selected hyper-params: {'max_depth': 1}
Selected hyper-params: {'gamma': 0, 'min_child_weight': 1}
Selected hyper-params: {'colsample_bytree': 0.6, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.7, 'subsample': 0.9}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.2}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.15000000000000002}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0379837196419 

Fold 3, meta model (lgb tuner) predictions
 [ 0.00316157  0.08958538  0.00360967 ...,  0.00316157  0.99649392
  0.00422459]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0362292681531 

Fold 3, meta model (xgb tuner) predictions
 [ 0.0018739   0.06316341  0.00189205 ...,  0.0018739   0.99786359
  0.00190981]
Processing fold  4
gpu: 0  available: True
epoch= 259 best_loss= 0.17009364068508148
(4590, 2049) (8424, 2049)
logistic regression tuner start tuning
Selected hyper-params: {'solver': 'sag'}
lgb tuner start tuning
Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 400}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 1.0}
Selected hyper-params: {'colsample_bytree': 0.95, 'subsample': 0.95}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.2}
Selected hyper-params: {'reg_alpha': 0.15, 'reg_lambda': 0.30000000000000004}
Fold 4, lgb tuner predictions
 [ 0.01330292  0.11762051  0.00142958 ...,  0.0014951   0.9984987
  0.00303092]

*** Start to train meta model for stacking averaged model ***


 clf_1
score= 0.0356096436326

Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 200}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 0.4, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.30000000000000004, 'subsample': 0.8500000000000001}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
xgb tuner start tuning
Selected hyper-params: {'booster': 'dart'}
Selected hyper-params: {'learning_rate': 0.01, 'n_estimators': 800}
Selected hyper-params: {'max_depth': 1}
Selected hyper-params: {'gamma': 0, 'min_child_weight': 6}
Selected hyper-params: {'colsample_bytree': 0.8, 'subsample': 1.0}
Selected hyper-params: {'colsample_bytree': 0.8, 'subsample': 1.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.2}
Selected hyper-params: {'reg_alpha': 0.1, 'reg_lambda': 0.15000000000000002}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0387350992077 

Fold 4, meta model (lgb tuner) predictions
 [ 0.00685648  0.17429484  0.00516023 ...,  0.00398264  0.99604993
  0.00508245]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0363099994122 

Fold 4, meta model (xgb tuner) predictions
 [ 0.00265923  0.09102806  0.00196323 ...,  0.00196323  0.99769646
  0.00196323]
Processing fold  5
gpu: 0  available: True
epoch= 281 best_loss= 0.19984116653601328
(4590, 2049) (8424, 2049)
logistic regression tuner start tuning
Selected hyper-params: {'solver': 'sag'}
lgb tuner start tuning
Selected hyper-params: {'learning_rate': 0.05, 'n_estimators': 800}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 3}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 1.0}
Selected hyper-params: {'colsample_bytree': 1.0, 'subsample': 1.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.4}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.4}
Fold 5, lgb tuner predictions
 [  6.33709981e-04   1.21753052e-01   1.86734338e-03 ...,   1.86734338e-03
   9.98037538e-01   7.35076134e-03]

*** Start to train meta model for stacking averaged model ***


 clf_1
score= 0.

Selected hyper-params: {'learning_rate': 0.1, 'n_estimators': 400}
Selected hyper-params: {'min_split_gain': 0.0, 'num_leaves': 2}
Selected hyper-params: {'colsample_bytree': 0.4, 'subsample': 1.0}
Selected hyper-params: {'colsample_bytree': 0.30000000000000004, 'subsample': 0.95}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
xgb tuner start tuning
Selected hyper-params: {'booster': 'dart'}
Selected hyper-params: {'learning_rate': 0.01, 'n_estimators': 800}
Selected hyper-params: {'max_depth': 1}
Selected hyper-params: {'gamma': 0, 'min_child_weight': 2}
Selected hyper-params: {'colsample_bytree': 0.4, 'subsample': 0.8}
Selected hyper-params: {'colsample_bytree': 0.55, 'subsample': 0.9500000000000001}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}
Selected hyper-params: {'reg_alpha': 0.0, 'reg_lambda': 0.0}


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0324641439372 

Fold 5, meta model (lgb tuner) predictions
 [ 0.00231271  0.19204495  0.00212978 ...,  0.00231271  0.99691895
  0.00196867]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  out_of_fold_predictions[self.target_col] = self.out_of_fold_predictions[self.target_col]


meta model's training set score=  0.0314214897927 

Fold 5, meta model (xgb tuner) predictions
 [ 0.00170587  0.13889556  0.00162751 ...,  0.00170587  0.99758363
  0.00182244]
