# Preparation

## Imports

In [1]:
# Essentials
import numpy as np
import pandas as pd
import datetime
import random

# Plots
import seaborn as sns
import matplotlib.pyplot as plt

# Models
from sklearn.ensemble import RandomForestClassifier, GradientBoostingRegressor, AdaBoostRegressor, BaggingRegressor, ExtraTreesClassifier
from sklearn.kernel_ridge import KernelRidge
from sklearn.linear_model import RidgeClassifier, RidgeCV
from sklearn.linear_model import ElasticNet, ElasticNetCV
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from mlxtend.classifier import StackingCVClassifier
import lightgbm as lgb
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import Pool, CatBoostClassifier

# Stats
from scipy.stats import skew, norm
from scipy.special import boxcox1p
from scipy.stats import boxcox_normmax

# Misc
from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.model_selection import StratifiedKFold, cross_val_score, validation_curve
from sklearn.metrics import log_loss, confusion_matrix
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import scale
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import RobustScaler
from sklearn.decomposition import PCA

pd.set_option('display.max_columns', None)

# Ignore useless warnings
import warnings
warnings.filterwarnings(action="ignore")
pd.options.display.max_seq_items = 8000
pd.options.display.max_rows = 8000

import os

## Read data

In [2]:
# Read in the dataset as a dataframe
train = pd.read_csv("../input/tabular-playground-series-may-2021/train.csv")
test = pd.read_csv("../input/tabular-playground-series-may-2021/test.csv")
submission = pd.read_csv("../input/tabular-playground-series-may-2021/sample_submission.csv")

#train.info()
#test.info()
#submission.info()

# Data Exploration

## Target distribution

In [3]:
'''
sns.set_style("white")
sns.set_color_codes(palette='deep')
f, ax = plt.subplots(figsize=(8, 7))
#Check the new distribution 
sns.histplot(train['target'].sort_values(), color="b");
ax.xaxis.grid(False)
ax.set(ylabel="Frequency")
ax.set(xlabel="Target")
ax.set(title="Target distribution")
sns.despine(trim=True, left=True)
plt.show()
'''

'\nsns.set_style("white")\nsns.set_color_codes(palette=\'deep\')\nf, ax = plt.subplots(figsize=(8, 7))\n#Check the new distribution \nsns.histplot(train[\'target\'].sort_values(), color="b");\nax.xaxis.grid(False)\nax.set(ylabel="Frequency")\nax.set(xlabel="Target")\nax.set(title="Target distribution")\nsns.despine(trim=True, left=True)\nplt.show()\n'

In [4]:
'''
# Skew and kurt
print("Skewness: %f" % train['Target'].skew())
print("Kurtosis: %f" % train['Target'].kurt())
'''

'\n# Skew and kurt\nprint("Skewness: %f" % train[\'Target\'].skew())\nprint("Kurtosis: %f" % train[\'Target\'].kurt())\n'

## Features EDA

In [5]:
'''
# visualising some more outliers in the data values
fig, axs = plt.subplots(ncols=2, nrows=1, figsize=(12, 120))
plt.subplots_adjust(right=2)
plt.subplots_adjust(top=2)
sns.color_palette("husl", 8)
for i, feature in enumerate(list(train_features), 1):
    plt.subplot(len(list(train_features)), 3, i)
    sns.boxplot(x=feature, y=train_labels, hue=train_labels, palette='Blues', data=train_features)
        
    plt.xlabel('{}'.format(feature), size=15,labelpad=12.5)
    plt.ylabel('Target', size=15, labelpad=12.5)
    
    for j in range(2):
        plt.tick_params(axis='x', labelsize=12)
        plt.tick_params(axis='y', labelsize=12)
    
    plt.legend(loc='best', prop={'size': 10})
        
plt.show()
'''

'\n# visualising some more outliers in the data values\nfig, axs = plt.subplots(ncols=2, nrows=1, figsize=(12, 120))\nplt.subplots_adjust(right=2)\nplt.subplots_adjust(top=2)\nsns.color_palette("husl", 8)\nfor i, feature in enumerate(list(train_features), 1):\n    plt.subplot(len(list(train_features)), 3, i)\n    sns.boxplot(x=feature, y=train_labels, hue=train_labels, palette=\'Blues\', data=train_features)\n        \n    plt.xlabel(\'{}\'.format(feature), size=15,labelpad=12.5)\n    plt.ylabel(\'Target\', size=15, labelpad=12.5)\n    \n    for j in range(2):\n        plt.tick_params(axis=\'x\', labelsize=12)\n        plt.tick_params(axis=\'y\', labelsize=12)\n    \n    plt.legend(loc=\'best\', prop={\'size\': 10})\n        \nplt.show()\n'

## Correlation

Filter by RF feature importance first when the number of features is too large.

In [6]:
'''
# Random Forest Classifier
rf = RandomForestClassifier(n_estimators=100, random_state=42)

rf_model = rf.fit(train_features, train_labels)
#rf_pred = rf_model.predict_proba(test_features)

forest_importances = pd.Series(rf.feature_importances_, index=train_features.columns)
top_feat = forest_importances.sort_values(ascending = False).head(20)
top_feat

train_features[top_feat.index]
'''

'\n# Random Forest Classifier\nrf = RandomForestClassifier(n_estimators=100, random_state=42)\n\nrf_model = rf.fit(train_features, train_labels)\n#rf_pred = rf_model.predict_proba(test_features)\n\nforest_importances = pd.Series(rf.feature_importances_, index=train_features.columns)\ntop_feat = forest_importances.sort_values(ascending = False).head(20)\ntop_feat\n\ntrain_features[top_feat.index]\n'

In [7]:
'''
#corr = train_features[top_feat.index].corr()
#corr
corr = train.corr()
plt.subplots(figsize=(15,12))
sns.heatmap(corr, vmax=0.9, cmap="Blues", square=True)
'''

'\n#corr = train_features[top_feat.index].corr()\n#corr\ncorr = train.corr()\nplt.subplots(figsize=(15,12))\nsns.heatmap(corr, vmax=0.9, cmap="Blues", square=True)\n'

### Further exploration for high correlation to target

In [8]:
'''
data = pd.concat([train['feature_38'], train['target']], axis=1)
f, ax = plt.subplots(figsize=(8, 6))
fig = sns.boxplot(x=train['feature_38'], y="target", data=data)
#fig.axis(ymin=0, ymax=800000);
'''

'\ndata = pd.concat([train[\'feature_38\'], train[\'target\']], axis=1)\nf, ax = plt.subplots(figsize=(8, 6))\nfig = sns.boxplot(x=train[\'feature_38\'], y="target", data=data)\n#fig.axis(ymin=0, ymax=800000);\n'

In [9]:
'''
data = pd.concat([train['SalePrice'], train['TotalBsmtSF']], axis=1)
data.plot.scatter(x='TotalBsmtSF', y='SalePrice', alpha=0.3, ylim=(0,800000));
'''

"\ndata = pd.concat([train['SalePrice'], train['TotalBsmtSF']], axis=1)\ndata.plot.scatter(x='TotalBsmtSF', y='SalePrice', alpha=0.3, ylim=(0,800000));\n"

# Data Preprocessing

In [10]:
# log target if skewed
# log(1+x) transform
# train["SalePrice"] = np.log1p(train["SalePrice"])

In [11]:
#Remove outliers

## Split datasets

In [12]:
# Split features and labels
train_labels = train['target'].reset_index(drop=True)
train_features = train.drop(['id','target'], axis=1)
test_features = test.drop(['id'], axis=1)
train_labels.head()

0    Class_2
1    Class_1
2    Class_1
3    Class_4
4    Class_2
Name: target, dtype: object

In [13]:
'''
# Combine train and test features in order to apply the feature transformation pipeline to the entire dataset
all_features = pd.concat([train_features, test_features]).reset_index(drop=True)
all_features.shape
'''

'\n# Combine train and test features in order to apply the feature transformation pipeline to the entire dataset\nall_features = pd.concat([train_features, test_features]).reset_index(drop=True)\nall_features.shape\n'

## Missing values

## Skewed Features

# Feature Engineering

In [14]:
'''
# feature of zero or nonzero values

def zeroornot(res, ls):
    m = res.shape[1]
    for l in ls:
        res = res.assign(newcol=pd.Series(res[l] == 0).astype(int)) 
        res.columns.values[m] = l + '_zero'
        m += 1
    return res

train_features = zeroornot(train_features, train_features.columns.tolist())
test_features = zeroornot(test_features, test_features.columns.tolist())
'''

"\n# feature of zero or nonzero values\n\ndef zeroornot(res, ls):\n    m = res.shape[1]\n    for l in ls:\n        res = res.assign(newcol=pd.Series(res[l] == 0).astype(int)) \n        res.columns.values[m] = l + '_zero'\n        m += 1\n    return res\n\ntrain_features = zeroornot(train_features, train_features.columns.tolist())\ntest_features = zeroornot(test_features, test_features.columns.tolist())\n"

In [15]:
#train_features = train_features.drop(train_features.iloc[:,0:50], axis=1)
#test_features = test_features.drop(test_features.iloc[:,0:50], axis=1)

## Encode categorical features

## Recreate training and test sets

In [16]:
'''
X = all_features.iloc[:len(train_labels), :]
X_test = all_features.iloc[len(train_labels):, :]
X.shape, train_labels.shape, X_test.shape
'''

'\nX = all_features.iloc[:len(train_labels), :]\nX_test = all_features.iloc[len(train_labels):, :]\nX.shape, train_labels.shape, X_test.shape\n'

# Feature Selection

# Model Validation and Selection

In [17]:
# Setup cross validation folds
kf = StratifiedKFold(n_splits=5, random_state=42, shuffle=True)

# Define error metrics
def loss(y, y_pred):
    return np.sqrt(log_loss(y, y_pred))

def cv_loss(model, X = train_features):
    loss = np.sqrt(-cross_val_score(model, X, train_labels, scoring="neg_log_loss", cv=kf, n_jobs=-1))
    return (loss)

In [18]:
'''
# Light Gradient Boosting Regressor
lightgbm = LGBMRegressor(objective='regression', 
                       num_leaves=6,
                       learning_rate=0.01, 
                       n_estimators=7000,
                       max_bin=200, 
                       bagging_fraction=0.8,
                       bagging_freq=4, 
                       bagging_seed=8,
                       feature_fraction=0.2,
                       feature_fraction_seed=8,
                       min_sum_hessian_in_leaf = 11,
                       verbose=-1,
                       random_state=42)

# XGBoost Regressor
xgboost = XGBRegressor(learning_rate=0.01,
                       n_estimators=6000,
                       max_depth=4,
                       min_child_weight=0,
                       gamma=0.6,
                       subsample=0.7,
                       colsample_bytree=0.7,
                       objective='reg:linear',
                       nthread=-1,
                       scale_pos_weight=1,
                       seed=27,
                       reg_alpha=0.00006,
                       random_state=42)

# Ridge Regressor
ridge_alphas = [1e-15, 1e-10, 1e-8, 9e-4, 7e-4, 5e-4, 3e-4, 1e-4, 1e-3, 5e-2, 1e-2, 0.1, 0.3, 1, 3, 5, 10, 15, 18, 20, 30, 50, 75, 100]
ridge = make_pipeline(RobustScaler(), RidgeCV(alphas=ridge_alphas, cv=kf))

# Support Vector Regressor
svr = make_pipeline(RobustScaler(), SVR(C= 20, epsilon= 0.008, gamma=0.0003))

# Gradient Boosting Regressor
gbr = GradientBoostingRegressor(n_estimators=6000,
                                learning_rate=0.01,
                                max_depth=4,
                                max_features='sqrt',
                                min_samples_leaf=15,
                                min_samples_split=10,
                                loss='huber',
                                random_state=42)  

# Random Forest Regressor
rf = RandomForestRegressor(n_estimators=1200,
                          max_depth=15,
                          min_samples_split=5,
                          min_samples_leaf=5,
                          max_features=None,
                          oob_score=True,
                          random_state=42)

# Stack up all the models above, optimized using xgboost
stack_gen = StackingCVRegressor(regressors=(xgboost, lightgbm, svr, ridge, gbr, rf),
                                meta_regressor=xgboost,
                                use_features_in_secondary=True)
                                
'''

"\n# Light Gradient Boosting Regressor\nlightgbm = LGBMRegressor(objective='regression', \n                       num_leaves=6,\n                       learning_rate=0.01, \n                       n_estimators=7000,\n                       max_bin=200, \n                       bagging_fraction=0.8,\n                       bagging_freq=4, \n                       bagging_seed=8,\n                       feature_fraction=0.2,\n                       feature_fraction_seed=8,\n                       min_sum_hessian_in_leaf = 11,\n                       verbose=-1,\n                       random_state=42)\n\n# XGBoost Regressor\nxgboost = XGBRegressor(learning_rate=0.01,\n                       n_estimators=6000,\n                       max_depth=4,\n                       min_child_weight=0,\n                       gamma=0.6,\n                       subsample=0.7,\n                       colsample_bytree=0.7,\n                       objective='reg:linear',\n                       nthread=-1

In [19]:

# XGBoost Classifier
xgb = XGBClassifier(learning_rate = 0.1,
                        colsample_bytree = 0.5,
                        max_depth = 10,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.9,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)

#xgb_model = xgb.fit(train_features, train_labels)
#xgb_pred = xgb_model.predict_proba(test_features)


In [20]:

# XGBoost Classifier2
xgb2 = XGBClassifier(n_estimators=110,
                        learning_rate = 0.5,
                        colsample_bytree = 0.13,
                       max_depth = 2,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)

#xgb2_model = xgb2.fit(train_features, train_labels)
#xgb2_pred = xgb2_model.predict_proba(test_features)

In [21]:
# XGBoost Classifier final for stacking

xgbf = XGBClassifier(n_estimators=180, 
                        learning_rate = 0.6, 
                        colsample_bytree = 0.7, 
                       max_depth = 1,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)
                   

In [22]:

# Random Forest Classifier
rf = RandomForestClassifier(min_samples_split = 5,
                            min_samples_leaf = 5,
                            max_depth = None,
                            bootstrap = True,
                            n_jobs=-1,
                            criterion = "entropy",
                            n_estimators=500,
                            max_features = 12,
                            random_state = 42)
'''
rf_model = rf.fit(train_features, train_labels)
rf_pred = rf_model.predict_proba(test_features)
'''

'\nrf_model = rf.fit(train_features, train_labels)\nrf_pred = rf_model.predict_proba(test_features)\n'

In [23]:

# Light Gradient Boosting Regressor
lgb =  LGBMClassifier(objective='multiclass', 
                       num_leaves=6,
                    max_depth=6,
                       learning_rate=0.1, 
                       n_estimators=220,
                       max_bin=200, 
                       bagging_fraction=0.8,
                       bagging_freq=4, 
                       bagging_seed=8,
                       feature_fraction=0.7,
                       feature_fraction_seed=8,
                            verbose=-1,
                       random_state=17,
                   n_jobs=-1)

#lgb_model = lgb.fit(train_features, train_labels)
#lgb_pred = lgb_model.predict_proba(test_features)


In [24]:

# Extra Trees Classifier
ext =ExtraTreesClassifier(  min_samples_split = 5,
                            min_samples_leaf = 5,
                            max_depth = 15,
                            bootstrap = True,
                            n_jobs=-1,
                            n_estimators=10,
                            max_features = 20,
                            random_state = 42,
                            criterion = 'entropy')
'''
ext_model = ext.fit(train_features, train_labels)
ext_pred = ext_model.predict_proba(test_features)
'''

'\next_model = ext.fit(train_features, train_labels)\next_pred = ext_model.predict_proba(test_features)\n'

In [25]:

#CatBoost
cat_features = train_features.columns.values.tolist()

train_dataset = Pool(data=train_features,
                     label=train_labels,
                     cat_features=cat_features)

eval_dataset = Pool(data=test_features,
                    cat_features=cat_features)

# Initialize CatBoostClassifier
cat = CatBoostClassifier(n_estimators=500,
                           learning_rate=0.3,
                           max_depth=2,
                           loss_function='MultiClass',
                          random_state=17,
                          thread_count=-1,
                            colsample_bylevel=0.5,
                            min_data_in_leaf=5)
# Fit model
#cat.fit(train_dataset)
# Get predicted probabilities for each class
#cat_pred = cat.predict_proba(eval_dataset)


In [26]:

#CatBoost2
cat_features = train_features.columns.values.tolist()

train_dataset = Pool(data=train_features,
                     label=train_labels,
                     cat_features=cat_features)

eval_dataset = Pool(data=test_features,
                    cat_features=cat_features)

# Initialize CatBoostClassifier
cat2 = CatBoostClassifier(n_estimators=550,
                           learning_rate=0.5,
                           max_depth=1,
                           loss_function='MultiClass',
                          random_state=17,
                          thread_count=-1,
                            colsample_bylevel=0.4,
                            min_data_in_leaf=5)
# Fit model
#cat2.fit(train_dataset)
# Get predicted probabilities for each class
#cat2_pred = cat2.predict_proba(eval_dataset)

In [27]:
%%time
# Stack up all the models above, optimized using lgb
stack_gen = StackingCVClassifier(classifiers = (rf, lgb, xgb, ext),
                                meta_classifier = lgb,
                                 use_probas= True,
                                use_features_in_secondary=True, 
                                 verbose=2,
                                 n_jobs=-1,
                                random_state=17)

#stack_gen_model = stack_gen.fit(np.array(train_features), np.array(train_labels))
#stack_pred = stack_gen_model.predict_proba(np.array(test_features))


CPU times: user 22 µs, sys: 0 ns, total: 22 µs
Wall time: 26.5 µs


In [28]:
'''
%%time
# Stack2
# Stack up all the models above, optimized using cat
stack_gen2 = StackingCVClassifier(classifiers = (lgb, xgb2, cat),
                                meta_classifier = cat,
                                 use_probas= True,
                                use_features_in_secondary=True, 
                                 verbose=2,
                                 n_jobs=-1,
                                random_state=17)

stack_gen2_model = stack_gen2.fit(np.array(train_features), np.array(train_labels))
#stack2_pred = stack_gen2_model.predict_proba(np.array(test_features))
'''

'\n%%time\n# Stack2\n# Stack up all the models above, optimized using cat\nstack_gen2 = StackingCVClassifier(classifiers = (lgb, xgb2, cat),\n                                meta_classifier = cat,\n                                 use_probas= True,\n                                use_features_in_secondary=True, \n                                 verbose=2,\n                                 n_jobs=-1,\n                                random_state=17)\n\nstack_gen2_model = stack_gen2.fit(np.array(train_features), np.array(train_labels))\n#stack2_pred = stack_gen2_model.predict_proba(np.array(test_features))\n'

In [29]:
'''
cat2_pred = cat2.predict_proba(eval_dataset)
stack_pred = stack_gen_model.predict_proba(np.array(test_features))
stack2_pred = stack_gen2_model.predict_proba(np.array(test_features))
'''

'\ncat2_pred = cat2.predict_proba(eval_dataset)\nstack_pred = stack_gen_model.predict_proba(np.array(test_features))\nstack2_pred = stack_gen2_model.predict_proba(np.array(test_features))\n'

In [30]:
'''
testf_features = np.append(stack_pred,stack2_pred,axis=1)
testf_features = np.append(testf_features,cat2_pred,axis=1)
'''

'\ntestf_features = np.append(stack_pred,stack2_pred,axis=1)\ntestf_features = np.append(testf_features,cat2_pred,axis=1)\n'

In [31]:
'''
cat2_predtr = cat2.predict_proba(train_dataset)
stack_predtr = stack_gen_model.predict_proba(np.array(train_features))
stack2_predtr = stack_gen2_model.predict_proba(np.array(train_features))
'''

'\ncat2_predtr = cat2.predict_proba(train_dataset)\nstack_predtr = stack_gen_model.predict_proba(np.array(train_features))\nstack2_predtr = stack_gen2_model.predict_proba(np.array(train_features))\n'

In [32]:
'''
trainf_features = np.append(stack_predtr,stack2_predtr,axis=1)
trainf_features = np.append(trainf_features,cat2_predtr,axis=1)
'''

'\ntrainf_features = np.append(stack_predtr,stack2_predtr,axis=1)\ntrainf_features = np.append(trainf_features,cat2_predtr,axis=1)\n'

In [33]:
'''
%%time
# Stackfinal

xgbf_model = xgbf.fit(trainf_features, train_labels)
stackf_pred = xgbf_model.predict_proba(testf_features)
'''

'\n%%time\n# Stackfinal\n\nxgbf_model = xgbf.fit(trainf_features, train_labels)\nstackf_pred = xgbf_model.predict_proba(testf_features)\n'

In [34]:

scores = {}

score = cv_loss(stack_gen)
print("rf: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['rf'] = (score.mean(), score.std())


rf: 1.0455 (0.0007)


In [None]:
'''
def cvsf_loss(model, X = trainf_features):
    loss = np.sqrt(-cross_val_score(model, X, train_labels, scoring="neg_log_loss", cv=kf, n_jobs=-1))
    return (loss)

scores = {}

score = cvsf_loss(xgbf)
print("rf: {:.4f} ({:.4f})".format(score.mean(), score.std()))
scores['rf'] = (score.mean(), score.std())
'''

In [None]:
'''
# Blend models in order to make the final predictions more robust to overfitting
def blended_predictions(X):
    return ((0.1 * ridge_model_full_data.predict(X)) + \
            (0.2 * svr_model_full_data.predict(X)) + \
            (0.1 * gbr_model_full_data.predict(X)) + \
            (0.1 * xgb_model_full_data.predict(X)) + \
            (0.1 * lgb_model_full_data.predict(X)) + \
            (0.05 * rf_model_full_data.predict(X)) + \
            (0.35 * stack_gen_model.predict(np.array(X))))

# Get final precitions from the blended model
blended_score = rmsle(train_labels, blended_predictions(X))
scores['blended'] = (blended_score, 0)
print('RMSLE score on train data:')
print(blended_score)


'''

## Confusion Matrix

In [None]:
#stack_pred2 = stack_gen_model.predict(np.array(train_features))

In [None]:
#unique, counts = np.unique(stack_pred2, return_counts=True)
#np.asarray((unique, counts)).T

In [None]:
#np.around(confusion_matrix(train_labels,stack_pred2, normalize = 'pred'),3)

# Hyperparameter Tuning

## Validation Curves

In [None]:
'''
num_est = [180,220,260]
vc_model = XGBClassifier(n_estimators=180, 
                        learning_rate = 0.6, 
                        colsample_bytree = 0.7, 
                       max_depth = 1,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)


# Calculate accuracy on training and test set using the
# parameter with 3-fold cross validation
train_score, test_score = validation_curve( vc_model,
                                X = trainf_features, y = train_labels, 
                                param_name = 'n_estimators', 
                                param_range = num_est, cv = 2, scoring="neg_log_loss", n_jobs=-1
                            )
 
# Calculating mean and standard deviation of training score
mean_train_score = -np.mean(train_score, axis = 1)
std_train_score = np.std(train_score, axis = 1)
 
# Calculating mean and standard deviation of testing score
mean_test_score = -np.mean(test_score, axis = 1)
std_test_score = np.std(test_score, axis = 1)
 
# Plot mean accuracy scores for training and testing scores
plt.plot(num_est, mean_train_score,
     label = "Training Score", color = 'b')
plt.plot(num_est, mean_test_score,
   label = "Cross Validation Score", color = 'g')
 
# Creating the plot
plt.title("Validation Curve")
plt.xlabel("param")
plt.ylabel("LogLoss")
plt.tight_layout()
plt.legend(loc = 'best')
plt.show()
'''

In [None]:
#mean_test_score

## Random Hyperparameter Grid

In [None]:
'''
# Number of trees in random forest
n_estimators = [int(x) for x in np.linspace(start = 10, stop = 500, num = 10)]
# Number of features to consider at every split
max_features = [int(x) for x in np.linspace(5, 15, num = 10)]
max_features.append("auto")
# Maximum number of levels in tree
max_depth = [int(x) for x in np.linspace(2, 10, num = 5)]
max_depth.append(None)
# Minimum number of samples required to split a node
min_samples_split = 5
# Minimum number of samples required at each leaf node
min_samples_leaf = 5
# Method of selecting samples for training each tree
bootstrap = True

#XGB
learning_rate = [0.01,0.1,0.3,0.5, 0.7,1]
colsample_bytree = [0.05,0.1, 0.3, 0.5,0.7,1]

#LGB
feature_fraction = [0.1, 0.3, 0.5, 0.7, 0.9, 1]

#CAT
colsample_bylevel =[0.1, 0.3, 0.5, 0.7, 0.9, 1]

# Create the random grid
random_grid = {'n_estimators': n_estimators,
               #'max_features': max_features,
               'max_depth': max_depth,
               #'min_samples_split': min_samples_split,
               #'min_samples_leaf': min_samples_leaf,
               #'bootstrap': bootstrap,
               'learning_rate': learning_rate,
               'colsample_bytree': colsample_bytree
               #'feature_fraction':feature_fraction
               #'colsample_bylevel':colsample_bylevel
              }
'''

In [None]:
'''
# Use the random grid to search for best hyperparameters
# First create the base model to tune
rf = XGBClassifier(#n_estimators=110,
                        #learning_rate = 0.5,
                        #colsample_bytree = 0.13,
                       #max_depth = 2,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)
# Random search of parameters, using 2 fold cross validation, 
# search across 30 different combinations, and use all available cores
rf_random = RandomizedSearchCV(estimator = rf, param_distributions = random_grid, n_iter = 20, cv = 2, verbose=2, random_state=42, n_jobs = -1)
# Fit the random search model
rf_random.fit(trainf_features, train_labels)
'''

In [None]:
#rf_random.best_params_

## Grid Search

In [None]:
'''
# Create the parameter grid based on the results of random search 
param_grid = {
    'max_depth': [1,2,3],
    'n_estimators': [450,500,550],
    'learning_rate':[0.1,0.3,0.5],
    'colsample_bylevel':[0.3,0.5,0.7]
}
# Create a based model
rf = CatBoostClassifier(#n_estimators=500,
                           #learning_rate=0.3,
                           #max_depth=2,
                           loss_function='MultiClass',
                          random_state=17,
                          thread_count=-1,
                            #colsample_bylevel=0.5,
                            min_data_in_leaf=5)

# Instantiate the grid search model
grid_search = GridSearchCV(estimator = rf, param_grid = param_grid, 
                          cv = 2, n_jobs = -1, verbose = 2)
  '''

In [None]:
'''
# Fit the grid search to the data
grid_search.fit(train_features, train_labels)
#best_grid = 
grid_search.best_estimator_
'''

In [None]:
#grid_search.best_params_

# Submission

In [None]:
# Read in sample_submission dataframe
submission[['Class_1', 'Class_2', 'Class_3', 'Class_4']] = stack_pred
submission.head()

In [None]:
submission.to_csv("submission_stackf2.csv", index=False)

In [None]:
'''
Performance

Basic rf (n=100)
CV 1.0633 (0.0022)
public 1.12521

Featured rf (n=100)
CV 1.0643 (0.0024)
public 1.12140

Tuned rf
CV 1.0506 (0.0005)
public 1.09957

Tuned rf with criterion = "entropy"
CV 1.0501 (0.0007)
public 1.09912

Basic xgb
public 1.09495

Tuned xgb
CV 1.0473 (0.0007)
public 1.08944

Tuned2 xgb
CV 1.0453 (0.0006)
public 1.08896

Basic lgb
CV 1.0470 (0.0006)
public 1.09036

Tuned lgb
CV 1.0454 (0.0007)
public 1.08871

Basic ext
CV 1.0643 (0.0011)
public 1.12962

Tuned ext
CV 1.0550 (0.0004)
public 1.10729

Tuned2 ext
CV 1.0536 (0.0005)
public 1.10495

Stacking (rf, xgb, lgb, ext)>lgb
CV 1.0455 (0.0007)
public 1.08811

Basic cat
CV 1.0516 (0.0009)
public 1.10367

Tuned cat
CV 1.0450 (0.0008)
public 1.09039

Tuned2 cat
CV 1.0449 (0.0007)
public 1.09092

Stacking1 (xgb, lgb, cat)>lgb
CV 1.0450 (0.0008)
public 1.08605

Stacking2 (xgb2, lgb, cat)>cat
CV 1.0448 (0.0008)
public 1.08766

Stacking3 (xgb2, lgb, cat)>xgb2
CV 1.0458 (0.0008)
public 1.08729

Stackingf (Stacking1, Stacking2, cat2)>xgb2
CV 1.0272 (0.0009)
public 1.09985

Stackingf2 (Stacking1, Stacking2, cat2)>xgbf
CV 1.0216 (0.0008)
public 1.11130


rf = RandomForestClassifier(min_samples_split = 5,
                            min_samples_leaf = 5,
                            max_depth = None,
                            bootstrap = True,
                            n_jobs=-1,
                            criterion = "entropy",
                            n_estimators=500,
                            max_features = 12,
                            random_state = 42)

xgb1 = XGBClassifier(learning_rate = 0.1,
                        colsample_bytree = 0.5,
                        max_depth = 10,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.9,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)
                       
xgb2 = XGBClassifier(n_estimators=110,
                        learning_rate = 0.5,
                        colsample_bytree = 0.13,
                       max_depth = 2,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)
                       
                       
lgb = LGBMClassifier(objective='multiclass', 
                       num_leaves=6,
                    max_depth=6,
                       learning_rate=0.1, 
                       n_estimators=220,
                       max_bin=200, 
                       bagging_fraction=0.8,
                       bagging_freq=4, 
                       bagging_seed=8,
                       feature_fraction=0.7,
                       feature_fraction_seed=8,
                            verbose=-1,
                       random_state=17,
                   n_jobs=-1)
                   
ext = ExtraTreesClassifier(  min_samples_split = 5,
                            min_samples_leaf = 5,
                            max_depth = 15,
                            bootstrap = True,
                            n_jobs=-1,
                            n_estimators=10,
                            max_features = 20,
                            random_state = 42,
                            criterion = 'entropy')
                            

cat = CatBoostClassifier(n_estimators=500,
                           learning_rate=0.3,
                           max_depth=2,
                           loss_function='MultiClass',
                          random_state=17,
                          thread_count=-1,
                            colsample_bylevel=0.5,
                            min_data_in_leaf=5)
                            
                            
cat2 = CatBoostClassifier(n_estimators=550,
                           learning_rate=0.5,
                           max_depth=1,
                           loss_function='MultiClass',
                          random_state=17,
                          thread_count=-1,
                            colsample_bylevel=0.4,
                            min_data_in_leaf=5)
                            
xgbf = XGBClassifier(n_estimators=180, 
                        learning_rate = 0.6, 
                        colsample_bytree = 0.7, 
                       max_depth = 1,
                        min_child_weight=5,
                       gamma=0.001,
                       subsample=0.7,
                       objective='multi:softprob',
                       nthread=-1,
                       scale_pos_weight=1,
                       reg_alpha=0.00006,
                       random_state=42)
                   
'''