In [None]:
!/opt/conda/bin/python3.7 -m pip install --upgrade pip -q
!pip install --upgrade xgboost -q

In [None]:
import numpy as np 
import pandas as pd

import xgboost as xgb

import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import KFold

from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization
from tensorflow.keras.initializers import TruncatedNormal
from tensorflow.keras.regularizers import l1, l2, l1_l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import EarlyStopping, LearningRateScheduler, ReduceLROnPlateau

train = pd.read_csv('/kaggle/input/tabular-playground-series-jan-2021/train.csv')
test = pd.read_csv('/kaggle/input/tabular-playground-series-jan-2021/test.csv')
sub = pd.read_csv('/kaggle/input/tabular-playground-series-jan-2021/sample_submission.csv')

# Unscaled features / target. The first column is skipped in both frames and
# the last column of `train` is used as the target.
# Explicit copies: `train` / `test` are overwritten in place below, and a plain
# .iloc slice may share memory with its parent depending on the pandas version.
X, y, X_test = train.iloc[:, 1:-1].copy(), train.iloc[:, -1].copy(), test.iloc[:, 1:].copy()

from sklearn.preprocessing import StandardScaler

# One scaler fitted on train and test features stacked together (train rows first).
feature_data = pd.concat([X, X_test])
feature_data_norm = StandardScaler().fit_transform(feature_data)

n_train = train.shape[0]
assert feature_data_norm.shape[0] == n_train + test.shape[0], "unexpected row count after scaling"

# Write the scaled values back: the first n_train rows belong to train ...
train.iloc[:, 1:-1] = feature_data_norm[:n_train]
# ... and the remaining rows belong to test. (The previous slice
# `[:test.shape[0]]` copied scaled *train* rows into the test frame.)
test.iloc[:, 1:] = feature_data_norm[n_train:]

X_norm, X_test_norm = train.iloc[:, 1:-1], test.iloc[:, 1:]

In [None]:
from sklearn.decomposition import PCA

# Reduce the scaled feature frames to 5 principal components.
# The projection is fitted on X_norm only and then re-used, unchanged, for X_test_norm.
# NOTE(review): rX / rX_test are not referenced by any other cell visible in this notebook.
pca = PCA(n_components=5)
rX = pca.fit_transform(X_norm)
rX_test = pca.transform(X_test_norm)

In [None]:
# Pairwise correlations between every column of `train` except the first one
corr = train.iloc[:, 1:].corr()

# Hide the upper triangle (diagonal included) so each pair is drawn only once
mask = np.triu(np.ones(corr.shape, dtype=bool))

# Figure / axes the heatmap is drawn on
f, ax = plt.subplots(figsize=(11, 9))

# Diverging palette centred between the two hues
cmap = sns.diverging_palette(230, 20, as_cmap=True)

# Square cells, thin separators, colour scale capped at 0.3 and a shrunken colour bar
heatmap_style = dict(
    cmap=cmap,
    vmax=.3,
    center=0,
    square=True,
    linewidths=.5,
    cbar_kws={"shrink": .5},
)
sns.heatmap(corr, mask=mask, **heatmap_style)

In [None]:
#  param = {
#      'booster': 'dart',
#      'subsample': 0.8,
#      'rate_drop': 0.6,
#      'one_drop': 1,
#         'max_depth': 10,
#         'min_child_weight': 50,  
#         'tree_method': 'gpu_hist',
#         'objective':'reg:squarederror',
#      'normalize_type': 'forest',
#      'colsample_bytree': 0.8,
#      'grow_policy': 'lossguide',
#          'max_delta_step': 10,
#         'n_jobs':-1,
#         'reg_lambda':1e-3,
#         'eta':0.1,  
#         'eval_metric': ['mae','rmse'],
#         'verbosity': 1,
#         'predictor': 'gpu_predictor',
#          'seed': 11647
#     }
    
# param = {
#         'booster': 'gbtree',
#         'max_depth': 20,
#         'subsample': 0.51,
#         'min_child_weight': 3,  
#         'tree_method': 'gpu_hist',
#         'n_jobs':-1,
#         'eta':0.09,  
#         'eval_metric': 'mae',
#         'verbosity': 1,
#         'predictor': 'gpu_predictor'
#     }

In [None]:
def _take_rows(data, indices):
    """Select rows by position from a pandas object (via .iloc) or an array-like."""
    return data.iloc[indices] if hasattr(data, 'iloc') else data[indices]


def kfold_validation(X, y, X_test, n_fold=10, seeds=(0, 1, 2, 3, 4, 5, 6), warm_start=True):
    """
        Trains an XGBoost linear booster with repeated K-fold cross-validation
        and averages the test-set predictions of every fold model.

        One shuffled KFold split is generated per seed; each fold trains with
        early stopping on that fold's validation MAE and predicts X_test.
        The averaged prediction is also written to the global `sub` frame
        (column 'target') and saved as 'subX.csv'.

        input:
            X, y: training features and target (pandas objects or numpy arrays)
            X_test: features to predict
            n_fold: number of folds of each KFold split
            seeds: one KFold `random_state` per repetition
            warm_start: if True (default, original behaviour) every fold after
                the first of a seed continues training from the model saved by
                the previous fold of that seed. NOTE(review): that model has
                already been fitted on rows that are now in the validation
                split, so the early-stopping metric is optimistic; pass False
                to train every fold from scratch.

        return:
            preds: array of shape (len(seeds) * n_fold, n_test) with the
                   prediction of every fold model
            p: pandas Series with the mean prediction over all fold models
            bst: the booster trained on the last fold (None if no fold ran)
    """
    seeds = list(seeds)
    n_models = len(seeds) * n_fold
    preds = np.empty((n_models, X_test.shape[0]))  # One row per fold model

    param = {
        'booster': 'gblinear',
        'objective': 'reg:squarederror',
        'n_jobs': -1,
        'top_k': 10,
        'updater': 'coord_descent',
        'reg_lambda': 9e-3,
        'reg_alpha': 5e-3,
        'feature_selector': 'greedy',
        'eta': 0.3,
        'eval_metric': ['mae', 'rmse'],
        'verbosity': 1,
    }

    dtest = xgb.DMatrix(X_test)

    bst = None
    index = 0  # Row of `preds` filled by the next fold model
    for seed in seeds:
        kf = KFold(n_splits=n_fold, random_state=seed, shuffle=True)
        model_path = f'/kaggle/working/xg00_{seed}'
        model_saved = False  # Becomes True once this seed has written a model file

        for fold, (train_indices, val_indices) in enumerate(kf.split(X, y), start=1):
            # Positional selection for both pandas and numpy inputs
            # (a Series `y` is no longer indexed by label when X is an array).
            dtrain = xgb.DMatrix(_take_rows(X, train_indices),
                                 label=_take_rows(y, train_indices))
            dval = xgb.DMatrix(_take_rows(X, val_indices),
                               label=_take_rows(y, val_indices))

            # Folds are numbered 1..n_fold (the old modulo printed 0 for the last fold)
            print(f'{seed}-{fold}th fold')

            # A fresh callback per fold: the early-stopping state must not carry over
            early_stopping = xgb.callback.EarlyStopping(rounds=20,
                                                        metric_name='mae',
                                                        data_name='eval',
                                                        maximize=False)

            bst = xgb.train(param,
                            dtrain,
                            num_boost_round=500,
                            evals=[(dtrain, 'train'), (dval, 'eval')],
                            verbose_eval=True,
                            callbacks=[early_stopping],
                            xgb_model=model_path if (warm_start and model_saved) else None)

            bst.save_model(model_path)
            model_saved = True

            #------------------ Predictions -------------------
            preds[index] = bst.predict(dtest)
            index += 1

            # Starting different fold or end of folding
            print('#----------------#----------------#----------------#----------------#----------------#')

    # Averaging the predictions over every fold model
    p = pd.DataFrame(preds).sum() / n_models

    sub['target'] = p
    sub.to_csv('subX.csv', index=False)

    return preds, p, bst

In [None]:
# Two repetitions (KFold seeds 991 and 21) of 10-fold training;
# the per-fold prediction matrix is discarded.
_, avg_pred, bst = kfold_validation(
    X,
    y,
    X_test,
    n_fold=10,
    seeds=[991, 21],
)

In [None]:
# fig, ax = plt.subplots(figsize=(30, 30))
# xgb.plot_tree(bst, num_trees=9, ax=ax)
# plt.show()

In [None]:
# fig, ax = plt.subplots(figsize=(30, 30))
# xgb.plot_tree(bst, num_trees=5, ax=ax)
# plt.show()

In [None]:
# fig, ax = plt.subplots(3,1,figsize=(10, 10))
# xgb.plot_importance(bst, ax=ax[0])
# xgb.plot_importance(bst, ax=ax[1], importance_type='gain', title='gain')
# xgb.plot_importance(bst, ax=ax[2], importance_type='cover', title='cover')
# plt.show()

# Neural Network

In [None]:

def _dense(units, activation=None):
    """Dense layer with the initializer / regularizer recipe shared by all hidden layers."""
    return Dense(units,
                 kernel_initializer=TruncatedNormal(mean=0, stddev=2, seed=111),
                 kernel_regularizer=l1(0.001),
                 bias_initializer=TruncatedNormal(mean=0, stddev=0.1, seed=121),
                 bias_regularizer=l2(0.01),
                 activation=activation)


# Six stages of identical shape: a linear Dense, a softmax Dense of the same
# width, then batch normalisation. A single linear unit produces the output.
# NOTE(review): softmax as a hidden-layer activation is unusual for a
# regression network - confirm it is intended.
hidden_layers = []
for width in (16, 32, 32, 64, 128, 256):
    hidden_layers += [
        _dense(width),
        _dense(width, activation='softmax'),
        BatchNormalization(),
    ]

model = Sequential(layers=hidden_layers + [Dense(1)])

In [None]:
# Mean-absolute-error objective, Adam with an initial learning rate of 0.0015
model.compile(optimizer=Adam(0.0015), loss='mae')

# Stop after 20 epochs without a lower val_loss and restore the best weights
stop_on_plateau = EarlyStopping(monitor='val_loss',
                                min_delta=0,
                                patience=20,
                                verbose=1,
                                mode='min',
                                restore_best_weights=True)

# Multiply the learning rate by 0.9 after 5 epochs without a lower val_loss
decay_lr = ReduceLROnPlateau(monitor='val_loss',
                             factor=0.9,
                             patience=5,
                             verbose=1,
                             min_delta=0,
                             cooldown=0,
                             min_lr=1e-12)

# Keras holds out 10% of the rows passed to fit() for validation
hist = model.fit(X, y,
                 batch_size=1024,
                 epochs=1000,
                 validation_split=0.1,
                 verbose=1,
                 callbacks=[stop_on_plateau, decay_lr])

In [None]:
# Training vs. validation loss, one point per epoch
loss_curves = pd.DataFrame(hist.history)
loss_curves.plot(y=['loss', 'val_loss'], title='loss', figsize=(10, 10))

In [None]:
# Flatten the network output to one value per test row and write the submission file
nn_test_pred = model.predict(X_test)
sub['target'] = nn_test_pred.reshape(X_test.shape[0])
sub.to_csv('subN.csv', index=False)

In [None]:
# Persist the trained network under the relative path 'nn01'.
# NOTE(review): no file extension is given, so the on-disk format is whatever
# the installed Keras/TensorFlow version uses by default - confirm.
model.save('nn01')