In [None]:
!pip install pytorch-tabnet
!pip install rgf_python

In [None]:
import numpy as np
import pandas as pd
from pathlib import Path
import matplotlib.pyplot as plt
import optuna
import xgboost as xgb
from optuna.samplers import TPESampler
from sklearn.preprocessing import RobustScaler, QuantileTransformer, StandardScaler
from sklearn.model_selection import train_test_split, cross_val_score, StratifiedKFold, KFold
from sklearn.metrics import mean_squared_error
import catboost
import lightgbm
from sklearn.linear_model import Ridge, BayesianRidge, LinearRegression, ElasticNet
from scipy.optimize import minimize
import optuna
import pickle
from pytorch_tabnet.tab_model import TabNetRegressor
from sklearn.gaussian_process import GaussianProcessRegressor
from sklearn.tree import DecisionTreeRegressor
from rgf.sklearn import RGFRegressor
from sklearn.ensemble import RandomForestRegressor, ExtraTreesRegressor
import seaborn as sns

In [None]:
# Read the three competition CSV files from the input directory.
input_dir = Path('../input/tabular-playground-series-aug-2021/')
train_df, test_df, sample_submission = (
    pd.read_csv(input_dir / csv_name)
    for csv_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Split the training frame into a feature matrix and target vector (NumPy arrays);
# the test frame only has the 'id' column removed.
X = train_df.drop(columns=['id', 'loss']).to_numpy()
y = train_df['loss'].to_numpy()
X_test = test_df.drop(columns=['id']).to_numpy()

In [None]:
# Wrap the feature matrix in a DataFrame so groups of columns can be plotted by label,
# then draw histograms for columns 0-19.
X = pd.DataFrame(X)
fig = X[list(range(20))].hist(figsize=(20, 10))

In [None]:
# Histograms for columns 20-39.
fig = X[list(range(20, 40))].hist(figsize=(20, 10))

In [None]:
# Histograms for columns 40-59.
fig = X[list(range(40, 60))].hist(figsize=(20, 10))

In [None]:
# Histograms for columns 60-79.
fig = X[list(range(60, 80))].hist(figsize=(20, 10))

In [None]:
# Histograms for columns 80-99.
fig = X[list(range(80, 100))].hist(figsize=(20, 10))

In [None]:
# Histogram of the target `y` with a kernel-density overlay on a 20x10 figure.
plt.figure(figsize=(20, 10))
sns.histplot(data=y, color='purple', kde=True)

In [None]:
# Fit the standardiser on the training features only, then apply that same fitted
# scaler to both the training and the test matrices.
scaler = StandardScaler().fit(X)
X, X_test = scaler.transform(X), scaler.transform(X_test)

In [None]:
# Keyword arguments forwarded to the XGBoost base model (see the training cell).
xgb_params = dict(
    max_depth=11,
    subsample=0.6,
    n_estimators=1700,
    reg_alpha=40,
    reg_lambda=18,
    min_child_weight=16,
    eta=0.01,
)

In [None]:
# Keyword arguments forwarded to the LightGBM base model (see the training cell).
lgb_params = dict(
    learning_rate=0.029318941369264474,
    n_estimators=1326,
    num_leaves=59,
    max_delta_step=0.27592939689630575,
    max_depth=155,
    colsample_bynode=0.769021843273818,
    colsample_bytree=0.2598752413950403,
    reg_alpha=6.370691591205219,
    reg_lambda=5.909481328409366,
    subsample=0.5496580806696991,
    min_child_samples=267,
    cat_smooth=38,
)

In [None]:
def rmse(y_true, y_pred):
    """Return the root-mean-squared error between ``y_true`` and ``y_pred``.

    Computed as ``sqrt(MSE)`` instead of ``mean_squared_error(..., squared=False)``:
    the ``squared`` keyword is deprecated in recent scikit-learn releases and
    removed in later ones, whereas this form works on every version.
    """
    return np.sqrt(mean_squared_error(y_true, y_pred))


# Three-stage stacking evaluated with 10-fold cross-validation:
#   Stage 1: TabNet, LightGBM, CatBoost and XGBoost are fit on the training part of the fold.
#   Stage 2: three linear meta-estimators are fit on the stage-1 predictions for the
#            validation part of the fold.
#   Stage 3: a Ridge regressor is fit on the stage-2 predictions.
# The stage-3 test predictions of every fold are collected in `final_test_preds`.
#
# NOTE(review): the stage-2 and stage-3 estimators are fit on (blend_train, y_valid) and
# scored on the same data, so the printed meta scores are in-sample, not out-of-fold.
kf = KFold(n_splits=10, shuffle=True, random_state=2021)
final_test_preds = []
X = pd.DataFrame(X)
y = pd.DataFrame(y)

# The three GBDT base models are created once; `.fit` is called on them again in every fold.
lgb_base_model = lightgbm.LGBMRegressor(**lgb_params, device='gpu', gpu_platform_id=0, gpu_device_id=0)
ctb_base_model = catboost.CatBoostRegressor(task_type='GPU')
xgb_base_model = xgb.XGBRegressor(gpu_id=0, tree_method='gpu_hist', **xgb_params)

meta_estimator1 = LinearRegression()
meta_estimator2 = BayesianRidge()
meta_estimator3 = ElasticNet()
final_estimator = Ridge()

for fold, (train_idx, test_idx) in enumerate(kf.split(X, y)):
    # A fresh TabNet model is built for each fold.
    tbn_base_model = TabNetRegressor(verbose=0)
    print('*'*15, f'Fold {fold+1}', '*'*15, '\n')
    print('Stage 1 Training/Predictions', '\n')
    X_train, X_valid = X.iloc[train_idx].to_numpy(), X.iloc[test_idx].to_numpy()
    # `y` is a one-column DataFrame, so these targets are 2-D here; they are squeezed to
    # 1-D after the TabNet step (presumably TabNet needs 2-D targets; confirm against its docs).
    y_train, y_valid = y.iloc[train_idx].to_numpy(), y.iloc[test_idx].to_numpy()

    tbn_base_model.fit(X_train, y_train, eval_set=[(X_valid, y_valid)], patience=3)
    # Validation predictions are computed once per model and reused for both the
    # printed score and the stage-2 blend matrix.
    tbn_valid_pred = tbn_base_model.predict(X_valid)
    print(f'Stage 1 Model 1: TabNet Regressor | Fold {fold+1} Loss: {rmse(y_valid, tbn_valid_pred)}')

    y_train, y_valid = y_train.squeeze(), y_valid.squeeze()

    lgb_base_model.fit(X_train, y_train)
    lgb_valid_pred = lgb_base_model.predict(X_valid)
    print(f'Stage 1 Model 2: LightGBM Regressor | Fold {fold+1} Loss: {rmse(y_valid, lgb_valid_pred)}')

    ctb_base_model.fit(X_train, y_train, verbose=False)
    ctb_valid_pred = ctb_base_model.predict(X_valid)
    print(f'Stage 1 Model 3: CatBoost Regressor | Fold {fold+1} Loss: {rmse(y_valid, ctb_valid_pred)}')

    xgb_base_model.fit(X_train, y_train, verbose=False)
    xgb_valid_pred = xgb_base_model.predict(X_valid)
    print(f'Stage 1 Model 4: XGBoost Regressor | Fold {fold+1} Loss: {rmse(y_valid, xgb_valid_pred)}')

    print('\n', '*'*15, 'Stage 2 Training/Predictions', '*'*15, '\n')

    # One column per base model, in the order: LightGBM, CatBoost, XGBoost, TabNet.
    blend_train = np.c_[lgb_valid_pred, ctb_valid_pred, xgb_valid_pred, tbn_valid_pred]
    blend_test = np.c_[lgb_base_model.predict(X_test), ctb_base_model.predict(X_test), xgb_base_model.predict(X_test), tbn_base_model.predict(X_test)]

    meta_estimator1.fit(blend_train, y_valid)
    meta_valid1 = meta_estimator1.predict(blend_train)
    meta_test1 = meta_estimator1.predict(blend_test)

    print(f'Meta Estimator 1: Linear Regression | Score: {rmse(y_valid, meta_valid1)}')

    meta_estimator2.fit(blend_train, y_valid)
    meta_valid2 = meta_estimator2.predict(blend_train)
    meta_test2 = meta_estimator2.predict(blend_test)

    print(f'Meta Estimator 2: Bayesian Ridge Regressor | Score: {rmse(y_valid, meta_valid2)}')

    meta_estimator3.fit(blend_train, y_valid)
    meta_valid3 = meta_estimator3.predict(blend_train)
    meta_test3 = meta_estimator3.predict(blend_test)

    print(f'Meta Estimator 3: ElasticNet Regressor | Score: {rmse(y_valid, meta_valid3)}')

    print('\n', '*'*15, 'Stage 3 Training/Predictions', '*'*15, '\n')

    # From here on the blend matrices hold the stage-2 outputs, one column per meta-estimator.
    blend_train = np.c_[meta_valid1, meta_valid2, meta_valid3]
    blend_test = np.c_[meta_test1, meta_test2, meta_test3]
    final_estimator.fit(blend_train, y_valid)
    print(f'Final Meta Estimator: Ridge Regressor | Score: {rmse(y_valid, final_estimator.predict(blend_train))}')
    final_test_preds.append(final_estimator.predict(blend_test))
    print('\n')

In [None]:
# Average the per-fold test predictions. Dividing by the number of collected folds
# (instead of a hard-coded 10) keeps this correct if the fold count is changed.
sample_submission['loss'] = sum(final_test_preds) / len(final_test_preds)
sample_submission.to_csv('submission.csv', index=False)