# Tabular playground - Jan 2021

This notebook demonstrates how to train several machine learning algorithms and regularize them. It is intended for beginners. Please upvote if you like the content.

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns

In [None]:
# Read the three competition files in one pass: training rows, test rows,
# and the submission template.
train_df, test_df, sample_sub = (
    pd.read_csv(csv_path)
    for csv_path in (
        '../input/tabular-playground-series-jan-2021/train.csv',
        '../input/tabular-playground-series-jan-2021/test.csv',
        '../input/tabular-playground-series-jan-2021/sample_submission.csv',
    )
)

In [None]:
# Preview the first five training rows.
train_df.head(n=5)

In [None]:
# Report (rows, columns) for both frames, one per line.
print(
    f'Train shape:{train_df.shape}',
    f'Test shape:{test_df.shape}',
    sep='\n',
)

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) of the training frame.
train_df.describe()

In [None]:
# Print column dtypes, non-null counts and memory usage of the training frame.
train_df.info()

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Separate the regression target from the predictors.
# NOTE(review): every column other than 'target' stays in X, including any
# row-identifier column (the submission file is keyed by 'id') — confirm that
# is intended before relying on feature importances.
y = train_df['target']
X = train_df.drop(columns='target')

In [None]:
# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=3,
)

In [None]:
from lightgbm import LGBMRegressor

# LightGBM settings, gathered in one place so the regularization knobs are easy to spot.
lgb_settings = dict(
    n_estimators=493,
    metric='rmse',
    reg_alpha=2,           # L1 penalty
    reg_lambda=10,         # L2 penalty
    colsample_bytree=0.8,  # fraction of columns sampled for each tree
)
lgb_model = LGBMRegressor(**lgb_settings)

In [None]:
# Train on the training split and track the metric on the held-out split.
lgb_model.fit(
    X_train,
    y_train,
    eval_set=[(X_val, y_val)],
)

In [None]:
# Score the competition test rows with the fitted LightGBM model.
y_pred_lgb = lgb_model.predict(X=test_df)

In [None]:
import xgboost as xgb

In [None]:
# Booster settings for XGBoost's native training API.
params = {
    'objective': 'reg:squarederror',  # squared-error regression
    'eval_metric': 'rmse',
    'eta': 0.01,                      # learning rate
    'max_depth': 10,
    'subsample': 0.6,                 # fraction of rows sampled per tree
    'colsample_bytree': 0.6,          # fraction of columns sampled per tree
    'lambda': 10,                     # L2 regularization
    'alpha': 2,                       # L1 regularization
    'random_state': 42,
    # 'silent' is no longer a recognised XGBoost parameter and only triggers an
    # "unused parameter" warning; 'verbosity': 0 is the supported quiet mode.
    'verbosity': 0,
}
    
# Wrap both splits in DMatrix objects, XGBoost's native data container.
tr_data = xgb.DMatrix(X_train, label=y_train)
va_data = xgb.DMatrix(X_val, label=y_val)

# Datasets whose metric is reported during boosting, with their log labels.
watchlist = [(tr_data, 'train'), (va_data, 'valid')]

# 1000 boosting rounds, logging the evaluation metric every 100 rounds.
model_xgb = xgb.train(
    params,
    tr_data,
    num_boost_round=1000,
    evals=watchlist,
    maximize=False,
    verbose_eval=100,
)

In [None]:
dtest = xgb.DMatrix(test_df)

# `ntree_limit` / `best_ntree_limit` were deprecated in XGBoost 1.4 and later
# removed; `iteration_range` (half-open, in boosting rounds) is the supported
# way to restrict prediction to the best rounds.
# `best_iteration` may be missing when training ran without early stopping;
# in that case every boosted round is used.
best_iter = getattr(model_xgb, 'best_iteration', None)
if best_iter is None:
    y_xgb_pred = model_xgb.predict(dtest)
else:
    y_xgb_pred = model_xgb.predict(dtest, iteration_range=(0, best_iter + 1))

In [None]:
from catboost import CatBoostRegressor

In [None]:
# CatBoost settings, grouped by purpose.
cb_settings = dict(
    # model capacity and regularization
    iterations=795,
    learning_rate=0.05,
    depth=10,
    reg_lambda=10,
    bagging_temperature=0.2,
    # evaluation and overfitting detector
    eval_metric='RMSE',
    metric_period=50,
    od_type='Iter',
    od_wait=20,
    # reproducibility
    random_seed=42,
)
cb_model = CatBoostRegressor(**cb_settings)

# Fit on the training split, evaluate on the held-out split, keep the best
# iteration found there, and log every 50 iterations.
cb_model.fit(
    X=X_train,
    y=y_train,
    eval_set=(X_val, y_val),
    verbose=50,
    use_best_model=True,
)

In [None]:
# Score the competition test rows with the fitted CatBoost model.
cat_pred = cb_model.predict(data=test_df)

In [None]:
# Equal-weight blend: the plain average of the three models' test predictions.
model_outputs = (y_pred_lgb, y_xgb_pred, cat_pred)
sample_sub['target'] = sum(model_outputs) / len(model_outputs)

In [None]:
# Trim the submission values to six decimal places.
sample_sub = sample_sub.round(decimals=6)

In [None]:
# Write only the id/target pair, with a header row and without the DataFrame index.
sample_sub[['id', 'target']].to_csv(
    'submission.csv',
    header=True,
    index=False,
)