In [None]:
import pandas as pd
import numpy as np

import lightgbm as lgb
import catboost as cbt
import xgboost as xgb

import random, os
import math

import matplotlib.pyplot as plt
import seaborn as sns
import math
from math import pi

from mlxtend.regressor import StackingCVRegressor
from sklearn.linear_model import BayesianRidge
from sklearn.preprocessing import StandardScaler, MinMaxScaler
from statsmodels.tsa.seasonal import seasonal_decompose
from sklearn.metrics import mean_squared_error

from sklearn.model_selection import KFold
# Default size for matplotlib figures: 15 wide by 4 tall.
from pylab import rcParams
rcParams['figure.figsize']=15,4

# Suppress every warning for the rest of the session. This keeps cell output
# short, but it also hides deprecation warnings raised by the libraries used.
import warnings
warnings.simplefilter('ignore')

RANDOM_SEED = 42  # seed handed to seed_everything() below


def seed_everything(seed=42):
    """Seed the random number sources touched by this notebook.

    Stores ``str(seed)`` in the ``PYTHONHASHSEED`` environment variable and
    seeds both Python's ``random`` module and NumPy's global generator.

    Parameters
    ----------
    seed : int, default 42
        Value passed to every seeding call.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for seeder in (random.seed, np.random.seed):
        seeder(seed)


seed_everything(RANDOM_SEED)

In [None]:
# Read the competition CSVs. The 'id' column is removed from the train and
# test frames; the sample submission is kept exactly as read.
train = pd.read_csv("../input/tabular-playground-series-aug-2021/train.csv")
train = train.drop(columns='id')

test = pd.read_csv("../input/tabular-playground-series-aug-2021/test.csv")
test = test.drop(columns='id')

sample = pd.read_csv("../input/tabular-playground-series-aug-2021/sample_submission.csv")

In [None]:
# Model inputs: every column of `train` except the 'loss' column.
features = list(filter(lambda name: name != 'loss', train.columns))

In [None]:
# Keyword arguments unpacked into xgb.XGBRegressor(**xgb_params).
xgb_params = {
    # GPU settings are disabled; uncomment these three entries to use them.
    # 'tree_method': 'gpu_hist',
    # 'gpu_id': 0,
    # 'predictor': 'gpu_predictor',

    'n_estimators': 10000,                     # upper bound on boosting rounds
    'learning_rate': 0.03628302216953097,
    'subsample': 0.7875490025178415,           # row sampling ratio
    'colsample_bytree': 0.11807135201147481,   # column sampling ratio per tree
    'max_depth': 3,
    'booster': 'gbtree',
    'reg_lambda': 0.0008746338866473539,       # L2 regularisation weight
    'reg_alpha': 23.13181079976304,            # L1 regularisation weight
    'n_jobs': -1,
    'random_state': 40,
}

In [None]:
# 5-fold cross-validated training.
#
# For each fold: min-max scale the feature columns (scaler fitted on the
# training part of the fold only), fit the regressor with early stopping on
# the held-out part, print the RMSE on both parts, and store the predictions
# for the test set in `sub` (one array per fold).
#
# The same estimator object is re-fitted on every fold, so after the loop
# `model` holds the last fold's fit.
model = xgb.XGBRegressor(**xgb_params)

sub = []
kf = KFold(n_splits=5, shuffle=True, random_state=41)
for i, (train_index, test_index) in enumerate(kf.split(train), start=1):
    # Take explicit copies so the scaled values below are written into
    # independent frames rather than into slices derived from `train`
    # (avoids chained-assignment ambiguity and leaves `train` untouched).
    xtrain = train.iloc[train_index].copy()
    xvalid = train.iloc[test_index].copy()

    # Fit the scaler on the training part only, then apply the same
    # transformation to the validation part and to the test set.
    scale = MinMaxScaler()
    xtrain[features] = scale.fit_transform(xtrain[features])
    xvalid[features] = scale.transform(xvalid[features])
    # Select by `features` so the test columns match the fitted scaler.
    df_test = pd.DataFrame(scale.transform(test[features]), columns=features)

    # NOTE(review): newer XGBoost releases expect `early_stopping_rounds` on
    # the estimator constructor instead of fit(); kept here as in the
    # original notebook -- confirm against the installed version.
    model.fit(xtrain[features], xtrain['loss'],
              early_stopping_rounds=300,
              eval_set=[(xvalid[features], xvalid['loss'])],
              verbose=2000)
    pred_valid = model.predict(xvalid[features])
    pred_train = model.predict(xtrain[features])

    # RMSE computed as sqrt(MSE): works on every scikit-learn version,
    # unlike the `squared=False` keyword.
    print(f'fold {i} validation error ', np.sqrt(mean_squared_error(xvalid['loss'], pred_valid)))
    print(f'fold {i} training error ', np.sqrt(mean_squared_error(xtrain['loss'], pred_train)))
    print("--------------------")

    pred = model.predict(df_test)
    sub.append(pred)

In [None]:
# Blend the folds: each array in `sub` becomes one column, and the row-wise
# mean of those columns is stored in the 'loss' column of `sample`.
fold_columns = np.column_stack(sub)
predictions = fold_columns.mean(axis=1)
sample['loss'] = predictions

In [None]:
# Save the submission frame as CSV, leaving out the DataFrame index.
sample.to_csv(path_or_buf='sub5.csv', index=False)