In [None]:
import pandas as pd
import numpy as np
import plotly.express as px
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold
import optuna
from sklearn.metrics import mean_squared_error
import xgboost as xgb

import warnings
warnings.filterwarnings("ignore")

In [None]:
# Read the three competition CSV files from the Kaggle input directory.
train = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/train.csv')
test = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/test.csv')
sample_submission = pd.read_csv('/kaggle/input/tabular-playground-series-jul-2021/sample_submission.csv')

# Parse the timestamp column of both frames so the .dt accessor can be used on it.
train = train.assign(date_time=pd.to_datetime(train['date_time']))
test = test.assign(date_time=pd.to_datetime(test['date_time']))

In [None]:
# Show the loaded training frame as this cell's output.
train

In [None]:
import math

# Names of the calendar-derived columns that pb_add() creates. The list itself
# is not referenced by any other cell visible in this notebook.
cat_features = ['months','hour',"min","is_weekend","dayofweek","working_hours"]

def pb_add(X):
    """Add calendar and humidity-ratio features to ``X`` and return it.

    The frame is modified in place; pass a copy if the original must be kept.

    Parameters
    ----------
    X : pandas.DataFrame
        Must contain a datetime ``date_time`` column (the ``.dt`` accessor is
        used on it) and the columns ``absolute_humidity`` and
        ``relative_humidity``.

    Returns
    -------
    pandas.DataFrame
        The same object, with the columns ``months``, ``hour``, ``min``,
        ``dayofweek``, ``working_hours``, ``is_weekend`` and ``SMC`` added.
    """
    stamp = X["date_time"].dt

    # Calendar parts of the timestamp.
    X['months'] = stamp.month.astype(int)
    X["hour"] = stamp.hour.astype(int)
    X["min"] = stamp.minute.astype(int)
    X["dayofweek"] = stamp.dayofweek.astype(int)

    # 1 for hours 8..20 inclusive, 0 otherwise.
    X["working_hours"] = X["hour"].isin(np.arange(8, 21, 1)).astype(int)
    # dayofweek is 0 for Monday, so 5 and 6 are Saturday and Sunday.
    X["is_weekend"] = (stamp.dayofweek >= 5).astype(int)

    # Ratio of absolute to relative humidity, computed from X's own columns.
    # Dividing by the global training frame here would index-align the test
    # rows against unrelated training rows.
    X["SMC"] = (X["absolute_humidity"] * 100) / X["relative_humidity"]

    return X

In [None]:
# Engineer the extra features on a copy, then discard the raw timestamp.
train = pb_add(train.copy()).drop(columns='date_time')

# Keep each target as its own single-column DataFrame.
target_carbon_monoxide = train[['target_carbon_monoxide']]
target_benzene = train[['target_benzene']]
target_nitrogen_oxides = train[['target_nitrogen_oxides']]

# After removing the targets, `train` holds the feature columns only.
train = train.drop(
    columns=['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']
)

In [None]:
# Feature matrix and target for the study that runs next. Later cells rebind
# these two module-level names before starting another study.
data=train
target=target_carbon_monoxide

def objective(trial, data=None, target=None):
    """Optuna objective: hold-out RMSE of an XGBoost regressor.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial used to sample the hyper-parameters below.
    data : pandas.DataFrame, optional
        Feature matrix. When omitted, the module-level ``data`` is looked up
        at call time.
    target : pandas.DataFrame, optional
        Target values. When omitted, the module-level ``target`` is looked up
        at call time.

    Returns
    -------
    float
        RMSE of the fitted model on the 15% hold-out split.
    """
    # Defaults written as ``data=data`` are evaluated once, when the ``def``
    # statement runs, so rebinding the globals afterwards would be ignored and
    # every study would silently tune against the first target. Resolving the
    # names here makes each call see the current values.
    if data is None:
        data = globals()['data']
    if target is None:
        target = globals()['target']

    # Fixed split seed so every trial is scored on the same hold-out rows.
    train_x, test_x, train_y, test_y = train_test_split(
        data, target, test_size=0.15, random_state=42
    )

    param = {
        'objective':'reg:squaredlogerror',
        'booster':trial.suggest_categorical('booster', ['dart','gbtree']),
        'tree_method':'gpu_hist',  # train on the GPU to speed up each trial
        'lambda': trial.suggest_loguniform('lambda', 1e-3, 10.0),
        'alpha': trial.suggest_loguniform('alpha', 1e-3, 10.0),
        'colsample_bytree': trial.suggest_categorical('colsample_bytree', [0.3,0.4,0.5,0.6,0.7,0.8,0.9, 1.0]),
        'subsample': trial.suggest_categorical('subsample', [0.4,0.5,0.6,0.7,0.8,1.0]),
        'learning_rate': trial.suggest_uniform('learning_rate', 0.001, 0.1),
        'n_estimators': trial.suggest_int('n_estimators', 600, 1200),
        'max_depth': trial.suggest_categorical('max_depth', [3,5,7,9,11,13,15]),
        'random_state': trial.suggest_categorical('random_state', [24, 48,2020,1002]),
        'min_child_weight': trial.suggest_int('min_child_weight', 1, 400),
    }
    model = xgb.XGBRegressor(**param)

    # The hold-out split doubles as the early-stopping validation set.
    model.fit(train_x,train_y,eval_set=[(test_x,test_y)],early_stopping_rounds=20,verbose=False)

    preds = model.predict(test_x)

    # NOTE(review): the model is trained with a squared-log-error objective but
    # scored with plain RMSE -- confirm which metric the study should minimise.
    # The square root is taken explicitly rather than via ``squared=False`` so
    # the result does not depend on that keyword being available.
    rmse = float(np.sqrt(mean_squared_error(test_y, preds)))

    return rmse


In [None]:
# Raise Optuna's logging threshold to WARNING before running the search.
optuna.logging.set_verbosity(optuna.logging.WARNING)

# Minimise the value returned by objective() over 150 trials.
study = optuna.create_study(direction='minimize')
study.optimize(objective, n_trials=150)

print('Number of finished trials:', len(study.trials))
study.best_params

In [None]:
# best_trial.params holds only the values drawn through trial.suggest_*.
# The loss and tree method that objective() sets by hand are not part of it,
# so they are supplied again here; otherwise the final model would be trained
# with XGBoost's defaults instead of the configuration that was tuned.
catrgr_carbon_mono = xgb.XGBRegressor(
    objective='reg:squaredlogerror',
    tree_method='gpu_hist',
    **study.best_trial.params,
)
catrgr_carbon_mono.fit(data, target, verbose=False)

In [None]:
# Switch the working target to benzene; the feature matrix is unchanged.
data=train
target=target_benzene
optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(direction='minimize')
# Hand the features and target to objective() explicitly so this study is
# guaranteed to be scored against this cell's target, independent of whatever
# objective() falls back to when called with a trial only.
study.optimize(lambda trial: objective(trial, data, target), n_trials=150)
print('Number of finished trials:', len(study.trials))
study.best_trial.params

In [None]:
# best_trial.params holds only the values drawn through trial.suggest_*.
# The loss and tree method that objective() sets by hand are not part of it,
# so they are supplied again here; otherwise the final model would be trained
# with XGBoost's defaults instead of the configuration that was tuned.
catrgr_benzene = xgb.XGBRegressor(
    objective='reg:squaredlogerror',
    tree_method='gpu_hist',
    **study.best_trial.params,
)
catrgr_benzene.fit(data, target, verbose=False)

In [None]:
# Switch the working target to nitrogen oxides; the feature matrix is unchanged.
data=train
target=target_nitrogen_oxides

optuna.logging.set_verbosity(optuna.logging.WARNING)
study = optuna.create_study(direction='minimize')
# Hand the features and target to objective() explicitly so this study is
# guaranteed to be scored against this cell's target, independent of whatever
# objective() falls back to when called with a trial only.
study.optimize(lambda trial: objective(trial, data, target), n_trials=150)
print('Number of finished trials:', len(study.trials))
study.best_trial.params

In [None]:
# best_trial.params holds only the values drawn through trial.suggest_*.
# The loss and tree method that objective() sets by hand are not part of it,
# so they are supplied again here; otherwise the final model would be trained
# with XGBoost's defaults instead of the configuration that was tuned.
catrgr_nitro = xgb.XGBRegressor(
    objective='reg:squaredlogerror',
    tree_method='gpu_hist',
    **study.best_trial.params,
)
catrgr_nitro.fit(data, target, verbose=False)

In [None]:
# Add the engineered features to a copy of the test frame.
test = pb_add(test.copy())

# Keep the timestamps for the submission file, then remove them from the
# frame that is passed to the models.
x_date_time = test['date_time'].copy()
test = test.drop(columns='date_time')

In [None]:
# Assemble the submission in one step: the saved timestamps followed by one
# prediction column per target, in the same column order as before.
sub = pd.DataFrame({
    'date_time': x_date_time,
    'target_carbon_monoxide': catrgr_carbon_mono.predict(test),
    'target_benzene': catrgr_benzene.predict(test),
    'target_nitrogen_oxides': catrgr_nitro.predict(test),
})


In [None]:
# Write the predictions to submission.csv, leaving out the DataFrame index.
sub.to_csv('submission.csv',index = False)