In [None]:
import numpy as np
import math
import pandas as pd
import warnings
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_log_error
from sklearn.preprocessing import StandardScaler,MinMaxScaler
from sklearn.model_selection import StratifiedKFold,train_test_split
from catboost import CatBoostRegressor

# Notebook-wide setup: suppress every Python warning and switch seaborn
# to its "darkgrid" theme for all figures drawn below.
warnings.filterwarnings(action="ignore")
sns.set_style(style="darkgrid")

In [None]:
# Read the competition train / test tables.
train = pd.read_csv("/kaggle/input/tabular-playground-series-jul-2021/train.csv")
test  = pd.read_csv("/kaggle/input/tabular-playground-series-jul-2021/test.csv")

# The last three columns of `train` are used as targets, everything before
# them as inputs. `.copy()` gives train_x its own data, so the column
# assignments below modify train_x itself rather than a slice of `train`
# (the chained-assignment situation pandas warns about).
train_x = train.iloc[:, :-3].copy()
train_y = train.iloc[:, -3:]

# Calendar features derived from the timestamp column.
date_time = pd.to_datetime(train_x["date_time"])
train_x["month"] = date_time.dt.month
train_x["hour"] = date_time.dt.hour
train_x["day_of_week"] = date_time.dt.dayofweek

# Inputs + engineered features + targets side by side, used for the plots
# and for building the model matrices further down.
train_analyze = pd.concat((train_x, train_y), axis=1)

In [None]:
# Add calendar columns to the test table, taken from its parsed timestamp.
test_date_time = pd.to_datetime(test["date_time"])

# (new column name, attribute of the .dt accessor), in insertion order.
for column_name, dt_attribute in (
    ("day_of_week", "dayofweek"),
    ("month", "month"),
    ("hour", "hour"),
):
    test[column_name] = getattr(test_date_time.dt, dt_attribute)

**Time based analysis of chemicals**

In [None]:
# 3x3 grid of bar plots: one column per target, one row per calendar
# feature (row 0 = hour, row 1 = day_of_week, row 2 = month).
targets = train_y.columns
time_features = ("hour", "day_of_week", "month")
fig, ax = plt.subplots(3, 3, figsize=(17, 10))
for w in range(3):
    for row, feature in enumerate(time_features):
        # x / y are passed by keyword: recent seaborn releases accept only
        # `data` positionally, so the positional form raises a TypeError.
        sns.barplot(data=train_analyze, x=feature, y=targets[w], ax=ax[row, w])

**Feature correlations - Pearson**

In [None]:
# Annotated heatmap of pairwise correlations (DataFrame.corr default method).
plt.figure(figsize=(18, 10))
# train_analyze still carries the raw "date_time" column; restrict to
# numeric columns explicitly so corr() does not fail on non-numeric data
# (pandas >= 2.0 no longer drops such columns silently).
numeric_columns = train_analyze.select_dtypes(include="number")
sns.heatmap(numeric_columns.corr(), annot=True)

**Sensors and their patterns in the target variables**

In [None]:
# 3x5 grid of scatter plots: one column per sensor (sensor_1 .. sensor_5),
# one row per target, with the target on x and the sensor reading on y.
fig, ax = plt.subplots(3, 5, figsize=(18, 12))

# (target column, marker edge colour) for grid rows 0, 1 and 2.
row_specs = (
    ("target_carbon_monoxide", "red"),
    ("target_benzene", "green"),
    ("target_nitrogen_oxides", "yellow"),
)

for j in range(5):
    sensor_values = train_analyze["sensor_" + str(j + 1)]
    for row, (target_name, edge_colour) in enumerate(row_specs):
        sns.scatterplot(
            y=sensor_values,
            x=train_analyze[target_name],
            ax=ax[row, j],
            alpha=0.5,
            ec=edge_colour,
            s=6,
        )

**Data transform and splits**

In [None]:
# Model inputs: columns 1..10 of train_analyze (column 0 is skipped).
# Optional extra feature, currently disabled: + ["day_of_week"]
features = train_analyze.columns[1:11].tolist()
x = train_analyze[features]

# Targets are modelled on the log1p scale; predictions are mapped back with
# expm1 before submission.
y = np.log1p(train_analyze[train_y.columns])

# Fixed seed so the hold-out split (and every score / prediction derived
# from it) is the same on each Restart & Run All.
split_seed = 42
xtrain, xtest, ytrain, ytest = train_test_split(
    x, y, test_size=0.45, random_state=split_seed
)

# One series per target, in the column order of train_y.
ytr_cm, ytr_be, ytr_no = (ytrain.iloc[:, i] for i in range(3))
yte_cm, yte_be, yte_no = (ytest.iloc[:, i] for i in range(3))

**Baseline CatBoost regressor**

In [None]:
print("Features used for model : ",features)
test_set = test[features]
early_stop = 3
verbose = 0


def _fit_score_predict(iterations, y_fit, y_val, label):
    """Fit one CatBoost model for a single target, report its hold-out score
    and predict the test set.

    Reads the notebook-level ``xtrain``, ``xtest``, ``test_set``,
    ``early_stop`` and ``verbose``.

    Parameters
    ----------
    iterations : int
        Upper bound on boosting iterations (early stopping may end sooner).
    y_fit, y_val : pandas.Series
        log1p-transformed target for the training / hold-out rows.
    label : str
        Target name used in the printed score line.

    Returns
    -------
    (model, rmsle, predictions)
        The fitted regressor, the hold-out RMSLE, and the test-set
        predictions mapped back to the original scale with expm1.
    """
    model = CatBoostRegressor(iterations=iterations, depth=6,
                              early_stopping_rounds=early_stop,
                              verbose=verbose, use_best_model=True)
    model.fit(xtrain, y_fit, eval_set=(xtest, y_val))

    # The targets are already log1p(raw), so plain RMSE on this scale *is*
    # the RMSLE of the raw targets. Calling mean_squared_log_error here would
    # take the log a second time (and can reject negative log-scale
    # predictions), and the score must not be rescaled before printing.
    residuals = np.asarray(y_val) - model.predict(xtest)
    rmsle = np.sqrt(np.mean(residuals ** 2))
    # NOTE(review): xtest is also the early-stopping eval_set, so this score
    # is computed on data that influenced model selection.
    print(f"RMSLE score {label}: {rmsle}")

    return model, rmsle, np.expm1(model.predict(test_set))


print("--"*25)
print("Fitting with CO")
catregcm, rmsle, predict_catcm = _fit_score_predict(1200, ytr_cm, yte_cm, "carbon monoxide")

print("--"*25)
print("Fitting with Benzene")
catregbe, rmsle, predict_catbe = _fit_score_predict(1200, ytr_be, yte_be, "benzene")

print("--"*25)
print("Fitting with NO")
catregno, rmsle, predict_catno = _fit_score_predict(1430, ytr_no, yte_no, "Nitrogen_oxide")
print("--"*25)

**submission code**

In [None]:
# Assemble the submission table: the test timestamps plus one prediction
# column per target, then write it out without the index column.
submission_columns = {
    "date_time": test["date_time"].tolist(),
    "target_carbon_monoxide": predict_catcm,
    "target_benzene": predict_catbe,
    "target_nitrogen_oxides": predict_catno,
}
frame = pd.DataFrame(submission_columns)

frame.to_csv("sample_sub.csv", index=False)