# Tabular Playground Series - Jul 2021
Continued from [last time](https://www.kaggle.com/astashiro/tps-jul2021-03add-features).

## CatBoost and Prophet

Blending in PyCaret did not cope well with the large number of features we added, so we decided to add only SEASON. For the periods when the sensor was off, we used the values predicted by PyCaret as the input to Prophet and took Prophet's forecast as the prediction.

In [None]:
!pip install pycaret

In [None]:
!pip install shap

In [None]:
import numpy as np
import pandas as pd
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
from pycaret.regression import setup, blend_models, create_model, finalize_model, plot_model, predict_model, interpret_model
import shap
from fbprophet import Prophet

In [None]:
# Load the competition tables: the training rows, the test rows, and the
# sample submission whose target columns are filled in further down.
df_train = pd.read_csv('../input/tabular-playground-series-jul-2021/train.csv')
df_test = pd.read_csv('../input/tabular-playground-series-jul-2021/test.csv')
df_sub = pd.read_csv('../input/tabular-playground-series-jul-2021/sample_submission.csv')

In [None]:
# Flag the origin of every row so the stacked frame can be split again below.
df_train['IsTrain'] = 1
df_test['IsTrain'] = 0
# Row labels are left untouched (no ignore_index), so each part keeps the
# labels it had in its own frame.
df = pd.concat([df_train, df_test], axis=0, sort=False)

# Derive a season code from the calendar month:
#   1 = Dec-Feb, 2 = Mar-May, 3 = Jun-Aug, 4 = Sep-Nov.
# (month % 12) moves December next to January; integer division by 3 then
# groups the months in threes. The result is stored as float64.
df['date_time'] = pd.to_datetime(df['date_time'])
df['season'] = (df['date_time'].dt.month % 12 // 3 + 1).astype('float64')
df = df.drop(columns='date_time')

# Split back into the two parts; the test part carries no target columns.
train = df.query('IsTrain == 1').drop(columns=['IsTrain'])
test = df.query('IsTrain == 0').drop(
    columns=[
        'IsTrain',
        'target_carbon_monoxide',
        'target_benzene',
        'target_nitrogen_oxides',
    ]
)

In [None]:
# Show the prepared training frame as the cell output.
train

### Predicted by catboost
#### Carbon monoxide

In [None]:
# Carbon monoxide training frame: everything in train except the other two targets.
train1 = train.drop(columns=['target_benzene', 'target_nitrogen_oxides'])
train1

In [None]:
# Set up the PyCaret experiment for the carbon monoxide target, with
# 'season' declared as a categorical feature.
reg1 = setup(
    data=train1,
    target='target_carbon_monoxide',
    categorical_features=['season'],
    silent=True,
    session_id=1,
)
# Train the five base regressors, in the same order in which they are blended.
catboost1, et1, lightgbm1, gbr1, rf1 = (
    create_model(model_id)
    for model_id in ("catboost", "et", "lightgbm", "gbr", "rf")
)
# Blend the five models, finalize the blend, and score the test frame.
blend1 = blend_models(estimator_list=[catboost1, et1, lightgbm1, gbr1, rf1])
final1 = finalize_model(blend1)
pred1 = predict_model(final1, data=test)

#### Benzene

In [None]:
# Benzene training frame: only the sensor_2 reading and the benzene target.
train2 = train[['sensor_2', 'target_benzene']]
train2.head()

In [None]:
# Set up the PyCaret experiment for the benzene target (no categorical
# features are declared for this one).
reg2 = setup(
    data=train2,
    target='target_benzene',
    silent=True,
    session_id=2,
)
# Train the five base regressors, in the same order in which they are blended.
catboost2, et2, lightgbm2, gbr2, rf2 = (
    create_model(model_id)
    for model_id in ("catboost", "et", "lightgbm", "gbr", "rf")
)
# Blend the five models, finalize the blend, and score the test frame.
blend2 = blend_models(estimator_list=[catboost2, et2, lightgbm2, gbr2, rf2])
final2 = finalize_model(blend2)
pred2 = predict_model(final2, data=test)

#### Nitrogen oxides

In [None]:
# Nitrogen oxides training frame: everything in train except the other two targets.
train3 = train.drop(columns=['target_benzene', 'target_carbon_monoxide'])
train3

In [None]:
# Set up the PyCaret experiment for the nitrogen oxides target, with
# 'season' declared as a categorical feature.
reg3 = setup(
    data=train3,
    target='target_nitrogen_oxides',
    categorical_features=['season'],
    silent=True,
    session_id=3,
)
# Train the five base regressors, in the same order in which they are blended.
catboost3, et3, lightgbm3, gbr3, rf3 = (
    create_model(model_id)
    for model_id in ("catboost", "et", "lightgbm", "gbr", "rf")
)
# Blend the five models, finalize the blend, and score the test frame.
blend3 = blend_models(estimator_list=[catboost3, et3, lightgbm3, gbr3, rf3])
final3 = finalize_model(blend3)
pred3 = predict_model(final3, data=test)

In [None]:
# Copy each target's predicted 'Label' column into the matching column of
# the submission frame, then show the result.
df_sub['target_carbon_monoxide'] = pred1['Label']
df_sub['target_benzene'] = pred2['Label']
df_sub['target_nitrogen_oxides'] = pred3['Label']
df_sub

### Prophet

Using the carbon monoxide and nitrogen oxides values predicted above by the PyCaret blend as training data, Prophet predicts the periods during which the sensors are off.

#### Carbon monoxide

In [None]:
def do_prophet(train, periods=100, freq='H'):
    """Fit a Prophet model on ``train`` and return its forecast frame.

    The model is created with yearly and weekly seasonality switched off and
    daily seasonality switched on. Besides returning the forecast, the
    function draws the forecast plot and the component plot.

    Args:
        train: Frame passed straight to ``Prophet.fit``. The callers in this
            notebook pass two columns named ``ds`` (timestamp) and ``y``
            (value).
        periods: Number of steps appended after the end of the history when
            building the frame to predict on. Defaults to 100, the value
            that used to be hard-coded.
        freq: Spacing of the appended steps, as a pandas frequency string.
            Defaults to ``'H'`` (hourly), the value that used to be
            hard-coded.

    Returns:
        The frame returned by ``Prophet.predict`` for the frame built by
        ``make_future_dataframe``; callers read its ``ds`` and ``yhat``
        columns.
    """
    m = Prophet(yearly_seasonality=False, weekly_seasonality=False, daily_seasonality=True)
    m.fit(train)
    future = m.make_future_dataframe(periods=periods, freq=freq)
    forecast = m.predict(future)
    # Both plots are drawn only for their visual side effect; the returned
    # figure objects are not needed afterwards.
    m.plot(forecast)
    m.plot_components(forecast)
    return forecast

In [None]:
# Prophet input for carbon monoxide, first window: submission values from
# 2011-01-01 09:00 up to (not including) 2011-01-02 21:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_c1 = (
    df_sub
    .query("date_time >= '2011-01-01 09:00:00' & date_time < '2011-01-02 21:00:00'")
    [['date_time', 'target_carbon_monoxide']]
    .rename(columns={'date_time': 'ds', 'target_carbon_monoxide': 'y'})
)
pred11 = do_prophet(train_c1)

In [None]:
# Keep timestamp and point forecast for 2011-01-02 21:00 through
# 2011-01-05 00:00 (both ends inclusive), renumbered from 0.
pred_c1 = (
    pred11
    .query("ds >= '2011-01-02 21:00:00' & ds <= '2011-01-05 00:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

In [None]:
# Prophet input for carbon monoxide, second window: submission values from
# 2011-01-26 06:00 up to (not including) 2011-01-28 17:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_c2 = (
    df_sub
    .query("date_time >= '2011-01-26 06:00:00' & date_time < '2011-01-28 17:00:00'")
    [['date_time', 'target_carbon_monoxide']]
    .rename(columns={'date_time': 'ds', 'target_carbon_monoxide': 'y'})
)
pred12 = do_prophet(train_c2)

In [None]:
# Keep timestamp and point forecast for 2011-01-28 17:00 through
# 2011-01-29 01:00 (both ends inclusive), renumbered from 0.
pred_c2 = (
    pred12
    .query("ds >= '2011-01-28 17:00:00' & ds <= '2011-01-29 01:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

In [None]:
# Prophet input for carbon monoxide, third window: submission values from
# 2011-02-07 03:00 up to (not including) 2011-02-08 17:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_c3 = (
    df_sub
    .query("date_time >= '2011-02-07 03:00:00' & date_time < '2011-02-08 17:00:00'")
    [['date_time', 'target_carbon_monoxide']]
    .rename(columns={'date_time': 'ds', 'target_carbon_monoxide': 'y'})
)
pred13 = do_prophet(train_c3)

In [None]:
# Keep timestamp and point forecast for 2011-02-08 17:00 through
# 2011-02-11 20:00 (both ends inclusive), renumbered from 0.
pred_c3 = (
    pred13
    .query("ds >= '2011-02-08 17:00:00' & ds <= '2011-02-11 20:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

#### Nitrogen oxides

In [None]:
# Prophet input for nitrogen oxides, first window: submission values from
# 2011-01-01 09:00 up to (not including) 2011-01-02 21:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_n1 = (
    df_sub
    .query("date_time >= '2011-01-01 09:00:00' & date_time < '2011-01-02 21:00:00'")
    [['date_time', 'target_nitrogen_oxides']]
    .rename(columns={'date_time': 'ds', 'target_nitrogen_oxides': 'y'})
)
pred31 = do_prophet(train_n1)

In [None]:
# Keep timestamp and point forecast for 2011-01-02 21:00 through
# 2011-01-05 00:00 (both ends inclusive), renumbered from 0.
pred_n1 = (
    pred31
    .query("ds >= '2011-01-02 21:00:00' & ds <= '2011-01-05 00:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

In [None]:
# Prophet input for nitrogen oxides, second window: submission values from
# 2011-01-26 06:00 up to (not including) 2011-01-28 17:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_n2 = (
    df_sub
    .query("date_time >= '2011-01-26 06:00:00' & date_time < '2011-01-28 17:00:00'")
    [['date_time', 'target_nitrogen_oxides']]
    .rename(columns={'date_time': 'ds', 'target_nitrogen_oxides': 'y'})
)
pred32 = do_prophet(train_n2)

In [None]:
# Keep timestamp and point forecast for 2011-01-28 17:00 through
# 2011-01-29 01:00 (both ends inclusive), renumbered from 0.
pred_n2 = (
    pred32
    .query("ds >= '2011-01-28 17:00:00' & ds <= '2011-01-29 01:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

In [None]:
# Prophet input for nitrogen oxides, third window: submission values from
# 2011-02-07 03:00 up to (not including) 2011-02-08 17:00, with the columns
# renamed to the ds / y names handed to do_prophet.
train_n3 = (
    df_sub
    .query("date_time >= '2011-02-07 03:00:00' & date_time < '2011-02-08 17:00:00'")
    [['date_time', 'target_nitrogen_oxides']]
    .rename(columns={'date_time': 'ds', 'target_nitrogen_oxides': 'y'})
)
pred33 = do_prophet(train_n3)

In [None]:
# Keep timestamp and point forecast for 2011-02-08 17:00 through
# 2011-02-11 20:00 (both ends inclusive), renumbered from 0.
pred_n3 = (
    pred33
    .query("ds >= '2011-02-08 17:00:00' & ds <= '2011-02-11 20:00:00'")
    [['ds', 'yhat']]
    .reset_index(drop=True)
)

In [None]:
# Cut the submission into seven consecutive time slices. Slices 2, 4 and 6
# are the windows whose carbon monoxide and nitrogen oxides values are
# replaced by Prophet forecasts; the odd-numbered slices keep the values
# already stored in df_sub. The benzene column is never modified here.
sub_temp1 = df_sub.query("date_time < '2011-01-02 21:00:00'")
sub_temp2 = df_sub.query("date_time >= '2011-01-02 21:00:00' & date_time <= '2011-01-05 00:00:00'")
sub_temp3 = df_sub.query("date_time > '2011-01-05 00:00:00' & date_time < '2011-01-28 17:00:00'")
sub_temp4 = df_sub.query("date_time >= '2011-01-28 17:00:00' & date_time <= '2011-01-29 01:00:00'")
sub_temp5 = df_sub.query("date_time > '2011-01-29 01:00:00' & date_time < '2011-02-08 17:00:00'")
sub_temp6 = df_sub.query("date_time >= '2011-02-08 17:00:00' & date_time <= '2011-02-11 20:00:00'")
sub_temp7 = df_sub.query("date_time > '2011-02-11 20:00:00'")

# Write the forecasts into their windows by position. Assigning a Series
# would align on index labels and silently leave NaN (or drop values) when a
# forecast and its window differ in length; assigning the underlying array
# raises ValueError in that case instead.
sub_temp2 = sub_temp2.reset_index(drop=True)
sub_temp2['target_carbon_monoxide'] = pred_c1['yhat'].to_numpy()
sub_temp2['target_nitrogen_oxides'] = pred_n1['yhat'].to_numpy()
sub_temp4 = sub_temp4.reset_index(drop=True)
sub_temp4['target_carbon_monoxide'] = pred_c2['yhat'].to_numpy()
sub_temp4['target_nitrogen_oxides'] = pred_n2['yhat'].to_numpy()
sub_temp6 = sub_temp6.reset_index(drop=True)
sub_temp6['target_carbon_monoxide'] = pred_c3['yhat'].to_numpy()
sub_temp6['target_nitrogen_oxides'] = pred_n3['yhat'].to_numpy()

# Stitch the slices back together in their original order.
submission = pd.concat([sub_temp1, sub_temp2, sub_temp3, sub_temp4, sub_temp5, sub_temp6, sub_temp7], sort=False, axis=0)

In [None]:
# Show the assembled submission frame as the cell output.
submission

In [None]:
# Write the final submission to CSV, leaving out the index column.
submission.to_csv('pycaret_prophet_submission.csv',index=False)