# Tabular Playground Series - Jul 2021
Continued from [last time](https://www.kaggle.com/astashiro/tps-jul2021-07lightautoml).

## Pseudo labels
Special thanks to [this notebook](https://www.kaggle.com/alexryzhkov/tps-lightautoml-baseline-with-pseudolabels).

In [None]:
!pip install pycaret

In [None]:
!pip install shap

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from pycaret.regression import setup, blend_models, create_model, finalize_model, plot_model, predict_model, interpret_model
import shap
from fbprophet import Prophet

In [None]:
# Competition data: labelled training rows and unlabelled test rows.
df_train = pd.read_csv(
    '../input/tabular-playground-series-jul-2021/train.csv'
)
df_test = pd.read_csv(
    '../input/tabular-playground-series-jul-2021/test.csv'
)
# Predictions from the earlier LightAutoML notebook, used below as pseudo labels.
pseudolabels = pd.read_csv(
    '../input/tps-jul2021-07lightautoml/LightAutoML_submission.csv'
)

In [None]:
# Parse the timestamp column of both frames into pandas datetimes (in place).
for frame in (df_train, df_test):
    frame['date_time'] = pd.to_datetime(frame['date_time'])

In [None]:
# Attach the pseudo labels to the test rows. df_test is modified in place
# because it is later concatenated with df_train as extra training data.
for col in ['target_carbon_monoxide', 'target_benzene', 'target_nitrogen_oxides']:
    df_test[col] = pseudolabels[col]

# Frame used for prediction: the two targets modelled below are removed, while
# the pseudo-labelled target_benzene column is kept and acts as a feature.
test = df_test.drop(['target_carbon_monoxide', 'target_nitrogen_oxides'], axis=1)

In [None]:
# Stack the labelled training rows on top of the pseudo-labelled test rows.
# NOTE(review): the original row labels are kept, so index values repeat — confirm this is intended.
train = pd.concat([df_train, df_test], sort=False,axis=0)

In [None]:
# Display the combined training frame.
train

In [None]:
# Display the frame that will be passed to the models for prediction.
test

### Predict with Pycaret

In [None]:
def do_pycaret(target, train, test, categorical_features):
    """Train a blended pycaret regressor for one target and predict on ``test``.

    A pycaret regression experiment is set up on ``train``; five base models
    are created (ids ``catboost``, ``et``, ``lightgbm``, ``gbr``, ``rf``),
    blended, finalized, and the finalized blend is used to predict ``test``.

    Args:
        target: Name of the column in ``train`` to predict.
        train: Frame holding the features together with ``target``.
        test: Frame handed to ``predict_model`` for the final predictions.
        categorical_features: Forwarded unchanged to ``setup``.

    Returns:
        A tuple ``(pred, catboost, lightgbm, rf)``: the frame returned by
        ``predict_model`` on ``test``, followed by the individual CatBoost,
        LightGBM and Random Forest models created before blending.
    """
    # setup() configures pycaret's global experiment; its return value is not used.
    setup(
        data=train,
        target=target,
        categorical_features=categorical_features,
        silent=True,
    )

    # Create the base models in a fixed order; dicts preserve insertion order,
    # so the blend receives them in the same sequence as they were created.
    model_ids = ("catboost", "et", "lightgbm", "gbr", "rf")
    models = {model_id: create_model(model_id) for model_id in model_ids}

    blend = blend_models(estimator_list=list(models.values()), optimize='RMSLE')

    # Called without data, presumably for the evaluation output pycaret
    # displays; the returned frame itself is not needed.
    predict_model(blend)

    final = finalize_model(blend)
    pred = predict_model(final, data=test)
    return pred, models["catboost"], models["lightgbm"], models["rf"]

#### Carbon monoxide

In [None]:
# Carbon monoxide: remove the other modelled target so it cannot leak in as a
# feature, then fit the blend and predict on the test frame.
train1 = train.drop(columns=['target_nitrogen_oxides'])
pred1, catboost1, lightgbm1, rf1 = do_pycaret(
    'target_carbon_monoxide',
    train1,
    test,
    categorical_features=None,
)
pred1

In [None]:
# Model interpretation for the carbon-monoxide CatBoost model.
interpret_model(catboost1)

In [None]:
# Model interpretation for the carbon-monoxide LightGBM model.
interpret_model(lightgbm1)

In [None]:
# Model interpretation for the carbon-monoxide Random Forest model.
interpret_model(rf1)

#### Nitrogen oxides

In [None]:
# Nitrogen oxides: remove the other modelled target so it cannot leak in as a
# feature, then fit the blend and predict on the test frame.
train3 = train.drop(columns=['target_carbon_monoxide'])
pred3, catboost3, lightgbm3, rf3 = do_pycaret(
    'target_nitrogen_oxides',
    train3,
    test,
    categorical_features=None,
)
pred3

In [None]:
# Model interpretation for the nitrogen-oxides CatBoost model.
interpret_model(catboost3)

In [None]:
# Model interpretation for the nitrogen-oxides LightGBM model.
interpret_model(lightgbm3)

In [None]:
# Model interpretation for the nitrogen-oxides Random Forest model.
interpret_model(rf3)

In [None]:
# Start from the sample submission so the row order and column layout match
# what the competition expects.
submission = pd.read_csv('../input/tabular-playground-series-jul-2021/sample_submission.csv')

# Bracket assignment is used instead of attribute assignment: setting
# `frame.name = ...` only updates a column when that column already exists and
# otherwise silently creates a plain attribute.
submission['target_carbon_monoxide'] = pred1['Label']
# Benzene is not re-modelled here; the pseudo-label predictions are reused as is.
submission['target_benzene'] = pseudolabels['target_benzene']
submission['target_nitrogen_oxides'] = pred3['Label']
submission

### Save the merged predictions

In [None]:
# Write the submission to CSV without the index column.
submission.to_csv('pycaret_withpseudolabels_submission.csv',index=False)