# Tabular Playground Series - Aug 2021
A quick look at the data, followed by predictions with PyCaret and a feature-importance check with SHAP.

In [None]:
!pip install pycaret

In [None]:
!pip install shap

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
import missingno as msno

import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

from pycaret.regression import setup, create_model, tune_model, finalize_model, blend_models, predict_model, plot_model, interpret_model
import shap

In [None]:
# Locations of the competition's train / test CSV files.
train_path = '../input/tabular-playground-series-aug-2021/train.csv'
test_path = '../input/tabular-playground-series-aug-2021/test.csv'

# Read both files into DataFrames (train first, then test).
df_train, df_test = pd.read_csv(train_path), pd.read_csv(test_path)

## Overview
There are no missing values, and none of the features seems to be correlated with `loss`. The data is so abstract that I don't even know where to begin! I would like to investigate it gradually from here on, although I'm not sure we'll learn much within a month.

In [None]:
# Preview the first rows of the training data.
df_train.head()

In [None]:
# Summary of the training frame: column dtypes, non-null counts and memory usage.
df_train.info()

In [None]:
# missingno nullity matrix: a visual check for missing values in the training data.
msno.matrix(df_train)

In [None]:
# Pairwise correlations between the feature columns and the target `loss`.
# `id` is left out: it is skipped as a non-feature in the histogram loop as
# well, and would only add a meaningless row/column to the matrix.
corr = df_train.drop(columns='id', errors='ignore').corr()

fig, ax = plt.subplots(figsize=(16, 16))
sns.heatmap(corr, cmap='PuRd', ax=ax)
ax.set_title('Correlation matrix of the training columns')
plt.show()

In [None]:
# Histogram of every feature column, i.e. everything except `id` and the
# target `loss`. The grid is sized from the number of features instead of a
# fixed 25x4 layout, so it neither overflows nor leaves empty panels visible.
N_COLS = 4
feature_cols = [col for col in df_train.columns if col not in ('id', 'loss')]
# Ceiling division; at least one row so `plt.subplots` always gets a valid grid.
n_rows = max(1, -(-len(feature_cols) // N_COLS))

# squeeze=False keeps `ax` two-dimensional even when there is a single row.
fig, ax = plt.subplots(n_rows, N_COLS, figsize=(16, 4 * n_rows), squeeze=False)
panels = ax.ravel()

for panel, col in zip(panels, feature_cols):
    sns.histplot(df_train[col], ax=panel, color='lightskyblue')

# Hide panels of the last row that have no feature to show.
for panel in panels[len(feature_cols):]:
    panel.set_visible(False)

plt.show()

## Predict and check SHAP with PyCaret
We want to check the importance of the features with SHAP. To do so, we first use PyCaret to train CatBoost and LightGBM models (blended for the final prediction), and then display the SHAP plots for each model.

In [None]:
def do_pycaret(target, train, test, fold=5, session_id=42):
    """Train a CatBoost + LightGBM blend with PyCaret and predict on ``test``.

    Parameters
    ----------
    target : str
        Name of the target column in ``train``.
    train : pandas.DataFrame
        Training data handed to PyCaret's ``setup`` (with ``normalize=True``).
    test : pandas.DataFrame
        Data to predict on with the finalized blend.
    fold : int, default 5
        Number of cross-validation folds used for both base models and the blend.
    session_id : int, default 42
        Seed passed to ``setup``.

    Returns
    -------
    tuple
        ``(pred, catboost, lightgbm)``: the output of ``predict_model`` on
        ``test``, followed by the two base models returned by ``create_model``
        (not the finalized blend).
    """
    # `setup` configures the PyCaret experiment; its return value is not needed.
    setup(data=train, target=target, normalize=True, silent=True, session_id=session_id)

    catboost = create_model("catboost", fold=fold)
    lightgbm = create_model("lightgbm", fold=fold)
    blended = blend_models(estimator_list=[catboost, lightgbm], fold=fold, optimize='RMSE')

    # Called without `data`; per the PyCaret docs this scores the hold-out set.
    # Kept for the metrics it displays; the returned frame is not used.
    predict_model(blended)

    final = finalize_model(blended)
    pred = predict_model(final, data=test)
    return pred, catboost, lightgbm

In [None]:
# Train on `loss` and predict the test set; the two base models are kept for the plots below.
pred, catboost, lightgbm = do_pycaret('loss', df_train, df_test)

In [None]:
# Preview the predictions instead of rendering the whole frame.
pred.head()

In [None]:
# PyCaret feature-importance plot for the CatBoost model.
plot_model(catboost, plot='feature')

In [None]:
# PyCaret feature-importance plot for the LightGBM model.
plot_model(lightgbm, plot='feature')

In [None]:
# SHAP-based interpretation of the CatBoost model (PyCaret's default interpretation plot).
interpret_model(catboost)

In [None]:
# SHAP-based interpretation of the LightGBM model (PyCaret's default interpretation plot).
interpret_model(lightgbm)

## Submission

In [None]:
# Use the sample submission as a template and fill in the predicted loss.
submission = pd.read_csv('../input/../input/tabular-playground-series-aug-2021/sample_submission.csv')

# One prediction per submission row is required; fail loudly otherwise.
assert len(submission) == len(pred), "number of predictions does not match the sample submission"

# Bracket assignment always writes the column (attribute assignment would only
# set a plain attribute if `loss` were missing), and positional values avoid
# index alignment silently introducing NaN.
submission['loss'] = pred['Label'].to_numpy()
submission.head()

In [None]:
# Write the submission file to the working directory, without the DataFrame index.
submission.to_csv('PyCaret_blend_sub.csv',index=False)