# Tabular Playground Series - Feb 2021

In [None]:
import numpy as np  
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from scipy.stats import norm
from scipy import stats
import warnings
# Silence every warning category for the rest of the notebook session.
warnings.simplefilter('ignore')


In [None]:
# Read the training set, the test set and the sample submission, in that
# order, from the competition's input directory.
train, test, submission = (
    pd.read_csv('../input/tabular-playground-series-feb-2021/' + file_name)
    for file_name in ('train.csv', 'test.csv', 'sample_submission.csv')
)

In [None]:
# Preview the first five rows of the training data.
train.head(n=5)

In [None]:
# Summary statistics of the target column.
train.loc[:, 'target'].describe()

In [None]:
# Distribution plot of the target, followed by its skewness and kurtosis.
# NOTE(review): `sns.distplot` is deprecated in recent seaborn releases;
# consider `sns.histplot(..., kde=True)` once the minimum seaborn version
# used for this notebook is confirmed.
plt.figure(figsize=(10, 7))
sns.distplot(train['target'], color="Red")
plt.gca().set_title('Distribution of Target', fontsize=18)
plt.show()
print(f"Skewness: {train['target'].skew():f}")
print(f"Kurtosis: {train['target'].kurt():f}")

In [None]:
# Horizontal box plot of the target to expose its spread and outliers.
plt.figure(figsize=(10, 7))
ax = sns.boxplot(data=train, x="target", color='Red')
ax.set_title('Distribution of Target', fontsize=18)
plt.show()

In [None]:
# Probability plot of the target against the normal distribution
# (scipy's default reference distribution, spelled out here).
plt.figure(figsize=(10, 7))
res = stats.probplot(train['target'], dist="norm", plot=plt)
plt.show()

In [None]:
# Correlation heatmap of the numeric columns, ordered by correlation with
# the target (strongest first).
f, ax = plt.subplots(figsize=(18, 10))

# Restrict to numeric columns explicitly: `DataFrame.corr()` raises on
# string columns in pandas >= 2.0, while older versions dropped them silently.
numeric_corr = train.select_dtypes(include='number').corr()

# Reorder BOTH axes with the same ordering so the matrix stays symmetric.
# Only then does an upper-triangle mask hide purely redundant cells.
target_order = numeric_corr['target'].sort_values(ascending=False).index
corrmat = numeric_corr.loc[target_order, target_order]

# Hide the diagonal and the mirrored upper triangle.
mask = np.triu(np.ones(corrmat.shape, dtype=bool))
sns.heatmap(corrmat, vmax=.2, annot=True, mask=mask, cmap="Reds", ax=ax)
plt.title('Correlation Table')
plt.show()

In [None]:
# Pairwise scatter plots of the six columns most correlated with the target
# (the target itself is one of them, since its self-correlation is 1).
cols = corrmat.nlargest(6, 'target')['target'].index
# `height` is the current name of the per-facet size argument; the old
# `size` spelling is deprecated.
sns.pairplot(train[cols], height=2.5)
plt.show()

In [None]:
# Kernel density estimate for each column at positions 11..25 of `train`,
# one panel per column on a 4x4 grid.
# NOTE(review): positions 11..25 are presumably the continuous features plus
# the target - confirm against the CSV schema.
f, ax = plt.subplots(nrows=4, ncols=4, figsize=(18, 12))
panels = ax.ravel()  # row-major: ax[0,0], ax[0,1], ..., ax[3,3]
column_positions = range(11, 26)

# Each column gets its own panel. Previously the last column was drawn on
# ax[3,0], on top of column 23, and the free panel ax[3,2] was deleted.
# NOTE(review): `shade` is deprecated in favour of `fill` in recent seaborn;
# switch once the minimum seaborn version is confirmed to be >= 0.11.
for panel, position in zip(panels, column_positions):
    sns.kdeplot(train.iloc[:, position], shade=True, ax=panel, color='red')

# Remove only the panels that received no plot.
for unused_panel in panels[len(column_positions):]:
    f.delaxes(unused_panel)

plt.tight_layout()
plt.show()

In [None]:
# Bar chart of category frequencies for each categorical feature cat0..cat9.
cols = [f'cat{i}' for i in range(10)]
for col in cols:
    plt.figure(figsize=(8, 4))
    # Only `color` is passed: pandas ignores `colormap` (with a warning) when
    # `color` is also given, and `stacked` has no effect on a single Series,
    # so the rendered chart is unchanged.
    train[col].value_counts().plot(kind='bar', color='Red')
    plt.title(col)
    plt.grid()
    plt.show()

 

In [None]:
# Horizontal box plots of the target, grouped by the levels of each
# categorical feature cat0..cat9 (one figure per feature).
cols = [f'cat{i}' for i in range(10)]
for feature in cols:
    plt.figure(figsize=(10, 5))
    sns.boxplot(x='target', y=feature, data=train, orient="h", palette="Set2")
    plt.xticks(rotation=90)
    plt.grid()
    plt.show()

In [None]:
# Scatter plots with a fitted regression line for the four features most
# correlated with the target, one per panel of a 2x2 grid. As in the
# original layout, the target is on the x-axis and the feature on the y-axis.
cols = corrmat.nlargest(6, 'target')['target'].index
fig, axes = plt.subplots(nrows=2, ncols=2, figsize=(14, 8))

# Per-panel styling in row-major order; the first panel keeps seaborn's
# default colour and marker.
panel_styles = [
    {},
    {'color': 'red', 'marker': 's'},
    {'color': 'orange', 'marker': '^'},
    {'color': 'green', 'marker': '+'},
]

# cols[0] is the target itself (self-correlation of 1), so start at cols[1].
# `train` already holds both columns, so no temporary frame is needed.
for panel, feature, style in zip(axes.ravel(), cols[1:5], panel_styles):
    sns.regplot(y=feature, x='target', data=train, fit_reg=True, ax=panel, **style)

plt.show()