In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import plotly.express as px

In [None]:
# Read the three competition CSVs (training set, test set, sample submission)
# in one pass; the targets are unpacked in the same order as the paths.
train, test, submit = (
    pd.read_csv(csv_path)
    for csv_path in (
        "../input/tabular-playground-series-dec-2021/train.csv",
        "../input/tabular-playground-series-dec-2021/test.csv",
        "../input/tabular-playground-series-dec-2021/sample_submission.csv",
    )
)

In [None]:
# Column roles and the seed shared by the rest of the notebook.
ID_COLUMN = "Id"
TARGET = 'Cover_Type'
RANDOM_STATE = 2021

# Rebind instead of dropping in place, and tolerate an already-missing column,
# so re-running this cell does not raise KeyError once "Id" has been removed.
train = train.drop(columns=[ID_COLUMN], errors="ignore")
test = test.drop(columns=[ID_COLUMN], errors="ignore")

# Every remaining column except the identifier and the target is a feature.
# The lowercase 'id' spelling is retained from the earlier filter so a column
# with that exact name stays excluded as before.
FEATURES = [
    col for col in train.columns
    if col not in (ID_COLUMN, ID_COLUMN.lower(), TARGET)
]

In [None]:
# Preview the first five training rows.
train.head(n=5)

In [None]:
# Preview the first five test rows.
test.head(n=5)

In [None]:
# Preview the first five rows of the sample submission file.
submit.head(n=5)

In [None]:
# Histogram of the target column using matplotlib's default binning.
plt.hist(x=train.loc[:, 'Cover_Type'])

In [None]:
# Bar chart of the number of training rows per Cover_Type value,
# drawn on an explicitly created 10x8 inch figure.
count_fig, count_ax = plt.subplots(figsize=(10, 8))
sns.countplot(data=train, x='Cover_Type', ax=count_ax)

In [None]:
# Heatmap of the pairwise correlations between the `v` columns whose
# correlation with Cover_Type is largest.
corr = train.corr()
v = 10  # number of columns kept for the heatmap
colmn = corr.nlargest(n=v, columns='Cover_Type')['Cover_Type'].index

# rowvar=False treats each column of the selected frame as one variable.
xm = np.corrcoef(train[colmn].to_numpy(), rowvar=False)

sns.set(font_scale=1.25)
plt.figure(figsize=(18, 18))
hm = sns.heatmap(
    xm,
    annot=True,
    fmt='.2f',
    annot_kws={'size': 10},
    cbar=True,
    square=True,
    xticklabels=colmn.values,
    yticklabels=colmn.values,
)
plt.show()

In [None]:
# Summary statistics for every column except the last one, one row per column,
# ordered by standard deviation (largest first) and rendered as a styled table:
# gradient background, green bars on "max", blue bars on "mean".
(
    train.iloc[:, :-1]
    .describe()
    .T
    .sort_values(by='std', ascending=False)
    .style
    .background_gradient()
    .bar(subset=["max"], color='green')
    .bar(subset=["mean"], color='blue')
)

In [None]:
# Split the features into "categorical" and "continuous" by the number of
# distinct values each one takes across train and test combined.
CARDINALITY_THRESHOLD = 25  # fewer distinct values than this => categorical

df = pd.concat([train[FEATURES], test[FEATURES]], axis=0)
# Count distinct values once per column instead of once per comprehension.
unique_counts = df.nunique()
del df  # the combined frame is only needed for the counts

cat_features = [col for col in FEATURES if unique_counts[col] < CARDINALITY_THRESHOLD]
cont_features = [col for col in FEATURES if unique_counts[col] >= CARDINALITY_THRESHOLD]

# Share of each feature kind.
plt.pie(
    [len(cat_features), len(cont_features)],
    labels=['Categorical', 'Continuous'],
    autopct='%1.1f%%',
)
plt.show()

In [None]:
# Train-vs-test density comparison: one KDE panel per continuous feature.
ncols = 5
# Ceiling division on the number of continuous features, so every feature gets
# a panel regardless of how many features exist in total (at least one row).
nrows = max(1, -(-len(cont_features) // ncols))

# squeeze=False keeps `axes` two-dimensional even when there is a single row.
fig, axes = plt.subplots(
    nrows, ncols, figsize=(18, 8), facecolor='#EAEAF2', squeeze=False
)
panels = axes.ravel()

for ax, col in zip(panels, cont_features):
    sns.kdeplot(x=train[col], ax=ax, label='Train data')
    sns.kdeplot(x=test[col], ax=ax, label='Test data')
    ax.set_ylabel('')
    ax.set_xlabel(col, fontsize=8, fontweight='bold')
    ax.tick_params(labelsize=5, width=0.5)
    ax.xaxis.offsetText.set_fontsize(4)
    ax.yaxis.offsetText.set_fontsize(4)

# Hide the panels left over when the feature count does not fill the grid.
for ax in panels[len(cont_features):]:
    ax.set_visible(False)

# The curves carry 'Train data' / 'Test data' labels; draw one shared legend
# so the two distributions can be told apart.
handles, labels = panels[0].get_legend_handles_labels()
if handles:
    fig.legend(handles, labels, loc='upper right')

plt.show()