In [None]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from warnings import simplefilter


# Silence every warning category for the rest of the session so library
# warnings do not clutter the cell outputs.
simplefilter(action="ignore")

def hide_spines(ax, spines=("top", "right", "left", "bottom")):
    """Hide the named spines of an axes and return that same axes.

    Parameters
    ----------
    ax : object with a ``spines`` mapping
        Each ``ax.spines[name]`` must provide ``set_visible``; this is the
        interface the notebook's Matplotlib axes are used through.
    spines : iterable of str, optional
        Names of the spines to hide. Defaults to all four sides. The default
        is a tuple rather than a list so it can never be mutated between calls.

    Returns
    -------
    The ``ax`` object that was passed in, modified in place, so the call can
    be used as ``ax = hide_spines(ax)``.

    Notes
    -----
    A name that is missing from ``ax.spines`` is not handled here; the lookup
    error raised by the mapping propagates to the caller.
    """
    for spine in spines:
        ax.spines[spine].set_visible(False)

    return ax

In [None]:
# Locations of the two competition CSV files.
train_path = "../input/tabular-playground-series-feb-2022/train.csv"
test_path = "../input/tabular-playground-series-feb-2022/test.csv"

# Read both files into DataFrames.
train = pd.read_csv(train_path)
test = pd.read_csv(test_path)

# Pull the "row_id" column out of each frame (pop removes it in place) and
# keep its values as a NumPy array, leaving the remaining columns in the frames.
train_ids = train.pop("row_id").to_numpy()
test_ids = test.pop("row_id").to_numpy()

In [None]:
# Partition the training columns by dtype into numeric and non-numeric names.
#
# The split asks pandas whether each dtype is numeric instead of comparing
# against the literal "object" dtype. With the literal comparison, any column
# that is neither object nor numeric (a dedicated string dtype, category,
# datetime, ...) would land in `numeric_columns` and then break the
# correlation and KDE cells that consume that list.
_is_numeric = np.array(
    [pd.api.types.is_numeric_dtype(dtype) for dtype in train.dtypes],
    dtype=bool,
)

# Both results are NumPy arrays of column names, in the frame's column order.
categorical_columns = train.columns[~_is_numeric].to_numpy()
numeric_columns = train.columns[_is_numeric].to_numpy()

In [None]:
# Bar chart of how many training rows carry each value of the "target" column.
fig, ax = plt.subplots(figsize=(15, 7))
fig.set_facecolor("#fff")
ax.set_facecolor("#fff")
ax.grid(color="lightgrey", alpha=0.7, axis="both", zorder=0)

# Draw on the axes created above explicitly rather than relying on pyplot's
# implicit "current axes", matching how the per-target KDE cell calls seaborn.
sns.countplot(x="target", data=train, palette="magma", zorder=2, ax=ax)

# Tilt the class labels, then shrink the tick marks to zero length so only
# the labels remain.
ax.tick_params(axis="x", labelrotation=30)
ax.xaxis.set_tick_params(labelsize=10, size=0, pad=5)
ax.yaxis.set_tick_params(labelsize=10, size=0, pad=5)
ax = hide_spines(ax)

ax.set_ylabel("Count", fontsize=14, labelpad=10)
ax.set_xlabel("target", fontsize=14, labelpad=20)
ax.set_title("Target Distribution", loc="left", fontsize=25, fontweight="bold")

# plt.show() instead of fig.show(): the latter needs a GUI backend and only
# emits a warning under non-interactive/inline backends.
plt.show()

In [None]:
# For every numeric feature, overlay the kernel-density estimate of the train
# and test sets in one panel so the two distributions can be compared.
cols = 2

# Derive the grid height from the number of features. A fixed 200-row grid
# leaves unused rows of blank figure when there are fewer features, and
# add_subplot rejects any index beyond rows * cols when there are more.
rows = max(1, int(np.ceil(len(numeric_columns) / cols)))

# About 2.5 inches per row, close to the previous 12 * 41 inches over 200 rows.
fig = plt.figure(figsize=(17, 2.5 * rows))
fig.set_facecolor("#fff")

# Styling shared by the train and test curves.
kde_style = dict(fill=True, alpha=0.8, linewidth=0.7, edgecolor="#000", zorder=2)

for idx, numeric_column in enumerate(numeric_columns):
    ax = fig.add_subplot(rows, cols, idx + 1)
    ax.set_facecolor("#fff")
    ax.grid(color="lightgrey", alpha=0.7, axis="both", zorder=0)

    # Target the panel explicitly instead of depending on pyplot's current axes.
    sns.kdeplot(x=numeric_column, data=train, label="Train", ax=ax, **kde_style)
    sns.kdeplot(x=numeric_column, data=test, label="Test", ax=ax, **kde_style)

    ax.xaxis.set_tick_params(labelsize=10, size=0, pad=5)
    ax.yaxis.set_tick_params(labelsize=10, size=0, pad=5)
    ax = hide_spines(ax)

    # Only the left-most panel of each row carries the y-axis label.
    ax.set_ylabel("Density" if idx % cols == 0 else "")
    ax.set_xlabel(numeric_column)
    ax.legend()

fig.tight_layout()
# plt.show() instead of fig.show(): the latter needs a GUI backend and only
# emits a warning under non-interactive/inline backends.
plt.show()

In [None]:
# Heatmap of the pairwise correlation between all numeric training columns
# (DataFrame.corr() with its default method).
corr = train[numeric_columns].corr()

fig, ax = plt.subplots(figsize=(15, 15))
for surface in (fig, ax):
    surface.set_facecolor("#fff")

# Pin the colour scale to the full [-1, 1] correlation range.
sns.heatmap(corr, annot=False, cmap="magma", vmin=-1, vmax=+1, ax=ax)

# Small labels and zero-length tick marks on both axes.
for axis in (ax.xaxis, ax.yaxis):
    axis.set_tick_params(labelsize=8, size=0, pad=5)

ax.set_title("Pearson Correlation", loc="left", fontsize=25, fontweight="bold")

plt.show()

In [None]:
# For every numeric feature, draw one kernel-density curve per value of the
# "target" column, one panel per feature.
cols = 2

# Derive the grid height from the number of features. A fixed 200-row grid
# leaves unused rows of blank figure when there are fewer features, and
# add_subplot rejects any index beyond rows * cols when there are more.
rows = max(1, int(np.ceil(len(numeric_columns) / cols)))

# About 2.5 inches per row, close to the previous 12 * 41 inches over 200 rows.
fig = plt.figure(figsize=(17, 2.5 * rows))
fig.set_facecolor("#fff")

for idx, numeric_column in enumerate(numeric_columns):
    ax = fig.add_subplot(rows, cols, idx + 1)
    ax.set_facecolor("#fff")
    ax.grid(color="lightgrey", alpha=0.7, axis="both", zorder=0)

    # Keep the hue legend on a single panel (the last one of the first row)
    # and suppress it elsewhere via seaborn's own `legend` flag.
    # NOTE(review): the previous code called a bare ax.legend() on every other
    # panel, which presumably replaced seaborn's hue legend with an empty one;
    # this keeps the legend on the same panel without that side effect.
    # Confirm this is the intended layout.
    show_legend = (idx + 1) == cols
    ax = sns.kdeplot(
        x=numeric_column,
        hue="target",
        data=train,
        fill=True,
        alpha=0.8,
        linewidth=0.7,
        edgecolor="#000",
        zorder=2,
        legend=show_legend,
        ax=ax,
    )

    ax.xaxis.set_tick_params(labelsize=10, size=0, pad=5)
    ax.yaxis.set_tick_params(labelsize=10, size=0, pad=5)
    ax = hide_spines(ax)

    # Only the left-most panel of each row carries the y-axis label.
    ax.set_ylabel("Density" if idx % cols == 0 else "")
    ax.set_xlabel(numeric_column)

fig.tight_layout()
# plt.show() instead of fig.show(): the latter needs a GUI backend and only
# emits a warning under non-interactive/inline backends.
plt.show()