## Exploratory Data Analysis

In [None]:
import numpy as np
import pandas as pd
import plotly
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns

import matplotlib as mpl
from matplotlib.ticker import StrMethodFormatter
import seaborn as sns
import datetime
import random

In [None]:
# Load the dataset CSV into `data`; the path points at the Kaggle input
# directory, so it resolves only where that directory is mounted.
data = pd.read_csv("/kaggle/input/tabular-playground-series-jun-2022/data.csv")

In [None]:
# Preview the first six rows.
data.head(6)

In [None]:
# Storage dtype of every column.
data.dtypes

In [None]:
# (number of rows, number of columns).
data.shape

In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) per column.
data.describe()

In [None]:
# Number of distinct values per column (missing values are not counted).
data.nunique()

In [None]:
# Count of missing values in each column.
data.isnull().sum()

In [None]:
# Boolean mask with the same shape as `data`: True where a value is missing.
data.isnull()

In [None]:
# Number of rows that exactly repeat an earlier row.
data.duplicated().sum()

In [None]:
# Annotated heatmap of the pairwise correlation between all columns.
# The canvas is very large so that every annotated cell stays legible.
plt.figure(figsize=(50, 40))
corr = data.corr()

heatmap_style = dict(
    cmap='YlGnBu',
    annot=True,
    square=True,
    vmax=.8,  # colour scale saturates at 0.8
    linewidths=0.01,
    linecolor="white",
    annot_kws={'size': 12},
)
sns.heatmap(corr, **heatmap_style)
plt.show()

## Data Visualization 

In [None]:
# One histogram per column on a single large canvas; the trailing semicolon
# keeps the notebook from echoing the returned array of axes.
data.hist(figsize=(30,30));

## Missing values in the F features

In [None]:
# Split the columns by storage dtype: float64 columns go to `features_f`,
# int64 columns (except one named 'id') go to `features_i`.
# NOTE(review): confirm the identifier column is really called 'id'; a
# differently named id column would end up in `features_i`.
features_f = []
features_i = []
for column_name in data.columns:
    column_dtype = data[column_name].dtype
    if column_dtype == 'float64':
        features_f.append(column_name)
    elif column_dtype == 'int64' and column_name != 'id':
        features_i.append(column_name)

In [None]:
def check(df):
    """Build a per-column summary table for ``df``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame whose columns are summarised. Only this argument is inspected;
        no module-level frame is read.

    Returns
    -------
    pandas.DataFrame
        One row per column of ``df`` with the columns ``feature`` (column
        name), ``dtype``, ``nan`` (number of missing values), ``count``
        (number of non-missing values), ``nunique`` (number of distinct
        non-missing values) and ``unique`` (array of distinct values).
        An empty ``df`` yields an empty table with the same six columns.
    """
    summary_columns = ['feature', 'dtype', 'nan', 'count', 'nunique', 'unique']
    rows = [
        (
            name,
            series.dtype,
            series.isnull().sum(),
            series.count(),
            series.nunique(),
            series.unique(),
        )
        # Iterate the frame that was passed in, not a global one.
        for name, series in df.items()
    ]
    # Passing `columns=` up front keeps the result well-formed even when
    # `rows` is empty (assigning `.columns` afterwards would raise).
    return pd.DataFrame(rows, columns=summary_columns)

In [None]:
# Per-column summary (dtype, missing count, cardinality) of the int64 features.
check(data[features_i])

In this table we can observe that the integer features don't have any null values.

#### New check for the float values

In [None]:
# Per-column summary (dtype, missing count, cardinality) of the float64 features.
check(data[features_f])

## Plotting of missing values

In [None]:
# Row-by-feature map of the float columns: a cell is highlighted where the
# value is missing.
missing_mask = data[features_f].isnull()
plt.figure(figsize=(20, 10))
sns.heatmap(missing_mask, cmap='Reds')
plt.show()

In [None]:
# Store, for every row, how many of its float features are missing.
missing_per_row = data[features_f].isna().sum(axis=1)
data['n_missing'] = missing_per_row

# Bar chart of how many rows share each missing-value count.
missing_distribution = data['n_missing'].value_counts()
missing_distribution.plot(kind='bar', title='Number of missing Values per data')
plt.show()

## F1 data Division

In [None]:
# All column names except the first one (presumably the row identifier;
# confirm against the CSV), then the subset whose name begins with "F_1".
columns_name = list(data.columns)[1:]
columns_name_F1 = [name for name in columns_name if name.startswith("F_1")]

In [None]:
# Grid of kernel-density plots, one panel per F_1 feature.
background_color = "#00FF7F"
features = columns_name_F1

# Three panels per row. Keep at least the original five rows, and add rows
# when there are more than 15 features so every feature gets a panel.
n_grid_cols = 3
n_grid_rows = max(5, -(-len(features) // n_grid_cols))  # ceiling division

# NOTE: rcParams is global, so this DPI also applies to later figures.
plt.rcParams['figure.dpi'] = 600
fig = plt.figure(figsize=(10, 10), facecolor='#f6f5f5')
gs = fig.add_gridspec(n_grid_rows, n_grid_cols)
gs.update(wspace=0.3, hspace=0.3)

# Collect the axes in a list (row-major order) instead of creating
# ax0, ax1, ... through locals(), which only works at module scope.
kde_axes = []
for grid_row in range(n_grid_rows):
    for grid_col in range(n_grid_cols):
        ax = fig.add_subplot(gs[grid_row, grid_col])
        ax.set_facecolor(background_color)
        for side in ("top", "right"):
            ax.spines[side].set_visible(False)
        kde_axes.append(ax)

# Panels beyond len(features) stay empty, as in the fixed 5x3 layout.
for ax, feature in zip(kde_axes, features):
    sns.kdeplot(ax=ax, x=data[feature], zorder=2, alpha=1, linewidth=1, color='#ff355d')
    ax.grid(which='major', axis='x', zorder=0, color='#EEEEEE', linewidth=0.4)
    ax.grid(which='major', axis='y', zorder=0, color='#EEEEEE', linewidth=0.4)
    ax.set_ylabel('')
    ax.set_xlabel(feature, fontsize=6, fontweight='bold')
    ax.tick_params(labelsize=6, width=0.5)
    ax.xaxis.offsetText.set_fontsize(6)
    ax.yaxis.offsetText.set_fontsize(6)

plt.show()

In [None]:
# Columns whose name starts with 'F_1', laid out three per row.
# Floor division: a trailing partial row of features gets no grid row.
num_cols = 3
is_f1_column = data.columns.str.startswith('F_1')
features = data.columns[is_f1_column]
num_rows = len(features) // num_cols

## Plotting using Plotly

In [None]:
# Histogram grid: one 20-bin histogram per feature, filled row by row.
fig = make_subplots(rows=num_rows, cols=num_cols)

# Only as many features as the grid has cells are drawn.
plotted_features = features[:num_rows * num_cols]
for position, feature in enumerate(plotted_features):
    grid_row, grid_col = divmod(position, num_cols)
    histogram = go.Histogram(
        x=data[feature],
        nbinsx=20,
        marker=dict(color='#90EE90'),
        name=feature,
    )
    # Plotly subplot coordinates are 1-based.
    fig.add_trace(histogram, row=grid_row + 1, col=grid_col + 1)

fig.update_traces(marker_line_width=1, marker_line_color="Black")
fig.update_layout(height=600, title="Plots for features starts with F_1")
fig.show()