In [1]:
import numpy as np 
import pandas as pd 
from math import ceil

In [2]:
def numTable(df, target):
    '''
    Build a per-column statistical summary of a numerical dataframe.

    df is a dataframe of numerical variables and target is the name of one of
    its columns. The result has one row per column of df holding the
    describe() statistics, skew, kurtosis, percentage of missing values,
    percentage of values equal to zero, number of distinct values and the
    correlation with the target column. Rows are ordered by that correlation,
    highest first.
    '''
    n_rows = len(df)

    described = df.describe().transpose()
    skewness = df.skew()
    kurtosis = df.kurt()
    # Percentages are expressed relative to the total number of rows.
    pct_missing = (100 * df.isnull().sum() / n_rows).round(2)
    n_unique = df.nunique()
    # Only the target's column of the correlation matrix is kept.
    target_corr = df.corr()[target]
    pct_zero = (100 * df.eq(0).astype(int).sum() / n_rows).round(2)

    pieces = [described, skewness, kurtosis, pct_missing, pct_zero, n_unique, target_corr]
    summary = pd.concat(pieces, axis=1)
    # Labels are assigned by position: the eight describe() statistics first,
    # then the extra series in the order they were concatenated.
    summary.columns = [
        'Count', 'Mean', 'Std', 'Min', '25%', '50%', '75%', 'Max',
        'Skew', 'Kurt', '% Missing', '% Zero', 'Nuniques', 'Corr',
    ]
    summary['Count'] = summary['Count'].round()
    return summary.sort_values('Corr', ascending=False)
    
    

In [3]:
def catTable(df):
    '''
    Given a dataframe with categorical variables df
    returns statistical info about the dataframe,
    including missing values.

    The result has one row per column of df with:
      Count      number of non-missing values
      Unique     number of distinct non-missing values
      Top        most frequent non-missing value (as a string)
      Freq       number of occurrences of Top
      % Missing  percentage of rows where the value is missing
    Rows are sorted by '% Missing', highest first.
    '''
    rows = df.shape[0]

    # Values are cast to str so describe() reports count/unique/top/freq for
    # every column. The cast alone would turn missing values into the literal
    # strings 'nan'/'None' and count them as a real category, so the missing
    # cells are masked back out before describing.
    as_text = df.astype(str).where(df.notna())
    desc = as_text.describe().T

    nulls = 100 * df.isnull().sum() / rows

    X = pd.concat([desc, nulls], axis=1)
    X.columns = ['Count', 'Unique', 'Top', 'Freq', '% Missing']
    return X.sort_values('% Missing', ascending=False)