Actually, I borrowed the idea and code from the cool kernel by @alijs [here](https://www.kaggle.com/alijs1/ieee-transaction-columns-reference) during the IEEE-CIS Fraud Detection competition. Though I was quite unlucky in that contest, I did learn some good tricks. Now, while preparing a presentation about EDA techniques for my colleagues and going through my notes, I thought I'd be happy if this universal approach came in handy for someone here.

The idea is to generate basic stats for each column so you can get some first insights quickly, and then easily save them as an HTML document to always have at hand.
I also added some basic stats (the get_stats function) that I usually gather at the start of EDA. I've seen this code and its reincarnations across many Kaggle notebooks before and believe (but am not quite sure) that it originated with @artgor in one of his old kernels.

UPDATE:
Added Pandas Profiling report stats ([pandas-profiling](https://pandas-profiling.github.io/pandas-profiling/docs/master/index.html)) as a good, easy-to-use alternative you can launch and go grab some coffee. It provides you with tons of auto-generated stats for each feature - just click on **toggle details** to see all metrics. I run it with **minimal=True** option (saved to pandas_profiler_output.html) since the standard, full API doesn't go well with such a large dataset. If you still want to try the full mode, you are free to do it - launch it, go grab some coffee, start your business, get married etc. When you get back, it may still be running.

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from IPython.core.display import HTML

import gc

import warnings
# Suppress every warning so it does not clutter the notebook output.
warnings.filterwarnings("ignore")
# When a DataFrame is displayed, show up to 300 columns but at most 15 rows.
pd.set_option('display.max_columns', 300)
pd.set_option("display.max_rows", 15)
# Alternative setting, kept disabled: remove the row limit entirely.
#pd.set_option("display.max_rows", None)

from pandas_profiling import ProfileReport

# Draw all matplotlib figures with the 'ggplot' style.
plt.style.use('ggplot')

In [None]:
# Load the train and test feature tables from the Kaggle input directory.
train = pd.read_csv('/kaggle/input/lish-moa/train_features.csv')
test = pd.read_csv('/kaggle/input/lish-moa/test_features.csv')

In [None]:
# Intro function for exploratory data analysis
def get_stats(df):
    """
    Build a per-column summary table for the dataframe ``df``.

    The result has one row per column of ``df`` with the following fields:
    - Feature: column name
    - Unique_values: number of distinct non-null values
    - Percentage of missing values
    - Percentage of zero values (reported as 0 for columns whose dtype is
      'object', 'str' or 'datetime64[ns]')
    - Percentage of values in the biggest category (NaN is counted as a
      category of its own)
    - type: column dtype

    If ``df`` has no rows, every percentage is NaN because it is undefined.

    Returns a new dataframe; ``df`` itself is not modified.
    """
    # dtypes for which a comparison with 0 is skipped
    non_numeric = ['object', 'str', 'datetime64[ns]']
    n_rows = df.shape[0]
    undefined = float('nan')

    stats = []
    for col in df.columns:
        series = df[col]

        if n_rows == 0:
            # Nothing to count: avoid 0/0 and indexing into empty value counts.
            missing_pct = zero_pct = top_pct = undefined
        else:
            missing_pct = series.isnull().sum() * 100 / n_rows
            if series.dtype not in non_numeric:
                # Sum the boolean mask instead of filtering the whole frame.
                zero_pct = (series == 0).sum() * 100 / n_rows
            else:
                zero_pct = 0
            # value_counts is sorted by frequency, so the first entry is the
            # share of the most common value.
            top_pct = series.value_counts(normalize=True, dropna=False).values[0] * 100

        stats.append((col, series.nunique(), missing_pct, zero_pct, top_pct, series.dtype))

    df_stats = pd.DataFrame(stats, columns=['Feature', 'Unique_values',
                                            'Percentage of missing values',
                                            'Percentage of zero values',
                                            'Percentage of values in the biggest category',
                                            'type'])
    # df_stats.sort_values('Percentage of zero values', ascending=False, inplace=True)

    return df_stats

## Introductory stats ## 

In [None]:
# Per-column summary of the train features (rendered as the cell output).
get_stats(train)

In [None]:
# Per-column summary of the test features (rendered as the cell output).
get_stats(test)

Here you might want to change the scope of features included in the reference.

In [None]:
# Split the train columns by name pattern.
# c_cols: every column whose name contains 'c-'.
c_cols = [name for name in train.columns if 'c-' in name]
# g_cols: columns whose name contains 'g-', limited to the first 100 of them.
g_cols = [name for name in train.columns if 'g-' in name][:100]
# head_cols: everything that matches neither pattern.
head_cols = [name for name in train.columns if not ('g-' in name or 'c-' in name)]

# Columns covered by the reports below, in display order.
included_cols = head_cols + g_cols + c_cols
# Column names that get a '<li>' prefix in the links index.
split_on = ['sig_id', 'g-0', 'c-0']

## Pandas Profiling stats ## 

In [None]:
# Profile only the selected columns; minimal=True is used because, as noted in the
# introduction, the full report does not cope well with a dataset of this size.
profile = ProfileReport(train[included_cols], title="Pandas Profiling Report", minimal=True)
# Bare expression as the last line of the cell, so the notebook displays the report.
profile

In [None]:
# Write the profiling report to pandas_profiler_output.html.
profile.to_file("pandas_profiler_output.html")

In [None]:
# Heading carrying the "home" anchor that the "(Jump to top)" links point to.
display(HTML('<h2 id="home"> Feature reference guide </h2>'))

In [None]:
# EDA detailed output
class EDA():
    """
    Notebook helper that prints a per-column reference for a train/test pair.

    For each column, ``run`` emits an anchored HTML heading followed by
    describe() tables and the most frequent values. Non-object columns
    additionally get histograms and a correlation table; object columns can
    get bar charts of their value counts.

    All output goes through ``display``/``HTML`` and matplotlib, so the class
    is meant to be used from a Jupyter/IPython session.

    Parameters:
    - train, test: dataframes containing the columns to describe
    - included_cols: columns that get a section in the reference; the
      correlation tables turn only these names into links
    - N: number of top values and of top/bottom correlations to show
    - advanced_graph: draw value-count bar charts for categorical columns
    - train_target: optional dataframe, used only by ``desc_target``
    """

    def __init__(self, train, test, included_cols, N=5, advanced_graph=True, train_target=None):
        self.train = train
        self.test = test
        self.included_cols = included_cols
        self.N = N
        self.advanced_graph = advanced_graph
        # Previously never assigned, which made desc_target fail unconditionally.
        self.train_target = train_target

    def h(self, content):
        """Display ``content`` as raw HTML in the notebook output."""
        display(HTML(content))

    def _desc(self, data, col, label):
        """
        Return describe() of the series ``data`` as a two-column frame
        (``col`` holds the statistic name, ``label`` its value) extended with
        three extra rows: 'unique values' (NaN counts as a value), 'NaNs' and
        'NaNs %'.

        Note: 'NaNs %' stores the share of NaNs as a fraction rounded to
        4 decimals; it is not multiplied by 100.
        """
        summary = data.describe().reset_index()
        summary.columns = [col, label]

        nan_count = data.isnull().sum()
        extra_rows = [
            {col: 'unique values', label: data.unique().shape[0]},
            {col: 'NaNs', label: nan_count},
            {col: 'NaNs %', label: np.round(nan_count / data.shape[0], 4)},
        ]
        # DataFrame.append was removed in pandas 2.0. Concatenating one-row
        # frames is what append did internally, so the resulting dtypes match.
        return pd.concat([summary] + [pd.DataFrame([row]) for row in extra_rows],
                         ignore_index=True)

    def _value_counts(self, data, col):
        """
        Return the value counts of ``data[col]`` (NaN replaced by -999) as a
        frame with columns [``col``, 'Count'], most frequent value first.
        """
        counts = data[[col]].fillna(-999).value_counts(col).reset_index(drop=False)
        counts.columns = [col, 'Count']
        return counts

    def desc(self, col, categorical=False):
        """
        Display the train/test describe() tables side by side and, for every
        column except 'sig_id', the N most frequent train values with their
        counts in train and test. When ``categorical`` is true and
        ``advanced_graph`` is enabled, bar charts of the value counts are
        drawn as well.
        """
        d0 = self._desc(self.train[col], col, 'Train')
        d1 = self._desc(self.test[col], col, 'Test')

        # Join on the statistic-name column, the only one both frames share.
        dd = d0.merge(d1, on=col)
        display(dd)

        if col != 'sig_id':
            self.h('<b>Top  '+ str(self.N) +' most popular values (NaN = -999):</b>')
            d0 = self._value_counts(self.train, col)
            d1 = self._value_counts(self.test, col)
            # Left join keeps the train ordering (descending by train count).
            dd = d0.merge(d1, how='left', on=col).head(self.N)
            dd = dd.rename({'Count_x': 'Count in Train (desc)', 'Count_y': 'Count in Test'}, axis=1)
            display(dd)

            if self.advanced_graph and categorical:
                fig, axs = plt.subplots(1, 2, figsize=(15, 3))
                d0.plot(kind='bar', x=col, y='Count', color='green', alpha=0.8, title='Train: ' + col, legend=False, xlabel='', ylabel='Count', rot=0, ax=axs[0])
                d1.plot(kind='bar', x=col, y='Count', color='red', alpha=0.8, title='Test: ' + col, legend=False, xlabel='', rot=0, ax=axs[1])
                plt.show()

        # Release the temporary frames before moving on to the next column.
        del dd, d0, d1
        gc.collect()

    def desc_target(self, col, categorical=False):
        """
        Display the describe() table of column ``col`` of ``train_target``.

        Raises AttributeError when no ``train_target`` dataframe is available
        (neither passed to the constructor nor assigned afterwards).
        """
        target = getattr(self, 'train_target', None)
        if target is None:
            raise AttributeError(
                "train_target is not set; pass train_target=... to EDA() "
                "or assign eda.train_target before calling desc_target()")

        dd = self._desc(target[col], col, 'Train target')
        display(dd)

        del dd
        gc.collect()

    def hist(self, col):
        """Draw 70-bin histograms of ``col`` for train (green) and test (red)."""
        fig, axs = plt.subplots(1, 2, figsize=(15, 3))
        self.train[col].plot(kind='hist', bins=70, color='green', alpha=0.8, title='Train histogram: ' + col, ax=axs[0])
        self.test[col].plot(kind='hist', bins=70, color='red', alpha=0.8, title='Test histogram: ' + col, ax=axs[1])
        plt.ylabel('')
        plt.show()

    def corr(self, col):
        """
        Display the train columns most and least correlated with ``col``.

        Correlations are computed against every non-object train column. The
        table shows the N+1 highest values followed by the N lowest non-NaN
        values; names listed in ``included_cols`` are rendered as links to
        their section.
        """
        num_vars = [f for f in self.train.columns if self.train[f].dtype != 'object']
        corrs = self.train[num_vars].corrwith(self.train[col]).reset_index()
        corrs = corrs.sort_values(0, ascending=False).reset_index(drop=True)
        corrs = corrs.rename({'index': 'Column', 0: 'Correlation with ' + col}, axis=1)
        self.h('<b>Most correlated features with ' + col + ':</b>')
        trx = pd.concat([corrs.head(self.N+1), corrs.dropna().tail(self.N)])

        def linkx(val):
            return '<a href="#c_{}">{}</a>'.format(val, val) if val in self.included_cols else val

        trx['Column'] = trx['Column'].apply(linkx)
        # escape=False so the anchor tags built above are rendered as links.
        self.h(trx.to_html(escape=False))

        del trx, corrs
        gc.collect()

    def numeric(self, col):
        """Full report for a numeric column: histograms, stats, correlations."""
        self.hist(col)
        self.desc(col)
        self.corr(col)

    def categorical(self, col):
        """Report for a categorical column: stats plus optional bar charts."""
        self.desc(col, categorical=True)

    def run(self, col):
        """
        Emit the section for ``col``: a heading anchored as "c_<col>" with a
        link back to "#home", then the categorical report for object columns
        or the numeric report for everything else.
        """
        self.h('<h3 id="c_' + col + '">' + col + '</h3>' + '<a style="font-size:11px" href="#home">(Jump to top)</a>')
        if self.train[col].dtype == 'object':
            self.categorical(col)
        else:
            self.numeric(col)

In [None]:
# Reference builder over the train/test frames, restricted to the selected columns
# (N and advanced_graph keep their defaults).
eda = EDA(train, test, included_cols)

In [None]:
# Index of the reference: one link per included column, each pointing at the
# "c_<column>" anchor that EDA.run emits for that column. Columns listed in
# split_on are prefixed with '<li>'.
eda.h('<b id="home">Links to features info:</b> ' + ', '.join([('<li>' if col in split_on else '') + '<a href="#c_' + col + '">' + col + '</a>' for col in included_cols]))
# Shapes of both datasets.
eda.h('Train shape: <b>' + str(train.shape) + '</b>' + 
  '<br>Test shape: <b>' + str(test.shape) + '</b>')
# First 10 rows of the train data.
eda.h('Train preview:')
display(train.head(10))

In [None]:
# Emit the reference section for every included column, in order.
for c in eda.included_cols:
    eda.run(c)