In [1]:
import os
import numpy as np
import pandas as pd

import plotly.graph_objects as go

In [2]:
from plotly.offline import init_notebook_mode, iplot
# Put plotly into notebook rendering mode before any figure is displayed.
# NOTE(review): connected=True presumably loads plotly.js from a CDN instead of
# embedding it in the notebook -- confirm against the plotly.offline docs.
init_notebook_mode(connected=True)

In [3]:
from read_missing_data import read_missing_df

In [4]:
# Load the dataset through the project-local helper. Later cells call
# .copy(), .isna() and column selection on it, so it is presumably a
# pandas DataFrame -- verify against read_missing_data.read_missing_df.
data = read_missing_df()

In [5]:
def plot_missing_value_correlation(data, fig=None, width=500, height=500, colorscale='YlGnBu', reverse_font_color=False):
    """Draw an annotated heatmap of a (correlation) matrix.

    Parameters
    ----------
    data : pandas.DataFrame
        Matrix to plot. Column labels go on the x axis, index labels on the
        y axis. Values are rounded to two decimals for both the colour
        cells and the text labels; the colour range is fixed to [-1, 1].
    fig : plotly.graph_objects.Figure, optional
        Figure to draw onto. A new figure is created when omitted. The
        figure's layout annotations are replaced by the ones built here.
    width, height : int
        Figure size passed to the layout.
    colorscale : str
        Plotly colorscale used by the heatmap trace.
    reverse_font_color : bool
        By default labels are black for values below 0.5 and white
        otherwise; pass True to swap the two colours.

    Returns
    -------
    tuple
        ``(fig, annotations)`` -- the figure and the list of
        ``go.layout.Annotation`` objects (one per cell, row-major order).
    """
    if fig is None:
        fig = go.Figure()

    x = data.columns.values.tolist()
    y = data.index.values.tolist()

    rounded = np.round(data.values, 2)
    # Rounding a tiny negative value (e.g. -0.001) yields IEEE negative zero,
    # which str() renders as "-0.0". Replace every zero with a plain 0 so the
    # labels and hover values read "0.0"; NaN != 0, so NaNs are untouched.
    rounded = np.where(rounded == 0, 0, rounded)
    z = rounded.tolist()

    fig.add_trace(
        go.Heatmap(
            x=x,
            y=y,
            z=z,
            colorscale=colorscale,
            showscale=True,
            reversescale=False,
            zmin=-1,
            zmax=1,
            hovertemplate="Corr ( %{x}, %{y} ) = %{z} <extra></extra>"
        )
    )

    # One text annotation per cell, positioned by axis label.
    annotations = []
    for row_label, row in zip(y, z):
        for col_label, val in zip(x, row):
            # XOR lets reverse_font_color flip the black/white choice.
            font_color = "#000000" if (val < 0.5) ^ reverse_font_color else "#FFFFFF"
            # Undefined correlations (NaN) are left blank instead of showing "nan".
            text = '' if np.isnan(val) else str(val)
            annotations.append(
                go.layout.Annotation(
                    text=text,
                    x=col_label,
                    y=row_label,
                    xref="x1",
                    yref="y1",
                    font=dict(color=font_color),
                    showarrow=False,
                )
            )

    fig.update_layout(
        title_text='Missing Value Correlation',
        annotations=annotations,
        # Anchor x to y so the cells stay square.
        xaxis=dict(scaleanchor='y', constrain='range'),
        width=width,
        height=height,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(238,238,238,1)',
    )

    # Reverse the y axis so the first row of the matrix is drawn at the top.
    fig.update_yaxes(autorange='reversed', showgrid=False)
    fig.update_xaxes(tickangle=-45, showgrid=False)

    return fig, annotations

In [6]:
# Missingness indicator: 1 where the original value is missing, 0 otherwise.
# isna() already returns a new frame, so no defensive copy of `data` is needed,
# and astype(int) converts the booleans directly instead of round-tripping
# them through two replace() calls.
data_ms = data.isna().astype(int)

# Labels of the columns that contain at least one missing value,
# in their original column order.
ms_cols = data_ms.columns[data_ms.any(axis=0)].tolist()

In [7]:
# Correlation of the missingness indicators, restricted to the columns
# listed in ms_cols (those with at least one missing value).
fig, _ = plot_missing_value_correlation(
    data=data_ms[ms_cols].corr(),
    width=800,
    height=800,
    colorscale='RdBu',
)
fig.show()

In [8]:
# Same heatmap over every column of the indicator frame, including the
# columns that were left out of ms_cols.
fig, _ = plot_missing_value_correlation(
    data=data_ms.corr(),
    width=800,
    height=800,
    colorscale='RdBu',
)
fig.show()