In [1]:
import os
import numpy as np
import pandas as pd

import plotly
import plotly.graph_objects as go

In [2]:
from plotly.offline import init_notebook_mode, iplot
# Enable plotly's offline rendering inside the notebook. connected=True asks
# plotly to load plotly.js from its online CDN rather than embedding the
# library in the notebook, so viewing the figures needs network access.
init_notebook_mode(connected=True)

In [3]:
from read_missing_data import read_missing_df

In [4]:
# Load the dataset whose missing values are visualised below. The later cells
# call `.isna()` and `.shape` on it, so it is presumably a pandas DataFrame
# (rows x columns) -- confirm against `read_missing_df`.
data = read_missing_df()

### Row missing rate with threshold

### This plot shows
- A step line with a filled area showing the cumulative number of remaining rows.
- The x-axis is the row missing-ratio threshold, and the y-axis is the number of rows remaining at that threshold.

#### Main trace
- Scatter
```python
trace = go.Scatter(
    x=index,
    y=values,
    mode='lines',
    line={'shape': 'hv'},
    fill='tozeroy',
)
```

In [5]:
# Fraction of missing cells in each row (missing-cell count / number of columns).
row_missing = (data.isna().sum(axis=1)/data.shape[1])

# Cumulative row count per distinct missing ratio, in ascending ratio order:
# entry i is the number of rows whose missing ratio is <= the i-th ratio.
row_missing_cumcounts = row_missing.value_counts().sort_index().cumsum()

# Repeat the first and the last point once more (every other point once).
# The next cell moves the duplicated last point to x = 1.0 so the step line
# reaches the right edge of the plot.
# The repeat counts are derived from the series length rather than from the
# literal `[2] + [1]*(n-2) + [2]`: that literal always has at least two
# entries, so `repeat` raised a shape-mismatch error whenever there were zero
# or one distinct ratios. For n >= 2 the counts are identical to the literal.
endpoint_repeats = np.ones(len(row_missing_cumcounts), dtype=int)
if len(endpoint_repeats) > 0:
    endpoint_repeats[0] += 1
    endpoint_repeats[-1] += 1
accum_row_missing = row_missing_cumcounts.repeat(endpoint_repeats)

In [6]:
# x positions of the step line: the distinct row-missing ratios. The final
# point is moved to 1.0 so the curve extends to the right edge of the plot.
xidx = list(accum_row_missing.index)
xidx[-1] = 1.0

# Remaining rows, as absolute counts and as a fraction of all rows.
total_rows = data.shape[0]
yvalues = list(accum_row_missing.values)
yvalues_pct = list(accum_row_missing.values/total_rows)

hovertemplate = 'Threshold: %{x}<br>Remained rows: %{y} <extra></extra>'

# Visible trace: filled step line of the absolute remaining-row counts.
count_trace = go.Scatter(
    x=xidx,
    y=yvalues,
    mode='lines',
    line=dict(shape='hv'),
    fill='tozeroy',
    hovertemplate=hovertemplate,
)

# Fully transparent trace bound to the secondary y-axis; it is excluded from
# hover and carries the fractional values for the left-hand axis.
ratio_trace = go.Scatter(
    x=xidx,
    y=yvalues_pct,
    orientation='v',
    opacity=0,
    hoverinfo='skip',
    yaxis='y2',
)

fig = go.Figure()
fig.add_trace(count_trace)
fig.add_trace(ratio_trace)

fig.update_layout(
    # Title pinned to the top-left corner of the figure.
    title={
        'text': 'Remained Rows with Applying Missing Ratio Threshold',
        'font': {'size': 22},
        'y': 0.99,
        'x': 0.0,
        'xanchor': 'left',
        'yanchor': 'top',
    },
    width=722,
    height=448,
    xaxis=dict(title='Missing Ratio Threshold', range=[0, 1]),
    # Counts on the right, fractions on the left; both ranges carry the same
    # 5% headroom so the two scales line up.
    yaxis=dict(side='right', range=[0, total_rows*1.05]),
    yaxis2=dict(title='Remained Rows', side='left', overlaying='y', range=[0, 1.05]),
    showlegend=False,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(238,238,238,1)',
    modebar={
        'bgcolor': 'rgba(0,0,0,0)',
        'activecolor': 'rgba(68,68,68,0.7)',
        'color': 'rgba(68,68,68,0.3)',
        'remove': ['zoom', 'lasso', 'select'],
    },
    barmode='stack',
)

fig.show(config={'displaylogo': False})
# fig.write_html('./example_plots/missing_ratio_threshold.html', config={'displaylogo':False}, include_plotlyjs='cdn', full_html=False)

### Row missing ratio

### This plot shows
- A histogram summarising the distribution of row missing ratios.

#### Main trace
```python
trace = go.Histogram(
    x=data,
    xbins=go.histogram.XBins(start=0, end=1, size=0.01),
    ...
)
```
- `xbins`: defines where the bins start and end along the x-axis, and the bin width.

In [7]:
# Histogram of the per-row missing ratio.
# Recomputed here so that this cell does not depend on the threshold-plot cells.
missing_cells_per_row = data.isna().sum(axis=1)
row_missing = missing_cells_per_row/data.shape[1]
row_missing_nidx = row_missing.nunique()

hovertemplate = 'Missing ratio: %{x}<br>Counts: %{y}<extra></extra>'

# Fixed bins of width 0.01 between 0 and 1.
ratio_bins = go.histogram.XBins(start=0, end=1, size=0.01)
ratio_histogram = go.Histogram(
    x=row_missing,
    xbins=ratio_bins,
    hovertemplate=hovertemplate,
)

fig = go.Figure()
fig.add_trace(ratio_histogram)

fig.update_layout(
    # Title pinned to the top-left corner of the figure.
    title={
        'text': 'Row missing ratio',
        'font': {'size': 22},
        'y': 0.99,
        'x': 0.0,
        'xanchor': 'left',
        'yanchor': 'top',
    },
    width=722,
    height=448,
    # The x range starts slightly below 0, leaving a margin left of the first bin.
    xaxis=dict(title='Missing ratio', range=[-0.01, 1]),
    # Counts are drawn on a logarithmic axis.
    yaxis=dict(title='log(Counts)', type='log'),
    showlegend=False,
    paper_bgcolor='rgba(0,0,0,0)',
    plot_bgcolor='rgba(238,238,238,1)',
    modebar={
        'bgcolor': 'rgba(0,0,0,0)',
        'activecolor': 'rgba(68,68,68,0.7)',
        'color': 'rgba(68,68,68,0.3)',
        'remove': ['zoom', 'lasso', 'select'],
    },
    barmode='stack',
)

fig.show(config={'displaylogo': False})
# fig.write_html('./example_plots/row_missing_ratio.html', config={'displaylogo':False}, include_plotlyjs='cdn', full_html=False)