In [1]:
import os
from typing import Optional

import numpy as np
import pandas as pd

import plotly.graph_objects as go
from automl.analysis.ploting import AutoBins

In [2]:
# Initialise plotly's offline rendering for this notebook session.
# NOTE(review): per the plotly.offline docs, connected=True loads plotly.js from
# a CDN instead of embedding it in the notebook — confirm for the installed version.
# NOTE(review): `iplot` is imported but not used in the cells visible here.
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [3]:
# Load the dataset; the path is relative to the notebook's working directory.
data = pd.read_csv('./data/bank.csv')

In [4]:
def sliding_mask(x0=None, x1=None):
    """Describe a shaded vertical band as a plotly layout-shape dictionary.

    The band is a borderless, semi-transparent orange rectangle placed below
    the traces. Horizontally it runs from ``x0`` to ``x1`` in x-axis data
    coordinates; vertically it spans 0 to 1 in "paper" coordinates.

    Args:
        x0: Left edge of the band on the x axis.
        x1: Right edge of the band on the x axis.

    Returns:
        dict: Shape specification suitable for a figure layout's ``shapes`` list.
    """
    band = {
        "fillcolor": "rgba(246,178,107,0.5)",
        "line": {"width": 0},  # no outline around the rectangle
        "type": "rect",
        "layer": "below",
        "x0": x0,
        "x1": x1,
        "xref": "x",
        "y0": 0,
        "y1": 1,
        "yref": "paper",
    }
    return band

In [5]:
def plot_numeric_outlier(data, x: str, lower_bound: Optional[float] = None, upper_bound: Optional[float] = None, width=722, height=448):
    """Plot a log-scaled histogram of a numeric column with outliers highlighted.

    Values ``<= lower_bound`` or ``>= upper_bound`` are drawn as a separate red
    "outlier" trace stacked with the blue "data" trace, and the outlier regions
    of the x axis are shaded with ``sliding_mask`` rectangles.

    Args:
        data: DataFrame-like object; ``data[x]`` must be a numeric pandas Series.
        x: Name of the column to plot.
        lower_bound: Lower outlier threshold. Ignored when ``None`` or below
            the column minimum.
        upper_bound: Upper outlier threshold. Ignored when ``None`` or above
            the column maximum plus one bin width.
        width: Figure width in pixels.
        height: Figure height in pixels.

    Returns:
        plotly.graph_objects.Figure: The assembled histogram figure.

    Raises:
        ValueError: If the column contains no non-missing values.
    """
    arr = data[x].dropna()
    if arr.empty:
        raise ValueError(f"Column {x!r} has no non-missing values to plot")

    # NOTE(review): the three AutoBins results are treated as candidate bin
    # counts and the smallest one is used — confirm against AutoBins' contract.
    autobins = AutoBins()
    nbins = np.nanmin([
        autobins.get_len_step(arr),
        autobins.get_mean_diff_step(arr),
        autobins.get_power_step(arr)
    ])
    # Guard against NaN / zero / fractional results that would otherwise fail
    # in int() or produce a division by zero below.
    n_bins = int(nbins) if np.isfinite(nbins) and nbins >= 1 else 1

    # Vectorised extrema, computed once and reused for bins and masks.
    min_x = float(arr.min())
    max_x = float(arr.max())
    step = (max_x - min_x) / n_bins
    if step == 0:
        # Constant column: fall back to a unit-width bin so the size is valid.
        step = 1.0

    # Bounds lying outside the plotted range are treated as "not set".
    if lower_bound is None or lower_bound < min_x:
        lower_bound = None
    if upper_bound is None or upper_bound > (max_x + step):
        upper_bound = None

    shapes = []
    # A single OR-ed mask ensures each value is classified exactly once, even
    # when the bounds overlap (lower_bound > upper_bound).
    is_outlier = np.zeros(len(arr), dtype=bool)
    if lower_bound is not None:
        shapes.append(sliding_mask(x0=min_x, x1=lower_bound))
        is_outlier |= (arr <= lower_bound).to_numpy()
    if upper_bound is not None:
        shapes.append(sliding_mask(x0=upper_bound, x1=max_x + step))
        is_outlier |= (arr >= upper_bound).to_numpy()

    main_arr = arr[~is_outlier]
    bound_arr = arr[is_outlier]

    def _histogram(values, color, name):
        # Both traces share identical bins so their bars stack column by column.
        # The end edge matches the right edge used for the upper-bound mask.
        return go.Histogram(
            x=values,
            autobinx=False,
            xbins=go.histogram.XBins(start=min_x, end=max_x + step, size=step),
            marker=dict(color=color),
            hovertemplate=x + " (%{x}): %{y} <extra></extra>",
            name=name,
        )

    fig = go.Figure()
    # The outlier trace is added first (only when there is something to show).
    if is_outlier.any():
        fig.add_trace(_histogram(bound_arr, "rgba(236,78,78,0.8)", 'outlier'))
    fig.add_trace(_histogram(main_arr, "rgba(53,77,204,1)", 'data'))

    fig.update_layout(
        title=dict(
            text=f'Numeric outlier: {x}',
            font=dict(size=22),
            y=0.99,
            x=0.0,
            xanchor='left',
            yanchor='top',
        ),
        width=width,
        height=height,
        template='plotly_white',
        barmode='stack',
        shapes=shapes,
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(238,238,238,1)',
        modebar=dict(
            bgcolor='rgba(0,0,0,0)', activecolor='rgba(68,68,68, 0.7)', color='rgba(68, 68, 68, 0.3)',
            remove=['zoom', 'lasso', 'select'],
        ),
    )

    fig.update_xaxes(title=x)
    # Log-scaled counts keep sparse outlier bins visible next to dense bins.
    fig.update_yaxes(title='Count',
                     type='log',
                     showticklabels=True,
                     dtick=1,
                     tickformat="f",
                     tickmode='auto',
                     ticklabeloverflow='allow',
                     mirror='allticks',
                    )

    return fig

In [6]:
# Plot the 'balance' column with no lower threshold and an upper threshold of
# 80000 (the function ignores the threshold if it lies beyond the data maximum
# plus one bin width).
fig = plot_numeric_outlier(data=data, x='balance', lower_bound=None, upper_bound=80000)
# Render inline; 'displaylogo': False is passed through as plotly display config.
fig.show(config={'displaylogo': False})
# Optional export of the figure as an HTML fragment (currently disabled):
# fig.write_html('./automl_plot/outlier_numeric.html', config={'displaylogo': False}, include_plotlyjs='cdn', full_html=False)