In [1]:
import os
import numpy as np
import pandas as pd

import plotly.graph_objects as go

In [2]:
# Switch plotly into offline notebook mode before any figure is displayed.
# NOTE(review): per the plotly docs, connected=True loads plotly.js from a CDN
# instead of embedding it in the notebook, so rendering needs network access —
# confirm that is acceptable where this notebook is run.
# `iplot` is imported here but is not referenced in the cells visible in this view.
from plotly.offline import init_notebook_mode, iplot
init_notebook_mode(connected=True)

In [3]:
# Path of the input CSV, relative to the notebook's working directory.
BANK_CSV_PATH = './data/bank.csv'

# Read the whole file into the DataFrame consumed by the plotting cells below.
data = pd.read_csv(BANK_CSV_PATH)

In [4]:
def plot_categorical_outlier(
    data,
    x: str,
    min_count: int = None,
    width=722,
    height=448,
    base_color: str = 'rgb(64,87,210)',
    outlier_color: str = 'rgb(231,92,82)',
):
    """Draw a bar chart of category frequencies, highlighting rare categories.

    The frequencies come from ``data[x].value_counts()``, so bars appear in
    the order that call returns them (missing values are not counted under
    the pandas default).

    Parameters
    ----------
    data : DataFrame-like
        Object supporting ``data[x].value_counts()``.
    x : str
        Name of the categorical column to summarise; also used as the x-axis
        title and in the figure title.
    min_count : int, optional
        Categories whose count is strictly below this threshold are drawn in
        ``outlier_color``. When ``None`` (default) every bar uses
        ``base_color``.
    width, height : int
        Figure size passed to the layout.
    base_color : str
        Colour for categories that meet the threshold.
    outlier_color : str
        Colour for categories below the threshold.

    Returns
    -------
    plotly.graph_objects.Figure
        The configured bar figure (not shown; the caller decides how to
        render or export it).

    Raises
    ------
    KeyError
        If ``x`` is not a column of ``data`` (raised by the column lookup).
    """
    counts = data[x].value_counts()
    arr_x = np.array(counts.index)
    arr_y = np.array(counts.values)

    # Build the colours from a boolean mask with np.where rather than by
    # assigning into a pre-filled string array: the result dtype is then wide
    # enough for both colour strings (no silent truncation) and an empty
    # input yields an empty string array instead of failing on assignment.
    if min_count is None:
        is_outlier = np.zeros(arr_y.shape, dtype=bool)
    else:
        is_outlier = arr_y < min_count
    color = np.where(is_outlier, outlier_color, base_color)

    # The empty <extra> tag is plotly's convention for hiding the secondary
    # (trace name) hover box.
    hovertemplate = "Counts: %{y}<extra></extra>"

    fig = go.Figure(
        data=go.Bar(
            x=arr_x,
            y=arr_y,
            marker=dict(color=color),
            hovertemplate=hovertemplate,
        )
    )

    fig.update_layout(
        title=dict(
            text=f'Categorical outlier: {x}',
            font=dict(size=22),
            y=0.99,
            x=0.0,
            xanchor='left',
            yanchor='top',
        ),
        width=width,
        height=height,
        hoverlabel=dict(
            bgcolor="white",
            font_size=12,
            font_family="Rockwell",
        ),
        paper_bgcolor='rgba(0,0,0,0)',
        plot_bgcolor='rgba(238,238,238,1)',
        modebar=dict(
            bgcolor='rgba(0,0,0,0)', activecolor='rgba(68,68,68, 0.7)', color='rgba(68, 68, 68, 0.3)',
            remove=['zoom', 'lasso', 'select'],
        ),
    )

    fig.update_xaxes(title=x, tickmode='linear')
    fig.update_yaxes(title='Counts', ticksuffix=' ')

    return fig

In [5]:
# 'job' categories that occur fewer than MIN_JOB_COUNT times are highlighted.
MIN_JOB_COUNT = 300
# Display options handed to the figure renderer.
PLOT_CONFIG = {'displaylogo': False}

fig = plot_categorical_outlier(data, x='job', min_count=MIN_JOB_COUNT)
fig.show(config=PLOT_CONFIG)
# fig.write_html('./automl_plot/outlier_category.html', config={'displaylogo':False}, include_plotlyjs='cdn', full_html=False)