In [None]:
%load_ext autoreload
%autoreload 2

from skorecard.reporting import create_report 
from skorecard import datasets

import pandas as pd
import numpy as np
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline, make_pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import roc_auc_score

from skorecard.bucketers import DecisionTreeBucketer, EqualWidthBucketer, OrdinalCategoricalBucketer

from sklearn.linear_model import LogisticRegression
from plotly.subplots import make_subplots

from dabl import detect_types

In [None]:
import dash
import dash_html_components as html
import dash_core_components as dcc
from dash.dependencies import Input, Output
import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
from dabl import detect_types
from sklearn.pipeline import make_pipeline
# Route pandas' `.plot` accessor through plotly instead of the default backend.
pd.options.plotting.backend = "plotly"

In [None]:
# The dropdown below lists the dataset's columns, so the data has to be loaded
# in this cell; otherwise the layout only builds if `X` leaked from a later cell.
X, y = datasets.load_uci_credit_card(return_X_y=True)

app = JupyterDash(__name__)

app.layout = html.Div([
    # Bar/line chart that the callback redraws.
    # NOTE(review): `animate=True` may keep stale axes/title when the number of
    # buckets changes between updates - confirm it behaves as intended.
    html.Div([
        dcc.Graph(id='barplot',
                  config={'displayModeBar': False},
                  animate=True)
    ]),

    # Number of bins. With step=None only the marked values are selectable,
    # so the marks must include the upper bound (max=40) to make it reachable.
    html.Div([
        dcc.Slider(
            id='n_bin--slider',
            min=2,
            max=40,
            value=2,
            marks={str(i): str(i) for i in range(2, 41, 2)},
            step=None)
        ]),

    # Column whose buckets are displayed.
    html.Div([
        dcc.Dropdown(
                id='dropdown-column',
                options=[{'label': i, 'value': i} for i in X.columns],
                value='LIMIT_BAL'
        )])
            ])


In [None]:
def _summarise_buckets(X_original, X_bucketed, y, column):
    """Build the per-bucket summary table for one bucketed column.

    Args:
        X_original: frame holding the un-bucketed values of ``column``.
        X_bucketed: frame holding the bucket labels of ``column``.
        y: target values, assigned row-wise next to ``X_bucketed``.
        column: name of the column to summarise.

    Returns:
        DataFrame with one row per bucket and the columns BUCKET,
        NUMBER_IN_BUCKET, PERCENTAGE_IN_BUCKET, BADS, DEFAULT_RATE and
        ``{column}_ORIGINAL_min`` / ``_max`` / ``_mean``.
    """
    # Count once so labels, counts and shares are guaranteed to share one ordering.
    counts = X_bucketed[column].value_counts()
    summary = pd.DataFrame(
        {
            "BUCKET": counts.index,
            "NUMBER_IN_BUCKET": counts.values,
            "PERCENTAGE_IN_BUCKET": (counts / counts.sum()).values,
        }
    )

    original_name = f"{column}_ORIGINAL"
    # Work on a narrow copy so the caller's frames are not modified.
    work = X_bucketed[[column]].copy()
    work["target"] = y
    work[original_name] = X_original[column]

    # Number of positive targets ("bads") per bucket.
    bads = (
        work.groupby(column)["target"]
        .sum()
        .rename("BADS")
        .rename_axis("BUCKET")
        .reset_index()
    )
    summary = summary.merge(bads, how="left", on="BUCKET")

    # A zero count does not raise here: pandas division yields NaN/inf instead.
    summary["DEFAULT_RATE"] = summary["BADS"] / summary["NUMBER_IN_BUCKET"]

    # min/max/mean of the original values inside each bucket. Selecting a single
    # column before .agg() keeps the result flat (no MultiIndex to untangle).
    stats = (
        work.groupby(column)[original_name]
        .agg(["min", "max", "mean"])
        .add_prefix(f"{original_name}_")
        .rename_axis("BUCKET")
        .reset_index()
    )
    return summary.merge(stats, how="left", on="BUCKET")


def generate_bucketed(n_bins, column):
    """Fit the bucketing + logistic-regression pipeline and summarise one column.

    The UCI credit card data is loaded on every call, columns are split into
    numerical and categorical ones with ``detect_types``, and the pipeline
    (equal-width bucketing, ordinal categorical bucketing, one-hot encoding,
    logistic regression) is fitted on the full data set.

    Args:
        n_bins: number of equal-width bins for the numerical columns
            (anything accepted by ``int``).
        column: name of the column whose buckets are summarised.

    Returns:
        Tuple ``(df, auc)``: the per-bucket summary table and a text label
        ``"AUC = ..."``. The AUC is computed on the same data the pipeline
        was fitted on.

    Raises:
        KeyError: if ``column`` is not a column of the data set.
    """
    X, y = datasets.load_uci_credit_card(return_X_y=True)
    # Fail fast, before the comparatively expensive type detection and fit.
    if column not in X.columns:
        raise KeyError(f"Unknown column {column!r}; expected one of {list(X.columns)}")

    n_bins = int(n_bins)
    detected_types = detect_types(X)
    cat_columns = X.columns[detected_types['categorical'] | detected_types['low_card_int']]
    num_columns = X.columns[detected_types['continuous'] | detected_types['dirty_float']]

    bucket_pipeline = make_pipeline(
        EqualWidthBucketer(bins=n_bins, variables=list(num_columns)),
        OrdinalCategoricalBucketer(variables=list(cat_columns))
    )

    pipeline = Pipeline([
        ('bucketing', bucket_pipeline),
        ('one-hot-encoding', OneHotEncoder()),
        ('lr', LogisticRegression())
    ])

    pipeline.fit(X, y)
    auc = f"AUC = {roc_auc_score(y, pipeline.predict_proba(X)[:,1]):.4f}"

    # Only the equal-width step is applied for the summary.
    # NOTE(review): a column handled by the categorical bucketer is therefore
    # presumably summarised on its raw values - confirm that this is intended.
    bucketer = bucket_pipeline.named_steps['equalwidthbucketer']
    X_transform = bucketer.transform(X)

    df = _summarise_buckets(X, X_transform, y, column)

    return df, auc


def create_barplot(df, auc):
    """Plot bucket shares as bars and default rates as a line on a second y-axis.

    Args:
        df: bucket summary; the columns BUCKET, PERCENTAGE_IN_BUCKET and
            DEFAULT_RATE are read.
        auc: text used as the figure title.

    Returns:
        The plotly figure.
    """
    bin_number = df.shape[0]
    df = df.sort_values('BUCKET')

    # Figure with a secondary y-axis so both series share the x-axis.
    fig = make_subplots(specs=[[{"secondary_y": True}]])
    fig.update_xaxes(showgrid=False)
    fig.update_yaxes(showgrid=False)

    # Number of buckets, shown in the top-left corner of the plotting area.
    # The annotation text is a string property, so convert explicitly.
    fig.add_annotation(x=0, y=0.85, xanchor='left', yanchor='bottom',
                       xref='paper', yref='paper', showarrow=False, align='left',
                       bgcolor='rgba(255, 255, 255, 0.5)', text=str(bin_number))

    # Share of observations per bucket on the primary axis.
    fig.add_trace(
        go.Bar(x=df['BUCKET'], y=df['PERCENTAGE_IN_BUCKET'], name="Percentages"),
        secondary_y=False
    )

    # Default rate per bucket on the secondary axis.
    fig.add_trace(
        go.Scatter(x=df['BUCKET'], y=df['DEFAULT_RATE'], name="Default Rates"),
        secondary_y=True
    )
    fig.update_yaxes(title_text="Percentage", secondary_y=False)
    fig.update_yaxes(title_text="Default Rate", secondary_y=True)

    fig.update_layout(
        title=auc,
        xaxis_title="Bucket Number",
        font_family="Courier New"
    )
    return fig

@app.callback(Output('barplot', 'figure'),
              [Input('n_bin--slider', 'value'),
               Input('dropdown-column', 'value')])
def update_plot(n_bins, column):
    """Redraw the bar plot when the bin slider or the column dropdown changes.

    Args:
        n_bins: current value of the bin slider.
        column: current value of the column dropdown.

    Returns:
        The figure for the 'barplot' graph.

    Raises:
        dash.exceptions.PreventUpdate: if either input is None (for example
            after the dropdown selection has been cleared); the current
            figure is then left as it is.
    """
    if n_bins is None or column is None:
        raise dash.exceptions.PreventUpdate

    df, auc = generate_bucketed(n_bins, column)

    return create_barplot(df, auc)

In [None]:
# Serve the app on port 8890 in 'jupyterlab' mode, with the dev-tools UI and
# hot reload switched on. `debug=True` stays disabled.
app.run_server(
    mode='jupyterlab',
    port=8890,
    dev_tools_ui=True,
    dev_tools_hot_reload=True,
    threaded=True,
)

In [None]:
# generate_bucketed needs the column to summarise and returns a
# (summary table, AUC label) pair; create_barplot takes both as separate arguments.
tmp, auc_label = generate_bucketed(12, 'LIMIT_BAL')
create_barplot(tmp, auc_label)

In [None]:
# Display `tmp` as assigned by the generate_bucketed call in the preceding cell.
tmp

In [None]:
# Split the columns by detected type. The detect_types flags are used directly
# as boolean masks, so no `== True` comparison is needed.
X, y = datasets.load_uci_credit_card(return_X_y=True)
detected_types = detect_types(X)
cat_columns = X.columns[detected_types['categorical'] | detected_types['low_card_int']]
num_columns = X.columns[detected_types['continuous'] | detected_types['dirty_float']]


In [None]:
# Show which columns were classified as numerical.
num_columns

In [None]:
# Same column split as above, with the boolean flags used consistently as masks
# (the original mixed bare flags with `== True` comparisons).
X, y = datasets.load_uci_credit_card(return_X_y=True)
detected_types = detect_types(X)
cat_columns = X.columns[detected_types['categorical'] | detected_types['low_card_int']]
num_columns = X.columns[detected_types['continuous'] | detected_types['dirty_float']]


In [None]:
# Columns flagged as categorical or as low-cardinality integers.
cat_columns = X.columns[detected_types['categorical'] | detected_types['low_card_int']]