In [8]:
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import pickle
import pandas as pd
import plotly.graph_objects as go
import numpy as np


data = pd.read_csv("../data/test_df.csv")
data = data.drop(columns=["index", "TARGET"])
data = data.set_index("SK_ID_CURR")

model = pickle.load(open("../pickle_lgbm_classifier.pkl", "rb"))
cat_features = pickle.load(open("../pickle_cat_features.pkl", "rb"))
categorical_names = pickle.load(open("../pickle_categorical_names.pkl", "rb"))

explainer = pickle.load(open("../pickle_explainer.pkl", "rb"))
shap_values = pickle.load(open("../pickle_shap_values.pkl", 'rb'))

prob = model.predict_proba(data, num_iteration=model.best_iteration_)[:,1]
pred = [0 if i<=0.18 else 1 for i in prob]

labels = ["Accepté","Refusé"]
values = pd.Series(pred).value_counts()

fig_pie_credit = go.Figure(data=[go.Pie(labels =labels, values = values)])
fig_pie_credit.update_traces(marker=dict(colors=["#0ecf10", "#f90531"]))

df_features_importance = pd.DataFrame(model.feature_importances_, columns=["importance"])
df_features_importance["feature"] = model.feature_name_
df_features_importance = df_features_importance.sort_values(by="importance", ascending=False)
x = list(df_features_importance.iloc[:5,0])
x.reverse()
y = list(df_features_importance.iloc[:5,1])
y.reverse()
hist_colors = ["#8dd3c7", "#ffffb3", "#bebada", "#fb8072", "#80b1d3"]
fig_features_importance = go.Figure(data=[go.Bar(x=x,
                                                 y=y,
                                                 orientation='h',
                                                 marker_color=hist_colors
                                                )
                                         ])
x.reverse()
y.reverse()
hist_colors.reverse()


external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
app.layout = html.Div(children=[
    html.Div(children= [
                html.H1(children='Home Credit')
    ], className="title"),
    html.Div(children=[
        html.Div(children=[
            html.H3(children="Proportion de prêts accordés"),
            dcc.Graph(
                id='pie_credits',
                figure=fig_pie_credit)
        ], id="pie_chart")
    ], id="container"),
    html.Div(children=[
        html.Div(children=[
            html.H3(children="Attributs les plus importants dans le modèle"),
            dcc.Graph(
                id="features_importance",
                figure=fig_features_importance)
        ], className="five columns"),
        html.Div(children=[
            html.H3(children="Répartitions des prêts refusés par attribut"),
            html.Div(children=[
                html.Label(children="Choisissez un attribut :", className = "four columns"),
                dcc.Dropdown(
                id="feature_choice",
                options=[{"label":y[i], "value":i} for i in range(5)],
                value=0,
                className="three columns")
            ], className="row"),
            dcc.Graph(id="histogram")
        ], className="seven columns")
    ], className="row"),
        html.H3(children="Valeur de l'importance de chaque attribut"),
        html.Div(children=[
            html.Label(children="L'attribut", className="one columns"),
            dcc.Dropdown(id="importance_feature",
                         options=[{"label":i, "value":i} for i in df_features_importance["feature"]],
                         value=df_features_importance.iloc[5,1],
                         className="three columns"
            )
        ], className="row"),
        html.H5(id="display_importance"),
    html.Iframe(src="tests/shap_force.html")
])


@app.callback(
    Output("histogram", "figure"),
    Input("feature_choice", "value")
)
def display_histogram(feature_index):
    idx = int(feature_index)
    feature_data = pd.DataFrame(data[y[idx]])
    feature_data["TARGET"] = pred
    min_feature = min(feature_data[y[idx]])
    max_feature = max(feature_data[y[idx]])
    feature_data["BINNED"] = pd.cut(feature_data[y[idx]], bins=np.linspace(min_feature,max_feature,num=11))
    grouped_data = feature_data.groupby("BINNED").mean()
    fig = go.Figure(data=go.Bar(x=grouped_data.index.astype(str), 
                                y=100*grouped_data["TARGET"], 
                                marker_color=hist_colors[idx]
                               )
                   )
    return fig

@app.callback(
    Output("display_importance", "children"),
    Input("importance_feature", "value")
)
def display_feature_importance(feature):
    importance = df_features_importance.loc[df_features_importance["feature"]==feature, "importance"].values[0]
    return f"a pour importance {importance}"


app.run_server(debug=True, use_reloader=False) 

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


In [5]:
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']

app.layout = html.Div(children=[
    html.Div(children=[
        html.Div(children=[
            html.H3(children=f"Clients consultables"),
            dcc.Dropdown(id="customers"),
            html.H4(id="default_proba"),
            html.H1(id="credit_agrement")
        ], className="six columns"),
        html.Div(children=[
            html.H3(children="Filtres"),
            html.Div(children=[
                html.Label(children="Prêt :", className="three columns"),
                dcc.RadioItems(id="filter_agrement",
                    options=[
                        {"label": "Non renseigné", "value": -1},
                        {'label': 'Accepté', 'value': 0},
                        {'label': 'Refusé', 'value': 1},
                    ],
                    value=-1,
                    labelStyle={'display': 'inline-block'},
                    className=" nine columns"
                )                 
            ], className="row"),
            html.Div(children=[
                html.Label(children="Genre :", className="three columns"),
                dcc.RadioItems(id="filter_gender",
                    options=[
                        {"label": "Non renseigné", "value": -1},
                        {'label': 'Femme', 'value': 0},
                        {'label': 'Homme', 'value': 1},
                    ],
                    value=-1,
                    labelStyle={'display': 'inline-block'},
                    className=" nine columns"
                )                 
            ], className="row")
        ], className="six columns")
    ], className="row"),
    html.Div([
        dcc.Graph(id="shap_waterfall")
    ])
])


@app.callback(
    Output("default_proba", "children"),
    Output("credit_agrement", "children"), 
    Output("credit_agrement", "style"),
    Output("shap_waterfall", "figure"),
    Input("customers", "value")
)
def display_score(id_customer):
    proba = model.predict_proba(np.array(data.loc[id_customer]).reshape(1, -1), num_iteration=model.best_iteration_)
    score = round(proba[0][1]*100,1)
    if score < 15 :
        agrement = "Prêt accepté"
        style = {"color":"green"}
    else :
        agrement = "Prêt refusé"
        style = {"color":"red"}
    
    idx_customer = list(data.index).index(id_customer)
    df_shap = pd.DataFrame(shap_values[idx_customer], columns=["shap_value"])
    df_shap["feature"] = data.columns
    df_shap = df_shap.sort_values(by="shap_value", key=abs)
    print(df_shap)
    y_shap = list(df_shap.iloc[:5,0])
    y_shap.append(df_shap.iloc[5:,0].sum())
    y_shap.append(0)
    x_shap = list(df_shap.iloc[:5,1])
    x_shap.append("SUM OF OTHER FEATURES")
    x_shap.append("")
    print(x_shap)
    print(y_shap)
    fig_waterfall = go.figure(data=go.Waterfall(
        orientation='v',
        x=x_shap,
        y=y_shap,
        measure=["relative"]*7
    ))

    return (f"Risque de défaut de paiement pour le client {id_customer} :  {score}", 
            agrement,
            style,
            fig_waterfall)

@app.callback(
    Output("customers", "options"),
    Output("customers", "value"),
    Input("filter_agrement", "value"),
    Input("filter_gender", "value")
)
def filter_customers(value_agrement, value_gender):
    
    if value_agrement == -1 :
        df = data
    elif value_agrement == 0 :
        mask = (np.where(np.array(pred)==0))
        df = data.iloc[mask]
    else : 
        mask = (np.where(np.array(pred)==1))
        df = data.iloc[mask]
    
    if value_gender != -1 :
        df = df[df.CODE_GENDER==value_gender]
    
    list_cust = [{"label":i, "value":i} for i in list(df.index)]
    first_cust = df.index[0]
    return list_cust, first_cust



app.run_server(debug=True, use_reloader=False)

Dash is running on http://127.0.0.1:8050/

Dash is running on http://127.0.0.1:8050/

 * Serving Flask app "__main__" (lazy loading)
 * Environment: production
[2m   Use a production WSGI server instead.[0m
 * Debug mode: on


In [36]:
df_features_importance.iloc[6,1]

'APPROVED_CNT_PAYMENT_MEAN'

In [16]:
def display_score(id_customer):
    proba = model.predict_proba(np.array(data.loc[id_customer]).reshape(1, -1), num_iteration=model.best_iteration_)
    score = round(proba[0][1]*100,1)
    if score < 15 :
        agrement = "Prêt accepté"
        style = {"color":"green"}
    else :
        agrement = "Prêt refusé"
        style = {"color":"red"}
    
    idx_customer = list(data.index).index(id_customer)
    print(idx_customer)
    df_shap = pd.DataFrame(shap_values[1][idx_customer], columns=["shap_value"])
    df_shap["feature"] = data.columns
    df_shap = df_shap.sort_values(by="shap_value")
    print(df_shap)
    y_shap = list(df_shap.iloc[:5,0])
    y_shap.append(df_shap.iloc[5:,0].sum())
    y_shap.append(0)
    x_shap = list(df_shap.iloc[:5,1])
    x_shap.append("SUM OF OTHER FEATURES")
    x_shap.append("")
    print(x_shap)
    print(y_shap)
    fig_waterfall = go.figure(data=go.Waterfall(
        orientation='v',
        x=x_shap,
        y=y_shap,
        measure=["relative"]*7
    ))

    return (f"Risque de défaut de paiement pour le client {id_customer} :  {score}", 
            agrement,
            style,
            fig_waterfall)

In [17]:
display_score(100001)

0
     shap_value                 feature
40    -0.677898            EXT_SOURCE_2
39    -0.420451            EXT_SOURCE_1
11    -0.128915     NAME_EDUCATION_TYPE
1     -0.096714             CODE_GENDER
15    -0.078609              DAYS_BIRTH
..          ...                     ...
497    0.089239     POS_SK_DPD_DEF_MEAN
530    0.091810  INSTAL_AMT_PAYMENT_SUM
8      0.100974         AMT_GOODS_PRICE
189    0.118043  ACTIVE_DAYS_CREDIT_MAX
41     0.735058            EXT_SOURCE_3

[676 rows x 2 columns]
['EXT_SOURCE_2', 'EXT_SOURCE_1', 'NAME_EDUCATION_TYPE', 'CODE_GENDER', 'DAYS_BIRTH', 'SUM OF OTHER FEATURES', '']
[-0.6778979486345404, -0.4204507057540398, -0.12891498812811467, -0.09671354658018683, -0.07860946546351959, 1.4857842268775536, 0]


AttributeError: module 'plotly.graph_objects' has no attribute 'figure'

In [15]:
shap_values[1][0].shape

(676,)

In [3]:
["relative"]*7

['relative',
 'relative',
 'relative',
 'relative',
 'relative',
 'relative',
 'relative']