In [1]:
import pandas as pd
from sklearn.externals import joblib
from sklearn.metrics import mean_absolute_error, mean_squared_error, mean_squared_log_error, r2_score
from sklearn.utils import check_array
import numpy as np
from numba import jit, vectorize, float64, int64
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from datetime import datetime
from sklearn.externals import joblib
from math import sqrt
from IPython import display
import dash
import dash_core_components as dcc
import dash_html_components as html
import warnings
import plotly.graph_objs as go
from plotly import tools
from plotly.figure_factory import create_2d_density
from plotly.graph_objs import graph_objs

In [2]:
# Install a blanket "ignore" filter: no message, category or module is given,
# so every warning raised after this cell runs is suppressed.
# NOTE(review): this also hides deprecation warnings from the libraries imported above.
warnings.filterwarnings('ignore')

In [3]:
# Load the fitted model and the train/validation feature frames from ./data.
# NOTE: joblib.load unpickles the file, which can execute arbitrary code --
# only load model files that come from a trusted source.
rf = joblib.load('./data/rfc_trained.pkl')
train = pd.read_feather('./data/xc_train.feather')
valid = pd.read_feather('./data/xc_valid.feather')
# Read the label files the same way the later loading cell does: no header row
# (otherwise the first label is consumed as a column name) and keep the single
# column as a Series rather than a one-column DataFrame.
y_train = pd.read_csv('./data/yc_train.csv', header=None).iloc[:, 0]
y_valid = pd.read_csv('./data/yc_valid.csv', header=None).iloc[:, 0]

# show the Dash plot

In [4]:
def show_app(app,  # type: dash.Dash
             port=9999,
             width=700,
             height=350,
             offline=True,
             style=True,
             **dash_flask_kwargs):
    """
    Run the application inside a Jupyter notebook and show an iframe with it.

    The iframe is emitted before the server is started because the call to
    ``app.run_server`` is the last statement and its result is returned.

    :param app: the Dash application to serve
    :param port: local port used both for the iframe URL and for the server
    :param width: iframe width
    :param height: iframe height
    :param offline: when true, set ``serve_locally`` on the app's css and scripts config
    :param style: when true, register the external stylesheets and scripts listed below
    :param dash_flask_kwargs: extra keyword arguments forwarded to ``app.run_server``;
        a ``debug`` entry is always overridden with ``False``
    :return: the return value of ``app.run_server``
    """
    url = 'http://localhost:%d' % port
    iframe = '<iframe src="{url}" width={width} height={height}></iframe>'.format(url=url,
                                                                                  width=width,
                                                                                  height=height)
    display.display_html(iframe, raw=True)
    if offline:
        app.css.config.serve_locally = True
        app.scripts.config.serve_locally = True
    if style:
        external_css = ["https://fonts.googleapis.com/css?family=Raleway:400,300,600",
                        "https://maxcdn.bootstrapcdn.com/font-awesome/4.7.0/css/font-awesome.min.css",
                        "http://getbootstrap.com/dist/css/bootstrap.min.css", ]

        for css in external_css:
            app.css.append_css({"external_url": css})

        external_js = ["https://code.jquery.com/jquery-3.2.1.min.js",
                       "https://cdn.rawgit.com/plotly/dash-app-stylesheets/a3401de132a6d0b652ba11548736b1d1e80aa10d/dash-goldman-sachs-report-js.js",
                       "http://getbootstrap.com/dist/js/bootstrap.min.js"]

        for js in external_js:
            app.scripts.append_script({"external_url": js})

    # Forward the caller's extra options instead of silently dropping them.
    # Work on a copy so the caller's dict is never modified.
    run_kwargs = dict(dash_flask_kwargs)
    run_kwargs['debug'] = False  # needs to be false in Jupyter
    return app.run_server(port=port, **run_kwargs)

In [5]:
# Create a Dash application instance.
# NOTE(review): `app` is bound to a new dash.Dash() again in the cell that builds
# the layout, so this instance is not the one the layout and callbacks attach to.
app = dash.Dash()

In [6]:
# Fitted model, loaded from its joblib file.
rf = joblib.load('./data/rfc_trained.pkl')

# Feature frames for the training and validation splits.
train, valid = (
    pd.read_feather('./data/xc_train.feather'),
    pd.read_feather('./data/xc_valid.feather'),
)

# Labels: the CSVs are read without a header row and reduced to their first
# column, so each name ends up bound to a Series.
y_train, y_valid = (
    pd.read_csv(csv_path,header=None).iloc[:,0]
    for csv_path in ('./data/yc_train.csv', './data/yc_valid.csv')
)

def pred_to_hard(val, arr):
    """
    Threshold an array of scores, in place, into hard 0/1 values.

    Elements strictly greater than ``val`` become 1 and elements less than or
    equal to ``val`` become 0. Both masks are computed from the original values
    before anything is written, so a freshly written 1 is never compared against
    the threshold again (which used to zero everything out when ``val >= 1``).
    Elements that satisfy neither comparison (NaN) are left untouched.

    :param val: threshold
    :param arr: numpy array of scores; it is modified in place, so pass a copy
        if the original values are still needed
    :return: the same array object that was passed in
    """
    above = arr > val
    at_or_below = arr <= val
    arr[above] = 1
    arr[at_or_below] = 0
    return arr


def hard_results(preds, target, value):
    """
    Count the confusion-matrix cells for scores thresholded at ``value``.

    A score strictly greater than ``value`` counts as a predicted positive and a
    score less than or equal to ``value`` as a predicted negative. The counts are
    taken against the ``target`` argument (not against a notebook global), and
    ``preds`` is not modified.

    :param preds: array-like of predicted scores
    :param target: array-like of true labels (1 = positive, 0 = negative),
        one per score
    :param value: decision threshold
    :return: tuple ``(TP, TN, FP, FN)``
    :raises ValueError: if ``preds`` and ``target`` have different lengths
    """
    scores = np.asarray(preds).ravel()
    truth = np.asarray(target).ravel()
    if scores.shape != truth.shape:
        raise ValueError('preds and target must have the same length, got %d and %d'
                         % (scores.size, truth.size))

    predicted_pos = scores > value
    predicted_neg = scores <= value
    actual_pos = truth == 1
    actual_neg = truth == 0

    tp_ = np.sum(predicted_pos & actual_pos)
    tn_ = np.sum(predicted_neg & actual_neg)
    fp_ = np.sum(predicted_pos & actual_neg)
    fn_ = np.sum(predicted_neg & actual_pos)
    return (tp_,tn_,fp_,fn_)


def hard_pred_df(data, target, model):
    """
    Tabulate confusion-matrix counts for 1001 thresholds from 0 to 1.

    Column 1 of ``model.predict_proba(data)`` is thresholded at 0, 0.001, ...,
    1 (score > threshold means predicted positive) and compared with ``target``.
    Thresholds are rounded to three decimals so that a plain value such as 0.3
    matches its row exactly. Rows are collected in a list and turned into a
    DataFrame once, instead of growing the frame row by row.

    :param data: feature matrix passed to ``model.predict_proba``
    :param target: array-like of true labels (1 = positive, 0 = negative),
        one per row of ``data``
    :param model: object exposing ``predict_proba``
    :return: DataFrame with columns ``Break`` (float threshold) and
        ``TP``, ``TN``, ``FP``, ``FN`` (integer counts), one row per threshold
    :raises ValueError: if the number of scores and labels differ
    """
    columns = ['Break','TP','TN', 'FP', 'FN']
    scores = np.asarray(model.predict_proba(data))[:,1]
    truth = np.asarray(target).ravel()
    if scores.shape != truth.shape:
        raise ValueError('model scores and target must have the same length, got %d and %d'
                         % (scores.size, truth.size))

    # The label masks do not depend on the threshold, so build them once.
    actual_pos = truth == 1
    actual_neg = truth == 0

    rows = []
    for v in np.round(np.linspace(0, 1, 1001), 3):
        predicted_pos = scores > v
        predicted_neg = scores <= v
        rows.append({
            'Break': float(v),
            'TP': int(np.sum(predicted_pos & actual_pos)),
            'TN': int(np.sum(predicted_neg & actual_neg)),
            'FP': int(np.sum(predicted_pos & actual_neg)),
            'FN': int(np.sum(predicted_neg & actual_pos)),
        })
    return pd.DataFrame(rows, columns=columns)

In [7]:
# Confusion counts per threshold, computed from the validation frame,
# the validation labels and the loaded model.
hard_pred = hard_pred_df(data=valid, target=y_valid, model=rf)

# Show the first rows as the cell output.
hard_pred.head()

Unnamed: 0,Break,TP,TN,FP,FN
0,0.0,32195.0,40376.0,9969.0,0.0
1,0.001,32195.0,40376.0,9969.0,0.0
2,0.002,32195.0,40471.0,9874.0,0.0
3,0.003,32195.0,40547.0,9798.0,0.0
4,0.004,32195.0,40614.0,9731.0,0.0


In [8]:
# All threshold values of the table, in row order, as a plain list.
breaks = list(hard_pred.Break)

In [9]:
# Slider marks: one label for every 100th threshold (0.0, 0.1, ..., 1.0).
# Whole-number positions are keyed as int (0 and 1 rather than 0.0 and 1.0) and
# every label is a string, which is the form dcc.Slider documents for `marks`.
slider_labels = {}
for i,x in enumerate(breaks):
    if i%100 ==0:
        position = float(round(x,1))
        key = int(position) if position.is_integer() else position
        slider_labels[key] = str(position)

In [42]:
# Two-stop colour scale used by the heatmap: position 0 and position 1.0.
colorscale = [
    [0, 'rgb(126, 247, 27)'],
    [1.0, 'rgb(5, 57, 94)'],
]

In [77]:
import dash
import dash_html_components as html
import dash_core_components as dcc

# Column 1 of the model's predict_proba output for the validation frame.
# NOTE(review): nothing else in this cell reads `preds`.
preds = rf.predict_proba(valid)[:,1]

# Application object that the layout and the callbacks below attach to.
app = dash.Dash()

# Two inline-block columns, each 49% wide.
app.layout = html.Div([
    # Left column: metric table, TP/FP scatter and the threshold slider.
    html.Div(
        [
            dcc.Graph(id='metric-table'),
            dcc.Graph(id='auc-graph'),
            dcc.Slider(
                id='pred-slider',
                min=0,
                max=1,
                step=0.001,
                value=.5,
                marks=slider_labels,
            ),
        ],
        style={
            'display': 'inline-block',
            'width': '49%',
        },
    ),
    # Right column: confusion heatmap and precision-recall scatter.
    html.Div(
        [
            dcc.Graph(id='heatmap-graph'),
            dcc.Graph(id='precision-graph'),
        ],
        style={
            'display': 'inline-block',
            'width': '49%',
            'height': '80%',
        },
    ),
])

@app.callback(
              dash.dependencies.Output('heatmap-graph','figure'),
              [dash.dependencies.Input('pred-slider','value')]
             )
def update_heatmap(value):
    """
    Build the confusion-matrix heatmap for the slider's threshold.

    The row of ``hard_pred`` whose ``Break`` is closest to ``value`` is used, so
    the lookup does not depend on exact floating-point equality (an empty match
    used to make ``int(...)`` fail). Rows of the matrix are the actual class and
    columns the predicted class.

    :param value: threshold selected on the slider
    :return: figure dict with a single ``go.Heatmap`` trace
    """
    nearest = (hard_pred['Break'].astype(float) - value).abs().idxmin()
    row = hard_pred.loc[nearest]
    tp, tn, fp, fn = (int(row[name]) for name in ('TP', 'TN', 'FP', 'FN'))
    return {
        'data':[go.Heatmap(
            x=['Predicted 1', 'Predicted 0'],
            y=['Actual 1', 'Actual 0'],
            # first row: actual positives, second row: actual negatives
            z = [[tp, fn], [fp, tn]],
            colorscale = colorscale
        )],
        'layout': go.Layout(
            height=400,
            title = 'Confusion Matrix'
        )
    }




@app.callback(
              dash.dependencies.Output('auc-graph','figure'),
              [dash.dependencies.Input('pred-slider','value')]
             )
def update_auc(value):
    """
    Plot TP against FP for every threshold and highlight the selected one.

    The highlighted point comes from the row of ``hard_pred`` whose ``Break`` is
    closest to ``value``; an exact equality match could come back empty and
    silently drop the marker.

    :param value: threshold selected on the slider
    :return: figure dict with two ``go.Scatter`` traces (all thresholds, selected one)
    """
    nearest = (hard_pred['Break'].astype(float) - value).abs().idxmin()
    row = hard_pred.loc[nearest]
    return {
        'data':[go.Scatter(
            x=hard_pred.FP,
            y=hard_pred.TP,
            mode='markers',
            name = 'all TP/FP'
        ),
               go.Scatter(
               x = [int(row['FP'])],
               y = [int(row['TP'])],
               mode = 'markers',
               marker = {'size':15,
                       "opacity":0.7,
                        "color":'rgb(22, 199, 229)'},
                # trace names are text, so pass the threshold as a string
                name = str(value)
               )],
        'layout': go.Layout(
            height=400,
            title = 'AUC Curve',
            xaxis = {'title' :'FP'},
            yaxis = {'title':'TP'}

        )
    }




@app.callback(
              dash.dependencies.Output('metric-table','figure'),
              [dash.dependencies.Input('pred-slider','value')]
             )
def update_table(value):
    """
    Show the threshold and its TP/TN/FP/FN counts as a one-row table.

    The counts come from the row of ``hard_pred`` whose ``Break`` is closest to
    ``value`` and are passed as plain integers, one per column. No layout width
    is set: Plotly's ``layout.width`` is a number of pixels, not a percentage
    string, and the enclosing Div already sizes the column.

    :param value: threshold selected on the slider
    :return: figure dict with a single ``go.Table`` trace
    """
    nearest = (hard_pred['Break'].astype(float) - value).abs().idxmin()
    row = hard_pred.loc[nearest]
    tp, tn, fp, fn = (int(row[name]) for name in ('TP', 'TN', 'FP', 'FN'))
    return {
        'data':[go.Table(
            header = dict(values = ['Value','TP', 'TN', 'FP', 'FN']),
            cells = dict(values = [[value], [tp], [tn], [fp], [fn]]))
               ],
        'layout':go.Layout(
                    height = 300)

    }



@app.callback(
              dash.dependencies.Output('precision-graph','figure'),
              [dash.dependencies.Input('pred-slider','value')]
             )
def update_precision(value):
    """
    Plot precision against recall for every threshold and highlight the selected one.

    The highlighted point comes from the row of ``hard_pred`` whose ``Break`` is
    closest to ``value`` rather than from an exact equality match. When a
    denominator is zero the corresponding coordinate is ``None``, so the point is
    simply not drawn.

    :param value: threshold selected on the slider
    :return: figure dict with two ``go.Scatter`` traces (selected threshold, all thresholds)
    """
    nearest = (hard_pred['Break'].astype(float) - value).abs().idxmin()
    row = hard_pred.loc[nearest]
    tp, fp, fn = (float(row[name]) for name in ('TP', 'FP', 'FN'))
    # recall TP / (TP + FN)
    recall = tp / (tp + fn) if (tp + fn) else None
    # precision TP / (TP + FP)
    precision = tp / (tp + fp) if (tp + fp) else None
    return {
        'data':[go.Scatter(
            x=[recall],
            y=[precision],
            mode='markers',
            marker = {'size':15,
                       "opacity":0.7,
                        "color":'rgb(22, 199, 229)'},
            # trace names are text, so pass the threshold as a string
            name = str(value)
        ),
               go.Scatter(
               x = hard_pred.TP/(hard_pred.TP+hard_pred.FN),
               y = hard_pred.TP/(hard_pred.TP+hard_pred.FP),
               mode = 'markers',
               name = 'totals')
               ],
        'layout': go.Layout(
            height=400,
            title = 'Precision-Recall Graph',
            xaxis = {'title' :'Recall'},
            yaxis = {'title':'Precision'}
        )
    }

In [None]:
# Serve the dashboard on port 9999 and embed it in a 1000x700 iframe.
show_app(app, port=9999, width=1000, height=700, offline=True, style=True)

 * Running on http://127.0.0.1:9999/ (Press CTRL+C to quit)
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET / HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_core_components/rc-slider@6.1.2.css?v=0.22.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_core_components/react-select@1.0.0-rc.3.min.css?v=0.22.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_core_components/react-virtualized@9.9.0.css?v=0.22.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_core_components/react-virtualized-select@3.1.0.css?v=0.22.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_core_components/react-dates@12.3.0.css?v=0.22.1 HTTP/1.1[0m" 200 -
127.0.0.1 - - [02/May/2018 17:32:26] "[37mGET /_dash-component-suites/dash_renderer/react@15.4.2.min.js?v=0.12.1 HTTP/1.1[0m" 