# Regional Performance Forecasting

In [None]:
#@title Model (press Play to run)
# Cell setup: install the pinned scikit-learn, import dependencies, configure
# display options, mount Google Drive and load the pickled artefacts.
# NOTE(review): scikit-learn is presumably pinned to the version the pickled
# models were trained with -- confirm before changing the pin.
!pip install scikit-learn==0.24.2 -q
import pandas as pd
import numpy as np
import ipywidgets as widgets
from IPython.display import display

# DataFrame display options: cap column text width and show floats with 3 decimals.
pd.set_option('max_colwidth',30)
pd.set_option('display.float_format', '{:.3f}'.format)

# Silences every Python warning for the rest of the session (including
# deprecation and numerical warnings raised by the code below).
import warnings
warnings.filterwarnings("ignore")

# Colab-specific call; presumably lets the output area grow up to 5000px
# instead of scrolling -- TODO confirm against the Colab output API.
from IPython.display import Javascript
display(Javascript('''google.colab.output.setIframeHeight(0, true, {maxHeight: 5000})'''))

# The artefacts below are read from the user's Google Drive.
from google.colab import drive
drive.mount('/content/drive/')

# SECURITY NOTE: pickle.load executes arbitrary code embedded in the file.
# Only load these files from a Drive location you control and trust.
import pickle
# `models`: dict read by the UI code in this cell; it is indexed with
# 'populator_dict' and with per-target keys such as 'on_time_by_operator'.
with open(r'/content/drive/MyDrive/Colab Notebooks/data/models.pkl', 'rb') as f:
    models = pickle.load(f)

# `features`: joined against the labels in the Model Explainer cell.
with open(r'/content/drive/MyDrive/Colab Notebooks/data/features.pkl', 'rb') as f:
    features = pickle.load(f)

# `labels`: mapping of label-group name -> table of individual labels
# (used to populate the explainer dropdown).
with open(r'/content/drive/MyDrive/Colab Notebooks/data/labels.pkl', 'rb') as f:
    labels = pickle.load(f)

#data functions #######################################################################################################

#normalize array so it sums to 1
def n1(array):
    """Return ``array`` divided by the sum of all its elements.

    The division is not guarded: an input whose elements sum to zero yields
    NaN/inf values, so callers must handle that case themselves.
    """
    return array/array.sum()

def generate_synthetic_inference_vector(user_entries, populator_dict):
    """Expand the summary inputs into a full one-row feature vector.

    Parameters
    ----------
    user_entries : dict
        Must contain 'day_name', 'severity', 'total_passcount' and
        'total_planned'.
    populator_dict : dict
        Must contain fitted estimators under 'delay_mins_populator',
        'passcount_populator' and 'planned_populator' (each exposing
        ``predict`` and returning a 2-D numpy array with one row), plus
        'all_feature_names' giving the output column names in the order
        delay / passenger-count / planned blocks are concatenated.

    Returns
    -------
    pandas.DataFrame
        One row whose columns are ``populator_dict['all_feature_names']``.
        The delay block holds the clipped raw predictions; the passenger
        count and planned blocks are non-negative and each sums to the
        corresponding user-supplied total.
    """
    user_entries = pd.DataFrame([user_entries])

    def _spread_total(populator_key, total_column):
        # Predict relative shares, then scale them to the requested total.
        raw = populator_dict[populator_key].predict(user_entries[['day_name', total_column]])
        # Clip negatives BEFORE normalising. Clipping afterwards leaves the
        # shares summing to more than 1 whenever a prediction is negative,
        # so the block would no longer add up to the requested total.
        non_negative = raw.clip(min=0)
        if not non_negative.any():
            # Nothing positive to distribute; avoid the 0/0 -> NaN in n1.
            return np.zeros_like(non_negative, dtype=float)
        return n1(non_negative)*user_entries[total_column].values[0]

    # Delay values are used as predicted (not rescaled), only floored at 0.
    dmbr = populator_dict['delay_mins_populator'].predict(user_entries[['day_name', 'severity']]).clip(min=0)
    pcbr = _spread_total('passcount_populator', 'total_passcount')
    ptbo = _spread_total('planned_populator', 'total_planned')

    return pd.DataFrame(np.concatenate((dmbr, pcbr, ptbo), axis=1), columns=populator_dict['all_feature_names'])

def infer(model_dict, output_columns, prefix, inference_vector):
    """Run every sub-model on one feature row and collect the predictions.

    Parameters
    ----------
    model_dict : dict
        Maps a name to a fitted estimator or ``None``. Position in the dict
        decides which output column a prediction is written to; ``None``
        entries leave their column at 0.
    output_columns : array-like with ``.shape``
        Column labels for the result; its last dimension sets the width.
    prefix : str or None
        When given, the columns of ``inference_vector`` whose names start
        with this prefix are used as weights for a weighted average of the
        predictions. When ``None`` no average is computed.
    inference_vector : pandas.DataFrame
        Single-row feature frame passed to each estimator's ``predict``.

    Returns
    -------
    dict
        ``'p'``: one-row DataFrame of predictions, ``'n'``: the weighted
        average (or ``None`` when ``prefix`` is ``None``).
    """
    width = output_columns.shape[-1]
    scores = np.zeros((1, width))

    for position, fitted in enumerate(model_dict.values()):
        if fitted is None:
            continue
        scores[0, position] = fitted.predict(inference_vector)[0]

    national = None
    if prefix is not None:
        # Iterating a DataFrame yields its column names.
        weight_columns = [name for name in inference_vector if name.startswith(prefix)]
        weights = inference_vector[weight_columns].values
        national = (scores*weights).sum()/weights.sum()

    return {'p': pd.DataFrame(scores, columns=output_columns), 'n': national}

#UI #######################################################################################################

def setup_initial(*args):
    """Build and display the summary inputs and the 'Prefill values' button.

    Creates a day-of-week dropdown and three numeric fields, registers each
    input in ``widgetlist`` under its field name, appends each labelled row
    to ``hboxlist``, then displays every row in ``hboxlist`` followed by the
    button, which is wired to ``prefill``. Positional arguments are ignored.
    """
    label_layout = widgets.Layout(width='200px', height='22px')

    # Day selector.
    day_picker = widgets.Dropdown(
        options=['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday'],
        value='Monday',
        disabled=False,
    )
    day_caption = widgets.Label(layout=label_layout, value='Day of the week')
    hboxlist.append(widgets.HBox([day_caption, day_picker]))
    widgetlist['day_name'] = day_picker

    # Numeric fields: (registry key, visible caption, initial value).
    numeric_fields = [
        ('severity', 'Delay severity', 0.33),
        ('total_passcount', 'Total passenger count', 129543),
        ('total_planned', 'Total planned trains', 16334),
    ]
    for field_key, caption_text, initial in numeric_fields:
        entry = widgets.FloatText(value=initial, disabled=False)
        caption = widgets.Label(layout=label_layout, value=caption_text)
        hboxlist.append(widgets.HBox([caption, entry]))
        widgetlist[field_key] = entry

    prefill_button = widgets.Button(
        description='Prefill values',
        layout=widgets.Layout(width='504px', height='30px'),
    )
    prefill_button.on_click(prefill)

    for row in hboxlist:
        display(row)
    display(prefill_button)

def setup_grid(*args):
    """Build and display the per-feature input grid inside an accordion.

    Layout of the grid:
      * row 0: one header per feature group plus a final 'TIPLOCs' header;
      * rows 1..: one editable IntText per feature, placed in its group's
        column in the order the feature appears in the feature-name list;
      * row ``num_rows-2``: a 'Totals' label in column 0;
      * row ``num_rows-1``: a disabled IntText per column holding totals.

    Side effects: sets the module-level ``prefixes`` (sorted group names,
    i.e. the text before the first ':' of each feature name), registers every
    created input in ``widgetlist`` (feature name, or 'Total' + group), stores
    the grid and accordion under 'grid' / 'grid_accordion', calls
    ``setup_tiplocs`` to fill the last column, and displays the accordion.
    Positional arguments are ignored.
    """
    global prefixes
    feature_names = models['populator_dict']['all_feature_names']

    prefixes = sorted({name.split(':')[0] for name in feature_names})

    def members_of(group):
        # Features belonging to a group, in feature-list order.
        return [name for name in feature_names if name.startswith(group)]

    num_rows = max(len(members_of(group)) for group in prefixes) + 3
    num_cols = len(prefixes) + 1

    grid = widgets.GridspecLayout(num_rows, num_cols)

    # Editable cells, one per feature.
    for variable_name in feature_names:
        group = variable_name.split(':')[0]

        entry = widgets.IntText(
            layout=widgets.Layout(width='100px', height='22px'),
            value=0,
            disabled=False,
        )
        # Any edit refreshes the totals (and the derived TIPLOC values).
        entry.observe(update_summary, names='value')

        caption = widgets.Label(
            layout=widgets.Layout(width='130px', height='30px'),
            value=variable_name.split(':')[-1],
        )

        row = members_of(group).index(variable_name) + 1
        column = prefixes.index(group)
        grid[row, column] = widgets.HBox([caption, entry])
        widgetlist[variable_name] = entry

    # Header row and read-only totals row for every column.
    for column, heading in enumerate(prefixes + ['TIPLOCs']):
        header_label = widgets.Label(layout=widgets.Layout(width='220px', height='30px'), value=heading)
        grid[0, column] = widgets.HBox([header_label])

        total_box = widgets.IntText(
            layout=widgets.Layout(width='100px', height='22px'),
            value=0,
            disabled=True,
        )
        total_label = widgets.Label(layout=widgets.Layout(width='130px', height='30px'), value=heading)
        grid[num_rows-1, column] = widgets.HBox([total_label, total_box])
        widgetlist['Total'+heading] = total_box

    # Caption for the totals section.
    grid[num_rows-2, 0] = widgets.Label(layout=widgets.Layout(width='130px', height='30px'), value='Totals')

    accordion = widgets.Accordion(children=[grid])
    accordion.set_title(0, 'Inputs')
    widgetlist['grid_accordion'] = accordion
    widgetlist['grid'] = grid

    setup_tiplocs()
    display(widgetlist['grid_accordion'])

def setup_tiplocs():
    """Fill the last grid column with one read-only IntText per TIPLOC.

    Reads the grid from ``widgetlist['grid']`` and the names from
    ``models['populator_dict']['tiploc_names']``. Each name gets a labelled,
    disabled IntText placed from row 1 downwards; the IntText is registered
    in ``widgetlist`` under ``'TIPLOCs: ' + name``. The column header (row 0)
    is not written here.
    """
    grid = widgetlist['grid']
    last_column = grid.n_columns-1

    for row, tiploc_name in enumerate(models['populator_dict']['tiploc_names'], start=1):
        readout = widgets.IntText(
            layout=widgets.Layout(width='100px', height='22px'),
            value=0,
            disabled=True,
        )
        caption = widgets.Label(
            layout=widgets.Layout(width='130px', height='30px'),
            value=tiploc_name,
        )

        grid[row, last_column] = widgets.HBox([caption, readout])
        widgetlist['TIPLOCs: '+tiploc_name] = readout

def setup_estimate():
    """Create and display the 'Make predictions' button and its output panel.

    The output widget is registered as ``widgetlist['out']`` and wrapped in
    an accordion titled 'Predictions' (``widgetlist['out_accordion']``).
    Clicking the button calls ``estimate``.
    """
    output_area = widgets.Output()
    accordion = widgets.Accordion(children=[output_area])
    accordion.set_title(0, 'Predictions')
    widgetlist['out'] = output_area
    widgetlist['out_accordion'] = accordion

    estimate_button = widgets.Button(
        description='Make predictions',
        layout=widgets.Layout(width='504px', height='30px'),
    )
    estimate_button.on_click(estimate)

    display(estimate_button)
    display(accordion)

def prefill(*args):
    """Populate the feature widgets from the four summary inputs.

    Reads 'day_name', 'severity', 'total_passcount' and 'total_planned' from
    ``widgetlist``, asks ``generate_synthetic_inference_vector`` for a full
    feature row, and writes each value into the widget registered under the
    same column name. Positional arguments (the button click event) are
    ignored.
    """
    summary_fields = ('day_name', 'severity', 'total_passcount', 'total_planned')
    user_input = {name: widgetlist[name].value for name in summary_fields}

    prefill_vector = generate_synthetic_inference_vector(user_input, models['populator_dict'])

    for column_name, column in prefill_vector.items():
        widgetlist[column_name].value = column.values[0]

def update_tiplocs(*args):
    """Recompute the read-only TIPLOC values from the planned-train inputs.

    Collects, in ``widgetlist`` order, the value of every widget whose key
    starts with 'Planned trains', feeds them as a single row to the
    'tiploc_populator' estimator, and writes the first predicted row into the
    'TIPLOCs: <name>' widgets in the order given by 'tiploc_names'.
    """
    populators = models['populator_dict']
    planned_values = [
        widget.value
        for name, widget in widgetlist.items()
        if name.startswith('Planned trains')
    ]
    predicted = populators['tiploc_populator'].predict(np.atleast_2d(planned_values))

    for tiploc_name, amount in zip(populators['tiploc_names'], predicted[0]):
        widgetlist['TIPLOCs: '+tiploc_name].value = amount

def update_summary(*args):
    """Refresh the TIPLOC values and then every column total.

    For each group in ``prefixes`` plus 'TIPLOCs', sums the values of all
    widgets whose ``widgetlist`` key starts with that group name and stores
    the result in the 'Total<group>' widget. Positional arguments (the
    widget change event) are ignored.
    """
    update_tiplocs()
    for group in prefixes + ['TIPLOCs']:
        running_total = 0
        for name, widget in widgetlist.items():
            if name.startswith(group):
                running_total += widget.value
        widgetlist['Total'+group].value = running_total

def estimate(*args):
    """Run all prediction models on the current grid values and show them.

    Builds a one-row feature frame from the widgets named in
    ``models['populator_dict']['all_feature_names']``, calls ``infer`` for
    each of the five model groups, and displays two tables in
    ``widgetlist['out']``: per-operator results (with a 'National' column
    added to the PPM row) and per-route results. Values are multiplied by
    100 and formatted as percentages. Positional arguments (the button click
    event) are ignored.
    """
    feature_names = models['populator_dict']['all_feature_names']
    inference_vector = pd.DataFrame([{name: widgetlist[name].value for name in feature_names}])

    def run_group(model_key, row_label):
        # models[model_key] supplies infer's remaining keyword arguments.
        outcome = infer(**models[model_key], inference_vector=inference_vector)
        outcome['p'].index = [row_label]
        return outcome

    otbo = run_group('on_time_by_operator', 'On Time WTT')
    otbr = run_group('on_time_by_route', 'On Time WTT')

    ppbo = run_group('ppm_by_operator', 'PPM')
    ppbo['p'].insert(0, 'National', ppbo['n'])

    otbo_gbtt = run_group('on_time_by_operator_gbtt', 'On Time GBTT')
    otbr_gbtt = run_group('on_time_by_route_gbtt', 'On Time GBTT')

    as_percent = '{:,.2f}%'.format

    widgetlist['out'].clear_output()
    with widgetlist['out']:
        by_operator = pd.concat([otbo['p'], otbo_gbtt['p'], ppbo['p']])*100
        display(by_operator.applymap(as_percent))
        by_route = pd.concat([otbr['p'], otbr_gbtt['p']])*100
        display(by_route.applymap(as_percent))

import ipywidgets as widgets
from IPython.display import display

# Shared registries used by the setup_* functions and callbacks above:
#   widgetlist: name -> widget (inputs, totals, grid, output panels)
#   hboxlist:   labelled rows displayed by setup_initial
widgetlist = {}
hboxlist = []

# Build and display the UI. Widgets appear in call order, so the
# 'Make predictions' button and output panel are shown above the input grid.
# NOTE(review): confirm that ordering is intended.
setup_initial()
setup_estimate()
setup_grid()

### Model Explainer (run model first)
After pressing the Play button below, select a sub-model from the dropdown and press Explain.

You can then click on the link under 'Dash app running on:' (it will look like 127.0.0.1...) to open the explainer in a new tab.

In [None]:
#@title Model Explainer

!pip install explainerdashboard -q
from explainerdashboard import RegressionExplainer, ExplainerDashboard

def explain(*args):
  """Launch an explainer dashboard for the sub-model chosen in the dropdown.

  The dropdown value has the form '<label group>:<individual label>'. The
  chosen label column (rows with missing values dropped) is inner-joined with
  ``features``, a ``RegressionExplainer`` is built for the matching model in
  ``models[<label group>]['model_dict']``, and the dashboard is started in
  'external' mode with its output captured by ``logger``.

  If no fitted model is stored for the selection (the entry is ``None``), a
  short message is written to ``logger`` and nothing is launched. Positional
  arguments (the button click event) are ignored.
  """
  logger.clear_output()

  # Split on the first ':' only, so an individual label that itself contains
  # ':' is kept whole instead of being truncated.
  label_group_name, _, individual_label_name = explain_what.value.partition(':')

  model = models[label_group_name]['model_dict'][individual_label_name]
  if model is None:
    # model_dict entries may be None (infer skips them); there is nothing
    # to explain, so report it instead of failing inside the explainer.
    with logger:
      print('No fitted model is available for ' + explain_what.value + '; nothing to explain.')
    return

  # Keep only rows that have both a label value and features.
  joined = labels[label_group_name][[individual_label_name]].dropna().join(features, how='inner')

  labels2 = joined[individual_label_name]
  features2 = joined[features.columns]

  explainer = RegressionExplainer(model, features2, labels2)

  with logger:
    ExplainerDashboard(explainer,
                        show_metrics = ['mean-absolute-error', 'R-squared'],
                        importances=True,
                        model_summary=True,
                        contributions=True,
                        whatif=False,
                        shap_dependence=False,
                        shap_interaction=False,
                        decision_trees=True,
                        mode='external').run()

# Dropdown listing every '<label group>:<individual label>' combination found
# in `labels`; `explain` reads its value to pick the sub-model.
explain_what = widgets.Dropdown(options = [label_group_name+':'+individual_label_name 
                                for label_group_name in labels.keys() 
                                for individual_label_name in labels[label_group_name]],
                      disabled=False,)
# Caption + dropdown on one row.
h = widgets.HBox([widgets.Label(layout = widgets.Layout(width='220px', height='30px'),
                                value = 'Explain which sub-model:'),
                  explain_what])
explain_button = widgets.Button(description = 'Explain' , layout = widgets.Layout(width='524px', height='30px'))    
explain_button.on_click(explain)
# Output area that `explain` clears and writes the dashboard output into.
logger = widgets.Output()

display(h)
display(explain_button)
display(logger)