In [243]:
import plotly.express as px
import pandas as pd
import plotly.graph_objects as go
import numpy as np

In [269]:
# Input table: use a finalDf output from the main.py file.
# NOTE(review): this is an absolute path on one machine; anyone else running
# the notebook has to point DATA_PATH at their own copy of the CSV.
DATA_PATH = 'C:/Users/omars/Desktop/df_bis.csv'
df = pd.read_csv(DATA_PATH)
def mean_absolute_percentage_error(y_true, y_pred):
    """Return the mean absolute percentage error as a fraction (not multiplied by 100).

    Parameters
    ----------
    y_true : array-like
        Observed values.
    y_pred : array-like
        Predicted values; must broadcast against ``y_true``.

    Returns
    -------
    float
        Mean of ``|y_true - y_pred| / |y_true|`` over the entries where that
        ratio is defined. Entries where either value is NaN, or where
        ``y_true`` is zero (the percentage error is undefined there), are
        skipped. NaN is returned when no entry is usable, including for
        empty input.
    """
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)

    # Division by a zero actual is expected here and handled right below, so
    # keep numpy from emitting RuntimeWarnings for it.
    with np.errstate(divide='ignore', invalid='ignore'):
        ratios = np.abs((y_true - y_pred) / y_true)

    # x / 0 gives inf, which nanmean would NOT skip: a single zero actual would
    # turn the whole score into inf. Mark those entries as missing instead.
    ratios = np.where(y_true == 0, np.nan, ratios)

    # Avoid nanmean's "Mean of empty slice" warning when nothing is usable.
    if np.isnan(ratios).all():
        return np.nan
    return np.nanmean(ratios)
# Model keys; each one is used as the prefix of that model's
# '<key>_prediction', '<key>_lower' and '<key>_upper' columns.
models = [
    'sir',
    'knn',
    'mdp',
    'agg',
]


def fig_add(fig, df_sub, model, color):
    """Add one model's prediction curve and its lower/upper band to ``fig``.

    Parameters
    ----------
    fig : plotly.graph_objects.Figure
        Figure to extend; it is modified in place.
    df_sub : pandas.DataFrame
        Must provide the columns ``date``, ``<model>_prediction``,
        ``<model>_lower`` and ``<model>_upper``.
    model : str
        Column prefix of the model; its capitalized form is the legend label.
    color : str
        Line colour used for all three traces.

    Returns
    -------
    plotly.graph_objects.Figure
        The same ``fig`` object, to allow ``fig = fig_add(fig, ...)``.
    """
    label = model.capitalize() + ' Predictions'

    # Visible legend entry: the point prediction.
    fig.add_trace(go.Scatter(x=df_sub['date'], y=df_sub[model + '_prediction'],
                             mode='lines',
                             name=label,
                             # Group the band with this entry so a legend click
                             # toggles the curve and its band together.
                             legendgroup=model,
                             line=dict(color=color)))

    dates = list(df_sub['date'])
    # Order matters: the lower bound is added first without fill, then the
    # upper bound with fill='tonexty', which shades the area between the two.
    for suffix, fill in (('_lower', None), ('_upper', 'tonexty')):
        fig.add_trace(go.Scatter(x=dates, y=list(df_sub[model + suffix]),
                                 mode='lines',
                                 name=label,
                                 legendgroup=model,
                                 fill=fill,
                                 line=dict(color=color),
                                 showlegend=False))
    return fig

# Trace palette (hex). plot_results pairs these with `models` by position and
# takes the entry right after the last model for the observed-cases line.
colors = [
    '#1f77b4',  # 0: muted blue
    '#ff7f0e',  # 1: safety orange
    '#2ca02c',  # 2: cooked asparagus green
    '#d62728',  # 3: brick red
    '#9467bd',  # 4: muted purple
    '#8c564b',  # 5: chestnut brown
    '#e377c2',  # 6: raspberry yogurt pink
    '#7f7f7f',  # 7: middle gray
    '#bcbd22',  # 8: curry yellow-green
    '#17becf',  # 9: blue-teal
]

In [261]:
def _apply_report_layout(fig, title):
    """Set the title and the font styling shared by both report figures."""
    fig.update_layout(
        title=title,
        font=dict(
            family="Courier New, monospace",
            size=18,
            color="#7f7f7f"
        )
    )


def _predictions_figure(df_sub, state, training_cutoff):
    """Build the figure of observed cases vs. every model's prediction band."""
    fig = go.Figure()

    for model, color in zip(models, colors):
        fig = fig_add(fig, df_sub, model, color)

    # Observed cases use the palette entry right after the ones given to models.
    fig.add_trace(go.Scatter(x=df_sub['date'], y=df_sub['cases'],
                             mode='lines',
                             name='Cases',
                             line=dict(width=5, dash='dash', color=colors[len(models)])))

    # Dotted vertical marker at the training cutoff, reaching 10% above the
    # largest plotted case/prediction value.
    value_columns = ['cases'] + [model + '_prediction' for model in models]
    y_top = 1.1 * np.nanmax(df_sub[value_columns].to_numpy(dtype=float))
    fig.add_shape(
        type="line",
        x0=training_cutoff,
        y0=0,
        x1=training_cutoff,
        y1=y_top,
        line=dict(dash='dot'))

    # Every trace above is already created with mode='lines'; this re-applies
    # it uniformly across the figure.
    fig.update_traces(mode='lines')

    _apply_report_layout(fig, "Cases vs. Predictions for the state of " + state)
    return fig


def _mape_table(df, df_sub, state, training_cutoff):
    """Return a models x 4 table of in/out-of-sample MAPE for the state and overall."""
    # Column order below matches the order of `splits`.
    splits = [
        df_sub.query('date <= @training_cutoff'),
        df_sub.query('date > @training_cutoff'),
        df.query('date <= @training_cutoff'),
        df.query('date > @training_cutoff'),
    ]
    columns = ['In-Sample MAPE for ' + state, 'Out-of-Sample MAPE for ' + state,
               'Total In-Sample MAPE', 'Total Out-of-Sample MAPE']
    rows = [
        [mean_absolute_percentage_error(part['cases'], part[model + '_prediction'])
         for part in splits]
        for model in models
    ]
    return pd.DataFrame(rows, index=list(models), columns=columns)


def _prevalence_figure(df_sub, state):
    """Build the figure comparing detected cases with prevalence."""
    fig = go.Figure()
    _apply_report_layout(fig, "Cases vs. Prevalence for the state of " + state)

    fig.add_trace(go.Scatter(x=df_sub['date'], y=df_sub['cases'],
                             mode='lines',
                             name='Detected Cases',
                             line=dict(dash='dash')))

    # Only the rows where prevalence exceeds the detected cases are drawn.
    above_detected = df_sub[df_sub['prevalence'] > df_sub['cases']]
    fig.add_trace(go.Scatter(x=above_detected['date'], y=above_detected['prevalence'],
                             mode='lines',
                             name='Probable Cases'))
    return fig


def plot_results(df, state, training_cutoff="2020-04-30"):
    """Show prediction and prevalence plots for one state and print a MAPE table.

    In order, this shows the cases-vs-predictions figure, prints the MAPE
    table, then shows the cases-vs-prevalence figure.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide the columns ``state``, ``date``, ``cases``,
        ``prevalence`` and, for every entry of the module-level ``models``
        list, ``<model>_prediction`` (``fig_add`` additionally reads
        ``<model>_lower`` and ``<model>_upper``).
    state : str
        Value of the ``state`` column to plot.
    training_cutoff : str, default "2020-04-30"
        Rows with ``date <= training_cutoff`` count as in-sample, later rows
        as out-of-sample; the first figure marks it with a vertical line.
        NOTE(review): if ``date`` holds plain strings (as read from CSV without
        date parsing), this comparison is lexicographic and only correct for
        ISO ``YYYY-MM-DD`` dates -- confirm against the input file.

    Raises
    ------
    ValueError
        If ``df`` has no rows for ``state``.
    """
    df_sub = df.query('state == @state')
    if df_sub.empty:
        # Without this guard the failure surfaces later as numpy's opaque
        # "zero-size array to reduction operation" ValueError.
        raise ValueError("no rows found for state {!r}".format(state))

    _predictions_figure(df_sub, state, training_cutoff).show()

    print(_mape_table(df, df_sub, state, training_cutoff))

    _prevalence_figure(df_sub, state).show()

In [268]:
# Render both figures and print the MAPE table for a single state.
state = "California"
plot_results(df, state=state)

     In-Sample MAPE for California  Out-of-Sample MAPE for California  \
sir                       0.194548                           0.437919   
knn                       0.000000                           0.038876   
mdp                       0.000000                           0.119513   
agg                       0.012146                           0.095967   

     Total In-Sample MAPE  Total Out-of-Sample MAPE  
sir              0.196416                  0.863573  
knn              0.000000                  0.073328  
mdp              0.000000                  0.111360  
agg              0.010324                  0.080961  
