## Stage IV

In [367]:
from datetime import datetime
from datetime import timedelta  
import pandas as pd
import math
import numpy as np
import matplotlib.pyplot as plt
import scipy as scipy
import sys
import statsmodels.formula.api as smf
import statsmodels.tools.eval_measures as smfem
import statsmodels.tsa.stattools as smfTools
from statsmodels.tsa.seasonal import seasonal_decompose
import json

### Imports for Dash

In [368]:
import plotly.express as px
from jupyter_dash import JupyterDash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output
import plotly.graph_objects as go
from plotly.subplots import make_subplots

In [369]:
#define the utilities folder for team functions
sys.path.insert(0,"util")
import teamsuperfileutilslib as tLibs

### Load Team Super File

In [370]:
# Read the team super file.
# Forward slashes keep the relative path portable (Windows accepts them too) and
# avoid the invalid "\d" / "\s" escape sequences the backslash form contained.
team_super_df = pd.read_csv("../data/superCOVID-19dataframe.csv")

# Retrieve the confirmed-case and death frames for the USA from the team super file
# using the team utility function that was imported above.
confirmed_usa, deaths_usa = tLibs.get_confirmed_deaths_tuple_df(team_super_df)

### Load GEO Data for map counties

In [371]:
# Load the county GeoJSON that the choropleth map uses as its geometry source.
# Forward slashes avoid the invalid "\d" / "\g" escapes of the backslash path and
# work on every platform; JSON text is UTF-8, so do not rely on the locale default.
with open("../data/geojson-counties-fips.json", encoding="utf-8") as jFile:
    counties = json.load(jFile)

## Common definitions

In [372]:
def USA_States():
    """Return an ordered mapping of display name -> abbreviation for the state picker.

    The first entry is the catch-all ``'All'``; the remaining entries follow the
    order of the source list: https://gist.github.com/rogerallen/1583593
    """
    name_to_abbrev = [
        ('All', 'All'),
        ('Alabama', 'AL'),
        ('Alaska', 'AK'),
        ('Arizona', 'AZ'),
        ('Arkansas', 'AR'),
        ('California', 'CA'),
        ('Colorado', 'CO'),
        ('Connecticut', 'CT'),
        ('Delaware', 'DE'),
        ('District of Columbia', 'DC'),
        ('Florida', 'FL'),
        ('Georgia', 'GA'),
        ('Guam', 'GU'),
        ('Hawaii', 'HI'),
        ('Idaho', 'ID'),
        ('Illinois', 'IL'),
        ('Indiana', 'IN'),
        ('Iowa', 'IA'),
        ('Kansas', 'KS'),
        ('Kentucky', 'KY'),
        ('Louisiana', 'LA'),
        ('Maine', 'ME'),
        ('Maryland', 'MD'),
        ('Massachusetts', 'MA'),
        ('Michigan', 'MI'),
        ('Minnesota', 'MN'),
        ('Mississippi', 'MS'),
        ('Missouri', 'MO'),
        ('Montana', 'MT'),
        ('Nebraska', 'NE'),
        ('Nevada', 'NV'),
        ('New Hampshire', 'NH'),
        ('New Jersey', 'NJ'),
        ('New Mexico', 'NM'),
        ('New York', 'NY'),
        ('North Carolina', 'NC'),
        ('North Dakota', 'ND'),
        ('Ohio', 'OH'),
        ('Oklahoma', 'OK'),
        ('Oregon', 'OR'),
        ('Pennsylvania', 'PA'),
        ('Rhode Island', 'RI'),
        ('South Carolina', 'SC'),
        ('South Dakota', 'SD'),
        ('Tennessee', 'TN'),
        ('Texas', 'TX'),
        ('Utah', 'UT'),
        ('Vermont', 'VT'),
        ('Virginia', 'VA'),
        ('Washington', 'WA'),
        ('West Virginia', 'WV'),
        ('Wisconsin', 'WI'),
        ('Wyoming', 'WY'),
    ]
    return dict(name_to_abbrev)

In [373]:
def get_totals_dataframe(dataToProcess, logNormilized=False, normilizedByPop=False, popluation=0, normilizedByPopPerMultiplier=1000000):
    """Convert running totals into per-day counts and shape them for plotting/modelling.

    Every column after the first four is summed over all rows, the resulting
    running total is differenced into a per-day value, and helper columns are added.

    Parameters
    ----------
    dataToProcess : pandas.DataFrame
        Frame whose columns from position 4 onward are labelled with date strings
        and hold running totals (assumed layout; the first four columns are ignored).
    logNormilized : bool
        When True, replace each positive count with its natural log. Zero and
        negative counts (a negative day-over-day difference can appear when the
        running total is revised downward) become 0 instead of raising.
    normilizedByPop : bool
        When True, scale counts to ``count / popluation * normilizedByPopPerMultiplier``
        rounded to a whole number.
    popluation : number
        Population used for the scaling; must be positive when ``normilizedByPop`` is True.
    normilizedByPopPerMultiplier : number
        "Per N people" multiplier used for the scaling.

    Returns
    -------
    pandas.DataFrame
        Columns ``date`` (datetime), ``daily_count`` (int64, or float when logged),
        ``week_number`` (ISO week) and ``days_since_infection_start``.
        ``days_since_infection_start`` is -1 before the first day with a positive
        count and 0, 1, 2, ... from that day on.

    Raises
    ------
    ValueError
        If ``normilizedByPop`` is True and ``popluation`` is not positive.
    """
    # Grand total per date column across all rows, as a two-column frame.
    daily_cols = pd.DataFrame(dataToProcess[dataToProcess.columns[4:]].sum(axis=0)).reset_index()
    daily_cols.columns = ["date", "daily_count"]

    # Running total -> value for the day; the first day has no predecessor, so it becomes 0.
    daily_cols["daily_count"] = daily_cols["daily_count"].diff().fillna(value=0)

    # Normalize by population.
    if normilizedByPop:
        if popluation <= 0:
            raise ValueError("popluation must be a positive number when normilizedByPop is True")
        daily_cols["daily_count"] = ((daily_cols["daily_count"] / popluation) * normilizedByPopPerMultiplier).round(0)

    # Normalize by log; only positive values have a logarithm.
    if logNormilized:
        daily_cols["daily_count"] = daily_cols["daily_count"].map(
            lambda count: math.log(count) if count > 0 else 0.0
        )

    # Convert the date labels to pandas datetimes and derive the week number.
    # `.dt.week` was deprecated and later removed; `isocalendar().week` is its replacement.
    daily_cols["date"] = pd.to_datetime(daily_cols["date"])
    daily_cols["week_number"] = daily_cols["date"].dt.isocalendar().week.astype("int64")

    # Counts are whole numbers unless they were logged.
    if not logNormilized:
        daily_cols["daily_count"] = daily_cols["daily_count"].astype(np.int64)

    # Position of the first day with a positive count. When there is none the start
    # index stays at -1, so every row is kept with offsets starting at 1; this
    # matches the previous behaviour, which downstream plotting of all-zero series
    # depends on to receive a non-empty frame.
    has_cases = daily_cols["daily_count"].to_numpy() > 0
    first_infection_idx = int(has_cases.argmax()) if has_cases.any() else -1

    # The index is 0..n-1 after reset_index, so positions double as row labels.
    offsets = np.arange(len(daily_cols), dtype=np.int64) - first_infection_idx
    daily_cols["days_since_infection_start"] = np.where(offsets >= 0, offsets, -1)

    return daily_cols

In [374]:
def  get_data_from_first_infection(confirmed_or_deaths_df, logNormilized=False, normilizedByPop=False, popluation=0, normilizedByPopPerMultiplier=1000000):
    """Return the per-day totals restricted to the rows from the first infection onward.

    All arguments are forwarded unchanged to ``get_totals_dataframe``; rows whose
    ``days_since_infection_start`` equals the -1 sentinel are dropped.
    """
    totals_df = get_totals_dataframe(confirmed_or_deaths_df, logNormilized, normilizedByPop, popluation, normilizedByPopPerMultiplier)

    # A column-wise comparison replaces the row-by-row apply: same rows kept,
    # no Python-level loop, and it is well defined on an empty frame.
    return totals_df[totals_df["days_since_infection_start"] != -1]

In [375]:
def USA_State_County(usa_data_df, usa_state):
    """Return the sorted county names of one state, prefixed with ``"All"``.

    Parameters
    ----------
    usa_data_df : pandas.DataFrame
        Frame with ``State`` and ``County Name`` columns.
    usa_state : str
        Value to match against the ``State`` column.

    Returns
    -------
    numpy.ndarray
        ``"All"`` followed by the matching ``County Name`` values in ascending
        order. Only ``"All"`` is returned when no row matches.
    """
    # Column-wise comparison instead of a per-row lambda.
    in_state = usa_data_df["State"] == usa_state
    county_names = usa_data_df.loc[in_state, "County Name"].sort_values().to_numpy()
    return np.insert(county_names, 0, "All")

In [376]:
def data_date_range_tuple(confirmed_or_deaths_df, logNormilized=False, normilizedByPop=False, popluation=0, normilizedByPopPerMultiplier=1000000):
    """Return ``(first_date, last_date)`` of the data starting at the first infection.

    The arguments are passed straight through to ``get_data_from_first_infection``.
    """
    dates = get_data_from_first_infection(
        confirmed_or_deaths_df,
        logNormilized,
        normilizedByPop,
        popluation,
        normilizedByPopPerMultiplier,
    )["date"]
    first_date = dates.iloc[0]
    last_date = dates.iloc[-1]
    return first_date, last_date

In [377]:
def seven_day_increase_to_dataframe(infection_df):
    """Return the known day offsets followed by the next seven day offsets.

    Parameters
    ----------
    infection_df : pandas.DataFrame
        Frame with a ``days_since_infection_start`` column; its last row is taken
        as the most recent known day.

    Returns
    -------
    pandas.DataFrame
        Single-column frame (``days_since_infection_start``) with a fresh 0..n-1
        index: every input value, then ``last + 1`` through ``last + 7``.
        The input frame is not modified.
    """
    known_days = infection_df[["days_since_infection_start"]]

    # Last day of the current data.
    last_day = known_days["days_since_infection_start"].iloc[-1]

    # The seven days to predict, directly after the last known day.
    future_days = pd.DataFrame(
        {"days_since_infection_start": np.arange(last_day + 1, last_day + 8)}
    )

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported equivalent.
    return pd.concat([known_days, future_days], ignore_index=True)

In [378]:
def prediction(data_df):
    """Fit a 6th-degree polynomial trend to the daily counts and extend it seven days.

    Parameters
    ----------
    data_df : pandas.DataFrame
        Frame with ``date``, ``daily_count`` and ``days_since_infection_start``
        columns; must contain at least one row.

    Returns
    -------
    pandas.DataFrame
        One row per known day plus seven future days, with columns
        ``days_since_infection_start``, ``daily_count`` (model value rounded to a
        whole number) and ``date``.
    """
    day = "days_since_infection_start"
    formula = ("daily_count ~ days_since_infection_start + I(days_since_infection_start ** 2.00) + I(days_since_infection_start ** 3.00) + " +
              "I(days_since_infection_start ** 4.00) + I(days_since_infection_start ** 5.00) + I(days_since_infection_start ** 6.00)")

    fitted_model = smf.ols(formula=formula, data=data_df).fit()

    # Known day offsets followed by the seven offsets to forecast.
    days_to_perdict_df = seven_day_increase_to_dataframe(data_df)

    # Model value for every offset, rounded to whole counts.
    predicted_counts = fitted_model.predict(days_to_perdict_df).round(0)

    # Combine the day offsets and the predicted counts into one frame.
    future_precition_7_days = days_to_perdict_df.reset_index(drop=True)
    future_precition_7_days["daily_count"] = predicted_counts.reset_index(drop=True)

    # Translate offsets back into calendar dates. The first row's own offset is
    # subtracted so the dates stay correct even when data_df does not start at
    # offset 0 (previously the first date was assumed to correspond to offset 0).
    first_date = data_df["date"].iloc[0]
    first_offset = data_df[day].iloc[0]
    future_precition_7_days["date"] = first_date + pd.to_timedelta(
        future_precition_7_days[day] - first_offset, unit="D"
    )

    return future_precition_7_days

In [379]:
def get_mean_county_population():
    """Return the mean of the ``population`` column of ``team_super_df``.

    Missing values and zero entries are excluded from the mean. The result is NaN
    when no usable value remains.
    """
    population = team_super_df["population"].dropna()

    # Column-wise filter instead of a per-row lambda; same values, no Python loop.
    return population[population != 0].mean()

In [380]:
def get_max_days():
    """Build slider marks keyed by column position (as a string).

    Each date column of ``confirmed_usa`` (columns from position 4 on) gets an
    entry; the label is the column name when it falls on the first day of a
    month and an empty string otherwise.
    """
    return {
        str(position): (label if datetime.strptime(label, "%m/%d/%y").day == 1 else "")
        for position, label in enumerate(confirmed_usa.columns[4:])
    }

In [381]:
def get_days():
    """Map each date-column position of ``confirmed_usa`` (as a string) to its column name."""
    return {
        str(position): label
        for position, label in enumerate(confirmed_usa.columns[4:])
    }

### Functions for plot and map creators

In [382]:
def plot_fig(rdDataSource_value, dlDataNormalization_value, dlStates_value, dlCounties_value, dlPredictionModels_value, chkMovingAvg_value):
    """Build the scatter plot of daily counts with optional overlays.

    Parameters
    ----------
    rdDataSource_value : str
        ``"confirmed"`` selects ``confirmed_usa``; anything else selects ``deaths_usa``.
    dlDataNormalization_value : str
        ``"Log"`` plots log-normalized counts; any other value plots raw counts.
    dlStates_value : str
        State to filter on, or ``"All"`` for no state filter.
    dlCounties_value : str
        County to filter on within the state, or ``"All"``. Ignored when the
        state is ``"All"``.
    dlPredictionModels_value : str
        ``"nonL"`` adds the polynomial prediction trend line.
    chkMovingAvg_value : list or None
        Checklist selection; the 7-day moving average is drawn when it contains ``"7Day"``.

    Returns
    -------
    plotly figure with the daily counts and any requested overlay traces.
    """
    is_confirmed = rdDataSource_value == "confirmed"
    loggedNorm = dlDataNormalization_value == "Log"

    # Create labels for the data.
    plot_y_label = "Confirmed Cases" if is_confirmed else "Deaths"
    plot_x_label = "Date Since 1ft Infection"
    if loggedNorm:
        plot_y_label = plot_y_label + " *Logged"

    # Get data for confirmed or deaths.
    data_df = confirmed_usa if is_confirmed else deaths_usa

    # Optional state filter, then optional county filter inside that state.
    # Column-wise comparisons replace the per-row lambdas.
    if dlStates_value != "All":
        data_df = data_df[data_df["State"] == dlStates_value]
        if dlCounties_value != "All":
            data_df = data_df[data_df["County Name"] == dlCounties_value]

    # Keep only the days from the first infection onward.
    data_df = get_data_from_first_infection(confirmed_or_deaths_df=data_df, logNormilized=loggedNorm)

    # Plot the data.
    fig = px.scatter(x=data_df["date"], y=data_df["daily_count"], labels={"x":plot_x_label,"y":plot_y_label})

    # Membership test instead of list equality, so the check also tolerates None.
    if "7Day" in (chkMovingAvg_value or []):
        rolling_mean_series = data_df["daily_count"].rolling(7).mean()
        # go.Line is deprecated; a scatter trace in "lines" mode is the supported form.
        fig.add_trace(go.Scatter(x=data_df["date"], y=rolling_mean_series, mode="lines",
                                 name="7-Day Moving Avg.", line=dict(color="Green")))

    if dlPredictionModels_value == "nonL":
        data_predictions = prediction(data_df)
        fig.add_trace(go.Scatter(x=data_predictions["date"], y=data_predictions["daily_count"], mode="lines",
                                 name="Prediction Trend Line", line=dict(color="Red")))

    fig.update_layout(autosize=False, height=500)
    return fig

In [383]:
def map_fig(rdDataSource_value, rdMapDate_value):
    """Build the county choropleth map for one date column.

    Parameters
    ----------
    rdDataSource_value : str
        ``"confirmed"`` selects ``confirmed_usa``; anything else selects ``deaths_usa``.
    rdMapDate_value : int
        Position of the date column to show, counted within the date columns
        (columns from position 4 on).

    Returns
    -------
    plotly figure colouring each county by its value for that date, scaled by
    the mean county population to a per-1,000,000 figure.
    """
    # Multiplier the values are scaled by; also reported in the title.
    per_population = 1000000

    is_confirmed = rdDataSource_value == "confirmed"
    graph_label = "Confirmed Cases" if is_confirmed else "Deaths"
    data_df = confirmed_usa if is_confirmed else deaths_usa

    lastDateCol = data_df.columns[4:][rdMapDate_value]

    # Create a new frame from the four county-info columns and the requested date column.
    df = pd.concat([data_df[data_df.columns[0:4]], data_df[lastDateCol]], axis=1).copy()

    # Remove the state-unallocated rows (countyFIPS of 0).
    df = df[df["countyFIPS"] != 0].copy()

    # County FIPS ids are five characters (two state digits + three county digits);
    # left-pad with zeros so states with a single-digit FIPS match the map ids.
    df["countyFIPS"] = df["countyFIPS"].astype(str).str.zfill(5)

    df.columns = ["countyFIPS","CountyName","State","stateFIPS","daily_count"]

    # NOTE(review): every county is divided by the same mean county population,
    # not by its own population - confirm this is the intended normalization.
    popVal = get_mean_county_population()
    df["daily_count"] = ((df["daily_count"] / popVal) * per_population).round(0).astype(int)

    df["Caption"] = df["CountyName"] + ", " + df["State"]

    # Upper bound of the colour scale only; it is not the normalization base.
    color_Range_Max = 1000000 if is_confirmed else 10000

    # The title reports the real normalization base. Previously it printed the
    # colour-scale maximum, which claimed "Per 10,000" for deaths.
    fig = px.choropleth_mapbox(df, geojson=counties, locations="countyFIPS", color="daily_count",
                        color_continuous_scale="Reds",
                        range_color=(0,color_Range_Max),
                        hover_name="Caption",
                        title="USA {}, Normalized Per {:,}".format(graph_label, per_population),
                        zoom=3, center = {"lat": 37.0902, "lon": -95.7129},
                        mapbox_style="carto-positron",
                        opacity=0.6,
                        labels={"daily_count":graph_label})
    fig.update_layout(autosize=False, height=900)
    return fig

### Dash Application Init

In [384]:
# First and last date of the confirmed-case data, starting at the first infection.
# NOTE(review): neither value is referenced by the layout below - confirm whether
# they were meant to seed the date pickers.
date_range_default_start, date_range_default_end  = data_date_range_tuple(confirmed_usa)

# Dash application hosted inside the notebook.
app = JupyterDash(__name__)

# Page layout: title, the shared graph area, the graph-type and data-source radio
# groups, the map date slider (hidden initially) and the plot-only controls.
app.layout = html.Div([
    html.H2("USA Covid-19 Data",style={"text-align":"center"}),

    #Graph area; its figure is supplied by the update_plot callback
    dcc.Graph(id="dataPlot"),

    #Graph type group: line/scatter plot or USA county map
    html.Div([
            html.Label("Graph Type:"),
            dcc.RadioItems(id="rdGraphType",
                options=[
                    {"label": "Plot", "value": "plot"},
                    {"label": "Map USA", "value": "usa"}
                ],
                value="plot", labelStyle={"display":"block"})
        ],style={"width":"140px", "float":"left","margin-right":"5px"}),

    #Data source group
    html.Div([
            html.Label("Data Source:"),
            dcc.RadioItems(id="rdDataSource",
                options=[
                    {"label": "Confirmed", "value": "confirmed"},
                    {"label": "Deaths", "value": "deaths"}
                ],
                value="confirmed", labelStyle={"display":"block"})
        ],style={"width":"100px", "float":"left","margin-right":"5px"}),

    #Map date slider: one step per date column, starting on the last one.
    #Hidden by default; the update_plot callback shows it for the map view.
    html.Div([
            html.Div("Current Date: ",id="divMapDateHeader"),
            dcc.Slider(id="rdMapDate", 
                min=0, 
                max=len(get_max_days())-1, 
                step=1, 
                value=len(get_max_days())-1, 
                marks=get_max_days(), 
                updatemode="mouseup",
                tooltip={"always_visible":False})
        ],style={"display": "none"},id="divMapSlider"),

    #Selection Controls (plot view only)
    html.Div([
        #Start date picker
        #NOTE(review): no active callback in this section reads the date pickers
        html.Div([
            html.Label("Date Start:"),
            dcc.DatePickerSingle(id="dtpDateStart", 
                placeholder="All Days",
                style={"display":"block", "height":"10px"})
        ],style={"float":"left","margin-right":"10px", "margin-top":"10px"}),

         #End date picker
        html.Div([
            html.Label("Date End:"),
            dcc.DatePickerSingle(id="dtpDateEnd", 
                placeholder="All Days",
                style={"display":"block", "height":"10px"})
        ],style={"float":"left","margin-right":"10px", "margin-top":"10px"}),

        #Data Normalization group
        html.Div([
            html.Label("Data Normalization:",style={"float":"left"}),
            dcc.Dropdown(id="dlDataNormalization",
                options=[
                    {'label': "Linear", 'value': "Linear"},
                    {'label': "Log", 'value': "Log"}
                ],
                value="Linear", clearable=False)
        ],style={"width":"150px", "float":"left", "margin-right":"5px", "margin-top":"10px"}),

        #Data State group: label is the full state name, value is its abbreviation
        html.Div([
            html.Label("State:",style={"float":"left"}),
            dcc.Dropdown(id="dlStates",
                options=[
                    {'label': fullname, 'value': key}
                    for fullname, key in USA_States().items()
                ],
                value="All", clearable=False)
        ],style={"width":"200px", "float":"left", "margin-right":"5px", "margin-top":"10px"}),

        #Data county group: starts with "All" only; the options are replaced by
        #the set_counties_options callback when the state changes
        html.Div([
            html.Label("County:",style={"float":"left"}),
            dcc.Dropdown(id="dlCounties",
                options=[
                    {'label': "All", 'value': "All"}
                ], 
                value="All", clearable=False)
        ],style={"width":"200px", "float":"left", "margin-right":"5px", "margin-top":"10px"}),

        #Data Prediction Model and the 7-day moving-average toggle
        html.Div([
            html.Label("Prediction:",style={"float":"left"}),
            dcc.Dropdown(id="dlPredictionModels",
                options=[
                    {'label': "None", 'value': "None"},
                    {'label': "Non-Linear Model", 'value': "nonL"}
                ], 
                value="None", clearable=False),
            dcc.Checklist(id="chkMovingAvg",
                options=[{"label":"7-Days Moving Average","value":"7Day"}],value=[])
        ],style={"width":"200px", "float":"left", "margin-right":"5px", "margin-top":"10px"})

    ],style={"margin-right":"5px", "margin-top":"0px"}, id="divPlotControls")    

], style={"font-size":"14px"})

## Set up callbacks

In [385]:
@app.callback(
    Output("dlCounties", "options"),
    Input("dlStates", "value"))
def set_counties_options(selected_state):
    """Refresh the county dropdown options whenever the selected state changes.

    Each entry returned by ``USA_State_County`` becomes an option whose label
    and value are both that entry.
    """
    county_options = []
    for county_name in USA_State_County(team_super_df, selected_state):
        county_options.append({"label": county_name, "value": county_name})
    return county_options

#@app.callback(   
#    Output("testdateOutput", "children"),
#    Input("dtpDateStart", "date"))
#def testdatesel(date_value):
#    if date_value is not None:
#        date_object = date.fromisoformat(date_value)
#        date_string = date_object.strftime("%B %d, %Y")
#        return date_object
#    else:
#        return "noneasdfasdf"

@app.callback(
    Output("dataPlot", "figure"),
    Output("divPlotControls", "style"),
    Output("divMapSlider", "style"),
    Output("divMapDateHeader", "children"),
    [Input("rdGraphType", "value"),
    Input("rdDataSource", "value"),
    Input("dlDataNormalization", "value"),
    Input("dlStates", "value"),
    Input("dlCounties", "value"),
    Input("dlPredictionModels", "value"),
    Input("chkMovingAvg","value"),
    Input("rdMapDate","value")])
def update_plot(rdGraphType_value, rdDataSource_value, dlDataNorm_value, dlStates_value, dlCounties_value, dlPredModels_value, chkMovingAvg_value, rdMapDate_value):
    """Rebuild the figure and toggle the control panels for the chosen graph type.

    Returns, in output order: the figure, the style of the plot controls, the
    style of the map slider container and the text of the map date header.
    In plot mode the plot controls are shown, the slider is hidden and the
    header text is empty; in map mode the visibility is reversed and the header
    names the date column selected on the slider.
    """
    # Plot view: early return keeps the two modes visually separate.
    if rdGraphType_value == "plot":
        plot_figure = plot_fig(
            rdDataSource_value,
            dlDataNorm_value,
            dlStates_value,
            dlCounties_value,
            dlPredModels_value,
            chkMovingAvg_value,
        )
        return plot_figure, {"display":"block"}, {"display":"none"}, ""

    # Map view.
    map_figure = map_fig(rdDataSource_value, rdMapDate_value)
    slider_style = {"width":"700px", "float":"left","margin-right":"5px", "display": "block"}
    selected_day_key = "{}".format(rdMapDate_value)
    header_text = "Current Date: {}".format(get_days()[selected_day_key])
    return map_figure, {"display":"none"}, slider_style, header_text

In [386]:
# Turn on the Dash dev-tools UI and serve the development bundles.
app.enable_dev_tools(
    dev_tools_ui=True,
    dev_tools_serve_dev_bundles=True,
)

# Run the app and display the result inline in the notebook.
# Alternative kept for reference: external mode with debug enabled.
#app.run_server(mode='External', debug=True)
app.run_server(mode='inline')