# Project Stage - V (Dashboard)
## Goals
The final stage aims at developing a simple interactive dashboard based on the analysis you have done so far. For this stage we will use Plotly (https://plotly.com/) along with Dash (https://plotly.com/dash/) as our framework.

Refer here for Plotly: https://github.com/q-tong/CS405-605-Data-Science/tree/main/Fall2023/Lecture/5.Visualization/Visualization

Getting started with Dash: https://www.youtube.com/watch?v=hSPmj7mK6ng

PS: This can be invoked from Jupyter, see here: https://medium.com/plotly/introducing-jupyterdash-811f1f57c02e

Tasks for stage V (team):
Task 1: (70 pts)
- Main graph
    - Allow for selection of date to show the trend of COVID-19 cases and deaths. (30)
    - Allow for linear or log mode selection on the number of cases and deaths. (10)
    - Incorporate your best model prediction trend line - Linear / Non-Linear. (30)
    - Ex: https://ourworldindata.org/coronavirus

Task 2: (30 pts)
- Trend
    - Plot the trend line using moving average (https://en.wikipedia.org/wiki/Moving_average). Use 7-day moving average. (15)
    - Allow for selection of multiple states on the same graph. (15)



Deliverable

Take screenshots of Report upload on canvas.
Each member creates separate notebooks for member tasks. Upload all notebooks to Github Repository.

In [1]:
import pandas as pd
import numpy as np
import time

import plotly.express as px
import dash   # make sure it's v2.2.0 or greater
from jupyter_dash import JupyterDash
from dash import dcc
from dash import html
from dash.dependencies import Input, Output
#

from sklearn import datasets
from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import PolynomialFeatures
from sklearn import linear_model
from sklearn.svm import SVR

pd.set_option('display.max_rows', 5000)


In [2]:
# Import Data
df = pd.read_csv('ProjectDataStage1LONGFORMAT.csv')
df.dropna(inplace = True)

# Parse the dates BEFORE aggregating so the groupby below orders each state's
# rows chronologically instead of by the raw string representation.
df['Date'] = pd.to_datetime(df['Date'])  # converts date from object to date time

# Collapse all rows that share a State and Date into a single row of totals
df = df.groupby(['State', 'Date'])[["Deaths", "Cases", "population"]].sum().reset_index()

# Data manipulations

# Turn the cumulative case / death counts into day-over-day deltas.
# The difference is taken within each state: a plain df['Cases'].diff() would
# subtract the last row of the previous state from the first row of the next
# one and produce a bogus "daily" value at every state boundary.
# NOTE(review): .abs() is kept from the original; it turns any decrease in the
# cumulative counts into a positive daily value - confirm that is intended.
df['DailyCases'] = df.groupby('State')['Cases'].diff().abs()
df['DailyDeaths'] = df.groupby('State')['Deaths'].diff().abs()

# Daily values per 100,000 population
df['DeathPerCapita'] = (df['DailyDeaths'] / df['population'])*100000
df['CasesPerCapita'] = (df['DailyCases'] / df['population'])*100000

# The first date of every state has no previous day to difference against,
# so its delta is NaN and the row is removed here.
df.dropna(inplace = True)


In [3]:
# Two-letter codes offered in the state dropdown of the dashboard
# (51 entries, including DC), kept in the original order.
states = (
    "AK AL AR AZ CA CO CT DC DE FL GA "
    "HI IA ID IL IN KS KY LA MA MD ME "
    "MI MN MO MS MT NC ND NE NH NJ NM "
    "NV NY OH OK OR PA RI SC SD TN TX "
    "UT VA VT WA WI WV WY"
).split()



    

In [4]:
# Helpers that let the Plotly range slider work with dates
# https://stackoverflow.com/questions/51063191/date-slider-with-plotly-dash-does-not-work

# Weekly timestamps from the start of 2020 up to the start of 2023;
# the slider bounds and its labels are derived from this index.
daterange = pd.date_range('2020', '2023', freq='W')
def unixTimeMillis(dt):
    """Convert a naive datetime to a Unix timestamp in whole seconds.

    Despite the name, the result is in seconds, not milliseconds.

    The datetime's fields are interpreted as UTC. The previous implementation
    used ``time.mktime``, which interprets them in the machine's local
    timezone, while ``unixToDatetime`` decodes the value with
    ``pd.to_datetime(..., unit='s')`` (UTC based). On any host not running in
    UTC the round trip therefore shifted every slider date by the UTC offset.

    Parameters
    ----------
    dt : datetime.datetime or pandas.Timestamp
        Naive (timezone-unaware) date to convert.

    Returns
    -------
    int
        Seconds since 1970-01-01 00:00:00.
    """
    # Local import keeps this helper self-contained; the file's import cell
    # does not bring in ``calendar``.
    import calendar

    return calendar.timegm(dt.timetuple())

def unixToDatetime(unix):
    """Convert a Unix timestamp expressed in seconds to a pandas datetime.

    This is the inverse of the slider encoding: the range slider hands back
    plain numbers, and this turns them into values comparable with the
    ``Date`` column.
    """
    asDatetime = pd.to_datetime(unix, unit='s')
    return asDatetime

def getMarks(start, end, Nth=100):
    """Build the label dictionary for the date range slider.

    Only dates of the module-level ``daterange`` that fall inside
    ``[start, end]`` are considered (the previous version accepted these
    arguments but ignored them). Of those, every ``Nth`` date is labelled.

    Parameters
    ----------
    start, end : datetime-like
        Inclusive bounds of the dates that may receive a label.
    Nth : int, default 100
        Label spacing. For ``Nth > 1`` the labelled positions are
        1, 1 + Nth, 1 + 2*Nth, ... exactly as before. ``Nth == 1`` now labels
        every date; previously ``i % 1 == 1`` was never true, so no marks
        were produced at all.

    Returns
    -------
    dict
        Maps the slider value of a date (``unixTimeMillis(date)``) to its
        ``'YYYY-MM-DD'`` label.
    """
    # 1 % Nth is 1 for every Nth > 1 and 0 for Nth == 1
    offset = 1 % Nth

    candidates = daterange[(daterange >= start) & (daterange <= end)]

    return {
        unixTimeMillis(date): date.strftime('%Y-%m-%d')
        for i, date in enumerate(candidates)
        if i % Nth == offset
    }

    

In [5]:
# Create the Dash application object used by this notebook
app = JupyterDash(__name__)

# Page layout, top to bottom: title, date-range slider, state dropdown,
# linear/log selector, cases graph, text container and a trailing line break.
app.layout = html.Div(children=[
    html.H1(children="COVID-19 Dashboard", style={'text-align': 'center'}),

    # Slider positions are the numeric timestamps produced by unixTimeMillis;
    # both handles start at the extremes so the whole period is selected.
    dcc.RangeSlider(
        id='yearSlider',
        marks=getMarks(daterange.min(), daterange.max()),
        min=unixTimeMillis(daterange.min()),
        max=unixTimeMillis(daterange.max()),
        value=[
            unixTimeMillis(daterange.min()),
            unixTimeMillis(daterange.max()),
        ],
    ),

    # Single-state selection, defaulting to Alaska
    dcc.Dropdown(
        id='stateSelect',
        options=states,
        value='AK',
        multi=False,
        style={'width': "40%"},
    ),

    # Choice between raw values and their log10
    dcc.RadioItems(
        id='dataTransform',
        options=['Linear', 'Log Transform'],
        value='Linear',
    ),

    # Filled in by the update_graph callback
    dcc.Graph(id='graphCases', figure={}),
    html.Div(id='output_container', children=[]),

    html.Br(),
])




# Define callback to update graph
# Connect the Plotly graphs with Dash Components
@app.callback(
    [Output(component_id='output_container', component_property='children'),
     Output(component_id='graphCases', component_property='figure')],
    [Input(component_id='stateSelect', component_property='value')],
    [Input(component_id='yearSlider', component_property='value')],  # [start, end] slider positions (timestamps in seconds)
    [Input(component_id='dataTransform', component_property='value')]
)
def update_graph(stateSelect, yearSlider, dataTransform):
    """Redraw the cases graph whenever one of the controls changes.

    Parameters
    ----------
    stateSelect : str
        State code chosen in the dropdown.
    yearSlider : sequence of two numbers
        Start and end of the selected period as slider timestamps.
    dataTransform : str
        ``'Linear'`` plots the values as they are; any other choice plots
        their base-10 logarithm.

    Returns
    -------
    tuple
        ``(text for output_container, figure for graphCases)``.
    """
    dateStart = unixToDatetime(yearSlider[0])  # stores start date in DateTime format
    dateEnd = unixToDatetime(yearSlider[1])  # stores end date in DateTime Format

    # Fix: the template used to contain a single placeholder, so the end date
    # passed to format() was silently dropped from the message.
    container = "The year range chosen by user was: {} to {}".format(dateStart, dateEnd)

    # Rows of the chosen state strictly inside the selected period
    inState = df['State'] == stateSelect
    inPeriod = (df['Date'] > dateStart) & (df['Date'] < dateEnd)
    dfTemp = df[inState & inPeriod]

    yValues = dfTemp['CasesPerCapita']
    if dataTransform != "Linear":
        # log10(0) is -inf; mask non-positive values so they become gaps (NaN)
        yValues = np.log10(yValues.where(yValues > 0))

    # TODO: the model trend line is not drawn yet (an earlier SVR attempt was
    # left commented out here and has been removed).
    figCases = px.scatter(x=dfTemp['Date'], y=yValues)

    return container, figCases

# Run app and display result inline in the notebook
app.run_server(mode='inline')

In [12]:
# Re-create the Dash application object for the second version of the dashboard
app = JupyterDash(__name__)

# Same page structure as before: title, date-range slider, state dropdown,
# linear/log selector, cases graph, text container and a trailing line break.
app.layout = html.Div(children=[
    html.H1(children="COVID-19 Dashboard", style={'text-align': 'center'}),

    # Range slider over the numeric timestamps of daterange, fully opened
    dcc.RangeSlider(
        id='yearSlider',
        marks=getMarks(daterange.min(), daterange.max()),
        min=unixTimeMillis(daterange.min()),
        max=unixTimeMillis(daterange.max()),
        value=[
            unixTimeMillis(daterange.min()),
            unixTimeMillis(daterange.max()),
        ],
    ),

    # One state at a time, Alaska preselected
    dcc.Dropdown(
        id='stateSelect',
        options=states,
        value='AK',
        multi=False,
        style={'width': "40%"},
    ),

    # Raw values or log10 of the values
    dcc.RadioItems(
        id='dataTransform',
        options=['Linear', 'Log Transform'],
        value='Linear',
    ),

    # Targets of the update_graph callback
    dcc.Graph(id='graphCases', figure={}),
    html.Div(id='output_container', children=[]),

    html.Br(),
])




# Define callback to update graph
# Connect the Plotly graphs with Dash Components
@app.callback(
    [Output(component_id='output_container', component_property='children'),
     Output(component_id='graphCases', component_property='figure')],
    [Input(component_id='stateSelect', component_property='value')],
    [Input(component_id='yearSlider', component_property='value')],  # [start, end] slider positions (timestamps in seconds)
    [Input(component_id='dataTransform', component_property='value')]
)
def update_graph(stateSelect, yearSlider, dataTransform):
    """Redraw the cases graph whenever one of the controls changes.

    The global ``df`` is only read here. The previous version bound
    ``dfTemp = df`` (an alias, not a copy) and wrote the log10 columns back
    into it, so every "Log Transform" callback permanently overwrote the
    shared data: repeated calls took the log of the log, and switching back
    to "Linear" still showed transformed values.

    Parameters
    ----------
    stateSelect : str
        State code chosen in the dropdown.
    yearSlider : sequence of two numbers
        Start and end of the selected period as slider timestamps.
    dataTransform : str
        ``'Log Transform'`` plots the base-10 logarithm of the values; any
        other choice plots them unchanged.

    Returns
    -------
    tuple
        ``(text for output_container, figure for graphCases)``.
    """
    dateStart = unixToDatetime(yearSlider[0])  # stores start date in DateTime format
    dateEnd = unixToDatetime(yearSlider[1])  # stores end date in DateTime Format

    # Fix: the template used to contain a single placeholder, so the end date
    # passed to format() was silently dropped from the message.
    container = "The year range chosen by user was: {} to {}".format(dateStart, dateEnd)

    # Filter first; boolean indexing yields a new frame, leaving df untouched
    inState = df['State'] == stateSelect
    inPeriod = (df['Date'] > dateStart) & (df['Date'] < dateEnd)
    dfTemp = df[inState & inPeriod]

    # Transform only the series that is plotted. The old code also
    # log-transformed DeathPerCapita, which the figure never used.
    yValues = dfTemp['CasesPerCapita']
    if dataTransform == "Log Transform":
        # log10(0) is -inf; mask non-positive values so they become gaps (NaN)
        yValues = np.log10(yValues.where(yValues > 0))

    # TODO: add the model trend line. The removed commented-out attempt fitted
    # SVR directly on df['Date'] - presumably it failed because the estimator
    # needs a 2-D numeric feature matrix rather than a 1-D datetime column;
    # verify before re-adding.
    figCases = px.scatter(x=dfTemp['Date'], y=yValues)

    return container, figCases

# Run app and display result inline in the notebook
app.run_server(mode='inline')