In [1]:
import os
from urllib.parse import quote_plus

import pandas as pd
import plotly.express as px
import pymysql
import sqlalchemy as sql

# Workaround so that plotly animations work in VS Code: force the 'notebook' renderer
import plotly.io as pio
pio.renderers.default = 'notebook'

In [2]:
# Connection settings. Each value can be overridden through an environment
# variable so real credentials never have to be written into the notebook;
# the fallbacks are the original local-development values.
hostname = os.environ.get("COVID_DW_HOST", "localhost")
port = os.environ.get("COVID_DW_PORT", "3307")  # default 3306, using 3307 for the db hosted in vm
dbname = os.environ.get("COVID_DW_NAME", "covid-dw")
uname = os.environ.get("COVID_DW_USER", "root")
# NOTE: the fallback is a throwaway local password; set COVID_DW_PASSWORD for
# any database that is not a local test instance.
pwd = os.environ.get("COVID_DW_PASSWORD", "testpassmysql")

# Create the SQLAlchemy engine used to connect to the MySQL db.
# User and password are URL-escaped so characters such as '@', ':' or '/'
# cannot corrupt the connection URL.
engine = sql.create_engine(
    "mysql+pymysql://{user}:{pwd}@{host}:{port}/{db}".format(
        host=hostname,
        db=dbname,
        port=port,
        user=quote_plus(uname),
        pwd=quote_plus(pwd),
    )
)

In [3]:
# Import data from the db.
# The context manager closes the connection even when a query fails, and any
# error is allowed to propagate: printing it and carrying on would leave
# df_s / df_c / df_d undefined and make later cells fail with a misleading
# NameError.
with engine.connect() as dbConn:
    df_s = pd.read_sql("SELECT * FROM vw_time_state_rates", dbConn)
    df_c = pd.read_sql("SELECT * FROM vw_time_county_rates", dbConn)
    df_d = pd.read_sql("SELECT * FROM vw_demo_severity", dbConn)

In [4]:
# Preview the first five rows of the state-level data (head() defaults to 5).
df_s.head()

Unnamed: 0,month,state_id,state,population,poverty,case_total,out_total,out_severe,out_death,poverty_rate,pop_infect_rate,case_death_rate,case_severe_rate,severe_death_rate
0,2021-12-15,26,MI,9966555.0,1232948.0,7605.0,3004.0,254.0,97.0,0.1237,0.0008,0.0323,0.0846,0.3819
1,2022-01-15,17,IL,12587530.0,1351159.0,41996.0,1729.0,351.0,112.0,0.1073,0.0033,0.0648,0.203,0.3191
2,2022-01-15,25,MA,6893574.0,628899.0,36153.0,1556.0,285.0,131.0,0.0912,0.0052,0.0842,0.1832,0.4596
3,2022-01-15,13,GA,10710017.0,1465328.0,35747.0,401.0,140.0,11.0,0.1368,0.0033,0.0274,0.3491,0.0786
4,2021-07-15,12,FL,21733312.0,2642642.0,11772.0,9505.0,575.0,169.0,0.1216,0.0005,0.0178,0.0605,0.2939


## Case Spread and Severity with Time

### Outbreak Map

In [10]:
# The animation slider needs string frame labels, so add a string copy of the
# month and order the rows by month.
df_s = df_s.assign(month_str=df_s['month'].astype(str)).sort_values(
    by="month", ignore_index=True
)

# Animated US state map coloured by total cases, one frame per month.
fig = px.choropleth(
    data_frame=df_s,
    locations='state',
    locationmode='USA-states',
    scope='usa',
    color='case_total',
    range_color=(0, 20000),
    color_continuous_scale=px.colors.sequential.Oranges,
    animation_frame="month_str",
    hover_name='state',
    hover_data={'case_total': True, 'month_str': False, 'state': False},
    labels={'case_total': 'cases', 'month_str': 'month'},
)
fig.write_html("./plot/state_map.html")
fig.show()

In [7]:
# Same preparation for the county-level data: string month for the slider,
# rows ordered by month.
df_c = df_c.assign(month_str=df_c['month'].astype(str)).sort_values(
    by="month", ignore_index=True
)

In [14]:
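# Disabled draft of a county-level map; kept for reference only.
# NOTE(review): `df_tmp` is not defined anywhere in this notebook — presumably a
# single-state subset of df_c matching the geojson file below; define it before re-enabling.
# import json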
# fig = px.choropleth(
#     df_tmp,
#     locations='county_id',
#     geojson=json.load(open('./plot/geojson/13.json')),
#     color='case_total',
#     color_continuous_scale=px.colors.sequential.Oranges,
#     animation_frame="month_str",
#     range_color=(0,200),
#     scope='usa',
#     fitbounds='locations',
#     hover_name='county',
#     labels={'case_total':'cases','month_str':'month'},
#     hover_data={'case_total':True,'month_str':False,'county':False}
#     )
# # fig.write_json("./plot/county_map.json")
# fig.show()

### Cases and deaths with time

In [None]:
# plotly.express cannot draw traces on different y scales, so plotly.graph_objects is used instead
import plotly.graph_objects as go
from plotly.subplots import make_subplots

def multi_plot(df, field_dict):
    """
    Build a double-y-axis line plot with a dropdown that switches between states.

    One trace is added per (state, field) pair. The first field of
    ``field_dict`` is drawn against the primary (left) y axis and every
    remaining field against the secondary (right) y axis, so series of very
    different magnitude stay readable in one figure. Only the first state's
    traces are visible initially; each dropdown button shows exactly one
    state's traces.

    Args:
        df (dataframe): dataframe containing the data; must have a 'state'
            column, a 'month' column and one column per key of field_dict
        field_dict (dict): key, field name as in dataframe; value, name used in plot

    Returns:
        The figure created by make_subplots, with traces, axis titles and the
        state dropdown added.
    """
    fig = make_subplots(specs=[[{'secondary_y': True}]])
    states = df.state.unique().tolist()
    fields = list(field_dict)
    labels = [field_dict[field] for field in fields]
    n_fields = len(fields)
    n_traces = len(states) * n_fields
    buttons = []
    for state_idx, state in enumerate(states):
        state_rows = df[df.state == state].sort_values('month')
        for field_idx, (field, label) in enumerate(zip(fields, labels)):
            fig.add_trace(
                go.Scatter(
                    x=state_rows.month, y=state_rows[field],
                    name=f'{state} {label}', mode='lines+markers',
                    line=dict(dash='solid', width=1), marker=dict(symbol='circle'),
                    # start with only the first state shown, matching the
                    # dropdown's initially selected entry
                    visible=(state_idx == 0),
                ),
                # first field on the left axis, the rest on the right axis
                secondary_y=(field_idx > 0),
            )
        # list of booleans telling the button which traces to show:
        # traces are laid out state by state, n_fields per state
        vis = [False] * n_traces
        start = state_idx * n_fields
        vis[start:start + n_fields] = [True] * n_fields
        buttons.append(dict(label=state, method='update', args=[{'visible': vis}]))
    # label each axis with the series it carries so the two scales can be told apart
    if labels:
        fig.update_yaxes(title_text=labels[0], secondary_y=False)
    if len(labels) > 1:
        fig.update_yaxes(title_text=' / '.join(labels[1:]), secondary_y=True)
    fig.update_layout(
        updatemenus=[
            dict(
                type='dropdown', direction='right',
                xanchor='left', yanchor='top', y=1.2,
                buttons=buttons,
            )
        ]
    )
    return fig

In [None]:
# Order rows by descending population before building the per-state traces.
df_s = df_s.sort_values(by='population', ascending=False).reset_index(drop=True)

fig = multi_plot(
    df_s,
    {'case_total': 'total cases', 'out_death': 'reported deaths'},
)
# pin the ranges of both y axes
fig.update_layout(
    yaxis={'range': [0, 1e5]},
    yaxis2={'range': [0, 2e3]},
)
fig.show()

### Cases and severe cases with time

In [None]:
# Total cases together with severe cases, per state.
fig = multi_plot(
    df_s,
    {'case_total': 'total cases', 'out_severe': 'severe cases'},
)
# pin the ranges of both y axes
fig.update_layout(
    yaxis={'range': [0, 1e5]},
    yaxis2={'range': [0, 1e4]},
)
fig.show()

### Infection and death rates with time

In [None]:
# Infection rate and case death rate, per state.
# pop_infect_rate is a plain fraction of the population (e.g. 0.0008 for
# 7605 cases in 9966555 people), not a per-1000 figure, so it is labelled
# as a rate.
fig = multi_plot(df_s,{'pop_infect_rate':'infection rate','case_death_rate':'case death rate'})
# NOTE(review): both axes are fixed to [0, 1]; the rates in the data preview
# are far below 1, so tighter ranges may be more readable — confirm.
fig.update_layout(
    yaxis=dict(range=[0,1]),
    yaxis2=dict(range=[0,1]),
)
fig.show()

## Demography vs Severity

In [None]:
# Order the demographic data by age, then race, with a fresh 0..n-1 index,
# and preview the first ten rows.
df_d = df_d.sort_values(by=['age', 'race']).reset_index(drop=True)
df_d.head(n=10)

In [None]:
# Collapse the race dimension: sum every remaining column per (age, sex).
df_d_pop = (
    df_d
    .drop(columns='race')
    .groupby(['age', 'sex'])
    .agg('sum')
    .reset_index()
)
df_d_pop

In [None]:
# Add severe and death rates (share of out_total) to both the detailed frame
# and the race-aggregated frame.
for rates_frame in (df_d, df_d_pop):
    rates_frame['severe_rate'] = rates_frame['out_severe'] / rates_frame['out_total']
    rates_frame['death_rate'] = rates_frame['out_death'] / rates_frame['out_total']

In [None]:
# Severe rate and death rate against age, with race aggregated away.
# NOTE(review): df_d_pop holds one row per (age, sex), so each series has
# several bars per age value — confirm that is the intended view.
fig = go.Figure(
    data=[
        go.Bar(x=df_d_pop['age'], y=df_d_pop[rate_col], name=rate_col)
        for rate_col in ('severe_rate', 'death_rate')
    ]
)
fig.show()

In [None]:
# Severe rate by age, grouped by sex, one facet column per race
# (rows with missing values are dropped first).
px.bar(
    data_frame=df_d.dropna(),
    x='age',
    y='severe_rate',
    color='sex',
    barmode='group',
    facet_col='race',
)

In [None]:
# Death rate by age, grouped by sex, one facet column per race
# (rows with missing values are dropped first).
px.bar(
    data_frame=df_d.dropna(),
    x='age',
    y='death_rate',
    color='sex',
    barmode='group',
    facet_col='race',
)