In [1]:
import json
import math

import geopandas
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import plotly.graph_objects as go
from dash import Dash, dcc, html, Input, Output
from dash.exceptions import PreventUpdate
from IPython.display import display, HTML

In [2]:
# Lookup tables used to translate between the three ways a state is identified
# in this notebook: two-digit FIPS code, full name, and postal abbreviation.
# The names used in `state_mapping` must exist as keys of `us_state_to_abbrev`,
# because the data-loading cell looks abbreviations up by those names.

# Two-digit state FIPS code -> state / territory name.
state_mapping = {
    "01": "Alabama",
    "02": "Alaska",
    "04": "Arizona",
    "05": "Arkansas",
    "06": "California",
    "08": "Colorado",
    "09": "Connecticut",
    "10": "Delaware",
    "11": "District of Columbia",
    "12": "Florida",
    "13": "Georgia",
    "15": "Hawaii",
    "16": "Idaho",
    "17": "Illinois",
    "18": "Indiana",
    "19": "Iowa",
    "20": "Kansas",
    "21": "Kentucky",
    "22": "Louisiana",
    "23": "Maine",
    "24": "Maryland",
    "25": "Massachusetts",
    "26": "Michigan",
    "27": "Minnesota",
    "28": "Mississippi",
    "29": "Missouri",
    "30": "Montana",
    "31": "Nebraska",
    "32": "Nevada",
    "33": "New Hampshire",
    "34": "New Jersey",
    "35": "New Mexico",
    "36": "New York",
    "37": "North Carolina",
    "38": "North Dakota",
    "39": "Ohio",
    "40": "Oklahoma",
    "41": "Oregon",
    "42": "Pennsylvania",
    "44": "Rhode Island",
    "45": "South Carolina",
    "46": "South Dakota",
    "47": "Tennessee",
    "48": "Texas",
    "49": "Utah",
    "50": "Vermont",
    "51": "Virginia",
    "53": "Washington",
    "54": "West Virginia",
    "55": "Wisconsin",
    "56": "Wyoming",
    # Territories
    "60": "American Samoa",
    "66": "Guam",
    "69": "Northern Mariana Islands",
    "72": "Puerto Rico",
    # Spelled exactly like the key in `us_state_to_abbrev`; the previous
    # "Virgin Islands" spelling had no abbreviation entry and raised KeyError.
    "78": "U.S. Virgin Islands",
}

# State / territory name -> two-digit FIPS code.
inverse_state_mapping = {name: code for code, name in state_mapping.items()}

# State / territory name -> postal abbreviation.
us_state_to_abbrev = {
    "Alabama": "AL",
    "Alaska": "AK",
    "Arizona": "AZ",
    "Arkansas": "AR",
    "California": "CA",
    "Colorado": "CO",
    "Connecticut": "CT",
    "Delaware": "DE",
    "Florida": "FL",
    "Georgia": "GA",
    "Hawaii": "HI",
    "Idaho": "ID",
    "Illinois": "IL",
    "Indiana": "IN",
    "Iowa": "IA",
    "Kansas": "KS",
    "Kentucky": "KY",
    "Louisiana": "LA",
    "Maine": "ME",
    "Maryland": "MD",
    "Massachusetts": "MA",
    "Michigan": "MI",
    "Minnesota": "MN",
    "Mississippi": "MS",
    "Missouri": "MO",
    "Montana": "MT",
    "Nebraska": "NE",
    "Nevada": "NV",
    "New Hampshire": "NH",
    "New Jersey": "NJ",
    "New Mexico": "NM",
    "New York": "NY",
    "North Carolina": "NC",
    "North Dakota": "ND",
    "Ohio": "OH",
    "Oklahoma": "OK",
    "Oregon": "OR",
    "Pennsylvania": "PA",
    "Rhode Island": "RI",
    "South Carolina": "SC",
    "South Dakota": "SD",
    "Tennessee": "TN",
    "Texas": "TX",
    "Utah": "UT",
    "Vermont": "VT",
    "Virginia": "VA",
    "Washington": "WA",
    "West Virginia": "WV",
    "Wisconsin": "WI",
    "Wyoming": "WY",
    "District of Columbia": "DC",
    "American Samoa": "AS",
    "Guam": "GU",
    "Northern Mariana Islands": "MP",
    "Puerto Rico": "PR",
    "United States Minor Outlying Islands": "UM",
    "U.S. Virgin Islands": "VI",
}

# Postal abbreviation -> state / territory name.
abbrev_to_us_state = {abbrev: name for name, abbrev in us_state_to_abbrev.items()}

# Postal abbreviation -> two-digit FIPS code.
state_abbrev_to_fips = {
    'WA': '53', 'DE': '10', 'DC': '11', 'WI': '55', 'WV': '54', 'HI': '15',
    'FL': '12', 'WY': '56', 'PR': '72', 'NJ': '34', 'NM': '35', 'TX': '48',
    'LA': '22', 'NC': '37', 'ND': '38', 'NE': '31', 'TN': '47', 'NY': '36',
    'PA': '42', 'AK': '02', 'NV': '32', 'NH': '33', 'VA': '51', 'CO': '08',
    'CA': '06', 'AL': '01', 'AR': '05', 'VT': '50', 'IL': '17', 'GA': '13',
    'IN': '18', 'IA': '19', 'MA': '25', 'AZ': '04', 'ID': '16', 'CT': '09',
    'ME': '23', 'MD': '24', 'OK': '40', 'OH': '39', 'UT': '49', 'MO': '29',
    'MN': '27', 'MI': '26', 'RI': '44', 'KS': '20', 'MT': '30', 'MS': '28',
    'SC': '45', 'KY': '21', 'OR': '41', 'SD': '46',
    # Territories present in `state_mapping` (PR was already listed above).
    'AS': '60', 'GU': '66', 'MP': '69', 'VI': '78',
}


In [3]:
# Load the per-county, per-year visualisation table. Identifier-like columns are
# read as strings so FIPS codes keep their leading zeros.
v_df = pd.read_csv(
    'gen1e_project/visualization_data.csv',
    index_col=0,
    dtype={
        'population': float, 'health': float, 'smokers': float,
        'obesity': float, 'drinking': float, 'pollution': float,
        'sep_deaths': float, 'pn_deaths': float, 'hyp_deaths': float,
        'outcome': float, 'fips': str, 'state_fips': str, 'year': str,
        'geojson': str, 'county_name': str, 'state_name': str, 'state_abbr': str,
        'hospitals': str,
    },
)

# standardise state fips: derive every state field from the county FIPS prefix.
# Plain dict lookups are kept (rather than Series.map) so an unknown code fails
# loudly here instead of silently producing NaN state names.
v_df['state_fips'] = v_df['fips'].str[:2]
v_df['state_name'] = v_df['state_fips'].apply(lambda x: state_mapping[x])
v_df['state_abbr'] = v_df['state_name'].apply(lambda x: us_state_to_abbrev[x])
v_df['year'] = v_df['year'].apply(str).str[:4]

# add hospitals to dataframe
hospitals_df = geopandas.read_file("gen1e_project/us_hospitals/v10/ghospitl.gdb")
hospitals_df['x'] = hospitals_df.geometry.x
hospitals_df['y'] = hospitals_df.geometry.y
hospitals_df = hospitals_df.rename(columns={"STCTYFIPS": "fips"})
hospitals_df = hospitals_df.reset_index()

# One JSON-encoded list of hospital names per county FIPS, attached to every
# row of that county in a single vectorised lookup (previously one full-frame
# boolean mask per county). Counties without a hospital keep whatever value the
# CSV provided for the 'hospitals' column.
hospital_names_by_fips = hospitals_df.groupby('fips')['NAME'].agg(
    lambda names: json.dumps(tuple(names))
)
mapped_hospitals = v_df['fips'].map(hospital_names_by_fips)
if 'hospitals' in v_df.columns:
    mapped_hospitals = mapped_hospitals.fillna(v_df['hospitals'])
v_df['hospitals'] = mapped_hospitals

# v_df holds one row per county *per year*; merging against it directly would
# repeat every hospital once for each year. Join against one label row per
# county instead so each hospital appears exactly once.
county_labels = v_df[['fips', 'county_name', 'state_name']].drop_duplicates(subset='fips')
hospitals_df = hospitals_df.merge(county_labels, on='fips', how='inner')

# Generating the Visualization

In [4]:
def filter_df(input_df,  year=None, state=None):
    """
    Filters the input dataframe by specified year and state for subset views of the heatmap

    Inputs
    -------
    input_df: pd.DataFrame()
        the dataframe to be subsetted; must contain 'fips', 'year' and 'state_name' columns
    year: str
        the year by which to subset the dataframe. It is compared as a string, so
        2023 and "2023" are equivalent. When the year is not present in the data
        (e.g. None), the rows are averaged across all years instead.
    state: str
        the name of the state by which to subset the dataframe. None, "" and the
        dropdown's literal 'None' option all mean "no state filter".

    Outputs
    -------
    df: pd.DataFrame()
        the subsetted dataframe. For a known year this is a copy of the matching
        rows (original index kept). Otherwise it holds one row per county with
        numeric columns averaged and other columns taken from the first row,
        sorted by 'outcome' in descending order with a fresh index.
    """
    # The state dropdown offers the string 'None'; treating it as a real state
    # name would filter every row away.
    state_name = None if state in (None, "None", "") else str(state)
    year_key = str(year)

    rows = input_df
    if state_name is not None:
        rows = rows.loc[rows['state_name'] == state_name]

    if year_key in set(input_df['year']):
        return rows.loc[rows['year'] == year_key].copy()

    # Year not in the data: collapse to one row per county, averaged over years.
    # The aggregation is driven by input_df (not a notebook global) and honours
    # the state filter applied above.
    aggregation = {
        column: 'mean' if pd.api.types.is_numeric_dtype(rows[column]) else 'first'
        for column in rows.columns
        if column != 'fips'
    }
    grouped_df = rows.groupby('fips', as_index=False).agg(aggregation)
    if 'outcome' in grouped_df.columns:
        grouped_df = grouped_df.sort_values(by='outcome', ascending=False)

    return grouped_df.reset_index(drop=True)

def _min_max_scale(values):
    """Rescale values to [0, 1], ignoring NaNs; a constant input maps to 0 (NaNs stay NaN)."""
    values = np.asarray(values, dtype=float)
    present = values[~np.isnan(values)]
    if present.size == 0:
        return values
    low, high = present.min(), present.max()
    if high == low:
        # No spread: avoid 0/0 and let this input contribute nothing.
        return np.where(np.isnan(values), np.nan, 0.0)
    return (values - low) / (high - low)


def normalise_columns(input_df, columns, selection_year):
    """
    Creates a normalised ([0-1]) aggregate score by adding the min-max normalised data
    from each specified column and then normalising the sum again

    Inputs
    ------
    input_df: pd.DataFrame()
        the dataframe from which column data is read
    columns: [str]
        the list of column names to normalise, all of which must be in input_df
    selection_year: str
        the year by which input_df is subsetted; "All" or None averages the numeric
        columns per county ('fips') across all years instead

    Output
    -------
    acc: np.array
        array of normalised aggregate values between 0 and 1, one per row of the
        subsetted data. A row with a missing value in any selected column is NaN.
        When exactly one column is requested, that column is returned unchanged
        (a pd.Series of raw values, not normalised).

    """
    if selection_year in ("All", None):
        # numeric_only: the frame also holds string columns that cannot be averaged.
        df = input_df.groupby('fips').mean(numeric_only=True)
    else:
        df = input_df[input_df['year'] == str(selection_year)]

    if len(columns) == 1:
        return df[columns[0]]

    acc = np.zeros(len(df))
    for column in columns:
        acc += _min_max_scale(df[column])

    # NaN-aware rescale: the builtin min()/max() return NaN as soon as a NaN
    # comes first, which would blank out the score of every county.
    return _min_max_scale(acc)
    
def print_top_counties(df, year, state=None, hospitals=False):
    """
    Prints the counties with the highest outcome (RSP012) values for the given year and state. Optionally prints all hospitals within those counties.

    Inputs
    ------
    df: pd.DataFrame()
        the dataframe from which to print data; must contain 'fips', 'year',
        'county_name', 'state_name' and 'outcome' (plus 'hospitals' when
        hospitals=True)
    year: str
        the year by which the dataframe is subsetted. When the year is not present
        in df, the values are averaged per county across all years instead.
    state: str
        the name of the state by which the dataframe is subsetted; None, "" or
        'None' means all states
    hospitals: bool
        whether to print the names all of hospitals located within each relevant county

    Outputs: None
    """
    # ANSI escape codes used to style the terminal output.
    bold, underline, dark_cyan, end = '\033[1m', '\033[4m', '\033[36m', '\033[0m'

    year = str(year)
    if state in (None, "None", ""):
        state = None

    # Work on the frame that was passed in, not on a notebook-level global.
    rows = df if state is None else df.loc[df['state_name'] == state]

    if year in set(df['year']):
        print(bold + underline + f"Top Counties for {year} {'(' + state + ')' if state else ''}" + end)
        candidates = rows.loc[rows['year'] == year]
    else:
        print(bold + underline + f"Top Counties Averaged Across All Years {' (' + state + ')' if state else ''}" + end)
        # One row per county: numeric columns averaged, other columns taken
        # from the county's first row.
        aggregation = {
            column: 'mean' if pd.api.types.is_numeric_dtype(rows[column]) else 'first'
            for column in rows.columns
            if column != 'fips'
        }
        candidates = rows.groupby('fips', as_index=False).agg(aggregation)

    top_ten = candidates.sort_values(by='outcome', ascending=False).head(10)
    for _, row in top_ten.iterrows():
        # A missing outcome cannot be converted to int; print a placeholder instead.
        cases = "n/a" if pd.isna(row['outcome']) else f"{int(row['outcome']):,}"
        print(f"{bold} {dark_cyan} [{row['fips']}] {row['county_name']}, {row['state_name']}: {end} {cases} cases")

        if hospitals:
            # 'hospitals' is a JSON-encoded list of names; it is NaN (not a
            # string) for counties that have no hospital record.
            encoded = row.get('hospitals')
            names = json.loads(encoded) if isinstance(encoded, str) else []
            for hospital in names:
                print(str(hospital) + ', ', end='')
            print('\n')
    print("\n")

In [5]:
app = Dash(__name__)

# Shared look-and-feel values for the dashboard layout.
_FONT = "Helvetica Neue"
_TEXT_COLOR = "#4b5563"
_DROPDOWN_STYLE = {"width": "100%", "margin-bottom": "5px", 'margin-top': "5px", 'font-family': _FONT}
_CHECKLIST_LABEL_STYLE = {"display": "flex", "align-items": "center", 'font-family': _FONT, 'flex-direction': 'row'}
_PROMPT_STYLE = {'font-size': '16px', 'font-family': _FONT}

# (label shown to the user, v_df column name) for every selectable metric.
_METRIC_CHOICES = [
    ('RSP012 Cases', 'outcome'),
    ('Low to Moderate Health Levels', 'health'),
    ('County Population', 'population'),
    ('% Smokers', 'smokers'),
    ('% Obese', 'obesity'),
    ('Local Pollution Levels (avg. daily ppm)', 'pollution'),
    ('Annual Deaths from Septicemia', 'sep_deaths'),
    ('Annual Deaths from Pneumonia', 'pn_deaths'),
    ('Annual Deaths from Hypertension', 'hyp_deaths'),
]


def _checklist_option(label, value, color=None):
    """Build one dcc.Checklist option whose label is a styled html.Div."""
    label_style = {'font-size': 14, 'font-family': _FONT, "letter-spacing": "-0.03em"}
    if color is not None:
        label_style['color'] = color
    return {"label": html.Div([label], style=label_style), "value": value}


# These two components are referenced as objects by the callbacks below, so
# they need module-level names.
map_trigger = html.Div()
map_graph = dcc.Graph(id="graph", style={'width': "100%", "height": "100%", "flex": 1})

_header = html.Div(
    children=[
        html.H4(
            "ARDS in the United States",
            style={'font-size': "34px", 'font-family': _FONT, "margin": '0px', 'padding': "0px", "color": _TEXT_COLOR},
        )
    ],
    style={'flex': 0.1, 'flex-grow': 0, 'width': "100%"},
)

_map_panel = html.Div(
    [
        dcc.Loading(
            [map_trigger, map_graph],
            parent_style={'height': '100%'},
            type='circle',
            style={'color': "#e7e7e7"},
        ),
    ],
    style={
        'border': "1px solid #e7e7e7",
        "width": "90%",
        "height": "90%",
        'flex': 1,
        # CSS order is offset-x, offset-y, blur, spread. The blur radius must
        # not be negative; the previous "4px 4px -1px 11px" was invalid, so
        # no shadow was drawn.
        'box-shadow': "4px 4px 11px -1px #e7e7e7",
        "background-color": "white",
        'display': 'flex',
        'flex-direction': "column",
    },
)

_year_dropdown = dcc.Dropdown(
    sorted(set(v_df['year'].apply(str).str[:4])),
    id="selection_year",
    style=_DROPDOWN_STYLE,
    value='2023',
)

_state_dropdown = dcc.Dropdown(
    # 'None' is an explicit "no state filter" choice next to the state names.
    ['None', *sorted(set(v_df['state_name']))],
    id="state_dropdown",
    style=_DROPDOWN_STYLE,
    placeholder='Filter States',
)

_metric_picker = html.Div(
    [
        html.P("Select options to view (normalized when aggregated):", style=_PROMPT_STYLE),
        dcc.Checklist(
            options=[_checklist_option(label, column, color=_TEXT_COLOR) for label, column in _METRIC_CHOICES],
            value=['outcome'],
            labelStyle=_CHECKLIST_LABEL_STYLE,
            style={'display': 'flex', "gap": '5px', "flex-wrap": 'wrap'},
            id='metrics',
            inline=True,
        ),
    ],
    style={"display": 'flex', "flex-direction": "column"},
)

_overlay_picker = html.Div(
    [
        html.P("Select overlays:", style=_PROMPT_STYLE),
        dcc.Checklist(
            options=[_checklist_option('Hospitals', 'hospitals')],
            value=[],
            labelStyle=_CHECKLIST_LABEL_STYLE,
            style={'display': 'flex'},
            id='overlays',
            inline=True,
        ),
    ],
    style={'display': 'flex', 'flex-direction': 'row'},
)

_controls_panel = html.Div(
    [
        html.H4('Controls', style={'font-size': "20px", 'font-family': _FONT, "color": _TEXT_COLOR, "margin": '0px', 'padding': "0px"}),
        _year_dropdown,
        _state_dropdown,
        _metric_picker,
        _overlay_picker,
        # Empty flexible spacer that takes up the remaining column height.
        html.Div([], style={'flex': 1, "width": '100%'}),
    ],
    style={"flex": 0.7, "height": "100%", "display": "flex", "flex-direction": 'column', 'padding': "20px"},
)

app.layout = html.Div(
    children=[
        _header,
        html.Div(
            children=[_map_panel, _controls_panel],
            style={
                'padding': "20px",
                'flex': 1,
                "width": "100%",
                'box-sizing': "border-box",
                "display": 'flex',
                'flex-direction': 'row',
                'justify-content': "center",
                "align-items": 'center',
                'gap': '0px',
            },
        ),
    ],
    style={
        'background-color': "#eeeeee",
        "height": "100vh",
        "width": "100vw",
        'display': 'flex',
        "flex-direction": 'column',
        "border": "2px solid #e7e7e7",
        'padding': "20px",
        # '!important' is not accepted in inline style values, so the previous
        # '0px !important' was discarded; a plain value is applied.
        'margin': '0px',
    },
)

@app.callback(Output(map_trigger, 'children'), Input(map_graph, 'loading_state'))
def onLoading(state):
    """
    Callback wired from the map graph's 'loading_state' to the children of the
    trigger div that sits next to it inside the dcc.Loading wrapper.

    The received state is not inspected and the trigger div is given no content.
    """
    return None

def _hover_number(value, digits=None):
    """Format a metric for hover text: None when missing, int when digits is None, else rounded."""
    if math.isnan(value):
        return None
    return int(value) if digits is None else round(value, digits)


def _county_hover_text(row):
    """Build the plotly hovertemplate string for one county row of v_df."""
    return (
        f"<em>{row['county_name']}, {row['state_name']}</em> <br> "
        f"<i>RSP012 Discharges: {_hover_number(row['outcome'])}</i><br> "
        f"County Population: {_hover_number(row['population'])} people <br> "
        f"Low to Moderate Health: {_hover_number(row['health'], 1)} % <br> "
        f"Smokers: {_hover_number(row['smokers'], 1)}% <br>"
        f"Obese: {_hover_number(row['obesity'], 1)}% <br> "
        f"Pollution (ppm): {_hover_number(row['pollution'], 1)} <br> "
        f"Deaths from Sepsis: {_hover_number(row['sep_deaths'])} <br> "
        f"Death from Hypertension: {_hover_number(row['hyp_deaths'])} <br> "
        f"Deaths from Pneumonia: {_hover_number(row['pn_deaths'])} <br>  "
        # '%{z}' is filled in by plotly with the value plotted for the county.
        f"Normalised Score: %{{z}} <extra>{row['state_abbr']}</extra>"
    )


@app.callback(
    Output("graph", "figure"),
    Input("metrics", "value"),
    Input("overlays", "value"),
    Input("state_dropdown", "value"),
    Input("selection_year", "value"),
)
def display_choropleth(metrics, overlays, state_dropdown, selection_year):
    """
    Rebuilds the county choropleth (plus the optional hospital overlay) whenever a
    control changes, and prints the top counties for the current selection.

    Inputs
    ------
    metrics: [str]
        v_df column names ticked in the 'metrics' checklist
    overlays: [str]
        overlay names ticked in the 'overlays' checklist ('hospitals')
    state_dropdown: str
        state name chosen in the dropdown; 'None' or None shows every state
    selection_year: str
        year chosen in the dropdown; a cleared dropdown leaves the figure as it is

    Outputs
    -------
    fig: go.Figure
        the figure shown in the "graph" component
    """
    # The dropdown offers the literal string 'None' as its "no filter" choice.
    state = None if state_dropdown in ("None", None) else state_dropdown

    print_top_counties(v_df, str(selection_year), state=state)

    if selection_year is None:
        # Year dropdown was cleared: keep the current figure instead of failing.
        raise PreventUpdate

    # Scores are normalised over every county of the selected year so a county
    # keeps the same score whether or not a state filter is active. The rows and
    # the scores are then cut down with the same mask so locations, z and hover
    # text stay aligned (previously z covered all states while locations were
    # state-filtered).
    year_df = filter_df(v_df, year=selection_year, state=None)
    scores = np.asarray(normalise_columns(v_df, metrics, selection_year))
    if state is None:
        view_df = year_df
    else:
        in_state = (year_df['state_name'] == state).to_numpy()
        view_df = year_df[in_state]
        scores = scores[in_state]

    fig = go.Figure()

    # adding the heatmap
    fig.add_trace(go.Choropleth(
        locations=view_df['fips'],
        z=scores,
        colorscale="Agsunset",
        zmax=None if metrics != ['outcome'] else 3000,
        zmin=0,
        hovertemplate=[_county_hover_text(row) for _, row in view_df.iterrows()],
        # Only the shapes of the displayed counties are parsed and sent, rather
        # than one copy of every county for every year in v_df.
        geojson={"type": "FeatureCollection", "features": [json.loads(shape) for shape in view_df['geojson']]},
        colorbar=dict(orientation='h', thickness=10),
        marker_line_color='black',
    ))

    # configuring map projections and margins
    fig.update_layout(
        margin={"r": 0, "t": 0, "l": 0, "b": 0},
        geo=dict(
            scope='usa',
            projection=go.layout.geo.Projection(type='albers usa'),
            fitbounds='locations' if state is not None else False,
        ),
    )

    # Hospitals of the selected state (or all of them); the same subset feeds
    # the coordinates and the hover text so the two cannot drift apart.
    if state is None:
        shown_hospitals = hospitals_df
    else:
        shown_hospitals = hospitals_df[hospitals_df['fips'].str[:2] == inverse_state_mapping[state]]

    # adding the hospitals scatterplot, which starts as invisible
    fig.add_trace(go.Scattergeo(
        lon=shown_hospitals['x'],
        lat=shown_hospitals['y'],
        mode='markers',
        marker_color="red",
        hovertemplate=[
            f"<em>{row['NAME']}</em> <br> County: {row['county_name']} <extra>{row['state_name']}</extra>"
            for _, row in shown_hospitals.iterrows()
        ],
        visible='hospitals' in (overlays or []),
        marker=dict(
            size=4,
            opacity=0.5,
            symbol='square',
        ),
    ))

    return fig

# Start the Flask server that backs the Dash app on localhost port 8000.
app.server.run(host='127.0.0.1', port=8000, debug=False)

 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:8000
[33mPress CTRL+C to quit[0m
127.0.0.1 - - [17/Jul/2023 11:12:58] "GET / HTTP/1.1" 200 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "GET /_dash-layout HTTP/1.1" 200 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "GET /_dash-dependencies HTTP/1.1" 200 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "GET /_favicon.ico?v=2.11.1 HTTP/1.1" 200 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "[36mGET /_dash-component-suites/dash/dcc/async-graph.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "[36mGET /_dash-component-suites/dash/dcc/async-dropdown.js HTTP/1.1[0m" 304 -
127.0.0.1 - - [17/Jul/2023 11:12:58] "[36mGET /_dash-component-suites/dash/dcc/async-plotlyjs.js HTTP/1.1[0m" 304 -


[1m[4mTop Counties for 2023 [0m
[1m [36m [06037] Bradley, California: [0m 12,129 cases
[1m [36m [48201] Harris, Texas: [0m 6,388 cases
[1m [36m [17031] Cook, Illinois: [0m 6,155 cases
[1m [36m [06073] Franklin, California: [0m 4,644 cases
[1m [36m [04013] Maricopa, Arizona: [0m 4,415 cases
[1m [36m [48113] Dallas, Texas: [0m 3,599 cases
[1m [36m [12086] Miami-Dade, Florida: [0m 3,505 cases
[1m [36m [48029] Bexar, Texas: [0m 3,497 cases
[1m [36m [06065] Dallas, California: [0m 3,327 cases
[1m [36m [32003] Clark, Nevada: [0m 3,324 cases




127.0.0.1 - - [17/Jul/2023 11:12:59] "POST /_dash-update-component HTTP/1.1" 200 -
127.0.0.1 - - [17/Jul/2023 11:13:05] "POST /_dash-update-component HTTP/1.1" 200 -


[1m[4mTop Counties for 2023 (California)[0m
[1m [36m [06037] Bradley, California: [0m 12,129 cases
[1m [36m [06073] Franklin, California: [0m 4,644 cases
[1m [36m [06065] Dallas, California: [0m 3,327 cases
[1m [36m [06071] San Bernardino, California: [0m 3,018 cases
[1m [36m [06059] Crawford, California: [0m 3,004 cases
[1m [36m [06067] Desha, California: [0m 2,617 cases
[1m [36m [06085] Santa Clara, California: [0m 2,425 cases
[1m [36m [06001] Alameda, California: [0m 1,956 cases
[1m [36m [06019] Fresno, California: [0m 1,778 cases
[1m [36m [06013] Mohave, California: [0m 1,410 cases




127.0.0.1 - - [17/Jul/2023 11:13:16] "POST /_dash-update-component HTTP/1.1" 200 -
