# Cholera Data Project
## by: Leilani Reich

## Setup

In [None]:
# Import Packages for project

import plotly.express as px
from jupyter_dash import JupyterDash
from dash import dash_table, dcc, State, html
from dash.dependencies import Input, Output
import pandas as pd
import plotly.graph_objects as go
import dash_bootstrap_components as dbc

# --------------------------------------------
# Load Data
#
# Every pd.read_csv call below resolves its file name against the current
# working directory.

# Part 1: tab-separated daily cholera counts; feeds the attacks/deaths table
# and the line chart.
df = pd.read_csv(
    "choleraDeaths.tsv",
    sep='\t',
)

# Part 2: naplesCholeraAgeSexData.tsv holds fatality numbers for men and women
# in different age groups from cholera in Naples. Some lines of the file were
# fixed by hand so that it parses; text following a "#" is skipped as a comment.
naples_age_sex_data = pd.read_csv(
    "naplesCholeraAgeSexData.tsv",
    sep='\t',
    comment="#",
)

# Part 2: UKcensus1851.csv holds the number of men and women in different age
# groups in the UK ("#" comments are skipped here as well).
uk_census_data = pd.read_csv(
    "UKcensus1851.csv",
    comment="#",
)

# Part 3: map data. Neither file has a header row, so the column names are
# supplied here; both files share the same two coordinate columns.
coordinate_columns = ['x-coord', 'y-coord']
death_locations_data = pd.read_csv(
    "choleraDeathLocations.csv",
    header=None,
    names=['Deaths'] + coordinate_columns,
)
pump_locations_data = pd.read_csv(
    "choleraPumpLocations.csv",
    header=None,
    names=coordinate_columns,
)

# --------------------------------------------
# Build App
#
# FA is the Font Awesome 4.7 stylesheet; it is loaded next to the Bootstrap
# theme so the layout can use "fa ..." icon classes.
FA = "https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css"
stylesheets = [dbc.themes.BOOTSTRAP, FA]
app = JupyterDash(__name__, external_stylesheets=stylesheets)


### Dataset Manipulation

In [None]:
import re

# --------------------------------------------
# UK census: add a per-age-group total column (part 2).
# Work on a copy so the frame loaded from disk keeps its original columns.
uk_census_data_add = uk_census_data.copy()
uk_census_data_add.insert(
    3, "total", uk_census_data["male"] + uk_census_data["female"], True
)

# Overall head counts; used for the men-vs-women pie chart and for the
# "Overall Totals" row of the census table.
male_total_uk_census = uk_census_data_add['male'].sum()
female_total_uk_census = uk_census_data_add['female'].sum()

# Display copy of the census table: same rows, but every count rendered as a
# string with thousands separators.
# Whole columns are assigned at once. The element-wise form
# `frame[col][i] = value` is chained assignment: pandas warns about it and,
# with Copy-on-Write enabled, it never updates the frame.
uk_census_data_str = uk_census_data_add.astype(str)
for count_column in ("male", "female", "total"):
    uk_census_data_str[count_column] = (
        uk_census_data_add[count_column].map("{:,}".format)
    )

# Append the totals row that the DataTable highlights as its last row.
uk_census_data_str.loc[len(uk_census_data_str.index)] = [
    'Overall Totals',
    "{:,}".format(male_total_uk_census),
    "{:,}".format(female_total_uk_census),
    "{:,}".format(uk_census_data_add['total'].sum()),
]

# --------------------------------------------
# Cholera deaths: running totals of attacks and deaths up to and including
# each row, inserted right after the three original columns.
df.insert(3, "C. Attacks", df['Attack'].cumsum(), True)
df.insert(4, "C. Deaths", df['Death'].cumsum(), True)

# Shorten each date to its leading "<digits>-<three letters>" part, i.e. drop
# the trailing "-<year>" (the year is moved into the column title below).
# A raw string is required: "\d" in a normal string literal is an invalid
# escape sequence. Indexing [0] deliberately fails loudly on a date that does
# not match the expected shape.
day_month_pattern = re.compile(r"(\d+-[a-zA-Z]{3})-")
df['Date'] = [day_month_pattern.findall(date)[0] for date in df['Date']]

# Renaming columns for choleradeaths dataset
df = df.rename(
    columns={'Date': 'Date in 1854', 'Attack': 'Attacks', 'Death': 'Deaths'})

# Capitalize the column names of the census and Naples tables so they read
# well as table headers and chart labels.
census_display_names = {
    'age': 'Age', 'male': 'Male', 'female': 'Female', 'total': 'Total'}
uk_census_data_add = uk_census_data_add.rename(columns=census_display_names)
uk_census_data_str = uk_census_data_str.rename(columns=census_display_names)
naples_age_sex_data = naples_age_sex_data.rename(
    columns={'age': 'Age', 'male': 'Male', 'female': 'Female'})


### Creating Functions for Making Charts/Graphs
- Each chart is built by its own function so that the app layout below stays readable.

In [None]:
def create_line_chart():
    """Build the part-1 line chart of daily and cumulative attacks and deaths.

    Reads the module-level ``df`` and plots its "Attacks", "Deaths",
    "C. Attacks" and "C. Deaths" columns against "Date in 1854", one colored
    line (with markers) per column.

    Returns:
        The Plotly figure, with a unified hover box along the x axis that
        lists only the y value of each line.
    """
    # One entry per plotted column; the key order is also the trace order.
    series_colors = {
        "Attacks": "rgb(230, 211, 126)",
        "Deaths": "rgb(208, 55, 111)",
        "C. Attacks": "rgb(145, 62, 175)",
        "C. Deaths": "rgb(47, 37, 15)",
    }

    line_chart = px.line(
        df,
        x="Date in 1854",
        y=list(series_colors),
        markers=True,
        color_discrete_map=series_colors,
    )
    line_chart.update_traces(hovertemplate='%{y}')
    line_chart.update_layout(
        title="Attacks, deaths, cumulative attacks, & cumulative deaths vs date in 1854",
        yaxis_title="Count (associated with each variable)",
        legend_title='',
        hovermode="x unified",
    )
    return line_chart



# Making bar chart for part 2
def create_bar_chart(data, x_val, y_val, color):
    """Build a single-series bar chart of fatalities by age.

    Args:
        data: Table indexable by column name (``data[x_val]``, ``data[y_val]``).
        x_val: Name of the column plotted on the x axis.
        y_val: Name of the column plotted on the y axis; it is also appended
            to the chart title, so it must be a string.
        color: Bar color; a single color value or an iterable of colors.

    Returns:
        The Plotly figure.
    """
    bar_chart = go.Figure(data=[go.Bar(
        x=data[x_val],
        y=data[y_val],
        marker_color=color,  # marker color can be a single color value or an iterable
    )])

    # The axis title font is set through title.font: the older `titlefont`
    # axis attribute (and its `titlefont_size` shorthand) is deprecated and is
    # rejected by newer Plotly releases.
    bar_chart.update_layout(
        title="Number of fatalities vs age for " + y_val,
        xaxis=dict(
            title=dict(text='Age', font=dict(size=16)),
            tickfont_size=14,
        ),
        yaxis=dict(
            title=dict(text='Number of Fatalities', font=dict(size=16)),
            tickfont_size=14,
        ),
    )

    return bar_chart

def _create_sex_grouped_bar_chart(data, hovertemplate, **layout_options):
    """Build a grouped bar chart with one Male and one Female bar per age group.

    Shared implementation for the Naples and UK census charts so that their
    colors, grouping and label rotation cannot drift apart.

    Args:
        data: Table with "Age", "Male" and "Female" columns.
        hovertemplate: Plotly hover template applied to both traces.
        **layout_options: Extra keyword arguments forwarded to
            ``fig.update_layout`` (titles, background color, ...).

    Returns:
        The Plotly figure.
    """
    fig = go.Figure()
    for sex, bar_color in (("Male", 'rgb(114, 204, 230)'),
                           ("Female", 'rgb(234, 141, 44)')):
        fig.add_trace(go.Bar(
            x=data['Age'],
            y=data[sex],
            name=sex,
            marker_color=bar_color,
        ))

    # Here we modify the tickangle of the xaxis, resulting in rotated labels.
    fig.update_layout(barmode='group', xaxis_tickangle=-45, **layout_options)
    fig.update_traces(hovertemplate=hovertemplate)
    return fig


def create_combined_bar_chart():
    """Build the Naples chart: deaths per 10,000 inhabitants by age and sex.

    Reads the module-level ``naples_age_sex_data``.

    Returns:
        The Plotly figure, drawn on a light-gray paper background.
    """
    return _create_sex_grouped_bar_chart(
        naples_age_sex_data,
        hovertemplate='Age range: %{x} (yrs) <br>Deaths: %{y} (per 10k)',
        title="Deaths (per 10,000 inhabitants) vs age (yrs)",
        xaxis_title="Age range (yrs)",
        yaxis_title="Deaths (per 10,000 inhabitants)",
        legend_title="  Sex",
        paper_bgcolor="rgb(225, 229, 230)",
    )


def create_combined_bar_chart2():
    """Build the UK census chart: number of people by age and sex.

    Reads the module-level ``uk_census_data_add``.

    Returns:
        The Plotly figure.
    """
    return _create_sex_grouped_bar_chart(
        uk_census_data_add,
        hovertemplate='Age range: %{x} (yrs) <br>Count: %{y}',
        title="Census count by age and sex",
        xaxis_title="Age range (yrs)",
        yaxis_title="Number of people",
        legend_title="  Sex",
    )


def create_pie_chart(data, values, names, title, age=False, mf=False):
    """Build a donut-style pie chart (hole of 0.3) with a boxed legend.

    Args:
        data: Data passed to ``px.pie`` as its data frame.
        values: Forwarded to ``px.pie`` as ``values``. The strings "Male" and
            "Female" additionally select a hover text that names that sex and
            labels each slice as an age range; anything else gets a hover
            text that labels each slice as a sex.
        names: Forwarded to ``px.pie`` as ``names``.
        title: Chart title.
        age: When true the legend is titled "Age Category (yrs)", otherwise
            "  Sex".
        mf: When true a fixed two-color sequence is used for the slices;
            otherwise an empty color sequence is passed.

    Returns:
        The Plotly figure.
    """
    slice_colors = ['rgb(234, 141, 44)', 'rgb(114, 204, 230)'] if mf else []
    legend_heading = "Age Category (yrs)" if age else "  Sex"

    # `values` may be a plain list, so compare against the two column names
    # directly instead of using it as a dictionary key.
    if values == "Male":
        hover_text = "Sex: Male<br>Age range: %{label}<br>Count: %{value}"
    elif values == "Female":
        hover_text = 'Sex: Female<br>Age range: %{label}<br>Count: %{value}'
    else:
        hover_text = 'Sex: %{label}<br>Count: %{value}'

    pie_chart = px.pie(
        data,
        values=values,
        names=names,
        title=title,
        hole=.3,
        color_discrete_sequence=slice_colors,
    )

    legend_box = dict(
        x=1,
        y=1,
        bordercolor="Black",
        borderwidth=1,
        traceorder='reversed',
    )
    pie_chart.update_layout(legend_title=legend_heading, legend=legend_box)
    pie_chart.update_traces(hovertemplate=hover_text)

    return pie_chart

def create_map(fitbounds=True):
    """Build the map of cholera deaths (red) and water pumps (blue).

    Reads the module-level ``death_locations_data`` and
    ``pump_locations_data``; their 'x-coord' column is used as longitude and
    'y-coord' as latitude. Death markers are sized by the 'Deaths' column,
    pump markers have a fixed size.

    Args:
        fitbounds: Currently unused; kept so existing callers that pass it
            keep working.

    Returns:
        The Plotly figure with two legend entries, "Death" and "Pump".
    """
    # Same hover text for both traces. The "x-coord" label previously lacked
    # the colon that the "y-coord:" label has.
    hover_text = 'x-coord: %{lon} <br>y-coord: %{lat}'

    fig = px.scatter_mapbox(
        death_locations_data,
        lon='x-coord',
        lat='y-coord',
        zoom=16,
        size='Deaths',
        color_discrete_sequence=['red'],
        mapbox_style='carto-positron',
    )
    # sizemin keeps the smallest death markers visible.
    fig.update_traces(
        name='Death',
        showlegend=True,
        hovertemplate=hover_text,
        marker=dict(opacity=0.5, sizemin=5),
    )

    # The pumps are built as a separate figure only to obtain a configured
    # trace, which is then added to the deaths map.
    pump_fig = px.scatter_mapbox(
        pump_locations_data,
        lon='x-coord',
        lat='y-coord',
        color_discrete_sequence=['blue'],
    )
    pump_fig.update_traces(
        name='Pump',
        showlegend=True,
        hovertemplate=hover_text,
        marker=dict(size=20, opacity=0.5),
    )
    fig.add_trace(pump_fig.data[0])

    fig.update_layout(legend_title="")

    return fig


### Designing App Layout

In [None]:
# App layout
#
# The page is one fluid Bootstrap container holding, top to bottom: the title,
# a collapsible "About" panel, the London 1854 section (table + line chart),
# the Naples section (bar chart + table), the UK 1851 census section (three
# pie charts, then bar chart + table) and the map section.

# Style fragments shared by the three DataTables.
_ODD_ROW_SHADE = {
    'if': {'row_index': 'odd'},
    'backgroundColor': 'rgb(247, 249, 250)',
}
_TABLE_HEADER_STYLE = {
    'backgroundColor': 'rgb(247, 249, 250)',
    'color': 'black',
    'fontWeight': 'bold'
}
# Header text colors matching the Male/Female bar colors of the charts.
_SEX_HEADER_COLORS = [
    {
        'if': {'column_id': 'Male'},
        'color': 'rgb(114, 204, 230)'
    },
    {
        'if': {'column_id': 'Female'},
        'color': 'rgb(234, 141, 44)'
    },
]


def _external_link(url):
    """Return a black anchor that shows *url* as its text and opens it in a new tab.

    html.A is used instead of dcc.Link because dcc.Link is meant for
    navigation inside the Dash app, not for destinations outside it.
    """
    return html.A(url, href=url, target="_blank", rel="noopener noreferrer",
                  style={"color": 'black'})


app.layout = dbc.Container([
    html.H1("London Cholera Dashboard",
            style={'text-align': 'center',
                   'font-size': '50px',
                   "height": '100px', 'background-color': 'rgb(225, 229, 230)'}),

    # for giving info on me and my project
    dbc.Row([
        # Toggles the collapse below (see the toggle_collapse callback).
        # NOTE(review): "red" is not one of Bootstrap's contextual color names
        # (primary, danger, ...) - confirm the button renders as intended.
        dbc.Button(
            html.Span(["About", " ", html.I(className="fa fa-angle-down")]),
            id="collapse-button",
            color="red",
            n_clicks=0,
            style={"width": '200px', 'font-size': '30px'}
        ),
        dbc.Collapse(
            dbc.Card(
                dbc.Row([
                    dbc.Col(
                        dbc.CardBody([
                            html.P("About Me:", style={'fontWeight': 'bold'}),
                            html.P("Creator: Leilani Reich"),
                            html.P(["LinkedIn: ",
                                    _external_link("https://www.linkedin.com/in/leilani-reich/")]),
                            html.P(["Portfolio: ",
                                    _external_link("https://leilani-reich.github.io/")]),
                        ]),
                        width=5, style={'border-right': '2px solid black'}
                    ),
                    dbc.Col(
                        dbc.CardBody([
                            html.P("Project Details:", style={'fontWeight': 'bold'}),
                            html.P("Description: This dashboard visualizes data from London’s cholera outbreak in 1854."),
                            html.P("Libraries: Plotly Dash (Jupyter Dash): dash_html_components, dash-bootstrap-components; Pandas"),
                            html.P(["Data and Source Code: ",
                                    _external_link("https://github.com/leilani-reich/London-Cholera-DashBoard")]),
                        ]),
                        width=5
                    ),
                ],
                    # NOTE(review): "size" is not a CSS property, so it has no
                    # visible effect; 'font-size' is what sets the text size.
                    style={"size": "15px", 'text-align': 'left', 'font-size': '20px',
                           'border-top': '2px solid black'},
                    justify="center")
            ),
            id='about-project-collapse', is_open=False, className="w-100",
            style={'background-color': 'rgb(225, 229, 230)'}
        ),
    ], justify="center", style={'background-color': 'rgb(225, 229, 230)'}),

    ########################################################################

    # Header for first section: London 1854 cholera data
    dbc.Row(
        dbc.Col(
            html.H3("London 1854 Cholera Data",
                    style={"border-top": '2px solid black', 'background-color': 'white'})
        ),
        style={"background-color": "white"}
    ),

    # Part 1 row: table on the left, line chart on the right
    dbc.Row([
        # For part 1 table
        # https://dash.plotly.com/datatable
        dbc.Col([
            html.H4("Attacks and deaths per day",
                    style={'text-align': 'center', 'margin-top': '35px', 'margin-bottom': '-35px'}),
            dash_table.DataTable(  # Data table of attacks and deaths per day
                data=df.to_dict('records'),
                columns=[{"name": i, "id": i} for i in df.columns],
                style_cell={'text-align': 'right', 'width': '20%'},
                style_table={'overflowY': 'scroll', 'margin-top': '50px',
                             'padding-left': '50px', 'height': '900px'},
                style_data_conditional=[_ODD_ROW_SHADE],
                style_header=_TABLE_HEADER_STYLE,
                # Header colors match the line colors used by create_line_chart.
                style_header_conditional=[
                    {
                        'if': {'column_id': 'Attacks'},
                        'color': 'rgb(230, 211, 126)'
                    },
                    {
                        'if': {'column_id': 'C. Attacks'},
                        'color': 'rgb(145, 62, 175)'
                    },
                    {
                        'if': {'column_id': 'Deaths'},
                        'color': 'rgb(208, 55, 111)'
                    },
                    {
                        'if': {'column_id': 'C. Deaths'},
                        'color': 'rgb(47, 37, 15)'
                    }
                ]
            )
        ], width=4, style={"marginLeft": 'auto', 'marginRight': 'auto'}),

        # For part 1 graph
        dbc.Col([
            # NOTE(review): 'weight' is not a CSS property (probably meant
            # 'width'); it is currently ignored, so the graph fills its
            # column. Left unchanged to keep the rendered page as it is.
            dcc.Graph(id='graph',
                      figure=create_line_chart(),
                      style={'height': '1000px', 'weight': '1200px', 'margin-top': '20px'}),
            html.P('This graph shows how attacks and deaths from cholera in London fluctuated, and how their '
                   'cumulative counts increased from Aug 19 to Sept 29, 1854.',
                   style={"text-align": 'center'})
        ], width=8),

    ], justify='center', style={'border-bottom': 'solid black 2px', 'padding-bottom': '50px',
                                'background-color': 'white'}),

    ########################################################################

    # Header for second section: Naples cholera data
    dbc.Row(
        dbc.Col(
            html.H3("Naples 1884-1911 Cholera Data", style={'background-color': "rgb(225, 229, 230)"})
        ), style={'background-color': "rgb(225, 229, 230)"}
    ),

    # Part 2 row: combined bar chart on the left, table on the right
    dbc.Row([

        # male/female age death data in combined bar chart
        dbc.Col([
            dcc.Graph(figure=create_combined_bar_chart(),
                      style={'height': '600px'}),
            html.P('This chart shows the distribution of deaths per 10,000 inhabitants in Naples,'
                   ' based on age group and sex.', style={"text-align": "center"})
        ], width=6),

        # Data table of the Naples deaths by age group and sex
        dbc.Col([
            html.H4("Age categories for male and female deaths per 10,000 inhabitants",
                    style={"padding-top": '30px'}),
            dash_table.DataTable(
                data=naples_age_sex_data.to_dict('records'),
                columns=[{"name": i, "id": i} for i in naples_age_sex_data.columns],
                style_cell={'text-align': 'right', 'width': '20%'},
                style_table={'overflowY': 'scroll', 'margin-top': '30px'},
                style_data_conditional=[_ODD_ROW_SHADE],
                style_header=_TABLE_HEADER_STYLE,
                style_header_conditional=_SEX_HEADER_COLORS
            )
        ], style={'margin-top': '50px'}, width=3),

    ], justify="center", style={'border-bottom': 'solid black 2px', 'padding-bottom': '50px',
                                'background-color': 'rgb(225, 229, 230)'}),

    ########################################################################

    # Header for third section: UK 1851 Census Data
    dbc.Row(
        dbc.Col(
            html.H3("United Kingdom 1851 Census Data", style={"background-color": 'white'})
        ), style={"background-color": 'white'}
    ),

    # Three pie charts: men by age, men vs women overall, women by age
    dbc.Row([
        dbc.Col([
            # Pie chart of the census age data for men
            dcc.Graph(figure=create_pie_chart(uk_census_data_add, "Male", "Age",
                                              "Census age data for men", age=True)),
            html.P("This chart reveals the age distribution of men from the 1851 UK census.",
                   style={"text-align": "center"})
        ], width=4),

        dbc.Col([
            # Pie chart for the overall number of men vs women; the two totals
            # and their labels are passed directly instead of column names.
            dcc.Graph(figure=create_pie_chart(uk_census_data_add,
                                              [male_total_uk_census, female_total_uk_census],
                                              ["Male", "Female"], "Overall number of men vs women", mf=True)),
            html.P("This chart shows the total number of men and women from the 1851 UK census.",
                   style={"text-align": "center"})
        ], width=4),

        dbc.Col([
            # Pie chart of the census age data for women
            dcc.Graph(figure=create_pie_chart(uk_census_data_add, "Female", "Age",
                                              "Census age data for women", age=True)),
            html.P("This chart reveals the age distribution of women from the 1851 UK census.",
                   style={"text-align": "center"})
        ], width=4),

    ], style={'background-color': 'white'}),

    dbc.Row([
        # Bar chart of the census age data for men and women
        dbc.Col([
            dcc.Graph(figure=create_combined_bar_chart2(),
                      style={'height': '600px'}),
            html.P("This chart breaks down the 1851 UK census counts by age group and sex.",
                   style={"text-align": "center"})
        ], width=6),

        # Data table of the census age data for men and women
        # (one column each), including the overall totals
        dbc.Col([
            html.H4("Census age data for males and females", style={"padding-top": '30px'}),
            dash_table.DataTable(
                data=uk_census_data_str.to_dict('records'),
                columns=[{"name": i, "id": i} for i in uk_census_data_str.columns],
                style_cell={'textAlign': 'right', 'width': "20%"},
                style_table={'overflowY': 'scroll', 'margin-top': '30px'},
                style_data_conditional=[
                    _ODD_ROW_SHADE,
                    # Highlight the last row, which holds the overall totals.
                    {
                        'if': {'row_index': len(uk_census_data_str)-1},
                        'backgroundColor': 'rgb(250, 252, 184)',
                        'fontWeight': 'bold'
                    }],
                style_header=_TABLE_HEADER_STYLE,
                style_header_conditional=_SEX_HEADER_COLORS
            )
        ], width=4),

    ], justify="center", style={'border-bottom': 'solid black 2px', 'padding-bottom': '50px',
                                'background-color': 'white'}),

    ########################################################################

    # Header for fourth section: map of deaths and pumps
    dbc.Row(
        dbc.Col(
            html.H3("Cholera Deaths and Pumps Geographical Data")
        )
    ),

    dbc.Row(
        dbc.Col([
            html.H4("London neighborhoods deaths and pumps", style={"padding-top": '20px'}),
            dcc.Graph(figure=create_map(True),
                      style={'width': '1200px', 'height': '1000px',
                             'border': 'solid black 2px'}),
            html.P("This map puts into perspective the locations of deaths from cholera (red) in London neighborhoods"
                   " and the pumps (blue), containing contaminated water that caused cholera to spread.",
                   style={"text-align": "center", 'padding-top': "10px"})
        ], width=6, style={"padding-bottom": '50px'}),
        justify="center"),

], fluid=True, style={"font-family": 'Arial', 'background-color': 'rgb(225, 229, 230)'})


@app.callback(
    Output("about-project-collapse", "is_open"),
    [Input("collapse-button", "n_clicks")],
    [State("about-project-collapse", "is_open")],
)
def toggle_collapse(n, is_open):
    """Open or close the About panel when the About button is clicked.

    Args:
        n: Click count of the "collapse-button" component.
        is_open: Current ``is_open`` state of the "about-project-collapse"
            component.

    Returns:
        The inverted state when ``n`` is truthy, otherwise ``is_open``
        unchanged (so a call with zero clicks leaves the panel as it is).
    """
    if not n:
        return is_open
    return not is_open


def update_figure(colorscale):
    """Return a WebGL scatter plot titled "Tips", colored on a continuous scale.

    Plots the "tip" column of the module-level ``df`` against "total_bill",
    colored by "size".

    NOTE(review): nothing in this file calls this function or registers it as
    a callback, and the code that prepares ``df`` here does not create
    "total_bill", "tip" or "size" columns - this looks like leftover example
    code; confirm before relying on it.

    Args:
        colorscale: Passed to ``px.scatter`` as ``color_continuous_scale``.

    Returns:
        The Plotly figure.
    """
    scatter_options = dict(
        x="total_bill",
        y="tip",
        color="size",
        color_continuous_scale=colorscale,
        render_mode="webgl",
        title="Tips",
    )
    tips_figure = px.scatter(df, **scatter_options)
    return tips_figure
# Start the server for the app assembled above.
# NOTE(review): no `mode` argument is passed here, so whether the result is
# displayed inline in the notebook depends on JupyterDash's default display
# mode - confirm, or pass the mode explicitly.
app.run_server()