# In [2]:
import dash
from dash import html, dcc
from dash.dependencies import Input, Output
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objs as go
import pandas as pd

# In [4]:
# Number of figures update_graph returns; the error path must return the
# same count so callers always receive a tuple of identical length.
_FIGURE_COUNT = 7


def _share_per_1000_figure(df, column, mapping, title, value_label, legend_label):
    """Build a stacked bar chart of category shares per 1000 rows by province.

    Args:
        df: Frame with a 'Province' column and the raw ``column`` values.
        column: Name of the coded categorical column to chart.
        mapping: Mapping passed to ``Series.map`` to translate raw codes
            into display labels.
        title: Chart title.
        value_label: Label for the y axis (also used for the 'value' label).
        legend_label: Legend title (also used for the 'variable' label).

    Returns:
        The Plotly figure produced by ``px.bar``.
    """
    # Work on a two-column frame so the caller's data is never mutated and
    # no full-frame copy is required.
    labelled = df[['Province']].assign(**{column: df[column].map(mapping)})
    counts = labelled.groupby(['Province', column]).size().unstack(fill_value=0)
    # Normalise every province row so that its categories sum to 1000.
    per_1000 = counts.div(counts.sum(axis=1), axis=0) * 1000

    fig = px.bar(
        per_1000.reset_index(),
        x='Province',
        y=per_1000.columns,
        title=title,
        labels={'value': value_label, 'variable': legend_label},
        barmode='stack'
    )
    fig.update_layout(
        xaxis_title="Province",
        yaxis_title=value_label,
        legend_title=legend_label
    )
    return fig


def _hospital_figure(df):
    """Build the dual-axis hospital chart (rate bars plus distance line)."""
    hospital_df = df.groupby('Province').agg({
        'NumberOfHospitals': 'sum',
        'AvgDistanceToHospital': 'mean',
        'Total Families': 'sum'
    }).reset_index()
    # Divide by the per-province family total. Dividing by the un-grouped
    # df['Total Families'] would align on the positional index and pair each
    # province with an unrelated row.
    hospital_df['NumberOfHospitalsPer100000'] = (
        hospital_df['NumberOfHospitals'] / hospital_df['Total Families'] * 100000
    )
    national_average = hospital_df['NumberOfHospitalsPer100000'].mean()

    hospital_fig = make_subplots(specs=[[{"secondary_y": True}]])

    # Bars: hospitals per 100,000 families (primary axis).
    hospital_fig.add_trace(
        go.Bar(
            x=hospital_df['Province'],
            y=hospital_df['NumberOfHospitalsPer100000'],
            name='Number of Hospitals per 100000 Families',
            marker_color='lightblue'
        ),
        secondary_y=False,
    )

    # Line: average distance to the nearest hospital (secondary axis).
    hospital_fig.add_trace(
        go.Scatter(
            x=hospital_df['Province'],
            y=hospital_df['AvgDistanceToHospital'],
            name='Average Distance to Nearest Hospital',
            mode='lines+markers',
            marker_color='darkorange',
            line=dict(width=2)
        ),
        secondary_y=True,
    )

    # Flat line at the mean of the province rates, drawn as a trace so that
    # it gets its own legend entry.
    hospital_fig.add_trace(
        go.Scatter(
            x=hospital_df['Province'],
            y=[national_average] * len(hospital_df['Province']),
            name='National Average',
            mode='lines',
            line=dict(color="red", width=2, dash="dot"),
            hoverinfo='skip'
        ),
        secondary_y=False,
    )

    hospital_fig.update_layout(
        title_text="Hospital Data Analysis",
        legend=dict(
            x=0,
            y=1.0,
            traceorder='normal',
            bgcolor='rgba(255,255,255,0)',
            bordercolor='rgba(255,255,255,0)'
        )
    )
    hospital_fig.update_yaxes(title_text="Number of Hospitals per 100000 Families", secondary_y=False)
    hospital_fig.update_yaxes(title_text="Average Distance to Nearest Hospital (km)", secondary_y=True)
    return hospital_fig


def _electricity_figure(df):
    """Build the stacked bar chart of mean PLN / non-PLN users by province."""
    grouped_electricity = df.groupby('Province').agg({
        'PLN users by 1000': 'mean',
        'Non-PLN users by 1000': 'mean'
    }).reset_index()
    electricity_fig = px.bar(
        grouped_electricity,
        x='Province',
        y=['PLN users by 1000', 'Non-PLN users by 1000'],
        title="Electricity Usage by Province for 1000 Families",
        labels={'value': 'Number of Families', 'variable': 'Type of Electricity User'},
        barmode='stack'
    )
    electricity_fig.update_layout(
        xaxis_title="Province",
        yaxis_title="Number of Families",
        legend_title="Type of Electricity User"
    )
    return electricity_fig


def _education_figure(education_data):
    """Build the dual-axis education chart from a pre-aggregated frame."""
    national_average_mean = education_data['Mean_4_per_1000'].mean()

    education_fig = make_subplots(specs=[[{"secondary_y": True}]])

    # NOTE(review): the labels below say "per 10000 Families" while the
    # columns are named "*_per_1000" -- confirm which scale is correct.
    education_fig.add_trace(
        go.Bar(
            x=education_data['Province'],
            y=education_data['Sum_23_per_1000'],
            name='Sum of Public and Private Education Levels per 10000 Families'
        ),
        secondary_y=False
    )

    education_fig.add_trace(
        go.Scatter(
            x=education_data['Province'],
            y=education_data['Mean_4_per_1000'],
            name='Average Distance to Educational Facility',
            mode='lines+markers',
            marker_color='orange'
        ),
        secondary_y=True
    )

    # Flat reference line at the mean of 'Mean_4_per_1000' across provinces.
    education_fig.add_trace(
        go.Scatter(
            x=education_data['Province'],
            y=[national_average_mean] * len(education_data),
            mode='lines',
            line=dict(color='blue', dash='dash'),
            name='National Avg Distance to Educational Institution'
        ),
        secondary_y=True
    )

    education_fig.update_layout(
        title_text='Educational Facilities and Distance Data by Province',
        yaxis_title='Total Educational Facilities per 10000 Families',
        yaxis2_title='Average Distance to Facilities (km)',
        legend_title='Legend'
    )
    return education_fig


def update_graph(selected_year):
    """Build all dashboard figures for the selected survey year.

    Args:
        selected_year: Year selector. ``'2019'`` (or the int ``2019``)
            selects the 2019 data sets; any other value selects 2020.

    Returns:
        A 7-tuple of figures in this order: electricity, fuel, water,
        defecation facility, hospital, computer status, education.
        If anything fails, the error is printed and a 7-tuple of empty
        dicts (blank figures) is returned instead.
    """
    try:
        # Compare as strings so an integer year selects the same data set.
        is_2019 = str(selected_year) == '2019'
        df_selected = df_2019 if is_2019 else df_2020
        education_data = education_data_2019 if is_2019 else education_data_2020

        computer_status_fig = _share_per_1000_figure(
            df_selected, 'ComputerStatus', computer_status_mapping,
            title="Computer Status per 1000 Offices by Province",
            value_label='Number of Offices',
            legend_label='Computer Status',
        )
        hospital_fig = _hospital_figure(df_selected)
        electricity_fig = _electricity_figure(df_selected)
        fuel_fig = _share_per_1000_figure(
            df_selected, 'FuelType', fuel_mapping,
            title="Fuel Used for Cooking per 1000 Families by Province",
            value_label='Number of Families',
            legend_label='Fuel Type',
        )
        water_fig = _share_per_1000_figure(
            df_selected, 'WaterSource', water_mapping,
            title="Water Source per 1000 Families by Province",
            value_label='Number of Families',
            legend_label='Water Source',
        )
        defecation_facility_fig = _share_per_1000_figure(
            df_selected, 'DefecationFacility', defecation_facility_mapping,
            title="Defecation Facility Usage by Province for 1000 Families",
            value_label='Number of Families',
            legend_label='Defecation Facility',
        )
        education_fig = _education_figure(education_data)

        return (
            electricity_fig,
            fuel_fig,
            water_fig,
            defecation_facility_fig,
            hospital_fig,
            computer_status_fig,
            education_fig,
        )

    except Exception as e:
        print(f"An error occurred: {e}")
        # Return one blank figure per output so the tuple length matches
        # the success path.
        return tuple({} for _ in range(_FIGURE_COUNT))
    