# In [3]:
import dash
from dash import dcc, html, Input, Output
import plotly.express as px
import pandas as pd

# --- Data preparation -------------------------------------------------------
# Read the dataset and keep only the rows whose Province is not 'Canada'.
df = pd.read_csv("dataset.csv")
df = df.loc[df['Province'].ne('Canada')]

# 'NOC' is the leading digit of the Occupation text (missing when the text
# does not start with a digit). 'NOC Label' repeats the Occupation text for
# rows that have such a digit and is None for the others.
df['NOC'] = df['Occupation'].str.extract(r'^([0-9])', expand=False)
df['NOC Label'] = df['Occupation'].mask(df['NOC'].isna(), None)

# Alphabetical list of the distinct provinces, used to build the selectors.
provinces = sorted(df['Province'].drop_duplicates())

# Create the Dash application and set its page title in a single step.
app = dash.Dash(__name__, title="Canada's Occupations by Major Field of Study")

# Page layout: a main heading followed by four sections. Each section is a
# sub-heading, an input control and the graph that the control's callback
# fills in.
app.layout = html.Div(children=[
    html.H1(
        children="Canada's Occupations by Major Field of Study",
        style=dict(textAlign='center'),
    ),

    # Section 1: essential services per province, chosen with radio buttons.
    html.H2(children="Essential Services Distributed Across Provinces and Territories"),
    html.Div(
        style=dict(width='40%', margin='auto'),
        children=[
            html.Label(children="Choose Essential Service Type:"),
            dcc.RadioItems(
                id='service-dropdown',
                options=[
                    dict(label=text, value=key)
                    for text, key in (
                        ('Nurses', 'nurse'),
                        ('Police', 'police'),
                        ('Firefighters', 'firefighter'),
                    )
                ],
                value='nurse',
                inline=True,
            ),
        ],
    ),
    dcc.Graph(id='essential-service-graph'),

    # Section 2: employment by gender and NOC group for one province.
    html.H2(children="Employment Statistics Variance Based on Gender in Different Administrative Units"),
    html.Div(
        style=dict(width='40%', margin='auto'),
        children=[
            html.Label(children="Choose Province:", style=dict(fontSize='18px')),
            dcc.Dropdown(
                id='province-dropdown',
                options=[dict(label=name, value=name) for name in provinces],
                value='Ontario',
            ),
        ],
    ),
    dcc.Graph(id='gender-noc-graph'),

    # Section 3: engineering workforce heatmap; all three types start checked.
    html.H2(children="Engineering Manpower Available for Electronic Vehicle Factories"),
    html.Div(
        style=dict(width='90%', margin='auto'),
        children=[
            html.Label(children="Choose Engineer Type:"),
            dcc.Checklist(
                id='engineer-checklist',
                options=[
                    dict(label=text, value=code)
                    for text, code in (
                        ('Computer Engineers', '21311'),
                        ('Mechanical Engineers', '21301'),
                        ('Electrical Engineers', '21310'),
                    )
                ],
                value=['21311', '21301', '21310'],
                inline=True,
            ),
        ],
    ),
    dcc.Graph(id='engineering-graph'),

    # Section 4: most popular occupations, one tab per province.
    html.H2(
        children="Most Popular Occupations by Province",
        style=dict(fontSize='28px'),
    ),
    html.Div(children=[
        html.Label(children="Choose Province:"),
        dcc.Tabs(
            id='province-tabs',
            value='Ontario',
            children=[dcc.Tab(label=name, value=name) for name in provinces],
        ),
    ]),
    dcc.Graph(id='occupations-graph'),
])


@app.callback(
    Output('essential-service-graph', 'figure'),
    Input('service-dropdown', 'value')
)
def update_essential_services(selected_service):
    """Build the bar chart of essential-service employment per province.

    Args:
        selected_service: Value of the 'service-dropdown' radio items
            ('nurse', 'police' or 'firefighter'). It is used as a
            case-insensitive search term against the 'Occupation' column.

    Returns:
        A Plotly bar figure with one bar per province, showing the summed
        'Employment' of the matching rows whose 'Gender' is 'Total'.
    """
    # Plural display names for the chart title. Blindly appending "s" turned
    # 'police' into "Polices"; any other value keeps the "<Title>s" form.
    plural_names = {
        'nurse': 'Nurses',
        'police': 'Police',
        'firefighter': 'Firefighters',
    }
    heading = plural_names.get(selected_service, f"{selected_service.title()}s")

    # na=False makes rows with a missing occupation count as "no match"
    # instead of putting NaN into the boolean mask.
    occupation = df['Occupation'].str.contains(selected_service, case=False, na=False)
    filtered_df = df[occupation & (df['Gender'] == 'Total')]
    grouped = filtered_df.groupby('Province', as_index=False)['Employment'].sum()
    figure1 = px.bar(
        grouped,
        x='Province',
        y='Employment',
        title=f"The Distribution of {heading} by Province",
        labels={'Employment': 'Number of Essential Workers'},
        text_auto=True
    )
    figure1.update_layout(xaxis_tickangle=45)
    return figure1


@app.callback(
    Output('gender-noc-graph', 'figure'),
    Input('province-dropdown', 'value')
)
def update_gender_noc(province):
    """Build the grouped bar chart comparing men and women per NOC group.

    Args:
        province: Value of the 'province-dropdown' control, or None when the
            user has cleared the selection.

    Returns:
        A Plotly grouped bar figure of summed 'Employment' per 'NOC' digit,
        split by 'Gender' ('Men' / 'Women'), for the chosen province. When
        no province is selected, ``dash.no_update`` is returned so the
        current chart stays as it is.
    """
    # dcc.Dropdown is clearable by default, so None can arrive here. Without
    # this guard the chart would be empty and titled "... in None".
    if province is None:
        return dash.no_update

    df_noc = df[
        (df['Gender'].isin(['Men', 'Women'])) &
        (df['NOC'].notna()) &
        (df['Province'] == province)
    ]
    grouped = df_noc.groupby(['NOC', 'Gender'], as_index=False)['Employment'].sum()
    figure2 = px.bar(
        grouped,
        x='NOC',
        y='Employment',
        color='Gender',
        barmode='group',
        title=f"Gender Comparison by NOC in {province}",
        labels={'Employment': 'Number of Workers', 'NOC': 'NOC Group'}
    )
    figure2.update_layout(xaxis_tickangle=0)
    return figure2


@app.callback(
    Output('engineering-graph', 'figure'),
    Input('engineer-checklist', 'value')
)
def update_engineer_graph(selected_nocs):
    """Build the province-by-engineer-type heatmap of engineering employment.

    Args:
        selected_nocs: List of occupation codes ticked in the
            'engineer-checklist' control ('21311', '21301', '21310'). It may
            be empty when every box is unticked.

    Returns:
        A Plotly density heatmap of summed 'Employment' per province and
        engineer type, using rows whose 'Gender' is 'Total'. When nothing is
        selected, a figure dict with no data and a short hint is returned.
    """
    chart_title = "Engineering Workforce Availability by Province"
    # Display name for each occupation code offered by the checklist.
    type_by_code = {
        '21311': 'Computer',
        '21301': 'Mechanical',
        '21310': 'Electrical',
    }

    codes = [code for code in (selected_nocs or []) if code in type_by_code]
    if not codes:
        # An empty selection used to give the pattern '', which matches every
        # occupation, so the heatmap showed a bogus 'Total' row summing the
        # whole dataset. Show an empty chart with a hint instead.
        return {
            'data': [],
            'layout': {
                'title': {'text': chart_title},
                'annotations': [{
                    'text': "Select at least one engineer type",
                    'xref': 'paper',
                    'yref': 'paper',
                    'x': 0.5,
                    'y': 0.5,
                    'showarrow': False,
                }],
            },
        }

    # Pull the selected code out of each occupation string. Rows without one
    # of the codes, including rows with a missing occupation, yield NaN and
    # are filtered out below.
    matched_code = df['Occupation'].str.extract(f"({'|'.join(codes)})", expand=False)
    keep = matched_code.notna() & (df['Gender'] == 'Total')
    df_engineer = df.loc[keep].copy()
    df_engineer['Engineer Type'] = matched_code[keep].map(type_by_code)

    grouped = df_engineer.groupby(['Province', 'Engineer Type'], as_index=False)['Employment'].sum()
    figure3 = px.density_heatmap(
        grouped,
        x='Province',
        y='Engineer Type',
        z='Employment',
        color_continuous_scale='Blues',
        title=chart_title,
        labels={'Employment': 'Number of Engineers'}
    )
    return figure3


@app.callback(
    Output('occupations-graph', 'figure'),
    Input('province-tabs', 'value'),
)
def update_occupations(province):
    """Build the treemap of the 15 largest occupations in one province.

    Args:
        province: Value of the selected tab in 'province-tabs'.

    Returns:
        A Plotly treemap whose tiles are the 15 occupations with the highest
        summed 'Employment' (rows with 'Gender' == 'Total') in that province,
        coloured by the leading digit of the occupation text.
    """
    in_province = df['Province'] == province
    all_genders = df['Gender'] == 'Total'

    # Total employment per occupation, largest first, limited to the top 15.
    totals = (
        df[in_province & all_genders]
        .groupby('Occupation', as_index=False)['Employment']
        .sum()
    )
    top15 = totals.sort_values(by='Employment', ascending=False).head(15)
    top15['NOC'] = top15['Occupation'].str.extract(r'^([0-9])')

    treemap = px.treemap(
        top15,
        path=['Occupation'],
        values='Employment',
        color='NOC',
        title=f"Most Popular Occupations in {province} by Employment",
        labels={'Employment': 'Number of Workers', 'NOC': 'NOC Group'},
    )
    # Larger tile text for readability.
    treemap.update_traces(textfont=dict(size=18), selector=dict(type='treemap'))
    return treemap

if __name__ == '__main__':
    # Start the development server when the file is run as a script.
    # Dash.run_server was deprecated in favour of Dash.run and is no longer
    # usable on Dash 3; fall back to it only on older releases without run().
    start_server = app.run if hasattr(app, 'run') else app.run_server
    start_server(debug=True)