# In [5]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

# Build one "top occupations" bar chart per province from the cleaned
# job-trends workbook, display it, and export it as a PDF.

# Location of the cleaned Excel workbook, relative to the working directory.
DATA_PATH = '../cleaneddataset/JobTrends.xlsx'
# Number of largest NOC Titles plotted for each province.
TOP_N = 10

# Read the cleaned Excel file
df = pd.read_excel(DATA_PATH, engine='openpyxl')

# Sum 'no_of_people_worked' for every (Province, NOC Title) pair
grouped_df = (
    df.groupby(['Province', 'NOC Title'])['no_of_people_worked']
    .sum()
    .reset_index()
)

# Get the list of provinces
provinces = grouped_df['Province'].unique()

# For each province, create a bar chart
for province in provinces:
    province_data = grouped_df[grouped_df['Province'] == province]

    # After the groupby above each NOC Title appears once per province, so
    # the rows returned by nlargest ARE the top-N titles; no second filter
    # is needed. Using them directly also keeps the largest-to-smallest
    # order, so the bars are drawn ranked instead of alphabetically.
    province_data = province_data.nlargest(TOP_N, 'no_of_people_worked')

    # Create a bar chart
    fig = px.bar(
        province_data,
        x='NOC Title',
        y='no_of_people_worked',
        title=f"Job Trends in {province}",
    )

    fig.show()

    # Save the figure as a PDF in the current working directory
    pio.write_image(fig, f'No of People employed_{province}.pdf')