In [1]:
import pandas as pd

# Load datasets
edu_df = pd.read_csv("Enhanced_Education_Dataset.csv")
pop_df = pd.read_csv("D:\Personal Projects\IRCC_Project\datasets\Population\Population_Demographics_by_Year_and_Province_and_Canada.csv")

# Extract year from REF_DATE and clean up columns for merge
edu_df['Year'] = pd.to_datetime(edu_df['REF_DATE'], errors='coerce').dt.year
edu_df = edu_df.rename(columns={'GEO': 'Province'})
pop_df['Province'] = pop_df['Province'].str.strip()  # Avoid mismatch from extra spaces

# Filter both datasets to include only from 1991 onwards
edu_df = edu_df[edu_df['Year'] >= 1991]
pop_df = pop_df[pop_df['Year'] >= 1991]

# Merge on Year and Province
merged_df = pd.merge(edu_df, pop_df, on=['Year', 'Province'], how='left')

# Create population-based metrics
merged_df['Educators_per_1000'] = merged_df['Total, work status'] / merged_df['Population Estimate'] * 1000
merged_df['OpSpend_per_Capita'] = merged_df['Total operating expenditures'] / merged_df['Population Estimate']
merged_df['Access_Index_per_Capita'] = merged_df['Education_Access_Index'] / merged_df['Population Estimate']

# Optional: Save to CSV
merged_df.to_csv("Merged_Education_Population_Data_1991_onwards.csv", index=False)
print("✅ Merged dataset saved to 'Merged_Education_Population_Data_1991_onwards.csv'")

✅ Merged dataset saved to 'Merged_Education_Population_Data_1991_onwards.csv'


In [2]:

# Create population-based metrics
merged_df['Educators_per_1000'] = merged_df['Total, work status'] / merged_df['Population Estimate'] * 1000
merged_df['OpSpend_per_Capita'] = merged_df['Total operating expenditures'] / merged_df['Population Estimate']
merged_df['Access_Index_per_Capita'] = merged_df['Education_Access_Index'] / merged_df['Population Estimate']


In [5]:
import pandas as pd
from prophet import Prophet

# Prepare to store all forecasts
forecast_all = []

# Get province list + 'Canada' for national forecast
geo_list = merged_df['Province'].unique().tolist()
geo_list.append('Canada')

# Loop through each province (and Canada)
for geo in geo_list:
    if geo == 'Canada':
        df_geo = merged_df.groupby('Year')['Educators_per_1000'].mean().reset_index()
    else:
        df_geo = merged_df[merged_df['Province'] == geo].groupby('Year')['Educators_per_1000'].mean().reset_index()

    # Ensure column names match Prophet requirements
    df_geo.columns = ['ds', 'y']
    df_geo['ds'] = pd.to_datetime(df_geo['ds'], format='%Y')

    # Skip if insufficient data
    if df_geo['y'].isnull().sum() > 5 or len(df_geo) < 5:
        continue

    # Fit Prophet model
    model = Prophet(yearly_seasonality=True, daily_seasonality=False, weekly_seasonality=False)
    model.fit(df_geo)

    # Create monthly future dataframe (from Jan 2024 to Dec 2035)
    future_months = pd.date_range(start='2024-01-01', end='2035-12-01', freq='MS')
    future_df = pd.DataFrame({'ds': future_months})

    # Predict
    forecast = model.predict(future_df)

    # Extract relevant columns
    result = forecast[['ds', 'yhat']].copy()
    result.columns = ['REF_DATE', 'Educators_per_1000']
    result['GEO'] = geo

    # Store result
    forecast_all.append(result)

# Combine all province forecasts
final_forecast_df = pd.concat(forecast_all, ignore_index=True)
final_forecast_df = final_forecast_df[['REF_DATE', 'GEO', 'Educators_per_1000']]

# Optional: Save to CSV
final_forecast_df.to_csv("Monthly_Educators_per_1000_Forecast_2024_2035.csv", index=False)

# Display preview
print(final_forecast_df.head())
print("✅ Forecast dataset saved to 'Monthly_Educators_per_1000_Forecast_2024_2035.csv'")

15:36:01 - cmdstanpy - INFO - Chain [1] start processing
15:36:02 - cmdstanpy - INFO - Chain [1] done processing
15:36:03 - cmdstanpy - INFO - Chain [1] start processing
15:36:03 - cmdstanpy - INFO - Chain [1] done processing
15:36:03 - cmdstanpy - INFO - Chain [1] start processing
15:36:04 - cmdstanpy - INFO - Chain [1] done processing
15:36:04 - cmdstanpy - INFO - Chain [1] start processing
15:36:04 - cmdstanpy - INFO - Chain [1] done processing
15:36:05 - cmdstanpy - INFO - Chain [1] start processing
15:36:05 - cmdstanpy - INFO - Chain [1] done processing
15:36:06 - cmdstanpy - INFO - Chain [1] start processing
15:36:06 - cmdstanpy - INFO - Chain [1] done processing
15:36:06 - cmdstanpy - INFO - Chain [1] start processing
15:36:07 - cmdstanpy - INFO - Chain [1] done processing
15:36:07 - cmdstanpy - INFO - Chain [1] start processing
15:36:07 - cmdstanpy - INFO - Chain [1] done processing
15:36:07 - cmdstanpy - INFO - Chain [1] start processing
15:36:08 - cmdstanpy - INFO - Chain [1]

    REF_DATE     GEO  Educators_per_1000
0 2024-01-01  Quebec           14.133378
1 2024-02-01  Quebec            2.258106
2 2024-03-01  Quebec           16.069003
3 2024-04-01  Quebec            6.757504
4 2024-05-01  Quebec           -4.309188
✅ Forecast dataset saved to 'Monthly_Educators_per_1000_Forecast_2024_2035.csv'
