In [24]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import root_mean_squared_error, mean_squared_error, mean_absolute_percentage_error

In [25]:
# Load the two input tables used throughout the notebook.
# Later cells read a 'Year' column plus the age-group columns from the
# estimates table, and a 'Criteria for Projection' column plus the same
# age-group columns from the projections table.
population_estimates_df = pd.read_csv(
    filepath_or_buffer='../data/PopByAgeGroupSupplemented.csv',
)
projected_population_df = pd.read_csv(
    filepath_or_buffer='../data/PastProjections.csv',
)

In [26]:
# Years at which projections are compared with the population estimates.
years = [2006, 2011, 2016, 2021]

# Column names of the elderly age groups that get scored.
age_groups = np.array(['65 - 69 years', '70 - 74 years', '75 - 79 years', '80 - 84 years', '85 years and over'])

# Keep only the estimate rows for the evaluation years. The scoring cells
# pair these rows with `years` by position (`.iloc[idx]`), so order them by
# 'Year' explicitly instead of relying on the CSV's row order. A stable sort
# leaves an already-ordered frame untouched.
population_estimates_filtered = (
    population_estimates_df[population_estimates_df['Year'].isin(years)]
    .sort_values('Year', kind='stable')
)

# One sub-frame of projections per projection method, keyed by the value of
# the 'Criteria for Projection' column.
# NOTE(review): rows inside each sub-frame are assumed to follow the order of
# `years` (they are also accessed positionally) - confirm against the CSV.
methods = projected_population_df['Criteria for Projection'].unique()
projected_population_dfs = {
    method: projected_population_df[projected_population_df['Criteria for Projection'] == method]
    for method in methods
}

In [27]:
# Per-method averages of the point-wise errors, keyed by projection method.
avg_rmses = {}
avg_mapes = {}

# Score every projection method against the population estimates, one
# (year, age group) pair at a time, for the elderly age groups.
for method in methods:
    print(f'\nResults for method: {method}')
    projected_df = projected_population_dfs[method]
    rmses = []
    mapes = []
    for idx, year in enumerate(years):
        # Rows are paired with `years` by position.
        # NOTE(review): the projection rows are assumed to be in the same
        # order as `years`; only the estimates can be verified here because
        # they are known to carry a 'Year' column.
        projected_row = projected_df.iloc[idx]
        pop_row = population_estimates_filtered.iloc[idx]
        if pop_row['Year'] != year:
            raise ValueError(
                f"Population estimate row {idx} is for year {pop_row['Year']}, expected {year}; "
                "population_estimates_filtered is not aligned with `years`."
            )
        for ag in age_groups:
            population = pop_row[ag]
            projected_pop = projected_row[ag]
            # With a single observation the RMSE equals the absolute error,
            # so the "average RMSE" below is numerically a mean absolute error.
            rmse = root_mean_squared_error([population], [projected_pop])
            rmses.append(rmse)
            # Keep full precision here; rounding happens only when printing,
            # so the average is not computed from pre-rounded values.
            mape = mean_absolute_percentage_error([population], [projected_pop]) * 100
            mapes.append(mape)

            print(f'Method: {method:<16} - Year: {year:<8} - Age group: {ag:<20} - Projection: {projected_pop:<10} - Population: {population:<10} - RMSE: {rmse:<10.5f} - MAPE: {mape:<10.3f}%')

    avg_rmse = np.mean(rmses)
    avg_rmses[method] = avg_rmse

    avg_mape = np.mean(mapes)
    avg_mapes[method] = avg_mape

# Summary: one pair of averages per projection method.
print('\n')
for method in methods:
    print(f'Average RMSE for {method} for elderly age groups: {avg_rmses[method]:.5f}')
    print(f'Average MAPE for {method} for elderly age groups: {avg_mapes[method]:.3f}%\n')


Results for method: Method - M0F1
Method: Method - M0F1    - Year: 2006     - Age group: 65 - 69 years        - Projection: 141.2      - Population: 141.2      - RMSE: 0.00000    - MAPE: 0.000     %
Method: Method - M0F1    - Year: 2006     - Age group: 70 - 74 years        - Projection: 117.5      - Population: 117.5      - RMSE: 0.00000    - MAPE: 0.000     %
Method: Method - M0F1    - Year: 2006     - Age group: 75 - 79 years        - Projection: 91.4       - Population: 91.4       - RMSE: 0.00000    - MAPE: 0.000     %
Method: Method - M0F1    - Year: 2006     - Age group: 80 - 84 years        - Projection: 64.4       - Population: 64.4       - RMSE: 0.00000    - MAPE: 0.000     %
Method: Method - M0F1    - Year: 2006     - Age group: 85 years and over    - Projection: 47.8       - Population: 47.8       - RMSE: 0.00000    - MAPE: 0.000     %
Method: Method - M0F1    - Year: 2011     - Age group: 65 - 69 years        - Projection: 171.0      - Population: 173.638    - RMSE: 2.6380

In [28]:
# Per-method averages of the point-wise errors, keyed by projection method.
# This cell scores every entry of `years` except the first one.
avg_rmses = {}
avg_mapes = {}

for method in methods:
    print(f'\nResults for method: {method}')
    projected_df = projected_population_dfs[method]
    rmses = []
    mapes = []
    # start=1 keeps `idx` equal to the row position of `year` while skipping
    # the first year (presumably the projection base year - TODO confirm).
    for idx, year in enumerate(years[1:], start=1):
        # Rows are paired with `years` by position.
        # NOTE(review): the projection rows are assumed to be in the same
        # order as `years`; only the estimates can be verified here because
        # they are known to carry a 'Year' column.
        projected_row = projected_df.iloc[idx]
        pop_row = population_estimates_filtered.iloc[idx]
        if pop_row['Year'] != year:
            raise ValueError(
                f"Population estimate row {idx} is for year {pop_row['Year']}, expected {year}; "
                "population_estimates_filtered is not aligned with `years`."
            )
        for ag in age_groups:
            population = pop_row[ag]
            projected_pop = projected_row[ag]
            # With a single observation the RMSE equals the absolute error,
            # so the "average RMSE" below is numerically a mean absolute error.
            rmse = root_mean_squared_error([population], [projected_pop])
            rmses.append(rmse)
            # Keep full precision here; rounding to 3 decimals before
            # averaging could shift the last printed digit of the average.
            mape = mean_absolute_percentage_error([population], [projected_pop]) * 100
            mapes.append(mape)

            print(f'Method: {method:<16} - Year: {year:<8} - Age group: {ag:<20} - Projection: {projected_pop:<10} - Population: {population:<10} - RMSE: {rmse:<10.5f} - MAPE: {mape:<10.3f}%')

    avg_rmse = np.mean(rmses)
    avg_rmses[method] = avg_rmse

    avg_mape = np.mean(mapes)
    avg_mapes[method] = avg_mape

# Summary: one pair of averages per projection method.
print('\n')
for method in methods:
    print(f'Average RMSE for {method} for elderly age groups: {avg_rmses[method]:<.5f}')
    print(f'Average MAPE for {method} for elderly age groups: {avg_mapes[method]:<.3f}%\n')


Results for method: Method - M0F1
Method: Method - M0F1    - Year: 2011     - Age group: 65 - 69 years        - Projection: 171.0      - Population: 173.638    - RMSE: 2.63800    - MAPE: 1.519     %
Method: Method - M0F1    - Year: 2011     - Age group: 70 - 74 years        - Projection: 129.4      - Population: 131.19     - RMSE: 1.79000    - MAPE: 1.364     %
Method: Method - M0F1    - Year: 2011     - Age group: 75 - 79 years        - Projection: 100.8      - Population: 102.036    - RMSE: 1.23600    - MAPE: 1.211     %
Method: Method - M0F1    - Year: 2011     - Age group: 80 - 84 years        - Projection: 69.9       - Population: 70.113     - RMSE: 0.21300    - MAPE: 0.304     %
Method: Method - M0F1    - Year: 2011     - Age group: 85 years and over    - Projection: 60.2       - Population: 58.416     - RMSE: 1.78400    - MAPE: 3.054     %
Method: Method - M0F1    - Year: 2016     - Age group: 65 - 69 years        - Projection: 208.5      - Population: 211.236    - RMSE: 2.7360