In [1]:
import pandas as pd

# Kaggle input path of the admin-3 population projection workbook, and the
# worksheet inside it that holds the projections.
XLS = (
    '/kaggle/input/philippines-population-projection-2020-to-2025/'
    'phl_population-projections_admin3_midyear2020_2025.xlsx'
)
sheet_name = 'PopnProj_adm3'

# Load that worksheet into a DataFrame and preview the first rows.
df = pd.read_excel(XLS, sheet_name=sheet_name)
df.head()

Unnamed: 0,Region,Region_pcode,Province,Province_pcode,Municipality_City,Mun_Pcode,July2020,July2021,July2022,July2023,July2024,July2025
0,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Adams,PH012801000,1823,1827,1830,1833,1834,1835
1,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bacarra,PH012802000,32778,32850,32908,32951,32979,32994
2,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Badoc,PH012803000,32218,32299,32365,32418,32456,32480
3,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,Bangui,PH012804000,14954,14993,15024,15049,15067,15079
4,Region I (Ilocos Region),PH010000000,Ilocos Norte,PH012800000,City of Batac,PH012805000,56248,56389,56504,56595,56661,56702


In [2]:
from plotly import colors
from plotly import express
from plotly import io

# Render Plotly figures through iframes.
io.renderers.default = 'iframe'

# Sum the municipality-level projections up to one row per region, then relabel
# the JulyYYYY columns with plain integer years (renamed by name, not position,
# so a change in column order cannot silently mislabel a year).
region_years = list(range(2020, 2026))
region_df = (
    df[['Region'] + [f'July{year}' for year in region_years]]
    .groupby(by=['Region'])
    .sum()
    .reset_index()
    .rename(columns={f'July{year}': year for year in region_years})
)

# 'HSV' is a cyclical colour scale whose two endpoints are the same red. Asking
# sample_colorscale for an integer N samples both endpoints, so the first and
# last region would share a colour. Sampling at i/N for i in 0..N-1 keeps every
# region distinct, and N comes from the data instead of being hard-coded.
region_count = len(region_df)
region_colors = colors.sample_colorscale('HSV', [i / region_count for i in range(region_count)])

# One line per region; the log y axis keeps the lower-population regions readable.
express.line(
    data_frame=region_df.melt(id_vars=['Region']).rename(columns={'variable': 'year', 'value': 'population'}),
    x='year', y='population', color='Region',
    log_y=True,
    color_discrete_sequence=region_colors,
    title='Projected mid-year population by region, 2020-2025 (log scale)',
)

We already knew from the data card that these are projections based on 2015 data, so it is not surprising that the projections are all linear. Using a log scale on the y axis lets us see more of the difference among the lower-population regions. Let's do the same thing at the Province level.

In [3]:
# Sum the municipality-level projections up to one row per province, then
# relabel the JulyYYYY columns with plain integer years (renamed by name, not
# position, so a change in column order cannot silently mislabel a year).
province_years = list(range(2020, 2026))
province_df = (
    df[['Province'] + [f'July{year}' for year in province_years]]
    .groupby(by=['Province'])
    .sum()
    .reset_index()
    .rename(columns={f'July{year}': year for year in province_years})
)

# 'HSV' is a cyclical colour scale whose two endpoints are the same red. Asking
# sample_colorscale for an integer N samples both endpoints, so the first and
# last province would share a colour. Sampling at i/N for i in 0..N-1 keeps
# every province distinct, and N comes from the data instead of being hard-coded.
province_count = len(province_df)
province_colors = colors.sample_colorscale('HSV', [i / province_count for i in range(province_count)])

# One line per province; the figure is tall so the long legend stays usable.
express.line(
    data_frame=province_df.melt(id_vars=['Province']).rename(columns={'variable': 'year', 'value': 'population'}),
    x='year', y='population', color='Province',
    log_y=True,
    height=1200,
    color_discrete_sequence=province_colors,
    title='Projected mid-year population by province, 2020-2025 (log scale)',
)

We can also visualize this same data with a bit less detail, summarizing each province by the mean and standard deviation of its population across the projected years.

In [4]:
# Summarise each province's projected years by their mean and standard
# deviation (pandas' default sample std, ddof=1). The year columns are selected
# with an explicit list of integer labels rather than a bare range object, and
# assign() builds a new frame so province_df itself is left untouched.
projection_years = list(range(2020, 2026))
plot_df = province_df.assign(
    mean=province_df[projection_years].mean(axis='columns'),
    std=province_df[projection_years].std(axis='columns'),
)

# 'HSV' is a cyclical colour scale whose two endpoints are the same red. Asking
# sample_colorscale for an integer N samples both endpoints, so the first and
# last province would share a colour. Sampling at i/N for i in 0..N-1 keeps
# every province distinct, and N comes from the data instead of being hard-coded.
scatter_count = len(plot_df)
scatter_colors = colors.sample_colorscale('HSV', [i / scatter_count for i in range(scatter_count)])

# Log-log scatter of spread against level, one point per province.
express.scatter(
    data_frame=plot_df, x='mean', y='std', color='Province',
    color_discrete_sequence=scatter_colors,
    log_x=True, log_y=True, height=800,
    title='Mean vs. standard deviation of projected population by province, 2020-2025',
)