# Import Required Libraries
This section imports pandas for data handling and Plotly Express for plotting.

In [25]:
import pandas as pd
import plotly.express as px

# Load Population Data from Feather File
This section loads the UN WPP population data from the provided feather file using pandas.

In [7]:
# Location of the UN WPP population table (feather format) in the OWID catalog
UN_WPP_POPULATION_URL = 'https://catalog.ourworldindata.org/garden/un/2024-07-12/un_wpp/population.feather'

# Read the remote feather file straight into a DataFrame
population_data = pd.read_feather(UN_WPP_POPULATION_URL)

# Display Data Overview
This section displays the first few rows and basic info of the loaded dataset to understand its structure.

In [8]:
# Preview the first rows of the loaded table
print(population_data.head())

# Summarise the columns and their dtypes.
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
population_data.info()

       country  year  sex   age    variant  population  population_change  \
0  Afghanistan  1950  all     0  estimates      315817               <NA>   
1  Afghanistan  1950  all  0-14  estimates     3191673               <NA>   
2  Afghanistan  1950  all  0-24  estimates     4672833               <NA>   
3  Afghanistan  1950  all   0-4  estimates     1300029               <NA>   
4  Afghanistan  1950  all     1  estimates      269174               <NA>   

   population_density  
0                <NA>  
1                <NA>  
2                <NA>  
3                <NA>  
4                <NA>  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 12321792 entries, 0 to 12321791
Data columns (total 8 columns):
 #   Column              Dtype   
---  ------              -----   
 0   country             category
 1   year                UInt16  
 2   sex                 category
 3   age                 category
 4   variant             category
 5   population          Int64   
 6   po

# Filter Data for Poland
This section filters the dataset to include only rows where the country is Poland.

In [16]:
# Boolean mask marking the rows that belong to Poland
is_poland = population_data['country'] == 'Poland'

# Keep only the Polish rows (all sexes, ages and variants are still present)
poland_data = population_data[is_poland]

In [None]:
# Reduce the Polish rows to the aggregate series: every sex, every age,
# and the 'medium' variant only.
# NOTE(review): rows whose variant is 'estimates' (e.g. the 1950 rows shown in
# the data overview output) are not selected by this filter -- confirm that a
# medium-variant-only series is what is wanted here.
is_all_sexes = poland_data['sex'] == 'all'
is_all_ages = poland_data['age'] == 'all'
is_medium_variant = poland_data['variant'] == 'medium'

poland_data_overall = poland_data[is_all_sexes & is_all_ages & is_medium_variant]

In [None]:
# Germany: first isolate the country's rows ...
germany_data = population_data.loc[population_data['country'] == 'Germany']

# ... then keep only the aggregate series (all sexes, all ages, 'medium' variant)
germany_overall_mask = (
    (germany_data['sex'] == 'all')
    & (germany_data['age'] == 'all')
    & (germany_data['variant'] == 'medium')
)
germany_data_overall = germany_data.loc[germany_overall_mask]

# Stack the Polish and German series into one frame with a fresh 0..n-1 index
combined_data = pd.concat(
    [poland_data_overall, germany_data_overall],
    ignore_index=True,
)

# Visualize Poland's Population
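This section plots the population of Poland (alongside Germany for comparison) over time using Plotly Express. The chart built from the UN WPP data is drawn further below, after the historical-data sections.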

## Using Historical Population Data

Let's now switch to using actual historical population data instead of projections. We'll use the demography/population/historical dataset which contains historical estimates.

In [30]:
# Location of the OWID historical population table (not projections)
historical_population_url = "https://catalog.ourworldindata.org/garden/demography/2024-07-15/population/historical.feather"

# Pull the feather file into a DataFrame
historical_data = pd.read_feather(historical_population_url)

# Gather the summary facts first, then report them together
column_names = list(historical_data.columns)
leading_countries = sorted(historical_data['country'].unique())[:20]
years = historical_data['year']
earliest_year, latest_year = years.min(), years.max()

print("Historical population data shape:", historical_data.shape)
print("\nColumns:", column_names)
print("\nFirst few rows:")
print(historical_data.head())
print("\nCountries available (first 20):")
print(leading_countries)
print("\nYear range:", earliest_year, "to", latest_year)

Historical population data shape: (59177, 6)

Columns: ['country', 'year', 'population_historical', 'world_pop_share_historical', 'population_density_historical', 'growth_rate_historical']

First few rows:
       country    year  population_historical  world_pop_share_historical  \
0  Afghanistan  -10000                  14737                    0.327405   
1  Afghanistan   -9000                  20405                    0.358793   
2  Afghanistan   -8000                  28253                    0.386254   
3  Afghanistan   -7000                  39120                    0.405317   
4  Afghanistan   -6000                  54166                    0.407928   

   population_density_historical  growth_rate_historical  
0                       0.022595                    <NA>  
1                       0.031285                    <NA>  
2                       0.043318                    <NA>  
3                       0.059979                    <NA>  
4                       0.083047    

In [31]:
# Pull each country's rows out of the historical table as independent copies
poland_historical = historical_data.loc[historical_data['country'] == 'Poland'].copy()
germany_historical = historical_data.loc[historical_data['country'] == 'Germany'].copy()

# Report the row count and the span of years available for each country
poland_years = poland_historical['year']
germany_years = germany_historical['year']

print("Poland historical data points:", poland_historical.shape[0])
print("Poland year range:", poland_years.min(), "to", poland_years.max())
print("\nGermany historical data points:", germany_historical.shape[0])
print("Germany year range:", germany_years.min(), "to", germany_years.max())

# Narrow both frames to the modern era (1900 and later)
poland_recent = poland_historical.loc[poland_years >= 1900]
germany_recent = germany_historical.loc[germany_years >= 1900]

print(f"\nPoland data from 1900 onwards: {len(poland_recent)} data points")
print(f"Germany data from 1900 onwards: {len(germany_recent)} data points")

Poland historical data points: 261
Poland year range: -10000 to 2023

Germany historical data points: 261
Germany year range: -10000 to 2023

Poland data from 1900 onwards: 124 data points
Germany data from 1900 onwards: 124 data points


In [32]:
# Stack the post-1900 rows of both countries; each frame's country column is
# rewritten with a plain string label before concatenating
recent_frames = [
    poland_recent.assign(country='Poland'),
    germany_recent.assign(country='Germany'),
]
historical_combined = pd.concat(recent_frames)

# Interactive plotly line chart, one trace per country
recent_axis_labels = {
    'population_historical': 'Population',
    'year': 'Year',
}
fig_historical = px.line(
    historical_combined,
    x='year',
    y='population_historical',
    color='country',
    title='Historical Population: Poland vs Germany (1900-2023)',
    labels=recent_axis_labels,
    width=800,
    height=500,
)

# Anchor the y-axis at zero and set the axis and legend titles
fig_historical.update_layout(
    yaxis={'rangemode': 'tozero'},
    xaxis={'title': 'Year'},
    yaxis_title='Population',
    legend={'title': 'Country'},
)

# Render the figure
fig_historical.show()

In [33]:
# Longer-term view: keep every row from year 1 CE onwards for both countries
poland_longterm = poland_historical.loc[poland_historical['year'] >= 1]
germany_longterm = germany_historical.loc[germany_historical['year'] >= 1]

# Stack the two frames, rewriting the country column with a plain string label
longterm_frames = [
    poland_longterm.assign(country='Poland'),
    germany_longterm.assign(country='Germany'),
]
historical_longterm = pd.concat(longterm_frames)

# Interactive plotly line chart of the long-term series, one trace per country
longterm_axis_labels = {
    'population_historical': 'Population',
    'year': 'Year',
}
fig_longterm = px.line(
    historical_longterm,
    x='year',
    y='population_historical',
    color='country',
    title='Long-term Historical Population: Poland vs Germany (1 CE - 2023)',
    labels=longterm_axis_labels,
    width=800,
    height=500,
)

# Anchor the y-axis at zero and set the axis and legend titles
fig_longterm.update_layout(
    yaxis={'rangemode': 'tozero'},
    xaxis={'title': 'Year'},
    yaxis_title='Population',
    legend={'title': 'Country'},
)

# Render the figure
fig_longterm.show()

## Summary: Historical vs Projection Data

We've now explored population data using two different datasets:

1. **UN WPP Projections**: These include estimates and projections, with data categorized by variant (medium, high, low), sex, and age groups. This is more detailed for recent years but focuses on future projections.

2. **Historical Population Data**: This provides a long-term historical perspective going back thousands of years, showing historical estimates rather than projections. This dataset is simpler (one row per country and year, with population, share of world population, population density, and growth rate, and no breakdown by sex, age, or variant) but covers a much longer time span.

Both datasets show the population trends for Poland and Germany, but the historical dataset gives us a much broader perspective on demographic changes over time, while the projection dataset provides more detailed breakdowns and future estimates.

In [29]:
# Line chart of the UN WPP series for both countries, drawn with plotly express
fig = px.line(
    combined_data,
    x='year',
    y='population',
    color='country',
    title='Population of Poland and Germany Over Time',
    labels={'population': 'Population', 'year': 'Year'},
)

# Start the y-axis at zero and leave 10% headroom above the largest value
population_ceiling = combined_data['population'].max() * 1.1
fig.update_layout(yaxis_range=[0, population_ceiling])
fig.show()