DESCRIPTION: Run population estimations of a country based on various assumptions.

# Demographics

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

import numpy as np
import pandas as pd
import matplotlib as plt

pd.set_option("display.max.columns", None)
pd.set_option("display.precision", 2)

In [2]:
'''
Columns:
  NAME: The name of the country
  GENC: Geography, the country abbreviation
  POP: The population
  SEX:
    0 = Both genders
    1 = Male
    2 = Female

  
'''
url = "https://api.census.gov/data/timeseries/idb/1year?get=NAME,GENC,POP&YR=2023,2024&AGE=0:100&SEX=0,1,2"
df = pd.read_json(url)
df

Unnamed: 0,0,1,2,3,4,5
0,NAME,GENC,POP,YR,AGE,SEX
1,Andorra,AD,586,2023,0,0
2,Andorra,AD,302,2023,0,1
3,Andorra,AD,284,2023,0,2
4,Andorra,AD,588,2023,1,0
...,...,...,...,...,...,...
137558,United Arab Emirates,AE,7,2024,99,1
137559,United Arab Emirates,AE,11,2024,99,2
137560,United Arab Emirates,AE,34,2024,100,0
137561,United Arab Emirates,AE,15,2024,100,1


In [32]:
# Rename the columns
df = df.rename(columns={0: 'Country', 1: 'Abbr', 2: 'Population', 3: 'Year', 4: 'Age', 5: 'Sex'})

# Drop the first row
df = df.iloc[1:, :]

# Convert string columns to integers
df["Population"] = df["Population"].astype(int)
df["Age"] = df["Age"].astype(int)
df["Sex"] = df["Sex"].astype(int)

df

Unnamed: 0,Country,Abbr,Population,Year,Age,Sex
4,Andorra,AD,588,2023,1,0
5,Andorra,AD,303,2023,1,1
6,Andorra,AD,285,2023,1,2
7,Andorra,AD,593,2023,2,0
8,Andorra,AD,306,2023,2,1
...,...,...,...,...,...,...
137558,United Arab Emirates,AE,7,2024,99,1
137559,United Arab Emirates,AE,11,2024,99,2
137560,United Arab Emirates,AE,34,2024,100,0
137561,United Arab Emirates,AE,15,2024,100,1


In [43]:
c = df.copy(True)

country = "China"
ctry = 'CN'

# Filter rows for a specific country
c = c[(c['Abbr'] == ctry) & (c['Year'] == '2023')]

# Total for country
print(f"Total population for {country} = {c['Population'].sum():,.0f}")

# Add a calculated rows. We'll split the "Both Genders" population evenly between males and females.
c['Males'] = (c.Population * (c.Sex == 1) + c.Population/2 * (c.Sex == 0)).astype(int)
c['Females'] = (c.Population * (c.Sex == 2) + c.Population/2 * (c.Sex == 0)).astype(int)

c

Total population for China = 2,826,285,692


Unnamed: 0,Country,Abbr,Population,Year,Age,Sex,Males,Females
12424,China,CN,13760508,2023,0,0,6880254,6880254
12425,China,CN,7194194,2023,0,1,7194194,0
12426,China,CN,6566314,2023,0,2,0,6566314
12427,China,CN,14379462,2023,1,0,7189731,7189731
12428,China,CN,7540282,2023,1,1,7540282,0
...,...,...,...,...,...,...,...,...
12722,China,CN,15484,2023,99,1,15484,0
12723,China,CN,44974,2023,99,2,0,44974
12724,China,CN,87422,2023,100,0,43711,43711
12725,China,CN,20664,2023,100,1,20664,0


In [44]:
# Group the populations into age ranges; 0-5, 6-10, 11-15, ...

bins = np.arange(0, 120, 5)
#print(bins)
d = c.groupby(pd.cut(df["Age"], bins)).sum()

[  0   5  10  15  20  25  30  35  40  45  50  55  60  65  70  75  80  85
  90  95 100 105 110 115]


In [48]:
c.groupby(df['Age']).sum().copy()

Unnamed: 0_level_0,Population,Age,Sex,Males,Females
Age,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
0,27521016,0,3,14074448,13446568
1,28758924,3,3,14730013,14028911
2,27164836,6,3,13932357,13232479
3,26972096,9,3,13855010,13117086
4,29790126,12,3,15324130,14465995
...,...,...,...,...,...
96,354440,288,3,138868,215572
97,244310,291,3,94656,149653
98,182314,294,3,69434,112879
99,120916,297,3,45713,75203
