# Classifier

This notebook takes UN population division data on country-level age distributions and classifies these distributions based on whether the fraction of the population within an age group decreases monotonically with age.

## Import packages

In [14]:
#Load in required libraries
import pandas as pd
import numpy as np
import os
import matplotlib.pyplot as plt

## Read in necessary data

In [15]:
# Project root: the current working directory with its last four characters removed.
# NOTE(review): this presumably strips a 4-character sub-directory name so that
# `home` ends in a path separator -- confirm against the repository layout.
cwd = os.getcwd()
home = cwd[:-4]

# Location of the UN WPP 2019 workbook holding the age distribution data
wpp_path = f'{home}data/required/WPP2019_POP_F15_1_ANNUAL_POPULATION_BY_AGE_BOTH_SEXES.xlsx'

# Load the ESTIMATES sheet and clean it in a single chain:
#   1. discard the rows whose Type is "Label/Separator" (blank section separators)
#   2. re-infer dtypes so the age columns get proper dtypes
#   3. round all population counts to the nearest integer
df = (
    pd.read_excel(wpp_path, sheet_name='ESTIMATES', header=16)
    .loc[lambda raw: ~raw["Type"].eq("Label/Separator")]
    .infer_objects()
    .round()
)

## Check whether distributions are monotonic decreasing

In [16]:
# Number of trailing columns of the sheet that this notebook treats as the
# age distribution (one column per age group).
N_AGE_GROUPS = 21

# Pick the age-group columns by label, ignoring the "monocheck" flag if it is
# already present. A purely positional slice of the last 21 columns shifts by
# one once "monocheck" has been appended, so re-running this cell would drop
# the first age group and include the flag itself in the check.
age_cols = df.columns.drop("monocheck", errors="ignore")[-N_AGE_GROUPS:]

# Work on a plain float matrix (rows = distributions, columns = age groups).
# Anything non-numeric becomes NaN, and every comparison involving NaN is
# False, so such rows are classified as "not monotonic decreasing".
age_counts = df[age_cols].apply(pd.to_numeric, errors="coerce").to_numpy(dtype=float)

# A distribution is monotonic decreasing with age when no age group is larger
# than the one before it (ties are allowed, as with is_monotonic_decreasing).
# One vectorised comparison replaces the row-by-row Python loop.
mono_check = (age_counts[:, 1:] <= age_counts[:, :-1]).all(axis=1).astype(float)

# Stored as 0.0 / 1.0 so the filters below and the saved CSVs keep their format
df["monocheck"] = mono_check

# Save 2 sets of age distributions, monotonic decreasing (md) and other
df.loc[df.monocheck==1].to_csv(f'{home}data/required/agedists_md.csv')
df.loc[df.monocheck==0].to_csv(f'{home}data/required/agedists_other.csv')

  return Index(self).is_monotonic_decreasing


## Focus on a specific year 

In [17]:
# Year whose distributions are extracted below
select_year = 2019

# Keep only the country-level rows
is_country = df["Type"].eq("Country/Area")
df_countries = df[is_country]

# ...and, of those, only the rows for the selected reference year
in_selected_year = df_countries["Reference date (as of 1 July)"].eq(select_year)
df_countries_year = df_countries[in_selected_year]

# Split the selected year by the monotonicity flag:
# 1 -> monotonic decreasing (md), 0 -> other (i.e. not monotonic decreasing)
flag = df_countries_year["monocheck"]
df_countries_year_other = df_countries_year[flag.eq(0)]
df_countries_year_md = df_countries_year[flag.eq(1)]

# Write both subsets for the selected year to disk
df_countries_year_md.to_csv(f'{home}data/required/agedists_countries{select_year}_md.csv')
df_countries_year_other.to_csv(f'{home}data/required/agedists_countries{select_year}_other.csv')