# Read ASFR Data

This notebook reads in the Age-Specific Fertility Rate (ASFR) data files.

In [1]:
import pandas as pd
import numpy as np

## Read asfrVH.txt

Cohort fertility rates by birth cohort and age (horizontal parallelograms).

In [17]:
# Read asfrVH.txt - ASFR by cohort and age.
# The summary-statistics cells further down reference `asfr_vh`, so it must be
# defined here for the notebook to survive Restart Kernel -> Run All.
# NOTE(review): the path is assumed to sit next to asfrTR.txt / asfrVHbo.txt and
# the file is assumed to share their layout (two leading lines to skip, '.' for
# missing values) - confirm.
asfr_vh = pd.read_csv(
    '../data/asfr/asfrVH.txt',
    sep=r'\s+',  # whitespace-delimited; replaces the deprecated delim_whitespace=True
    skiprows=2,
    na_values='.',
)

# Read asfrTR.txt - ASFR by year, age and cohort.
# Kept because the shape check in the next cell inspects `asfr_tr`.
asfr_tr = pd.read_csv(
    '../data/asfr/asfrTR.txt',
    sep=r'\s+',
    skiprows=2,
    na_values='.',
)

print(f"Shape: {asfr_tr.shape}")
asfr_tr.head()

Shape: (229362, 5)


  asfr_tr = pd.read_csv(


Unnamed: 0,Code,Year,Age,Cohort,ASFR
0,AUT,1951,12-,,0.0
1,AUT,1951,13,1938.0,0.0
2,AUT,1951,13,1937.0,0.0
3,AUT,1951,14,1937.0,0.00029
4,AUT,1951,14,1936.0,0.00045


In [18]:
# Display the (rows, columns) shape of the asfr_tr frame loaded in the previous cell
asfr_tr.shape

(229362, 5)

## Read asfrVHbo.txt

Cohort fertility rates by birth cohort, age, and birth order (horizontal parallelograms).

In [4]:
# Read asfrVHbo.txt - ASFR by cohort, age, and birth order.
# The first two lines of the file are skipped and '.' is parsed as a missing value.
asfr_vhbo = pd.read_csv(
    '../data/asfr/asfrVHbo.txt',
    sep=r'\s+',  # whitespace-delimited; replaces the deprecated delim_whitespace=True
    skiprows=2,
    na_values='.',
)

print(f"Shape: {asfr_vhbo.shape}")
asfr_vhbo.head()

Shape: (146388, 9)


  asfr_vhbo = pd.read_csv(


Unnamed: 0,Code,Cohort,Age,ASFR,ASFR1,ASFR2,ASFR3,ASFR4,ASFR5p
0,AUT,1929,12-,,,,,,
1,AUT,1929,13,,,,,,
2,AUT,1929,14,,,,,,
3,AUT,1929,15,,,,,,
4,AUT,1929,16,,,,,,


In [5]:
# Summarise asfr_vhbo: index range, per-column non-null counts, dtypes and memory usage
asfr_vhbo.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 146388 entries, 0 to 146387
Data columns (total 9 columns):
 #   Column  Non-Null Count   Dtype  
---  ------  --------------   -----  
 0   Code    146388 non-null  object 
 1   Cohort  146388 non-null  int64  
 2   Age     146388 non-null  object 
 3   ASFR    73700 non-null   float64
 4   ASFR1   73700 non-null   float64
 5   ASFR2   73700 non-null   float64
 6   ASFR3   73700 non-null   float64
 7   ASFR4   73700 non-null   float64
 8   ASFR5p  73700 non-null   float64
dtypes: float64(6), int64(1), object(2)
memory usage: 10.1+ MB


## Summary Statistics

In [6]:
# How many distinct country codes each table holds, followed by the sorted
# list of codes found in asfrVH (emitted with a single print call)
print(
    f"Countries in asfrVH: {asfr_vh['Code'].nunique()}",
    f"Countries in asfrVHbo: {asfr_vhbo['Code'].nunique()}",
    f"\nCountry codes: {sorted(asfr_vh['Code'].unique())}",
    sep="\n",
)

Countries in asfrVH: 39
Countries in asfrVHbo: 38

Country codes: ['AUT', 'BEL', 'BGR', 'BLR', 'CAN', 'CHE', 'CHL', 'CZE', 'DEUTE', 'DEUTNP', 'DEUTW', 'DNK', 'ESP', 'EST', 'FIN', 'FRATNP', 'GBRTENW', 'GBR_NIR', 'GBR_NP', 'GBR_SCO', 'HRV', 'HUN', 'IRL', 'ISL', 'ITA', 'JPN', 'KOR', 'LTU', 'NLD', 'NOR', 'POL', 'PRT', 'RUS', 'SVK', 'SVN', 'SWE', 'TWN', 'UKR', 'USA']


In [7]:
# Smallest and largest Cohort value present in each table
print(
    f"Cohort range in asfrVH: {asfr_vh['Cohort'].min()} - {asfr_vh['Cohort'].max()}",
    f"Cohort range in asfrVHbo: {asfr_vhbo['Cohort'].min()} - {asfr_vhbo['Cohort'].max()}",
    sep="\n",
)

Cohort range in asfrVH: 1836 - 2011
Cohort range in asfrVHbo: 1878 - 2011


In [8]:
# List the distinct Age labels found in asfrVH, in sorted order
print(f"Unique ages in asfrVH: {sorted(asfr_vh['Age'].unique())}")

Unique ages in asfrVH: ['12-', '13', '14', '15', '16', '17', '18', '19', '20', '21', '22', '23', '24', '25', '26', '27', '28', '29', '30', '31', '32', '33', '34', '35', '36', '37', '38', '39', '40', '41', '42', '43', '44', '45', '46', '47', '48', '49', '50', '51', '52', '53', '54', '55+']


In [11]:
# Count the rows of asfrVH whose ASFR value is present (not NaN)
print(f"Number of rows with no missing ASFR in asfrVH: {asfr_vh['ASFR'].notna().sum()}")

Number of rows with no missing ASFR in asfrVH: 114972


In [12]:
# Count the rows of asfrVHbo whose ASFR value is present (not NaN)
print(f"Number of rows with no missing ASFR in asfrVHbo: {asfr_vhbo['ASFR'].notna().sum()}")

Nubmer of rows with no missing ASFR in asfrVHbo: 73700
