## Deep Learning for Fertility Prediction (DLMP)

### Import packages 

In [1]:
import tensorflow as tf
import numpy as np
import os as os
tfkl = tf.keras.layers
import random

### Import functions

In [2]:
# Project-local helper module; this notebook calls its prep_data and
# run_deep_model functions.
import training_functions
import importlib

# Re-import the module so that edits made to training_functions.py are picked
# up without restarting the kernel.
importlib.reload(training_functions)

<module 'training_functions' from '/Users/paigepark/Desktop/repos/deep-fert/code/training_functions.py'>

### Import data

In [None]:
# Read the training and test arrays from their whitespace-delimited text
# files, training file first.
asfr_training, asfr_test = map(
    np.loadtxt,
    ('../data/asfrTR_training.txt', '../data/asfrTR_test.txt'),
)

In [8]:
# Sanity check: (rows, columns) of the training array.
np.shape(asfr_training)

(75936, 4)

In [4]:
# Load the saved geography key array.
# NOTE(review): presumably maps the geo codes in column 0 to geography
# names — confirm against the code that wrote geos_key.npy.
geos_key = np.load(file='../data/geos_key.npy')

#### All Country Model

These are the models used in the paper to produce all of the main figures and tables.

In [5]:
# prep data
# Both splits go through prep_data with changeratetolog=True; the training
# split is prepared first, then the test split.
train_prepped, test_prepped = (
    training_functions.prep_data(split_data, mode=split_mode, changeratetolog=True)
    for split_data, split_mode in ((asfr_training, "train"), (asfr_test, "test"))
)

In [6]:
# get the proper geography input dimension for model set up
# Distinct values found in column 0 (the geo column) of the training data.
unique_vals = tf.unique(asfr_training[:, 0]).y
# Number of distinct geographies as a plain Python int; this is the value
# handed to run_deep_model as its geography dimension.
# (The former `country_geo_dim = country_geo_dim` self-assignment was a no-op
# and has been removed.)
country_geo_dim = int(tf.size(unique_vals))

In [26]:
# Three smallest distinct values in column 3. np.unique already returns its
# result sorted, so a plain slice replaces the extra np.partition pass, which
# gave no ordering guarantee within the slice and raised when fewer than four
# distinct values were present.
np.unique(asfr_training[:,3])[:3]

array([0.e+00, 1.e-05, 2.e-05])

**Testing model** — a single exploratory fit; its predictions are written to the `*_explore.txt` files.

In [12]:
# Single exploratory fit of the all-country model on the log-rate data.
model_country, loss_info_country = training_functions.run_deep_model(
    train_prepped,
    test_prepped,
    country_geo_dim,
    epochs=50,
    steps_per_epoch=1405,
    lograte=True,
)

# Prediction inputs as float32 tensors, ordered (normalized year, age, geo).
# Column 1 is rescaled with the constants 1950 and 2015.
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (asfr_training[:, 1] - 1950) / (2015 - 1950),  # Normalized year
        asfr_training[:, 2],  # Age
        asfr_training[:, 0],  # Geo
    )
)
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (asfr_test[:, 1] - 1950) / (2015 - 1950),  # Normalized year
        asfr_test[:, 2],  # Age
        asfr_test[:, 0],  # Geo
    )
)

# The raw arrays with column 3 removed label every prediction row.
inputs = np.delete(asfr_training, 3, axis=1)
inputs_test = np.delete(asfr_test, 3, axis=1)

# Predict on the training split first, then the test split, and append the
# model output as extra column(s) after the identifying columns.
training_predictions = np.column_stack(
    (inputs, model_country.predict(training_input_features))
)
test_predictions = np.column_stack(
    (inputs_test, model_country.predict(test_input_features))
)

# Write the fitted (training) and forecast (test) tables to disk.
np.savetxt(f"../data/dl_fitted_explore.txt", training_predictions)
np.savetxt(f"../data/dl_forecast_explore.txt", test_predictions)

Epoch 1/50
1405/1405 - 5s - 3ms/step - loss: 1.0445 - val_loss: 0.7196 - learning_rate: 1.0000e-03
Epoch 2/50
1405/1405 - 4s - 3ms/step - loss: 0.3542 - val_loss: 0.5649 - learning_rate: 1.0000e-03
Epoch 3/50
1405/1405 - 4s - 3ms/step - loss: 0.2608 - val_loss: 0.4133 - learning_rate: 1.0000e-03
Epoch 4/50
1405/1405 - 4s - 3ms/step - loss: 0.2248 - val_loss: 0.3633 - learning_rate: 1.0000e-03
Epoch 5/50
1405/1405 - 4s - 3ms/step - loss: 0.2041 - val_loss: 0.3035 - learning_rate: 1.0000e-03
Epoch 6/50
1405/1405 - 4s - 3ms/step - loss: 0.1872 - val_loss: 0.3313 - learning_rate: 1.0000e-03
Epoch 7/50
1405/1405 - 4s - 3ms/step - loss: 0.1763 - val_loss: 0.3021 - learning_rate: 1.0000e-03
Epoch 8/50
1405/1405 - 4s - 3ms/step - loss: 0.1663 - val_loss: 0.2473 - learning_rate: 1.0000e-03
Epoch 9/50
1405/1405 - 4s - 3ms/step - loss: 0.1600 - val_loss: 0.2341 - learning_rate: 1.0000e-03
Epoch 10/50
1405/1405 - 4s - 3ms/step - loss: 0.1534 - val_loss: 0.2666 - learning_rate: 1.0000e-03
Epoch 11/

**Production model** — five seeded fits (seeds 1–5); each saves its model and its fitted/forecast predictions.

In [7]:
# run all country model
# Fits the all-country model five times (seeds 1-5). Each replicate saves its
# Keras model plus its fitted (training) and forecast (test) prediction tables.

# The prediction inputs depend only on the raw arrays, not on the seed or the
# fitted model, so they are built once here instead of once per replicate.
training_input_features = (tf.convert_to_tensor((asfr_training[:,1] - 1950) / (2015-1950), dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(asfr_training[:,2], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(asfr_training[:,0], dtype=tf.float32))  # Geo

test_input_features = (tf.convert_to_tensor((asfr_test[:,1] - 1950) / (2015-1950), dtype=tf.float32),  # Normalized year
                       tf.convert_to_tensor(asfr_test[:,2], dtype=tf.float32),  # Age
                       tf.convert_to_tensor(asfr_test[:,0], dtype=tf.float32))  # Geo

# The raw arrays with column 3 removed label every prediction row.
inputs = np.delete(asfr_training, 3, axis=1)
inputs_test = np.delete(asfr_test, 3, axis=1)

# Make sure the model folder exists up front, so a finished training run is
# not lost to a missing directory at save time.
os.makedirs("../models", exist_ok=True)

for i in range(1,6):
    # Set reproducible seeds per iteration
    np.random.seed(i)
    tf.random.set_seed(i)
    random.seed(i)
    # NOTE: Python reads PYTHONHASHSEED only at interpreter start-up, so this
    # assignment does not change hashing in the running kernel; it is only
    # inherited by subprocesses started afterwards.
    os.environ['PYTHONHASHSEED'] = str(i)

    model_country, loss_info_country = training_functions.run_deep_model(train_prepped, test_prepped, country_geo_dim, 
                                                                         epochs=50, 
                                                                         steps_per_epoch=1405, 
                                                                         lograte=True)

    # Predict on the training split first, then the test split, and append
    # the model output after the identifying columns.
    training_predictions = np.column_stack((inputs, model_country.predict(training_input_features)))
    test_predictions = np.column_stack((inputs_test, model_country.predict(test_input_features)))

    model_country.save(f"../models/dl_model_{i}.keras")

    np.savetxt(f"../data/dl_fitted_{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_forecast_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/50
1405/1405 - 5s - 3ms/step - loss: 1.1511 - val_loss: 0.8871 - learning_rate: 1.0000e-03
Epoch 2/50
1405/1405 - 4s - 3ms/step - loss: 0.4694 - val_loss: 0.5682 - learning_rate: 1.0000e-03
Epoch 3/50
1405/1405 - 4s - 3ms/step - loss: 0.3398 - val_loss: 0.3622 - learning_rate: 1.0000e-03
Epoch 4/50
1405/1405 - 4s - 3ms/step - loss: 0.2774 - val_loss: 0.3325 - learning_rate: 1.0000e-03
Epoch 5/50
1405/1405 - 3s - 2ms/step - loss: 0.2473 - val_loss: 0.3193 - learning_rate: 1.0000e-03
Epoch 6/50
1405/1405 - 4s - 2ms/step - loss: 0.2246 - val_loss: 0.3116 - learning_rate: 1.0000e-03
Epoch 7/50
1405/1405 - 3s - 2ms/step - loss: 0.2096 - val_loss: 0.3270 - learning_rate: 1.0000e-03
Epoch 8/50
1405/1405 - 4s - 3ms/step - loss: 0.1960 - val_loss: 0.2975 - learning_rate: 1.0000e-03
Epoch 9/50
1405/1405 - 3s - 2ms/step - loss: 0.1865 - val_loss: 0.3052 - learning_rate: 1.0000e-03
Epoch 10/50
1405/1405 - 3s - 2ms/step - loss: 0.1795 - val_loss: 0.2797 - learning_rate: 1.0000e-03
Epoch 11/

**Non-log model** — the all-country model fitted with `changeratetolog=False` and `lograte=False`.

In [15]:
# prep data
# Same preparation as the log-rate cells, but with changeratetolog=False.
# NOTE: this rebinds train_prepped / test_prepped, the names the log-rate
# cells above also use. Re-run the log-rate prep cell before re-running any
# lograte=True fit after this point.
train_prepped, test_prepped = (
    training_functions.prep_data(split_data, mode=split_mode, changeratetolog=False)
    for split_data, split_mode in ((asfr_training, "train"), (asfr_test, "test"))
)

In [16]:
# Fit the all-country model on the data prepared without the log transform.
model_country, loss_info_country = training_functions.run_deep_model(
    train_prepped,
    test_prepped,
    country_geo_dim,
    epochs=50,
    steps_per_epoch=1405,
    lograte=False,
)

# Prediction inputs as float32 tensors, ordered (normalized year, age, geo).
# Column 1 is rescaled with the constants 1950 and 2015.
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (asfr_training[:, 1] - 1950) / (2015 - 1950),  # Normalized year
        asfr_training[:, 2],  # Age
        asfr_training[:, 0],  # Geo
    )
)
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (asfr_test[:, 1] - 1950) / (2015 - 1950),  # Normalized year
        asfr_test[:, 2],  # Age
        asfr_test[:, 0],  # Geo
    )
)

# The raw arrays with column 3 removed label every prediction row.
inputs = np.delete(asfr_training, 3, axis=1)
inputs_test = np.delete(asfr_test, 3, axis=1)

# Predict on the training split first, then the test split, and append the
# model output after the identifying columns.
training_predictions = np.column_stack(
    (inputs, model_country.predict(training_input_features))
)
test_predictions = np.column_stack(
    (inputs_test, model_country.predict(test_input_features))
)

# Write the fitted (training) and forecast (test) tables to disk.
np.savetxt(f"../data/dl_fitted_nonlog.txt", training_predictions)
np.savetxt(f"../data/dl_forecast_nonlog.txt", test_predictions)

Epoch 1/50
1405/1405 - 5s - 3ms/step - loss: 0.0015 - val_loss: 6.8497e-04 - learning_rate: 1.0000e-03
Epoch 2/50
1405/1405 - 3s - 2ms/step - loss: 4.8703e-04 - val_loss: 5.8749e-04 - learning_rate: 1.0000e-03
Epoch 3/50
1405/1405 - 3s - 2ms/step - loss: 2.6083e-04 - val_loss: 2.3624e-04 - learning_rate: 1.0000e-03
Epoch 4/50
1405/1405 - 3s - 2ms/step - loss: 1.7099e-04 - val_loss: 1.7458e-04 - learning_rate: 1.0000e-03
Epoch 5/50
1405/1405 - 3s - 2ms/step - loss: 1.3381e-04 - val_loss: 1.4241e-04 - learning_rate: 1.0000e-03
Epoch 6/50
1405/1405 - 3s - 2ms/step - loss: 1.1481e-04 - val_loss: 1.5055e-04 - learning_rate: 1.0000e-03
Epoch 7/50
1405/1405 - 3s - 2ms/step - loss: 9.8164e-05 - val_loss: 1.5354e-04 - learning_rate: 1.0000e-03
Epoch 8/50
1405/1405 - 3s - 2ms/step - loss: 8.4813e-05 - val_loss: 1.2964e-04 - learning_rate: 1.0000e-03
Epoch 9/50
1405/1405 - 3s - 2ms/step - loss: 7.4455e-05 - val_loss: 1.2589e-04 - learning_rate: 1.0000e-03
Epoch 10/50
1405/1405 - 3s - 2ms/step - l