## Deep Learning for Mortality Prediction (DLMP)

### Import packages 

In [1]:
import tensorflow as tf
import numpy as np
import os as os
tfkl = tf.keras.layers
import random

Random seeds are set per iteration inside each training loop below, rather than once globally.

### Import functions

In [2]:
# Project-local helpers used below for data preparation and model training.
import importlib

import training_functions

# Re-import the module so edits to training_functions.py take effect without restarting the kernel.
importlib.reload(training_functions)

<module 'training_functions' from '/Users/paigepark/Desktop/deep-mort/code/training_functions.py'>

### Import data

#### State data

In [3]:
# Read the state-level training and test splits from plain-text numeric files.
state_training, state_test = (
    np.loadtxt('../data/state_training.txt'),
    np.loadtxt('../data/state_test.txt'),
)

#### Country data

In [4]:
# Read the country-level training and test splits from plain-text numeric files.
# Columns as used by the cells below: 0 = geography code, 1 = gender, 2 = year, 3 = age;
# column 4 is dropped before predictions are attached (presumably the observed rate — confirm).
country_training, country_test = (
    np.loadtxt('../data/country_training.txt'),
    np.loadtxt('../data/country_test.txt'),
)

#### Combined data

In [5]:
# Read the combined training and test splits from plain-text numeric files.
combined_training, combined_test = (
    np.loadtxt('../data/combined_training.txt'),
    np.loadtxt('../data/combined_test.txt'),
)

In [6]:
# Load the geography key; each row is unpacked in the next cell as (geography name, numeric code).
geos_key = np.load('../data/geos_key.npy')

In [7]:
# Lookup from integer geography code to geography name (used for progress messages and output filenames).
geo_dict = dict((int(code), geo) for geo, code in geos_key)

#### Single population models

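Below, I am training individual models for each population. I am only saving MSEs rather than predictions since the prediction data would be too large to store. (**Note:** the loop below currently writes each model's test-set predictions to compressed `.npz` files rather than MSEs — update this text or the code so they agree.)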

In [10]:
# Distinct geography codes present in the country training data (column 0).
n_pop = np.unique(country_training[:, 0])
# Geography dimension: largest code present plus one.
geo_dim = int(n_pop.max() + 1)
geo_dim

90

In [11]:
import tensorflow as tf

# Report the TensorFlow build and the GPUs it can see before starting the training loops.
print("TensorFlow version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
gpu_devices = tf.config.list_physical_devices('GPU')
print("GPU devices:", gpu_devices)

# Optional: turn on memory growth for every visible GPU to prevent over-allocation.
for gpu in gpu_devices:
    tf.config.experimental.set_memory_growth(gpu, True)


TensorFlow version: 2.17.0
Built with CUDA: False
GPU devices: []


In [12]:
# Train one model per (seed, gender, country) combination and, for every run, save the
# fitted model and its test-set predictions.
for s in range(1,6):
    # Set reproducible seeds per iteration
    np.random.seed(s)
    tf.random.set_seed(s)
    random.seed(s)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this assignment only
    # affects subprocesses launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(s)

    for j in range(0,2):
        # NOTE(review): the lower bound 50 is presumably the first country code — confirm against geos_key.
        for i in range(50, geo_dim):
            country_index = i
            gender_index = j
            geo_name = geo_dict[i]

            # Restrict both splits to the rows of this country (column 0) and gender (column 1).
            single_country_training = country_training[(country_training[:,0] == country_index) & (country_training[:,1] == gender_index)]
            single_country_test = country_test[(country_test[:,0] == country_index) & (country_test[:,1] == gender_index)]

            single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
            single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
            # Geography dimension handed to the model: this country's code + 1.
            single_geo_dim = country_index + 1

            print(f"Running model {geo_name}: Gender {j}")

            model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped,
                                                                          single_country_test_prepped,
                                                                          single_geo_dim, epochs=30,
                                                                          steps_per_epoch=500, lograte=True)

            # Build the prediction inputs in the order (year, age, geography, gender);
            # year is rescaled as (year - 1959) / 60.
            tc = single_country_test
            years = (tc[:,2] - 1959) / 60
            ages  = tc[:,3]
            geos  = tc[:,0].astype(np.int32)
            genders = tc[:,1]

            test_input_features = (
                tf.convert_to_tensor(years, dtype=tf.float32),
                tf.convert_to_tensor(ages, dtype=tf.float32),
                tf.convert_to_tensor(geos, dtype=tf.int32),      # ensure ints
                tf.convert_to_tensor(genders, dtype=tf.float32),
            )

            test_predictions = model_single.predict(test_input_features)

            # Drop column 4 of the test rows and append the model output after the remaining columns.
            inputs_test = np.delete(single_country_test, 4, axis=1)
            test_predictions = np.column_stack((inputs_test, test_predictions))

            # Include the seed in the model filename, as the predictions file already does;
            # without it every seed overwrote the previous seed's saved model.
            model_single.save(f"../models/{geo_name}_gender_{j}_iter_{s}.keras")

            np.savez_compressed(f"../data/{geo_name}_gender_{j}_iter_{s}.npz", test_predictions)

print(f"Finished training for {s} iterations.")

Running model AUS: Gender 0
Epoch 1/30
500/500 - 6s - 12ms/step - loss: 5.3884 - val_loss: 0.5356 - learning_rate: 0.0010
Epoch 2/30


KeyboardInterrupt: 

#### All Country Model

In [8]:
# Run the shared preprocessing on the country training and test splits
# (train/test mode selected per split, with changeratetolog enabled for both).
country_train_prepped, country_test_prepped = (
    training_functions.prep_data(country_training, mode="train", changeratetolog=True),
    training_functions.prep_data(country_test, mode="test", changeratetolog=True),
)

In [9]:
# get the proper geography input dimension for model set up
# Distinct geography codes in the training data (no longer used for the dimension itself).
unique_vals = tf.unique(country_training[:, 0]).y
# Size the dimension from the largest geography code present (max + 1), the same rule used
# for `geo_dim`, instead of "number of unique codes + 50": the count-based formula relies on
# a hard-coded offset and under-sizes the dimension if any code is missing from training.
country_geo_dim = int(country_training[:, 0].max()) + 1
country_geo_dim

90

In [11]:
# run country model
# Repeat training with five different seeds; each repetition saves the fitted model, the
# training-set predictions and the test-set predictions.
for i in range(1,6):
    # Set reproducible seeds per iteration
    np.random.seed(i)
    tf.random.set_seed(i)
    random.seed(i)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this assignment only
    # affects subprocesses launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(i)

    model_country, loss_info_country = training_functions.run_deep_model(country_train_prepped, country_test_prepped, country_geo_dim,
                                                                         epochs=20, steps_per_epoch=1405, lograte=True)

    # Prediction inputs in the order (year, age, geography, gender).
    training_input_features = (tf.convert_to_tensor((country_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                            tf.convert_to_tensor(country_training[:,3], dtype=tf.float32),  # Age
                            tf.convert_to_tensor(country_training[:,0], dtype=tf.float32),  # Geography
                            tf.convert_to_tensor(country_training[:,1], dtype=tf.float32))  # Gender

    test_input_features = (tf.convert_to_tensor((country_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                        tf.convert_to_tensor(country_test[:,3], dtype=tf.float32),  # Age
                        tf.convert_to_tensor(country_test[:,0], dtype=tf.float32),  # Geography
                        tf.convert_to_tensor(country_test[:,1], dtype=tf.float32))  # Gender

    training_predictions = model_country.predict(training_input_features)

    test_predictions = model_country.predict(test_input_features)

    # Drop column 4 of each split and append the model output after the remaining columns.
    inputs = np.delete(country_training, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    inputs_test = np.delete(country_test, 4, axis=1)
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_country.save(f"../models/dl_country_model_{i}.keras")

    # Write the two prediction sets to different files. Previously both went to the
    # forecast file, so the training predictions were overwritten immediately.
    # The forecast file still ends up holding the test-set predictions, as before.
    np.savetxt(f"../data/dl_country_fitted_{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_country_forecast_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/20
1405/1405 - 9s - 6ms/step - loss: 1.9850 - val_loss: 0.2581 - learning_rate: 0.0010
Epoch 2/20
1405/1405 - 7s - 5ms/step - loss: 0.3244 - val_loss: 0.2397 - learning_rate: 0.0010
Epoch 3/20
1405/1405 - 7s - 5ms/step - loss: 0.2508 - val_loss: 0.1768 - learning_rate: 0.0010
Epoch 4/20
1405/1405 - 7s - 5ms/step - loss: 0.2251 - val_loss: 0.1754 - learning_rate: 0.0010
Epoch 5/20
1405/1405 - 7s - 5ms/step - loss: 0.2027 - val_loss: 0.2596 - learning_rate: 0.0010
Epoch 6/20
1405/1405 - 7s - 5ms/step - loss: 0.1881 - val_loss: 0.1946 - learning_rate: 0.0010
Epoch 7/20
1405/1405 - 7s - 5ms/step - loss: 0.1853 - val_loss: 0.1806 - learning_rate: 0.0010
Epoch 8/20
1405/1405 - 7s - 5ms/step - loss: 0.1737 - val_loss: 0.1815 - learning_rate: 2.5000e-04
Epoch 9/20
1405/1405 - 7s - 5ms/step - loss: 0.1658 - val_loss: 0.1872 - learning_rate: 2.5000e-04
Epoch 10/20
1405/1405 - 7s - 5ms/step - loss: 0.1720 - val_loss: 0.1998 - learning_rate: 2.5000e-04
Epoch 11/20
1405/1405 - 8s - 6ms/step

### Combined (all country / all state) DL Model

In [12]:
# Run the shared preprocessing on the combined training and test splits
# (train/test mode selected per split, with changeratetolog enabled for both).
combined_train_prepped, combined_test_prepped = (
    training_functions.prep_data(combined_training, mode="train", changeratetolog=True),
    training_functions.prep_data(combined_test, mode="test", changeratetolog=True),
)

In [13]:
# get the proper geography input dimension for model set up
# Distinct geography codes in the training data (no longer used for the dimension itself).
unique_vals = tf.unique(combined_training[:, 0]).y
# Size the dimension from the largest geography code present (max + 1) rather than from the
# number of distinct codes: the count is too small whenever the codes are not exactly
# 0..N-1, e.g. when one geography has no training rows.
combined_geo_dim = int(combined_training[:, 0].max()) + 1
print(combined_geo_dim)

90


In [14]:
# run combined model
# Repeat training with five different seeds; each repetition saves the fitted model, the
# training-set predictions and the test-set predictions.
for i in range(1,6):
    # Set reproducible seeds per iteration
    np.random.seed(i)
    tf.random.set_seed(i)
    random.seed(i)
    # NOTE: PYTHONHASHSEED is read at interpreter start-up, so this assignment only
    # affects subprocesses launched afterwards, not the running kernel.
    os.environ['PYTHONHASHSEED'] = str(i)

    model_combined, loss_info_combined = training_functions.run_deep_model(combined_train_prepped, combined_test_prepped,
                                                                           combined_geo_dim, epochs=20, steps_per_epoch=2000,
                                                                           lograte=True)

    # Prediction inputs in the order (year, age, geography, gender).
    training_input_features = (tf.convert_to_tensor((combined_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                            tf.convert_to_tensor(combined_training[:,3], dtype=tf.float32),  # Age
                            tf.convert_to_tensor(combined_training[:,0], dtype=tf.float32),  # Geography
                            tf.convert_to_tensor(combined_training[:,1], dtype=tf.float32))  # Gender

    test_input_features = (tf.convert_to_tensor((combined_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                        tf.convert_to_tensor(combined_test[:,3], dtype=tf.float32),  # Age
                        tf.convert_to_tensor(combined_test[:,0], dtype=tf.float32),  # Geography
                        tf.convert_to_tensor(combined_test[:,1], dtype=tf.float32))  # Gender

    training_predictions = model_combined.predict(training_input_features)

    test_predictions = model_combined.predict(test_input_features)

    # Drop column 4 of each split and append the model output after the remaining columns.
    inputs = np.delete(combined_training, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    inputs_test = np.delete(combined_test, 4, axis=1)
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_combined.save(f"../models/combined_model_{i}.keras")

    # Write the two prediction sets to different files. Previously both went to the
    # forecast file, so the training predictions were overwritten immediately.
    # The forecast file still ends up holding the test-set predictions, as before.
    np.savetxt(f"../data/dl_combined_fitted_{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_combined_forecast_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/20
2000/2000 - 13s - 6ms/step - loss: 1.5381 - val_loss: 0.2362 - learning_rate: 0.0010
Epoch 2/20
2000/2000 - 11s - 6ms/step - loss: 0.2940 - val_loss: 0.2458 - learning_rate: 0.0010
Epoch 3/20
2000/2000 - 14s - 7ms/step - loss: 0.2388 - val_loss: 0.2514 - learning_rate: 0.0010
Epoch 4/20
2000/2000 - 18s - 9ms/step - loss: 0.2123 - val_loss: 0.2381 - learning_rate: 0.0010
Epoch 5/20
2000/2000 - 14s - 7ms/step - loss: 0.1952 - val_loss: 0.1732 - learning_rate: 2.5000e-04
Epoch 6/20
2000/2000 - 14s - 7ms/step - loss: 0.1878 - val_loss: 0.2045 - learning_rate: 2.5000e-04
Epoch 7/20
2000/2000 - 15s - 7ms/step - loss: 0.1880 - val_loss: 0.1694 - learning_rate: 2.5000e-04
Epoch 8/20
2000/2000 - 13s - 7ms/step - loss: 0.1875 - val_loss: 0.1859 - learning_rate: 2.5000e-04
Epoch 9/20
2000/2000 - 15s - 7ms/step - loss: 0.1824 - val_loss: 0.1791 - learning_rate: 2.5000e-04
Epoch 10/20
2000/2000 - 14s - 7ms/step - loss: 0.1783 - val_loss: 0.1857 - learning_rate: 2.5000e-04
Epoch 11/20
200