## Deep Learning for Mortality Prediction (DLMP)

### Import packages 

In [4]:
# Third-party packages used throughout the notebook.
import tensorflow as tf
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
import keras
# Short alias for the Keras layers namespace.
tfkl = tf.keras.layers

### Import functions

In [5]:
# Project-local helpers (prep_data, run_deep_model) live in training_functions.py.
import training_functions
import importlib

# Re-execute the module so that edits made to training_functions.py after the
# first import are picked up without restarting the kernel.
importlib.reload(training_functions)

<module 'training_functions' from '/Users/paigepark/Desktop/deep-mort/code/training_functions.py'>

### Import data

#### State data

In [None]:
# Read the state-level training and test splits (in that order) into NumPy arrays.
state_training, state_test = (
    np.loadtxt(split_path)
    for split_path in ('../data/state_training.txt', '../data/state_test.txt')
)

#### Country data

In [None]:
# Read the country-level training and test splits (in that order) into NumPy arrays.
country_training, country_test = (
    np.loadtxt(split_path)
    for split_path in ('../data/country_training.txt', '../data/country_test.txt')
)

#### Combined data

In [None]:
# Read the combined (state + country) training and test splits (in that order) into NumPy arrays.
combined_training, combined_test = (
    np.loadtxt(split_path)
    for split_path in ('../data/combined_training.txt', '../data/combined_test.txt')
)

In [9]:
# Load the geography key. Each row is later unpacked as a (name, code) pair
# when geo_dict is built — presumably one row per state/country; verify against the file.
geos_key = np.load('../data/geos_key.npy')

In [13]:
# Lookup table from integer geography code to geography name, built from the
# (name, code) rows of geos_key. Later rows win if a code appears more than once.
geo_dict = dict((int(geo_code), geo_label) for geo_label, geo_code in geos_key)

#### Single population models

Below, I train individual models for each population. I save only the MSEs rather than the predictions, since the prediction data would be too large to store.

In [None]:
# Train one model per (gender, country) population on the country data, save
# each fitted model, and write the loss returned by run_deep_model for every
# population of a gender to a per-gender text file.
for gender_index in range(1, 2):
    # Start from an empty dict for every gender so that the per-gender file
    # written below can never carry over entries from a previous gender.
    mses = {}
    for country_index in range(50, 88):
        geo_name = geo_dict[country_index]

        # Column 0 holds the geography code and column 1 the gender code.
        single_country_training = country_training[(country_training[:,0] == country_index) & (country_training[:,1] == gender_index)]
        single_country_test = country_test[(country_test[:,0] == country_index) & (country_test[:,1] == gender_index)]

        # A population with no rows cannot be fitted or evaluated; skip it
        # explicitly instead of failing part-way through a long run.
        if single_country_training.shape[0] == 0 or single_country_test.shape[0] == 0:
            print(f"Skipping {geo_name} (index {country_index}): Gender {gender_index} - no data")
            continue

        single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
        single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
        # Geography dimension is the population's own code + 1
        # (presumably so that the code is a valid index inside the model — confirm in run_deep_model).
        single_geo_dim = country_index + 1

        print(f"Running model {geo_name}: Gender {gender_index}")

        model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped, single_country_test_prepped, single_geo_dim, 30, True)

        mses[geo_name] = loss_single

        model_single.save(f"../models/{geo_name}_gender_{gender_index}.keras")

    # One "name: loss" line per population of this gender.
    with open(f"../data/single_country_mses_gender_{gender_index}.txt", "w") as f:
        for key, value in mses.items():
            f.write(f"{key}: {value}\n")

Running model AUS: Gender 1
Epoch 1/30
1000/1000 - 7s - 7ms/step - loss: 2.3368 - val_loss: 0.0604 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 6s - 6ms/step - loss: 0.2289 - val_loss: 0.0371 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.1422 - val_loss: 0.0322 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 6s - 6ms/step - loss: 0.1060 - val_loss: 0.0287 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 6s - 6ms/step - loss: 0.0837 - val_loss: 0.0309 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 6s - 6ms/step - loss: 0.0725 - val_loss: 0.0517 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 7s - 7ms/step - loss: 0.0643 - val_loss: 0.0771 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 7s - 7ms/step - loss: 0.0557 - val_loss: 0.0275 - learning_rate: 2.5000e-04
Epoch 9/30
1000/1000 - 7s - 7ms/step - loss: 0.0530 - val_loss: 0.0398 - learning_rate: 2.5000e-04
Epoch 10/30
1000/1000 - 6s - 6ms/step - loss: 0.0529 - val_loss: 0.0275 - learning_rate: 2.5000e-04
Epoch 11/

I also tested single-country models trained on both genders together. Understandably, the error is lower here, but the paper uses the single-population models for comparison since they are more similar to classical models.

In [32]:
# Train one model per country using the rows of both genders together, save
# each fitted model, and record the loss returned by run_deep_model.
mses = {}

for country_index in range(50, 88):
    geo_name = geo_dict[country_index]

    # Column 0 holds the geography code.
    single_country_training = country_training[country_training[:,0] == country_index]
    single_country_test = country_test[country_test[:,0] == country_index]

    # A country with no rows cannot be fitted or evaluated; skip it explicitly
    # instead of failing part-way through a long run.
    if single_country_training.shape[0] == 0 or single_country_test.shape[0] == 0:
        print(f"Skipping {geo_name} (index {country_index}) - no data")
        continue

    single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
    single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
    # Geography dimension is the country's own code + 1
    # (presumably so that the code is a valid index inside the model — confirm in run_deep_model).
    single_geo_dim = country_index + 1

    print(f"Running model {geo_name}")

    model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped, single_country_test_prepped, single_geo_dim, 30, True)

    mses[geo_name] = loss_single

    model_single.save(f"../models/{geo_name}_new.keras")

    # Rewritten after every country on purpose: the file then always holds the
    # results obtained so far, even if a later model fails.
    with open("../data/single_country_mses_new.txt", "w") as f:
        for key, value in mses.items():
            f.write(f"{key}: {value}\n")

Running model AUS
Epoch 1/30
1000/1000 - 7s - 7ms/step - loss: 2.6538 - val_loss: 0.1177 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 5s - 5ms/step - loss: 0.2695 - val_loss: 0.0512 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 5s - 5ms/step - loss: 0.1744 - val_loss: 0.0473 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 5s - 5ms/step - loss: 0.1281 - val_loss: 0.0286 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 5s - 5ms/step - loss: 0.1031 - val_loss: 0.0337 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 5s - 5ms/step - loss: 0.0875 - val_loss: 0.0357 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 5s - 5ms/step - loss: 0.0752 - val_loss: 0.0359 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 5s - 5ms/step - loss: 0.0665 - val_loss: 0.0227 - learning_rate: 2.5000e-04
Epoch 9/30
1000/1000 - 5s - 5ms/step - loss: 0.0652 - val_loss: 0.0255 - learning_rate: 2.5000e-04
Epoch 10/30
1000/1000 - 5s - 5ms/step - loss: 0.0640 - val_loss: 0.0263 - learning_rate: 2.5000e-04
Epoch 11/30
1000/10

#### All Country Model

In [None]:
# Run the shared preprocessing over the country splits: training split first
# (mode "train"), then the test split (mode "test"), both with changeratetolog on.
country_train_prepped, country_test_prepped = (
    training_functions.prep_data(raw_split, mode=split_mode, changeratetolog=True)
    for raw_split, split_mode in ((country_training, "train"), (country_test, "test"))
)

In [10]:
# Geography input dimension for the all-country model.
# The single-population cells size this as `country_index + 1`, i.e. the
# largest geography code + 1. Derive the same quantity directly from column 0
# instead of adding a hard-coded offset of 50 to the number of distinct codes,
# so the value stays correct if the code range of the data changes.
country_geo_dim = int(country_training[:, 0].max()) + 1
country_geo_dim

88

In [None]:
# run country model
# Fit the all-country model five times; for every run save the model and its
# predictions on the training and test rows.

# The prediction inputs do not depend on the fitted model, so build them once
# instead of once per run.
# NOTE(review): the year scaling (year - 1959) / 60 presumably mirrors what prep_data does — confirm.
training_input_features = (tf.convert_to_tensor((country_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(country_training[:,3], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(country_training[:,0], dtype=tf.float32),  # Geography
                           tf.convert_to_tensor(country_training[:,1], dtype=tf.float32))  # Gender

test_input_features = (tf.convert_to_tensor((country_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                       tf.convert_to_tensor(country_test[:,3], dtype=tf.float32),  # Age
                       tf.convert_to_tensor(country_test[:,0], dtype=tf.float32),  # Geography
                       tf.convert_to_tensor(country_test[:,1], dtype=tf.float32))  # Gender

# Every column except column 4 (presumably the observed rate — confirm);
# the model output is appended to these as the last column.
inputs = np.delete(country_training, 4, axis=1)
inputs_test = np.delete(country_test, 4, axis=1)

for i in range(1,6):
    model_country, loss_info_country = training_functions.run_deep_model(country_train_prepped, country_test_prepped, country_geo_dim, 30, lograte=True)

    training_predictions = np.column_stack((inputs, model_country.predict(training_input_features)))
    test_predictions = np.column_stack((inputs_test, model_country.predict(test_input_features)))

    model_country.save(f"../models/country_model_{i}.keras")

    np.savetxt(f"../data/country_training_predictions_{i}.txt", training_predictions)
    np.savetxt(f"../data/country_test_predictions_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/30
1000/1000 - 9s - 9ms/step - loss: 2.7208 - val_loss: 0.2944 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 6s - 6ms/step - loss: 0.3960 - val_loss: 0.2863 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.3058 - val_loss: 0.2643 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 5s - 5ms/step - loss: 0.2735 - val_loss: 0.2214 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 5s - 5ms/step - loss: 0.2346 - val_loss: 0.1842 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 6s - 6ms/step - loss: 0.2155 - val_loss: 0.2115 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 6s - 6ms/step - loss: 0.2072 - val_loss: 0.2410 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 6s - 6ms/step - loss: 0.1999 - val_loss: 0.1659 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 7s - 7ms/step - loss: 0.1896 - val_loss: 0.1926 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 7s - 7ms/step - loss: 0.1807 - val_loss: 0.1799 - learning_rate: 0.0010
Epoch 11/30
1000/1000 - 6s - 6ms/step - loss: 0.1

### Combined (all country / all state) DL Model

In [None]:
# Run the shared preprocessing over the combined splits: training split first
# (mode "train"), then the test split (mode "test"), both with changeratetolog on.
combined_train_prepped, combined_test_prepped = (
    training_functions.prep_data(raw_split, mode=split_mode, changeratetolog=True)
    for raw_split, split_mode in ((combined_training, "train"), (combined_test, "test"))
)

In [33]:
# Geography input dimension for the combined model.
# The single-population cells size this as `country_index + 1`, i.e. the
# largest geography code + 1. Counting distinct codes only gives that value
# when the codes run from 0 upward without gaps, so derive it from the largest
# code in column 0 instead.
combined_geo_dim = int(combined_training[:, 0].max()) + 1
print(combined_geo_dim)

88


In [35]:
# run combined model
# Fit the combined (all country / all state) model five times; for every run
# save the model and its predictions on the training and test rows.

# The prediction inputs do not depend on the fitted model, so build them once
# instead of once per run.
# NOTE(review): the year scaling (year - 1959) / 60 presumably mirrors what prep_data does — confirm.
training_input_features = (tf.convert_to_tensor((combined_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(combined_training[:,3], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(combined_training[:,0], dtype=tf.float32),  # Geography
                           tf.convert_to_tensor(combined_training[:,1], dtype=tf.float32))  # Gender

test_input_features = (tf.convert_to_tensor((combined_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                       tf.convert_to_tensor(combined_test[:,3], dtype=tf.float32),  # Age
                       tf.convert_to_tensor(combined_test[:,0], dtype=tf.float32),  # Geography
                       tf.convert_to_tensor(combined_test[:,1], dtype=tf.float32))  # Gender

# Every column except column 4 (presumably the observed rate — confirm);
# the model output is appended to these as the last column.
inputs = np.delete(combined_training, 4, axis=1)
inputs_test = np.delete(combined_test, 4, axis=1)

for i in range(1,6):
    model_combined, loss_info_combined = training_functions.run_deep_model(combined_train_prepped, combined_test_prepped, combined_geo_dim, 30, lograte=True)

    training_predictions = np.column_stack((inputs, model_combined.predict(training_input_features)))
    test_predictions = np.column_stack((inputs_test, model_combined.predict(test_input_features)))

    model_combined.save(f"../models/combined_model_{i}.keras")

    np.savetxt(f"../data/combined_training_predictions_{i}.txt", training_predictions)
    np.savetxt(f"../data/combined_test_predictions_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/30
1000/1000 - 8s - 8ms/step - loss: 2.7743 - val_loss: 0.3106 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 5s - 5ms/step - loss: 0.4051 - val_loss: 0.2424 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.3075 - val_loss: 0.2293 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 7s - 7ms/step - loss: 0.2779 - val_loss: 0.2032 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 6s - 6ms/step - loss: 0.2403 - val_loss: 0.2275 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 7s - 7ms/step - loss: 0.2281 - val_loss: 0.2104 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 7s - 7ms/step - loss: 0.2132 - val_loss: 0.2463 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 7s - 7ms/step - loss: 0.2064 - val_loss: 0.1842 - learning_rate: 2.5000e-04
Epoch 9/30
1000/1000 - 6s - 6ms/step - loss: 0.2034 - val_loss: 0.1908 - learning_rate: 2.5000e-04
Epoch 10/30
1000/1000 - 6s - 6ms/step - loss: 0.1971 - val_loss: 0.1945 - learning_rate: 2.5000e-04
Epoch 11/30
1000/1000 - 6s - 6ms/step