## Deep Learning for Mortality Prediction (DLMP)

### Import packages 

In [1]:
import tensorflow as tf
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
import keras
tfkl = tf.keras.layers

### Import functions

In [2]:
import training_functions
import importlib

importlib.reload(training_functions)

<module 'training_functions' from '/Users/paigepark/Desktop/deep-mort/code/training_functions.py'>

### Import data

#### State data

In [3]:
state_training = np.loadtxt('../data/state_training.txt')
state_test = np.loadtxt('../data/state_test.txt')

#### Country data

In [4]:
country_training = np.loadtxt('../data/country_training_new.txt')
country_test = np.loadtxt('../data/country_test_new.txt')

#### Combined data

In [5]:
combined_training = np.loadtxt('../data/combined_training_new.txt')
combined_test = np.loadtxt('../data/combined_test_new.txt')

In [6]:
geos_key = np.load('../data/geos_key_new.npy')

In [7]:
geo_dict = {int(code): geo for geo, code in geos_key}

#### Single population models

Below, I am training indiviudal models for each population. I am only saving mses rather than predictions since the prediction data would be too large to store. 

In [8]:
n_pop = np.unique(country_training[:, 0])
geo_dim = int(max(n_pop) + 1)
geo_dim

90

In [None]:
mses = {}
for s in range(1,6):
    for j in range(0,1):
        for i in range(50, geo_dim):
            country_index = i
            gender_index = j
            single_country_training = country_training[(country_training[:,0] == country_index) & (country_training[:,1] == gender_index)]
            single_country_test = country_test[(country_test[:,0] == country_index) & (country_test[:,1] == gender_index)]

            single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
            single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
            single_geo_dim = country_index + 1

            print(f"Running model {geo_dict[i]}: Gender {j}")

            model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped, single_country_test_prepped, single_geo_dim, 30, True)
        
            test_input_features = (tf.convert_to_tensor((country_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                            tf.convert_to_tensor(country_test[:,3], dtype=tf.float32),  # Age
                            tf.convert_to_tensor(country_test[:,0], dtype=tf.float32),  # Geography
                            tf.convert_to_tensor(country_test[:,1], dtype=tf.float32))  # Gender


            test_predictions = model_single.predict(test_input_features)

            inputs_test = np.delete(country_test, 4, axis=1)
            test_predictions = np.column_stack((inputs_test, test_predictions))

            geo_name = geo_dict[i]

            model_single.save(f"../models/{geo_name}_gender_{j}.keras")   

            np.savez_compressed(f"../data/{geo_name}_gender_{j}_iter_{s}.npz", test_predictions) 

print(f"Finished training for {s} iterations.")

Running model AUS: Gender 0
Epoch 1/30
1000/1000 - 8s - 8ms/step - loss: 2.9178 - val_loss: 0.0683 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 6s - 6ms/step - loss: 0.2477 - val_loss: 0.0638 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.1561 - val_loss: 0.0466 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 6s - 6ms/step - loss: 0.1139 - val_loss: 0.0395 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 6s - 6ms/step - loss: 0.0922 - val_loss: 0.0459 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 6s - 6ms/step - loss: 0.0789 - val_loss: 0.0334 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 6s - 6ms/step - loss: 0.0722 - val_loss: 0.0380 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 6s - 6ms/step - loss: 0.0667 - val_loss: 0.0720 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 6s - 6ms/step - loss: 0.0628 - val_loss: 0.0404 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 6s - 6ms/step - loss: 0.0579 - val_loss: 0.0361 - learning_rate: 2.5000e-04
Epoch 11/30
1000/

#### All Country Model

In [25]:
# prep data
country_train_prepped = training_functions.prep_data(country_training, mode="train", changeratetolog=True)
country_test_prepped = training_functions.prep_data(country_test, mode="test", changeratetolog=True)

In [26]:
# get the proper geography input dimension for model set up 
unique_vals = tf.unique(country_training[:, 0]).y
country_geo_dim = np.array(tf.size(unique_vals)).item()
country_geo_dim = country_geo_dim + 50
country_geo_dim

90

In [27]:
# run country model
for i in range(1,6):
    model_country, loss_info_country = training_functions.run_deep_model(country_train_prepped, country_test_prepped, country_geo_dim, 30, lograte=True)

    training_input_features = (tf.convert_to_tensor((country_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                            tf.convert_to_tensor(country_training[:,3], dtype=tf.float32),  # Age
                            tf.convert_to_tensor(country_training[:,0], dtype=tf.float32),  # Geography
                            tf.convert_to_tensor(country_training[:,1], dtype=tf.float32))  # Gender
    
    test_input_features = (tf.convert_to_tensor((country_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                        tf.convert_to_tensor(country_test[:,3], dtype=tf.float32),  # Age
                        tf.convert_to_tensor(country_test[:,0], dtype=tf.float32),  # Geography
                        tf.convert_to_tensor(country_test[:,1], dtype=tf.float32))  # Gender

    training_predictions = model_country.predict(training_input_features)

    test_predictions = model_country.predict(test_input_features)

    inputs = np.delete(country_training, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    inputs_test = np.delete(country_test, 4, axis=1)
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_country.save(f"../models/dl_country_model_{i}.keras")

    np.savetxt(f"../data/dl_country_forecast_{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_country_forecast_{i}.txt", test_predictions)    

    print(f"Iteration {i} complete")

Epoch 1/30
1000/1000 - 7s - 7ms/step - loss: 2.6535 - val_loss: 0.2539 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 5s - 5ms/step - loss: 0.4105 - val_loss: 0.1981 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.3053 - val_loss: 0.2165 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 5s - 5ms/step - loss: 0.2579 - val_loss: 0.2501 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 6s - 6ms/step - loss: 0.2269 - val_loss: 0.2462 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 6s - 6ms/step - loss: 0.2007 - val_loss: 0.1722 - learning_rate: 2.5000e-04
Epoch 7/30
1000/1000 - 6s - 6ms/step - loss: 0.2048 - val_loss: 0.1766 - learning_rate: 2.5000e-04
Epoch 8/30
1000/1000 - 5s - 5ms/step - loss: 0.1919 - val_loss: 0.1778 - learning_rate: 2.5000e-04
Epoch 9/30
1000/1000 - 5s - 5ms/step - loss: 0.1862 - val_loss: 0.1823 - learning_rate: 2.5000e-04
Epoch 10/30
1000/1000 - 6s - 6ms/step - loss: 0.1859 - val_loss: 0.1612 - learning_rate: 6.2500e-05
Epoch 11/30
1000/1000 - 5s - 

### Combined (all country / all state) DL Model

In [33]:
# convert combined data to tensors and other prep
combined_train_prepped = training_functions.prep_data(combined_training, mode="train", changeratetolog=True)
combined_test_prepped = training_functions.prep_data(combined_test, mode="test", changeratetolog=True)

In [34]:
# get the proper geography input dimension for model set up 
unique_vals = tf.unique(combined_training[:, 0]).y
combined_geo_dim = np.array(tf.size(unique_vals)).item()
print(combined_geo_dim)

88


In [35]:
# run combined model
for i in range(1,6):
    model_combined, loss_info_combined = training_functions.run_deep_model(combined_train_prepped, combined_test_prepped, combined_geo_dim, 30, lograte=True)

    training_input_features = (tf.convert_to_tensor((combined_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                            tf.convert_to_tensor(combined_training[:,3], dtype=tf.float32),  # Age
                            tf.convert_to_tensor(combined_training[:,0], dtype=tf.float32),  # Geography
                            tf.convert_to_tensor(combined_training[:,1], dtype=tf.float32))  # Gender
    
    test_input_features = (tf.convert_to_tensor((combined_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                        tf.convert_to_tensor(combined_test[:,3], dtype=tf.float32),  # Age
                        tf.convert_to_tensor(combined_test[:,0], dtype=tf.float32),  # Geography
                        tf.convert_to_tensor(combined_test[:,1], dtype=tf.float32))  # Gender

    training_predictions = model_combined.predict(training_input_features)

    test_predictions = model_combined.predict(test_input_features)

    inputs = np.delete(combined_training, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    inputs_test = np.delete(combined_test, 4, axis=1)
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_combined.save(f"../models/combined_model_{i}.keras")

    np.savetxt(f"../data/combined_training_predictions_{i}.txt", training_predictions)
    np.savetxt(f"../data/combined_test_predictions_{i}.txt", test_predictions)    

    print(f"Iteration {i} complete")

Epoch 1/30
1000/1000 - 8s - 8ms/step - loss: 2.6999 - val_loss: 0.3947 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 6s - 6ms/step - loss: 0.4093 - val_loss: 0.2743 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 6s - 6ms/step - loss: 0.3142 - val_loss: 0.2504 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 6s - 6ms/step - loss: 0.2645 - val_loss: 0.2563 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 6s - 6ms/step - loss: 0.2372 - val_loss: 0.2329 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 6s - 6ms/step - loss: 0.2218 - val_loss: 0.2114 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 6s - 6ms/step - loss: 0.2139 - val_loss: 0.1952 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 6s - 6ms/step - loss: 0.2036 - val_loss: 0.2257 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 7s - 7ms/step - loss: 0.2034 - val_loss: 0.2145 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 6s - 6ms/step - loss: 0.2050 - val_loss: 0.2045 - learning_rate: 0.0010
Epoch 11/30
1000/1000 - 6s - 6ms/step - loss: 0.1