## Deep Learning for Mortality Prediction (DLMP)

In [1]:
# Optional step, disabled by default: uncomment the line below to execute the
# data-splitting script from within this notebook.
# NOTE(review): presumably this regenerates the train/test files loaded later — confirm.
#%run data_preparation/split_data.py

### Import packages 

In [17]:
import tensorflow as tf
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
import keras
tfkl = tf.keras.layers
import math

### Import functions

In [44]:
import training_functions
import importlib

# Re-execute the already-imported module so that edits made to
# training_functions.py take effect without restarting the kernel.
importlib.reload(training_functions)

<module 'training_functions' from '/Users/paigepark/Desktop/deep-mort/code/training_functions.py'>

### Import data

#### State data

In [5]:
# Read the state-level training and test arrays (in that order) from their
# text files.
state_training, state_test = (
    np.loadtxt(path)
    for path in ('../data/state_training.txt', '../data/state_test.txt')
)

#### Country data

In [6]:
# Read the country-level training and test arrays (in that order) from their
# text files.
country_training, country_test = (
    np.loadtxt(path)
    for path in ('../data/country_training_new.txt', '../data/country_test_new.txt')
)

#### Combined data

In [7]:
# Read the combined training and test arrays (in that order) from their text
# files.
combined_training, combined_test = (
    np.loadtxt(path)
    for path in ('../data/combined_training_new.txt', '../data/combined_test_new.txt')
)

In [8]:
# Load the geography key array from disk. The following cell unpacks each row
# as a (geography label, code) pair.
geos_key = np.load('../data/geos_key_new.npy')

In [9]:
# Lookup table from integer geography code to geography label, built from the
# (label, code) rows of geos_key; a later row wins if a code repeats.
geo_dict = dict((int(code), geo) for geo, code in geos_key)

#### Single population models

Below, I train an individual model for each population. I save only the MSEs rather than the predictions, since the prediction data would be too large to store.

In [10]:
# Distinct geography codes present in column 0 of the country training data.
n_pop = np.unique(country_training[:, 0])
# Geography dimension: the largest code plus one. ndarray.max() is a single
# vectorized reduction, unlike the built-in max(), which walks the array
# element by element in Python.
geo_dim = int(n_pop.max() + 1)
geo_dim

90

In [11]:
import tensorflow as tf

# Query the visible GPUs once and reuse the list for both the report and the
# memory-growth configuration below.
gpus = tf.config.list_physical_devices('GPU')

print("TensorFlow version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU devices:", gpus)

# Optional: Set memory growth to prevent over-allocation
# Memory growth can only be changed before the GPUs have been initialized, so
# re-running this cell in a live kernel raises RuntimeError. Report that
# instead of aborting the cell.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print(f"Could not set memory growth for {gpu.name}: {err}")


TensorFlow version: 2.17.0
Built with CUDA: False
GPU devices: []


In [None]:
# Train a separate model for every (iteration s, gender j, geography code i)
# combination, using only the rows that belong to that population.
# NOTE(review): both save calls near the end of the loop body are commented
# out, so nothing produced here is persisted; model_single, loss_single and
# test_predictions are overwritten on every pass.
for s in range(1,6):
    for j in range(0,2):
        for i in range(50, 52):
            country_index = i
            gender_index = j
            # Keep only rows whose column 0 (geography code) and column 1
            # (gender) match the current population.
            single_country_training = country_training[(country_training[:,0] == country_index) & (country_training[:,1] == gender_index)]
            single_country_test = country_test[(country_test[:,0] == country_index) & (country_test[:,1] == gender_index)]

            single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
            single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
            # Geography dimension handed to the model: this population's code + 1.
            single_geo_dim = country_index + 1

            # set steps_per_epoch based on the number of training samples
            # (three times the number of 256-row batches in this subset)
            N = len(single_country_training)
            batch_size = 256
            steps_per_epoch = math.ceil(N / batch_size) * 3 

            print(f"Running model {geo_dict[i]}: Gender {j}")

            model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped, 
                                                                          single_country_test_prepped, 
                                                                          single_geo_dim, epochs=50, 
                                                                          steps_per_epoch=steps_per_epoch, lograte=True)
        
            # Build the prediction inputs from the raw test rows, in the order
            # (year, age, geography, gender). Year (column 2) is shifted by
            # 1959 and divided by 60; geography (column 0) is cast to int32.
            tc = single_country_test  
            years = (tc[:,2] - 1959) / 60
            ages  = tc[:,3]
            geos  = tc[:,0].astype(np.int32)
            genders = tc[:,1]

            test_input_features = (
                tf.convert_to_tensor(years, dtype=tf.float32),
                tf.convert_to_tensor(ages, dtype=tf.float32),
                tf.convert_to_tensor(geos, dtype=tf.int32),      # ensure ints
                tf.convert_to_tensor(genders, dtype=tf.float32),
            )
            
            test_predictions = model_single.predict(test_input_features)


            # Drop column 4 of the test rows (presumably the observed rate —
            # confirm) and append the model predictions as the last column.
            inputs_test = np.delete(single_country_test, 4, axis=1)
            test_predictions = np.column_stack((inputs_test, test_predictions))

            geo_name = geo_dict[i]

            # model_single.save(f"../models/{geo_name}_gender_{j}.keras")   

            # np.savez_compressed(f"../data/{geo_name}_gender_{j}_iter_{s}.npz", test_predictions) 

# Runs once, after all loops have finished; s still holds its final value.
print(f"Finished training for {s} iterations.")

4700
Running model AUS: Gender 0
Epoch 1/50
57/57 - 2s - 42ms/step - loss: 26.2680 - val_loss: 29.9856 - learning_rate: 0.0010
Epoch 2/50
57/57 - 0s - 7ms/step - loss: 13.9360 - val_loss: 13.0326 - learning_rate: 0.0010
Epoch 3/50
57/57 - 0s - 6ms/step - loss: 3.5309 - val_loss: 4.5640 - learning_rate: 0.0010
Epoch 4/50
57/57 - 1s - 11ms/step - loss: 0.7471 - val_loss: 2.5907 - learning_rate: 0.0010
Epoch 5/50
57/57 - 0s - 8ms/step - loss: 0.5945 - val_loss: 1.6731 - learning_rate: 0.0010
Epoch 6/50
57/57 - 0s - 8ms/step - loss: 0.5396 - val_loss: 1.4395 - learning_rate: 0.0010
Epoch 7/50
57/57 - 1s - 9ms/step - loss: 0.5213 - val_loss: 0.8574 - learning_rate: 0.0010
Epoch 8/50
57/57 - 0s - 7ms/step - loss: 0.5040 - val_loss: 0.8352 - learning_rate: 0.0010
Epoch 9/50
57/57 - 0s - 9ms/step - loss: 0.4513 - val_loss: 0.3908 - learning_rate: 0.0010
Epoch 10/50
57/57 - 1s - 10ms/step - loss: 0.4449 - val_loss: 0.3283 - learning_rate: 0.0010
Epoch 11/50
57/57 - 0s - 6ms/step - loss: 0.4222 

KeyboardInterrupt: 

#### All Country Model

In [34]:
# prep data
# Pass the raw country arrays through prep_data, training set first (mode
# "train") and test set second (mode "test"), both with changeratetolog=True
# (presumably log-transforming the rate column — confirm).
country_train_prepped, country_test_prepped = (
    training_functions.prep_data(raw, mode=mode, changeratetolog=True)
    for raw, mode in ((country_training, "train"), (country_test, "test"))
)

In [35]:
# get the proper geography input dimension for model set up
# Distinct geography codes in column 0; kept for inspection.
unique_vals = tf.unique(country_training[:, 0]).y
# Derive the dimension as the largest geography code + 1 instead of
# "number of distinct codes + 50": the hard-coded offset is only right when
# the codes start at 50 and have no gaps. This matches how geo_dim is computed
# in the single-population section.
country_geo_dim = int(country_training[:, 0].max()) + 1
country_geo_dim

90

In [37]:
# Number of 256-row batches needed to cover the country training set.
math.ceil(len(country_training)/ 256)

1405

In [48]:
# run country model
# Train the all-country model five times (i = 1..5). Each iteration saves the
# fitted model plus its training-set and test-set predictions under
# iteration-specific file names.

# Everything up to the loop is independent of the iteration, so it is built once.
N = len(country_training)
batch_size = 256
steps_per_epoch = math.ceil(N / batch_size)  # 256-row batches needed to cover the training rows

# Model inputs in the order (year, age, geography, gender).
# NOTE(review): geography is passed as float32 here, while the
# single-population cell casts it to int32 — confirm which dtype the model expects.
training_input_features = (tf.convert_to_tensor((country_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(country_training[:,3], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(country_training[:,0], dtype=tf.float32),  # Geography
                           tf.convert_to_tensor(country_training[:,1], dtype=tf.float32))  # Gender

test_input_features = (tf.convert_to_tensor((country_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                       tf.convert_to_tensor(country_test[:,3], dtype=tf.float32),  # Age
                       tf.convert_to_tensor(country_test[:,0], dtype=tf.float32),  # Geography
                       tf.convert_to_tensor(country_test[:,1], dtype=tf.float32))  # Gender

# Raw rows with column 4 removed (presumably the observed rate — confirm);
# the predictions are appended to these as the last column.
inputs = np.delete(country_training, 4, axis=1)
inputs_test = np.delete(country_test, 4, axis=1)

for i in range(1,6):
    model_country, loss_info_country = training_functions.run_deep_model(country_train_prepped, country_test_prepped, country_geo_dim,
                                                                         epochs=20, steps_per_epoch=steps_per_epoch, lograte=True)

    training_predictions = model_country.predict(training_input_features)
    test_predictions = model_country.predict(test_input_features)

    training_predictions = np.column_stack((inputs, training_predictions))
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_country.save(f"../models/dl_country_model_{i}.keras")

    # Training and test predictions go to separate files. Previously both were
    # written to dl_country_forecast{i}.txt, so the training predictions were
    # overwritten immediately. The test predictions keep the original file name.
    np.savetxt(f"../data/dl_country_training_predictions{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_country_forecast{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

Epoch 1/20
1405/1405 - 11s - 8ms/step - loss: 2.0501 - val_loss: 0.2499 - learning_rate: 0.0010
Epoch 2/20
1405/1405 - 9s - 7ms/step - loss: 0.3321 - val_loss: 0.2121 - learning_rate: 0.0010
Epoch 3/20
1405/1405 - 10s - 7ms/step - loss: 0.2541 - val_loss: 0.1817 - learning_rate: 0.0010
Epoch 4/20
1405/1405 - 13s - 9ms/step - loss: 0.2135 - val_loss: 0.1808 - learning_rate: 0.0010
Epoch 5/20
1405/1405 - 9s - 7ms/step - loss: 0.2010 - val_loss: 0.1987 - learning_rate: 0.0010
Epoch 6/20
1405/1405 - 9s - 7ms/step - loss: 0.1932 - val_loss: 0.2210 - learning_rate: 0.0010
Epoch 7/20
1405/1405 - 11s - 8ms/step - loss: 0.1786 - val_loss: 0.2357 - learning_rate: 0.0010
Epoch 8/20
1405/1405 - 10s - 7ms/step - loss: 0.1682 - val_loss: 0.1942 - learning_rate: 2.5000e-04
Epoch 9/20
1405/1405 - 9s - 7ms/step - loss: 0.1720 - val_loss: 0.1717 - learning_rate: 2.5000e-04
Epoch 10/20
1405/1405 - 10s - 7ms/step - loss: 0.1637 - val_loss: 0.1915 - learning_rate: 2.5000e-04
Epoch 11/20
1405/1405 - 10s - 7

### Combined (all country / all state) DL Model

In [None]:
# convert combined data to tensors and other prep
# Pass the raw combined arrays through prep_data, training set first (mode
# "train") and test set second (mode "test"), both with changeratetolog=True
# (presumably log-transforming the rate column — confirm).
combined_train_prepped, combined_test_prepped = (
    training_functions.prep_data(raw, mode=mode, changeratetolog=True)
    for raw, mode in ((combined_training, "train"), (combined_test, "test"))
)

In [None]:
# get the proper geography input dimension for model set up
# Distinct geography codes in column 0; kept for inspection.
unique_vals = tf.unique(combined_training[:, 0]).y
# Use the largest geography code + 1 rather than the number of distinct codes:
# the two agree only when the codes run 0..K-1 without gaps, and the count is
# too small otherwise. This matches how geo_dim is computed in the
# single-population section.
# NOTE(review): assumes run_deep_model sizes a code-indexed geography lookup
# with this value — confirm.
combined_geo_dim = int(combined_training[:, 0].max()) + 1
print(combined_geo_dim)

In [47]:
# Combined training rows divided by a batch size of 256 (not rounded up).
len(combined_training)/256

3240.6015625

In [None]:
# run combined model
# Train the combined model five times (i = 1..5). Each iteration saves the
# fitted model plus its training-set and test-set predictions under
# iteration-specific file names.

# Everything up to the loop is independent of the iteration, so it is built once.
N = len(combined_training)
batch_size = 256
# Half the number of 256-row batches in the training set. Integer division
# keeps the step count an int (the previous "/ 2" produced a float such as
# 1620.5), and max() keeps it at least 1 for very small inputs.
steps_per_epoch = max(1, math.ceil(N / batch_size) // 2)  # Adjust multiplier as needed

# Model inputs in the order (year, age, geography, gender).
# NOTE(review): geography is passed as float32 here, while the
# single-population cell casts it to int32 — confirm which dtype the model expects.
training_input_features = (tf.convert_to_tensor((combined_training[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(combined_training[:,3], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(combined_training[:,0], dtype=tf.float32),  # Geography
                           tf.convert_to_tensor(combined_training[:,1], dtype=tf.float32))  # Gender

test_input_features = (tf.convert_to_tensor((combined_test[:,2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                       tf.convert_to_tensor(combined_test[:,3], dtype=tf.float32),  # Age
                       tf.convert_to_tensor(combined_test[:,0], dtype=tf.float32),  # Geography
                       tf.convert_to_tensor(combined_test[:,1], dtype=tf.float32))  # Gender

# Raw rows with column 4 removed (presumably the observed rate — confirm);
# the predictions are appended to these as the last column.
inputs = np.delete(combined_training, 4, axis=1)
inputs_test = np.delete(combined_test, 4, axis=1)

for i in range(1,6):
    model_combined, loss_info_combined = training_functions.run_deep_model(combined_train_prepped, combined_test_prepped,
                                                                           combined_geo_dim, epochs=20, steps_per_epoch=steps_per_epoch,
                                                                           lograte=True)

    training_predictions = model_combined.predict(training_input_features)
    test_predictions = model_combined.predict(test_input_features)

    training_predictions = np.column_stack((inputs, training_predictions))
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_combined.save(f"../models/combined_model_{i}.keras")

    np.savetxt(f"../data/combined_training_predictions{i}.txt", training_predictions)
    np.savetxt(f"../data/combined_test_predictions{i}.txt", test_predictions)

    print(f"Iteration {i} complete")