## Deep Learning for Mortality Prediction (DLMP)

In [1]:
#%run data_preparation/split_data.py

### Import packages 

In [1]:
import tensorflow as tf
import numpy as np
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
import keras
# Short alias for the Keras layers namespace.
tfkl = tf.keras.layers

2025-07-08 17:26:29.802867: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: SSE4.1 SSE4.2 AVX AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Import functions

In [2]:
import training_functions
import importlib

# Re-execute the module so that edits made to training_functions.py after the
# first import are picked up without restarting the kernel.
importlib.reload(training_functions)

<module 'training_functions' from '/home/ppark/repos/deep-mort/code/training_functions.py'>

### Import data

#### State data

In [3]:
# Read the state-level training and test arrays from their text files.
state_training, state_test = map(
    np.loadtxt,
    ('../data/state_training.txt', '../data/state_test.txt'),
)

#### Country data

In [4]:
# Read the country-level training and test arrays from their text files.
country_training, country_test = map(
    np.loadtxt,
    ('../data/country_training_new.txt', '../data/country_test_new.txt'),
)

#### Combined data

In [5]:
# Read the combined (state + country) training and test arrays from their text files.
combined_training, combined_test = map(
    np.loadtxt,
    ('../data/combined_training_new.txt', '../data/combined_test_new.txt'),
)

In [6]:
# Load the geography key array; each row is later unpacked as a (geo, code)
# pair when geo_dict is built.
geos_key = np.load('../data/geos_key_new.npy')

In [7]:
# Lookup table from integer geography code to the geography label stored in geos_key.
geo_dict = dict((int(code), geo) for geo, code in geos_key)

#### Single population models

Below, I am training individual models for each population. I am only saving MSEs rather than predictions, since the prediction data would be too large to store.

In [8]:
# Distinct geography codes found in column 0 of the country training data.
n_pop = np.unique(country_training[:, 0])
# One more than the largest geography code.
geo_dim = int(n_pop.max() + 1)
geo_dim

90

In [14]:
# Report the TensorFlow build and the GPUs visible to this process.
# `tf` is already imported in the imports cell at the top of the notebook,
# so it is not re-imported here.
gpus = tf.config.list_physical_devices('GPU')

print("TensorFlow version:", tf.__version__)
print("Built with CUDA:", tf.test.is_built_with_cuda())
print("GPU devices:", gpus)

# Optional: Set memory growth to prevent over-allocation.
# set_memory_growth must be called before the GPUs are initialized; afterwards
# it raises RuntimeError (e.g. when this cell is run after a model has already
# been built). Report that instead of aborting the cell.
for gpu in gpus:
    try:
        tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as err:
        print(f"Could not enable memory growth for {gpu.name}: {err}")


TensorFlow version: 2.13.1
Built with CUDA: True
GPU devices: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:2', device_type='GPU')]


In [None]:
# Train one model per (country, gender) pair and repeat the whole sweep five
# times. Each repetition writes its test predictions to its own .npz file.
for s in range(1, 6):
    for j in range(0, 2):
        # NOTE(review): country codes are taken to start at 50 -- presumably
        # codes 0-49 belong to the states; confirm against the data preparation.
        for i in range(50, geo_dim):
            country_index = i
            gender_index = j
            geo_name = geo_dict[i]

            # Rows for this one population: column 0 is compared against the
            # geography code and column 1 against the gender code.
            single_country_training = country_training[(country_training[:, 0] == country_index) & (country_training[:, 1] == gender_index)]
            single_country_test = country_test[(country_test[:, 0] == country_index) & (country_test[:, 1] == gender_index)]

            # A population with no rows in either split cannot be trained or
            # evaluated; skip it explicitly instead of failing mid-sweep.
            if single_country_training.shape[0] == 0 or single_country_test.shape[0] == 0:
                print(f"Skipping model {geo_name}: Gender {j} (no training or test rows)")
                continue

            # Hundreds of models are created in this one kernel. Keras keeps
            # global state for every model it builds, so release it before
            # building the next one to stop memory use growing over the sweep.
            tf.keras.backend.clear_session()

            single_country_training_prepped = training_functions.prep_data(single_country_training, mode="train", changeratetolog=True)
            single_country_test_prepped = training_functions.prep_data(single_country_test, mode="test", changeratetolog=True)
            single_geo_dim = country_index + 1

            print(f"Running model {geo_name}: Gender {j}")

            # 30 and True are passed positionally here; the all-country and
            # combined sections pass the last argument as lograte=True.
            model_single, loss_single = training_functions.run_deep_model(single_country_training_prepped, single_country_test_prepped, single_geo_dim, 30, True)

            # Build the prediction inputs from the raw test rows.
            # NOTE(review): the (year - 1959) / 60 scaling presumably has to
            # match what prep_data applies during training -- confirm.
            tc = single_country_test
            years = (tc[:, 2] - 1959) / 60
            ages = tc[:, 3]
            geos = tc[:, 0].astype(np.int32)
            genders = tc[:, 1]

            test_input_features = (
                tf.convert_to_tensor(years, dtype=tf.float32),
                tf.convert_to_tensor(ages, dtype=tf.float32),
                tf.convert_to_tensor(geos, dtype=tf.int32),      # ensure ints
                tf.convert_to_tensor(genders, dtype=tf.float32),
            )

            test_predictions = model_single.predict(test_input_features)

            # Drop column 4 of the raw rows and append the model output as the
            # trailing column(s).
            inputs_test = np.delete(single_country_test, 4, axis=1)
            test_predictions = np.column_stack((inputs_test, test_predictions))

            # NOTE(review): this path has no iteration suffix, so each outer
            # repetition overwrites the model saved by the previous one, while
            # the predictions below are kept per iteration. Left unchanged
            # because other code may load this exact path -- confirm intent.
            model_single.save(f"../models/{geo_name}_gender_{j}.keras")

            np.savez_compressed(f"../data/{geo_name}_gender_{j}_iter_{s}.npz", test_predictions)

print(f"Finished training for {s} iterations.")

Running model AUS: Gender 1
Epoch 1/30
1000/1000 - 15s - loss: 2.3432 - val_loss: 0.1978 - lr: 0.0010 - 15s/epoch - 15ms/step
Epoch 2/30
1000/1000 - 8s - loss: 0.2170 - val_loss: 0.1220 - lr: 0.0010 - 8s/epoch - 8ms/step
Epoch 3/30
1000/1000 - 7s - loss: 0.1445 - val_loss: 0.1477 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 4/30
1000/1000 - 7s - loss: 0.1031 - val_loss: 0.0749 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 5/30
1000/1000 - 7s - loss: 0.0838 - val_loss: 0.0886 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 6/30
1000/1000 - 7s - loss: 0.0709 - val_loss: 0.0407 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 7/30
1000/1000 - 7s - loss: 0.0663 - val_loss: 0.0242 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 8/30
1000/1000 - 7s - loss: 0.0567 - val_loss: 0.0997 - lr: 0.0010 - 7s/epoch - 7ms/step
Epoch 9/30


#### All Country Model

In [None]:
# Prepare the country training and test arrays; the prepared objects are what
# gets handed to run_deep_model for the all-country model.
country_train_prepped = training_functions.prep_data(
    country_training,
    mode="train",
    changeratetolog=True,
)
country_test_prepped = training_functions.prep_data(
    country_test,
    mode="test",
    changeratetolog=True,
)

In [None]:
# Get the geography input dimension for model setup.
# The dimension is one more than the largest geography code, the same rule
# used for geo_dim and single_geo_dim in the single-population section.
# Counting distinct codes and adding a hard-coded 50 gives that value only
# while the country codes are exactly the contiguous range starting at 50.
unique_vals = tf.unique(country_training[:, 0]).y  # distinct codes, kept available for inspection
country_geo_dim = int(country_training[:, 0].max()) + 1
country_geo_dim

In [None]:
# Run the all-country model.
# Trains five independent models; each iteration saves the model together with
# its predictions for the training rows and for the test rows.
for i in range(1, 6):
    model_country, loss_info_country = training_functions.run_deep_model(country_train_prepped, country_test_prepped, country_geo_dim, 30, lograte=True)

    training_input_features = (tf.convert_to_tensor((country_training[:, 2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                               tf.convert_to_tensor(country_training[:, 3], dtype=tf.float32),  # Age
                               tf.convert_to_tensor(country_training[:, 0], dtype=tf.float32),  # Geography
                               tf.convert_to_tensor(country_training[:, 1], dtype=tf.float32))  # Gender

    test_input_features = (tf.convert_to_tensor((country_test[:, 2] - 1959) / 60, dtype=tf.float32),  # Normalized year
                           tf.convert_to_tensor(country_test[:, 3], dtype=tf.float32),  # Age
                           tf.convert_to_tensor(country_test[:, 0], dtype=tf.float32),  # Geography
                           tf.convert_to_tensor(country_test[:, 1], dtype=tf.float32))  # Gender

    training_predictions = model_country.predict(training_input_features)

    test_predictions = model_country.predict(test_input_features)

    # Drop column 4 of the raw rows and append the model output as the
    # trailing column(s).
    inputs = np.delete(country_training, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    inputs_test = np.delete(country_test, 4, axis=1)
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_country.save(f"../models/dl_country_model_{i}.keras")

    # Training and test predictions go to separate files. Both were previously
    # written to dl_country_forecast_{i}.txt, so the test output silently
    # overwrote the training output. The forecast file keeps holding the test
    # predictions, which is what it ended up containing before.
    np.savetxt(f"../data/dl_country_training_predictions_{i}.txt", training_predictions)
    np.savetxt(f"../data/dl_country_forecast_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")

### Combined (all country / all state) DL Model

In [None]:
# Prepare the combined training and test arrays; the prepared objects are what
# gets handed to run_deep_model for the combined model.
combined_train_prepped = training_functions.prep_data(
    combined_training,
    mode="train",
    changeratetolog=True,
)
combined_test_prepped = training_functions.prep_data(
    combined_test,
    mode="test",
    changeratetolog=True,
)

In [None]:
# Get the geography input dimension for model setup.
# The dimension is one more than the largest geography code, the same rule
# used for geo_dim and single_geo_dim in the single-population section.
# The number of distinct codes equals that value only while the codes are
# exactly 0..N-1 with none missing from the training data.
unique_vals = tf.unique(combined_training[:, 0]).y  # distinct codes, kept available for inspection
combined_geo_dim = int(combined_training[:, 0].max()) + 1
print(combined_geo_dim)

In [None]:
# Fit the combined (all country / all state) model five times. Every pass
# stores the fitted model and its predictions on the training and test rows.
for i in range(1, 6):
    model_combined, loss_info_combined = training_functions.run_deep_model(
        combined_train_prepped,
        combined_test_prepped,
        combined_geo_dim,
        30,
        lograte=True,
    )

    # Model inputs in order: normalized year, age, geography, gender.
    training_input_features = tuple(
        tf.convert_to_tensor(column, dtype=tf.float32)
        for column in (
            (combined_training[:, 2] - 1959) / 60,
            combined_training[:, 3],
            combined_training[:, 0],
            combined_training[:, 1],
        )
    )

    test_input_features = tuple(
        tf.convert_to_tensor(column, dtype=tf.float32)
        for column in (
            (combined_test[:, 2] - 1959) / 60,
            combined_test[:, 3],
            combined_test[:, 0],
            combined_test[:, 1],
        )
    )

    # Predict on the training rows first, then on the test rows.
    training_predictions = model_combined.predict(training_input_features)
    test_predictions = model_combined.predict(test_input_features)

    # Drop column 4 of the raw rows and append the model output after the
    # remaining columns.
    inputs = np.delete(combined_training, 4, axis=1)
    inputs_test = np.delete(combined_test, 4, axis=1)
    training_predictions = np.column_stack((inputs, training_predictions))
    test_predictions = np.column_stack((inputs_test, test_predictions))

    model_combined.save(f"../models/combined_model_{i}.keras")

    np.savetxt(f"../data/combined_training_predictions_{i}.txt", training_predictions)
    np.savetxt(f"../data/combined_test_predictions_{i}.txt", test_predictions)

    print(f"Iteration {i} complete")