## Deep Learning for Mortality Prediction (DLMP)

### Import packages 

In [2]:
import tensorflow as tf
import csv
import numpy as np
import pandas as pd
import os as os
import matplotlib.pyplot as plt
import seaborn as sns
import keras
tfkl = tf.keras.layers

### Import functions

In [3]:
import dl_functions
import lc_functions
import importlib

# Re-import the project modules so edits to their source files are picked up
# without restarting the kernel.
# NOTE(review): the saved output below names 'evaluation_functions', which is not
# reloaded in this cell, so that output is stale; re-run to refresh it.
importlib.reload(lc_functions)
importlib.reload(dl_functions)

<module 'evaluation_functions' from '/Users/paigepark/Desktop/deep-mort/code/evaluation_functions.py'>

### Import data

#### State data

In [4]:
# Read the three state-level splits (training, test, final test) from text files.
state_training, state_test, state_final_test = map(
    np.loadtxt,
    (
        '../data/state_training.txt',
        '../data/state_test.txt',
        '../data/state_final_test.txt',
    ),
)

#### Country data

In [5]:
# Read the three country-level splits (training, test, final test) from text files.
country_training, country_test, country_final_test = map(
    np.loadtxt,
    (
        '../data/country_training.txt',
        '../data/country_test.txt',
        '../data/country_final_test.txt',
    ),
)

#### Combined data

In [6]:
# Read the three combined (state + country) splits from text files.
combined_training, combined_test, combined_final_test = map(
    np.loadtxt,
    (
        '../data/combined_training.txt',
        '../data/combined_test.txt',
        '../data/combined_final_test.txt',
    ),
)

In [7]:
# Load the saved geography key array.
# NOTE(review): presumably maps geography codes to names; it is not used in the
# cells visible here — confirm where it is needed.
geos_key = np.load('../data/geos_key.npy')

### Train Separate DL Models for Country and State Data

#### State Model

In [8]:
# Build model-ready datasets from the raw state arrays; only the training split
# is prepared with mode="train", the two evaluation splits use mode="test".
state_train_prepped = dl_functions.prep_data(state_training, mode="train")
state_test_prepped, state_final_test_prepped = (
    dl_functions.prep_data(split, mode="test")
    for split in (state_test, state_final_test)
)

In [9]:
# Geography input dimension for the state model: the number of distinct
# geography codes found in column 0 of the state training data.
unique_vals = tf.unique(state_training[:, 0]).y
state_geo_dim = int(tf.size(unique_vals).numpy())
state_geo_dim

50

In [10]:
# Train the state-only deep model for 30 epochs, validating on the state test split.
model_state, loss_info_state = dl_functions.run_deep_model(
    state_train_prepped,
    state_test_prepped,
    state_geo_dim,
    epochs=30,
)

Epoch 1/30
1000/1000 - 16s - 16ms/step - loss: 0.0134 - val_loss: 6.4041e-04 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 13s - 13ms/step - loss: 6.9229e-04 - val_loss: 0.0011 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 13s - 13ms/step - loss: 3.4736e-04 - val_loss: 8.5300e-05 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 13s - 13ms/step - loss: 2.4878e-04 - val_loss: 6.0132e-05 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 14s - 14ms/step - loss: 1.8729e-04 - val_loss: 5.4070e-05 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 13s - 13ms/step - loss: 1.5255e-04 - val_loss: 4.1414e-05 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 14s - 14ms/step - loss: 1.2883e-04 - val_loss: 1.0245e-04 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 13s - 13ms/step - loss: 1.1910e-04 - val_loss: 1.2674e-04 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 13s - 13ms/step - loss: 1.0958e-04 - val_loss: 1.5232e-04 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 13s - 13ms/step - loss: 1.0093e-04 - va

In [11]:
# Persist the trained state model. Create the output directory first so the save
# also works from a fresh checkout where `models/` does not exist yet.
os.makedirs('models', exist_ok=True)
model_state.save('models/model_state.keras')

In [97]:
# Model inputs for the state training rows, in the order (year, age, geography, gender).
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (state_training[:, 2] - 1959) / 60,  # Normalized year
        state_training[:, 3],  # Age
        state_training[:, 0],  # Geography
        state_training[:, 1],  # Gender
    )
)

In [101]:
# Model inputs for the state test rows, in the order (year, age, geography, gender).
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (state_test[:, 2] - 1959) / 60,  # Normalized year
        state_test[:, 3],  # Age
        state_test[:, 0],  # Geography
        state_test[:, 1],  # Gender
    )
)

In [99]:
# Run the trained state model on the state training inputs (one prediction per row).
training_predictions = model_state.predict(training_input_features)


[1m14688/14688[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 592us/step


In [100]:
# Sanity check: raw rows, one feature tensor, and the predictions should all
# have the same number of rows.
print(
    state_training.shape,
    training_input_features[0].shape,
    training_predictions.shape,
    sep="\n",
)

(470000, 5)
(470000,)
(470000, 1)


In [102]:
# Run the trained state model on the state test inputs (one prediction per row).
test_predictions = model_state.predict(test_input_features)

[1m3125/3125[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 589us/step


In [103]:
# Build `inputs` here instead of relying on a later cell having been run already:
# on a fresh kernel the original cell raised NameError because `inputs` was only
# defined further down the notebook. Column 4 (the observed value) is dropped.
inputs = np.delete(state_training, 4, axis=1)
print(inputs.shape)
print(training_predictions.shape)
print(state_training.shape)

(470000, 4)
(470000, 1)
(470000, 5)


In [105]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs = np.delete(state_training, 4, axis=1)
# Use only the last column of `training_predictions`: on the first run that is the
# (n, 1) model output, and on a re-run it is the prediction column of the array
# already stacked here. The cell is therefore safe to execute more than once
# instead of appending the input columns again each time.
training_predictions = np.column_stack((inputs, training_predictions[:, -1]))

In [106]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs_test = np.delete(state_test, 4, axis=1)
# Use only the last column of `test_predictions` so re-running this cell does not
# stack the input columns a second time (first run: the (n, 1) model output;
# re-run: the prediction column of the already-stacked array).
test_predictions = np.column_stack((inputs_test, test_predictions[:, -1]))

In [107]:
# Sanity check: the stacked array should have one more column than the inputs.
print(inputs_test.shape, test_predictions.shape, sep="\n")

(100000, 4)
(100000, 5)


In [108]:
# Write the state-model predictions (input columns + prediction) to text files.
np.savetxt(fname="../data/state_training_predictions.txt", X=training_predictions)
np.savetxt(fname="../data/state_test_predictions.txt", X=test_predictions)

#### US only model

In [109]:
# Keep only the rows whose geography code (column 0) is 87, the code used for
# the US-only subset in this section.
us_only_training = combined_training[np.equal(combined_training[:, 0], 87)]
us_only_test = combined_test[np.equal(combined_test[:, 0], 87)]

In [110]:
# Display the (rows, columns) of the US-only training subset as a quick size check.
us_only_training.shape

(9400, 5)

In [111]:
# Prepare the US-only splits: mode="train" for training, mode="test" for evaluation.
us_training_prepped = dl_functions.prep_data(
    us_only_training,
    mode="train",
)
us_test_prepped = dl_functions.prep_data(
    us_only_test,
    mode="test",
)

In [112]:
# Geography input dimension for the US-only models. The US rows carry geography
# code 87 (see the filter above), and 88 equals the dimension printed for the
# country and combined models later in this notebook.
# NOTE(review): presumably the dimension must cover codes 0-87 even though only
# one geography is present — confirm against dl_functions.run_deep_model.
us_geo_dim = 88

In [24]:
# Train the US-only deep model for 25 epochs, validating on the US-only test split.
model_us, loss_us = dl_functions.run_deep_model(
    us_training_prepped,
    us_test_prepped,
    us_geo_dim,
    epochs=25,
)

Epoch 1/25
1000/1000 - 20s - 20ms/step - loss: 0.0133 - val_loss: 9.2500e-04 - learning_rate: 0.0010
Epoch 2/25
1000/1000 - 14s - 14ms/step - loss: 6.5321e-04 - val_loss: 9.1107e-05 - learning_rate: 0.0010
Epoch 3/25
1000/1000 - 14s - 14ms/step - loss: 2.9597e-04 - val_loss: 3.3497e-04 - learning_rate: 0.0010
Epoch 4/25
1000/1000 - 16s - 16ms/step - loss: 1.8992e-04 - val_loss: 3.0159e-04 - learning_rate: 0.0010
Epoch 5/25
1000/1000 - 18s - 18ms/step - loss: 1.2446e-04 - val_loss: 6.7023e-05 - learning_rate: 0.0010
Epoch 6/25
1000/1000 - 16s - 16ms/step - loss: 1.0061e-04 - val_loss: 4.6545e-04 - learning_rate: 0.0010
Epoch 7/25
1000/1000 - 14s - 14ms/step - loss: 6.9843e-05 - val_loss: 1.0966e-04 - learning_rate: 0.0010
Epoch 8/25
1000/1000 - 13s - 13ms/step - loss: 5.9427e-05 - val_loss: 1.0206e-04 - learning_rate: 0.0010
Epoch 9/25
1000/1000 - 13s - 13ms/step - loss: 4.4152e-05 - val_loss: 8.8720e-05 - learning_rate: 0.0010
Epoch 10/25
1000/1000 - 13s - 13ms/step - loss: 3.9179e-05 

In [25]:
# Persist the trained US-only model. Create the output directory first so the save
# also works from a fresh checkout where `models/` does not exist yet.
os.makedirs('models', exist_ok=True)
model_us.save('models/us_only.keras')

In [113]:
# Model inputs for the US-only training rows, in the order (year, age, geography, gender).
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (us_only_training[:, 2] - 1959) / 60,  # Normalized year
        us_only_training[:, 3],  # Age
        us_only_training[:, 0],  # Geography
        us_only_training[:, 1],  # Gender
    )
)

In [114]:
# Inspect the US-only training rows (NumPy abbreviates the middle of the array).
print(us_only_training)

[[8.70000e+01 0.00000e+00 1.95900e+03 0.00000e+00 2.37060e-02]
 [8.70000e+01 1.00000e+00 1.95900e+03 0.00000e+00 3.08840e-02]
 [8.70000e+01 0.00000e+00 1.95900e+03 1.00000e+00 1.61300e-03]
 ...
 [8.70000e+01 1.00000e+00 2.00500e+03 9.80000e+01 4.10780e-01]
 [8.70000e+01 0.00000e+00 2.00500e+03 9.90000e+01 3.75465e-01]
 [8.70000e+01 1.00000e+00 2.00500e+03 9.90000e+01 4.38072e-01]]


In [115]:
# Model inputs for the US-only test rows, in the order (year, age, geography, gender).
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (us_only_test[:, 2] - 1959) / 60,  # Normalized year
        us_only_test[:, 3],  # Age
        us_only_test[:, 0],  # Geography
        us_only_test[:, 1],  # Gender
    )
)

In [116]:
# Run the trained US-only model on the US-only training inputs.
training_predictions = model_us.predict(training_input_features)


[1m294/294[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step


In [117]:
# Run the trained US-only model on the US-only test inputs.
test_predictions = model_us.predict(test_input_features)

[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 869us/step


In [118]:
# Replace the observed-value column (index 4) with the model prediction for both
# splits, keeping the four input columns alongside it.
# Only the last column of each predictions array is used, so re-running this cell
# does not stack the input columns a second time (first run: the (n, 1) model
# output; re-run: the prediction column of the already-stacked array).
inputs = np.delete(us_only_training, 4, axis=1)
training_predictions = np.column_stack((inputs, training_predictions[:, -1]))
inputs_test = np.delete(us_only_test, 4, axis=1)
test_predictions = np.column_stack((inputs_test, test_predictions[:, -1]))

In [119]:
# Write the US-only model predictions (input columns + prediction) to text files.
np.savetxt(fname="../data/us_training_predictions.txt", X=training_predictions)
np.savetxt(fname="../data/us_test_predictions.txt", X=test_predictions)

#### US only, female only model

In [17]:
# Inspect the US-only training rows again before splitting by gender (column 1).
print(us_only_training)

[[8.70000e+01 0.00000e+00 1.95900e+03 0.00000e+00 2.37060e-02]
 [8.70000e+01 1.00000e+00 1.95900e+03 0.00000e+00 3.08840e-02]
 [8.70000e+01 0.00000e+00 1.95900e+03 1.00000e+00 1.61300e-03]
 ...
 [8.70000e+01 1.00000e+00 2.00500e+03 9.80000e+01 4.10780e-01]
 [8.70000e+01 0.00000e+00 2.00500e+03 9.90000e+01 3.75465e-01]
 [8.70000e+01 1.00000e+00 2.00500e+03 9.90000e+01 4.38072e-01]]


In [120]:
# Gender is column 1, coded female = 0, male = 1; keep the female rows only.
us_female_training = us_only_training[np.equal(us_only_training[:, 1], 0)]
us_female_test = us_only_test[np.equal(us_only_test[:, 1], 0)]

In [21]:
# Display the (rows, columns) of the US female-only training subset as a size check.
us_female_training.shape

(4700, 5)

In [22]:
# Prepare the US female-only splits: mode="train" for training, mode="test" for evaluation.
us_female_training_prepped = dl_functions.prep_data(
    us_female_training,
    mode="train",
)
us_female_test_prepped = dl_functions.prep_data(
    us_female_test,
    mode="test",
)

In [26]:
# Train the US female-only deep model for 20 epochs, reusing the US geography dimension.
model_us_f, loss_us_f = dl_functions.run_deep_model(
    us_female_training_prepped,
    us_female_test_prepped,
    us_geo_dim,
    epochs=20,
)

Epoch 1/20
1000/1000 - 19s - 19ms/step - loss: 0.0136 - val_loss: 6.8110e-04 - learning_rate: 0.0010
Epoch 2/20
1000/1000 - 17s - 17ms/step - loss: 8.1912e-04 - val_loss: 1.0963e-04 - learning_rate: 0.0010
Epoch 3/20
1000/1000 - 14s - 14ms/step - loss: 3.4496e-04 - val_loss: 9.9120e-05 - learning_rate: 0.0010
Epoch 4/20
1000/1000 - 13s - 13ms/step - loss: 1.8953e-04 - val_loss: 1.5245e-04 - learning_rate: 0.0010
Epoch 5/20
1000/1000 - 13s - 13ms/step - loss: 1.3052e-04 - val_loss: 8.1228e-04 - learning_rate: 0.0010
Epoch 6/20
1000/1000 - 13s - 13ms/step - loss: 8.3358e-05 - val_loss: 4.3157e-05 - learning_rate: 0.0010
Epoch 7/20
1000/1000 - 13s - 13ms/step - loss: 6.8630e-05 - val_loss: 1.3500e-04 - learning_rate: 0.0010
Epoch 8/20
1000/1000 - 14s - 14ms/step - loss: 6.1278e-05 - val_loss: 4.0702e-05 - learning_rate: 0.0010
Epoch 9/20
1000/1000 - 15s - 15ms/step - loss: 4.2988e-05 - val_loss: 5.2572e-05 - learning_rate: 0.0010
Epoch 10/20
1000/1000 - 15s - 15ms/step - loss: 3.3211e-05 

In [27]:
# Persist the trained US female-only model. Create the output directory first so
# the save also works from a fresh checkout where `models/` does not exist yet.
os.makedirs('models', exist_ok=True)
model_us_f.save('models/models_us_f.keras')

In [121]:
# Model inputs for the US female-only training rows, in the order (year, age, geography, gender).
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (us_female_training[:, 2] - 1959) / 60,  # Normalized year
        us_female_training[:, 3],  # Age
        us_female_training[:, 0],  # Geography
        us_female_training[:, 1],  # Gender
    )
)

In [None]:
# Debug dump of the four feature tensors.
# NOTE(review): the saved output below shows 470000-row tensors with geography
# codes 0-49, which does not match the 4700-row US female subset; it appears to
# be left over from an earlier run. Re-run to refresh it.
print(training_input_features)

(<tf.Tensor: shape=(470000,), dtype=float32, numpy=
array([0.        , 0.        , 0.        , ..., 0.76666665, 0.76666665,
       0.76666665], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  1.,  2., ..., 97., 98., 99.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  0.,  0., ..., 49., 49., 49.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([0., 0., 0., ..., 1., 1., 1.], dtype=float32)>)


In [122]:
# Model inputs for the US female-only test rows, in the order (year, age, geography, gender).
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (us_female_test[:, 2] - 1959) / 60,  # Normalized year
        us_female_test[:, 3],  # Age
        us_female_test[:, 0],  # Geography
        us_female_test[:, 1],  # Gender
    )
)

In [123]:
# Compute predictions for the US female-only training inputs (nothing is written
# to disk in this cell; the files are saved further down).
training_predictions = model_us_f.predict(training_input_features)


[1m147/147[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 676us/step


In [124]:
# Run the trained US female-only model on the US female-only test inputs.
test_predictions = model_us_f.predict(test_input_features)

[1m32/32[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 701us/step


In [125]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs = np.delete(us_female_training, 4, axis=1)
# Use only the last column of `training_predictions` so re-running this cell does
# not stack the input columns a second time (first run: the (n, 1) model output;
# re-run: the prediction column of the already-stacked array).
training_predictions = np.column_stack((inputs, training_predictions[:, -1]))

In [126]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs_test = np.delete(us_female_test, 4, axis=1)
# Use only the last column of `test_predictions` so re-running this cell does not
# stack the input columns a second time (first run: the (n, 1) model output;
# re-run: the prediction column of the already-stacked array).
test_predictions = np.column_stack((inputs_test, test_predictions[:, -1]))

In [68]:
# Shape check on the stacked test predictions.
# NOTE(review): the saved output below shows a single column, i.e. it was recorded
# before the input columns were stacked on; re-run to refresh it.
test_predictions.shape

(1000, 1)

In [127]:
# Write the US female-only model predictions (input columns + prediction) to text files.
np.savetxt(fname="../data/us_f_training_predictions.txt", X=training_predictions)
np.savetxt(fname="../data/us_f_test_predictions.txt", X=test_predictions)

#### Country Model

In [28]:
# Build model-ready datasets from the raw country arrays; only the training split
# is prepared with mode="train", the two evaluation splits use mode="test".
country_train_prepped = dl_functions.prep_data(country_training, mode="train")
country_test_prepped, country_final_test_prepped = (
    dl_functions.prep_data(split, mode="test")
    for split in (country_test, country_final_test)
)

In [29]:
# Geography input dimension for the country model: the number of distinct country
# codes in column 0 of the training data, offset by 50.
# NOTE(review): 50 equals the state geography dimension computed earlier;
# presumably country codes are numbered after the state codes — confirm.
unique_vals = tf.unique(country_training[:, 0]).y
country_geo_dim = int(tf.size(unique_vals).numpy()) + 50
country_geo_dim

88

In [30]:
# Train the country-only deep model for 30 epochs, validating on the country test split.
model_country, loss_info_country = dl_functions.run_deep_model(
    country_train_prepped,
    country_test_prepped,
    country_geo_dim,
    epochs=30,
)

Epoch 1/30
1000/1000 - 20s - 20ms/step - loss: 0.0141 - val_loss: 8.5077e-04 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 14s - 14ms/step - loss: 0.0017 - val_loss: 0.0013 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 13s - 13ms/step - loss: 0.0011 - val_loss: 4.5032e-04 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 12s - 12ms/step - loss: 9.5264e-04 - val_loss: 0.0010 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 13s - 13ms/step - loss: 8.0814e-04 - val_loss: 2.9243e-04 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 12s - 12ms/step - loss: 7.5737e-04 - val_loss: 3.3311e-04 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 13s - 13ms/step - loss: 7.4341e-04 - val_loss: 2.9507e-04 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 16s - 16ms/step - loss: 7.2176e-04 - val_loss: 3.1382e-04 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 14s - 14ms/step - loss: 6.7220e-04 - val_loss: 3.6238e-04 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 13s - 13ms/step - loss: 6.7827e-04 - val_loss: 3.17

In [31]:
# Persist the trained country model. Create the output directory first so the save
# also works from a fresh checkout where `models/` does not exist yet.
os.makedirs('models', exist_ok=True)
model_country.save('models/model_country.keras')

In [128]:
# Model inputs for the country training rows, in the order (year, age, geography, gender).
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (country_training[:, 2] - 1959) / 60,  # Normalized year
        country_training[:, 3],  # Age
        country_training[:, 0],  # Geography
        country_training[:, 1],  # Gender
    )
)

In [None]:
# Debug dump of the four feature tensors.
# NOTE(review): the saved output below shows geography codes 0-49 only, which
# looks like the state data rather than the country data; it appears to be left
# over from an earlier run. Re-run to refresh it.
print(training_input_features)

(<tf.Tensor: shape=(470000,), dtype=float32, numpy=
array([0.        , 0.        , 0.        , ..., 0.76666665, 0.76666665,
       0.76666665], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  1.,  2., ..., 97., 98., 99.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  0.,  0., ..., 49., 49., 49.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([0., 0., 0., ..., 1., 1., 1.], dtype=float32)>)


In [129]:
# Model inputs for the country test rows, in the order (year, age, geography, gender).
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (country_test[:, 2] - 1959) / 60,  # Normalized year
        country_test[:, 3],  # Age
        country_test[:, 0],  # Geography
        country_test[:, 1],  # Gender
    )
)

In [130]:
# Compute predictions for the country training inputs (nothing is written to disk
# in this cell; the files are saved further down).
training_predictions = model_country.predict(training_input_features)


[1m10650/10650[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 827us/step


In [131]:
# Run the trained country model on the country test inputs.
test_predictions = model_country.predict(test_input_features)

[1m2313/2313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 614us/step


In [132]:
# Replace the observed-value column (index 4) with the model prediction for both
# splits, keeping the four input columns alongside it.
# Only the last column of each predictions array is used, so re-running this cell
# does not stack the input columns a second time (first run: the (n, 1) model
# output; re-run: the prediction column of the already-stacked array).
inputs = np.delete(country_training, 4, axis=1)
training_predictions = np.column_stack((inputs, training_predictions[:, -1]))
inputs_test = np.delete(country_test, 4, axis=1)
test_predictions = np.column_stack((inputs_test, test_predictions[:, -1]))

In [133]:
# Write the country-model predictions (input columns + prediction) to text files.
np.savetxt(fname="../data/country_training_predictions.txt", X=training_predictions)
np.savetxt(fname="../data/country_test_predictions.txt", X=test_predictions)

### Train Combined DL Model

In [32]:
# Build model-ready datasets from the raw combined arrays; only the training split
# is prepared with mode="train", the two evaluation splits use mode="test".
combined_train_prepped = dl_functions.prep_data(combined_training, mode="train")
combined_test_prepped, combined_final_test_prepped = (
    dl_functions.prep_data(split, mode="test")
    for split in (combined_test, combined_final_test)
)

In [33]:
# Geography input dimension for the combined model: the number of distinct
# geography codes found in column 0 of the combined training data.
unique_vals = tf.unique(combined_training[:, 0]).y
combined_geo_dim = int(tf.size(unique_vals).numpy())
print(combined_geo_dim)

88


In [36]:
# Train the combined (state + country) deep model for 30 epochs, validating on the
# combined test split.
model_combined, loss_info_combined = dl_functions.run_deep_model(
    combined_train_prepped,
    combined_test_prepped,
    combined_geo_dim,
    epochs=30,
)

Epoch 1/30
1000/1000 - 24s - 24ms/step - loss: 0.0144 - val_loss: 3.0510e-04 - learning_rate: 0.0010
Epoch 2/30
1000/1000 - 19s - 19ms/step - loss: 0.0011 - val_loss: 7.3584e-04 - learning_rate: 0.0010
Epoch 3/30
1000/1000 - 21s - 21ms/step - loss: 6.9995e-04 - val_loss: 8.6619e-04 - learning_rate: 0.0010
Epoch 4/30
1000/1000 - 24s - 24ms/step - loss: 5.4548e-04 - val_loss: 4.1275e-04 - learning_rate: 0.0010
Epoch 5/30
1000/1000 - 23s - 23ms/step - loss: 5.0655e-04 - val_loss: 2.0033e-04 - learning_rate: 0.0010
Epoch 6/30
1000/1000 - 19s - 19ms/step - loss: 4.2464e-04 - val_loss: 3.7384e-04 - learning_rate: 0.0010
Epoch 7/30
1000/1000 - 15s - 15ms/step - loss: 3.9013e-04 - val_loss: 5.2067e-04 - learning_rate: 0.0010
Epoch 8/30
1000/1000 - 17s - 17ms/step - loss: 3.7477e-04 - val_loss: 1.2919e-04 - learning_rate: 0.0010
Epoch 9/30
1000/1000 - 15s - 15ms/step - loss: 3.5897e-04 - val_loss: 1.2744e-04 - learning_rate: 0.0010
Epoch 10/30
1000/1000 - 16s - 16ms/step - loss: 3.4319e-04 - va

In [37]:
# Persist the trained combined model. Create the output directory first so the save
# also works from a fresh checkout where `models/` does not exist yet.
os.makedirs('models', exist_ok=True)
model_combined.save('models/model_combined.keras')

In [134]:
# Model inputs for the combined training rows, in the order (year, age, geography, gender).
training_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (combined_training[:, 2] - 1959) / 60,  # Normalized year
        combined_training[:, 3],  # Age
        combined_training[:, 0],  # Geography
        combined_training[:, 1],  # Gender
    )
)

In [None]:
# Debug dump of the four feature tensors.
# NOTE(review): the saved output below shows geography codes 0-49 only, which
# looks like the state data rather than the combined data; it appears to be left
# over from an earlier run. Re-run to refresh it.
print(training_input_features)

(<tf.Tensor: shape=(470000,), dtype=float32, numpy=
array([0.        , 0.        , 0.        , ..., 0.76666665, 0.76666665,
       0.76666665], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  1.,  2., ..., 97., 98., 99.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([ 0.,  0.,  0., ..., 49., 49., 49.], dtype=float32)>, <tf.Tensor: shape=(470000,), dtype=float32, numpy=array([0., 0., 0., ..., 1., 1., 1.], dtype=float32)>)


In [135]:
# Model inputs for the combined test rows, in the order (year, age, geography, gender).
test_input_features = tuple(
    tf.convert_to_tensor(column, dtype=tf.float32)
    for column in (
        (combined_test[:, 2] - 1959) / 60,  # Normalized year
        combined_test[:, 3],  # Age
        combined_test[:, 0],  # Geography
        combined_test[:, 1],  # Gender
    )
)

In [136]:
# Run the trained combined model on the combined training inputs.
training_predictions = model_combined.predict(training_input_features)


[1m25338/25338[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m21s[0m 823us/step


In [137]:
# Run the trained combined model on the combined test inputs.
test_predictions = model_combined.predict(test_input_features)

[1m5438/5438[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 849us/step


In [138]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs = np.delete(combined_training, 4, axis=1)
# Use only the last column of `training_predictions` so re-running this cell does
# not stack the input columns a second time (first run: the (n, 1) model output;
# re-run: the prediction column of the already-stacked array).
training_predictions = np.column_stack((inputs, training_predictions[:, -1]))

In [139]:
# Replace the observed-value column (index 4) with the model prediction, keeping
# the four input columns alongside it.
inputs_test = np.delete(combined_test, 4, axis=1)
# Use only the last column of `test_predictions` so re-running this cell does not
# stack the input columns a second time (first run: the (n, 1) model output;
# re-run: the prediction column of the already-stacked array).
test_predictions = np.column_stack((inputs_test, test_predictions[:, -1]))

In [79]:
# Shape check on the stacked test predictions.
# NOTE(review): the saved output below shows a single column, i.e. it was recorded
# before the input columns were stacked on; re-run to refresh it.
test_predictions.shape

(174000, 1)

In [140]:
# Write the combined-model predictions (input columns + prediction) to text files.
np.savetxt(fname="../data/combined_training_predictions.txt", X=training_predictions)
np.savetxt(fname="../data/combined_test_predictions.txt", X=test_predictions)

### Train Lee-Carter model

In [38]:
# Fit the Lee-Carter baseline on the combined training data and evaluate it on the
# combined test data; the call returns the fitted output, forecasts and validation MSE.
lc_output, lc_forecasts, lc_val_mse = lc_functions.run_lc_model(
    train_data=combined_training,
    test_data=combined_test,
)

Skipping Geo: 74.0, Gender: 1.0 due to NaN or infinite values in m_x


In [83]:
# Dump the full Lee-Carter output: a dict whose entries hold a 'params' item.
# NOTE(review): the keys look like (geography, gender) pairs — confirm against
# lc_functions. The output is very long; consider inspecting a single key instead.
print(lc_output)

{(0.0, 0.0): {'params': (array([[ -4.45426238],
       [ -7.12073621],
       [ -7.94156131],
       [ -8.90293106],
       [ -9.28821858],
       [-10.9124869 ],
       [-10.57489156],
       [-12.27480962],
       [-11.04167143],
       [-13.80268415],
       [-12.50213674],
       [-10.28997169],
       [-12.03005733],
       [-12.68725714],
       [-12.35647263],
       [-10.88336443],
       [ -8.26971637],
       [ -7.77401079],
       [ -7.67842804],
       [ -8.28850822],
       [ -8.56691603],
       [ -7.24432516],
       [ -9.32221081],
       [ -8.77391974],
       [ -7.84697905],
       [ -8.48712276],
       [ -7.38211393],
       [ -7.60415415],
       [ -7.65711823],
       [ -7.68316234],
       [ -7.59643427],
       [ -7.03053041],
       [ -6.94676002],
       [ -7.25863765],
       [ -7.31192909],
       [ -6.95159673],
       [ -7.23561044],
       [ -7.21381348],
       [ -7.32066403],
       [ -6.51434743],
       [ -6.48594133],
       [ -6.28767009],
       [ 

In [39]:
# Write the Lee-Carter forecasts to a comma-separated file.
np.savetxt(fname="../data/lc_forecasts.csv", X=lc_forecasts, delimiter=",")

### Generate Table 1: Training and Test MSEs
This table documents the average MSEs (for states alone, countries alone, and overall) over 5 training runs of each model (Lee-Carter, separate deep learning models, and the joint deep learning model).

In [None]:
def compare_models(num_iterations, n_states=50, n_countries=37):
    """Repeatedly train every model and collect their test errors.

    Each iteration runs the Lee-Carter model, trains separate state and country
    deep models, and trains one combined deep model, all for 30 epochs. It reads
    the notebook-level datasets and geography dimensions defined in earlier cells.

    Parameters
    ----------
    num_iterations : int
        Number of full training rounds; 0 returns an empty list.
    n_states : int, optional
        Weight applied to the state model's validation MSE in the weighted
        average. Defaults to 50, the value previously hard-coded.
    n_countries : int, optional
        Weight applied to the country model's validation MSE. Defaults to 37,
        the value previously hard-coded.
        NOTE(review): the country training data yields 38 distinct geography
        codes (88 - 50) earlier in this notebook; confirm that 37 is the
        intended count for the test split.

    Returns
    -------
    list of tuple
        One ``(lc, seperate_dl, combined_dl)`` tuple per iteration, where
        ``lc`` is whatever ``lc_functions.run_lc_model`` returns,
        ``seperate_dl`` is ``[state MSE, country MSE, weighted average]`` and
        ``combined_dl`` holds the combined model's ``evaluate`` results on the
        state, country and combined test sets.
    """
    total_geos = n_states + n_countries
    results = []
    for i in range(num_iterations):
        lc = lc_functions.run_lc_model(train_data=combined_training, test_data=combined_test)
        print(f"Lee Carter Iteration {i} Complete")

        # Only the loss information is needed from the separate models.
        _, state_only = dl_functions.run_deep_model(dataset_train=state_train_prepped, dataset_test=state_test_prepped, geo_dim=state_geo_dim, epochs=30)
        _, country_only = dl_functions.run_deep_model(dataset_train=country_train_prepped, dataset_test=country_test_prepped, geo_dim=country_geo_dim, epochs=30)

        state_mse = state_only['val_mse']
        country_mse = country_only['val_mse']
        # Average of the two MSEs weighted by the number of geographies in each group.
        ind_ave = ((state_mse * n_states) + (country_mse * n_countries)) / total_geos
        seperate_dl = [state_mse, country_mse, ind_ave]
        print(f"Seperate DL Iteration {i} Complete")

        # The combined model itself is needed here so it can be evaluated per subset.
        model_combined, _ = dl_functions.run_deep_model(combined_train_prepped, combined_test_prepped, combined_geo_dim, epochs=30)

        state_combined = model_combined.evaluate(state_test_prepped)
        country_combined = model_combined.evaluate(country_test_prepped)
        total_combined = model_combined.evaluate(combined_test_prepped)

        combined_dl = [state_combined, country_combined, total_combined]
        print(f"Combined DL Iteration {i} Complete")

        results.append((lc, seperate_dl, combined_dl))

    return results
        

Comparison results are computationally costly to generate. Could be good to look into how Savio could help to speed this up. Commenting out for now.

In [27]:
# comparison_results = compare_models(num_iterations=1)

In [28]:
# lc_results = np.array([result[0][0] for result in comparison_results])
# seperate_dl_results = np.array([result[1] for result in comparison_results])
# combined_dl_results = np.array([result[2] for result in comparison_results])

In [29]:
# lc_avg = np.mean(lc_results, axis=0)
# seperate_dl_avg = np.mean(seperate_dl_results, axis=0)
# combined_dl_avg = np.mean(combined_dl_results, axis=0)

In [30]:
# average_results = np.array([lc_avg, seperate_dl_avg, combined_dl_avg])

# # Multiply each value by 10^4 for table clarity
# average_results = [[value * 10**4 for value in row] for row in average_results]
# average_results_rounded = np.round(average_results, 3).T
# print(average_results_rounded)

In [31]:
# from tabulate import tabulate

# column_labels = ["Lee Carter Model", "Separate DL Models", "Combined DL Model"]
# row_labels = ["State Test MSE", "Country Test MSE", "Weighted Average of MSEs"]

# table_data = [[row_labels[i]] + list(average_results_rounded[i]) for i in range(len(row_labels))]

# latex_table = tabulate(table_data, headers=["Test MSE by Model"] + column_labels, tablefmt="latex")


In [32]:
# print(latex_table)