# Mortality Rate Prediction Using Neural Networks

## 1. Introduction

## 2. Data Preparation

### 2.1 Load packages

In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from copy import deepcopy
from sklearn.model_selection import train_test_split, GroupShuffleSplit
from sklearn.compose import make_column_transformer
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler, OrdinalEncoder

from tensorflow.keras.models import Model
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.utils import plot_model

from tensorflow.keras.layers import Input, Embedding, Reshape, Dense, Concatenate
from tensorflow.keras.regularizers import l1

from sklearn.metrics import confusion_matrix, RocCurveDisplay, PrecisionRecallDisplay

### 2.2 Load data
The project uses the Australian population data set available from the Human Mortality Database (HMD), where it is labelled "AUS".

Steps before importing csv into Python:
1. Download the data from website https://www.mortality.org/File/GetDocument/hmd.v6/AUS/STATS/Mx_1x1.txt -> RAW
2. Remove header rows -> EDITED TXT
3. Open in Excel and convert to CSV using " " delimiter
4. Add headers back in

In [41]:
# Read the two mortality-rate tables produced by the CSV preparation steps
# described above. Both files are expected in the notebook's working directory.
ausData = pd.read_csv(filepath_or_buffer='AUS_mort.csv')
# NOTE(review): presumably the same layout as the AUS file (Year, Age, Female,
# Male, Total) - the formatting cell below relies on those columns for cheData.
cheData = pd.read_csv(filepath_or_buffer='CHE_mort.csv')



### 2.3 Data Formatting
- Replace "." with NA
- Encode age bracket "110+" as 110
- Convert age, mx and gender to int, float and category, respectively

In [52]:
# Build the long-format mortality table: one row per (Year, Age, Gender) with
# the death rate in `mx`, and every column cast to its working dtype.
# NOTE(review): section 2.2 describes the AUS data set, but this cell starts
# from cheData - confirm which table is intended here.
all_mort = (
    cheData
    # wide -> long: the Female / Male / Total columns become Gender + mx
    .melt(
        id_vars=['Year', 'Age'],
        value_vars=['Female', 'Male', 'Total'],
        var_name='Gender',
        value_name='mx',
    )
    .assign(
        # the open-ended top age bracket "110+" is coded as plain 110
        Age=lambda frame: frame['Age'].mask(frame['Age'] == '110+', '110'),
        # "." is the raw file's placeholder for a missing rate
        mx=lambda frame: frame['mx'].mask(frame['mx'] == '.', np.nan),
    )
    # the casts only succeed once the two text sentinels above are gone
    .astype({'Gender': 'category', 'Age': 'int', 'mx': 'float'})
)
all_mort.dtypes

Year         int64
Age          int64
Gender    category
mx         float64
dtype: object

### 2.4 Data Pre-processing
Pre-processing steps were then applied so that the data is in the proper format for our models. This includes:
- Select calendar years t from 1950 until 2016 (the latest calendar year available) **NOTE TO KELLY: UPDATE TO 2019**
- Select ages from 0 to 99 years (inclusive)
- Create a `log_mx` column, which is the natural log of the `mx` column

In [53]:
# Study window: calendar years 1950-2016 and ages 0-99, both ends inclusive.
# Change the bounds here when the year range is extended.
FIRST_YEAR, LAST_YEAR = 1950, 2016
MIN_AGE, MAX_AGE = 0, 99

# keep rows where year is between 1950 and 2016 and age is between 0 and 99
in_window = (
    (all_mort['Year'] >= FIRST_YEAR) & (all_mort['Year'] <= LAST_YEAR)
    & (all_mort['Age'] >= MIN_AGE) & (all_mort['Age'] <= MAX_AGE)
)
# .copy() makes the filtered frame independent of the unfiltered one, so the
# column assignment below does not trigger pandas' SettingWithCopyWarning.
all_mort = all_mort.loc[in_window].copy()

# log mx (natural log)
# NOTE(review): np.log returns -inf for mx == 0 (with a divide-by-zero
# RuntimeWarning) and NaN for missing mx - confirm these rows are handled
# before the values are fed to a model.
all_mort['log_mx'] = np.log(all_mort['mx'])

  result = getattr(ufunc, method)(*inputs, **kwargs)
