In [None]:
def rnn_preprocessing(data_raw, gender, T0, tau0, obsYear):
    """Build sliding-window training samples from long-format log-mortality data.

    Parameters
    ----------
    data_raw : pandas.DataFrame
        Long-format table with the columns 'Gender', 'Year', 'Age' and 'logmx'.
    gender : value compared against the 'Gender' column
        Only rows whose 'Gender' equals this value are kept.
    T0 : int
        Number of consecutive years in each input window (lookback period).
    tau0 : int
        Number of neighbouring ages in each input window.
    obsYear : int
        Last year (inclusive) that is used.

    Returns
    -------
    xt_train : numpy.ndarray of shape (ntrain, T0, tau0)
        One (years x ages) window per sample.
    yt_train : numpy.ndarray of shape (ntrain,)
        For each sample, the value in the year right after its window, taken
        at column offset ``int((tau0 - 1) / 2)`` inside the window.

    Raises
    ------
    ValueError
        If the selected data has fewer than ``T0`` years or fewer than
        ``tau0 - 1`` (padded) age columns, so that no window fits.
    """
    # select gender -- the mask must come from data_raw itself, not from a
    # global frame, so the function works on whatever frame is passed in
    gender_mort = data_raw[data_raw['Gender'] == gender].drop(columns=['Gender'])

    # select all years up to and including obsYear
    gender_mort = gender_mort[gender_mort['Year'] <= obsYear]

    # widen: one row per year, one column per age; work on a plain array
    # from here on, which avoids repeated DataFrame slicing in the loop below
    rates = pd.pivot_table(
        gender_mort, values='logmx', index='Year', columns='Age'
    ).to_numpy(dtype=float)

    delta0 = int((tau0 - 1) / 2)

    # add padding at the age boundaries by duplicating the marginal feature
    # values delta0 times on each side
    if delta0 > 0 and rates.shape[1] > 0:
        rates = np.pad(rates, ((0, 0), (delta0, delta0)), mode='edge')

    t1 = rates.shape[0] - T0  # number of window start years
    a1 = rates.shape[1] - (tau0 - 1)  # number of window start ages
    if t1 < 0 or a1 < 0:
        # without this check two negative counts would multiply to a positive
        # sample count and an all-zero array would be returned silently
        raise ValueError(
            "not enough data for the requested window: "
            f"{rates.shape[0]} years x {rates.shape[1]} padded ages "
            f"with T0={T0}, tau0={tau0}"
        )
    ntrain = t1 * a1  # number of training samples

    # initiate arrays
    xt_train = np.zeros((ntrain, T0, tau0))
    yt_train = np.zeros((ntrain))

    # fill in arrays with training data
    for t0 in range(t1):
        for a0 in range(a1):
            sample = t0 * a1 + a0
            xt_train[sample, :, :] = rates[t0:(t0 + T0), a0:(a0 + tau0)]
            yt_train[sample] = rates[t0 + T0, a0 + delta0]

    return xt_train, yt_train

In [None]:
# model / data parameters
T0 = 10  # lookback period (number of years per input window)
tau0 = 5  # dimension of x_t: 5 neighbouring ages feed the prediction for the central age x
gender = "Female"
obsYear = 1999

# build the training windows and their targets
x_train, y_train = rnn_preprocessing(all_mort, gender, T0, tau0, obsYear)

# global extremes over all samples, years and ages
x_min = x_train.min()
x_max = x_train.max()


def f(x):
    """Rescale x using the global training extremes x_min and x_max."""
    # NOTE(review): the denominator is (x_min - x_max), not (x_max - x_min),
    # so training values land in [-3, -1] rather than [-1, 1] -- confirm this
    # is intended before changing it, since anything fitted on these inputs
    # depends on the current scaling.
    return 2 * (x - x_min) / (x_min - x_max) - 1


x_train = f(x_train)

# flip the sign of the targets and keep their mean
y_train = -y_train
y0 = y_train.mean()

x_train.shape, y_train.shape