In [None]:
# Import libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

In [None]:
# Import Keras utils
import tensorflow as tf
import keras
from keras.models import Sequential
from keras.layers import Input, Dropout, Dense, Normalization
from tensorflow.keras.utils import to_categorical
from pandas.api.types import CategoricalDtype

In [None]:
# Load the county names, one per line, from counties.txt.
# Surrounding whitespace is stripped and blank lines are skipped, so a trailing
# empty line or a stray carriage return/space in the file cannot produce a
# bogus county name (and, in the cells that build CSV paths from these names,
# a path that does not exist).
with open('counties.txt', 'r') as file:
    county_names = [line.strip() for line in file if line.strip()]

In [None]:
# Parse historic data as inputs.
# One CSV is read per county and all frames are concatenated in a single call;
# growing the frame with pd.concat inside the loop would re-copy every earlier
# row on each iteration. Row order (county order, then file order) and the
# per-file index values are the same as with incremental concatenation.
historic_frames = [
    pd.read_csv("data/CHAT-" + str(county) + "-historical.csv")
    for county in county_names
]
pd_data_historic = pd.concat(historic_frames)

In [None]:
# Parse UHII values, duplicate y values for different X values.
# Each county's vulnerability-indicator table is repeated N_UHII_COPIES times
# back-to-back (whole table, then the whole table again, ...), and the
# per-county results are stacked in county order.
# NOTE(review): the repetition presumably lines the targets up with row groups
# of the historic table -- confirm the historic CSVs really contain
# N_UHII_COPIES groups per county in this order.
N_UHII_COPIES = 5  # the table itself plus four duplicates

uhii_frames = []
for county in county_names:
    county_uhii = pd.read_csv(
        "data/CHAT-" + str(county) + "-vulnerability-indicators.csv"
    )
    uhii_frames.extend([county_uhii] * N_UHII_COPIES)

# Single concatenation instead of repeatedly growing the frame.
pd_data_uhii = pd.concat(uhii_frames)


In [None]:
# Inputs and outputs.
FEATURE_COLUMNS = [
    'time_of_year',
    'socioeconomic_group',
    'avg_event_rh_max_perc',
    'avg_event_rh_min_perc',
    'tmax',
    'tmin',
    'hist_avg_annual_events',
    'hist_avg_duration',
]

# Take an explicit copy: the preprocessing step assigns into X_all, and
# assigning into a column selection of pd_data_historic triggers pandas'
# SettingWithCopyWarning.
X_all = pd_data_historic[FEATURE_COLUMNS].copy()

# Target column; missing values are replaced with 0.
# Only the target is filled here -- NaNs in X_all are left untouched.
y = pd_data_uhii[["uhii_avgdeltat"]].fillna(0)

# The train/val/test split indexes X_all with positions derived from len(y),
# so the two tables must have the same number of rows.
assert len(X_all) == len(y), (
    "feature/target row mismatch: %d input rows vs %d target rows"
    % (len(X_all), len(y))
)

In [None]:
# Preprocessing for inputs: encode the categorical features as integer codes.
TIME_OF_YEAR_CODES = {"Total": 0, "AM": 1, "JJA": 2, "SO": 3}
SOCIOECONOMIC_GROUP_CODES = {"2006 HW": 0, "Vulnerable": 1, "General": 2}


def encode_categories(column, codes):
    """Return `column` with every value found in `codes` swapped for its code.

    Values that are not keys of `codes` pass through unchanged, so applying
    this to an already-encoded column leaves it as it is. The mapped result is
    passed through pd.to_numeric, which makes the numeric dtype explicit
    (instead of relying on implicit downcasting of an object column) and
    raises ValueError if a non-numeric label is left over.
    """
    return pd.to_numeric(column.map(lambda value: codes.get(value, value)))


# .assign returns a new frame, so nothing is written into a column selection
# of another DataFrame; column order is preserved.
X_all = X_all.assign(
    time_of_year=encode_categories(X_all["time_of_year"], TIME_OF_YEAR_CODES),
    socioeconomic_group=encode_categories(
        X_all["socioeconomic_group"], SOCIOECONOMIC_GROUP_CODES
    ),
)


In [None]:
# Reproducibility: seed Python, NumPy and TensorFlow with the same value
import random as python_random
randomseed = 12345
np.random.seed(randomseed)
python_random.seed(randomseed)
tf.random.set_seed(randomseed)

# Shuffle the row positions in place with a dedicated, seeded generator
indx_bin = np.arange(len(y))
random.Random(randomseed).shuffle(indx_bin)

# Fraction of the rows assigned to each subset
train_split, val_split, test_split = 0.6, 0.2, 0.2

n_rows = len(indx_bin)
nTrain = int(train_split * n_rows)
nVal = int(val_split * n_rows)
nTest = int(test_split * n_rows)

# Consecutive slices of the shuffled positions; the test subset takes
# everything that remains after the training and validation slices
train_indx_bin = indx_bin[:nTrain]
val_indx_bin = indx_bin[nTrain:nTrain + nVal]
test_indx_bin = indx_bin[nTrain + nVal:]

In [None]:
# Split dataset: pull out the raw arrays once, then index each subset
features = X_all.values
targets = y.values

X_train, y_train = features[train_indx_bin], targets[train_indx_bin]
X_val, y_val = features[val_indx_bin], targets[val_indx_bin]
X_test, y_test = features[test_indx_bin], targets[test_indx_bin]

standardize = True

# Fit the normalization layer on the training inputs only
if standardize:
    normalize_layer = Normalization()
    normalize_layer.adapt(X_train)

# Print the shape of every subset: inputs first, then targets
for subset in (X_train, y_train, X_val, y_val, X_test, y_test):
    print(subset.shape)

## Keras ANN

In [None]:
# Create model parameters: widths of the four hidden layers and learning rate
fc1 = 128
fc2 = 64
fc3 = 32
fc4 = 16
lr = 0.001

# Create a model
model = Sequential()
# Declare the input width with an explicit Input layer. Previously it was
# passed as `input_dim` to a Dense layer that was not the first layer.
model.add(Input(shape=(X_train.shape[1],)))
# normalize_layer is only created when standardize is True, so only add it then
if standardize:
    model.add(normalize_layer)
model.add(Dense(fc1, activation='relu'))
model.add(Dense(fc2, activation='relu'))
model.add(Dense(fc3, activation='relu'))
model.add(Dense(fc4, activation='relu'))
# Single linear output unit for the regression target
model.add(Dense(1))

# Compile the model
# The optimizer comes from the same `keras` package as Sequential and the
# layers, so model, optimizer and callback all use one Keras implementation.
opt = keras.optimizers.Adam(learning_rate=lr)
model.compile(optimizer=opt,
              loss='mae',
              metrics=['mae'])

# Stop training once val_loss has not improved by at least min_delta for
# `patience` consecutive epochs.
# NOTE(review): with restore_best_weights=False the model keeps the weights of
# the last epoch run, not those of the best validation epoch -- confirm this
# is intended before relying on the saved model.
callback = keras.callbacks.EarlyStopping(
    monitor="val_loss",
    min_delta=1e-4,
    patience=50,
    verbose=0,
    mode="auto",
    baseline=None,
    restore_best_weights=False,
)

In [None]:
# Train the model; `history` is only created when `training` is True
training = True

if training:
    fit_options = dict(
        epochs=1000,
        callbacks=[callback],
        validation_data=(X_val, y_val),
    )
    history = model.fit(X_train, y_train, **fit_options)
            

In [None]:
# Plot training curve: MAE per epoch for the training and validation data
curves = [
    # (history key, legend label, RGB colour)
    ('mae', 'train', (208/255, 222/255, 198/255)),
    ('val_mae', 'val', (78/255, 105/255, 86/255)),
]
for metric_key, curve_label, curve_color in curves:
    plt.plot(history.history[metric_key], label=curve_label, color=curve_color)

plt.xlabel('Epochs')
plt.ylabel('Mean Absolute Error')
plt.legend()
plt.show()

In [None]:
# Save the trained model under the name held in model_name.
# NOTE(review): the path has no file extension. Newer Keras releases reportedly
# require a `.keras` or `.h5` suffix in model.save -- confirm against the
# installed Keras version before upgrading.
model_name = "model_3"
model.save(model_name)

In [None]:
# Report the mean absolute error (MAE) on each subset.
# The model is compiled with loss='mae' and metrics=['mae'], so evaluate()
# returns [loss, mae] and index 1 is the MAE metric. This is a regression
# error in the units of the target, not a classification accuracy, so it is
# printed as-is rather than as a percentage.
pred_train = model.predict(X_train)
scores = model.evaluate(X_train, y_train, verbose=0)
print('MAE on training data: %1.4f' % scores[1])

pred_val = model.predict(X_val)
scores2 = model.evaluate(X_val, y_val, verbose=0)
print('MAE on validation data: %1.4f' % scores2[1])

pred_test = model.predict(X_test)
scores3 = model.evaluate(X_test, y_test, verbose=0)
print('MAE on testing data: %1.4f' % scores3[1])