In [None]:
#running on Python 3.8.12

import tensorflow as tf
import numpy as np
import pandas as pd
import autokeras as ak
import statistics as stat
import random as rand
import keras_tuner as kt

from numpy import asarray, hstack, array

from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.layers import Dense, Dropout, LSTM
from tensorflow.keras.models import Sequential, clone_model, load_model
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.metrics import RootMeanSquaredError
from tensorflow.keras.metrics import MeanAbsoluteError
from tensorflow.keras.metrics import MeanAbsolutePercentageError

from matplotlib import pyplot as plt

from sklearn.preprocessing import StandardScaler, MinMaxScaler
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.metrics import mean_absolute_percentage_error

from pandas import Series

In [None]:
#Get dataset
# Paths are written with forward slashes so they resolve on Windows, Linux and
# macOS alike (a backslash is not a path separator on POSIX systems); this also
# matches the 'data/risk/...' output paths used by the rest of the notebook.
src = './data/ncr_may72022.csv'
dataset = pd.read_csv(src)
# Discard the row labelled 0, i.e. the first data row of the CSV.
dataset = dataset.drop(0)
# Cases-only file: its 'covid_counts' column is the ground truth used for scoring.
actual = pd.read_csv('./data/ncr_may72022_casesonly.csv')
actual['Date'] = pd.to_datetime(actual['Date'], format='%m/%d/%Y')

def reshape(series):
    """Reshape `series` into a single column: one row per element, one column wide."""
    n_rows = len(series)
    column = series.reshape((n_rows, 1))
    return column

def scale(series):
    """Min-max scale `series` with a (0, 1) feature range and return the result.

    A brand-new MinMaxScaler is fitted on the given data on every call; the
    fitted scaler itself is discarded, only the transformed data is returned.
    """
    return MinMaxScaler(feature_range=(0, 1)).fit_transform(series)

def split_sequences(sequences, n_steps_in, n_steps_out):
    """Cut a 2-D array into sliding (input window, target window) pairs.

    sequences   -- 2-D array; every column but the last is an input feature,
                   the last column is the target.
    n_steps_in  -- number of rows in each input window.
    n_steps_out -- number of target values in each output window.

    The target window starts on the LAST row of its input window, so one
    sample spans n_steps_in + n_steps_out - 1 rows. Windows that would run
    past the end of `sequences` are not produced.

    Returns (X, y) as NumPy arrays built from the collected windows.
    """
    total_rows = len(sequences)
    span = n_steps_in + n_steps_out - 1  # rows covered by one complete sample
    inputs, targets = [], []
    for start in range(total_rows):
        target_stop = start + span
        if target_stop > total_rows:
            break
        input_stop = start + n_steps_in
        inputs.append(sequences[start:input_stop, :-1])
        targets.append(sequences[input_stop - 1:target_stop, -1])
    return array(inputs), array(targets)

def split_y(sequences, n_steps_in, n_steps_out):
    """Collect only the sliding target windows from the last column of `sequences`.

    sequences   -- 2-D array; the last column holds the target values.
    n_steps_in  -- length of the (implicit) input window preceding each target.
    n_steps_out -- number of target values per window.

    Each target window begins on the last row of its input window. Windows
    that would extend past the end of `sequences` are not produced.

    Returns a NumPy array built from the collected target windows.
    """
    total_rows = len(sequences)
    targets = []
    for start in range(total_rows):
        first = start + n_steps_in - 1  # last row of the input window
        last = first + n_steps_out
        if last > total_rows:
            break
        targets.append(sequences[first:last, -1])
    return array(targets)
        
def akTSFModel(seed, lookback):
    """Construct an AutoKeras TimeseriesForecaster with this notebook's settings.

    seed     -- forwarded as the forecaster's `seed`.
    lookback -- forwarded as the forecaster's `lookback`.

    The forecaster is created with max_trials=10, loss='mse', overwrite=True
    and RMSE / MAE / MAPE as tracked metrics. It is returned without being
    fitted.
    """
    tracked_metrics = [
        tf.keras.metrics.RootMeanSquaredError(),
        tf.keras.metrics.MeanAbsoluteError(),
        tf.keras.metrics.MeanAbsolutePercentageError(),
    ]
    settings = dict(
        lookback=lookback,
        max_trials=10,
        metrics=tracked_metrics,
        overwrite=True,
        loss='mse',
        seed=seed,
    )
    return ak.TimeseriesForecaster(**settings)

def plot(predictions, actual, i):
    """Plot actual (green) against predicted (red) values and save the figure.

    predictions -- predicted series, drawn in red.
    actual      -- ground-truth series, drawn in green.
    i           -- identifier used for the file name: data/risk/<i>.png

    The current figure is cleared after saving so the next call starts clean.
    """
    out_path = 'data/risk/' + str(i) + '.png'
    # Draw order matters: it has to line up with the legend labels below.
    for series, colour in ((actual, 'green'), (predictions, 'red')):
        plt.plot(series, color=colour)
    plt.legend(['Actual','Predicted'])
    plt.title('Risk Model vs Actual')
    plt.savefig(out_path)
    plt.clf()

In [None]:
#WHEN USING THIS FOR BASE MODEL, COMMENT OUT EVERYTHING EXCEPT covscale, AND covid_counts.

# Pull every column of interest out of the dataset as a float64 NumPy array
# (cast and value extraction done in a single expression per column).
# covscale and covid_counts start out identical; covscale is the copy that is
# never normalised, so it can later be used to fit the inverse scaler.
covscale = dataset['covid_counts'].astype('float64').values
covid_counts = dataset['covid_counts'].astype('float64').values

# Bed capacity and epidemiological counts.
isolation_beds = dataset['isolation_beds'].astype('float64').values
susceptible = dataset['susceptible_counts'].astype('float64').values
recovery = dataset['recovery_counts'].astype('float64').values
death = dataset['death_counts'].astype('float64').values
incidence_rate = dataset['incidence_rate'].astype('float64').values
quarantine_type_int = dataset['quarantine_type_int'].astype('float64').values

# The '*_baseline' columns.
retail_rec = dataset['retail_rec_baseline'].astype('float64').values
grocery_pharma = dataset['grocery_pharma_baseline'].astype('float64').values
parks = dataset['parks_baseline'].astype('float64').values
transit = dataset['transit_baseline'].astype('float64').values
workplace = dataset['workplace_baseline'].astype('float64').values
residential = dataset['residential_baseline'].astype('float64').values

In [None]:
#reshape all features.
# Every array is passed through reshape() so it becomes an (n, 1) column.
# The two name lists below must stay in the same order.
(
    covscale, covid_counts, susceptible, recovery, death, incidence_rate,
    quarantine_type_int, retail_rec, grocery_pharma, parks, transit,
    workplace, residential, isolation_beds,
) = [
    reshape(feature)
    for feature in (
        covscale, covid_counts, susceptible, recovery, death, incidence_rate,
        quarantine_type_int, retail_rec, grocery_pharma, parks, transit,
        workplace, residential, isolation_beds,
    )
]

In [None]:
#normalize
# Each feature column is scaled independently by scale(); covscale is
# deliberately left out so it keeps the raw case counts.
# The two name lists below must stay in the same order.
(
    covid_counts, susceptible, recovery, death, incidence_rate,
    quarantine_type_int, retail_rec, grocery_pharma, parks, transit,
    workplace, residential, isolation_beds,
) = map(
    scale,
    (
        covid_counts, susceptible, recovery, death, incidence_rate,
        quarantine_type_int, retail_rec, grocery_pharma, parks, transit,
        workplace, residential, isolation_beds,
    ),
)

#prep inverse scaler
# Fitted on the raw (unscaled) case counts so that model output can later be
# mapped back to case numbers with scaler.inverse_transform().
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(covscale)

In [None]:
#stack columns horizontally
#use basedata if running base model.
#basedata = np.expand_dims(dataset_stacked, 1)

# The 13 scaled feature columns side by side.
dataset_stacked = hstack((
    covid_counts, susceptible, recovery, death, incidence_rate,
    quarantine_type_int, retail_rec, grocery_pharma, parks, transit,
    workplace, residential, isolation_beds,
))
# Same 13 columns with the scaled covid_counts appended once more as the last
# column, which is the column split_sequences() reads as the target.
dataset_stackedcov = hstack((dataset_stacked, covid_counts))


In [None]:
# Window sizes: rows fed to the model and values predicted per sample.
lookback = 60
predict = 60

# X holds the sliding input windows; the targets returned alongside it are
# not used (kept under the name `throw`), y is built separately by split_y().
X, throw = split_sequences(dataset_stackedcov, lookback, predict)
y = split_y(covid_counts, lookback, predict)

In [None]:
# Hold out the final row of the stacked features and the final target window;
# everything before them becomes the training portion.
train_X = dataset_stacked[:-1, :]
test_X = dataset_stacked[-1:, :]
train_y = y[:-1, :]
test_y = y[-1:, :]
print("trx:",train_X.shape, "tsx:",test_X.shape,"try:",train_y.shape,"tsy:", test_y.shape)

In [None]:
# Index 546 is September 14, 2021: the end of the training data and the start
# of the test period.
test_start = 546
test_stop = test_start + predict

# Ground truth for the test period. It is the same for every seed, so it is
# sliced once here instead of on every metric call.
actual_cases = actual['covid_counts'][test_start:test_stop]

# One AutoKeras search per randomly drawn seed; metrics are keyed by the seed.
seeds = rand.sample(range(100000),10)
RMSE = {}
MAE = {}
MAPE = {}
for i in seeds:
    try:
        clf = akTSFModel(i, lookback)
        clf.fit(x=train_X[:test_start,:], y=train_y[:test_start], batch_size = lookback, epochs=10)
    except Exception as e:
        # Skip this seed entirely. Falling through would either hit a
        # NameError (first iteration) or export/score the PREVIOUS seed's
        # forecaster under this seed's name.
        print("seed", i, "failed during model search/fit:", e)
        continue
    model = clf.export_model()
    # Prefer the TensorFlow SavedModel format; fall back to HDF5 if that fails.
    try:
        model.save("final models/autokeras_risk" + str(i), save_format="tf")
    except Exception:
        model.save("final models/autokeras_risk" + str(i) + ".h5")

    #prediction on last data value
    # NOTE(review): X[test_start] is the lookback window that STARTS at row
    # 546 of the stacked data (see split_sequences), i.e. it appears to cover
    # the same rows as the ground-truth slice it is scored against - confirm
    # this alignment is intended.
    X_test2 = (X[test_start:test_start + 1,:])
    predictions = model.predict(X_test2)
    #inverse scaling
    unscaled_predictions = scaler.inverse_transform(predictions)
    unscaled_predictions = unscaled_predictions[0]

    # Each metric is stored as a one-element list of its string form so the
    # dicts convert directly into single-row DataFrames (one column per seed).
    RMSE[str(i)] = [str(mean_squared_error(actual_cases, unscaled_predictions, squared = False))]
    MAE[str(i)] = [str(mean_absolute_error(actual_cases, unscaled_predictions))]
    MAPE[str(i)] = [str(mean_absolute_percentage_error(actual_cases, unscaled_predictions))]

    #plotting
    # Re-index from 0 so the actual curve shares an x-axis with the predictions.
    to_plot = actual_cases.reset_index(drop = True)
    plot(unscaled_predictions, to_plot, i)

    #metrics into dataframe to csv
    # Rewritten after every seed so partial results survive an interrupted run.
    metricsRMSE = pd.DataFrame(RMSE)
    metricsMAE = pd.DataFrame(MAE)
    metricsMAPE = pd.DataFrame(MAPE)

    metricsRMSE.to_csv('data/risk/Risk RMSEs.csv')
    metricsMAE.to_csv('data/risk/Risk MAEs.csv')
    metricsMAPE.to_csv('data/risk/Risk MAPEs.csv')
