In [11]:
import tensorflow as tf
import glob 
import os
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler,StandardScaler
from sklearn.preprocessing import LabelEncoder
from numpy import array
from datetime import datetime
import numpy as np

In [12]:
path=r'C:\Users\Joshlin\Downloads\infections-develop\infections-develop\warehouse\design\raw'

data = pd.DataFrame()
def read_c(csv):
    """Function to read the filenames present in the folder
    Returns the list containing filename """
    files = pd.read_csv(csv)
    files['filename'] = os.path.basename(csv).split('.')[0]
    return files

data = pd.concat(map(read_c, glob.glob(os.path.join(path, "*.csv")))) #creating dataframe by concatnating all csv files

In [13]:
#changing the format of dates from yyyy-mm-dd to yyyymmdd
data['date'] = pd.to_datetime(data['date'],format='%Y-%m-%d') 
data['date'] = data['date'].astype('str')
data['date'] = data['date'].str.replace('-','')
print(data['date'])

#Removing columns 
data=data.drop(columns=['dailyCases', 'dailyFirstDoseByVaccinationDate', 'dailySecondDoseByVaccinationDate',
                   'dailyThirdInjectionByVaccinationDate'])

0      20200301
1      20200302
2      20200303
3      20200304
4      20200305
         ...   
670    20211231
671    20220101
672    20220102
673    20220103
674    20220104
Name: date, Length: 94500, dtype: object


In [19]:
data.dtypes

date                           object
covidOccupiedBeds             float64
covidOccupiedMVBeds           float64
estimatedNewAdmissions        float64
EDC0-4                        float64
EDC5-9                        float64
EDC10-14                      float64
EDC15-19                      float64
EDC20-24                      float64
EDC25-29                      float64
EDC30-34                      float64
EDC35-39                      float64
EDC40-44                      float64
EDC45-49                      float64
EDC50-54                      float64
EDC55-59                      float64
EDC60-64                      float64
EDC65-69                      float64
EDC70-74                      float64
EDC75-79                      float64
EDC80-84                      float64
EDC85-89                      float64
EDC90+                        float64
newDeaths28DaysByDeathDate    float64
EDV12-15                      float64
EDV16-17                      float64
EDV18-24    

In [40]:

data = data.sort_values(by=['filename','date']) # sorting the values to get the data in sequence
 # datetime to unix conversion
data['date'] = pd.to_datetime(data['date'])  # datetime to unix conversion
data['new_date'] = (data['date'] - datetime(1970,1,1))
data['new_date'] = ( data['new_date'] / np.timedelta64(1, 'D')).astype(int)

# list of column names for scaling 
column_name = ['covidOccupiedBeds', 'covidOccupiedMVBeds',
       'estimatedNewAdmissions', 'EDC0-4', 'EDC5-9', 'EDC10-14', 'EDC15-19',
       'EDC20-24', 'EDC25-29', 'EDC30-34', 'EDC35-39', 'EDC40-44', 'EDC45-49',
       'EDC50-54', 'EDC55-59', 'EDC60-64', 'EDC65-69', 'EDC70-74', 'EDC75-79',
       'EDC80-84', 'EDC85-89', 'EDC90+',
       'newDeaths28DaysByDeathDate', 'EDV12-15', 'EDV16-17',
       'EDV18-24', 'EDV25-29', 'EDV30-34', 'EDV35-39', 'EDV40-44', 'EDV45-49',
       'EDV50-54', 'EDV55-59', 'EDV60-64', 'EDV65-69', 'EDV70-74', 'EDV75-79',
       'EDV80-84', 'EDV85-89', 'EDV90+','new_date']

scaler =StandardScaler()
data[column_name]=scaler.fit_transform(data[column_name])

#coverting categorical filename to lable 
le = LabelEncoder()
#Adding to the table
data['filename'] = le.fit_transform(data['filename'])

input_data = pd.DataFrame(data.drop('date',axis=1))
new_data = input_data
n_features = len(new_data.columns)
new_data.head()
final_data = new_data.values

In [15]:
def sequences_creation(data, n):
    """ 
    Function to split sequence into steps
    slices the data so that each input have the sequence of data of n_steps
    returns the output containing the steps
    """
    X, y = list(), list()  
    for i in range(len(data)):
        # sequence range 
        end = i + n
        # check if the range is within the limit
        if end > len(data):
            break
        # slicing the data set 
        x_value, y_value = data[i:end, :], data[end-1, 2]
        X.append(x_value)
        y.append(y_value)
    return array(X), array(y)

In [16]:
n_steps  = 7     # number of steps denote the number of days in sequence consider as input  
X,y = sequences_creation(final_data, n_steps)
X= np.array(X)
y =np.array(y)

# splitting train, test and validation test
val_limit = round((len(X)/100)*80)
val_2_limit = round((len(X)/100)*10) + val_limit
X_train  =  X[:val_limit]
y_train  = y[:val_limit]
X_val = X[val_limit:val_2_limit]
y_val = y[val_limit:val_2_limit]
X_test = X[val_2_limit:]
y_test = y[val_2_limit:]
n_features = X_train.shape[2]


In [28]:
from keras.models import Sequential
from keras.layers import LSTM,GRU,SimpleRNN
from keras.layers import Dense
from keras.layers import Dropout
from tensorflow.keras.optimizers import Adam,RMSprop,SGD
from keras import initializers
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint, TensorBoard

In [47]:
initializer = tf.keras.initializers.HeUniform()
model = Sequential()
model.add(LSTM(50, activation='relu', return_sequences=True,input_shape=(n_steps, n_features),kernel_initializer=initializer))
model.add(Dropout(0.01))
model.add(LSTM(20,activation='relu',return_sequences=True))
model.add(Dropout(0.01))
model.add(Dense(1,'relu'))
model.compile(optimizer= Adam(learning_rate= 0.001),loss=tf.keras.losses.MeanSquaredLogarithmicError(),metrics=[tf.metrics.MeanAbsoluteError()])
model.summary()

Model: "sequential_14"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm_8 (LSTM)               (None, 7, 50)             18600     
                                                                 
 dropout_20 (Dropout)        (None, 7, 50)             0         
                                                                 
 lstm_9 (LSTM)               (None, 7, 20)             5680      
                                                                 
 dropout_21 (Dropout)        (None, 7, 20)             0         
                                                                 
 dense_31 (Dense)            (None, 7, 1)              21        
                                                                 
Total params: 24,301
Trainable params: 24,301
Non-trainable params: 0
_________________________________________________________________


In [57]:
%%time
import numpy as np
#X_train = X_train.astype('float32')
#X_train = 
#y_train = np.asarray(y_train).astype('float32')
#X_val = np.asarray(X_val).astype('float32')
#y_val = np.asarray(y_val).astype('float32')
#print(X_train[0])


es = EarlyStopping(monitor='val_loss', min_delta=1e-10, patience=10, verbose=1)
rlr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=10, verbose=1)
mcp = ModelCheckpoint(filepath='weights.h5', monitor='val_loss', verbose=1, save_best_only=True, save_weights_only=True)

tb = TensorBoard('logs')


model.fit(X_train, y_train,validation_data =(X_val,y_val),callbacks=[es, rlr, mcp, tb],epochs=200,verbose=1,shuffle=False)

Epoch 1/200
Epoch 00001: val_loss improved from inf to 0.05237, saving model to weights.h5
Epoch 2/200
Epoch 00002: val_loss improved from 0.05237 to 0.05236, saving model to weights.h5
Epoch 3/200
Epoch 00003: val_loss did not improve from 0.05236
Epoch 4/200
Epoch 00004: val_loss did not improve from 0.05236
Epoch 5/200
Epoch 00005: val_loss did not improve from 0.05236
Epoch 6/200
Epoch 00006: val_loss did not improve from 0.05236
Epoch 7/200
Epoch 00007: val_loss did not improve from 0.05236
Epoch 8/200
Epoch 00008: val_loss did not improve from 0.05236
Epoch 9/200
Epoch 00009: val_loss did not improve from 0.05236
Epoch 10/200
Epoch 00010: val_loss improved from 0.05236 to 0.05231, saving model to weights.h5
Epoch 11/200
Epoch 00011: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.

Epoch 00011: val_loss did not improve from 0.05231
Epoch 12/200
Epoch 00012: val_loss improved from 0.05231 to 0.05222, saving model to weights.h5
Epoch 13/200
Epoch 00013: val_loss i

<keras.callbacks.History at 0x19ec6659c40>

In [58]:
#GRU

model_2 = Sequential()
model_2.add(GRU(40,return_sequences=True,input_shape=(n_steps, n_features),activation='relu',kernel_initializer=tf.keras.initializers.HeUniform()))
model_2.add(Dropout(0.1))
model_2.add(GRU(30,return_sequences=True, activation='relu'))
model_2.add(Dropout(0.1))
model_2.add(GRU(10,activation='relu',return_sequences=True))
model_2.add(Dropout(0.1))
model_2.add(Dense(1,activation='relu'))
model_2.summary()
model_2.compile(optimizer= Adam(learning_rate= 0.001),loss= tf.keras.losses.MeanSquaredLogarithmicError(),metrics=[tf.metrics.MeanAbsoluteError()])


model_2.fit(X_train,y_train,validation_data =(X_val,y_val),callbacks=[es, rlr, mcp, tb],epochs=200,verbose=1,shuffle=False)

Model: "sequential_16"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 gru_12 (GRU)                (None, 7, 40)             10080     
                                                                 
 dropout_25 (Dropout)        (None, 7, 40)             0         
                                                                 
 gru_13 (GRU)                (None, 7, 30)             6480      
                                                                 
 dropout_26 (Dropout)        (None, 7, 30)             0         
                                                                 
 gru_14 (GRU)                (None, 7, 10)             1260      
                                                                 
 dropout_27 (Dropout)        (None, 7, 10)             0         
                                                                 
 dense_33 (Dense)            (None, 7, 1)            

<keras.callbacks.History at 0x19ec6637d60>

In [59]:
model_3 = Sequential()
model_3.add(Dense(200,activation='tanh',kernel_initializer=initializer,input_shape=(n_steps, n_features)))
model_3.add(Dense(100,activation='tanh'))
model_3.add(Dropout(0.1))
model_3.add(Dense(50,activation='tanh'))
model_3.add(Dropout(0.1))
model_3.add(Dense(30,activation='tanh'))
model_3.add(Dropout(0.1))
model_3.add(Dense(1,activation='relu'))
model_3.compile(optimizer=Adam(learning_rate=0.001), loss=tf.keras.losses.MeanSquaredLogarithmicError(), 
            metrics=[tf.metrics.MeanAbsoluteError()])
# fit model
model_3.fit(X_train,y_train,validation_data =(X_val,y_val),callbacks=[es, rlr, mcp, tb],epochs=200,verbose=1,shuffle=False)
model_3.evaluate(X_test,y_test)

Epoch 1/200
Epoch 00001: val_loss did not improve from 0.05205
Epoch 2/200
Epoch 00002: val_loss did not improve from 0.05205
Epoch 3/200
Epoch 00003: val_loss did not improve from 0.05205
Epoch 4/200
Epoch 00004: val_loss did not improve from 0.05205
Epoch 5/200
Epoch 00005: val_loss did not improve from 0.05205
Epoch 6/200
Epoch 00006: val_loss did not improve from 0.05205
Epoch 7/200
Epoch 00007: val_loss did not improve from 0.05205
Epoch 8/200
Epoch 00008: val_loss did not improve from 0.05205
Epoch 9/200
Epoch 00009: val_loss did not improve from 0.05205
Epoch 10/200
Epoch 00010: val_loss did not improve from 0.05205
Epoch 11/200
Epoch 00011: val_loss did not improve from 0.05205
Epoch 12/200
Epoch 00012: val_loss did not improve from 0.05205
Epoch 13/200
Epoch 00013: val_loss did not improve from 0.05205
Epoch 14/200
Epoch 00014: val_loss did not improve from 0.05205
Epoch 15/200
Epoch 00015: val_loss did not improve from 0.05205
Epoch 16/200
Epoch 00016: val_loss did not improv

[0.07850394397974014, 0.5463212132453918]

In [61]:
#LSTM
model.evaluate(X_train,y_train)
#GRU
model_2.evaluate(X_train,y_train)
#Neural Network
model_3.evaluate(X_train,y_train)



[0.055602092295885086, 0.5309653878211975]

In [56]:
model.evaluate(X_val,y_val)
model_2.evaluate(X_val,y_val)
model_3.evaluate(X_val,y_val)



[0.056079525500535965, 0.4901369512081146]

In [55]:
#lstm
model.evaluate(X_test,y_test)
#GRU
model_2.evaluate(X_test,y_test)
#Neural network
model_3.evaluate(X_test,y_test)



[0.0785084217786789, 0.5444673299789429]