In [None]:
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
import seaborn as sns
sns.set_style('whitegrid')
plt.style.use("fivethirtyeight")

# Load the case-level CSV. The aggregation below treats every row as one
# confirmed case and groups on `announce_date` (parsed as DD/MM/YYYY).
cases_raw = pd.read_csv('../input/covid-thailand-sy/confirmed-cases.csv')

# Daily new cases = number of rows sharing an announce_date.
# groupby() skips rows whose date is missing, so no NaT/NaN row is produced.
# The raw frame is left untouched (no helper column is written into it).
daily_counts = cases_raw.groupby('announce_date').size()

# One-column frame ('count') indexed by an unnamed DatetimeIndex.
df = daily_counts.to_frame('count')
df.index = pd.DatetimeIndex(pd.to_datetime(df.index, format='%d/%m/%Y').values)

# Hand-entered figure for 2021-07-09.
# NOTE(review): presumably this day is absent from the CSV snapshot -- confirm
# the source of the number. The guard keeps the index unique if a refreshed
# CSV already contains that date.
MANUAL_DATE = pd.Timestamp("2021-07-09")
MANUAL_COUNT = 9276
if MANUAL_DATE not in df.index:
    manual_row = pd.DataFrame({'count': [MANUAL_COUNT]}, index=[MANUAL_DATE])
    df = pd.concat([df, manual_row])

# Chronological order is required by the windowing done later.
df = df.sort_index()
df.tail()


In [None]:
# Daily confirmed cases over time, drawn through the explicit Axes interface.
fig, ax = plt.subplots(figsize=(16,8))
ax.plot(df['count'])
ax.set_title('Covid Case Thailand')
ax.set_xlabel('date', fontsize=18)
ax.set_ylabel('Number', fontsize=18)
plt.show()

In [None]:
def data_prepare(data,y_time,datab,train_ratio):
    """Scale a single-column series and cut it into supervised training windows.

    The whole series is min-max scaled to [0, 1]; the fitted scaler is stored
    in the module-level name ``scaler`` so other cells can call
    ``scaler.transform`` / ``scaler.inverse_transform``.

    Each training sample pairs the ``datab`` values *before* position ``i``
    with the ``y_time`` values starting *at* position ``i``, so the target is
    never part of its own input window.

    Parameters
    ----------
    data : pandas.DataFrame
        Frame whose ``.values`` is the series to model. Only column 0 is used
        when building the windows.
    y_time : int
        Number of future steps per target.
    datab : int
        Number of past steps per input window.
    train_ratio : float
        Fraction of rows (rounded up) used for training.

    Returns
    -------
    tuple
        ``(data, x_train, y_train, scaled_data, training_data_len)`` where
        ``x_train`` has shape ``(samples, datab, 1)``, ``y_train`` has shape
        ``(samples, y_time)``, ``scaled_data`` is the scaled full series and
        ``training_data_len`` is the number of training rows.

    Raises
    ------
    ValueError
        If the training slice is too short to yield a single window.
    """
    global scaler
    # Local import kept so the function carries its own dependency.
    from sklearn.preprocessing import MinMaxScaler

    dataset = data.values

    # Number of leading rows that belong to the training split.
    training_data_len = int(np.ceil(len(dataset) * train_ratio))

    # NOTE(review): the scaler is fitted on the full series, so the hold-out
    # rows influence the scaling range -- confirm this is acceptable.
    scaler = MinMaxScaler(feature_range=(0,1))
    scaled_data = scaler.fit_transform(dataset)

    train_data = scaled_data[:training_data_len, :]

    x_train = []
    y_train = []
    # Stop early enough that a full y_time-long target fits after the window.
    for i in range(datab, len(train_data) - y_time + 1):
        x_train.append(train_data[i-datab:i, 0])
        y_train.append(train_data[i:i+y_time, 0])

    if not x_train:
        raise ValueError(
            "training slice has %d rows; need at least datab + y_time = %d"
            % (len(train_data), datab + y_time)
        )

    x_train, y_train = np.array(x_train), np.array(y_train)

    # Keras LSTM layers expect (samples, timesteps, features).
    x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], 1))
    print('x_train', x_train.shape)
    print('y_train',y_train.shape)

    # Hand back the caller's frame rather than reaching for a global.
    return data, x_train, y_train, scaled_data,training_data_len

# Forecasting configuration
y_time = 1           # number of steps the network outputs per sample
datab = 60           # number of past observations in each input window
train_ratio = 0.90   # fraction of rows used for training

# Frame holding only the 'count' column; the evaluation and plotting cells read it.
data = df.filter(['count'])

# Scale the series and build the training windows. `df` is rebound to the
# frame handed back by data_prepare.
(df, x_train, y_train, scaled_data, training_data_len) = data_prepare(
    data=df,
    y_time=y_time,
    datab=datab,
    train_ratio=train_ratio,
)
df


In [None]:
# Hold-out windows, cut from the scaled series.
# The slice starts `datab` rows before the train/test split so that the first
# window ends exactly where the training rows end.
dataset = data.values
test_data = scaled_data[training_data_len - datab:, :]

# Scaled targets: every row after the split.
y_test_scale = scaled_data[training_data_len:, :]

# One window of `datab` consecutive values per hold-out row.
x_test = np.array(
    [test_data[end - datab:end, 0] for end in range(datab, len(test_data))]
)

# Add the trailing feature axis -> (samples, datab, 1)
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

In [None]:
from keras.models import Sequential
from keras.layers import Dense, LSTM
from matplotlib import pyplot as plt


# Build the network: two stacked LSTM layers, two dense layers, and a final
# dense layer with `y_time` outputs (one per forecast step).
model = Sequential()
model.add(LSTM(100, return_sequences=True, input_shape=(x_train.shape[1], x_train.shape[2])))
model.add(LSTM(100, return_sequences=False))
model.add(Dense(100))
model.add(Dense(100))
model.add(Dense(y_time))

# Compile. The targets are continuous, so report mean absolute error next to
# the MSE loss; classification 'accuracy' carries no meaning for regression.
model.compile(optimizer='Adam', loss='mean_squared_error', metrics=['mae'])

# Train, scoring the hold-out windows after every epoch.
history = model.fit(
    x_train,
    y_train,
    batch_size=1,
    validation_data=(x_test, y_test_scale),
    epochs=10,
)

# Training vs. validation loss per epoch.
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model train vs validation loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper right')
plt.show()

In [None]:
# Evaluate on the hold-out rows.
# `x_test` (the scaled hold-out windows) was built in the earlier test-set
# cell and is reused here instead of being rebuilt with a copy of that code.

# Actual, unscaled counts for the rows after the train/test split.
y_test = data.values[training_data_len:, :]

# Predict in scaled space, then map back to case counts.
predictions1 = model.predict(x_test)
predictions = scaler.inverse_transform(predictions1)

# Root mean squared error (RMSE) in original units.
rmse = np.sqrt(np.mean((predictions - y_test) ** 2))

# Flat vector of first-step predictions, used by the plotting cell.
predicte = predictions[:, 0]
print('RMSE', rmse)

In [None]:
# Split the observed series at the train/test boundary for plotting.
train = data[:training_data_len]
# .copy() gives an independent frame, so adding a column does not write into
# a slice of `data` (which raises SettingWithCopyWarning).
valid = data[training_data_len:].copy()
valid['Predictions'] = predicte

# Training rows, hold-out rows and the model's hold-out predictions.
plt.figure(figsize=(16,8))
plt.title('Model')
plt.xlabel('Date', fontsize=18)
plt.ylabel('Number of cases', fontsize=18)
plt.plot(train['count'])
plt.plot(valid[['count', 'Predictions']])
plt.legend(['Train', 'Val', 'Predictions'], loc='upper left')
plt.show()

# Persist the trained network; the forecasting cells reload it from this file.
model.save('covid.h5')

In [None]:
#predict next day

from tensorflow import keras
from pandas_datareader.data import DataReader
# Reload the network from the file written by the training/plotting cell.
model = keras.models.load_model('covid.h5')

# Most recent `datab` observations as a bare (rows, 1) array. The scaler was
# fitted on an array, so handing it a DataFrame would trigger scikit-learn's
# "fitted without feature names" warning.
last_window = df[-datab:].values

# Bring the window into the scaler's range.
last_window_scaled = scaler.transform(last_window)

# Add the batch axis -> (1, window_length, 1)
X_test = last_window_scaled.reshape(1, -1, 1)

# Predict in scaled space, then convert back to a case count.
pred_price = model.predict(X_test)
pred_price = scaler.inverse_transform(pred_price)
print(pred_price)


In [None]:
#predict next days

from tensorflow import keras
from pandas_datareader.data import DataReader
# Reload the network from the file written by the training/plotting cell.
model = keras.models.load_model('covid.h5')
predict = 80  # number of future steps to forecast

# Running list of raw counts: the observed series first, then each forecast
# as it is produced. A plain list avoids DataFrame.append (removed in
# pandas 2.0) and growing a frame one row at a time.
counts = df['count'].astype(float).tolist()

for _ in range(predict):
    # The last `datab` values (observed and/or already forecast) form the input.
    window = np.array(counts[-datab:], dtype=float).reshape(-1, 1)
    window_scaled = scaler.transform(window)

    # Add the batch axis -> (1, window_length, 1)
    X_test = window_scaled.reshape(1, -1, 1)

    # Predict in scaled space, convert back to a count, and feed it forward.
    pred_price = scaler.inverse_transform(model.predict(X_test, verbose=0))
    counts.append(float(pred_price[0, 0]))

# Observed rows followed by the forecasts, on a fresh 0..N-1 integer index
# (the same layout the row-by-row append with ignore_index=True produced).
ks = pd.DataFrame({'count': counts})


# Observed counts followed by the forecast, against the integer row index.
fig, ax = plt.subplots(figsize=(16,8))
ax.plot(ks['count'])
ax.set_title('Covid Case Thailand')
ax.set_xlabel('Days', fontsize=18)
ax.set_ylabel('Number', fontsize=18)
plt.show()


In [None]:
# Number of observed (non-forecast) rows at the start of `ks`. Taken from the
# observed frame instead of a hard-coded row number, so the colour change
# stays on the right row when the input data changes.
n_observed = len(df)

plt.figure(figsize=(16,8))
plt.title('Covid Case Thailand')
plt.plot(ks['count'].iloc[:n_observed])
# Start one row early so the forecast line joins the last observed point.
plt.plot(ks['count'].iloc[n_observed - 1:])
plt.xlabel('Days', fontsize=18)
plt.ylabel('Number', fontsize=18)
plt.legend(['Current', 'Predict'], loc='upper left')
plt.show()

In [None]:
# Cumulative case total over observed + forecast rows. Computed from the
# 'count' column only, so re-running the cell never sums the 'cum' column.
ks['cum'] = ks['count'].cumsum()

# Number of observed (non-forecast) rows at the start of `ks`, taken from the
# observed frame instead of a hard-coded row number.
n_observed = len(df)

plt.figure(figsize=(16,8))
plt.title('Covid Case Thailand')
plt.plot(ks['cum'].iloc[:n_observed])
# Start one row early so the forecast line joins the last observed point.
plt.plot(ks['cum'].iloc[n_observed - 1:])
plt.legend(['Current', 'Predict'], loc='upper left')
plt.xlabel('Days', fontsize=18)
plt.ylabel('Cum. Number', fontsize=18)
plt.show()