# Predicting daily temperature using LSTM
## Modelling
This notebook defines, trains, and tests an LSTM model that predicts the daily temperature.


## Import libraries

In [16]:
import warnings
warnings.filterwarnings('ignore')
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense

print('Libraries imported')

Libraries imported


## Get dataset

In [17]:
# Load the four pre-split CSV files in one pass; the tuple order below
# matches the unpacking order on the left-hand side.
x_test, x_train, y_test, y_train = map(
    pd.read_csv,
    (
        './data/x_test.csv',
        './data/x_train.csv',
        './data/y_test.csv',
        './data/y_train.csv',
    ),
)

In [18]:
# Convert the training frames to NumPy arrays
y = np.array(y_train)
# Append a trailing axis of length 1 so X has the layout
# [samples, timesteps, features] expected by the LSTM layer
X = np.array(x_train)[:, :, np.newaxis]

## Define simple LSTM model

In [19]:
# Build the network in a single expression: one LSTM layer followed by a
# one-unit dense output layer.
model = Sequential([
    # 128 LSTM units with ReLU activation; every sample is a sequence of
    # X.shape[1] timesteps carrying a single feature
    LSTM(128, activation='relu', input_shape=(X.shape[1], 1)),
    # single output value per sample
    Dense(1),
])
# Train with the Adam optimiser against a mean-squared-error loss
model.compile(loss='mse', optimizer='adam')

## Train model

In [20]:
# Training settings: number of epochs, and the validation_split value
# handed to Keras (share of the training data used for validation)
n_epoch, val_split = 5, 0.1

# Train the model; the call is left as the cell's final expression so the
# returned History object is displayed, as before
model.fit(x=X, y=y, validation_split=val_split, epochs=n_epoch)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x26a3bc68760>

In [21]:
# Tabulate the per-epoch training and validation losses recorded by Keras,
# renaming the 'loss' key to the 'train_loss' column name used in the saved file
loss_df = (
    pd.DataFrame(model.history.history)[['loss', 'val_loss']]
    .rename(columns={'loss': 'train_loss'})
)
# Write the table to disk without the index column, then display it
loss_df.to_csv('./data/LSTM_loss.csv', index=False)
loss_df

Unnamed: 0,train_loss,val_loss
0,14.236206,9.649299
1,9.218683,8.213611
2,8.50983,7.984323
3,8.226976,8.073421
4,8.282599,8.001594


In [23]:
# Plot the training and validation loss for each epoch.
# The model is compiled with loss='mse' and no additional metrics, so the
# only series recorded during training are 'loss' and 'val_loss'; there is
# no 'accuracy' series to plot. The per-epoch values are stored in the
# nested `.history` dict of the History object attached to the model.
training_log = model.history.history

fig, ax = plt.subplots()
ax.plot(training_log['loss'], label='train')
ax.plot(training_log['val_loss'], label='val')
ax.set_title('Training loss')
ax.set_ylabel('Loss (MSE)')
ax.set_xlabel('Epoch')
ax.legend()
plt.show()

## Test model

In [24]:
# Convert the test inputs to a NumPy array (rebinds x_test)
x_test = np.array(x_test)
# Add a trailing axis of length 1: [samples, timesteps, 1]
x_input = np.expand_dims(x_test, axis=-1)
# Test targets collapsed to a flat 1-D vector
y_test_set = np.array(y_test).reshape(-1)

In [25]:
# Run the model on the test inputs and collapse its output to a 1-D vector
prediction = model.predict(x_input).flatten()

# Put each predicted value next to the corresponding observed value
prediction_df = pd.DataFrame({'temp_pred': prediction, 'temp_val': y_test_set})

# Save the comparison table without the index column, then display it
prediction_df.to_csv('./data/LSTM_pred.csv', index=False)
prediction_df



Unnamed: 0,temp_pred,temp_val
0,13.356195,11.1
1,13.759382,12.8
2,13.269389,13.3
3,12.674009,12.8
4,13.732738,11.7
...,...,...
1090,15.423079,15.6
1091,10.697013,16.1
1092,12.864421,9.4
1093,10.770575,12.8
