# Loading the data

In [None]:
# Load data using read_csv
from pandas import read_csv, concat
from matplotlib import pyplot
# write the path and column names of the CSV file
df = read_csv('path/to/csv/file', usecols=["col1", "col2"])
# Preview the first rows of the frame that was just loaded.
print(df.head())

In [None]:
# One wide figure per column; col2 keeps its red line, col1 uses the default style.
for column, fmt in (("col1", ()), ("col2", ("r",))):
    pyplot.figure(figsize=(18, 5))
    pyplot.plot(df[column], *fmt)
    pyplot.title(f"the dataset ({column})")

pyplot.show()

# printing Stats
print("\nThe table below shows the summary characteristics of the dataset :\n")
print(df.describe(), "\n\n")

# Preparing the dataset

In [None]:
# lstm autoencoder to recreate a timeseries
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
from keras.layers import RepeatVector
from keras.layers import TimeDistributed
def temporalize(X, lookback):
    """Cut a 2-D array into overlapping windows of ``lookback`` rows.

    Parameters
    ----------
    X : array-like supporting ``X[[row], :]`` indexing
        The series to window, one record per row.
    lookback : int
        Number of consecutive rows gathered into each window.

    Returns
    -------
    list
        One entry per window start (``len(X) - lookback + 1`` of them, or
        none when that count is not positive). Each entry is a list of
        ``lookback`` items, each item being ``X[[row], :]`` -- a single row
        kept two-dimensional.
    """
    window_count = len(X) - lookback + 1
    return [
        # Gather the records from `start` up to the lookback period.
        [X[[start + offset], :] for offset in range(lookback)]
        for start in range(window_count)
    ]

In [None]:
# define input timeseries
# Columns are selected by their string labels; stacking the two series and
# transposing yields one row per record with the two columns side by side.
timeseries = np.array([df["col1"], df["col2"]]).transpose()
print(timeseries.shape)
timeseries[:10]

(2803, 2)


array([[0.7416    , 0.80133333],
       [0.739     , 0.803     ],
       [0.7354    , 0.806     ],
       [0.7322    , 0.808     ],
       [0.7294    , 0.80933333],
       [0.7276    , 0.80966667],
       [0.7258    , 0.81033333],
       [0.7254    , 0.811     ],
       [0.7242    , 0.81266667],
       [0.7232    , 0.813     ]])

In [None]:
timesteps = 100
n_features = 2

# Build the overlapping windows, then reshape them into the
# (n_windows, timesteps, n_features) layout that is fed to the LSTM layers.
windows = np.array(temporalize(X=timeseries, lookback=timesteps))
X = windows.reshape(windows.shape[0], timesteps, n_features)

X.shape

(2704, 100, 2)

# Building the model

In [None]:
# define model
# The first two LSTM layers reduce each window to a single 32-unit vector;
# RepeatVector replays that vector `timesteps` times so the remaining LSTM
# layers and the per-step Dense layer can emit one n_features-wide output
# per timestep.
model = Sequential([
    LSTM(128, activation='relu', input_shape=(timesteps, n_features), return_sequences=True),
    LSTM(32, activation='relu', return_sequences=False),
    RepeatVector(timesteps),
    LSTM(32, activation='relu', return_sequences=True),
    LSTM(128, activation='relu', return_sequences=True),
    TimeDistributed(Dense(n_features)),
])
# NOTE(review): "accuracy" is tracked on a continuous reconstruction target;
# it is kept because later cells read it from the training history -- confirm
# whether it is actually meaningful here.
model.compile(optimizer='rmsprop', loss='mae', metrics=["accuracy"])
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
lstm (LSTM)                  (None, 100, 128)          67072     
_________________________________________________________________
lstm_1 (LSTM)                (None, 32)                20608     
_________________________________________________________________
repeat_vector (RepeatVector) (None, 100, 32)           0         
_________________________________________________________________
lstm_2 (LSTM)                (None, 100, 32)           8320      
_________________________________________________________________
lstm_3 (LSTM)                (None, 100, 128)          82432     
_________________________________________________________________
time_distributed (TimeDistri (None, 100, 2)            258       
Total params: 178,690
Trainable params: 178,690
Non-trainable params: 0
__________________________________________________

In [None]:
# fit model -- X is passed as both input and target, so the network is
# trained to reproduce its own input windows
history = model.fit(
    x=X,
    y=X,
    epochs=20,
    batch_size=5,
    validation_split=0.2,
    verbose=1,
)
# demonstrate reconstruction
yhat = model.predict(X, verbose=1)

# Visualizing the performance of the model

In [None]:
import matplotlib.pyplot as plt
# Per-epoch curves recorded by model.fit.
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(len(acc))

# Open each figure *before* drawing on it so that no empty trailing figure
# is left behind when the cell is rendered.
plt.figure()
plt.plot(epochs, acc, 'r', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend(loc=0)

plt.figure()
plt.plot(epochs, loss, 'r', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend(loc=0)

plt.show()

In [None]:
# Number of non-overlapping windows, and the index at which each one starts
# (0, timesteps, 2*timesteps, ...).
a = len(X) // timesteps
_starts = [k * timesteps for k in range(a)]


def _stitch(windows, feature):
    """Pick the non-overlapping windows of one feature and flatten them into a list."""
    picked = windows[_starts, :, feature]
    return list(picked.reshape(picked.shape[0] * picked.shape[1]))


# Feature 0: actual values vs. model output.
X1 = _stitch(X, 0)
yhat1 = _stitch(yhat, 0)

# Feature 1: actual values vs. model output.
X2 = _stitch(X, 1)
yhat2 = _stitch(yhat, 1)

In [None]:
# Integer positions 0 .. len(X1)-1.
l1 = [*range(len(X1))]

In [None]:
import matplotlib.pyplot as plt

# Overlay the actual and reconstructed values, one figure per feature.
for actual, predicted, title in (
    (X1, yhat1, 'Reconstruction of the Humidity measurements'),
    (X2, yhat2, 'Reconstruction of the Temperature measurements'),
):
    plt.figure(figsize=(20, 5))
    plt.plot(l1, actual, label='Actual data')
    plt.plot(l1, predicted, 'r', label='Predicted data')
    plt.title(title)
    # The label= arguments above are only displayed once a legend is drawn.
    plt.legend()
    plt.show()