In [1]:
!pip install -U -q PyDrive
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials

auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

[?25l[K    1% |▎                               | 10kB 29.8MB/s eta 0:00:01[K    2% |▋                               | 20kB 2.1MB/s eta 0:00:01[K    3% |█                               | 30kB 3.1MB/s eta 0:00:01[K    4% |█▎                              | 40kB 2.0MB/s eta 0:00:01[K    5% |█▋                              | 51kB 2.5MB/s eta 0:00:01[K    6% |██                              | 61kB 3.0MB/s eta 0:00:01[K    7% |██▎                             | 71kB 3.4MB/s eta 0:00:01[K    8% |██▋                             | 81kB 3.9MB/s eta 0:00:01[K    9% |███                             | 92kB 4.3MB/s eta 0:00:01[K    10% |███▎                            | 102kB 3.3MB/s eta 0:00:01[K    11% |███▋                            | 112kB 3.3MB/s eta 0:00:01[K    12% |████                            | 122kB 4.8MB/s eta 0:00:01[K    13% |████▎                           | 133kB 4.8MB/s eta 0:00:01[K    14% |████▋                           | 143kB 9.0MB/s eta 0:00:01[

In [0]:
link = 'https://drive.google.com/open?id=1JTKh8nUQwVUgfPo0GjxcccO75bJOz-WE'
fluff, id = link.split('=')
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('X_train.csv')

link = 'https://drive.google.com/open?id=1dJ8g5zQSkmCAQnagJCpko8lqzKKBGQNS'
fluff, id = link.split('=')
downloaded = drive.CreateFile({'id':id}) 
downloaded.GetContentFile('Y_train.csv')

The first step is to load the dataset which consist of the input data, covering 10 sensor channels with 128 measurements per time series plus three ID columns and the target data, consist of two ID columns and one column of class label.

In [0]:
import numpy as np
import pandas as pd

input_data = pd.read_csv('X_train.csv')
target_data = pd.read_csv('Y_train.csv')

We take only the 10 sensor channel columns from the input data and the class label column from the target data as we did not need the ID columns for training our model.

In [0]:
input_data = input_data.iloc[:,3:13].values
target_data = target_data.iloc[:,2].values

The next step is to normalize our data so that every values are in the range of -1 to 1.

In [0]:
from sklearn.preprocessing import MinMaxScaler

scaler = MinMaxScaler(feature_range=(-1, 1))
input_data = scaler.fit_transform(input_data)

There are 3810 series in the dataset which consist of 128 time steps and ten features for each series. To fit our input data to the neural network model, we need to reshape it into a three-dimensional array where the dimensions of the array are [number_of_series, time_steps, features].

In [6]:
def reshapeTimestep(x):
  x_reshaped = []
  for i in range(int(len(x) / 128)):
    lowerB = i * 128
    upperB = lowerB + 128
    timesteps = np.array(x[lowerB:upperB])
    x_reshaped.append(timesteps)
  return np.array(x_reshaped)
  
input_data = reshapeTimestep(input_data)

print (input_data[0])

[[-7.66889091e-01 -6.40862617e-01 -6.36152446e-01 ... -3.05657115e-02
   2.70892246e-01 -7.05215082e-02]
 [-7.66889091e-01 -6.40852509e-01 -6.36278018e-01 ... -6.87582345e-04
   2.64757478e-01 -6.57008320e-02]
 [-7.66889091e-01 -6.40862617e-01 -6.36403591e-01 ... -1.72730018e-02
   2.65639750e-01 -5.59844220e-02]
 ...
 [-7.67859670e-01 -6.39589009e-01 -6.34833930e-01 ... -3.69710694e-02
   2.59275468e-01 -7.37263232e-02]
 [-7.67869781e-01 -6.39568793e-01 -6.34645570e-01 ... -2.99437308e-02
   2.52859258e-01 -1.02014516e-01]
 [-7.67900111e-01 -6.39558685e-01 -6.34457211e-01 ... -2.46832455e-02
   2.57529229e-01 -8.38874137e-02]]


The target data consist of multiple class label which are represented in string. We need to one-hot encode these class labels so that the data suitable for fitting the neural network model.

In [0]:
target_data = pd.get_dummies(target_data).values

Then we split the input and target data into training dan testing data with a 7 : 3 ratio.

In [0]:
input_test = input_data[2667:3810]
input_train = input_data[:2667]

target_test = target_data[2667:3810]
target_train = target_data[:2667]

Next, we build the neural network model which consist of two LSTM layer with 512 units each, one Dense layer with 256 units, and another Dense layer with 9 units for the output layer as this dataset has 9 different class labels. We add Dropout of 0.5 rate to reduce overfitting. Then we fit the model with the training data.

In [9]:
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import LSTM
from keras.layers import Dropout

Using TensorFlow backend.


In [10]:
model = Sequential()
model.add(LSTM(units = 512, input_shape = (128, 10), return_sequences=True))
model.add(Dropout(0.5))
model.add(LSTM(units = 512, input_shape = (128, 10)))
model.add(Dropout(0.5))
model.add(Dense(units = 256))
model.add(Dense(units = 9, activation = 'softmax'))
model.compile(optimizer = 'adam', loss = 'categorical_crossentropy', metrics=['accuracy', 'mse'])
model.fit(input_train, target_train, epochs = 100, batch_size = 64)

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.
Instructions for updating:
Use tf.cast instead.
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 6

<keras.callbacks.History at 0x7fe8c4278748>

Finally, we evaluate the model on the test data based on its accuracy and RMSE.

In [14]:
evaluate = model.evaluate(input_test, target_test, batch_size = 64)

print('{} : {}'.format('accuracy', evaluate[1]))
print('{} : {}'.format('root-mean-squared-error', evaluate[2]))

accuracy : 0.6080489925199294
root-mean-squared-error : 0.058164952999129174
