In [1]:
import pandas as pd 
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

In [2]:
# Read the training features, training labels and test features from the data/ folder.
X_train, y_train, X_test = (
    pd.read_csv(csv_path)
    for csv_path in ('data/X_train.csv', 'data/y_train.csv', 'data/X_test.csv')
)

In [3]:
# Preview the first rows of the training features loaded from data/X_train.csv.
X_train.head()

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.10488,-0.10597,0.10765,0.017561,0.000767,-0.74857,2.103,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.1049,-0.106,0.067851,0.029939,0.003386,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.10492,-0.10597,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.10495,-0.10597,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.096
4,0_4,0,4,-0.75852,-0.63435,-0.10495,-0.10596,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.441


In [4]:
# Skip the first three columns and keep every remaining column as a NumPy array.
X_train_values = X_train.iloc[:, 3:].values
# The target is the last column of y_train; encode its categories as integer ids.
le = LabelEncoder()
y_train_values = le.fit_transform(y_train.values[:, -1])
# Show both shapes, one per line.
print(X_train_values.shape, y_train_values.shape, sep='\n')

(487680, 10)
(3810,)


In [118]:
#every 128 rows is a new sequence
# -1 lets NumPy infer the number of sequences. Unlike recomputing shape[0]//128,
# this keeps the cell safe to re-run: reshaping the already 3-D array is a no-op
# instead of raising a size-mismatch error.
X_train_values = X_train_values.reshape(-1, 128, 10)

In [111]:
# Inspect the array shape after grouping rows into 128-step sequences.
X_train_values.shape

(3810, 128, 10)

In [95]:
#convert to torch tensor
# Features are cast to float32 and labels to int64, the dtypes .float() / .long() produce.
X_train_tensor = torch.from_numpy(X_train_values).to(torch.float32)
y_train_tensor = torch.from_numpy(y_train_values).to(torch.int64)

In [97]:
#split into train and validation
# Hold out 20% of the sequences for validation; the fixed random_state makes the split repeatable.
# NOTE: this overwrites X_train_tensor / y_train_tensor with the training portion, so
# re-running the cell on its own would split the already-reduced tensors again.
(X_train_tensor, X_val_tensor,
 y_train_tensor, y_val_tensor) = train_test_split(
    X_train_tensor,
    y_train_tensor,
    test_size=0.2,
    random_state=42,
)

In [98]:
#create dataset
class SequenceDataset(Dataset):
    """Map-style dataset that pairs each sample in ``X`` with its label in ``y``.

    Args:
        X: Indexable collection of samples (must support ``len`` and ``[]``).
        y: Indexable collection of labels, one per sample in ``X``.

    Raises:
        ValueError: If ``X`` and ``y`` do not have the same length.
    """

    def __init__(self, X, y):
        # Fail fast: a mismatch would otherwise only surface later as an
        # IndexError (or as silently unused labels) in the middle of training.
        if len(X) != len(y):
            raise ValueError(
                'X and y must have the same length, got {} and {}'.format(len(X), len(y))
            )
        self.X = X
        self.y = y

    def __len__(self):
        """Return the number of samples."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return the ``(sample, label)`` pair stored at position ``idx``."""
        return self.X[idx], self.y[idx]

In [99]:
#create dataloader
# Training split: batches of 128, reshuffled on every pass.
train_dataset = SequenceDataset(X_train_tensor, y_train_tensor)
train_loader = DataLoader(dataset=train_dataset, batch_size=128, shuffle=True)
# Validation split: same batch size, kept in a fixed order.
val_dataset = SequenceDataset(X_val_tensor, y_val_tensor)
val_loader = DataLoader(dataset=val_dataset, batch_size=128, shuffle=False)

In [100]:
# Sanity check: fetch one batch and show the feature / label batch shapes.
for X, y in train_loader:
    print(f'{X.shape} {y.shape}')
    break

torch.Size([128, 128, 10]) torch.Size([128])


In [101]:
#create model
class LSTM(nn.Module):
    """Sequence classifier: an ``nn.LSTM`` encoder followed by a linear head.

    The class logits are computed from the LSTM output at the last timestep.

    Args:
        input_size: Number of features per timestep.
        hidden_size: Size of the LSTM hidden state.
        num_layers: Number of stacked LSTM layers.
        num_classes: Number of logits produced by the linear head.
    """

    def __init__(self, input_size, hidden_size, num_layers, num_classes):
        super().__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.num_classes = num_classes
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, num_classes)
        # Kept only so existing code that reads ``model.device`` keeps working.
        # forward() does not use it: it is a guess made at construction time and
        # says nothing about where the module or its inputs actually live.
        self.device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

    def forward(self, x):
        """Return class logits for a batch of sequences.

        Args:
            x: Batch-first input of shape ``(batch, seq_len, input_size)``.

        Returns:
            Tensor of shape ``(batch, num_classes)``.
        """
        batch_size = x.size(0)
        # Allocate the zero initial states with the input's own device and dtype so
        # the forward pass works wherever the model and data have been placed.
        h0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)
        c0 = x.new_zeros(self.num_layers, batch_size, self.hidden_size)
        out, _ = self.lstm(x, (h0, c0))
        # Classify from the output at the final timestep only.
        return self.fc(out[:, -1, :])

In [102]:
#device
# Prefer the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

cuda


In [103]:
# Hyperparameters: features per timestep, hidden units, stacked LSTM layers, output classes.
input_size, hidden_size, num_layers, num_classes = 10, 128, 1, 9
model = LSTM(
    input_size=input_size,
    hidden_size=hidden_size,
    num_layers=num_layers,
    num_classes=num_classes,
)
# Move the parameters to the selected device; as the last expression this also displays the model.
model.to(device)

LSTM(
  (lstm): LSTM(10, 128, batch_first=True)
  (fc): Linear(in_features=128, out_features=9, bias=True)
)

In [104]:
#define loss and optimizer
# Adam updates every model parameter with a learning rate of 1e-3;
# the objective is cross-entropy over the class logits.
optimizer = optim.Adam(params=model.parameters(), lr=1e-3)
criterion = nn.CrossEntropyLoss()

In [106]:
#train model
num_epochs = 1000
# Put the model in training mode explicitly. The evaluation cell calls
# model.eval(), so re-running this cell afterwards would otherwise keep
# training in eval mode.
model.train()
for epoch in range(num_epochs):
    for i, (X, y) in enumerate(train_loader):
        # Move the batch to the same device as the model.
        X = X.to(device)
        y = y.to(device)
        #forward
        outputs = model(X)
        loss = criterion(outputs, y)
        #backward
        # Clear gradients left over from the previous step before backpropagating.
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # Report the current batch loss every 10 steps.
        if (i+1) % 10 == 0:
            print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                  .format(epoch+1, num_epochs, i+1, len(train_loader), loss.item()))

Epoch [1/1000], Step [10/24], Loss: 0.6824
Epoch [1/1000], Step [20/24], Loss: 0.7977
Epoch [2/1000], Step [10/24], Loss: 0.7721
Epoch [2/1000], Step [20/24], Loss: 0.7493
Epoch [3/1000], Step [10/24], Loss: 0.6614
Epoch [3/1000], Step [20/24], Loss: 0.9168
Epoch [4/1000], Step [10/24], Loss: 0.8317
Epoch [4/1000], Step [20/24], Loss: 0.8665
Epoch [5/1000], Step [10/24], Loss: 0.7547
Epoch [5/1000], Step [20/24], Loss: 0.7474
Epoch [6/1000], Step [10/24], Loss: 0.7797
Epoch [6/1000], Step [20/24], Loss: 0.8448
Epoch [7/1000], Step [10/24], Loss: 0.7570
Epoch [7/1000], Step [20/24], Loss: 0.8106
Epoch [8/1000], Step [10/24], Loss: 0.6386
Epoch [8/1000], Step [20/24], Loss: 0.8383
Epoch [9/1000], Step [10/24], Loss: 0.6540
Epoch [9/1000], Step [20/24], Loss: 0.5966
Epoch [10/1000], Step [10/24], Loss: 0.6109
Epoch [10/1000], Step [20/24], Loss: 0.7101
Epoch [11/1000], Step [10/24], Loss: 0.5566
Epoch [11/1000], Step [20/24], Loss: 0.8518
Epoch [12/1000], Step [10/24], Loss: 0.8295
Epoch 

In [107]:
#evaluate model
model.eval()
correct, total = 0, 0
# Gradients are not needed for scoring, so autograd tracking is switched off.
with torch.no_grad():
    for X, y in val_loader:
        X, y = X.to(device), y.to(device)
        outputs = model(X)
        # Predicted class = index of the largest logit in each row.
        predicted = outputs.argmax(dim=1)
        total += y.size(0)
        correct += (predicted == y).sum().item()
print('Accuracy of the model on the validation set: {} %'.format(100 * correct / total))


Accuracy of the model on the validation set: 77.03412073490814 %


In [108]:
# Display the training features DataFrame (pandas abbreviates the middle rows in the rendered output).
X_train

Unnamed: 0,row_id,series_id,measurement_number,orientation_X,orientation_Y,orientation_Z,orientation_W,angular_velocity_X,angular_velocity_Y,angular_velocity_Z,linear_acceleration_X,linear_acceleration_Y,linear_acceleration_Z
0,0_0,0,0,-0.75853,-0.63435,-0.104880,-0.105970,0.107650,0.017561,0.000767,-0.74857,2.1030,-9.7532
1,0_1,0,1,-0.75853,-0.63434,-0.104900,-0.106000,0.067851,0.029939,0.003386,0.33995,1.5064,-9.4128
2,0_2,0,2,-0.75853,-0.63435,-0.104920,-0.105970,0.007275,0.028934,-0.005978,-0.26429,1.5922,-8.7267
3,0_3,0,3,-0.75852,-0.63436,-0.104950,-0.105970,-0.013053,0.019448,-0.008974,0.42684,1.0993,-10.0960
4,0_4,0,4,-0.75852,-0.63435,-0.104950,-0.105960,0.005135,0.007652,0.005245,-0.50969,1.4689,-10.4410
...,...,...,...,...,...,...,...,...,...,...,...,...,...
487675,3809_123,3809,123,0.62871,-0.76878,-0.084391,0.081093,0.003167,0.093760,-0.142740,3.27180,2.0115,-9.0063
487676,3809_124,3809,124,0.62884,-0.76868,-0.084365,0.081099,0.014994,0.032637,-0.132380,4.42750,3.0696,-8.1257
487677,3809_125,3809,125,0.62891,-0.76861,-0.084345,0.081178,-0.031184,-0.003961,-0.138940,2.70480,4.2622,-8.1443
487678,3809_126,3809,126,0.62903,-0.76850,-0.084414,0.081231,-0.069153,0.013229,-0.130210,2.54100,4.7130,-9.4435
