In [42]:
import torch
import torchvision
import numpy as np
import pandas as pd
from torch.utils.data import Dataset,DataLoader
from sklearn.model_selection import train_test_split
from sklearn.utils import shuffle
from torch import nn, optim
import torch.nn.functional as F
from torch.autograd import Variable
import matplotlib.pyplot as plt
import os

In [43]:
path = os.getcwd()+'\Joint_Dataset.csv'

In [44]:
dataset = pd.read_csv(path)
dataset.head()

In [45]:
X = dataset.drop('label',axis=1)
y = dataset['label']

In [46]:
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

In [47]:
trainset = pd.concat([X_train,y_train],axis=1)
testset = pd.concat([X_test,y_test],axis=1)

In [48]:
class MSRDataset(Dataset):
    def __init__(self,data):
        self.len = data.shape[0]
        self.x_data = torch.from_numpy(data.drop('label',axis=1).to_numpy()).float()
        self.y_data = torch.from_numpy(data['label'].to_numpy()-1).type(torch.LongTensor)
    
    def __getitem__(self,index):
        return self.x_data[index],self.y_data[index]
    
    def __len__(self):
        return self.len

In [49]:
train_data = MSRDataset(trainset)
test_data = MSRDataset(testset)

In [50]:
trainloader = DataLoader(dataset=train_data,batch_size=64,shuffle=True,num_workers=0)
testloader = DataLoader(dataset=test_data,batch_size=64,shuffle=True,num_workers=0)

In [51]:
import pdb

class LSTMModel(nn.Module):
    def __init__(self, input_dim, hidden_dim, layer_dim, output_dim):
        super(LSTMModel, self).__init__()
        
        # Number of hidden dimensions
        self.hidden_dim = hidden_dim
        
        # Number of hidden layers
        self.layer_dim = layer_dim
        
        # RNN
        self.lstm = nn.LSTM(input_dim, hidden_dim, layer_dim, batch_first=True)
        
        # Readout layer
        self.fc = nn.Linear(hidden_dim, output_dim)
    
    def forward(self, x):
        
        # Initialize hidden state with zeros
        #h0 = Variable(torch.zeros(self.layer_dim, x.size(0), self.hidden_dim))
        out, hn = self.lstm(x, None)
        out = self.fc(out[:, -1, :]) 
        return out

In [52]:
# Create RNN
input_dim = 60    # input dimension
hidden_dim = 100  # hidden layer dimension
layer_dim = 2     # number of hidden layers
output_dim = 20   # output dimension

model = LSTMModel(input_dim, hidden_dim, layer_dim, output_dim)
print(model)

LSTMModel(
  (lstm): LSTM(60, 100, num_layers=2, batch_first=True)
  (fc): Linear(in_features=100, out_features=20, bias=True)
)


In [53]:
criterion = nn.CrossEntropyLoss()
# SGD Optimizer
#learning_rate = 0.05
#optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)
optimizer = optim.Adam(model.parameters(),lr=0.001)

In [None]:
correct = 0
total = 0
loss_list = []
ite_list = []
for epoch in range(1000):
    for i,data in enumerate(trainloader):
        inputs,labels = data
        inputs,labels = Variable(inputs.view(-1,1,60)),Variable(labels)
        #pdb.set_trace()
        optimizer.zero_grad()
        output = model(inputs)
        loss = criterion(output,labels)
        loss.backward()
        optimizer.step()
    if epoch %100 == 0:
        loss_list.append(loss.data)
        ite_list.append(epoch)
        for idx,i in enumerate(output):
            if torch.argmax(i) == labels[idx]:
                correct+=1
            total+=1
        print('Accuracy on trainset epoch',epoch,':',round(correct/total,3)*100)
        

Accuracy on trainset epoch 0 : 7.3
Accuracy on trainset epoch 100 : 11.0
Accuracy on trainset epoch 200 : 9.8
Accuracy on trainset epoch 300 : 9.1
Accuracy on trainset epoch 400 : 10.2
Accuracy on trainset epoch 500 : 10.6
Accuracy on trainset epoch 600 : 10.8
Accuracy on trainset epoch 700 : 10.4
Accuracy on trainset epoch 800 : 10.0
Accuracy on trainset epoch 900 : 11.5


In [None]:
correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        inputs,labels = data
        inputs,labels = Variable(inputs.view(-1,1,60)),Variable(labels)
        #pdb.set_trace()
        output = model(inputs)
        for idx, i in enumerate(output):
            #pdb.set_trace()
            if torch.argmax(i) == labels[idx]:
                correct+=1
            total+=1
#pdb.set_trace()
print('Accuracy ', round(correct/total,3)*100)

In [None]:
# visualization loss 
plt.plot(ite_list,loss_list)
plt.xlabel("Number of iteration")
plt.ylabel("Loss")
plt.title("LSTM: Loss vs Number of iteration")
plt.show()