In [1]:
import numpy as np
import pandas as pd
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
import os
from collections import Counter
from sklearn.utils.class_weight import compute_class_weight
import torch
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [27]:
flag_cuda = torch.cuda.is_available()

if not flag_cuda:
    print('Using CPU')
else:
    print('Using GPU')

Using CPU


In [13]:
PATH = "./TrainingData/"
val_subject = 5
test_subject = 6

In [14]:
files = []
for filename in os.listdir(PATH):
    files.append(filename)
files = sorted(files, key = lambda x: (int(x.split('_')[1]),int(x.split('_')[2]), x.split('_')[4] ))
files_train = list(filter(lambda x: int(x.split('_')[1]) not in  [val_subject, test_subject], files))
files_val = list(filter(lambda x: int(x.split('_')[1]) == val_subject, files))
files_test = list(filter(lambda x: int(x.split('_')[1])== test_subject, files))

Calculating the value count of the window (15) to feed it in CNN

In [17]:
def windows(d, w, t):
  r = np.arange(len(d))
  s = r[::t]
  z = list(zip(s, s + w))
  f = '{0[0]}:{0[1]}'.format
  g = lambda t: d.iloc[t[0]:t[1]]
  ranges = list(map(f,z))
  return ranges, pd.concat(map(g, z), keys=map(f, z))

In [19]:
def make_dataset(files, X, Y, flag = False):
    for i in range(0, len(files), 4):
        x_time, x, y_time, y = files[i: i + 4]
        x_time_df = pd.read_csv(PATH + x_time , header=None)
        x_df = pd.read_csv(PATH + x , header=None)
        x_combined = pd.concat([x_time_df, x_df], axis=1, ignore_index=True)
        x_combined = x_combined.loc[range(1,len(x_combined), 4)].reset_index()  # down sampled the frequency
        # print(x_combined.shape)
        y_time_df = pd.read_csv(PATH + y_time , header=None)
        y_df = pd.read_csv(PATH + y , header=None)
        y_combined = pd.concat([y_time_df, y_df], axis=1, ignore_index=True)
        # print(y_combined.shape)
        train_df = pd.concat([x_combined, y_combined], axis=1, ignore_index=True)
        train_df = train_df.drop(columns=[0, 1, 8])  # Dropping the time stamp
        Y.extend(train_df[9].values)
        X.extend(train_df.drop(columns=[9]).values)
        overlap = 15
        if flag:
            overlap = 30
        ranges, windows_df = windows(train_df, 30, overlap)
        for ran in ranges:
            l,r = ran.split(':')
            df_range = windows_df.iloc[int(l): int(r)]
            if int(r) > len(windows_df):
                break
            y_values = df_range[9].values
            x_values = df_range.drop(columns=[9]).values
            X.append(x_values)
            Y.append(Counter(list(y_values)).most_common(1)[0][0])
    return X, Y

In [20]:
train_X, train_y = [], []
valid_X, valid_y = [], []
test_X, test_y = [], []

make_dataset(files_train, train_X, train_y, False)
train_X, train_y = np.array(train_X), np.array(train_y).reshape(-1,1)
make_dataset(files_val, valid_X, valid_y, True)
valid_X, valid_y = np.array(valid_X),np.array(valid_y).reshape(-1,1)
make_dataset(files_train, test_X, test_y, True)
test_X, test_y = np.array(test_X), np.array(test_y).reshape(-1,1)
print("Done splitting the data")

Done


The weights that compensate for the imbalance distributions

In [21]:
weight  = compute_class_weight(class_weight = 'balanced', classes = np.unique(train_y), y = np.array(train_y))
weight = torch.tensor(weight)

In [22]:
from torch.nn.modules import dropout
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
  def __init__(self):
    super(Net, self).__init__()
    self.conv1 = nn.Conv1d(6,16,3)
    self.conv2 = nn.Conv1d(16,32,3)
    self.pool1 = nn.MaxPool1d(2,2)
    self.conv3 = nn.Conv1d(32,64,3)
    self.conv4 = nn.Conv1d(64,128,3)
    self.pool2 = nn.MaxPool1d(2,2)
    self.fc1 = nn.Linear(3456, 120)
    self.fc2 = nn.Linear(120, 64)
    self.fc3 = nn.Linear(64,4)
    self.dropout1 = nn.Dropout(0.1)

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.pool1(F.relu(self.dropout1(self.conv2(x))))
    x = F.relu(self.conv3(x))
    x = self.pool2(F.relu(self.dropout1(self.conv4(x))))
    x = x.view(-1,3456)
    x = F.relu(self.dropout1(self.fc1(x)))
    x = F.relu(self.dropout1(self.fc2(x)))
    x = self.fc3(x)
    return x

model = Net()

Net(
  (conv1): Conv1d(6, 16, kernel_size=(3,), stride=(1,))
  (conv2): Conv1d(16, 32, kernel_size=(3,), stride=(1,))
  (pool1): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv3): Conv1d(32, 64, kernel_size=(3,), stride=(1,))
  (conv4): Conv1d(64, 128, kernel_size=(3,), stride=(1,))
  (pool2): MaxPool1d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3456, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=64, bias=True)
  (fc3): Linear(in_features=64, out_features=4, bias=True)
  (dropout1): Dropout(p=0.1, inplace=False)
)


In [25]:
# Specifying the loss function
criterion = nn.CrossEntropyLoss(weight = weight.float())

# Specify optimizer
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [26]:
batch_size = 64

x_train_loader = torch.utils.data.DataLoader(train_X,batch_size = batch_size)
y_train_loader = torch.utils.data.DataLoader(train_y,batch_size = batch_size)

x_validation_loader = torch.utils.data.DataLoader(valid_X, batch_size = batch_size)
y_validation_loader = torch.utils.data.DataLoader(valid_y, batch_size = batch_size)

In [28]:
import matplotlib.pyplot as plt

n_epochs = 10

def trainNet(model,criterion,optimizer,n_epochs,flag_cuda):

  train_losslist = []
  valid_losslist = []
  valid_loss_min = np.Inf # track change in validation loss
  for epoch in range(1, n_epochs + 1):
      # Keeping track of training and validation loss
      train_loss = 0.0
      valid_loss = 0.0
    
      model.train()

      for data,target in zip(x_train_loader,y_train_loader):
          if flag_cuda:
              data, target = data.cuda(), target.cuda()
          optimizer.zero_grad()
          output = model(data.float())
          output = output.float()
          target = target.type(torch.LongTensor)
          
          if flag_cuda:
            output, target = output.cuda(), target.cuda()


          loss = criterion(output, target)

          # Backward pass: compute gradient of loss with respect to parameters
          loss.backward()
          # Perform a single optimization step (parameter update)
          optimizer.step()
          # Update training loss
          train_loss += loss.item()*data.size(0)
          
      model.eval()

      for data,target in zip(x_validation_loader,y_validation_loader):
          if flag_cuda:
              data, target = data.cuda(), target.cuda()
          output = model(data.float())
          output = output.float()
          target = target.type(torch.LongTensor) 

          if flag_cuda:
            output, target = output.cuda(), target.cuda()

          loss = criterion(output, target)
          valid_loss += loss.item()*data.size(0)
      
      # Calculating average losses
      train_loss = train_loss/len(train_X)
      valid_loss = valid_loss/len(valid_X)
      train_losslist.append(train_loss)
      valid_losslist.append(valid_loss)
          
      #Printing training/validation statistics 
      print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f}'.format(
          epoch, train_loss, valid_loss))
      
      # Saving model if validation loss has decreased
      if valid_loss <= valid_loss_min:
          print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
              valid_loss_min,valid_loss))
          torch.save(model.state_dict(), 'model.pt')
          valid_loss_min = valid_loss
        
  return epochs_list, train_losslist, valid_losslist

# Executing the training
epochs_list, train_losslist, valid_losslist = trainNet(model,criterion,optimizer,n_epochs,flag_cuda)

# Loading the best model
model.load_state_dict(torch.load('model.pt'))

# Plotting the learning curves
plt.plot(epochs_list, train_losslist, epochs_list, valid_losslist)
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.legend(['Training','Validation'])
plt.title("Performance of Baseline Model")
plt.show()

RuntimeError: Given groups=1, weight of size [16, 6, 3], expected input[1, 64, 6] to have 6 channels, but got 64 channels instead