In [1]:
import os
from google.colab import drive 

drive.mount('/content/drive', force_remount=True)

Mounted at /content/drive


In [3]:
os.chdir("/content/drive/Othercomputers/Dell")
PATH_TO_DIR = "/content/drive/Othercomputers/Dell"

In [4]:
import torch
import torch.nn as nn
import torch.optim
from torch.optim import lr_scheduler

import re
import sys
import random
import string
import time
import pickle
import numpy as np
import pandas as pd
import seaborn as sns
import glob

np.random.seed(42) 


In [5]:
device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
print(device)
if device == 'cuda:0':
  torch.cuda.get_device_properties(device)

cpu


Preprocessing Dataset 

In [6]:
def get_files():
  files = glob.glob(f"{PATH_TO_DIR}/Action-Segmentation-Project/dataset/avocado/clutter/[0-9]*/optoforce_data.csv")
  labels = glob.glob(f"{PATH_TO_DIR}/Action-Segmentation-Project/dataset/avocado/clutter/[0-9]*/labels")
  return files, labels

def read_data(files, labels):
  """reads every 830th frame which is approx 1FPS"""
  frames = [] 
  for file in files:
    data_df = pd.read_csv(file)
    data_df = data_df.drop(columns = ['ring_x','ring_y', 'ring_z'])
    data_df = data_df.iloc[::830,:]
    data_df["label"] = ""
    frames.append(data_df)
  
  action_segment_td = [] #time durations for each action 
  ground_truth_actions = [] #action per frame
  
  for labels_per_file in labels:
    td_per_file = []
    gt_actions_per_file = []
    with open (labels_per_file) as f:
        for line in f:
            x, y  = line.split(';')
            td_per_file.append(x)
            gt_actions_per_file.append(y.strip('\n'))
        action_segment_td.append(td_per_file)
        ground_truth_actions.append(gt_actions_per_file)

  return frames, action_segment_td, ground_truth_actions

def append_labels_per_frame(frames,action_segment_td, ground_truth_actions):
  for df, duration_of_actions, labels in zip(frames,action_segment_td, ground_truth_actions):
    condition = []
    for actions in duration_of_actions:
        start_time, end_time = actions.split(':')
        condition.append(df['time'].between(int(start_time),int(end_time)))

    df['label'] = np.select(condition,labels,default=None)
    df.dropna(inplace=True)
  
  return frames
  
def standardise_features(frames, features = ['index_x', 'index_y', 'index_z','middle_x','middle_y','middle_z','thumb_x', 'thumb_y', 'thumb_z']):
  for frame in frames:
    for feature in frame[features]:
      mean = frame[feature].mean()
      std = frame[feature].std()

      frame[feature] = (frame[feature] - mean)/std

  return frames


def one_hot_encode_labels(frames):

  unique_actions = set()

  for frame in frames:
    for label in frame['label']:
        unique_actions.add(label)

  
  one_hot_encoding_acts = pd.get_dummies(list(unique_actions))
  index_label_map = {np.argmax(v):k for k, v in one_hot_encoding_acts.items()}

  actions_per_seq = []
  for frame in frames:
      action_encodings = []
      for i in range(0, len(frame)):
          action_encodings.append(one_hot_encoding_acts[frame['label'].iloc[i]])
      actions_per_seq.append(action_encodings)

  return actions_per_seq, unique_actions, index_label_map

In [7]:
files, labels = get_files()

In [8]:
frames, action_segment_td, ground_truth_actions = read_data(files, labels)
frames = standardise_features(append_labels_per_frame(frames, action_segment_td, ground_truth_actions))
actions_per_seq, unique_actions, index_label_map = one_hot_encode_labels(frames)


In [9]:
max_length = max([len(frame)for frame in frames])
max_length

142

In [10]:
[len(frame)for frame in frames]

[76, 75, 104, 39, 139, 57, 128, 79, 142, 123]

In [11]:
features = ['index_x', 'index_y', 'index_z','middle_x','middle_y','middle_z','thumb_x', 'thumb_y', 'thumb_z' ]
numeric_features_per_seq = [np.array(frames[i][features]) for i in range(len(frames))]

labels_per_seq =   [np.array(actions_per_seq[i]) for i in range(len(actions_per_seq))]

In [12]:
labels_per_seq[0].shape

(76, 6)

In [13]:
#create padded sequences 
#fet the last timestep and duplicate to max len

padded_numeric_features_per_seq = np.zeros((10,max_length,9))
 
for i in range(len(numeric_features_per_seq)):
  last_timestep = numeric_features_per_seq[i][-1:][0] 
  repeat_n = max_length - numeric_features_per_seq[i].shape[0]
  padding = np.tile(last_timestep,(repeat_n,1))
  #print(padding)
  padded_seq = np.concatenate((numeric_features_per_seq[i],padding),axis=0)
  padded_numeric_features_per_seq[i] = padded_seq
                   
 



In [14]:
numeric_features_per_seq[8].shape

(142, 9)

In [15]:
padded_numeric_features_per_seq.shape

(10, 142, 9)

In [16]:
padded_labels_per_seq = -1 * np.ones((10,max_length,6))
 
for i in range(len(labels_per_seq)):
  seq_len = labels_per_seq[i].shape[0]

  padded_labels_per_seq[i][:seq_len,:] = labels_per_seq[i]

In [17]:
padded_labels_per_seq.shape

(10, 142, 6)

In [18]:
from torch.utils.data import Dataset
from torch.utils.data import random_split, DataLoader
from torch.autograd import Variable
import torch.nn.functional as F

In [20]:
X_data, y_data = torch.FloatTensor(padded_numeric_features_per_seq), torch.LongTensor(padded_labels_per_seq)

In [None]:
# class OpToForceDataset(Dataset):
#   def __init__(self, actions, sequence, window_length = 150,stride = 1, n_th_frame = 1):
#     self.actions = actions
#     self.sequence = sequence
#     self.window_length = window_length

#     self.y = torch.tensor(actions)
#     self.X  = torch.tensor(sequence) #may be wrong

#     self.stride = stride
#     self.n_th_frame = n_th_frame

#   def __len__(self):
#     return self.X.shape[0]

#   def __getitem__(self,timestep):
#     if timestep >= self.window_length - 1:
#       #print(timestep)
#       start_index = timestep - self.window_length + 1
#       #print(start_index)
#       x = self.X[start_index:timestep+1:self.n_th_frame,:]
#       #print(x)
#       y = self.y[start_index:timestep+1:self.n_th_frame,:]
#       #print(y)
#     else:
#       padding_x = self.X[0].repeat(self.window_length-timestep-1,1) #np.tile eqv
#       x = self.X[0:timestep+1:self.n_th_frame,:]
#       x = torch.cat((padding_x,x),axis = 0)

#       padding_y = self.y[0].repeat(self.window_length-timestep-1,1)
#       y = self.y[0:timestep+1:self.n_th_frame,:]
#       y = torch.cat((padding_y,y),axis = 0)

#     return x,y
  

In [None]:
# train_dataset = OpToForceDataset(train_y,train_x)

In [None]:
# X, y = train_dataset[6]
# print(X)


In [None]:
# train_x[5]

In [21]:
class OpToForceDataset(Dataset):
  def __init__(self, sequences, actions):
    self.X = sequences
    self.y = actions

  def __len__(self):
    return self.X.shape[0]

  def __getitem__(self,index):
    return self.X[index], self.y[index]


In [22]:
train_dataset = OpToForceDataset(X_data,y_data)

In [23]:
torch.manual_seed(42)


<torch._C.Generator at 0x7fba69f727d0>

In [24]:
train_loader = DataLoader(train_dataset,batch_size=1,shuffle = True)
X, y = next(iter(train_loader))

# print(X.shape)
# print(X)

# print(y.shape)
# print(y)

In [72]:
class ManyToManyLSTM(nn.Module):
  def __init__(self,n_features=9, hidden_size=500, num_layers = 1, num_classes = 6):
    super().__init__()
    self.n_features = n_features
    self.hidden_size = hidden_size #number of nodes for each ts
    self.num_layers = num_layers # how many stacked layers in LSTM cell
    self.num_classes = num_classes 


    self.lstm = nn.LSTM(input_size=n_features,hidden_size= self.hidden_size, batch_first = True, num_layers = self.num_layers)
    self.linear = nn.Linear(in_features=self.hidden_size, out_features = 6)

  def forward(self, x):
    batch_size = x.shape[0]
    #each batch hidden state and cell state are reset
    h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)
    c0 = torch.zeros(self.num_layers, batch_size, self.hidden_size)

    output, (h_n, c_n) = self.lstm(x,(h0, c0))

    print("shape of each hidden state's output for each time step, output")
    print(output.shape)

    print("shape of final hidden state at last time step:")
    print(h_n.shape)
   # print(h_n)
   # print(output[-1])

    frames = output.view(-1,output.shape[2])
    print("shape after flattning ouput")
    print(frames.shape)
    logits = self.linear(frames)

    #preds = F.log_softmax(out_fc, dim = 1)
    print("shape of output")
    print(output.shape)
    return logits, output,h_n


In [74]:
model_test = ManyToManyLSTM()
 
print(model_test)

ManyToManyLSTM(
  (lstm): LSTM(9, 500, batch_first=True)
  (linear): Linear(in_features=500, out_features=6, bias=True)
)


In [75]:
logits, outputs, h_n = model_test(X)

shape of each hidden state's output for each time step
torch.Size([1, 142, 500])
shape of final hidden state at last time step:
torch.Size([1, 1, 500])
shape after flattning ouput
torch.Size([142, 500])
shape of output
torch.Size([1, 142, 500])


In [69]:
#torch.equal(outputs[0][-1],h_n[0][0])

True

In [71]:
#outputs[0][-1]

In [70]:
#h_n[0][0]

In [None]:
_, output_indexes = outputs.max(dim=1)

In [None]:
output_indexes

tensor([1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1,
        1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0,
        0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 5, 1, 3, 1, 1, 1, 1, 1, 1, 5, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 5, 5, 0, 0, 5, 1, 3, 5,
        5, 5, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3])

In [None]:
print(y.shape)

labels = y
labels.shape

labels

mask = (labels >=0).float()
mask

torch.Size([1, 142, 6])


tensor([[[1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1., 1., 1., 1.],
         [1., 1., 1.

In [None]:
num_actions = len([out for out in mask[0] if out.all() >= 1])
num_actions

142

In [None]:
outputs = outputs[0:num_actions,:]
outputs

tensor([[ 0.0360,  0.0427, -0.0433,  0.0242, -0.0290,  0.0256],
        [ 0.0400,  0.0446, -0.0484,  0.0217, -0.0302,  0.0297],
        [ 0.0434,  0.0448, -0.0508,  0.0199, -0.0303,  0.0319],
        [ 0.0443,  0.0450, -0.0509,  0.0196, -0.0306,  0.0336],
        [ 0.0438,  0.0451, -0.0503,  0.0201, -0.0312,  0.0353],
        [ 0.0446,  0.0448, -0.0510,  0.0198, -0.0313,  0.0356],
        [ 0.0460,  0.0440, -0.0518,  0.0193, -0.0307,  0.0362],
        [ 0.0463,  0.0440, -0.0514,  0.0186, -0.0304,  0.0369],
        [ 0.0458,  0.0441, -0.0512,  0.0187, -0.0309,  0.0370],
        [ 0.0448,  0.0438, -0.0516,  0.0185, -0.0314,  0.0372],
        [ 0.0437,  0.0436, -0.0513,  0.0191, -0.0319,  0.0371],
        [ 0.0434,  0.0436, -0.0509,  0.0197, -0.0322,  0.0373],
        [ 0.0438,  0.0433, -0.0510,  0.0196, -0.0322,  0.0376],
        [ 0.0442,  0.0428, -0.0511,  0.0194, -0.0320,  0.0380],
        [ 0.0445,  0.0428, -0.0506,  0.0195, -0.0317,  0.0388],
        [ 0.0446,  0.0431, -0.0502,  0.0

In [None]:
gt

tensor([[[0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 0., 1., 0.],
         [0., 0., 0., 1., 0., 0.],
         [0., 0., 0., 1., 0., 0.],
         [0., 0., 0.

In [None]:
y = y.squeeze()
gt = y[:num_actions,:]
_,gt_indexes = gt.max(dim=1)
gt_indexes

tensor([4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 3, 5, 5, 5, 5, 5, 5, 5, 5,
        5, 5, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
        4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
        2, 2, 0, 0, 0, 0, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 1, 1, 1, 1])

In [None]:
def _loss_function_one_hot(predictions,targets):
  #predictions = torch.FloatTensor(predictions)
  #targets = torch.FloatTensor(targets)
  return nn.CrossEntropyLoss()(predictions,targets)

In [None]:
_loss_function_one_hot(outputs,gt_indexes)

tensor(1.8130, grad_fn=<NllLossBackward0>)

In [None]:
def _initialise_model(learning_rate= 5e-5, num_hidden_units =100):
  model1 = ManyToManyLSTM(n_features = 9, hidden_units=num_hidden_units)
  loss_function = nn.CrossEntropyLoss()
  optimizer = torch.optim.Adam(model1.parameters(), lr = learning_rate)
  #TODO dataloader
  return model1, loss_function, optimizer


def _train_model(data_loader, model, loss_function, optimizer):
  n_batches = len(data_loader)

  total_loss = 0

  model.train()

  for X, y in data_loader:
    output = model(X)
    loss = loss_function(output, y)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    total_loss += loss.item()

  avg_loss = total_loss / n_batches

  print(f"Train loss: {avg_loss}" )

def _test_model(data_loader, model, loss_function):
  n_batches = len(data_loader)
  total_loss = 0

  model.eval()

  with torch.no_grad():
    for X, y in data_loader:
        output = model(X)
        total_loss += loss_function(output,y).item()

    avg_loss = total_loss/ n_batches
    print(f"Test loss: {avg_loss} ")

def main(epochs = 20):
  
  model, loss_function, optimizer = _initialise_model()
  #Dataloader

  for epoch in range(epochs):
    print(f"Epoch {epoch}\n---------------")
    _train_model(train_loader, model, loss_function, optimizer)
    _test_model(test_loader, model, loss_function, optimizer)
    print()



In [None]:
#validation loss
#prediction 

#weights and biases
#figure out a good smapling rate 


In [None]:
# Example of target with class indices
loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
print(input)
target = torch.empty(3, dtype=torch.long).random_(5)
print(target)
output = loss(input, target)
print(output)
output.backward()
# # Example of target with class probabilities
# input = torch.randn(3, 5, requires_grad=True)
# target = torch.randn(3, 5).softmax(dim=1)
# output = loss(input, target)
# output.backward()
# print(output)

tensor([[-0.0584, -0.7067,  1.3492,  0.6804,  0.9711],
        [ 0.7021, -0.0707, -1.6658,  0.1288, -1.4878],
        [ 0.4574, -0.3677, -0.7418, -0.4898, -0.1248]], requires_grad=True)
tensor([3, 1, 4])
tensor(1.5861, grad_fn=<NllLossBackward0>)


In [None]:
nn.Softmax()(target)

NameError: ignored