# Install required packages

In [None]:
!pip install wandb

import pickle
import torch

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting wandb
  Downloading wandb-0.12.21-py2.py3-none-any.whl (1.8 MB)
[K     |████████████████████████████████| 1.8 MB 4.2 MB/s 
[?25hCollecting pathtools
  Downloading pathtools-0.1.2.tar.gz (11 kB)
Collecting shortuuid>=0.5.0
  Downloading shortuuid-1.0.9-py3-none-any.whl (9.4 kB)
Collecting sentry-sdk>=1.0.0
  Downloading sentry_sdk-1.9.0-py2.py3-none-any.whl (156 kB)
[K     |████████████████████████████████| 156 kB 21.2 MB/s 
[?25hCollecting setproctitle
  Downloading setproctitle-1.3.0-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl (30 kB)
Collecting GitPython>=1.0.0
  Downloading GitPython-3.1.27-py3-none-any.whl (181 kB)
[K     |████████████████████████████████| 181 kB 9.2 MB/s 
[?25hCollecting docker-pycreds>=0.4.0
  Downloading docker_pycreds-0.4.0-py2.py3-none-any.whl (9.0 kB)
Collecting gitdb<5,>=4.0.1
  Downloading g

In [None]:
# Mount Google Drive into the Colab runtime; the dataset and the saved
# checkpoint used later in this notebook are read from / written to paths
# under /content/drive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import wandb
from wandb.keras import WandbCallback

# Authenticate with Weights & Biases before any wandb.init()/wandb.log() call
# made by the training pipeline further down.
wandb.login()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


True

# Load Point Set

In [None]:
def load_data(filepath='./dhg_data.pckl'):
    """Load the pickled DHG dataset and return its train/test splits.

    Args:
        filepath: Path to a pickle file containing a mapping with the keys
            'x_train', 'x_test', 'y_train' and 'y_test'.

    Returns:
        Tuple ``(x_train, x_test, y_train, y_test)`` taken from that mapping.

    Raises:
        OSError: If the file cannot be opened.
        KeyError: If one of the four expected keys is missing.
    """
    # SECURITY: pickle.load can execute arbitrary code while deserialising;
    # only point this at dataset files you trust.
    # The context manager guarantees the handle is closed even when
    # unpickling raises, which a manual open()/close() pair does not.
    with open(filepath, 'rb') as file:
        data = pickle.load(file, encoding='latin1')
    return data['x_train'], data['x_test'], data['y_train'], data['y_test']

In [None]:
from sklearn.model_selection import train_test_split

# DHG dataset - 1393 valid sequences
# x_train: 835 x N x 600 x 3 (records, frames, points, coordinates)
# x_val: 279 x N x 600 x 3 (records, frames, points, coordinates)
# x_test: 279 x N x 600 x 3 (records, frames, points, coordinates)
# labels: classify with 14 gesture categories

# x_train, x_test, y_train, y_test = load_data('dhg_data.pckl')
x_train, x_test, y_train, y_test = load_data('/content/drive/My Drive/Colab Notebooks/674 Project/dhg_data.pckl')
# Hold out 25% of the training records as a stratified validation set.
# random_state is fixed so that Restart & Run All reproduces the same split
# (and therefore comparable validation curves) on every run.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, test_size=0.25, stratify=y_train, random_state=42)
# make_loader() subtracts 1 from the train/test labels when it builds its
# batches; the validation labels never go through a loader, so apply the same
# shift here (presumably the raw labels are 1-based -- verify against the data).
y_val = y_val - 1

In [None]:
# Sanity check: show the shape of each split after the train/validation split.
print(f"x train shape: {x_train.shape}")
print(f"x val shape: {x_val.shape}")
print(f"x test shape: {x_test.shape}")

x train shape: torch.Size([835, 20, 600, 3])
x val shape: torch.Size([279, 20, 600, 3])
x test shape: torch.Size([279, 20, 600, 3])


# Define PPN Model

In [None]:
from torch.nn.modules.activation import Softmax
from torch.nn.modules.dropout import Dropout
import torch
import torch.nn as nn
from torch.nn import Sequential as Seq, Linear as Lin, LeakyReLU, GroupNorm

# This helper function is from Assignment 3
# It creates a multi-layer perceptron (consists of multiple layers of nn.Linear)
# with specified layer construction
def MLP(channels, enable_group_norm=True):
    """Build a multi-layer perceptron from a list of layer widths.

    Every consecutive pair ``channels[i - 1] -> channels[i]`` becomes one
    ``Sequential(Linear, LeakyReLU(negative_slope=0.2))`` stage, and the stages
    are chained inside an outer ``Sequential``.

    Args:
        channels: Sequence of layer widths, input width first.
        enable_group_norm: Kept only for backward compatibility. No GroupNorm
            layer is inserted for either value: the earlier implementation
            computed group counts but never used them, so both settings
            already produced the same layers. The flag is left as a no-op so
            existing checkpoints (whose keys contain no norm layers) still load.

    Returns:
        A ``torch.nn.Sequential`` with ``len(channels) - 1`` stages.
    """
    return Seq(*[
        Seq(Lin(channels[i - 1], channels[i]), LeakyReLU(negative_slope=0.2))
        for i in range(1, len(channels))
    ])


# PointNet module for extracting point descriptors
# num_input_features: number of input raw per-point or per-vertex features
# num_output_features: width of each per-point descriptor produced by the MLP (default 256); forward() max-pools these descriptors over the second-to-last axis
class PointNet(torch.nn.Module):
    """Shared per-point MLP followed by a max-pool over the second-to-last axis.

    Args:
        num_input_features: Number of raw features per input point.
        num_output_features: Width of the descriptor the MLP produces for
            every point (and of the pooled output).
    """

    def __init__(self, num_input_features=3, num_output_features=256):
        super().__init__()
        self.input_features = num_input_features
        self.output_features = num_output_features
        self.num_points = 600
        # No T-Net / feature-transform layers are used: the extractor is the
        # plain MLP only.
        layer_widths = [num_input_features, 32, 64, num_output_features]
        self.mlp = MLP(layer_widths)
        # The very same MLP instance is wrapped again here, so its parameters
        # appear under both the `mlp.` and `featureExtractionLayer.` prefixes
        # in state_dict(); keep both attributes so saved checkpoints still load.
        self.featureExtractionLayer = Seq(self.mlp)

    def forward(self, x):
        """Return the element-wise maximum of the per-point descriptors.

        The reduced axis (dim -2) is kept with size 1, so an input of shape
        (..., N, num_input_features) yields (..., 1, num_output_features).
        """
        point_descriptors = self.featureExtractionLayer(x)
        pooled = point_descriptors.max(dim=-2, keepdim=True)
        return pooled.values
  

class pnGroup(torch.nn.Module):
    """Applies one shared PointNet to every frame of a point-cloud sequence.

    Args:
        num_input_features: Number of raw features per input point.
        num_output_features: Width of the descriptor produced for each frame.
    """

    def __init__(self, num_input_features, num_output_features):
        super(pnGroup, self).__init__()
        self.out_num = num_output_features
        # A single PointNet instance, so all frames share the same weights.
        self.point_net = PointNet(num_input_features, num_output_features)

    def forward(self, x):
        """Encode each frame of ``x`` and stack the descriptors along dim 0.

        ``x`` is iterated along its first axis (one frame per step). Each
        frame goes through the shared PointNet, whose output keeps a size-1
        pooled axis, so 2-D (points, features) frames produce a
        (frames, num_output_features) result.
        """
        # Collect the per-frame outputs and concatenate once; calling
        # torch.cat inside the loop re-copies the growing tensor every step.
        frame_descriptors = [self.point_net(frame) for frame in x]
        return torch.cat(frame_descriptors, 0)

class PPN(torch.nn.Module):
    """Gesture classifier: per-frame PointNet descriptors -> (optional LSTM) -> MLP head.

    The ``partial`` attribute selects the forward path:
      * ``partial=True`` (initial value): the frame descriptors are flattened
        and fed directly to the fully connected head, bypassing the LSTM.
      * ``partial=False``: the frame descriptors pass through the LSTM first
        and the flattened LSTM outputs feed the same head.

    Args:
        num_input_features: Number of raw features per input point.
        num_output_features: Width of each frame descriptor; also used as the
            LSTM input and hidden size.
        device: Stored on ``self.device`` for callers; forward() itself no
            longer depends on it.
    """

    def __init__(self, num_input_features, num_output_features, device):
        super(PPN, self).__init__()
        self.device = device
        self.png = pnGroup(num_input_features, num_output_features)
        self.partial = True           # True: skip the LSTM in forward()
        self.test = False             # not read anywhere in this class
        self.hidden_size = num_output_features
        self.num_layers = 3
        self.num_points = 600         # not read anywhere in this class
        self.input_size = num_output_features
        self.sequence_length = 20     # M
        self.num_classes = 14
        self.lstm = nn.LSTM(self.input_size, self.hidden_size, self.num_layers, batch_first=True, dropout=0)
        # The head consumes all sequence_length time steps flattened together.
        self.fc = Seq(
            Lin(self.hidden_size * self.sequence_length, 128),
            nn.Dropout(p=0.2),
            Lin(128, self.num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (B, num_classes).

        ``x`` is iterated along its first axis (one sample per step) and each
        sample is encoded by ``self.png``. The reshapes below require every
        sample to contribute exactly ``sequence_length`` frame descriptors.
        """
        B = x.size(0)

        # Encode every sample, then concatenate once instead of growing the
        # tensor with torch.cat on each iteration.
        # y -> (B * sequence_length) x num_output_features
        y = torch.cat([self.png(pc) for pc in x], 0)

        if not self.partial:
            # LSTM forward
            y = y.reshape(B, self.sequence_length, y.size(-1))
            # nn.LSTM starts from zero hidden/cell states when none are given,
            # which matches the explicit zero tensors used previously while
            # automatically following the input's device and dtype (the old
            # tensors were pinned to self.device and broke if the module or
            # input lived elsewhere).
            out, _ = self.lstm(y)
            out = out.reshape(out.shape[0], -1)
            return self.fc(out)

        y = y.reshape(B, self.sequence_length * y.size(-1))
        return self.fc(y)


# Training


In [None]:
import random as random
import matplotlib.pyplot as plt
import numpy as np

# Device configuration
# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# `device` is read as a global by make(), train(), train_batch() and test().
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

print(f"Using device: {device}")

Using device: cuda:0


In [None]:
# Hyperparameters for one run; passed to wandb.init() by the training pipeline.
config = {
    "learning_rate": 1e-2,       # SGD learning rate
    "decay": 1e-3,               # SGD weight decay
    "epochs": 100,               # epochs per train() call
    "batch_size": 32,            # batch size of the train/test loaders
    "num_global_features": 64,   # descriptor width handed to PPN
    # "num_hidden_layers": 3
}

In [None]:
def model_training_pipeline(hyperparameters):
  """Run the two-stage training schedule inside one wandb run, then evaluate.

  Stage 0 trains the model with its initial `partial` setting; stage 1 sets
  `model.partial = False` and trains again with the same loader, criterion and
  optimizer. The model is then passed to test() and returned.

  Args:
    hyperparameters: Mapping given to wandb.init() as the run config.

  Returns:
    The trained model.
  """
  run = wandb.init(project="4d-gesture-recognition", config=hyperparameters)
  with run:
    run_config = wandb.config
    model, train_loader, test_loader, criterion, optimizer = make(run_config)
    print(model)

    for stage in (0, 1):
      if stage == 1:
        # Second stage: switch the model to its non-partial forward path.
        model.partial = False
      train(stage, model, train_loader, x_val, y_val, criterion, optimizer, run_config)

    test(model, test_loader)

  return model

In [None]:
def make(config):
  """Build the loaders, model, loss and optimizer for a run.

  Args:
    config: Object exposing batch_size, num_global_features, learning_rate
      and decay as attributes.

  Returns:
    Tuple (model, train_loader, test_loader, criterion, optimizer).
  """
  # Data: both loaders read the notebook-level train/test splits.
  batch_size = config.batch_size
  train_loader = make_loader(x_train, y_train, batch_size=batch_size)
  test_loader = make_loader(x_test, y_test, batch_size=batch_size)

  # Model: 3 input features per point, placed on the global device.
  model = PPN(3, config.num_global_features, device)
  model = model.to(device)

  # Loss and optimizer: mean cross-entropy, SGD with Nesterov momentum.
  criterion = nn.CrossEntropyLoss(reduction='mean')
  sgd_options = dict(
      lr=config.learning_rate,
      weight_decay=config.decay,
      nesterov=True,
      momentum=0.9,
  )
  optimizer = torch.optim.SGD(model.parameters(), **sgd_options)

  return model, train_loader, test_loader, criterion, optimizer


In [None]:
def make_loader(data, label, batch_size):
  """Wrap (sample, label - 1) pairs in a shuffling DataLoader.

  Args:
    data: Indexable collection of samples.
    label: Indexable collection of labels; its length decides how many pairs
      are built, and 1 is subtracted from every label.
    batch_size: Number of pairs per batch.

  Returns:
    A torch DataLoader with shuffle=True, pin_memory=True and num_workers=2.
  """
  pairs = []
  for idx in range(len(label)):
    pairs.append([data[idx], label[idx]-1])

  return torch.utils.data.DataLoader(
      pairs,
      batch_size=batch_size,
      shuffle=True,
      pin_memory=True,
      num_workers=2,
  )

In [None]:
from tqdm.notebook import tqdm
from google.colab import files

def train(num_prev_epochs, model, loader,  x_val, y_val, criterion, optimizer, config):
  """Train `model` for `config.epochs` epochs, validating after every epoch.

  After each epoch the whole validation set is evaluated in one forward pass;
  whenever the validation loss improves, the model's state_dict is written to
  the checkpoint path on Google Drive.

  Args:
    num_prev_epochs: Number of earlier train() calls in this run; used to
      offset the epoch index that is logged (num_prev_epochs * config.epochs).
    model: Module to optimise.
    loader: Iterable of (data, labels) training batches.
    x_val: Validation inputs (moved to the global device and cast to float).
    y_val: Validation labels (moved to the global device).
    criterion: Loss function called as criterion(predictions, labels).
    optimizer: Optimizer stepping the model's parameters.
    config: Object exposing `epochs` as an attribute.
  """
  wandb.watch(model, criterion, log="all", log_freq=10)

  example_ct = 0
  batch_ct = 0
  # Start from +inf so the first epoch always produces a checkpoint, whatever
  # the scale of the loss (a fixed start of 10 skipped saving above 10).
  min_loss = float("inf")
  x_val, y_val = x_val.to(device).float(), y_val.to(device)

  print("start training...")

  for epoch in tqdm(range(config.epochs)):
    logged_epoch = num_prev_epochs * config.epochs + epoch

    for data, labels in loader:
      loss, accuracy = train_batch(data, labels, model, optimizer, criterion)
      example_ct += len(data)
      batch_ct += 1

      if ((batch_ct + 1) % 25) == 0:
        train_log(loss, accuracy, example_ct, logged_epoch)

    # Validate with dropout disabled and without building an autograd graph,
    # then restore whatever train/eval mode the model was in before.
    was_training = model.training
    model.eval()
    with torch.no_grad():
      val_pred = model(x_val)
      val_loss = criterion(val_pred, y_val).item()
      # Fraction of correct predictions over the whole validation set; the
      # previous `sum(...) / 100` assumed exactly 100 validation samples and
      # produced "accuracies" above 1.
      val_accuracy = (torch.argmax(val_pred, dim=-1) == y_val).float().mean().item()
    model.train(was_training)
    val_log(val_loss, val_accuracy, logged_epoch)

    # Keep the weights that achieve the lowest validation loss so far.
    if val_loss < min_loss:
      min_loss = val_loss
      torch.save(model.state_dict(), '/content/drive/My Drive/Colab Notebooks/674 Project/saved_best_model')
      print("updated the best model...")


def train_batch(data, labels, model, optimizer, criterion):
  """Run one optimisation step on a single batch.

  Args:
    data: Batch of inputs (moved to the global device and cast to float).
    labels: Batch of target class indices (moved to the global device).
    model: Module producing class scores for `data`.
    optimizer: Optimizer stepping the model's parameters.
    criterion: Loss function called as criterion(scores, labels).

  Returns:
    Tuple (loss, accuracy): the batch loss tensor and the fraction of
    correctly classified samples in this batch as a 0-dim tensor.
  """
  data, labels = data.to(device).float(), labels.to(device)

  # forward pass
  out = model(data)
  loss = criterion(out, labels)
  # Divide by the actual batch size: the previous `sum(...) / 100` assumed 100
  # samples per batch, so with smaller batches the logged accuracy could never
  # approach 1.
  accuracy = (torch.argmax(out, dim=-1) == labels).float().mean()

  # backward pass
  optimizer.zero_grad()
  loss.backward()

  # step with optimizer
  optimizer.step()

  return loss, accuracy


In [None]:
def train_log(loss, accuracy, example_ct, epoch):
  """Send the training loss and accuracy to wandb and print a progress line.

  Both metrics are logged with `step=epoch`; `example_ct` only appears in the
  printed line.
  """
  metrics = (("train loss", loss), ("train accuracy", accuracy))
  for metric_name, metric_value in metrics:
    wandb.log({"epoch": epoch, metric_name: metric_value}, step=epoch)

  epoch_tag = str(epoch).zfill(3)
  example_tag = str(example_ct).zfill(5)
  print("epoch: " + epoch_tag + "| " + example_tag + f" examples | loss: {loss: .3f}")

In [None]:
def val_log(val_loss, val_accuracy, epoch):
  """Send the validation loss and accuracy to wandb, both with `step=epoch`."""
  metrics = (("val loss", val_loss), ("val accuracy", val_accuracy))
  for metric_name, metric_value in metrics:
    wandb.log({"epoch": epoch, metric_name: metric_value}, step=epoch)

In [None]:
def test(model, test_loader):
  """Evaluate `model` on `test_loader`, report accuracy and export it to ONNX.

  The model is switched to eval mode and every batch is scored without
  gradients. Labels and predictions of each batch are printed. The accuracy is
  printed and, when a wandb run is active, logged as "test accuracy". The
  model is then exported to "model.onnx" using the last batch as example
  input, and the file is uploaded to wandb when a run is active.

  Args:
    model: Module producing class scores of shape (batch, classes).
    test_loader: Iterable of (data, labels) batches.
  """
  model.eval()

  correct, total = 0, 0
  example_input = None  # last batch seen; reused as the ONNX example input
  with torch.no_grad():
    for data, labels in test_loader:
      data, labels = data.to(device).float(), labels.to(device)
      print("labels: ", labels)
      out = model(data)
      _, predicted = torch.max(out, 1)
      print("predicted: ", predicted)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()
      example_input = data

  # An empty loader previously crashed with ZeroDivisionError (and an unbound
  # `data` at export time); there is nothing to report or export in that case.
  if total == 0:
    print("No test data provided; skipping accuracy report and ONNX export.")
    return

  print(f"Accuracy of the model on the {total} " + f"test data: {100 * correct / total}%")

  # wandb.log / wandb.save need an active run. test() is also called after the
  # training run has been closed (when re-evaluating the saved checkpoint), so
  # only talk to wandb while a run exists.
  run_active = wandb.run is not None
  if run_active:
    wandb.log({"test accuracy": correct / total})

  torch.onnx.export(model, example_input, "model.onnx")
  if run_active:
    wandb.save("model.onnx")

In [None]:
# Launch the full pipeline (two training stages followed by a test pass) with
# the hyperparameters defined in `config`; keep the trained model for later cells.
model = model_training_pipeline(config)

PPN(
  (png): pnGroup(
    (point_net): PointNet(
      (mlp): Sequential(
        (0): Sequential(
          (0): Linear(in_features=3, out_features=32, bias=True)
          (1): LeakyReLU(negative_slope=0.2)
        )
        (1): Sequential(
          (0): Linear(in_features=32, out_features=64, bias=True)
          (1): LeakyReLU(negative_slope=0.2)
        )
        (2): Sequential(
          (0): Linear(in_features=64, out_features=64, bias=True)
          (1): LeakyReLU(negative_slope=0.2)
        )
      )
      (featureExtractionLayer): Sequential(
        (0): Sequential(
          (0): Sequential(
            (0): Linear(in_features=3, out_features=32, bias=True)
            (1): LeakyReLU(negative_slope=0.2)
          )
          (1): Sequential(
            (0): Linear(in_features=32, out_features=64, bias=True)
            (1): LeakyReLU(negative_slope=0.2)
          )
          (2): Sequential(
            (0): Linear(in_features=64, out_features=64, bias=True)
         

  0%|          | 0/100 [00:00<?, ?it/s]

epoch: 000| 00768 examples | loss:  2.655
updated the best model...
epoch: 001| 01539 examples | loss:  2.651
epoch: 002| 02310 examples | loss:  2.660
epoch: 003| 03081 examples | loss:  2.640
updated the best model...
epoch: 004| 03852 examples | loss:  2.631
epoch: 005| 04623 examples | loss:  2.624
updated the best model...
epoch: 006| 05394 examples | loss:  2.635
epoch: 007| 06165 examples | loss:  2.649
epoch: 008| 06936 examples | loss:  2.638
epoch: 009| 07707 examples | loss:  2.626
epoch: 010| 08478 examples | loss:  2.643
updated the best model...
epoch: 011| 09249 examples | loss:  2.638
epoch: 011| 10020 examples | loss:  2.646
updated the best model...
epoch: 012| 10820 examples | loss:  2.628
updated the best model...
epoch: 013| 11591 examples | loss:  2.644
epoch: 014| 12362 examples | loss:  2.634
updated the best model...
epoch: 015| 13133 examples | loss:  2.631
updated the best model...
epoch: 016| 13904 examples | loss:  2.639
updated the best model...
epoch: 017

  0%|          | 0/100 [00:00<?, ?it/s]

epoch: 100| 00768 examples | loss:  2.628
updated the best model...
epoch: 101| 01539 examples | loss:  2.407
updated the best model...
epoch: 102| 02310 examples | loss:  2.285
updated the best model...
epoch: 103| 03081 examples | loss:  2.091
updated the best model...
epoch: 104| 03852 examples | loss:  2.000
updated the best model...
epoch: 105| 04623 examples | loss:  1.918
updated the best model...
epoch: 106| 05394 examples | loss:  2.056
updated the best model...
epoch: 107| 06165 examples | loss:  1.674
updated the best model...
epoch: 108| 06936 examples | loss:  1.683
updated the best model...
epoch: 109| 07707 examples | loss:  1.358
updated the best model...
epoch: 110| 08478 examples | loss:  1.682
epoch: 111| 09249 examples | loss:  1.462
epoch: 111| 10020 examples | loss:  0.817
epoch: 112| 10820 examples | loss:  1.570
updated the best model...
epoch: 113| 11591 examples | loss:  1.644
epoch: 114| 12362 examples | loss:  1.749
epoch: 115| 13133 examples | loss:  1.114


  return tensor.shape == torch.Size([0]) or (~torch.isfinite(tensor)).all().item()
  return tensor.shape == torch.Size([0]) or (~torch.isfinite(tensor)).all().item()
  if not torch.isfinite(tensor).all():
  tmin = flat.min().item()
  tmax = flat.max().item()
  {name: wandb.Histogram(np_histogram=(tensor.tolist(), bins.tolist()))},
  + "or define the initial states (h0/c0) as inputs of the model. "


VBox(children=(Label(value='1.534 MB of 1.534 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
epoch,▁▁▁▁▂▂▂▂▂▃▃▃▃▃▃▄▄▄▄▄▅▅▅▅▅▅▆▆▆▆▆▇▇▇▇▇▇███
test accuracy,▁
train accuracy,▁▂▁▂▂▁▂▃▂▃▃▃▁▅▅▅▄▆▅▇▃▄▁▅▆▇▇▇▇▇█▇▅▆▇▇████
train loss,███████▇▇▇▇▆▆▆▅▄▅▄▄▃▇▅▃▄▃▃▂▂▂▁▂▂▅▃▂▂▂▁▁▁
val accuracy,▁▁▁▁▁▂▂▂▃▃▃▄▅▄▄▄▅▅▅▆▂▅▅▆▄▇▇▇██▅▇▅▇▆█████
val loss,██████▇▆▆▆▆▅▅▅▅▅▄▄▅▃▇▄▄▃▆▂▂▂▁▂▇▂▃▂▆▁▁▂▁▁

0,1
epoch,199.0
test accuracy,0.79928
train accuracy,0.31
train loss,0.05524
val accuracy,2.17
val loss,0.99745


In [None]:
# Re-evaluate the checkpoint that train() saved at the lowest validation loss.
test_loader = make_loader(x_test, y_test, batch_size=32)
# The architecture must match the one that was trained (3 input features,
# 64 global features) for the saved state_dict to load.
best_model = PPN(3, 64, device).to(device)
# Use the non-partial forward path, matching the last training stage.
best_model.partial = False
# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU runtime also loads on a CPU-only runtime.
best_model.load_state_dict(
    torch.load('/content/drive/My Drive/Colab Notebooks/674 Project/saved_best_model',
               map_location=device))
test(best_model, test_loader)


labels:  tensor([ 7,  3,  9, 13,  0, 13,  8, 12,  0,  2,  1,  0,  6,  1,  6,  2,  5,  8,
         3, 12,  3, 13, 13,  9,  7,  4,  4, 13, 11, 12,  7,  6],
       device='cuda:0')
predicted:  tensor([ 7,  3,  9, 13,  0,  3, 13, 12,  0,  2,  1,  0,  6,  1,  6,  2,  5,  8,
         3, 12,  3,  7,  6,  9,  7,  4,  4,  6, 11, 12,  7,  6],
       device='cuda:0')
labels:  tensor([ 1,  8,  0, 11,  5,  4,  1,  0, 12, 12,  7, 12,  5, 10,  1,  3,  9, 13,
         8,  3,  7,  5,  9,  3,  5,  4,  8, 10,  1,  0, 13,  9],
       device='cuda:0')
predicted:  tensor([ 4, 13,  0, 11,  9,  4,  1,  0,  6, 12,  7, 12,  5, 10,  1,  3,  9, 13,
         8,  3,  7,  5,  9,  3,  5,  4,  8,  8,  0,  0, 13,  9],
       device='cuda:0')
labels:  tensor([10,  9, 11,  3, 12,  5,  4,  6,  2,  9,  6, 12, 11,  2,  1,  6,  7, 10,
         9,  3,  0,  3,  6, 13,  2, 11, 12,  4,  6, 13,  7,  2],
       device='cuda:0')
predicted:  tensor([10,  9, 11,  3, 12,  5,  4,  6,  2,  9,  6, 12, 11,  2,  1,  6,  7, 10,
         8, 

Error: ignored