In [None]:
# import necessary packages
import numpy as np
import matplotlib.pyplot as plt
import torch
import math as math
from sklearn.decomposition import PCA
from sklearn.metrics import accuracy_score
import pandas as pd
import torch
from torch.utils.data import Dataset, DataLoader,Subset
from torchvision import transforms, utils
from torch import nn
import torch.optim as optim

# load data into your Colab notebook


In [None]:
!gdown 1M4qlHTiexWQH_crbPY8LiQOqC2ab_yxA
! unzip Archive.zip

Downloading...
From: https://drive.google.com/uc?id=1M4qlHTiexWQH_crbPY8LiQOqC2ab_yxA
To: /content/Archive.zip
  0% 0.00/812k [00:00<?, ?B/s]100% 812k/812k [00:00<00:00, 9.09MB/s]
Archive:  Archive.zip
  inflating: test.csv                
  inflating: __MACOSX/._test.csv     
  inflating: 230013130014_P104_standardized_EMG_two-handed-tap_1_dollar.csv  
  inflating: __MACOSX/._230013130014_P104_standardized_EMG_two-handed-tap_1_dollar.csv  
  inflating: 230013130022_P104_standardized_EMG_two-handed-tap_2_dollar.csv  
  inflating: __MACOSX/._230013130022_P104_standardized_EMG_two-handed-tap_2_dollar.csv  
  inflating: 230013130029_P104_standardized_EMG_two-handed-tap_3_dollar.csv  
  inflating: __MACOSX/._230013130029_P104_standardized_EMG_two-handed-tap_3_dollar.csv  
  inflating: 230113130139_P104_standardized_EMG_point-and-pinch_1_dollar.csv  
  inflating: __MACOSX/._230113130139_P104_standardized_EMG_point-and-pinch_1_dollar.csv  
  inflating: 230113130145_P104_standardized_EMG_po

In [None]:
# Load the train / test index tables as NumPy arrays (train first, then test)
train, test = (
    pd.read_csv(index_csv).to_numpy() for index_csv in ("train.csv", "test.csv")
)

# BELOW THIS IS THE CODE TO EDIT.
I have left skeleton code for you to edit.

# deep-learning

- [Documentation for 1D CNN](https://docs.pytorch.org/docs/stable/generated/torch.nn.Conv1d.html)
- [Documentation for LSTM](https://docs.pytorch.org/docs/stable/generated/torch.nn.LSTM.html)
- [Documentation for FFNN](https://docs.pytorch.org/tutorials/beginner/basics/buildmodel_tutorial.html)

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import pandas as pd
import numpy as np
from itertools import product

"""
deep_learning
  code skeleton for deep_learning algorithm
  inputs:
      train: training data [class name x data (64 x 88)]
      test: testing data [class name x data (64 x 88)]

"""
def deep_learning(train, test):
    """Grid-search the CNN+LSTM hyperparameters and return the final test accuracy.

    Every combination in the search space is trained from scratch with
    ``vanillaNN`` and scored on the test set. The best-scoring combination is
    then trained once more on a fresh model and the accuracy of that final
    run is returned.

    Args:
        train: currently unused; the training set is read from "train.csv".
        test: currently unused; the test set is read from "test.csv".

    Returns:
        Test accuracy (a fraction between 0 and 1) of the model retrained
        with the best hyperparameters found.

    NOTE(review): hyperparameters are selected on the test set itself, so the
    reported accuracy is optimistic. Consider carving a validation split out
    of the training data for the selection step.
    """

    # Define search space for hyperparameters
    cnn_out_channels_list = [8, 16, 32]
    kernel_size_list = [3, 5, 7]
    hidden_size_list = [32, 64, 128]
    dropout_list = [0.0, 0.2, 0.5]
    num_epochs_list = [10, 20]
    batch_size_list = [2, 4]

    training_data = BiosignalDataset(csv_file="train.csv")
    test_data = BiosignalDataset(csv_file="test.csv")

    # Start below any reachable accuracy so the first configuration is always
    # recorded. Starting at 0.0 left best_params as None (and crashed below)
    # whenever no configuration scored above zero.
    best_acc = float("-inf")
    best_params = None

    # Try all hyperparameter combinations
    for out_channels, kernel_size, hidden_size, dropout, num_epochs, batch_size in product(
        cnn_out_channels_list, kernel_size_list, hidden_size_list,
        dropout_list, num_epochs_list, batch_size_list
    ):
        model = NeuralNetwork(
            out_channels=out_channels,
            kernel_size=kernel_size,
            hidden_size=hidden_size,
            dropout=dropout
        )

        _, test_acc = vanillaNN(training_data, test_data, batch_size, num_epochs, model)

        # Strict comparison keeps the earliest configuration on ties.
        if test_acc > best_acc:
            best_acc = test_acc
            best_params = {
                "out_channels": out_channels,
                "kernel_size": kernel_size,
                "hidden_size": hidden_size,
                "dropout": dropout,
                "num_epochs": num_epochs,
                "batch_size": batch_size
            }

    print("Best hyperparameters:", best_params)

    # Train final model with best hyperparameters. Training is not seeded, so
    # this run's accuracy may differ from the score seen during the search.
    final_model = NeuralNetwork(
        out_channels=best_params["out_channels"],
        kernel_size=best_params["kernel_size"],
        hidden_size=best_params["hidden_size"],
        dropout=best_params["dropout"]
    )

    _, test_acc = vanillaNN(
        training_data,
        test_data,
        best_params["batch_size"],
        best_params["num_epochs"],
        final_model
    )

    return test_acc


class NeuralNetwork(nn.Module):
    """1D CNN -> two-layer LSTM -> linear classifier.

    The network expects input of shape ``batch_size x in_channels x seq_len``
    and returns ``batch_size x num_classes`` logits.

    Args:
        out_channels: number of Conv1d output channels (also the LSTM input size).
        kernel_size: Conv1d kernel width.
        hidden_size: LSTM hidden state size.
        dropout: dropout probability passed to the two-layer LSTM.
        in_channels: number of input channels (default 88).
        seq_len: length of the input along the last axis (default 64).
        num_classes: number of output classes (default 5 gesture classes).

    Raises:
        ValueError: if ``kernel_size`` exceeds ``seq_len``, which would leave
            the convolution with no output positions.
    """

    def __init__(self, out_channels=16, kernel_size=3, hidden_size=64, dropout=0.2,
                 in_channels=88, seq_len=64, num_classes=5):
        super().__init__()

        # Length of the Conv1d output (no padding, stride 1).
        conv_len = seq_len - kernel_size + 1
        if conv_len < 1:
            raise ValueError(
                f"kernel_size ({kernel_size}) must not exceed seq_len ({seq_len})"
            )
        self.num_classes = num_classes

        # 1D CNN
        self.conv1 = nn.Conv1d(
            in_channels=in_channels,
            out_channels=out_channels,
            kernel_size=kernel_size
        )

        # Two-layer LSTM
        self.lstm = nn.LSTM(
            input_size=out_channels,
            hidden_size=hidden_size,
            num_layers=2,
            batch_first=True,
            dropout=dropout
        )

        self.flatten = nn.Flatten()

        # FFNN for classification: every LSTM time step is flattened into one vector.
        self.fc = nn.Linear(hidden_size * conv_len, num_classes)

    def forward(self, x):
        """Return class logits for a batch shaped batch_size x in_channels x seq_len."""
        x = self.conv1(x)       # batch_size x out_channels x conv_len
        x = torch.relu(x)
        x = x.permute(0, 2, 1)  # batch_size x conv_len x out_channels, as batch_first LSTM expects
        x, _ = self.lstm(x)
        x = self.flatten(x)
        logits = self.fc(x)

        # Explicit comparison: np.allclose only returns a bool and never raises
        # on a mismatch, so wrapping it in try/except could not detect anything.
        if logits.shape[1] != self.num_classes:
            print(f"The output of your FFNN is wrong -- should be {self.num_classes} classes")

        return logits


def get_device():
    """Return "cuda" when torch reports an available CUDA device, otherwise "cpu"."""
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"


class BiosignalDataset(Dataset):
    """Biosignal dataset backed by an index CSV.

    The index CSV must provide a ``fname_EMG`` column (path to a headerless
    CSV holding one recording) and a ``motionlabel`` column (its label).

    Args:
        csv_file: path to the index CSV.
        transform: optional callable applied to each recording tensor before
            it is returned; ``None`` returns the tensor unchanged.
    """

    def __init__(self, csv_file, transform=None):
        self.biosignals = pd.read_csv(csv_file)
        self.transform = transform

    def __len__(self):
        """Number of recordings listed in the index CSV."""
        return len(self.biosignals)

    def __getitem__(self, idx):
        """Return ``(data, label)`` for the recording at position ``idx``.

        ``data`` is the recording CSV transposed, i.e. CSV columns become the
        first tensor axis.
        """
        biosignal_path = self.biosignals["fname_EMG"].iloc[idx]
        label = self.biosignals["motionlabel"].iloc[idx]
        recording = pd.read_csv(biosignal_path, header=None, index_col=None)
        data = torch.tensor(recording.values.T.tolist())
        # Apply the transform if one was given; it was previously stored but never used.
        if self.transform is not None:
            data = self.transform(data)
        return data, label


def vanillaNN(training_data, test_data, batch_size, num_epochs, model):
    """Train ``model`` with Adam / cross-entropy and score it on ``test_data``.

    Args:
        training_data: dataset yielding ``(inputs, labels)`` pairs for training.
        test_data: dataset yielding ``(inputs, labels)`` pairs for evaluation.
        batch_size: mini-batch size used for training.
        num_epochs: number of passes over the training data.
        model: the network to train; it is moved to the device from ``get_device()``.

    Returns:
        ``(model, test_val_acc)``: the trained model and the fraction of test
        samples classified correctly.
    """
    device = get_device()
    model = model.to(device)
    model.zero_grad()

    # Loss, optimizer and shuffled training batches
    loss_fn = nn.CrossEntropyLoss()
    adam = optim.Adam(model.parameters(), lr=0.001)
    train_batches = DataLoader(training_data, batch_size=batch_size, shuffle=True)

    def _prepare(inputs, labels):
        # Cast to the dtypes the model / loss expect, then move to the device.
        return inputs.to(torch.float).to(device), labels.to(torch.int64).to(device)

    # Training loop
    for _ in range(num_epochs):
        model.train()
        for batch in train_batches:
            features, targets = _prepare(*batch)

            adam.zero_grad()
            batch_loss = loss_fn(model(features), targets)
            batch_loss.backward()
            adam.step()

    # Test evaluation: the whole test set is scored as a single batch
    eval_batches = DataLoader(test_data, batch_size=len(test_data), shuffle=False)
    model.eval()
    hits = 0
    seen = 0
    with torch.no_grad():
        for batch in eval_batches:
            features, targets = _prepare(*batch)
            predicted = model(features).max(dim=1).indices
            seen += targets.size(0)
            hits += (predicted == targets).sum().item()

    return model, hits / seen


In [None]:
# Run the hyperparameter search and report the final test accuracy.
# The accuracy should be at least 60%
accuracy = deep_learning(train=train, test=test)
print(f"Accuracy: {accuracy:.2%}")

Best hyperparameters: {'out_channels': 8, 'kernel_size': 3, 'hidden_size': 32, 'dropout': 0.0, 'num_epochs': 20, 'batch_size': 2}
Accuracy: 80.00%


# template-matching
- [Documentation for PCA function](https://scikit-learn.org/stable/modules/generated/sklearn.decomposition.PCA.html)

Hint: you can get the principal components of the fitted PCA by running ``pca.components_``.

In [None]:
"""
template_matching
  code skeleton for template_matching algorithm
  inputs:
      train: training data [class name x data (64 x 88)]
      test: testing data [class name x data (64 x 88)]
      N_PC: number of principal components (hyperparameter)
"""
def template_matching(train,test,N_PC=None):
  """Classify each test recording by its nearest training template in PCA space.

  For every training recording a PCA basis is fitted (``get_pca``). A test
  recording is compared against a template by projecting both onto that
  template's basis (``apply_pca``), z-scoring each projected column, and
  taking the Euclidean (Frobenius) distance. The label of the closest
  template is the prediction.

  Args:
    train: iterable of rows where ``row[0]`` is the path to a headerless CSV
      recording and ``row[1]`` is its label.
    test: same layout as ``train``.
    N_PC: number of principal components, forwarded to ``get_pca``.

  Returns:
    Accuracy of the predicted labels against the test labels, as returned by
    ``accuracy_score``.

  Raises:
    ValueError: if ``train`` contains no recordings.
  """
  # Build one template per training recording. The z-scored projection of a
  # template onto its own basis does not depend on the test data, so it is
  # computed once here instead of once per test recording.
  templates = []
  for file in train:
    data = pd.read_csv(file[0], header=None).to_numpy()
    X_pca = get_pca(data,N_PC)
    train_applied_norm = _zscore_columns(apply_pca(data, X_pca))
    templates.append([file[1], X_pca, train_applied_norm])

  if not templates:
    raise ValueError("train must contain at least one template recording")

  # For each test recording, find the nearest template.
  test_labels = []
  for file in test:
    data = pd.read_csv(file[0], header=None).to_numpy()

    min_distance = np.inf
    gesture_label = None

    for temp_label, X_pca, train_applied_norm in templates:
      # Project the test recording onto this template's basis and normalise it
      # the same way the template was normalised.
      test_applied_norm = _zscore_columns(apply_pca(data, X_pca))

      # Euclidean distance between the two normalised projections
      distance = np.linalg.norm(test_applied_norm - train_applied_norm)

      # Strict comparison keeps the earliest template on ties.
      if distance < min_distance:
        min_distance = distance
        gesture_label = temp_label

    test_labels.append([file[1],gesture_label])

  # accuracy_score documentation: https://scikit-learn.org/stable/modules/generated/sklearn.metrics.accuracy_score.html
  actual = [t[1] for t in test]
  predicted = [tl[1] for tl in test_labels]
  test_accuracy = accuracy_score(actual, predicted)

  return test_accuracy


def _zscore_columns(matrix):
  """Standardise each column to zero mean and unit std (epsilon avoids division by zero)."""
  return (matrix - np.mean(matrix, axis=0)) / (np.std(matrix, axis=0) + 1e-8)


def get_pca(data,N_PC):
    """Fit PCA on ``data`` and return its principal axes as columns.

    Args:
        data: 2-D array with samples in rows and features in columns.
        N_PC: number of principal components, passed to ``PCA(n_components=...)``.

    Returns:
        ``pca.components_`` transposed, i.e. an array of shape
        n_features x n_components (88 x N_PC for the data used here).
    """
    pca = PCA(n_components=N_PC)
    pca.fit(data)

    X_pca = pca.components_.T

    # Sanity check of the expected 88 x N_PC shape. np.allclose only returns a
    # bool, so the result has to be tested explicitly. The column count is only
    # checked when N_PC is an integer (N_PC=None lets PCA pick the count).
    rows_ok = X_pca.shape[0] == 88
    cols_ok = (not isinstance(N_PC, (int, np.integer))) or X_pca.shape[1] == N_PC
    if not (rows_ok and cols_ok):
        print("the Shape of X_pca is wrong, should be 88 x N_PC")
    return X_pca

def apply_pca(data,X_pca):
  """Project ``data`` onto the principal axes stored as the columns of ``X_pca``.

  Args:
    data: array that can be matrix-multiplied with ``X_pca`` (64 x 88 expected here).
    X_pca: array of principal axes (88 x N_PC expected here).

  Returns:
    ``np.matmul(data, X_pca)``. A message is printed, but the result is still
    returned, when its shape is not 64 x N_PC.
  """
  transformed = np.matmul(data, X_pca)

  # Explicit shape comparison: np.allclose only returns a bool and never
  # raises on a mismatch, so a try/except around it could not catch a wrong shape.
  axes_shape = np.shape(X_pca)
  shape_ok = len(axes_shape) == 2 and np.shape(transformed) == (64, axes_shape[1])
  if not shape_ok:
    print("The shape of your matrix multiplication is wrong, should be 64 x N_PC")
  return transformed

In [None]:
# Sweep the N_PC hyperparameter over 1..64 and keep the first value that
# reaches the highest accuracy. The accuracy should be at least 60%.
best_accur = 0
best_npc = None
for k in range(1, 65):
  acc = template_matching(train, test, N_PC=k)
  if not acc > best_accur:
    continue
  best_accur, best_npc = acc, k

# Re-run with the selected N_PC and report the result.
N_PC = best_npc
accuracy = template_matching(train, test, N_PC=N_PC)
print(f"Accuracy: {accuracy:.2%}")
print(f"N_PC: {N_PC}")


Accuracy: 80.00%
N_PC: 1
