### Import Libraries

In [19]:
import time
import numpy as np
import pandas as pd
from tqdm import tqdm

import matplotlib
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns

import torch
import torchvision
import torch.nn as nn

from torchvision import datasets
from torchvision import transforms
from torch.autograd import Variable
from sklearn.metrics import f1_score

from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader

from prettytable import PrettyTable

import torch.optim as optim
from torch.optim.lr_scheduler import MultiplicativeLR, ExponentialLR

import warnings
warnings.filterwarnings("ignore")

from warnings import simplefilter
simplefilter(action='ignore', category=FutureWarning)

### Mount Google Drive

In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


### Configure Device

In [3]:
# Configure device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print('Using PyTorch version:', torch.__version__, ' Device:', device)

Using PyTorch version: 2.0.0+cu118  Device: cpu


In [11]:
raw_data = pd.read_csv('/content/drive/MyDrive/data/subject101.csv')
raw_data.head()

Unnamed: 0,timestamp,activityID,heartrate,handTemperature,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,...,ankleGyro1,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3,ankleOrientation1,ankleOrientation2,ankleOrientation3,ankleOrientation4
0,8.38,0,104.0,30.0,2.37223,8.60074,3.51048,2.43954,8.76165,3.35465,...,0.0083,0.00925,-0.01758,-61.1888,-38.9599,-58.1438,1.0,0.0,0.0,0.0
1,8.39,0,,30.0,2.18837,8.5656,3.66179,2.39494,8.55081,3.64207,...,-0.006577,-0.004638,0.000368,-59.8479,-38.8919,-58.5253,1.0,0.0,0.0,0.0
2,8.4,0,,30.0,2.37357,8.60107,3.54898,2.30514,8.53644,3.7328,...,0.003014,0.000148,0.022495,-60.7361,-39.4138,-58.3999,1.0,0.0,0.0,0.0
3,8.41,0,,30.0,2.07473,8.52853,3.66021,2.33528,8.53622,3.73277,...,0.003175,-0.020301,0.011275,-60.4091,-38.7635,-58.3956,1.0,0.0,0.0,0.0
4,8.42,0,,30.0,2.22936,8.83122,3.7,2.23055,8.59741,3.76295,...,0.012698,-0.014303,-0.002823,-61.5199,-39.3879,-58.2694,1.0,0.0,0.0,0.0


In [13]:
# Define a function to clean data
def clean_data(data):
    data = data.dropna()
    data = data.reset_index(drop=True)

    # Remove certain columns
    data = data.drop(['timestamp'], axis=1)
    # Remove the orientation columns
    data = data.drop(['handOrientation1', 'handOrientation2', 'handOrientation3', 'handOrientation4'], axis=1)
    data = data.drop(['chestOrientation1', 'chestOrientation2', 'chestOrientation3', 'chestOrientation4'], axis=1)
    data = data.drop(['ankleOrientation1', 'ankleOrientation2', 'ankleOrientation3', 'ankleOrientation4'], axis=1)

    # For the heart rate, fill missing values with previous timestamp's heart rate
    data['heartrate'] = data['heartrate'].fillna(method='ffill')

    # For any other missing values, fill them with last value
    data = data.fillna(method='ffill')

    # Normalize the data
    # data = (data - data.mean()) / data.std()
    # discard data with NaN values
    data = data.dropna()
    data = data.reset_index(drop=True)

    # disacrd data with activityID = 0
    data = data[data['activityID'] != 0]
    data = data.reset_index(drop=True)

    # Shuffle the data
    data = data.sample(frac=1).reset_index(drop=True)
    return data



In [14]:
data = clean_data(raw_data)
data.head()

Unnamed: 0,activityID,heartrate,handTemperature,handAcc16_1,handAcc16_2,handAcc16_3,handAcc6_1,handAcc6_2,handAcc6_3,handGyro1,...,ankleAcc16_3,ankleAcc6_1,ankleAcc6_2,ankleAcc6_3,ankleGyro1,ankleGyro2,ankleGyro3,ankleMagne1,ankleMagne2,ankleMagne3
0,17,97.0,33.3125,-4.54123,-1.26669,9.8676,-4.35544,-1.16566,10.073,-0.212205,...,-1.56609,9.6565,-1.57248,-0.991222,-0.146069,-0.029807,0.062784,-56.8933,-5.86258,48.1288
1,12,158.0,33.8125,-5.88844,4.25573,2.55467,-5.70131,4.13776,2.34626,-0.780573,...,-1.18583,9.91304,-0.286557,-0.799964,-0.037472,0.428347,-0.17762,-89.9159,13.9854,3.18375
2,1,83.0,31.75,7.21685,1.34144,6.37862,7.21273,1.37547,6.67302,0.018935,...,-7.38237,-0.059632,-7.02707,-7.0367,0.00154,0.008933,0.004456,-19.5319,43.9713,0.699916
3,16,134.0,33.6875,-6.63763,-0.935055,2.71491,-8.21312,-0.163801,1.89013,-1.08955,...,-1.73124,9.86754,0.167015,-0.937398,0.100134,0.169276,-0.089885,12.9515,-12.7274,-3.265
4,7,146.0,32.3125,-7.18286,6.63088,3.95131,-7.26729,10.3572,3.46679,-1.54792,...,-7.71151,18.0299,-2.37925,-6.72394,0.275258,1.38942,-2.83349,-50.3413,-36.8631,13.5816


In [20]:
y = data['activityID']
X = data.drop(['activityID'], axis=1)

print("Shape of X:", X.shape)
print("Shape of Y:", y.shape)

Shape of X: (22590, 40)
Shape of Y: (22590,)


In [21]:
X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)

In [22]:
# Step 2: Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)


print("Shape of training data: ", X_train.shape)
print("Shape of test data: ", X_test.shape)
print("Shape of training labels: ", y_train.shape)
print("Shape of test labels: ", y_test.shape)

Shape of training data:  (18072, 40)
Shape of test data:  (4518, 40)
Shape of training labels:  (18072,)
Shape of test labels:  (4518,)


In [26]:
class PAMAP2Dataset(Dataset):
    def __init__(self, X, y):
        self.X = torch.tensor(X.to_numpy(), dtype=torch.float32)
        self.y = torch.tensor(y.to_numpy(), dtype=torch.long)
        
    def __len__(self):
        return len(self.X)
    
    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

In [27]:
batch_size = 251
train_dataset = PAMAP2Dataset(X_train, y_train)
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = PAMAP2Dataset(X_test, y_test)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [28]:
class Neural_Network(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size):
        super(Neural_Network,self).__init__()
        self.layers = nn.ModuleList()
        
        # The input layer just holds the input data and no calculation is performed. Therefore, no activation function is used there.
        self.layers.append(nn.Linear(input_size, hidden_sizes[0]))

        for i in range(len(hidden_sizes) - 1):
            self.layers.append(nn.Linear(hidden_sizes[i], hidden_sizes[i + 1]))
            self.layers.append(nn.ReLU())

        self.layers.append(nn.Linear(hidden_sizes[-1], output_size))

    def forward(self,x):
        for layer in self.layers:
            x = layer(x)
        return x

In [48]:
class Trainer_and_Tester():
    def __init__(self, model, optimizer, criterion, trainloader, testloader, lr_scheduler = None):
      self.model = model
      self.optimizer = optimizer
      self.criterion = criterion

      self.trainloader = trainloader
      self.testloader = testloader

      self.lr_scheduler = lr_scheduler

      self.train_loss = []
      self.test_loss = []
      self.accuracy = []
      self.f1_score = []

    def train(self, epochs, log_interval=1000):
      for epoch in range(1, epochs + 1):
        # Set model to training mode
        self.model.train()

        # Loop over each batch from the training set
        for batch_idx, (data, target) in enumerate(self.trainloader):
            # Copy data to GPU if needed
            data, target = data.to(device), target.to(device)
            # Zero gradient buffers
            self.optimizer.zero_grad() 
            # Pass data through the network
            output = self.model(data)
            # Calculate loss
            loss = criterion(output, target)
            # Backpropagate
            loss.backward()
            # Update weights
            self.optimizer.step()
            
            lr=optimizer.param_groups[0]["lr"]

            # Print details of this epoch
            if batch_idx % log_interval == 0:
                self.train_loss.append(loss.data.item())
                print('Train Epoch: {}\tLoss: {:.6f}\tLearning Rate: {}'.format(epoch, loss.data.item(), lr))
    
    def test(self):
      # Set model to evaluation mode
      self.model.eval()

      # Variables to monitor test loss and accuracy
      test_loss = 0.
      correct = 0.
      total = 0.

      with torch.no_grad():
          # Loop over each batch from the testing set
          for batch_idx, (data, target) in enumerate(self.testloader):
              # Copy data to GPU if needed
              data, target = data.to(device), target.to(device)
              # Pass data through the network
              output = self.model(data)
              # Calculate loss
              loss = criterion(output, target)
              # Update average test loss 
              test_loss = test_loss + ((1 / (batch_idx + 1)) * (loss.data.item() - test_loss))
              # Get the index of the max log-probability
              pred = output.data.max(1, keepdim=True)[1]
              # Compare predictions to true label
              correct += np.sum(np.squeeze(pred.eq(target.data.view_as(pred))).cpu().numpy())
              total += data.size(0)

      self.test_loss.append(test_loss)
      self.accuracy.append(100. * correct / total)
      self.f1_score.append(f1_score(target.data.cpu().numpy(), pred.cpu().numpy(), average='macro'))

      print('Test Loss: {:.6f}, Accuracy: {}/{} {:.3f}%, F1 Score: {}'.format(test_loss, correct, total, 100. * correct / total, f1_score(target.data.cpu().numpy(), pred.cpu().numpy(), average='macro')))
      print()
    

In [43]:
print(np.unique(y_train))

[ 1  2  3  4  5  6  7 12 13 16 17 24]


In [50]:
input_size = X_train.shape[1]
hidden_size = [64, 128, 32]
output_size = 25
print(output_size)

model = Neural_Network(input_size, hidden_size, output_size).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

print(model)

25
Neural_Network(
  (layers): ModuleList(
    (0): Linear(in_features=40, out_features=64, bias=True)
    (1): Linear(in_features=64, out_features=128, bias=True)
    (2): ReLU()
    (3): Linear(in_features=128, out_features=32, bias=True)
    (4): ReLU()
    (5): Linear(in_features=32, out_features=25, bias=True)
  )
)


In [51]:
tnt = Trainer_and_Tester(model, optimizer, criterion, train_loader, test_loader)

print("Training the net...")
tnt.train(10)
print("Training Completed...")

print("Testing...")
tnt.test()
print("Testing done...")

Training the net...
Train Epoch: 1	Loss: 3.244740	Learning Rate: 0.001
Train Epoch: 2	Loss: 0.760867	Learning Rate: 0.001
Train Epoch: 3	Loss: 0.270207	Learning Rate: 0.001
Train Epoch: 4	Loss: 0.147033	Learning Rate: 0.001
Train Epoch: 5	Loss: 0.079092	Learning Rate: 0.001
Train Epoch: 6	Loss: 0.054244	Learning Rate: 0.001
Train Epoch: 7	Loss: 0.056611	Learning Rate: 0.001
Train Epoch: 8	Loss: 0.036973	Learning Rate: 0.001
Train Epoch: 9	Loss: 0.029580	Learning Rate: 0.001
Train Epoch: 10	Loss: 0.026376	Learning Rate: 0.001
Training Completed...
Testing...
Test Loss: 0.024741, Accuracy: 4488.0/4518.0 99.336%, F1 Score: 0.9731182795698925

Testing done...
