In [19]:
# Imports
import pandas as pd
import matplotlib.pyplot as plt
import time 

import torch
import mrmr
from torch import nn
from torch.utils.data import Dataset, DataLoader

In [20]:
# Constants
DATA_PATH = './data/'
DATASET_PATH = DATA_PATH + 'smartphone+based+recognition+of+human+activities+and+postural+transitions/'
MODELS_PATH = DATA_PATH + 'models/'

In [21]:
# Loading functions
def load_data_frame(txt):
    return pd.read_csv(DATASET_PATH + txt + '.txt', header=None, sep='\s+')

In [22]:
# Loading data
train_set = load_data_frame('Train/X_train')
test_set = load_data_frame('Test/X_test')

features = []
with open(DATASET_PATH + 'features.txt') as f:
    for line in f:
        feat = line.strip()
        features.append(feat)

train_set.columns = features
test_set.columns = features

train_set['subjectId'] = load_data_frame('Train/subject_id_train')
train_set['activity'] = load_data_frame('Train/y_train')

test_set['subjectId'] = load_data_frame('Test/subject_id_test')
test_set['activity'] = load_data_frame('Test/y_test')

activity_labels = load_data_frame('activity_labels')
activity_labels.columns = ['id', 'activity']

features = train_set.columns.tolist()

In [23]:
# Graphing features
complete_set_train = train_set.copy()
for x in activity_labels['id']:
    complete_set_train['activity'] = complete_set_train['activity'].replace(x, activity_labels['activity'][x - 1])

selected_features_graph = []

for selected_feature in selected_features_graph:
    if selected_feature != '':
        train_subset = complete_set_train[[selected_feature, 'activity']]
        grouped_train_by_feature = train_subset.groupby('activity').mean()

        grouped_train_by_feature.plot(kind='bar', legend=False)
        plt.title(selected_feature + ' Vs. Activity')
        plt.xlabel('Activity')
        plt.ylabel(selected_feature + ' mean value')
        plt.show()


In [24]:
# Creating model
device = None

if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

num_features = 30

class NeuralNetwork(nn.Module):
    def __init__(self):
        super(NeuralNetwork, self).__init__()
        self.fc1 = nn.Linear(num_features, 512)
        self.relu1 = nn.ReLU()
        self.lstm = nn.LSTM(512, 8, 2, batch_first=True)
        self.relu2 = nn.ReLU()
        self.fc2 = nn.Linear(8, 1)
        self.silu = nn.SiLU()

    def forward(self, x):
        x = self.fc1(x)
        x = self.relu1(x)
        # Prepare the input for LSTM
        x = x.unsqueeze(1)  # Add a dimension for sequence length (batch, seq_len, input_size)
        x, _ = self.lstm(x)
        x = x[:, -1, :]  # Take the output from the last time step
        x = self.relu2(x)
        x = self.fc2(x)
        x = self.silu(x)
        return x

model = NeuralNetwork().to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.0001)

In [25]:
# Training and testing function
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        y = y.squeeze()
        X, y = X.to(device), y.to(device)
        pred = model(X)
        loss = loss_fn(pred, y)
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()

        if batch % 100 == 0:
            loss, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            y = y.squeeze()
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    return(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [26]:
# Dataset preparation
class HAPTDataset(Dataset):
    def __init__(self, dataset, features, label):
        self.data = torch.tensor(dataset[features].values, dtype=torch.float32)
        self.labels = torch.tensor(dataset[label].values, dtype=torch.float32)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx], self.labels[idx]
    
mrmr_selected_features = mrmr.mrmr_classif(train_set[features[:-1]], train_set[features[-1]], num_features)
    
train_dataset = HAPTDataset(train_set, mrmr_selected_features, 'activity')
test_dataset = HAPTDataset(test_set, mrmr_selected_features, 'activity')

train_dataloader = DataLoader(train_dataset, batch_size=8, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=8, shuffle=True)

100%|██████████| 30/30 [00:05<00:00,  5.13it/s]


In [27]:
epochs = 5
perf_timer = time.perf_counter()
perf_acc = ""

for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_dataloader, model, loss_function, optimizer)
    perf_acc = test(test_dataloader, model, loss_function)
    
print(perf_acc)
perf_timer = time.perf_counter() - perf_timer
print("Done!")

Epoch 1
-------------------------------
loss: 0.000000  [    8/ 7767]
loss: 0.000000  [  808/ 7767]
loss: 0.000000  [ 1608/ 7767]
loss: 0.000000  [ 2408/ 7767]
loss: 0.000000  [ 3208/ 7767]
loss: 0.000000  [ 4008/ 7767]
loss: 0.000000  [ 4808/ 7767]
loss: 0.000000  [ 5608/ 7767]
loss: 0.000000  [ 6408/ 7767]
loss: 0.000000  [ 7208/ 7767]
Epoch 2
-------------------------------
loss: 0.000000  [    8/ 7767]
loss: 0.000000  [  808/ 7767]
loss: 0.000000  [ 1608/ 7767]
loss: 0.000000  [ 2408/ 7767]
loss: 0.000000  [ 3208/ 7767]
loss: 0.000000  [ 4008/ 7767]
loss: 0.000000  [ 4808/ 7767]
loss: 0.000000  [ 5608/ 7767]
loss: 0.000000  [ 6408/ 7767]
loss: 0.000000  [ 7208/ 7767]
Epoch 3
-------------------------------
loss: 0.000000  [    8/ 7767]
loss: 0.000000  [  808/ 7767]
loss: 0.000000  [ 1608/ 7767]
loss: 0.000000  [ 2408/ 7767]
loss: 0.000000  [ 3208/ 7767]
loss: 0.000000  [ 4008/ 7767]
loss: 0.000000  [ 4808/ 7767]
loss: 0.000000  [ 5608/ 7767]
loss: 0.000000  [ 6408/ 7767]
loss: 0.00

In [28]:
if True:
    torch.save(model.state_dict(), DATA_PATH + "models/lstm_1.pth")
    print("Saved PyTorch Model State to model.pth")

    with open(DATA_PATH + "models/lstm_1.txt", "w") as f:
        f.write("Epochs: {}\n".format(epochs))
        f.write("Feature Selection: {}\n".format("MRMR"))
        f.write("Feature Set: Top {}\n".format(num_features))
        f.write("Model: {}\n".format(str(model)))
        f.write("Loss Function: {}\n".format("Cross Entropy Loss"))
        f.write("Optimizer: {}\n\n\n\n".format(str(optimizer)))
        f.write("Results: {}\n".format(perf_acc))
        f.write("Timer: {}s\n".format(round(perf_timer, 2)))

Saved PyTorch Model State to model.pth
