In [1]:
import numpy as np

# Load Dataset

In [2]:
# Read the feature matrices and label vectors of the UCI HAR dataset from disk.
# The dataset's own "train" partition is kept whole here (as *_trainval); it is
# divided into train and validation subsets later in the notebook.
X_trainval, y_trainval = (
    np.loadtxt(path)
    for path in ('Datasets/UCI HAR Dataset/train/X_train.txt',
                 'Datasets/UCI HAR Dataset/train/y_train.txt')
)

X_test, y_test = (
    np.loadtxt(path)
    for path in ('Datasets/UCI HAR Dataset/test/X_test.txt',
                 'Datasets/UCI HAR Dataset/test/y_test.txt')
)

## Split into Train, Val, Test

In [3]:
# Fix NumPy's global RNG so the subject shuffle below -- and therefore the
# membership of the train and validation splits -- is identical on every run.
np.random.seed(1)

# One subject id per row of the train partition.
trainval_subjects = np.loadtxt('Datasets/UCI HAR Dataset/train/subject_train.txt')
trainval_subject_ids = np.unique(trainval_subjects)

# Split by subject rather than by sample: shuffle the distinct ids in place,
# then give the first 16 to training and whatever remains to validation.
np.random.shuffle(trainval_subject_ids)
n_train_subjects = 16
train_subjects = trainval_subject_ids[:n_train_subjects]
val_subjects = trainval_subject_ids[n_train_subjects:]

In [4]:
# Boolean row masks: True where a sample was recorded from a subject that
# belongs to the given split.
#
# np.isin does the membership test in one vectorised call, replacing the
# per-subject OR loop. It also avoids the `np.bool` alias, which NumPy
# deprecated in 1.20 and removed in 1.24 (AttributeError on those versions).
train_masks = np.isin(trainval_subjects, train_subjects)
val_masks = np.isin(trainval_subjects, val_subjects)

In [5]:
# Carve the train and validation subsets out of the original train partition.
# Features and labels are indexed with the same mask so their rows stay aligned.
X_train = X_trainval[train_masks]
y_train = y_trainval[train_masks]
X_val = X_trainval[val_masks]
y_val = y_trainval[val_masks]

In [6]:
# Sanity check: shapes of the feature/label arrays for train, val and test.
tuple(split.shape for split in (X_train, y_train, X_val, y_val, X_test, y_test))

((5609, 561), (5609,), (1743, 561), (1743,), (2947, 561), (2947,))

# Normalise the Features

In [7]:
# Per-feature mean and standard deviation, estimated on the training split
# only (axis 0 runs over samples).
mean = X_train.mean(axis=0)
std = X_train.std(axis=0)
(mean.shape, std.shape)

((561,), (561,))

In [8]:
# Standardise every split with the training-set statistics. `eps` is a tiny
# offset on the denominator that guards against division by zero for any
# feature whose standard deviation is zero.
eps = 1e-7
X_train_normalised, X_val_normalised, X_test_normalised = (
    (split - mean) / (std + eps) for split in (X_train, X_val, X_test)
)

In [9]:
import warnings
# Install a blanket "ignore" filter: from this point on every warning raised in
# the session is suppressed (presumably to keep the cell outputs below readable).
warnings.filterwarnings("ignore")
# Use the line below instead to bring warnings back, e.g. while debugging.
# warnings.filterwarnings("default")

# Ridge

In [10]:
from sklearn.linear_model import RidgeClassifier

# Sweep the regularisation strength and keep whichever fitted model scores
# highest on the validation split (the earliest candidate wins on ties).
max_val_acc, best_alpha, ridge = -1, None, None
for candidate_alpha in (0, 0.1, 1, 3, 10):
    model = RidgeClassifier(alpha=candidate_alpha)
    model.fit(X_train_normalised, y_train)
    acc_train = model.score(X_train_normalised, y_train)
    acc_val = model.score(X_val_normalised, y_val)
    print('alpha: {:5.1f}, train_acc: {:.3f}, val_acc: {:.3f}'.format(candidate_alpha, acc_train, acc_val))
    if acc_val > max_val_acc:
        max_val_acc, best_alpha, ridge = acc_val, candidate_alpha, model

# Report the selected model on all three splits.
print('best alpha: {}'.format(best_alpha))
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(ridge.score(features, labels)))

alpha:   0.0, train_acc: 0.991, val_acc: 0.923
alpha:   0.1, train_acc: 0.991, val_acc: 0.929
alpha:   1.0, train_acc: 0.991, val_acc: 0.927
alpha:   3.0, train_acc: 0.992, val_acc: 0.924
alpha:  10.0, train_acc: 0.991, val_acc: 0.921
best alpha: 0.1
train  acc: 0.991
val    acc: 0.929
test   acc: 0.938


# Support Vector Machine

In [11]:
from sklearn.svm import LinearSVC

# Sweep the penalty parameter C of a linear SVM (solved in the primal,
# dual=False) and keep the model with the highest validation accuracy; the
# earliest candidate wins on ties.
max_val_acc, best_C, linear_svc = -1, None, None
for candidate_c in (0.1, 1, 3, 10):
    model = LinearSVC(C=candidate_c, dual=False)
    model.fit(X_train_normalised, y_train)
    acc_train = model.score(X_train_normalised, y_train)
    acc_val = model.score(X_val_normalised, y_val)
    print('C: {:5.1f}, train_acc: {:.3f}, val_acc: {:.3f}'.format(candidate_c, acc_train, acc_val))
    if acc_val > max_val_acc:
        max_val_acc, best_C, linear_svc = acc_val, candidate_c, model

# Report the selected model on all three splits.
print('best C: {}'.format(best_C))
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(linear_svc.score(features, labels)))

C:   0.1, train_acc: 0.997, val_acc: 0.935
C:   1.0, train_acc: 1.000, val_acc: 0.920
C:   3.0, train_acc: 1.000, val_acc: 0.915
C:  10.0, train_acc: 1.000, val_acc: 0.909
best C: 0.1
train  acc: 0.997
val    acc: 0.935
test   acc: 0.941


In [12]:
from sklearn.svm import SVC

# Compare the non-linear SVC kernels (all other hyper-parameters left at their
# defaults) and keep the one with the highest validation accuracy; the
# earliest candidate wins on ties.
max_val_acc, best_kernel, svc = -1, None, None
for candidate_kernel in ('poly', 'rbf', 'sigmoid'):
    model = SVC(kernel=candidate_kernel)
    model.fit(X_train_normalised, y_train)
    acc_train = model.score(X_train_normalised, y_train)
    acc_val = model.score(X_val_normalised, y_val)
    print('Kernel: {}, train_acc: {:.3f}, val_acc: {:.3f}'.format(candidate_kernel, acc_train, acc_val))
    if acc_val > max_val_acc:
        max_val_acc, best_kernel, svc = acc_val, candidate_kernel, model

# Report the selected model on all three splits.
print('best kernel: {}'.format(best_kernel))
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(svc.score(features, labels)))

Kernel: poly, train_acc: 0.983, val_acc: 0.933
Kernel: rbf, train_acc: 0.990, val_acc: 0.923
Kernel: sigmoid, train_acc: 0.838, val_acc: 0.788
best kernel: poly
train  acc: 0.983
val    acc: 0.933
test   acc: 0.909


# AdaBoost

In [13]:
from sklearn.ensemble import AdaBoostClassifier

# AdaBoost with library-default settings: fit once on the training split, then
# report accuracy on every split.
adaboost = AdaBoostClassifier()
adaboost.fit(X_train_normalised, y_train)
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(adaboost.score(features, labels)))

train  acc: 0.541
val    acc: 0.558
test   acc: 0.531


# Random Forest

In [14]:
from sklearn.ensemble import RandomForestClassifier

# Random forest with library-default settings: fit once on the training split,
# then report accuracy on every split.
random_forest = RandomForestClassifier()
random_forest.fit(X_train_normalised, y_train)
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(random_forest.score(features, labels)))

train  acc: 1.000
val    acc: 0.871
test   acc: 0.898


# K-Nearest Neighbours

In [15]:
from sklearn.neighbors import KNeighborsClassifier

# Sweep the neighbourhood size (odd values only) and keep the classifier with
# the highest validation accuracy; the earliest candidate wins on ties.
max_val_acc, best_k, knn = -1, None, None
for candidate_k in (1, 3, 5, 7, 9):
    model = KNeighborsClassifier(n_neighbors=candidate_k)
    model.fit(X_train_normalised, y_train)
    acc_train = model.score(X_train_normalised, y_train)
    acc_val = model.score(X_val_normalised, y_val)
    print('K: {}, train_acc: {:.3f}, val_acc: {:.3f}'.format(candidate_k, acc_train, acc_val))
    if acc_val > max_val_acc:
        max_val_acc, best_k, knn = acc_val, candidate_k, model

# Report the selected model on all three splits.
print('best K: {}'.format(best_k))
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(knn.score(features, labels)))

K: 1, train_acc: 1.000, val_acc: 0.832
K: 3, train_acc: 0.992, val_acc: 0.851
K: 5, train_acc: 0.984, val_acc: 0.857
K: 7, train_acc: 0.979, val_acc: 0.858
K: 9, train_acc: 0.976, val_acc: 0.864
best K: 9
train  acc: 0.976
val    acc: 0.864
test   acc: 0.870


# Decision Tree

In [16]:
from sklearn.tree import DecisionTreeClassifier

# Compare split-quality criteria for a single decision tree and keep the tree
# with the highest validation accuracy; the earliest candidate wins on ties.
# NOTE(review): scikit-learn appears to document 'log_loss' as the same
# criterion as 'entropy'; with no random_state set, any gap between those two
# rows is presumably RNG noise -- confirm before reading anything into it.
max_val_acc, best_criterion, decisiontree = -1, None, None
for candidate_criterion in ('gini', 'entropy', 'log_loss'):
    model = DecisionTreeClassifier(criterion=candidate_criterion)
    model.fit(X_train_normalised, y_train)
    acc_train = model.score(X_train_normalised, y_train)
    acc_val = model.score(X_val_normalised, y_val)
    print('Criterion: {}, train_acc: {:.3f}, val_acc: {:.3f}'.format(candidate_criterion, acc_train, acc_val))
    if acc_val > max_val_acc:
        max_val_acc, best_criterion, decisiontree = acc_val, candidate_criterion, model

# Report the selected model on all three splits.
print('best Criterion: {}'.format(best_criterion))
for template, features, labels in (
    ('train  acc: {:.3f}', X_train_normalised, y_train),
    ('val    acc: {:.3f}', X_val_normalised, y_val),
    ('test   acc: {:.3f}', X_test_normalised, y_test),
):
    print(template.format(decisiontree.score(features, labels)))

Criterion: gini, train_acc: 1.000, val_acc: 0.806
Criterion: entropy, train_acc: 1.000, val_acc: 0.795
Criterion: log_loss, train_acc: 1.000, val_acc: 0.788
best Criterion: gini
train  acc: 1.000
val    acc: 0.806
test   acc: 0.793


# Neural Network

In [17]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import copy

In [18]:
class MLP(nn.Module):
    """Fully connected network with one activation between consecutive Linear layers.

    Args:
        neurons: Sequence of layer widths, input size first and output size
            last (e.g. ``[561, 1024, 6]``). Each consecutive pair becomes one
            ``nn.Linear``.
        activation: Name of the non-linearity placed after every Linear layer
            except the last one. Must be ``'ReLU'``, ``'LeakyReLU'``,
            ``'Sigmoid'`` or ``'Tanh'`` (case-sensitive).

    Raises:
        ValueError: If ``activation`` is not one of the supported names.
    """

    # Supported activation names mapped to the module class to instantiate.
    _ACTIVATIONS = {
        'ReLU': nn.ReLU,
        'LeakyReLU': nn.LeakyReLU,
        'Sigmoid': nn.Sigmoid,
        'Tanh': nn.Tanh,
    }

    def __init__(self, neurons, activation):
        super().__init__()
        # Validate once, before building anything, so a bad name is reported
        # even when `neurons` is too short to produce a layer.
        if activation not in self._ACTIVATIONS:
            raise ValueError('Activation must be one of ReLU, LeakyReLU, Sigmoid, or Tanh')
        make_activation = self._ACTIVATIONS[activation]

        layers = []
        for fan_in, fan_out in zip(neurons[:-1], neurons[1:]):
            layers.append(nn.Linear(fan_in, fan_out))
            layers.append(make_activation())
        # Drop the trailing activation: the network returns the raw output of
        # its last Linear layer.
        self.mlp = nn.Sequential(*layers[:-1])

    def forward(self, x):
        """Run ``x`` through the stacked layers and return the final Linear output."""
        return self.mlp(x)

In [19]:
def init_weights(m):
    """Initialise an ``nn.Linear`` in place; intended for ``module.apply(init_weights)``.

    The weight matrix gets Xavier (Glorot) normal initialisation and the bias,
    when the layer has one, is set to zero. Modules of any other type are left
    untouched.

    Args:
        m: A module, typically one visited by ``nn.Module.apply``.
    """
    if not isinstance(m, nn.Linear):
        return
    nn.init.xavier_normal_(m.weight)
    # A layer built with bias=False has `bias` set to None; skip it instead of
    # failing with AttributeError.
    if m.bias is not None:
        nn.init.zeros_(m.bias)

In [20]:
# Exhaustive grid search over MLP architecture, activation, learning rate and
# optimiser. Every configuration is trained full-batch for `num_epochs` epochs;
# validation accuracy is measured after each epoch and the best-scoring
# snapshot seen anywhere in the grid is kept in `best_mlp`.

# Seed PyTorch too: NumPy's seed does not cover the random weight
# initialisation, so without this the winning configuration can change
# between runs.
torch.manual_seed(1)

# Prefer the GPU but fall back to the CPU so the cell also runs without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the tensors once: they are identical for every configuration and every
# epoch, so there is no reason to re-create and re-upload them inside the loop.
# Labels are shifted by -1 so that they start at 0, as CrossEntropyLoss expects
# (assumes the raw class ids are 1..n_classes).
train_inputs = torch.tensor(X_train_normalised).float().to(device)
train_targets = torch.tensor(y_train - 1).long().to(device)
val_inputs = torch.tensor(X_val_normalised).float().to(device)
val_targets = torch.tensor(y_val - 1).long().to(device)

n_classes = 6
architectures = [[X_train_normalised.shape[1], 1024, n_classes],
                 [X_train_normalised.shape[1], 1024, 128, n_classes],
                 [X_train_normalised.shape[1], 1024, 512, 128, n_classes],
                 [X_train_normalised.shape[1], 1024, 1024, 128, n_classes]]
activations = ['ReLU', 'LeakyReLU', 'Sigmoid', 'Tanh']
lrs = [0.001, 0.0001, 0.00001]
optimisers = ['SGD', 'Adam']
num_epochs = 100
criterion = nn.CrossEntropyLoss()

best_val = -1
best_lr = -1
best_architecture = None
best_activation = None
best_opt = None
best_mlp = None


for arch in architectures:
    for activation in activations:
        for lr in lrs:
            for opt in optimisers:
                print('Trying {} with {} Activation Layer and {} Optimiser with {} Learning Rate'.format(arch, activation, opt, lr))
                mlp = MLP(arch, activation).to(device)
                mlp.apply(init_weights)
                if opt == 'SGD':
                    optimiser = optim.SGD(mlp.parameters(), lr=lr)
                else:
                    optimiser = optim.Adam(mlp.parameters(), lr=lr)
                for epoch in range(num_epochs):
                    # One full-batch gradient step.
                    mlp.train()
                    optimiser.zero_grad()
                    loss = criterion(mlp(train_inputs), train_targets)
                    loss.backward()
                    optimiser.step()

                    # Validation accuracy after this step.
                    mlp.eval()
                    with torch.no_grad():
                        pred = torch.argmax(mlp(val_inputs), 1)
                        correct = (pred == val_targets).sum()
                        val = (correct / val_targets.shape[0]).item()
                    if val > best_val:
                        best_val = val
                        best_lr = lr
                        best_architecture = arch
                        best_activation = activation
                        best_opt = opt
                        # Snapshot the weights: `mlp` keeps training after this epoch.
                        best_mlp = copy.deepcopy(mlp)

print('-------------------------------')                    
print('Best Validation: {}'.format(best_val))
print('Achieved with {} Architecture and {} Activation Layer and {} Optimiser with {} Learning Rate'.format(best_architecture, best_activation, best_opt, best_lr))

Trying [561, 1024, 6] with ReLU Activation Layer and SGD Optimiser with 0.001 Learning Rate
Trying [561, 1024, 6] with ReLU Activation Layer and Adam Optimiser with 0.001 Learning Rate
Trying [561, 1024, 6] with ReLU Activation Layer and SGD Optimiser with 0.0001 Learning Rate
Trying [561, 1024, 6] with ReLU Activation Layer and Adam Optimiser with 0.0001 Learning Rate
Trying [561, 1024, 6] with ReLU Activation Layer and SGD Optimiser with 1e-05 Learning Rate
Trying [561, 1024, 6] with ReLU Activation Layer and Adam Optimiser with 1e-05 Learning Rate
Trying [561, 1024, 6] with LeakyReLU Activation Layer and SGD Optimiser with 0.001 Learning Rate
Trying [561, 1024, 6] with LeakyReLU Activation Layer and Adam Optimiser with 0.001 Learning Rate
Trying [561, 1024, 6] with LeakyReLU Activation Layer and SGD Optimiser with 0.0001 Learning Rate
Trying [561, 1024, 6] with LeakyReLU Activation Layer and Adam Optimiser with 0.0001 Learning Rate
Trying [561, 1024, 6] with LeakyReLU Activation Lay

In [21]:
def _split_accuracy(model, features, labels):
    """Return the fraction of rows of `features` that `model` classifies correctly.

    Inputs are moved to whatever device the model's parameters live on, so the
    same code works on GPU and CPU. Labels are shifted by -1 to line up with
    the argmax class indices (assumes the raw class ids start at 1).
    """
    device = next(model.parameters()).device
    inputs = torch.tensor(features).float().to(device)
    targets = torch.tensor(labels - 1).long().to(device)
    pred = torch.argmax(model(inputs), 1)
    return ((pred == targets).sum() / targets.shape[0]).item()


# Score the selected network on every split, in eval mode and without
# tracking gradients.
best_mlp.eval()
with torch.no_grad():
    train_acc = _split_accuracy(best_mlp, X_train_normalised, y_train)
    print('train accuracy: ', train_acc)

    val_acc = _split_accuracy(best_mlp, X_val_normalised, y_val)
    print('val   accuracy: ', val_acc)

    test_acc = _split_accuracy(best_mlp, X_test_normalised, y_test)
    print('test  accuracy: ', test_acc)

train accuracy:  0.9980388879776001
val   accuracy:  0.9311531782150269
test  accuracy:  0.9290804266929626
