In [1]:
import os
import re
import itertools
import warnings
from joblib import dump

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from scipy import stats, signal
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import preprocessing
from sklearn.model_selection import train_test_split, cross_val_score

warnings.filterwarnings("ignore")
random_state = 42
activities = ["dab", "jamesbond", "mermaid", "pushback", "scarecrow", "snake", "window360", "cowboy", "logout"]
n_labels = len(activities)

In [2]:
dfs = []

def load_data(activity, label, files, subject):
    for file in files:
        df = pd.read_csv(os.path.join(subject, activity+str(file)+".csv"), sep=",", index_col=0)
        df["label"] = [label for _ in range(len(df))]
        df["activity"] = [activity for _ in range(len(df))]
        df["file"] = [file for _ in range(len(df))]
        df["subject"] = [subject for _ in range(len(df))]
        dfs.append(df)

ids = [1, 2, 3, 4, 5, 6, 7]
user = "subject1"
load_data("dab", 0, ids, user)
load_data("jamesbond", 1, ids, user)
load_data("mermaid", 2, ids, user)

user = "subject3"
load_data("dab", 0, ids, user)
load_data("jamesbond", 1, ids, user)
load_data("mermaid", 2, ids, user)

ids = [1, 2, 3, 4, 5, 6]
user = "subject2"
load_data("dab", 0, ids, user)
load_data("jamesbond", 1, ids, user)

ids = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
user = "subject1"
load_data("pushback", 3, ids, user)
load_data("scarecrow", 4, ids, user)
load_data("snake", 5, ids, user)
load_data("window360", 6, ids, user)
load_data("cowboy", 7, ids, user)
load_data("logout", 8, ids, user)

user = "subject3"
load_data("pushback", 3, ids, user)
load_data("scarecrow", 4, ids, user)
load_data("snake", 5, ids, user)
load_data("window360", 6, ids, user)
load_data("cowboy", 7, ids, user)
load_data("logout", 8, ids, user)

df = pd.concat(dfs)
print(df.shape)
df.head()

(52200, 13)


Unnamed: 0,yaw,pitch,roll,g_x,g_y,g_z,a_x,a_y,a_z,label,activity,file,subject
0,-0.05,12.99,72.28,-6.47,-2.95,-1.05,-0.2,1.0,-0.17,0,dab,1,subject1
1,-1.67,15.87,55.53,-67.89,11.78,-6.4,-0.26,1.29,-0.01,0,dab,1,subject1
2,-18.2,5.15,9.66,-180.85,-41.34,-70.0,-0.25,1.47,1.91,0,dab,1,subject1
3,23.6,7.15,-49.8,-250.14,7.11,91.12,-0.27,1.37,-0.11,0,dab,1,subject1
4,-10.8,23.43,-50.88,-4.71,59.81,-41.99,-0.66,-0.72,0.06,0,dab,1,subject1


In [3]:
def convert_to_timeseries(features, window_size, num_features):
    data = np.reshape(features, newshape=(window_size, num_features))
    return data.T

window_size = 30
X = list()
y = list()
df_len = len(df)
for idx in range(0, df_len, window_size):
    window_df = df[idx:idx+window_size]
    labels = window_df["label"].unique()
    files = window_df["file"].unique()
    subjects = window_df["subject"].unique()
    if len(labels) != 1 or len(files) != 1 or len(subjects) != 1 or len(window_df) < window_size:
        continue
    assert len(labels) == 1 and len(window_df) == window_size
    features = window_df.drop(columns=["label", "activity", "subject", "file"]).values
    features = convert_to_timeseries(features, window_size=window_size, num_features=9)
    X.append(features)
    y.append(labels)
     

X = np.array(X)
y = np.array(y)

X.shape, y.shape

((1740, 9, 30), (1740, 1))

In [4]:
def compute_mean(data):
    return np.mean(data)

def compute_variance(data):
    return np.var(data)

def compute_median_absolute_deviation(data):
    return stats.median_absolute_deviation(data)

def compute_root_mean_square(data):
    def compose(*fs):
        def wrapped(x):
            for f in fs[::-1]:
                x = f(x)
            return x
        return wrapped
    rms = compose(np.sqrt, np.mean, np.square)
    return rms(data)

def compute_kurtosis(data):
    return stats.kurtosis(data)

def compute_min_max(data):
    return np.max(data) - np.min(data)

def compute_spectral_entropy(data):
    freqs, power_density = signal.welch(data)
    return stats.entropy(power_density)

def compute_spectral_energy(data):
    freqs, power_density = signal.welch(data)
    return np.sum(np.square(power_density))

def compute_principle_frequency(data):
    freqs, power_density = signal.welch(data)
    return freqs[np.argmax(np.square(power_density))]

In [5]:
feature_names = []

# Rotation around the front-to-back axis is called roll.
# Rotation around the side-to-side axis is called pitch.
# Rotation around the vertical axis is called yaw.
for i in ["yaw", "pitch", "roll", "gyro_x", "gyro_y", "gyro_z", "acc_x", "acc_y", "acc_z"]:
        for j in ["_mean", "_var", "_mad", "_rms", "_kurtosis", "_min_max", "_entropy", "_pfreq"]:
            feature_names.append(i+j)

n_features = len(feature_names)

def extract_features_per_row(f_n):
    mean = compute_mean(f_n)
    var = compute_variance(f_n)
    mad = compute_median_absolute_deviation(f_n)
    rms = compute_root_mean_square(f_n)
    kurtosis = compute_kurtosis(f_n)
    min_max = compute_min_max(f_n)
    entropy = compute_spectral_entropy(f_n)
    pfreq = compute_principle_frequency(f_n)
    return mean, var, mad, rms, kurtosis, min_max, entropy, pfreq

def extract_features(X):
    new_features = np.ones((X.shape[0], n_features))

    for i in range(X.shape[0]):
        features = []
        for j in range(X.shape[1]):
            f_n = X[i][j]
            feature = extract_features_per_row(f_n)
            features.extend(feature)
        new_features[i] = np.array(features)
    print(new_features)

    return new_features

X = extract_features(X)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=random_state)

X_train.shape

[[-1.65600000e+00  9.05061451e+02  1.12603470e+01 ...  2.08000000e+00
   2.14686637e+00  4.66666667e-01]
 [-3.19333333e-01  1.02738219e+03  1.92663870e+01 ...  1.67000000e+00
   1.84534258e+00  4.00000000e-01]
 [ 1.40333333e-01  8.36102043e+02  6.67170000e+00 ...  1.52000000e+00
   2.16763364e+00  2.00000000e-01]
 ...
 [-1.06400000e+00  2.67424344e+02  2.10751590e+01 ...  5.70000000e-01
   1.32002764e+00  2.00000000e-01]
 [ 1.79166667e+00  2.99605754e+02  2.60122170e+01 ...  7.00000000e-01
   2.28489777e+00  2.00000000e-01]
 [ 3.57666667e-01  2.95832378e+02  2.32397550e+01 ...  8.90000000e-01
   1.92788333e+00  2.00000000e-01]]


(1165, 72)

In [6]:
X_test.shape

(575, 72)

In [7]:
scaler = preprocessing.StandardScaler()

X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)
y_train = y_train.flatten()
y_train.shape

(1165,)

In [8]:
df2 = pd.DataFrame(X_train, columns=feature_names)
df2["activityName"] = y_train
df2["activityName"] = df2["activityName"].replace([0,1,2,3,4,5,6,7,8], activities)
df2

Unnamed: 0,yaw_mean,yaw_var,yaw_mad,yaw_rms,yaw_kurtosis,yaw_min_max,yaw_entropy,yaw_pfreq,pitch_mean,pitch_var,...,acc_y_pfreq,acc_z_mean,acc_z_var,acc_z_mad,acc_z_rms,acc_z_kurtosis,acc_z_min_max,acc_z_entropy,acc_z_pfreq,activityName
0,-0.048055,-0.692056,-0.701782,-0.822765,-0.380227,-0.892487,0.439602,-1.180342,0.285761,-0.087769,...,-0.604326,0.582818,-0.074817,-0.207234,-0.048381,-0.390616,-0.143117,-1.331515,-0.888528,mermaid
1,0.302569,-0.621332,-0.492800,-0.683384,-0.424807,-0.764063,0.531584,1.796061,0.436737,-0.108284,...,0.126385,-0.497695,-0.074828,-0.442056,-0.224073,-0.226570,-0.137426,1.256865,-0.041792,jamesbond
2,0.097137,1.183943,-0.517617,1.224513,0.350333,1.834562,0.376034,-0.064191,0.403596,0.212483,...,-0.969682,0.897518,-0.005661,0.702705,0.730224,6.090165,0.826382,2.139418,2.498416,dab
3,0.741550,2.060651,0.389496,1.783740,-0.113622,1.766446,0.935830,-0.436241,-0.550922,-0.004848,...,0.857096,-5.183660,2.703011,0.321118,5.952811,6.282337,6.018944,2.226319,-0.041792,dab
4,0.508402,-0.821318,-1.095583,-1.360469,-0.477480,-1.407527,0.077781,-1.180342,0.136035,-0.116375,...,-0.604326,0.719663,-0.075058,-1.058466,-0.025208,-0.063821,-0.156051,1.406897,-0.888528,mermaid
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1160,0.432801,-0.031182,0.283046,0.179018,-0.411401,0.064632,-1.035653,-0.064191,0.092850,-0.035328,...,0.126385,-0.333654,-0.074143,0.409176,-0.169237,-0.263734,-0.104834,-0.762261,-0.041792,logout
1161,-0.271653,-0.380123,-0.642353,-0.225383,0.837448,0.062209,0.140700,-1.180342,-0.499454,-0.061886,...,-0.604326,0.189119,-0.074842,-0.471409,-0.131808,-0.110538,-0.136909,0.701004,-0.888528,scarecrow
1162,-2.736037,1.009450,1.158158,1.492935,-0.520999,0.804481,-1.086748,-0.064191,-0.154469,0.023923,...,0.857096,0.294019,-0.071929,1.524584,-0.020968,-0.236488,-0.045857,0.531241,0.381576,window360
1163,-2.000128,0.771364,1.493183,1.169593,-0.560232,0.781058,-1.087581,-0.436241,-1.150342,-0.113738,...,0.491740,0.206818,-0.073941,1.084292,-0.093541,-0.367632,-0.109490,-0.919718,0.381576,window360


In [9]:
X_train.shape

(1165, 72)

In [10]:
print(y_train)
print(y_train.shape)

[2 1 0 ... 6 6 8]
(1165,)


In [11]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class MLP(nn.Module):
    def __init__(self, input_size, hidden_size1, hidden_size2, output_size):
        super(MLP, self).__init__()
        self.layer1 = nn.Linear(input_size, hidden_size1, bias = True)
        self.layer2 = nn.Linear(hidden_size1, hidden_size2, bias = True)
        self.layer3 = nn.Linear(hidden_size2, output_size, bias = True)
    
    def forward(self,x):
        
        y = self.layer1(x)
        y_output = F.sigmoid(y)
        z = self.layer2(y_output)
        z_output = F.sigmoid(z)
        
        label = self.layer3(z_output)
        label_output = F.softmax(label)
        
        return label_output

In [12]:
from keras.utils.np_utils import to_categorical

X_train = X_train.astype('float32')
X_test = X_test.astype('float32')

y_train

array([2, 1, 0, ..., 6, 6, 8], dtype=int64)

In [13]:
model = MLP(72, 256, 128, 9)   
print(model)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr = 0.01, momentum = 0.9)

MLP(
  (layer1): Linear(in_features=72, out_features=256, bias=True)
  (layer2): Linear(in_features=256, out_features=128, bias=True)
  (layer3): Linear(in_features=128, out_features=9, bias=True)
)


In [14]:
def get_error(scores,labels):
    predicted_labels = scores.argmax(dim = 1)
    indicator = (predicted_labels == labels)
    num_matches = indicator.sum()
    
    return 1-num_matches.float()

In [15]:
for epoch in range(150):
    
    running_loss = 0
    running_error = 0
    num_labels = 0

    shuffled_indices = torch.randperm(1165)
#     
    X = torch.from_numpy(X_train)
    
    y = torch.from_numpy(y_train)
#     print(X.type)
    for count in range (0,1165):
        optimizer.zero_grad()
        minbatch_data = X[count].view(1,72)
        minbatch_label = y[count].view(1)
        
        minbatch_data.requires_grad_()
        scores = model(minbatch_data)
        
        log_prob = torch.log(scores)
        loss = criterion(log_prob, minbatch_label)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.detach().item()
        
        error = get_error(scores.detach(), minbatch_label)
        running_error += error.item()
        
        num_labels += 1
        
    total_loss = running_loss/num_labels
    total_error = running_error/num_labels
        

In [16]:
print(total_loss)
print(total_error)

9.140034252100063e-05
0.0


In [17]:
torch.save(model,"pytorch_model.pth")