# Human Activity Recognition Using Smartphones

The Human Activity Recognition database is built from the recordings of 30 subjects performing activities of daily living (ADL) while carrying a waist-mounted smartphone with embedded inertial sensors.

The goal of this final project is to predict the type of activity a subject is performing from the inertial sensor data recorded by the waist-mounted smartphone.

In [1]:
# import libraries

import numpy as np
import pandas as pd

## Get the Training and Test Data

In [7]:
# get the features
feature_file = 'data/features.txt'

# Every non-blank line is expected to hold at least two whitespace-separated
# tokens; the second token is taken as the feature name.
features = []
with open(feature_file, 'r', encoding='utf-8') as file:
    for line_no, line in enumerate(file, start=1):
        tokens = line.split()
        if not tokens:
            # tolerate blank lines (e.g. a trailing empty line at end of file)
            continue
        if len(tokens) < 2:
            raise ValueError(
                'Malformed line {} in {}: {!r}'.format(line_no, feature_file, line)
            )
        features.append(tokens[1])

print('No. of Features: {}'.format(len(features)))


No. of Features: 561


In [24]:
# Create a list of unique feature names: a name that was already used gets 'n'
# appended repeatedly until the result has not been used before. The first
# occurrence stays unchanged, the second becomes name + 'n', the third
# name + 'nn', and so on for any number of repeats.

seen = set()
uniq_features = []
for name in features:
    candidate = name
    while candidate in seen:
        candidate += 'n'
    uniq_features.append(candidate)
    seen.add(candidate)

# The list length always equals len(features), so it proves nothing on its own;
# explicitly verify that no name occurs twice.
assert len(set(uniq_features)) == len(features), "feature names are not unique"
print("Length of unique features = " + str(len(uniq_features)))


Length of unique features = 561


In [10]:
# get the training data from txt files to pandas dataframe
# sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True
X_train = pd.read_csv('data/train/X_train.txt', sep=r'\s+', header=None, names=uniq_features)

# add subject column to the dataframe
# (select the single column so that a Series, not a whole DataFrame, is assigned)
X_train['subject'] = pd.read_csv(
    'data/train/subject_train.txt', header=None, names=['subject']
)['subject']

y_train = pd.read_csv('data/train/y_train.txt', names=['Activity'])

# Rows are matched by position; a length mismatch would silently produce NaNs
# when the label columns are attached to the feature frame.
assert len(X_train) == len(y_train), "X_train and y_train have different row counts"

def map_labels(value):
    """Return the activity name for a numeric activity code.

    Codes 1 through 6 map, in order, to WALKING, WALKING_UPSTAIRS,
    WALKING_DOWNSTAIRS, SITTING, STANDING and LAYING.

    Raises:
        KeyError: if `value` is not one of the six known codes.
    """
    activity_names = (
        'WALKING',
        'WALKING_UPSTAIRS',
        'WALKING_DOWNSTAIRS',
        'SITTING',
        'STANDING',
        'LAYING',
    )
    # codes are 1-based, in the order listed above
    code_to_name = dict(enumerate(activity_names, start=1))
    return code_to_name[value]

# Translate the numeric activity codes to names. Series.map is used because
# DataFrame.map is only available in recent pandas releases; .to_frame() keeps
# y_train_labels a one-column DataFrame (column 'Activity') as before.
y_train_labels = y_train['Activity'].map(map_labels).to_frame()

# put all columns in a single dataframe
# NOTE: `train` is the same object as X_train (no copy is made), so the label
# columns added below are also visible through X_train.
train = X_train
train['Activity'] = y_train['Activity']
train['ActivityName'] = y_train_labels['Activity']

# fixed seed so the displayed sample row is the same on every run
train.sample(random_state=42)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity,ActivityName
2566,0.278423,-0.034008,-0.126925,-0.996372,-0.967098,-0.982184,-0.996405,-0.965976,-0.980818,-0.940356,...,-0.072012,0.178544,0.405054,0.568242,-0.804295,0.223146,-0.03571,15,5,STANDING


In [25]:
# get the test data from txt files to pandas dataframe
# sep=r'\s+' is the documented replacement for the deprecated delim_whitespace=True
X_test = pd.read_csv('data/test/X_test.txt', sep=r'\s+', header=None, names=uniq_features)

# add subject column to the dataframe
# (select the single column so that a Series, not a whole DataFrame, is assigned)
X_test['subject'] = pd.read_csv(
    'data/test/subject_test.txt', header=None, names=['subject']
)['subject']

y_test = pd.read_csv('data/test/y_test.txt', names=['Activity'])

# Rows are matched by position; a length mismatch would silently produce NaNs
# when the label columns are attached to the feature frame.
assert len(X_test) == len(y_test), "X_test and y_test have different row counts"

# map_labels is already defined in the training-data cell; it is reused here
# instead of being defined a second time. Series.map is used because
# DataFrame.map is only available in recent pandas releases; .to_frame() keeps
# y_test_labels a one-column DataFrame (column 'Activity') as before.
y_test_labels = y_test['Activity'].map(map_labels).to_frame()

# put all columns in a single dataframe
# NOTE: `test` is the same object as X_test (no copy is made), so the label
# columns added below are also visible through X_test.
test = X_test
test['Activity'] = y_test['Activity']
test['ActivityName'] = y_test_labels['Activity']

# fixed seed so the displayed sample row is the same on every run
test.sample(random_state=42)

Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity,ActivityName
2687,0.286355,-0.035174,-0.140594,-0.470594,-0.161859,-0.270344,-0.490161,-0.213268,-0.264895,-0.395903,...,-0.173484,0.313432,0.290348,-0.639894,-0.841003,0.146476,0.121483,24,1,WALKING


## Data Check

Check for duplicate rows and NaN/null values in the training and test data.

In [26]:
# Number of rows that are exact duplicates of an earlier row, per data set.
train_duplicate = int(train.duplicated().sum())
test_duplicate = int(test.duplicated().sum())

# Total number of missing cells, per data set.
train_null = train.isna().to_numpy().sum()
test_null = test.isna().to_numpy().sum()

# Report the four counts in a fixed order.
report = (
    ("Duplicates in training set = ", train_duplicate),
    ("Duplicates in test set = ", test_duplicate),
    ("Nulls in training set = ", train_null),
    ("Nulls in test set = ", test_null),
)
for message, count in report:
    print(message + str(count))

Duplicates in training set = 0
Duplicates in test set = 0
Nulls in training set = 0
Nulls in test set = 0


## Renaming Data Columns

In [31]:
columns = train.columns

# Remove '()', '-' and ',' from the column names.
# regex=False makes every pattern a literal string; when '()' is interpreted as
# a regular expression it is an empty group and no parentheses are removed.
for token in ('()', '-', ','):
    columns = columns.str.replace(token, '', regex=False)

# Stripping characters must not make two different columns share one name.
assert columns.is_unique, "column renaming produced duplicate names"

# The cleaned train names are applied to test as well, which is only valid
# when both frames have the same columns in the same order.
assert list(test.columns) == list(train.columns), "train and test columns differ"

train.columns = columns
test.columns = columns