In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import GridSearchCV

In [3]:
#loading train and test data
train_data = pd.read_table('UCI HAR Dataset/UCI HAR Dataset/train/X_train.txt',delim_whitespace=True,header=None)
train_labels = pd.read_table('UCI HAR Dataset/UCI HAR Dataset/train/y_train.txt',delim_whitespace=True,header=None)
test_data = pd.read_table('UCI HAR Dataset/UCI HAR Dataset/test/X_test.txt',delim_whitespace=True,header=None)
test_labels = pd.read_table('UCI HAR Dataset/UCI HAR Dataset/test/y_test.txt',delim_whitespace=True,header=None)

In [4]:
#exploring the features data
features = open('UCI HAR Dataset/UCI HAR Dataset/features.txt')
col_names = features.readline()

In [5]:
train_data.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,551,552,553,554,555,556,557,558,559,560
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.074323,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.158075,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,0.414503,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,0.404573,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.087753,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892


In [6]:
#trying to concat labels and data
train = pd.concat([train_data, train_labels], axis = 0)
test = pd.concat([test_data, test_labels], axis = 0)
train.head()

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,551,552,553,554,555,556,557,558,559,560
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.074323,-0.298676,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,0.158075,-0.595051,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,0.414503,-0.390748,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,0.404573,-0.11729,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,0.087753,-0.351471,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892


In [36]:
# load dataset
from numpy import dstack
from pandas import read_csv
 
# load a single file as a numpy array
def load_file(filepath):
	dataframe = read_csv(filepath, header=None, delim_whitespace=True)
	return dataframe.values
 
# load a list of files, such as x, y, z data for a given variable
def load_group(filenames, prefix=''):
	loaded = list()
	for name in filenames:
		data = load_file(prefix + name)
		loaded.append(data)
	# stack group so that features are the 3rd dimension
	loaded = dstack(loaded)
	return loaded
 
# load a dataset group, such as train or test
def load_dataset(group, prefix=''):
	filepath = prefix + group + '/Inertial Signals/'
	# load all 9 files as a single array
	filenames = list()
	# total acceleration
	filenames += ['total_acc_x_'+group+'.txt', 'total_acc_y_'+group+'.txt', 'total_acc_z_'+group+'.txt']
	# body acceleration
	filenames += ['body_acc_x_'+group+'.txt', 'body_acc_y_'+group+'.txt', 'body_acc_z_'+group+'.txt']
	# body gyroscope
	filenames += ['body_gyro_x_'+group+'.txt', 'body_gyro_y_'+group+'.txt', 'body_gyro_z_'+group+'.txt']
	# load input data
	X = load_group(filenames, filepath)
	# load class output
	y = load_file(prefix + group + '/y_'+group+'.txt')
	return X, y
 
# load all train
trainX, trainy = load_dataset('train', 'UCI HAR Dataset/UCI HAR Dataset/')
print(trainX)
print(trainX.shape, trainy.shape)
# load all test
testX, testy = load_dataset('test', 'UCI HAR Dataset/UCI HAR Dataset/')
print(testX.shape, testy.shape)

[[[ 1.012817e+00 -1.232167e-01  1.029341e-01 ...  3.019122e-02
    6.601362e-02  2.285864e-02]
  [ 1.022833e+00 -1.268756e-01  1.056872e-01 ...  4.371071e-02
    4.269897e-02  1.031572e-02]
  [ 1.022028e+00 -1.240037e-01  1.021025e-01 ...  3.568780e-02
    7.485018e-02  1.324969e-02]
  ...
  [ 1.018445e+00 -1.240696e-01  1.003852e-01 ...  3.985177e-02
    1.909445e-03 -2.170124e-03]
  [ 1.019372e+00 -1.227451e-01  9.987355e-02 ...  3.744932e-02
   -7.982483e-05 -5.642633e-03]
  [ 1.021171e+00 -1.213260e-01  9.498741e-02 ...  2.881781e-02
   -3.771800e-05 -1.446006e-03]]

 [[ 1.018851e+00 -1.239760e-01  9.792958e-02 ...  1.711106e-02
    6.122797e-03  1.226815e-02]
  [ 1.022380e+00 -1.268078e-01  9.935086e-02 ...  2.417851e-02
    9.710357e-03  1.614958e-02]
  [ 1.020781e+00 -1.277862e-01  9.811381e-02 ...  3.022889e-02
    1.032192e-02  1.589471e-02]
  ...
  [ 1.014788e+00 -1.290268e-01  9.353520e-02 ... -3.474078e-02
   -8.694754e-03  5.044730e-03]
  [ 1.016499e+00 -1.264244e-01  8.90

In [29]:
from prettytable import PrettyTable
table = PrettyTable()
table.field_names = ["Model", "Accuracy"]

models = [
    SVC(),
    RandomForestClassifier()
]


for model in models:
    
    model.fit(train_data.values,train_labels.values.ravel().T)
    pred=model.predict(test_data)
    accuracy=accuracy_score(test_labels,pred)
    table.add_row([type(model).__name__, format(accuracy, '.2f')])
    
print(table)



+------------------------+----------+
|         Model          | Accuracy |
+------------------------+----------+
|          SVC           |   0.94   |
| RandomForestClassifier |   0.90   |
+------------------------+----------+


In [30]:
#SVC classifier
classifier = SVC()
parameters=[{'kernel': ['rbf'], 'gamma': [0.001, 0.0001], 'C': [1, 10, 100, 1000]}, {'kernel': ['linear'], 'C': [1, 10, 100, 1000]}]
model=GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,verbose=4)
model.fit(train_data.values,train_labels.values.ravel().T)
pred=model.predict(test_data)
accuracy=accuracy_score(test_labels,pred)
accuracy

Fitting 4 folds for each of 12 candidates, totalling 48 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  2.2min
[Parallel(n_jobs=-1)]: Done  48 out of  48 | elapsed:  3.0min finished


0.9657278588394977

In [31]:
#Random Forest
classifier = RandomForestClassifier()
parameters = {'n_estimators': [10, 100, 1000], 'max_depth': [3, 6, 9], 'max_features' : ['auto', 'log2']}
model=GridSearchCV(classifier,parameters,n_jobs=-1,cv=4,scoring='accuracy',verbose=4)
model.fit(train_data.values,train_labels.values.ravel().T)
pred=model.predict(test_data)
accuracy=accuracy_score(test_labels,pred)
accuracy

Fitting 4 folds for each of 18 candidates, totalling 72 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  1.1min
[Parallel(n_jobs=-1)]: Done  72 out of  72 | elapsed:  7.8min finished


0.9273837801153716