## Deep Learning for Human Activity Recognition - Basic Data Analysis

In [2]:
# Silence the noisy warning categories so cell outputs stay readable.
# Filters are registered in the same order as before:
# DeprecationWarning, then UserWarning, then FutureWarning.
import warnings

for _category in (DeprecationWarning, UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)

### 1. Imports

In [3]:
import numpy as np
import pandas as pd
np.set_printoptions(linewidth=200)
pd.options.display.max_columns = 100
pd.options.display.max_rows = 100
pd.options.display.max_colwidth = 100

# matplotlib and seaborn for plotting
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
sns.set(style="darkgrid")

# import other libraries
import os, pickle, json
from collections import Counter 
from operator import itemgetter

### 2. Read in Data

In [4]:
# Location of the data set, relative to this notebook. The other cells build
# every file path by concatenating onto this prefix.
root = '../data/hapt_data_set/'

# List the contents of the data set root and of its Train/ and Test/ folders.
for _folder in ('', 'Train', 'Test'):
    print(os.listdir(root + _folder))

['Test', 'features.txt', 'activity_labels.txt', 'features_info.txt', 'Train', 'README.txt', 'RawData']
['X_train.txt', 'subject_id_train.txt', 'y_train.txt']
['X_test.txt', 'y_test.txt', 'subject_id_test.txt']


In [5]:
# Read features.txt as a headerless table and flatten it into a 1-D sequence
# of raw names, then strip trailing whitespace from each name. The resulting
# array is used as the column labels when the feature matrices are loaded.
_raw_feature_names = pd.read_table(root + 'features.txt', header=None).values.flatten()
features = np.array([raw_name.rstrip() for raw_name in _raw_feature_names])

In [6]:
%%time
X_train = pd.read_table(root + 'Train/X_train.txt', sep=' ', header=None, names=features)
y_train = pd.read_table(root + 'Train/y_train.txt', sep=' ', header=None)
subject_id_train = pd.read_table(root + 'Train/subject_id_train.txt', sep=' ', header=None)

X_test = pd.read_table(root + 'Test/X_test.txt', sep=' ', header=None, names=features)
y_test = pd.read_table(root + 'Test/y_test.txt', sep=' ', header=None)
subject_id_test = pd.read_table(root + 'Test/subject_id_test.txt', sep=' ', header=None)

CPU times: user 1.37 s, sys: 138 ms, total: 1.51 s
Wall time: 1.73 s


### 3. Glimpse of Data


In [7]:
# Report the shape of the train feature matrix, then show a one-row table of
# the column dtypes followed by the first three rows.
print('Size of train data', X_train.shape)
display(
    X_train.dtypes.to_frame(name='dtype').T,
    X_train.head(3),
)

Size of train data (7767, 561)


Unnamed: 0,tBodyAcc-Mean-1,tBodyAcc-Mean-2,tBodyAcc-Mean-3,tBodyAcc-STD-1,tBodyAcc-STD-2,tBodyAcc-STD-3,tBodyAcc-Mad-1,tBodyAcc-Mad-2,tBodyAcc-Mad-3,tBodyAcc-Max-1,tBodyAcc-Max-2,tBodyAcc-Max-3,tBodyAcc-Min-1,tBodyAcc-Min-2,tBodyAcc-Min-3,tBodyAcc-SMA-1,tBodyAcc-Energy-1,tBodyAcc-Energy-2,tBodyAcc-Energy-3,tBodyAcc-IQR-1,tBodyAcc-IQR-2,tBodyAcc-IQR-3,tBodyAcc-ropy-1,tBodyAcc-ropy-2,tBodyAcc-ropy-3,tBodyAcc-ARCoeff-1,tBodyAcc-ARCoeff-2,tBodyAcc-ARCoeff-3,tBodyAcc-ARCoeff-4,tBodyAcc-ARCoeff-5,tBodyAcc-ARCoeff-6,tBodyAcc-ARCoeff-7,tBodyAcc-ARCoeff-8,tBodyAcc-ARCoeff-9,tBodyAcc-ARCoeff-10,tBodyAcc-ARCoeff-11,tBodyAcc-ARCoeff-12,tBodyAcc-Correlation-1,tBodyAcc-Correlation-2,tBodyAcc-Correlation-3,tGravityAcc-Mean-1,tGravityAcc-Mean-2,tGravityAcc-Mean-3,tGravityAcc-STD-1,tGravityAcc-STD-2,tGravityAcc-STD-3,tGravityAcc-Mad-1,tGravityAcc-Mad-2,tGravityAcc-Mad-3,tGravityAcc-Max-1,...,fBodyAccMag-MaxInds-1,fBodyAccMag-MeanFreq-1,fBodyAccMag-Skewness-1,fBodyAccMag-Kurtosis-1,fBodyAccJerkMag-Mean-1,fBodyAccJerkMag-STD-1,fBodyAccJerkMag-Mad-1,fBodyAccJerkMag-Max-1,fBodyAccJerkMag-Min-1,fBodyAccJerkMag-SMA-1,fBodyAccJerkMag-Energy-1,fBodyAccJerkMag-IQR-1,fBodyAccJerkMag-ropy-1,fBodyAccJerkMag-MaxInds-1,fBodyAccJerkMag-MeanFreq-1,fBodyAccJerkMag-Skewness-1,fBodyAccJerkMag-Kurtosis-1,fBodyGyroMag-Mean-1,fBodyGyroMag-STD-1,fBodyGyroMag-Mad-1,fBodyGyroMag-Max-1,fBodyGyroMag-Min-1,fBodyGyroMag-SMA-1,fBodyGyroMag-Energy-1,fBodyGyroMag-IQR-1,fBodyGyroMag-ropy-1,fBodyGyroMag-MaxInds-1,fBodyGyroMag-MeanFreq-1,fBodyGyroMag-Skewness-1,fBodyGyroMag-Kurtosis-1,fBodyGyroJerkMag-Mean-1,fBodyGyroJerkMag-STD-1,fBodyGyroJerkMag-Mad-1,fBodyGyroJerkMag-Max-1,fBodyGyroJerkMag-Min-1,fBodyGyroJerkMag-SMA-1,fBodyGyroJerkMag-Energy-1,fBodyGyroJerkMag-IQR-1,fBodyGyroJerkMag-ropy-1,fBodyGyroJerkMag-MaxInds-1,fBodyGyroJerkMag-MeanFreq-1,fBodyGyroJerkMag-Skewness-1,fBodyGyroJerkMag-Kurtosis-1,tBodyAcc-AngleWRTGravity-1,tBodyAccJerk-AngleWRTGravity-1,tBodyGyro-AngleWRTGravity-1,tBodyGyroJerk-AngleWRTGravity-1
,tXAxisAcc-AngleWRTGravity-1,tYAxisAcc-AngleWRTGravity-1,tZAxisAcc-AngleWRTGravity-1
dtype,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,...,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64


Unnamed: 0,tBodyAcc-Mean-1,tBodyAcc-Mean-2,tBodyAcc-Mean-3,tBodyAcc-STD-1,tBodyAcc-STD-2,tBodyAcc-STD-3,tBodyAcc-Mad-1,tBodyAcc-Mad-2,tBodyAcc-Mad-3,tBodyAcc-Max-1,tBodyAcc-Max-2,tBodyAcc-Max-3,tBodyAcc-Min-1,tBodyAcc-Min-2,tBodyAcc-Min-3,tBodyAcc-SMA-1,tBodyAcc-Energy-1,tBodyAcc-Energy-2,tBodyAcc-Energy-3,tBodyAcc-IQR-1,tBodyAcc-IQR-2,tBodyAcc-IQR-3,tBodyAcc-ropy-1,tBodyAcc-ropy-2,tBodyAcc-ropy-3,tBodyAcc-ARCoeff-1,tBodyAcc-ARCoeff-2,tBodyAcc-ARCoeff-3,tBodyAcc-ARCoeff-4,tBodyAcc-ARCoeff-5,tBodyAcc-ARCoeff-6,tBodyAcc-ARCoeff-7,tBodyAcc-ARCoeff-8,tBodyAcc-ARCoeff-9,tBodyAcc-ARCoeff-10,tBodyAcc-ARCoeff-11,tBodyAcc-ARCoeff-12,tBodyAcc-Correlation-1,tBodyAcc-Correlation-2,tBodyAcc-Correlation-3,tGravityAcc-Mean-1,tGravityAcc-Mean-2,tGravityAcc-Mean-3,tGravityAcc-STD-1,tGravityAcc-STD-2,tGravityAcc-STD-3,tGravityAcc-Mad-1,tGravityAcc-Mad-2,tGravityAcc-Mad-3,tGravityAcc-Max-1,...,fBodyAccMag-MaxInds-1,fBodyAccMag-MeanFreq-1,fBodyAccMag-Skewness-1,fBodyAccMag-Kurtosis-1,fBodyAccJerkMag-Mean-1,fBodyAccJerkMag-STD-1,fBodyAccJerkMag-Mad-1,fBodyAccJerkMag-Max-1,fBodyAccJerkMag-Min-1,fBodyAccJerkMag-SMA-1,fBodyAccJerkMag-Energy-1,fBodyAccJerkMag-IQR-1,fBodyAccJerkMag-ropy-1,fBodyAccJerkMag-MaxInds-1,fBodyAccJerkMag-MeanFreq-1,fBodyAccJerkMag-Skewness-1,fBodyAccJerkMag-Kurtosis-1,fBodyGyroMag-Mean-1,fBodyGyroMag-STD-1,fBodyGyroMag-Mad-1,fBodyGyroMag-Max-1,fBodyGyroMag-Min-1,fBodyGyroMag-SMA-1,fBodyGyroMag-Energy-1,fBodyGyroMag-IQR-1,fBodyGyroMag-ropy-1,fBodyGyroMag-MaxInds-1,fBodyGyroMag-MeanFreq-1,fBodyGyroMag-Skewness-1,fBodyGyroMag-Kurtosis-1,fBodyGyroJerkMag-Mean-1,fBodyGyroJerkMag-STD-1,fBodyGyroJerkMag-Mad-1,fBodyGyroJerkMag-Max-1,fBodyGyroJerkMag-Min-1,fBodyGyroJerkMag-SMA-1,fBodyGyroJerkMag-Energy-1,fBodyGyroJerkMag-IQR-1,fBodyGyroJerkMag-ropy-1,fBodyGyroJerkMag-MaxInds-1,fBodyGyroJerkMag-MeanFreq-1,fBodyGyroJerkMag-Skewness-1,fBodyGyroJerkMag-Kurtosis-1,tBodyAcc-AngleWRTGravity-1,tBodyAccJerk-AngleWRTGravity-1,tBodyGyro-AngleWRTGravity-1,tBodyGyroJerk-AngleWRTGravity-1
,tXAxisAcc-AngleWRTGravity-1,tYAxisAcc-AngleWRTGravity-1,tZAxisAcc-AngleWRTGravity-1
0,0.04358,-0.00597,-0.035054,-0.995381,-0.988366,-0.937382,-0.995007,-0.988816,-0.953325,-0.794796,-0.744893,-0.648447,0.841796,0.70844,0.651716,-0.975752,-0.99995,-0.999888,-0.998014,-0.993999,-0.99198,-0.97097,-0.547095,-0.700974,-0.622697,0.921884,-0.719483,0.342168,-0.161318,0.266049,-0.274351,0.267205,-0.020958,0.38261,-0.501748,0.512463,-0.206337,0.376778,0.435172,0.660199,0.960051,-0.135939,0.115556,-0.988134,-0.982693,-0.919723,-0.988362,-0.985523,-0.931834,0.892055,...,-0.842105,-0.061629,-0.44612,-0.797046,-0.99361,-0.994226,-0.992839,-0.993778,-0.988172,-0.99361,-0.999918,-0.991736,-1.0,-0.936508,0.34926,-0.517127,-0.801006,-0.980135,-0.961301,-0.974129,-0.956013,-0.989894,-0.980135,-0.99924,-0.992673,-0.701291,-1.0,-0.13248,0.565697,0.363478,-0.991994,-0.990877,-0.990169,-0.992521,-0.991044,-0.991994,-0.999937,-0.990537,-0.871306,-1.0,-0.012236,-0.314848,-0.713308,-0.112754,0.0304,-0.464761,-0.018446,-0.841559,0.179913,-0.051718
1,0.03948,-0.002131,-0.029067,-0.998348,-0.982945,-0.971273,-0.998702,-0.983315,-0.974,-0.802537,-0.736338,-0.712415,0.838758,0.70844,0.65934,-0.987427,-0.999993,-0.999826,-0.999411,-0.998918,-0.985482,-0.973481,-0.781973,-0.534604,-0.593165,0.607435,-0.266783,0.275882,0.200417,0.131266,-0.149017,0.292436,-0.192986,0.217496,-0.089175,0.059909,-0.236609,-0.012696,-0.072711,0.578649,0.963215,-0.136648,0.109558,-0.997918,-0.990006,-0.95516,-0.998358,-0.990346,-0.956796,0.89206,...,-1.0,-0.01827,-0.128777,-0.448744,-0.99022,-0.992431,-0.990594,-0.994902,-0.989545,-0.99022,-0.999867,-0.991506,-1.0,-0.84127,0.533688,-0.625993,-0.898311,-0.988296,-0.983313,-0.982951,-0.987406,-0.992134,-0.988296,-0.999811,-0.993996,-0.720683,-0.948718,-0.268979,-0.364219,-0.723724,-0.995857,-0.99658,-0.995671,-0.996939,-0.994436,-0.995857,-0.999981,-0.994623,-1.0,-1.0,0.202804,-0.603199,-0.860677,0.053477,-0.007435,-0.732626,0.703511,-0.845092,0.180261,-0.047436
2,0.039978,-0.005153,-0.022651,-0.995482,-0.977314,-0.98476,-0.996415,-0.975835,-0.985973,-0.798477,-0.736338,-0.712415,0.834002,0.705008,0.674551,-0.988528,-0.999972,-0.999719,-0.999803,-0.996898,-0.976781,-0.986754,-0.688176,-0.520514,-0.593165,0.272262,-0.056424,0.322283,-0.273292,0.03718,-0.133612,0.332487,-0.240491,0.348733,-0.195409,0.229436,-0.316816,-0.123889,-0.181137,0.608219,0.963532,-0.137105,0.102062,-0.999657,-0.993236,-0.995364,-0.999717,-0.993122,-0.995437,0.892401,...,-0.894737,0.277458,-0.630062,-0.880125,-0.989165,-0.991337,-0.988135,-0.993592,-0.999886,-0.989165,-0.999845,-0.987029,-1.0,-0.904762,0.661975,-0.725887,-0.926663,-0.989255,-0.986019,-0.984533,-0.991701,-0.995857,-0.989255,-0.999854,-0.993256,-0.736521,-0.794872,-0.212429,-0.564868,-0.874594,-0.995034,-0.995308,-0.994868,-0.996133,-0.995863,-0.995034,-0.999973,-0.993834,-1.0,-0.555556,0.440079,-0.404427,-0.761847,-0.118559,0.177899,0.100699,0.808529,-0.84923,0.18061,-0.042271


In [8]:
# Report the shape of the test feature matrix, then show a one-row table of
# the column dtypes followed by the first three rows.
print('Size of test data', X_test.shape)
display(
    X_test.dtypes.to_frame(name='dtype').T,
    X_test.head(3),
)

Size of test data (3162, 561)


Unnamed: 0,tBodyAcc-Mean-1,tBodyAcc-Mean-2,tBodyAcc-Mean-3,tBodyAcc-STD-1,tBodyAcc-STD-2,tBodyAcc-STD-3,tBodyAcc-Mad-1,tBodyAcc-Mad-2,tBodyAcc-Mad-3,tBodyAcc-Max-1,tBodyAcc-Max-2,tBodyAcc-Max-3,tBodyAcc-Min-1,tBodyAcc-Min-2,tBodyAcc-Min-3,tBodyAcc-SMA-1,tBodyAcc-Energy-1,tBodyAcc-Energy-2,tBodyAcc-Energy-3,tBodyAcc-IQR-1,tBodyAcc-IQR-2,tBodyAcc-IQR-3,tBodyAcc-ropy-1,tBodyAcc-ropy-2,tBodyAcc-ropy-3,tBodyAcc-ARCoeff-1,tBodyAcc-ARCoeff-2,tBodyAcc-ARCoeff-3,tBodyAcc-ARCoeff-4,tBodyAcc-ARCoeff-5,tBodyAcc-ARCoeff-6,tBodyAcc-ARCoeff-7,tBodyAcc-ARCoeff-8,tBodyAcc-ARCoeff-9,tBodyAcc-ARCoeff-10,tBodyAcc-ARCoeff-11,tBodyAcc-ARCoeff-12,tBodyAcc-Correlation-1,tBodyAcc-Correlation-2,tBodyAcc-Correlation-3,tGravityAcc-Mean-1,tGravityAcc-Mean-2,tGravityAcc-Mean-3,tGravityAcc-STD-1,tGravityAcc-STD-2,tGravityAcc-STD-3,tGravityAcc-Mad-1,tGravityAcc-Mad-2,tGravityAcc-Mad-3,tGravityAcc-Max-1,...,fBodyAccMag-MaxInds-1,fBodyAccMag-MeanFreq-1,fBodyAccMag-Skewness-1,fBodyAccMag-Kurtosis-1,fBodyAccJerkMag-Mean-1,fBodyAccJerkMag-STD-1,fBodyAccJerkMag-Mad-1,fBodyAccJerkMag-Max-1,fBodyAccJerkMag-Min-1,fBodyAccJerkMag-SMA-1,fBodyAccJerkMag-Energy-1,fBodyAccJerkMag-IQR-1,fBodyAccJerkMag-ropy-1,fBodyAccJerkMag-MaxInds-1,fBodyAccJerkMag-MeanFreq-1,fBodyAccJerkMag-Skewness-1,fBodyAccJerkMag-Kurtosis-1,fBodyGyroMag-Mean-1,fBodyGyroMag-STD-1,fBodyGyroMag-Mad-1,fBodyGyroMag-Max-1,fBodyGyroMag-Min-1,fBodyGyroMag-SMA-1,fBodyGyroMag-Energy-1,fBodyGyroMag-IQR-1,fBodyGyroMag-ropy-1,fBodyGyroMag-MaxInds-1,fBodyGyroMag-MeanFreq-1,fBodyGyroMag-Skewness-1,fBodyGyroMag-Kurtosis-1,fBodyGyroJerkMag-Mean-1,fBodyGyroJerkMag-STD-1,fBodyGyroJerkMag-Mad-1,fBodyGyroJerkMag-Max-1,fBodyGyroJerkMag-Min-1,fBodyGyroJerkMag-SMA-1,fBodyGyroJerkMag-Energy-1,fBodyGyroJerkMag-IQR-1,fBodyGyroJerkMag-ropy-1,fBodyGyroJerkMag-MaxInds-1,fBodyGyroJerkMag-MeanFreq-1,fBodyGyroJerkMag-Skewness-1,fBodyGyroJerkMag-Kurtosis-1,tBodyAcc-AngleWRTGravity-1,tBodyAccJerk-AngleWRTGravity-1,tBodyGyro-AngleWRTGravity-1,tBodyGyroJerk-AngleWRTGravity-1
,tXAxisAcc-AngleWRTGravity-1,tYAxisAcc-AngleWRTGravity-1,tZAxisAcc-AngleWRTGravity-1
dtype,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,...,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64,float64


Unnamed: 0,tBodyAcc-Mean-1,tBodyAcc-Mean-2,tBodyAcc-Mean-3,tBodyAcc-STD-1,tBodyAcc-STD-2,tBodyAcc-STD-3,tBodyAcc-Mad-1,tBodyAcc-Mad-2,tBodyAcc-Mad-3,tBodyAcc-Max-1,tBodyAcc-Max-2,tBodyAcc-Max-3,tBodyAcc-Min-1,tBodyAcc-Min-2,tBodyAcc-Min-3,tBodyAcc-SMA-1,tBodyAcc-Energy-1,tBodyAcc-Energy-2,tBodyAcc-Energy-3,tBodyAcc-IQR-1,tBodyAcc-IQR-2,tBodyAcc-IQR-3,tBodyAcc-ropy-1,tBodyAcc-ropy-2,tBodyAcc-ropy-3,tBodyAcc-ARCoeff-1,tBodyAcc-ARCoeff-2,tBodyAcc-ARCoeff-3,tBodyAcc-ARCoeff-4,tBodyAcc-ARCoeff-5,tBodyAcc-ARCoeff-6,tBodyAcc-ARCoeff-7,tBodyAcc-ARCoeff-8,tBodyAcc-ARCoeff-9,tBodyAcc-ARCoeff-10,tBodyAcc-ARCoeff-11,tBodyAcc-ARCoeff-12,tBodyAcc-Correlation-1,tBodyAcc-Correlation-2,tBodyAcc-Correlation-3,tGravityAcc-Mean-1,tGravityAcc-Mean-2,tGravityAcc-Mean-3,tGravityAcc-STD-1,tGravityAcc-STD-2,tGravityAcc-STD-3,tGravityAcc-Mad-1,tGravityAcc-Mad-2,tGravityAcc-Mad-3,tGravityAcc-Max-1,...,fBodyAccMag-MaxInds-1,fBodyAccMag-MeanFreq-1,fBodyAccMag-Skewness-1,fBodyAccMag-Kurtosis-1,fBodyAccJerkMag-Mean-1,fBodyAccJerkMag-STD-1,fBodyAccJerkMag-Mad-1,fBodyAccJerkMag-Max-1,fBodyAccJerkMag-Min-1,fBodyAccJerkMag-SMA-1,fBodyAccJerkMag-Energy-1,fBodyAccJerkMag-IQR-1,fBodyAccJerkMag-ropy-1,fBodyAccJerkMag-MaxInds-1,fBodyAccJerkMag-MeanFreq-1,fBodyAccJerkMag-Skewness-1,fBodyAccJerkMag-Kurtosis-1,fBodyGyroMag-Mean-1,fBodyGyroMag-STD-1,fBodyGyroMag-Mad-1,fBodyGyroMag-Max-1,fBodyGyroMag-Min-1,fBodyGyroMag-SMA-1,fBodyGyroMag-Energy-1,fBodyGyroMag-IQR-1,fBodyGyroMag-ropy-1,fBodyGyroMag-MaxInds-1,fBodyGyroMag-MeanFreq-1,fBodyGyroMag-Skewness-1,fBodyGyroMag-Kurtosis-1,fBodyGyroJerkMag-Mean-1,fBodyGyroJerkMag-STD-1,fBodyGyroJerkMag-Mad-1,fBodyGyroJerkMag-Max-1,fBodyGyroJerkMag-Min-1,fBodyGyroJerkMag-SMA-1,fBodyGyroJerkMag-Energy-1,fBodyGyroJerkMag-IQR-1,fBodyGyroJerkMag-ropy-1,fBodyGyroJerkMag-MaxInds-1,fBodyGyroJerkMag-MeanFreq-1,fBodyGyroJerkMag-Skewness-1,fBodyGyroJerkMag-Kurtosis-1,tBodyAcc-AngleWRTGravity-1,tBodyAccJerk-AngleWRTGravity-1,tBodyGyro-AngleWRTGravity-1,tBodyGyroJerk-AngleWRTGravity-1
,tXAxisAcc-AngleWRTGravity-1,tYAxisAcc-AngleWRTGravity-1,tZAxisAcc-AngleWRTGravity-1
0,0.030914,-0.008927,0.040382,-0.938504,-0.944626,-0.759334,-0.952398,-0.950281,-0.802483,-0.757099,-0.733398,-0.40796,0.728511,0.658266,0.629169,-0.913306,-0.997966,-0.998683,-0.975818,-0.97544,-0.966793,-0.914396,-0.363822,-0.420897,0.181772,0.517928,-0.403636,0.449325,-0.147545,0.22315,-0.190143,0.155899,0.054035,0.053156,-3.7e-05,-0.077888,0.028585,0.077659,-0.490616,-0.711964,0.933151,-0.27736,0.115469,-0.940012,-0.94046,-0.714148,-0.945699,-0.942284,-0.7447,0.906083,...,-1.0,-0.448379,0.005537,-0.383555,-0.894952,-0.896808,-0.889015,-0.928866,-0.898096,-0.894952,-0.993471,-0.921836,-0.484619,-1.0,-0.031754,-0.255114,-0.698671,-0.77061,-0.797105,-0.769223,-0.834266,-0.940359,-0.77061,-0.970958,-0.798403,0.179435,-1.0,-0.054576,-0.49682,-0.764755,-0.890169,-0.90748,-0.895518,-0.917953,-0.909825,-0.890169,-0.994105,-0.898097,-0.234815,-1.0,0.12283,-0.345684,-0.709087,0.006462,0.16292,-0.825886,0.271151,-0.720559,0.276779,-0.051074
1,0.042548,0.001079,-0.026236,-0.975516,-0.977502,-0.960146,-0.986694,-0.978983,-0.96682,-0.757099,-0.733398,-0.701699,0.770927,0.706301,0.635732,-0.978295,-0.999618,-0.999708,-0.999158,-0.993775,-0.982664,-0.974733,-0.466563,-0.392219,-0.435565,0.504355,-0.117122,0.062621,0.188184,0.209414,-0.245994,0.390254,-0.2478,0.210276,-0.146806,0.106145,-0.022423,-0.104187,-0.429196,0.398133,0.924068,-0.283835,0.152755,-0.991197,-0.984752,-0.976904,-0.991464,-0.984357,-0.977489,0.856176,...,-0.894737,0.224164,-0.214108,-0.599074,-0.945325,-0.934609,-0.930151,-0.950377,-0.967702,-0.945325,-0.99761,-0.957543,-0.723642,-1.0,-0.486025,-0.216243,-0.664383,-0.924461,-0.916766,-0.920964,-0.924413,-0.991454,-0.924461,-0.995727,-0.933145,-0.21886,-1.0,-0.039379,-0.11658,-0.46726,-0.951981,-0.938387,-0.93823,-0.949994,-0.986899,-0.951981,-0.998272,-0.951332,-0.431053,-1.0,-0.314688,-0.142804,-0.600867,-0.083495,0.0175,-0.434375,0.920593,-0.698684,0.281322,-0.076825
2,0.038297,-0.01166,-0.025643,-0.993922,-0.979215,-0.97303,-0.994298,-0.980535,-0.977508,-0.799005,-0.745911,-0.695737,0.837921,0.69052,0.659163,-0.983764,-0.999959,-0.999645,-0.999527,-0.993224,-0.983084,-0.981887,-0.707815,-0.716036,-0.560652,0.241698,-0.081234,0.055641,0.103205,0.009404,-0.12318,0.439879,-0.402218,0.305289,-0.284243,0.637201,-0.572356,0.306166,-0.323897,0.278536,0.926579,-0.282139,0.146271,-0.996731,-0.988875,-0.987926,-0.996877,-0.989432,-0.98723,0.856269,...,-1.0,0.360245,-0.790164,-0.946104,-0.970955,-0.970773,-0.969403,-0.975865,-0.976354,-0.970955,-0.999269,-0.978591,-0.898,-1.0,0.143718,-0.206895,-0.638213,-0.975209,-0.97399,-0.967707,-0.984206,-0.997741,-0.975209,-0.999504,-0.968278,-0.561266,-0.897436,-0.170494,-0.692859,-0.927681,-0.985692,-0.983452,-0.983649,-0.98693,-0.989605,-0.985692,-0.999831,-0.986227,-0.809976,-0.936508,0.11473,-0.209525,-0.6455,-0.034956,0.202302,0.064103,0.145068,-0.703355,0.280062,-0.072302


In [9]:
# Each line of activity_labels.txt is stripped of trailing whitespace and split
# on whitespace; the rows are then unpacked as (label, activity) pairs.
_label_lines = pd.read_table(root + 'activity_labels.txt', header=None).values.flatten()
activity_labels = np.array([line.rstrip().split() for line in _label_lines])

# Lookup tables in both directions: integer label -> activity name, and
# activity name -> integer label.
label2activity_dict = {int(label): activity for label, activity in activity_labels}
activity2label_dict = {activity: int(label) for label, activity in activity_labels}

In [10]:
# Class balance of the training labels: for every known activity, print its
# sample count and its share of the split.
print('Size of train label', y_train.shape)
c = Counter(y_train.values.flatten())
n_train = len(y_train)
# Iterate over the label -> activity mapping instead of range(1, len(c) + 1).
# The range form assumes the labels present are exactly 1..N, so a class that
# is absent from the split shrinks len(c) and silently drops the highest
# label(s) from the report.
for label, activity in sorted(label2activity_dict.items()):
    count = c[label]  # a Counter returns 0 for a label that never occurs
    share = count / n_train * 100 if n_train else 0.0  # avoid 0/0 on an empty split
    print(f'label {label} ({activity}) has {count} samples ({share:.04}% of train data)')

Size of train label (7767, 1)
label 1 (WALKING) has 1226 samples (15.78% of train data)
label 2 (WALKING_UPSTAIRS) has 1073 samples (13.81% of train data)
label 3 (WALKING_DOWNSTAIRS) has 987 samples (12.71% of train data)
label 4 (SITTING) has 1293 samples (16.65% of train data)
label 5 (STANDING) has 1423 samples (18.32% of train data)
label 6 (LAYING) has 1413 samples (18.19% of train data)
label 7 (STAND_TO_SIT) has 47 samples (0.6051% of train data)
label 8 (SIT_TO_STAND) has 23 samples (0.2961% of train data)
label 9 (SIT_TO_LIE) has 75 samples (0.9656% of train data)
label 10 (LIE_TO_SIT) has 60 samples (0.7725% of train data)
label 11 (STAND_TO_LIE) has 90 samples (1.159% of train data)
label 12 (LIE_TO_STAND) has 57 samples (0.7339% of train data)


In [11]:
# Class balance of the test labels: for every known activity, print its
# sample count and its share of the split.
print('Size of test label', y_test.shape)
c = Counter(y_test.values.flatten())
n_test = len(y_test)
# Iterate over the label -> activity mapping instead of range(1, len(c) + 1).
# The range form assumes the labels present are exactly 1..N, so a class that
# is absent from the split shrinks len(c) and silently drops the highest
# label(s) from the report.
for label, activity in sorted(label2activity_dict.items()):
    count = c[label]  # a Counter returns 0 for a label that never occurs
    share = count / n_test * 100 if n_test else 0.0  # avoid 0/0 on an empty split
    print(f'label {label} ({activity}) has {count} samples ({share:.04}% of test data)')

Size of test label (3162, 1)
label 1 (WALKING) has 496 samples (15.69% of test data)
label 2 (WALKING_UPSTAIRS) has 471 samples (14.9% of test data)
label 3 (WALKING_DOWNSTAIRS) has 420 samples (13.28% of test data)
label 4 (SITTING) has 508 samples (16.07% of test data)
label 5 (STANDING) has 556 samples (17.58% of test data)
label 6 (LAYING) has 545 samples (17.24% of test data)
label 7 (STAND_TO_SIT) has 23 samples (0.7274% of test data)
label 8 (SIT_TO_STAND) has 10 samples (0.3163% of test data)
label 9 (SIT_TO_LIE) has 32 samples (1.012% of test data)
label 10 (LIE_TO_SIT) has 25 samples (0.7906% of test data)
label 11 (STAND_TO_LIE) has 49 samples (1.55% of test data)
label 12 (LIE_TO_STAND) has 27 samples (0.8539% of test data)


In [12]:
# Re-read the subject ids as flat 1-D arrays; this replaces the DataFrames of
# the same names created when the splits were loaded.
subject_id_train = pd.read_table(root + 'Train/subject_id_train.txt', header=None).values.flatten()
subject_id_test = pd.read_table(root + 'Test/subject_id_test.txt', header=None).values.flatten()

# .tolist() turns the NumPy integer scalars into built-in ints before building
# the sets. Without it, NumPy >= 2 prints each element as `np.int64(1)`
# instead of `1`, which makes the subject lists hard to read.
print('subjects in train are: ', set(subject_id_train.tolist()))
print('subjects in test are: ', set(subject_id_test.tolist()))

subjects in train are:  {1, 3, 5, 6, 7, 8, 11, 14, 15, 16, 17, 19, 21, 22, 23, 25, 26, 27, 28, 29, 30}
subjects in test are:  {2, 4, 9, 10, 12, 13, 18, 20, 24}
