In [1]:
import pandas as pd
import numpy as np

In [2]:
# Load the pre-split training and testing sets.
training_data = pd.read_csv('dataset/train.csv')
testing_data = pd.read_csv('dataset/test.csv')

# The last column is the target; the last two columns are excluded from the
# feature matrix, leaving only the leading measurement columns.
training_label = training_data.iloc[:, -1]
training_feats = training_data.iloc[:, :-2]

testing_label = testing_data.iloc[:, -1]
testing_feats = testing_data.iloc[:, :-2]

# Basic sanity information about the data.
# Report actual NaN *counts* (the labels promise a number, not a boolean flag).
print('Number of NaNs in training set: ', int(training_data.isnull().values.sum()))
print('Number of NaNs in testing set: ', int(testing_data.isnull().values.sum()))
# read_csv never yields the 'category' dtype on its own; string-valued columns
# arrive as 'object', so both dtypes are counted as categorical here.
print('Number of categorical features: ',
      training_feats.select_dtypes(include=['object', 'category']).shape[1])

print('Number of observations in training set: ', training_feats.shape[0])
print('Number of observations in testing set: ', testing_feats.shape[0])
print('Number of features: ', training_feats.shape[1])

Number of NaNs in training set:  False
Number of NaNs in testing set:  False
Number of categorical features:  0
Number of observations in training set:  7352
Number of observations in testing set:  2947
Number of features:  561


In [3]:
# Print a structural summary of the training frame (index range, column count,
# dtype breakdown and memory usage).
training_data.info(memory_usage = 'deep')
# Last expression of the cell: display the first rows as a preview.
training_data.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7352 entries, 0 to 7351
Columns: 563 entries, tBodyAcc-mean()-X to Activity
dtypes: float64(561), int64(1), object(1)
memory usage: 32.0 MB


Unnamed: 0,tBodyAcc-mean()-X,tBodyAcc-mean()-Y,tBodyAcc-mean()-Z,tBodyAcc-std()-X,tBodyAcc-std()-Y,tBodyAcc-std()-Z,tBodyAcc-mad()-X,tBodyAcc-mad()-Y,tBodyAcc-mad()-Z,tBodyAcc-max()-X,...,fBodyBodyGyroJerkMag-kurtosis(),"angle(tBodyAccMean,gravity)","angle(tBodyAccJerkMean),gravityMean)","angle(tBodyGyroMean,gravityMean)","angle(tBodyGyroJerkMean,gravityMean)","angle(X,gravityMean)","angle(Y,gravityMean)","angle(Z,gravityMean)",subject,Activity
0,0.288585,-0.020294,-0.132905,-0.995279,-0.983111,-0.913526,-0.995112,-0.983185,-0.923527,-0.934724,...,-0.710304,-0.112754,0.0304,-0.464761,-0.018446,-0.841247,0.179941,-0.058627,1,STANDING
1,0.278419,-0.016411,-0.12352,-0.998245,-0.9753,-0.960322,-0.998807,-0.974914,-0.957686,-0.943068,...,-0.861499,0.053477,-0.007435,-0.732626,0.703511,-0.844788,0.180289,-0.054317,1,STANDING
2,0.279653,-0.019467,-0.113462,-0.99538,-0.967187,-0.978944,-0.99652,-0.963668,-0.977469,-0.938692,...,-0.760104,-0.118559,0.177899,0.100699,0.808529,-0.848933,0.180637,-0.049118,1,STANDING
3,0.279174,-0.026201,-0.123283,-0.996091,-0.983403,-0.990675,-0.997099,-0.98275,-0.989302,-0.938692,...,-0.482845,-0.036788,-0.012892,0.640011,-0.485366,-0.848649,0.181935,-0.047663,1,STANDING
4,0.276629,-0.01657,-0.115362,-0.998139,-0.980817,-0.990482,-0.998321,-0.979672,-0.990441,-0.942469,...,-0.699205,0.12332,0.122542,0.693578,-0.615971,-0.847865,0.185151,-0.043892,1,STANDING


# Feature Selection with RandomForest

In [4]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.feature_selection import SelectFromModel

In [5]:
def features_selection(x_train, y_train, x_test, estimator=None, median_multiplier=5):
    """Keep the features whose importance is at least a multiple of the median.

    An estimator is fitted once on the training data, and every feature whose
    importance is >= ``median_multiplier * median(importances)`` is retained.
    The same column mask is then applied to both the training and the testing
    features.

    Parameters
    ----------
    x_train : pandas.DataFrame
        Training features; its column labels name the selected features.
    y_train : array-like
        Training targets passed to ``estimator.fit``.
    x_test : array-like
        Testing features with the same column order as ``x_train``.
    estimator : object, optional
        Any object exposing ``fit(X, y)`` and, once fitted, a 1-D
        ``feature_importances_`` attribute. It is fitted in place. Defaults to
        ``RandomForestClassifier(n_estimators=250, random_state=0)``.
    median_multiplier : float, optional
        Factor applied to the median importance to obtain the cut-off.

    Returns
    -------
    selected_features : numpy.ndarray
        Labels of the retained columns, in their original order.
    x_train_up, x_test_up : pandas.DataFrame
        The inputs reduced to the retained columns, with a fresh default index.
    """
    if estimator is None:
        estimator = RandomForestClassifier(n_estimators=250, random_state=0)

    # Fit exactly once; the importances are all that the selection rule needs,
    # so there is no reason to retrain the model to evaluate each threshold.
    estimator.fit(x_train, y_train)
    importances = np.asarray(estimator.feature_importances_)

    # Same rule as a 'median' threshold scaled by the multiplier: keep >= cut-off.
    cutoff = median_multiplier * np.median(importances)
    keep = importances >= cutoff

    selected_features = np.asarray(x_train.columns[keep])

    # Slice the raw values so both outputs get a default 0..n-1 index.
    x_train_up = pd.DataFrame(np.asarray(x_train)[:, keep], columns=selected_features)
    x_test_up = pd.DataFrame(np.asarray(x_test)[:, keep], columns=selected_features)

    return selected_features, x_train_up, x_test_up

# Reduce both splits to the features chosen on the training data.
selected_features, x_train_up, x_test_up = features_selection(
    training_feats,
    training_label,
    testing_feats,
)

# Report how many columns survived the selection step.
n_selected = len(selected_features)
print('Number of features after feature selection: ', n_selected)

Number of features after feature selection:  114


# Simple ANN application

In [6]:
from tensorflow import keras
from tensorflow.keras import layers

In [7]:
from sklearn.preprocessing import LabelEncoder

# Create a label encoder object.
le = LabelEncoder()

# Learn the label -> integer mapping from the training labels only.
training_label_encoded = le.fit_transform(training_label)
# Reuse that same mapping for the test labels. Refitting on the test set could
# silently produce a different mapping if its label set differed; `transform`
# instead raises on labels that were never seen during fitting.
testing_label_encoded = le.transform(testing_label)

In [10]:
# Display the distinct activity labels present in the training set.
training_label.unique()

array(['STANDING', 'SITTING', 'LAYING', 'WALKING', 'WALKING_DOWNSTAIRS',
       'WALKING_UPSTAIRS'], dtype=object)

In [11]:
# Derive the layer sizes from the data instead of hard-coding them, so the
# model still matches if the feature-selection result or label set changes.
n_features = x_train_up.shape[1]
n_classes = len(np.unique(training_label_encoded))

# Small fully connected classifier: three hidden ReLU layers and a softmax head.
ann_model = keras.Sequential([
    keras.layers.Dense(16, input_shape=(n_features,), activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(n_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer-encoded labels.
ann_model.compile(optimizer='adam',
                  loss=keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): no random seed is set in this cell, so the training results
# will vary between runs.
ann_model.fit(x_train_up,
              training_label_encoded,
              epochs=50,
              batch_size=8)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1976e9dfcd0>

In [12]:
# Score the dense network on the held-out test split; the result is the loss
# followed by the accuracy metric configured at compile time.
ann_model.evaluate(
    x=x_test_up,
    y=testing_label_encoded,
    batch_size=8,
)



[0.45268237590789795, 0.9107567071914673]

# CNN application

In [15]:
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import models, layers
import matplotlib.pyplot as plt

In [20]:
# Derive the sizes from the data instead of hard-coding them, so the model
# still matches if the feature-selection result or label set changes.
n_features = x_train_up.shape[1]
n_classes = len(np.unique(training_label_encoded))

# Conv1D expects (samples, steps, channels); add the single channel axis
# explicitly rather than relying on implicit rank expansion of the 2-D frame.
x_train_cnn = np.expand_dims(x_train_up.to_numpy(), axis=-1)

# Four stacked 1-D convolutions, then dropout + pooling and a dense head.
cnn_model = keras.Sequential([
    keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu',
                        input_shape=(n_features, 1)),
    keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    keras.layers.Conv1D(filters=64, kernel_size=3, activation='relu'),
    keras.layers.Dropout(0.5),
    keras.layers.MaxPooling1D(pool_size=2),
    keras.layers.Flatten(),
    keras.layers.Dense(64, activation='relu'),
    keras.layers.Dense(32, activation='relu'),
    keras.layers.Dense(n_classes, activation='softmax')
])

# Sparse categorical cross-entropy matches the integer-encoded labels.
cnn_model.compile(optimizer='adam',
                  loss=keras.losses.SparseCategoricalCrossentropy(),
                  metrics=['accuracy'])

# NOTE(review): no random seed is set in this cell, so the training results
# will vary between runs.
cnn_model.fit(x_train_cnn,
              training_label_encoded,
              epochs=50,
              batch_size=8)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.History at 0x1982c9cd190>

In [21]:
# Score the convolutional network on the held-out test split; the result is
# the loss followed by the accuracy metric configured at compile time.
cnn_model.evaluate(
    x=x_test_up,
    y=testing_label_encoded,
    batch_size=8,
)



[0.33898550271987915, 0.9321343898773193]