In [1]:
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline
from sklearn.metrics import classification_report
from matplotlib import cm


In [2]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.svm import SVC
from sklearn.cluster import KMeans
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.tree import DecisionTreeClassifier

### GET THE DATA

In [3]:
# One feature file per activity; every row of a file gets that activity's
# integer label (standing = 0, walking = 1, running = 2).
walkDF = pd.read_csv("data/readyToTrain/Walking").assign(activity=1)
runDF = pd.read_csv("data/readyToTrain/Running").assign(activity=2)
standDF = pd.read_csv("data/readyToTrain/Standing").assign(activity=0)

# Stack the three activities under a fresh 0..n-1 index and keep only the
# columns from 'mean1AccX' through 'activity' (anything the files store
# before 'mean1AccX' is dropped by the label slice).
dataDF = (
    pd.concat([walkDF, runDF, standDF])
    .reset_index(drop=True)
    .loc[:, 'mean1AccX':'activity']
)

# Features: every column from 'mean1AccX' through 'max1Acc-turns'.
# Reduced alternative: dataDF.loc[:, ['mean1AccX', 'mean1AccY', 'mean1AccZ']]
X = dataDF.loc[:, 'mean1AccX':'max1Acc-turns']

# Target: the integer activity label.
y = dataDF['activity']


X

Unnamed: 0,mean1AccX,mean1AccY,mean1AccZ,mean1Acc-old,mean1Acc-turns,min1AccX,min1AccY,min1AccZ,min1Acc-old,min1Acc-turns,max1AccX,max1AccY,max1AccZ,max1Acc-old,max1Acc-turns
0,-0.099493,-0.465791,-0.856727,1.062917,0.057808,-1.334384,-0.717487,-1.159709,0.385,0.000030,0.938393,1.276115,0.038427,2.843,0.745250
1,0.057198,-0.669784,-0.831493,1.139026,0.074913,-1.879898,-0.883859,-1.168634,0.359,0.000077,0.352524,0.984944,0.192178,2.212,1.402344
2,-0.038519,-0.465664,-0.897671,1.078700,0.029025,-1.341853,-0.831175,-1.162999,0.262,0.000568,0.541564,1.422448,-0.143941,1.664,0.395674
3,-0.052338,-0.416041,-0.815729,0.976863,0.020162,-1.297735,-0.608330,-1.245164,0.440,0.000062,0.187806,0.969483,0.308912,2.063,0.246721
4,-0.040121,-0.429703,-0.741593,0.984209,0.027517,-1.554315,-0.799485,-1.456386,0.335,0.000010,0.363297,0.925566,0.373531,1.971,0.395730
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
85,-0.010691,-0.197479,-0.977073,0.997298,0.002903,-0.084777,-0.298335,-1.014310,0.853,0.000001,0.060777,-0.164927,-0.903001,1.105,0.017582
86,-0.051422,-0.253031,-0.962816,0.997493,0.012554,-0.122456,-0.327355,-1.039186,0.852,0.000014,0.060811,-0.212331,-0.881584,1.135,0.029266
87,0.034178,-0.281518,-0.955725,0.997426,0.009463,-0.040105,-0.351938,-0.989235,0.795,0.000016,0.134340,-0.245927,-0.755666,1.070,0.043126
88,-0.048716,-0.240942,-0.966212,0.997087,0.011312,-0.075867,-0.254398,-0.995577,0.891,0.008123,-0.034202,-0.221819,-0.878869,1.075,0.016347


# DATA OUTPUT

In [4]:
# Three-line text report: a classifier label followed by the train and
# validation scores, each rendered with three decimals. Every line,
# including the last, is newline-terminated.
OUTPUT_TEMPLATE = '\n'.join([
    'Classifier:           {name}',
    'Train Score:          {train_score:.3f}',
    'Valid Score:          {valid_score:.3f}',
    '',
])


def plot_data(model, X, y=None):
    """Plotting hook called after a model is fitted; currently draws nothing.

    The arguments are accepted and ignored, and the function returns ``None``.

    The decision-region plot that previously lived here was disabled. It
    built a mesh from only the first two columns of ``X`` and passed that
    two-column grid to ``model.predict``, so it could only work for a model
    fitted on exactly two features.

    Parameters
    ----------
    model : fitted estimator (unused).
    X : feature table (unused).
    y : labels, optional (unused).
    """
    return None


def outData(results, name=None):
    """Print a short score report using ``OUTPUT_TEMPLATE``.

    Two layouts of ``results`` are supported:

    * ``name`` omitted (original behaviour): ``results`` is
      ``[name, train_score, valid_score]``.
    * ``name`` given: ``results`` is ``[train_score, valid_score, ...]``,
      which is the list the classifier helpers in this notebook return
      (a trailing predictions array is ignored). Without this form, passing
      such a list would try to format the predictions array with ``:.3f``
      and raise.

    Parameters
    ----------
    results : sequence
        Scores, optionally preceded by the classifier label (see above).
    name : str, optional
        Label to print when ``results`` does not carry one.
    """
    if name is None:
        # Legacy layout: the label travels inside the sequence.
        name, train_score, valid_score = results[0], results[1], results[2]
    else:
        train_score, valid_score = results[0], results[1]

    print(OUTPUT_TEMPLATE.format(
        name=name,
        train_score=train_score,
        valid_score=valid_score,
    ))

In [5]:
def random_forest(X, y, random_state=None):
    """Fit a random forest on a random split of ``X``/``y`` and report on it.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the split and to the forest. ``None`` keeps the
        original behaviour of drawing from NumPy's global RNG, so a
        preceding ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    forest = RandomForestClassifier(
        n_estimators=50,
        max_depth=5,
        min_samples_leaf=2,
        random_state=random_state,
    )
    model = make_pipeline(forest).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))

    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [6]:
def kn_neighbors(X, y, random_state=None):
    """Fit min-max scaling + 3-nearest-neighbours on a random split and report on it.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the train/validation split. ``None`` keeps the original
        behaviour of drawing from NumPy's global RNG, so a preceding
        ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        MinMaxScaler(),
        KNeighborsClassifier(n_neighbors=3),
    ).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [7]:
def naive_bayes(X, y, random_state=None):
    """Fit standard scaling + Gaussian naive Bayes on a random split and report on it.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the train/validation split. ``None`` keeps the original
        behaviour of drawing from NumPy's global RNG, so a preceding
        ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        StandardScaler(),
        GaussianNB(),
    ).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [8]:
def neural_nets(X, y, random_state=None):
    """Fit an L-BFGS-trained ``MLPClassifier`` on a random split and report on it.

    ``hidden_layer_sizes=()`` means the network has no hidden layer, so the
    inputs feed the output layer directly. No feature scaling is applied.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the split and to the classifier (weight
        initialisation). ``None`` keeps the original behaviour of drawing
        from NumPy's global RNG, so a preceding ``np.random.seed(...)``
        still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    network = MLPClassifier(
        solver='lbfgs',
        hidden_layer_sizes=(),
        random_state=random_state,
    )
    model = make_pipeline(network).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [9]:
def gradient_boosting(X, y, random_state=None):
    """Fit a gradient-boosting classifier on a random split and report on it.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the split and to the booster. ``None`` keeps the
        original behaviour of drawing from NumPy's global RNG, so a
        preceding ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    booster = GradientBoostingClassifier(
        n_estimators=50,
        max_depth=5,
        # A float here is read by scikit-learn as a fraction of the training
        # samples (each leaf must hold at least 30% of them), not a count.
        min_samples_leaf=0.3,
        random_state=random_state,
    )
    model = make_pipeline(booster).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [10]:
def svc_classifier(X, y, random_state=None):
    """Fit a linear-kernel SVC (``C=2.0``) on a random split and report on it.

    No feature scaling is applied. Prints a classification report for the
    validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the train/validation split. ``None`` keeps the original
        behaviour of drawing from NumPy's global RNG, so a preceding
        ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    # probability=True is intentionally not set: the fitted pipeline never
    # leaves this function and only predict/score are used, so the extra
    # internal cross-validated probability calibration was wasted work.
    model = make_pipeline(
        SVC(kernel='linear', C=2.0)
    ).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [11]:
def decision_tree(X, y, random_state=None):
    """Fit a decision tree on a random split of ``X``/``y`` and report on it.

    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the split and to the tree. ``None`` keeps the original
        behaviour of drawing from NumPy's global RNG, so a preceding
        ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    tree = DecisionTreeClassifier(
        max_depth=15,
        min_samples_leaf=4,
        random_state=random_state,
    )
    model = make_pipeline(tree).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

In [12]:
def pca(X, y, random_state=None):
    """Fit scaling + 2-component PCA + linear SVC on a random split and report on it.

    The pipeline standardises the features, projects them onto two principal
    components, and fits a linear-kernel SVC (``C=2.0``) on that projection.
    Prints a classification report for the validation part of the split.

    Parameters
    ----------
    X : feature table accepted by scikit-learn.
    y : class labels aligned with ``X``.
    random_state : int, RandomState instance or None, default None
        Forwarded to the train/validation split. ``None`` keeps the original
        behaviour of drawing from NumPy's global RNG, so a preceding
        ``np.random.seed(...)`` still controls the outcome.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        StandardScaler(),
        PCA(2),
        SVC(kernel='linear', C=2.0),
    ).fit(X_train, y_train)

    # Predict once and reuse it for both the printed report and the result.
    valid_pred = model.predict(X_valid)
    print(classification_report(y_valid, valid_pred))
    plot_data(model, X, y)

    return [model.score(X_train, y_train), model.score(X_valid, y_valid), valid_pred]

# Modeling and Training the data

In [13]:
# Seed NumPy's global RNG so the random train/validation split (and the
# forest's own randomness) inside random_forest is repeatable for this call.
np.random.seed(12121)
result = random_forest(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      0.80      0.89        10
           2       0.82      1.00      0.90         9

    accuracy                           0.91        23
   macro avg       0.94      0.93      0.93        23
weighted avg       0.93      0.91      0.91        23



[1.0,
 0.9130434782608695,
 array([1, 2, 1, 2, 1, 2, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1, 1, 2, 1, 2, 0,
        2])]

In [14]:
# Seed NumPy's global RNG so the random train/validation split inside
# kn_neighbors is repeatable for this call.
np.random.seed(12121)
result = kn_neighbors(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       0.89      0.80      0.84        10
           2       1.00      0.89      0.94         9

    accuracy                           0.87        23
   macro avg       0.85      0.90      0.86        23
weighted avg       0.89      0.87      0.87        23



[0.9701492537313433,
 0.8695652173913043,
 array([0, 2, 1, 2, 1, 1, 1, 1, 1, 0, 1, 2, 2, 0, 0, 2, 1, 0, 2, 1, 2, 0,
        2])]

In [15]:
# Seed NumPy's global RNG so the random train/validation split (and the
# booster's own randomness) inside gradient_boosting is repeatable.
np.random.seed(12121)
result = gradient_boosting(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      0.90      0.95        10
           2       0.90      1.00      0.95         9

    accuracy                           0.96        23
   macro avg       0.97      0.97      0.96        23
weighted avg       0.96      0.96      0.96        23



[1.0,
 0.9565217391304348,
 array([1, 2, 1, 2, 1, 1, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1, 1, 2, 1, 2, 0,
        2])]

In [16]:
# Seed NumPy's global RNG so the random train/validation split inside
# svc_classifier is repeatable for this call.
np.random.seed(12121)
result = svc_classifier(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.80      0.89        10
           2       0.90      1.00      0.95         9

    accuracy                           0.91        23
   macro avg       0.90      0.93      0.91        23
weighted avg       0.93      0.91      0.91        23



[1.0,
 0.9130434782608695,
 array([0, 2, 1, 2, 1, 2, 2, 1, 1, 0, 1, 2, 2, 0, 0, 2, 1, 1, 2, 1, 2, 0,
        2])]

In [17]:
# Seed NumPy's global RNG so the random train/validation split (and the
# network's weight initialisation) inside neural_nets is repeatable.
np.random.seed(12121)
result = neural_nets(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.70      0.82        10
           2       0.82      1.00      0.90         9

    accuracy                           0.87        23
   macro avg       0.87      0.90      0.87        23
weighted avg       0.89      0.87      0.86        23



[1.0,
 0.8695652173913043,
 array([0, 2, 1, 2, 1, 2, 2, 1, 1, 0, 1, 2, 2, 0, 0, 2, 1, 2, 2, 1, 2, 0,
        2])]

In [18]:
# Seed NumPy's global RNG so the random train/validation split inside
# naive_bayes is repeatable for this call.
np.random.seed(12121)
result = naive_bayes(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       1.00      1.00      1.00         4
           1       1.00      0.60      0.75        10
           2       0.69      1.00      0.82         9

    accuracy                           0.83        23
   macro avg       0.90      0.87      0.86        23
weighted avg       0.88      0.83      0.82        23



[0.9104477611940298,
 0.8260869565217391,
 array([2, 2, 1, 2, 1, 2, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1, 2, 2, 1, 2, 0,
        2])]

In [19]:
# Seed NumPy's global RNG so the random train/validation split (and the
# tree's own randomness) inside decision_tree is repeatable for this call.
np.random.seed(12121)
result = decision_tree(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       0.80      1.00      0.89         4
           1       1.00      0.80      0.89        10
           2       0.90      1.00      0.95         9

    accuracy                           0.91        23
   macro avg       0.90      0.93      0.91        23
weighted avg       0.93      0.91      0.91        23



[0.9402985074626866,
 0.9130434782608695,
 array([1, 2, 1, 2, 1, 0, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1, 1, 2, 1, 2, 0,
        2])]

In [20]:
# Seed NumPy's global RNG so the random train/validation split inside
# pca is repeatable for this call.
np.random.seed(12121)
result = pca(X, y)
# Displayed as [train score, validation score, validation predictions].
result

              precision    recall  f1-score   support

           0       0.67      1.00      0.80         4
           1       0.86      0.60      0.71        10
           2       0.80      0.89      0.84         9

    accuracy                           0.78        23
   macro avg       0.77      0.83      0.78        23
weighted avg       0.80      0.78      0.78        23



[0.9402985074626866,
 0.782608695652174,
 array([0, 2, 1, 2, 1, 2, 2, 2, 1, 0, 1, 2, 2, 0, 0, 2, 1, 0, 1, 1, 2, 0,
        2])]