In [1]:
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

In [171]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.decomposition import PCA
from sklearn.cluster import KMeans
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.svm import SVC



### GET THE DATA

In [39]:
# Read one feature table per activity and tag every row with its label.
walkDF = pd.read_csv("readyToTrain/walking").assign(activity="walking")
runDF = pd.read_csv("readyToTrain/running").assign(activity="running")
standDF = pd.read_csv("readyToTrain/standing").assign(activity="standing")

# Stack the three tables under a fresh 0..n-1 row index and keep only the
# columns from 'mean1AccX' through 'activity'.
dataDF = pd.concat(
    [walkDF, runDF, standDF],
    ignore_index=True,
).loc[:, 'mean1AccX':'activity']

# Features are every column up to 'max1Acc-turns'; the target is the label.
X = dataDF.loc[:, 'mean1AccX':'max1Acc-turns']
y = dataDF['activity']

dataDF

Unnamed: 0,mean1AccX,mean1AccY,mean1AccZ,mean1Acc-old,mean1Acc-turns,min1AccX,min1AccY,min1AccZ,min1Acc-old,min1Acc-turns,max1AccX,max1AccY,max1AccZ,max1Acc-old,max1Acc-turns,activity
0,-0.099493,-0.465791,-0.856727,1.062917,0.057808,-1.334384,-0.717487,-1.159709,0.385,2.955921e-05,-1.334384,-0.717487,-1.159709,0.385,2.955921e-05,walking
1,0.057198,-0.669784,-0.831493,1.139026,0.074913,-1.879898,-0.883859,-1.168634,0.359,7.671834e-05,0.352524,0.984944,0.192178,2.212,1.402344,walking
2,-0.038519,-0.465664,-0.897671,1.0787,0.029025,-1.341853,-0.831175,-1.162999,0.262,0.0005684539,0.541564,1.422448,-0.143941,1.664,0.395674,walking
3,-0.052338,-0.416041,-0.815729,0.976863,0.020162,-1.297735,-0.60833,-1.245164,0.44,6.165231e-05,0.187806,0.969483,0.308912,2.063,0.2467211,walking
4,-0.040121,-0.429703,-0.741593,0.984209,0.027517,-1.554315,-0.799485,-1.456386,0.335,9.996379e-06,0.363297,0.925566,0.373531,1.971,0.3957296,walking
5,-0.082297,0.570112,-0.302161,1.153189,0.106782,-2.192367,-0.839894,-1.5407,0.347,2.698647e-06,1.142554,2.117979,2.032827,4.334,1.221682,walking
6,-0.412463,0.708757,-0.331488,1.064641,0.098433,-1.400446,-0.702929,-1.361322,0.412,7.458628e-06,0.304883,1.40916,0.447642,1.896,0.7525619,walking
7,-0.742635,0.18447,-0.207033,1.046831,0.051221,-1.895901,-0.806297,-1.290434,0.41,2.425414e-05,0.332273,1.002058,0.894676,2.331,1.169377,walking
8,0.119954,-0.220934,-0.938152,1.029588,0.046076,-0.838779,-0.855511,-2.04388,0.239,2.770055e-08,1.050301,0.901961,0.129432,2.623,0.7940354,walking
9,-0.36135,0.650314,-0.28323,1.072069,0.081708,-1.357662,-0.553453,-1.382633,0.051,2.133785e-05,0.305129,1.411917,0.775901,2.873,1.071221,walking


# DATA OUTPUT

In [17]:
# Three-line report layout; both scores are rendered with three decimals.
OUTPUT_TEMPLATE = (
    'Classifier:           {name}\n'
    'Train Score:          {train_score:.3f}\n'
    'Valid Score:          {valid_score:.3f}\n'
)


def plot_data():
    """Placeholder: plotting is not implemented yet, so this does nothing."""
    pass


def outData(results, name=None):
    """Print a classifier's train/validation scores using OUTPUT_TEMPLATE.

    Two input layouts are supported:

    * ``outData([name, train_score, valid_score])`` -- the original layout,
      used when ``name`` is omitted.
    * ``outData([train_score, valid_score, ...], name="...")`` -- the layout
      returned by the model functions in this notebook, which put the two
      scores first and the validation predictions third. Without ``name`` such
      a list would put the predictions into a ``:.3f`` field and fail.

    Parameters
    ----------
    results : sequence
        Scores, optionally preceded by the classifier name (see above).
    name : str, optional
        Classifier name to print. When given, ``results[0]`` and
        ``results[1]`` are read as the train and validation scores.

    Returns
    -------
    None
        The report is written to standard output.
    """
    if name is None:
        # Legacy layout: the name travels inside the sequence.
        name, train_score, valid_score = results[0], results[1], results[2]
    else:
        # Model-function layout: anything after the two scores is ignored.
        train_score, valid_score = results[0], results[1]

    print(OUTPUT_TEMPLATE.format(
        name=name,
        train_score=train_score,
        valid_score=valid_score,
    ))

In [104]:
def random_forest(X, y, random_state=None):
    """Fit a min-max-scaled random forest on a random train/validation split.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : class labels aligned with ``X``.
    random_state : int or None, optional
        Seed forwarded to both the split and the forest. The default ``None``
        keeps the original unseeded behaviour, where scores change from run
        to run; pass an integer to make the result reproducible.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method and the predictions are for
        the validation rows.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        MinMaxScaler(),
        RandomForestClassifier(
            n_estimators=90, max_depth=10, random_state=random_state
        ),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [65]:
def kn_neighbors(X, y, random_state=None):
    """Fit a min-max-scaled 3-nearest-neighbours classifier on a random split.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : class labels aligned with ``X``.
    random_state : int or None, optional
        Seed for the train/validation split. The default ``None`` keeps the
        original unseeded behaviour, where scores change from run to run;
        pass an integer to make the result reproducible.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method and the predictions are for
        the validation rows.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        MinMaxScaler(),
        KNeighborsClassifier(n_neighbors=3),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [44]:
def naive_bayes(X, y, random_state=None):
    """Fit a standardised Gaussian naive Bayes classifier on a random split.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : class labels aligned with ``X``.
    random_state : int or None, optional
        Seed for the train/validation split. The default ``None`` keeps the
        original unseeded behaviour, where scores change from run to run;
        pass an integer to make the result reproducible.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method and the predictions are for
        the validation rows.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        StandardScaler(),
        GaussianNB(),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [54]:
def linear_regressor(X, y, random_state=None):
    """Fit an intercept-free linear regression on a random train/validation split.

    Unlike the classifier helpers in this notebook, the scores returned here
    are the regressor's ``score`` values (R^2), not classification accuracy.

    NOTE(review): ``LinearRegression`` needs a numeric target. The ``y`` built
    in this notebook holds string activity labels, so it would have to be
    encoded before being passed here -- confirm the intended use.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : numeric target aligned with ``X``.
    random_state : int or None, optional
        Seed for the train/validation split. The default ``None`` keeps the
        original unseeded behaviour; pass an integer for a reproducible split.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]``.
    """
    # Imported here because none of the notebook's visible import cells bring
    # in LinearRegression; without this the function raised NameError.
    from sklearn.linear_model import LinearRegression

    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        LinearRegression(fit_intercept=False),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [181]:
def gradient_boosting(X, y, random_state=None):
    """Fit a gradient-boosting classifier (no scaling step) on a random split.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : class labels aligned with ``X``.
    random_state : int or None, optional
        Seed forwarded to both the split and the boosting estimator. The
        default ``None`` keeps the original unseeded behaviour, where scores
        change from run to run; pass an integer for a reproducible result.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method and the predictions are for
        the validation rows.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        GradientBoostingClassifier(
            n_estimators=90,
            max_depth=10,
            min_samples_leaf=0.1,
            random_state=random_state,
        ),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [172]:
def svc_classifier(X, y, random_state=None):
    """Fit a linear-kernel SVC (no scaling step) on a random split.

    Parameters
    ----------
    X : features, in any form accepted by ``train_test_split``.
    y : class labels aligned with ``X``.
    random_state : int or None, optional
        Seed forwarded to both the split and the SVC. The default ``None``
        keeps the original unseeded behaviour, where scores change from run
        to run; pass an integer for a reproducible result.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` where the scores
        come from the pipeline's ``score`` method and the predictions are for
        the validation rows.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        # probability=True is kept from the original even though this function
        # only calls score() and predict().
        SVC(
            kernel='linear',
            C=2.0,
            probability=True,
            random_state=random_state,
        ),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

# Modeling and Training the data

In [164]:
# Train and evaluate the random-forest pipeline; the cell displays
# [train score, validation score, validation predictions].
result = random_forest(X=X, y=y)
result

[1.0,
 0.875,
 array(['running', 'walking', 'standing', 'walking', 'running', 'standing',
        'walking', 'standing'], dtype=object)]

In [111]:
# Train and evaluate the k-nearest-neighbours pipeline; the cell displays
# [train score, validation score, validation predictions].
result = kn_neighbors(X=X, y=y)
result

[0.9090909090909091,
 0.625,
 array(['walking', 'standing', 'walking', 'running', 'standing', 'walking',
        'walking', 'standing'], dtype=object)]

In [207]:
# Train and evaluate the gradient-boosting pipeline; the cell displays
# [train score, validation score, validation predictions].
result = gradient_boosting(X=X, y=y)
result

[1.0,
 0.875,
 array(['running', 'standing', 'walking', 'walking', 'standing',
        'standing', 'standing', 'walking'], dtype=object)]

In [180]:
# Train and evaluate the linear SVC pipeline; the cell displays
# [train score, validation score, validation predictions].
result = svc_classifier(X=X, y=y)
result

[0.9545454545454546,
 1.0,
 array(['standing', 'running', 'running', 'walking', 'walking', 'walking',
        'standing', 'standing'], dtype=object)]