In [1]:
import sys
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.preprocessing import FunctionTransformer
from sklearn.pipeline import make_pipeline

In [2]:
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.ensemble import GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.naive_bayes import GaussianNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn.preprocessing import MinMaxScaler
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC



### GET THE DATA

In [3]:
# Read one table per activity and label every row with the activity it came from.
walkDF = pd.read_csv("data/readyToTrain/Walking").assign(activity="walking")
runDF = pd.read_csv("data/readyToTrain/Running").assign(activity="running")
standDF = pd.read_csv("data/readyToTrain/Standing").assign(activity="standing")

# Stack the three tables under a fresh 0..n-1 index and keep only the columns
# from 'mean1AccX' through 'activity' (label slices include both end points).
dataDF = (
    pd.concat([walkDF, runDF, standDF], ignore_index=True)
    .loc[:, 'mean1AccX':'activity']
)

# Features: every column from 'mean1AccX' through 'max1Acc-turns'.
X = dataDF.loc[:, 'mean1AccX':'max1Acc-turns']

# Target: the activity label attached above.
y = dataDF['activity']


#dataDF

# DATA OUTPUT

In [4]:
# Report layout shared by outData; scores are rendered with three decimals.
OUTPUT_TEMPLATE = (
    'Classifier:           {name}\n'
    'Train Score:          {train_score:.3f}\n'
    'Valid Score:          {valid_score:.3f}\n'
)


def plot_data():
    """Placeholder for a plotting helper; it currently does nothing."""


def outData(results, name=None):
    """Print a classifier's train/validation scores using OUTPUT_TEMPLATE.

    Parameters
    ----------
    results : sequence
        When ``name`` is omitted: ``[name, train_score, valid_score]``
        (the original calling convention).
        When ``name`` is given: ``[train_score, valid_score, ...]`` - the
        layout returned by the model helpers in this notebook.  Any trailing
        items (such as the prediction array) are ignored.
    name : str, optional
        Label printed on the ``Classifier:`` line.  Supplying it lets the
        return value of a model helper be passed in directly; without it the
        prediction array would land in the ``{valid_score:.3f}`` slot and the
        formatting would fail.
    """
    if name is None:
        # Original layout: the label travels inside ``results``.
        name, train_score, valid_score = results[0], results[1], results[2]
    else:
        # Model-helper layout: scores first, label supplied separately.
        train_score, valid_score = results[0], results[1]

    print(OUTPUT_TEMPLATE.format(
        name=name,
        train_score=train_score,
        valid_score=valid_score,
    ))

In [5]:
def random_forest(X, y, random_state=None):
    """Fit a random forest on a train/validation split of ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to both ``train_test_split`` and the forest.  ``None``
        keeps the original unseeded behaviour; pass an int to make the split,
        the fitted model and therefore the scores repeatable between runs.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` - the two values of
        ``model.score`` followed by ``model.predict(X_valid)``.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        RandomForestClassifier(
            n_estimators=50,
            max_depth=5,
            min_samples_leaf=2,
            random_state=random_state,
        )
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [6]:
def kn_neighbors(X, y, random_state=None):
    """Fit a min-max-scaled 3-nearest-neighbours classifier on a split of ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``train_test_split``.  ``None`` keeps the original
        unseeded behaviour; pass an int to get the same split, and therefore
        the same scores, on every run.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` - the two values of
        ``model.score`` followed by ``model.predict(X_valid)``.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    # The scaler sits inside the pipeline, so it is fitted on the training
    # rows only and then reused for the validation rows.
    model = make_pipeline(
        MinMaxScaler(),
        KNeighborsClassifier(n_neighbors=3),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [7]:
def naive_bayes(X, y, random_state=None):
    """Fit a standardised Gaussian naive Bayes classifier on a split of ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``train_test_split``.  ``None`` keeps the original
        unseeded behaviour; pass an int to get the same split, and therefore
        the same scores, on every run.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` - the two values of
        ``model.score`` followed by ``model.predict(X_valid)``.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    # The scaler sits inside the pipeline, so it is fitted on the training
    # rows only and then reused for the validation rows.
    model = make_pipeline(
        StandardScaler(),
        GaussianNB(),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [8]:
def linear_regressor(X, y, random_state=None):
    """Fit an intercept-free linear regression on a train/validation split.

    ``LinearRegression`` comes from ``sklearn.linear_model`` and must be
    imported at the top of the notebook; without that import this function
    raises ``NameError`` on its first call.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Numeric targets, one per row of ``X``.  String labels such as the
        notebook's ``activity`` column cannot be regressed on directly and
        have to be encoded as numbers first.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to ``train_test_split``.  ``None`` keeps the original
        unseeded behaviour; pass an int to get the same split on every run.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]``.  For a regressor
        ``model.score`` is the R^2 coefficient of determination, not an
        accuracy, so it is not comparable with the classifier helpers' scores.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        LinearRegression(fit_intercept=False),
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [9]:
def gradient_boosting(X, y, random_state=None):
    """Fit a gradient-boosting classifier on a train/validation split of ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to both ``train_test_split`` and the booster.  ``None``
        keeps the original unseeded behaviour; pass an int to make the split,
        the fitted model and therefore the scores repeatable between runs.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` - the two values of
        ``model.score`` followed by ``model.predict(X_valid)``.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        GradientBoostingClassifier(
            n_estimators=90,
            max_depth=10,
            # A float here is read by scikit-learn as a fraction of the
            # training samples, not as an absolute count.
            min_samples_leaf=0.1,
            random_state=random_state,
        )
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

In [10]:
def svc_classifier(X, y, random_state=None):
    """Fit a linear-kernel SVC on a train/validation split of ``(X, y)``.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    y : array-like
        Class labels, one per row of ``X``.
    random_state : int, RandomState instance or None, default=None
        Seed forwarded to both ``train_test_split`` and the SVC (which uses it
        for the data shuffling behind ``probability=True``).  ``None`` keeps
        the original unseeded behaviour; pass an int for repeatable runs.

    Returns
    -------
    list
        ``[train_score, valid_score, valid_predictions]`` - the two values of
        ``model.score`` followed by ``model.predict(X_valid)``.
    """
    X_train, X_valid, y_train, y_valid = train_test_split(
        X, y, random_state=random_state
    )

    model = make_pipeline(
        SVC(kernel='linear', C=2.0, probability=True, random_state=random_state)
    ).fit(X_train, y_train)

    return [
        model.score(X_train, y_train),
        model.score(X_valid, y_valid),
        model.predict(X_valid),
    ]

# Modeling and Training the data

In [35]:
# Random forest: result is [train score, validation score, validation predictions].
# No seed is passed, so each call draws a new split and the numbers change between runs.
result = random_forest(X, y)
result

[1.0,
 0.9130434782608695,
 array(['standing', 'standing', 'running', 'walking', 'standing',
        'walking', 'walking', 'standing', 'running', 'running', 'walking',
        'walking', 'standing', 'standing', 'walking', 'running', 'running',
        'running', 'running', 'standing', 'standing', 'walking',
        'standing'], dtype=object)]

In [52]:
# k-nearest neighbours: result is [train score, validation score, validation predictions].
# No seed is passed, so each call draws a new split and the numbers change between runs.
result = kn_neighbors(X, y)
result

[0.9701492537313433,
 0.9130434782608695,
 array(['standing', 'running', 'running', 'running', 'running', 'walking',
        'walking', 'running', 'running', 'walking', 'running', 'standing',
        'walking', 'standing', 'walking', 'walking', 'walking', 'standing',
        'standing', 'walking', 'running', 'running', 'standing'],
       dtype=object)]

In [68]:
# Gradient boosting: result is [train score, validation score, validation predictions].
# No seed is passed, so each call draws a new split and the numbers change between runs.
result = gradient_boosting(X, y)
result

[1.0,
 0.9565217391304348,
 array(['standing', 'walking', 'running', 'running', 'standing', 'running',
        'standing', 'walking', 'standing', 'running', 'walking',
        'standing', 'walking', 'walking', 'standing', 'running', 'walking',
        'standing', 'walking', 'running', 'running', 'walking', 'running'],
       dtype=object)]

In [75]:
# Linear SVC: result is [train score, validation score, validation predictions].
# No seed is passed, so each call draws a new split and the numbers change between runs.
result = svc_classifier(X, y)
result

[1.0,
 0.8695652173913043,
 array(['standing', 'running', 'running', 'walking', 'walking', 'standing',
        'standing', 'running', 'running', 'walking', 'standing', 'walking',
        'walking', 'walking', 'walking', 'running', 'standing', 'standing',
        'standing', 'standing', 'running', 'walking', 'running'],
       dtype=object)]