# Analyze the effect of classification on the speed data

<b>Intuition :</b> Literature in speed detection suggests that usage of hybrid models increases precision of the model. But these classifications are made strictly between slower and faster speeds or walking and runnning speeds. So we first test the effectiveness of classifier to split the speeds between slower and higher speeds. Then we try to see if there is any improvement in the precision in speed detection using the classifier with the highest accuracy. NOte: The regression algorithm used here will be RandomForest as the best regression model was found to be random forests. 

In [1]:
from DataBuilder import DataBuilder
import ClassEvaluator

import skSVM
import skRandomForestClassifier
import skKNeighborsClassifier
import skLogisticRegression

import numpy as np
import importlib,sys 
from IPython.display import clear_output

In [2]:
def rel(name):
    importlib.reload(sys.modules[name])

In [3]:
dataBuilder = DataBuilder(heuristic = False, applyFilter =False)

100%|██████████| 3/3 [00:21<00:00,  7.02s/it]


In [5]:
splits = [4, 4.5, 5.5, 5, 6]
PCA = [ True, False]

In [232]:
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier

clf = SVC()
#clf = RandomForestClassifier(n_estimators =250, max_depth = 60)
clf = KNeighborsClassifier(n_neighbors=5)
X_Train, y_Train, X_Test, y_Test = dataBuilder.confs['data1']['conf1']

In [233]:
y_Tr = [0 if y<4.2 else 1 for y in y_Train]
y_Te = [0 if y<4.2 else 1 for y in y_Test]

In [234]:
clf.fit(X_Train, y_Tr)

KNeighborsClassifier()

In [235]:
preds = clf.predict(X_Test)

In [236]:
from sklearn.metrics import classification_report, accuracy_score

print(classification_report(preds, y_Te))
print('{}%'.format(accuracy_score(preds, y_Te)*100))
from collections import Counter
Counter(y_Tr)

              precision    recall  f1-score   support

           0       0.98      0.87      0.93       579
           1       0.93      0.99      0.96       970

    accuracy                           0.95      1549
   macro avg       0.96      0.93      0.94      1549
weighted avg       0.95      0.95      0.95      1549

94.7062621045836%


Counter({0: 1283, 1: 2330})

## SVM CLASSIFIER

In [None]:
rel('skSVM')
rel('ClassEvaluator')
rel('hyperparameters')

for applyPCA in PCA:
    skSVMClassifier = skSVM.skSVM(applyPCA)
    model = skSVMClassifier.model
    name = skSVMClassifier.name
    for dataKey, confDict in dataBuilder.confs.items():
        for confKey, confData in confDict.items():
            for split_value in splits:
                print(confData[0].shape)
                evaluator  = ClassEvaluator.Evaluator(name, dataKey, model, confKey, confData, split_value)
                evaluator.train_model()
                evaluator.store()
clear_output()
print('Grid Search completed')

## RandomForest Classifier

In [None]:
rel('skRandomForestClassifier')
rel('ClassEvaluator')
rel('hyperparameters')

for applyPCA in PCA:
    skRFC = skRandomForestClassifier.skRandomForestClassifier(applyPCA)
    model = skRFC.model
    name = skRFC.name
    
    for dataKey, confDict in dataBuilder.confs.items():
        for confKey, confData in confDict.items():
            for split_value in splits:
                evaluator  = ClassEvaluator.Evaluator(name, dataKey, model, confKey, confData, split_value)
                evaluator.train_model()
                evaluator.store()        
            
clear_output()
print('Grid Search completed')

## K Nearest Neighbors Classifier

In [None]:
rel('skKNeighborsClassifier')
rel('ClassEvaluator')
rel('hyperparameters')

for applyPCA in PCA:
    skKNNC = skKNeighborsClassifier.skKNeighborsClassifier(applyPCA)
    model = skKNNC.model
    name = skKNNC.name

    for dataKey, confDict in dataBuilder.confs.items():
        for confKey, confData in confDict.items():
            for split_value in splits:
                print(confData[0].shape)
                evaluator  = ClassEvaluator.Evaluator(name, dataKey, model, confKey, confData, split_value)
                evaluator.train_model()
                evaluator.store()  
            
clear_output()
print('Grid Search completed')

## Logistic Regression

In [None]:
rel('skLogisticRegression')
rel('ClassEvaluator')
rel('hyperparameters')

for applyPCA in PCA:
    skLogReg = skLogisticRegression.skLogisticRegression(applyPCA)
    model = skLogReg.model
    name = skLogReg.name

    for dataKey, confDict in dataBuilder.confs.items():
        for confKey, confData in confDict.items():
            for split_value in splits:
                print(confData[0].shape)
                evaluator  = ClassEvaluator.Evaluator(name, dataKey, model, confKey, confData, split_value)
                evaluator.train_model()
                evaluator.store()  
            
clear_output()
print('Grid Search completed')

# Try with Heuristic Data

In [9]:
from Heuristics import *
import Heuristics

import MLData
rel('Heuristics')
rel('MLData')

X_TrainA, y_Train, X_TestA, y_Test = MLData.dataset_main(150,0,'Yes','no',True )
X_TrainG, y_TrainG, X_TestG, y_TestG = MLData.dataset_main(150,0,'No','no',True )

X_Train = np.hstack((X_TrainA, X_TrainG))
X_TestG = np.hstack((X_TestA, X_TestG))

Accel
9


  heuristicFeature = np.hstack(heuristics.values())


Train Labels shape   : (1121, 1)
Train Features shape : (1121, 39)
Test labels shape    : (481, 1)
Test Features shape  : (481, 39)
Gyro
9
Train Labels shape   : (1121, 1)
Train Features shape : (1121, 39)
Test labels shape    : (481, 1)
Test Features shape  : (481, 39)


In [12]:
from sklearn.linear_model import LogisticRegression

In [39]:
logreg = LogisticRegression(max_iter =10000)

In [40]:
logreg.fit(X_Train, y_Tr)

LogisticRegression(max_iter=10000)

In [45]:
preds = logreg.predict(X_TestG)

In [46]:
from sklearn.metrics import accuracy_score

In [47]:
accuracy_score(preds, y_Te)

0.8981288981288982

In [35]:
X_TestG[l] = 0