In [None]:
import pandas as pd
import numpy as np
import scipy.stats as stats
from os import listdir

## Features

In [None]:
from scipy.signal import savgol_filter

window_length = 60
polyorder = 3

def correlation(df, index, rows):
    x = df.iloc[:, index].values
    y = df.iloc[:, (index + 1)%rows].values
    return np.corrcoef(x, y)[0, 1]

def features(df):
    X = [["mean"], ["std"], ["corelation"], ["zero cross"], ["max"], ["min"], ["skewness"], ["kurtosis"], ["p2p time"], ["median"], ["25th percentile"], ["75th percentile"], ["range"], ["mode"], ["cover"], ["IQR"], ["square mean"]]
    for i in range(len(df.T)):
        x = savgol_filter(df.iloc[:, i].values, window_length = window_length, polyorder = polyorder)
        X[0].extend([np.mean(x)])
        X[1].extend([np.std(x)])
        X[2].extend([correlation(df, i, len(df.T))])
        X[3].extend([np.sum(np.diff(np.sign(x)) != 0)])
        X[4].extend([np.max(x)])
        X[5].extend([np.min(x)])
        X[6].extend([stats.skew(x)])
        X[7].extend([stats.kurtosis(x)])
        X[8].extend([abs(np.argmax(x) - np.argmin(x))])
        X[9].extend([np.median(x)])
        X[10].extend([np.percentile(x, 25)])
        X[11].extend([np.percentile(x, 75)])
        X[12].extend([np.max(x) - np.min(x)])
        X[13].extend([stats.mode(x)[0]])
        X[14].extend([np.std(x) / np.mean(x)])
        X[15].extend([np.percentile(x, 75) - np.percentile(x, 25)])
        X[16].extend([np.mean(x**2)])

    features = []

    for axis in X:
        features.extend(axis[1:])
    
    return features

## Feature Extractor

In [None]:
folders = listdir("../data")

columns = [(0, 80), (15, 95), (5, 85), (5, 55), (5, 85), (5, 85)]

for index, folder in enumerate(folders):
    open(f"../test_features/{folder}.csv", "w").close()

    files = listdir(f"../data/{folder}")[:17]
    
    for file in files:
        data = pd.read_csv(f"../data/{folder}/{file}", header=None).iloc[columns[0][0]:columns[0][1], :6]
        feature = features(data.iloc[:, 0:3])
        feature.extend(features(data.iloc[:, 3:6]))

        pd.DataFrame(feature).T.to_csv(f"../test_features/{folder}.csv", header = None, index = False, mode = "a")

## Analysing features using avg std

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from os import listdir

classes = listdir("../test_features")

features = [pd.read_csv(f"../test_features/{file}", header=None) for file in classes]

type = ["accl", "gyro"]
X = [["max m"], ["min m"], ["imag"], ["imag mean"], ["100 th percentile"], ["iqr"], ["range"], ["std"], ["half mean"], ["half median"], ["half 70th percentile"]]

class_std = []
avg_std = []

for i in range(17):
    feature = [data[i] for data in features]
    with np.errstate(divide='ignore', invalid='ignore'):
        feature = np.where(np.ptp(feature) != 0, (feature - np.min(feature)) / np.ptp(feature), feature)

    median = []
    std = []
    for j in range(len(classes)):
        median.append(np.median(feature[j]))
        std.append(np.round(np.std(feature[j]), 3)*1000)
        class_std.append(np.round(np.std(median),3)*1000)
    avg_std.append(np.average(std))
 
for i in range(17):
    feature = [data[i] for data in features]
    with np.errstate(divide='ignore', invalid='ignore'):
        feature = np.where(np.ptp(feature) != 0, (feature - np.min(feature)) / np.ptp(feature), feature)

    for j in range(len(classes)):
        plt.scatter(feature[j], [classes[j][:-4]]*len(features[j]), label = str(np.round(np.std(feature[j]), 3)*1000))
    plt.legend()
    plt.title("{0} {1} {2}   {3}".format(type[(i//3)//len(X)], X[(i//3)%len(X)][0], i%3 + 1, np.round(np.std(median),3)*1000))
    plt.show()

## Feature Selector

In [None]:
def correlation(df, index, rows):
    x = df.iloc[:, index].values
    y = df.iloc[:, (index + 1)%rows].values
    return np.corrcoef(x, y)[0, 1]

def features1(df):
    X = [["std"], ["max"], ["min"], ["range"], ["mode"]]
    for i in range(len(df.T)):
        x = df.iloc[:, i].values
        # X[0].extend([np.mean(x)])
        X[0].extend([np.std(x)])
        # X[2].extend([correlation(df, i, len(df.T))])
        # X[3].extend([np.sum(np.diff(np.sign(x)) != 0)])
        X[1].extend([np.max(x)])
        X[2].extend([np.min(x)])
        # X[6].extend([stats.skew(x)])
        # X[7].extend([stats.kurtosis(x)])
        # X[8].extend([abs(np.argmax(x) - np.argmin(x))])
        # X[3].extend([np.median(x)])
        # X[10].extend([np.percentile(x, 25)])
        # X[4].extend([np.percentile(x, 75)])
        X[3].extend([np.max(x) - np.min(x)])
        X[4].extend([stats.mode(x)[0]])
        # X[14].extend([np.std(x) / np.mean(x)])
        # X[7].extend([np.percentile(x, 75) - np.percentile(x, 25)])
        # X[8].extend([np.mean(x**2)])

    features = []

    for axis in X:
        features.extend(axis[1:])
    
    return features

def features2(df):
    X = [["max"], ["min"], ["range"]]
    for i in range(len(df.T)):
        x = df.iloc[:, i].values
        # X[0].extend([np.mean(x)])
        # X[1].extend([np.std(x)])
        # X[2].extend([correlation(df, i, len(df.T))])
        # X[3].extend([np.sum(np.diff(np.sign(x)) != 0)])
        X[0].extend([np.max(x)])
        X[1].extend([np.min(x)])
        # X[6].extend([stats.skew(x)])
        # X[7].extend([stats.kurtosis(x)])
        # X[8].extend([abs(np.argmax(x) - np.argmin(x))])
        # X[3].extend([np.median(x)])
        # X[4].extend([np.percentile(x, 25)])
        # X[11].extend([np.percentile(x, 75)])
        X[2].extend([np.max(x) - np.min(x)])
        # X[13].extend([stats.mode(x)[0]])
        # X[14].extend([np.std(x) / np.mean(x)])
        # X[15].extend([np.percentile(x, 75) - np.percentile(x, 25)])
        # X[16].extend([np.mean(x**2)])

    features = []

    for axis in X:
        features.extend(axis[1:])
    
    return features

folders = listdir("../data")

columns = [(0, 50), (15, 65), (0, 50), (5, 55), (0, 50), (0, 50)]

for index, folder in enumerate(folders):
    open(f"../features/{folder}.csv", "w").close()

    files = listdir(f"../data/{folder}")[:17]
    
    for file in files:
        data = pd.read_csv(f"../data/{folder}/{file}", header=None).iloc[columns[index][0]:columns[index][1], :6]
        feature = features1(data.iloc[:, 0:3])
        feature.extend(features2(data.iloc[:, 3:6]))

        pd.DataFrame(feature).T.to_csv(f"../features/{folder}.csv", header = None, index = False, mode = "a")

## Traning

In [1]:
import pandas as pd
from os import listdir
from sklearn import svm
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_val_score
from sklearn.metrics import accuracy_score

In [2]:
def csv_to_X_y(path):
    folder = listdir(path)
    X = []
    y = []
    for index, file in enumerate(folder):
        df = pd.read_csv(f"{path}/{file}", header = None)
        X.extend(df.values)
        y.extend([index]*len(df))
    return X, y

In [3]:
X, y = csv_to_X_y('../test_features')

In [4]:
X_train, X_test, y_train, y_test = train_test_split(X,y, stratify=y, random_state=10, test_size=0.2)

model = svm.SVC()

history = model.fit(X_train,y_train)

print(accuracy_score(model.predict(X),y)*100)

96.07843137254902


In [9]:
import numpy as np

In [11]:
X[0]

array([-9.93443049e+02,  0.00000000e+00,  2.20429433e+03,  7.31633588e+03,
        6.58005976e+03,  6.13419123e+02,  6.09746264e+02,  1.17084150e+03,
        1.59011457e+03, -6.38988404e+00, -2.53058041e+01, -2.29993233e+00,
       -1.11718635e+01,  7.71630481e-01,  1.33014394e+00,  2.33785002e+00,
        7.54880196e-01,  4.67027644e-01,  5.12281867e-01])

In [12]:
model.decision_function([X[0]])

array([[ 5.27688497, -0.21931466,  0.75370862,  3.10086833,  1.78151193,
         4.18943908]])

In [5]:
import joblib

joblib.dump(model, "../test.joblib")

['../test.joblib']