In [1]:
import csv, math, os
from datetime import datetime, timedelta

import numpy as np
import pandas as pd
from scipy import signal
from scipy import stats

from sklearn import svm
from sklearn.linear_model import SGDClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
from sklearn.neighbors import KNeighborsClassifier

In [2]:
def calculateFeaturesStats(data = np.array([]),col_name=""):
    """Compute summary-statistic features for one signal.

    Every key of the returned dict is the string ``col_name`` followed by a
    suffix, e.g. ``col_name='acc'`` yields ``'acc_mean'``, ``'acc_zcr'``, ...

    Parameters
    ----------
        data:                   numpy array (or array-like); the statistics
                                are written with a 1-D signal in mind
        col_name:               str, prefix prepended to every key

    Return
    ------
        None when ``data`` is empty, otherwise a dict with the suffixes:
        - _min, _max, _range:   extremes and their difference
        - _mean, _median, _mode
        - _std, _variance
        - _skew, _kur:          scipy.stats skewness and kurtosis
        - _twoperc, _fourperc, _sixperc, _eightperc:
                                20th / 40th / 60th / 80th percentiles
        - _rms:                 root mean square
        - _iqr:                 inter-quartile range
        - _countgeq, _countleq: fraction of samples strictly above / strictly
                                below the mean (samples equal to the mean are
                                counted in neither, despite the key names)
        - _zcr:                 number of sign changes of the signal
        - _mcr:                 number of sign changes of (signal - mean)
    """
    data = np.asarray(data)
    if data.size == 0:
        return None

    mean = np.mean(data)
    lowest = np.amin(data)
    highest = np.amax(data)

    rv = {}
    rv[col_name+'_min'] = lowest
    rv[col_name+'_max'] = highest
    rv[col_name+'_mean'] = mean
    rv[col_name+'_median'] = np.median(data)
    # stats.mode returns a length-1 array on older SciPy and a scalar on newer
    # releases; ravel() makes the "first mode" lookup work on both.
    rv[col_name+'_mode'] = np.ravel(stats.mode(data, axis=0).mode)[0]
    rv[col_name+'_std'] = np.std(data)
    rv[col_name+'_variance'] = np.var(data)
    rv[col_name+'_skew'] = stats.skew(data,axis = 0)
    rv[col_name+'_kur'] = stats.kurtosis(data,axis = 0)
    rv[col_name+'_eightperc'] = np.percentile(data, 80,axis = 0)
    rv[col_name+'_sixperc'] = np.percentile(data, 60,axis = 0)
    rv[col_name+'_fourperc'] = np.percentile(data, 40,axis = 0)
    rv[col_name+'_twoperc'] = np.percentile(data, 20,axis = 0)
    rv[col_name+'_rms'] = np.sqrt(np.mean(data**2))
    rv[col_name+'_iqr'] = stats.iqr(data,axis = 0)
    # Strict comparisons are kept on purpose so existing feature values do not change.
    rv[col_name+'_countgeq'] = np.count_nonzero(data > mean)/float(len(data))
    rv[col_name+'_countleq'] = np.count_nonzero(data < mean)/float(len(data))
    rv[col_name+'_range'] = highest - lowest
    rv[col_name+'_zcr'] = (np.diff(np.sign(data)) != 0).sum()

    # Mean-crossing rate: same sign-change count, applied to the centred signal.
    centred = data - mean
    rv[col_name+'_mcr'] = (np.diff(np.sign(centred)) != 0).sum()

    return rv

In [3]:
def normalize(x):
    """Linearly rescale ``x`` so its minimum maps to -1 and its maximum to +1.

    The minimum and maximum are taken over all elements of ``x``.

    A constant input has zero range, which would otherwise divide 0 by 0 and
    return all-NaN; in that case an all-zero float array of the same shape is
    returned instead.
    """
    lowest = np.min(x)
    span = np.max(x) - lowest
    if span == 0:
        return np.zeros_like(x, dtype=float)
    return 2 * ((x - lowest) / span) - 1


def demean(x):
    """Return ``x`` shifted so that its mean (over all elements) is zero."""
    offset = np.mean(x)
    return x - offset

In [4]:
# Show each directory under data/train together with its sub-directories and files.
for dirpath, dirnames, filenames in os.walk('data/train'):
    print(dirpath, dirnames, filenames)

data/train ['talking', 'cough', 'normal_breathing', 'mask_off'] []
data/train/talking [] ['s_2021-02-10 18:34:39.763.npy', 's_2021-02-10 16:36:38.417.npy']
data/train/cough [] ['s_2021-02-10 22:18:44.719.npy', 's_2021-02-10 20:53:33.741.npy', 's_2021-02-10 22:18:17.501.npy', 's_2021-02-10 20:57:59.617.npy']
data/train/normal_breathing [] ['s_2021-02-10 15:11:17.966.npy', 's_2021-02-10 18:12:14.117.npy']
data/train/mask_off [] ['s_2021-02-10 16:24:58.357.npy', 's_2021-02-10 18:27:53.484.npy']


In [5]:
# data prep
# Class-folder name -> integer label. Four classes are labelled, matching the
# four target_names passed to classification_report further down.
CLASS_LABELS = {'mask_off': 0, 'normal_breathing': 1, 'talking': 2, 'cough': 3}
clss = list(CLASS_LABELS.values()) # mask_off, normal_breathing, talking, cough
X_train, Y_train, X_test, Y_test = [], [], [], []


# Collect every recording under data/train as '<class_folder>/<file_name>'.
data_files = []
for root, subdirs, files in os.walk('data/train'):
    if len(files) == 0: continue

    class_dir = os.path.basename(root)  # portable form of root.split("/")[-1]
    for file in files:
        data_files.append(f'{class_dir}/{file}')

print(data_files)


for file in data_files:
    # Resolve the label before appending anything, so an unrecognised folder
    # cannot leave X and Y with different lengths.
    class_dir = file.split('/')[0]
    if class_dir not in CLASS_LABELS:
        raise ValueError(f'no class label defined for folder {class_dir!r} ({file})')
    label = CLASS_LABELS[class_dir]

    # The test recording is looked up under data/test with the same relative
    # path as the training recording.
    train = np.load(f'data/train/{file}')
    test = np.load(f'data/test/{file}')

    for samples, X, Y in ((train, X_train, Y_train), (test, X_test, Y_test)):
        for s in samples:
            # Element 1 of each sample is used as the feature vector
            # (presumably the signal values - TODO confirm), after mean removal.
            X.append(demean(s[1]))
            Y.append(label)

X_train = np.array(X_train, dtype='float64')
Y_train = np.array(Y_train, dtype='float64')
X_test = np.array(X_test, dtype='float64')
Y_test = np.array(Y_test, dtype='float64')


['talking/s_2021-02-10 18:34:39.763.npy', 'talking/s_2021-02-10 16:36:38.417.npy', 'cough/s_2021-02-10 22:18:44.719.npy', 'cough/s_2021-02-10 20:53:33.741.npy', 'cough/s_2021-02-10 22:18:17.501.npy', 'cough/s_2021-02-10 20:57:59.617.npy', 'normal_breathing/s_2021-02-10 15:11:17.966.npy', 'normal_breathing/s_2021-02-10 18:12:14.117.npy', 'mask_off/s_2021-02-10 16:24:58.357.npy', 'mask_off/s_2021-02-10 18:27:53.484.npy']


In [6]:
# Sanity check: the feature matrix and the label vector must have the same number of rows.
print(*(arr.shape for arr in (X_train, Y_train)))

(1358, 60) (1358,)


In [7]:
# Fit a 3-nearest-neighbour classifier; nan_to_num replaces NaNs in the
# features with 0 before fitting.
model = KNeighborsClassifier(n_neighbors=3).fit(np.nan_to_num(X_train), Y_train)
X_train.shape

(1358, 60)

In [8]:
# Predict the test split with the KNN model (NaNs replaced by 0, as for training).
X_test_filled = np.nan_to_num(X_test)
y_pred = model.predict(X_test_filled)

In [9]:
# Per-class precision / recall / F1 for the predictions currently held in y_pred.
class_names = ['mask_off', 'normal_breathing', 'talking', 'cough']
print(classification_report(Y_test, y_pred, target_names=class_names))

                  precision    recall  f1-score   support

        mask_off       0.81      1.00      0.90       186
normal_breathing       0.93      0.97      0.95       184
         talking       0.96      0.74      0.84       200
           cough       0.00      0.00      0.00         6

        accuracy                           0.89       576
       macro avg       0.68      0.68      0.67       576
    weighted avg       0.89      0.89      0.88       576



In [22]:
# Support-vector classifier constructed with no arguments, fitted on the same
# NaN-filled training features as the KNN model.
model_svm = svm.SVC()
X_train_filled = np.nan_to_num(X_train)
model_svm.fit(X_train_filled, Y_train)

SVC()

In [23]:
# Predict the test split with the SVM and print its per-class report.
# Note: this rebinds y_pred, which previously held the KNN predictions.
X_test_filled = np.nan_to_num(X_test)
y_pred = model_svm.predict(X_test_filled)
class_names = ['mask_off', 'normal_breathing', 'talking', 'cough']
print(classification_report(Y_test, y_pred, target_names=class_names))

                  precision    recall  f1-score   support

        mask_off       0.78      1.00      0.87       186
normal_breathing       0.89      0.91      0.90       184
         talking       0.96      0.71      0.82       200
           cough       0.00      0.00      0.00         6

        accuracy                           0.86       576
       macro avg       0.66      0.65      0.65       576
    weighted avg       0.87      0.86      0.85       576



In [24]:
# Convert the fitted SVM (`model_svm`) with the sklearn converter reached via
# `ct`, then save the result as 'ResCls.mlmodel'.
# NOTE(review): `ct` is not imported in any visible cell - presumably
# `import coremltools as ct` ran in a cell that is no longer in the notebook;
# add it to the import cell so Restart & Run All works.
# NOTE(review): the variable is named `..._knn`, but the model being converted
# is the SVM, not the KNN classifier.
# NOTE(review): the saved output of this cell is
# "NameError: name '_tree' is not defined", so the conversion did not complete
# when the notebook was last run.
coreml_model_knn = ct.converters.sklearn.convert(model_svm)
coreml_model_knn.save('ResCls.mlmodel')

NameError: name '_tree' is not defined