In [1]:
import numpy as np
from sklearn import svm
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
import scipy.stats
import scipy
import json
import glob

In [2]:
# Small hand-written sample used in the following cells to try out Features.
# NOTE(review): the last value (23432) is orders of magnitude larger than the
# others -- presumably a deliberate outlier; confirm.
arr = [0.123,0.123,0.231,0.2321,0.2324,23432]

In [3]:
def sliding_window(arr_full,step, size):
    """Split a sequence into windows taken every ``step`` elements.

    Windows start at indices 0, step, 2*step, ... and each one is the slice
    ``arr_full[start : start + size]``. Windows that reach past the end of
    the input are truncated, not dropped, so the trailing windows can hold
    fewer than ``size`` elements.

    Args:
        arr_full: any object supporting ``len()`` and slicing (list, string,
            numpy array, ...).
        step: distance between consecutive window starts; must be >= 1.
        size: maximum number of elements per window.

    Returns:
        A list with one slice of ``arr_full`` per window start.

    Raises:
        ValueError: if ``step`` is smaller than 1. A non-positive step would
            never advance through the input (the previous while-loop spun
            forever in that case).
    """
    if step < 1:
        raise ValueError("step must be a positive integer, got %r" % (step,))
    return [arr_full[start : start + size]
            for start in range(0, len(arr_full), step)]

In [4]:
class Features(object):
    """Summary statistics of one signal segment.

    Every statistic is computed eagerly in the constructor and stored as an
    attribute; ``get_features`` only packs them into a tuple.
    """

    # Order in which get_features() reports the statistics.
    _FEATURE_NAMES = ('var', 'mean', 'rms', 'energy', 'aad',
                      'kurtosis_val', 'skew')

    def __init__(self,arr):
        """Compute the statistics of ``arr`` (anything np.array accepts)."""
        samples = np.array(arr)
        self.arr = samples
        self.var = np.var(samples)
        self.mean = np.mean(samples)
        # Energy is the mean of the squared samples; rms is its square root.
        self.energy = np.mean(np.square(samples))
        self.rms = np.sqrt(self.energy)
        # NOTE(review): this is the mean of the absolute values, not the mean
        # absolute deviation from the mean -- confirm which one "aad" means.
        self.aad = np.mean(np.abs(samples))
        # scipy.stats.kurtosis / skew are called with their default arguments.
        self.kurtosis_val = scipy.stats.kurtosis(samples)
        self.skew = scipy.stats.skew(samples)

    def get_features(self):
        """Return (var, mean, rms, energy, aad, kurtosis_val, skew) as a tuple."""
        return tuple(getattr(self, name) for name in self._FEATURE_NAMES)
        





    

In [5]:
# Compute the summary statistics of the toy sample `arr`.
feat = Features(arr)

In [6]:
# Pack the statistics into a tuple: (var, mean, rms, energy, aad, kurtosis, skew).
features = feat.get_features()

In [7]:
# Bare expression: shows the feature tuple as this cell's output.
features

(76256916.60573815,
 3905.4902500000003,
 9566.073943817451,
 91509770.69858319,
 3905.4902500000003,
 1.1999999997763116,
 1.7888543818747866)

In [8]:
def convert_scale(valence, arousal, num_classes = 2):
    """Map valence/arousal scores to integer class labels.

    Before thresholding, valence is shifted by +3 and arousal is negated and
    then shifted by +3, so every comparison below is made against 3 as the
    origin of the shifted scale.
    NOTE(review): the example clips in this notebook store scores such as
    -2, -1, 1, 2, which suggests the raw inputs are centred on 0 and that
    arousal is recorded with inverted sign -- confirm against the data
    collection protocol.

    input:
    valence: scalar or 1-D sequence/array of numeric valence scores.
    arousal: scalar or 1-D sequence/array of numeric arousal scores, same
             length as valence.
    num_classes: 2 selects the binary scheme; any other value selects the
             four-quadrant scheme.

    output:
    list of int, one label per sample.
      num_classes == 2:
        1 if shifted valence < 3 and shifted arousal > 3, otherwise 0.
      otherwise:
        1 if valence <  3 and arousal >= 3
        2 if valence >= 3 and arousal >= 3
        3 if valence >= 3 and arousal <  3
        4 if valence <  3 and arousal <  3
      A sample that satisfies none of the comparisons (e.g. NaN) keeps 0.

    raises:
    ValueError if valence and arousal do not have the same shape.
    """
    # np.asarray lets plain Python lists through; the original arithmetic
    # (`list + 3`) only worked for numpy arrays.
    valence = np.atleast_1d(np.asarray(valence)) + 3
    arousal = -1 * np.atleast_1d(np.asarray(arousal)) + 3
    if valence.shape != arousal.shape:
        raise ValueError(
            "valence and arousal must have the same shape, got %r and %r"
            % (valence.shape, arousal.shape))

    labels = np.zeros(valence.shape, dtype=int)
    low_valence = valence < 3
    if num_classes == 2:
        # Strict '>' here: arousal exactly at the origin counts as class 0.
        labels[low_valence & (arousal > 3)] = 1
    else:
        high_valence = valence >= 3
        high_arousal = arousal >= 3
        low_arousal = arousal < 3
        labels[low_valence & high_arousal] = 1
        labels[high_valence & high_arousal] = 2
        labels[high_valence & low_arousal] = 3
        labels[low_valence & low_arousal] = 4

    # tolist() yields plain Python ints, matching the original return type.
    return labels.tolist()
            
        
    

In [9]:
def gather_feat(arr_long,segment = 0):
    """Extract Features statistics from a signal, whole or per window.

    segment == 0: the statistics of the entire signal are returned as the
    tuple produced by Features.get_features().

    segment != 0: the signal is cut with sliding_window(step = 1,
    size = segment) and the result is a numpy array built from one row of
    statistics per window.
    """
    if segment == 0:
        return Features(arr_long).get_features()

    windows = sliding_window(arr_long,step = 1,size = segment)
    rows = [np.array(Features(window).get_features()) for window in windows]
    return np.array(rows)


# Hand-written example of the per-clip structure that read_data reads from
# each JSON file: every clip key maps to "heartrate" and "gsr" sample lists
# plus scalar "valence" and "arousel" scores. This dict is not referenced by
# any other code visible in this notebook.
# NOTE: the key is spelled "arousel" (sic); read_data looks it up under that
# exact spelling, so it must not be "corrected" on one side only.
val_dict_ext = {
  "run1_clip1": {
    "arousel": -1,
    "heartrate": [141, 141, 132, 125, 120, 104, 91, 82, 75],
    "gsr": [824, 827, 824, 824, 824, 825, 825, 824, 826],
    "valence": 1
    },
  "run1_clip2": {
    "arousel": -2,
    "valence": 2,
    "heartrate": [70, 79, 86, 91, 95, 98, 100],
    "gsr": [827, 824, 824, 825, 824, 824, 824]
  }
}

def read_data(json_file,window_size = 0 ):
    """Load one recording file and turn each clip into feature rows.

    The JSON file must hold a mapping of clip name -> dict with the keys
    'arousel', 'valence', 'heartrate' and 'gsr'.

    window_size == 0: one feature row and one valence/arousal entry per clip.
    window_size != 0: one feature row per sliding window of that size, with
    the clip's valence/arousal repeated once per GSR window.
    NOTE(review): the repeat count comes from the GSR rows only, so labels
    line up with the heart-rate rows only when both sample lists of a clip
    have the same length -- confirm that always holds for the data.

    Returns:
        (heart_rate_features, gsr_features, valence, arousal) as numpy arrays.
    """
    with open(json_file) as handle:
        clips = json.load(handle)

    hr_rows, gsr_rows = [], []
    valence_labels, arousal_labels = [], []
    per_window = window_size != 0

    for record in clips.values():
        clip_arousal = record['arousel']
        clip_valence = record['valence']
        hr_feat = gather_feat(np.array(record['heartrate']), segment = window_size)
        gsr_feat = gather_feat(np.array(record['gsr']), segment = window_size)

        if per_window:
            # Many rows per clip: replicate the clip-level scores per row.
            n_rows = gsr_feat.shape[0]
            valence_labels.extend(np.repeat(clip_valence, n_rows))
            arousal_labels.extend(np.repeat(clip_arousal, n_rows))
            hr_rows.extend(hr_feat)
            gsr_rows.extend(gsr_feat)
        else:
            # Exactly one row per clip.
            valence_labels.append(clip_valence)
            arousal_labels.append(clip_arousal)
            hr_rows.append(hr_feat)
            gsr_rows.append(gsr_feat)

    return (np.array(hr_rows), np.array(gsr_rows),
            np.array(valence_labels), np.array(arousal_labels))




In [10]:
# Load every recording under `datapath` and stack the per-file arrays.
# NOTE(review): absolute, user-specific path; point this at the local data
# directory before running on another machine.
datapath = '/Users/Amelie/Downloads/data_collection'

# glob returns matches in arbitrary order and the first file is treated
# specially below, so sort to make the result reproducible across runs.
json_files = sorted(glob.glob(datapath + '/*'))
if not json_files:
    raise FileNotFoundError('no data files found under ' + datapath)

# NOTE(review): the first file is read here with window_size = 1 and then
# again inside the loop with window_size = 0, so its clips contribute twice
# (per-sample rows plus one whole-clip row). This is kept unchanged because
# the recorded shape and metrics depend on it -- confirm it is intended.
hr_parts, gsr_parts, val_parts, aro_parts = (
    [part] for part in read_data(json_files[0], window_size = 1)
)

for json_file in json_files:
    hr, gsr, val, aro = read_data(json_file, window_size = 0)
    hr_parts.append(hr)
    gsr_parts.append(gsr)
    val_parts.append(val)
    aro_parts.append(aro)

# One concatenate per array instead of re-copying the growing array with
# np.append on every iteration; the stacked result is the same.
hr_all = np.concatenate(hr_parts, axis = 0)
gsr_all = np.concatenate(gsr_parts, axis = 0)
val_all = np.concatenate(val_parts, axis = 0)
aro_all = np.concatenate(aro_parts, axis = 0)

print(hr_all.shape)

(2726, 7)


In [11]:
from sklearn.neighbors import NearestNeighbors
from sklearn.linear_model import LogisticRegression

def svm_classifier(input_feat,labels):
    """
    Fit an RBF-kernel SVC on 70% of the samples and predict the other 30%.

    input:
    input_feat: n_samples x n_features
    labels: n_samples (0,1)
    output:
    (pred, Y_test, clf, X_test): predictions for the held-out samples, their
    true labels, the fitted classifier and the held-out feature rows.
    """
    train_x, test_x, train_y, test_y = train_test_split(
        input_feat, labels, test_size=0.3, random_state = 100000)

    # Hyper-parameters are fixed by hand. Earlier experiments that used to be
    # commented out here: a GridSearchCV over C / kernel / gamma, and
    # C = 1000 with class_weight {1: 20} and {1: 4}.
    model = svm.SVC(kernel='rbf', C = 0.3, gamma = 'auto', class_weight = {1: 4})
    model.fit(train_x, train_y)
    predictions = model.predict(test_x)

    return predictions, test_y, model, test_x
def decision_tres(input_feat,labels):
    """Placeholder for a decision-tree classifier: does nothing, returns None."""
    return None

def _class_recall(pred, truth, label):
    """Share of samples whose true class is `label` that were predicted as `label`.

    Returns NaN when `truth` contains no sample of that class, instead of
    dividing by zero.
    """
    pred = np.asarray(pred)
    truth = np.asarray(truth)
    is_label = truth == label
    support = int(np.count_nonzero(is_label))
    if support == 0:
        return float('nan')
    hits = int(np.count_nonzero(pred[is_label] == label))
    return hits / support


def logistic(input_feat,labels):
    """
    Fit an L2-regularised logistic regression on 90% of the samples and
    score it on the remaining 10%.

    input:
    input_feat: n_samples x n_features
    labels: n_samples (0,1)
    output:
    (pred, accuracy, accuracy2, Y_test, clf, X_test)
      pred:      predictions for the held-out samples
      accuracy:  fraction of held-out class-1 samples predicted as 1
      accuracy2: fraction of held-out class-0 samples predicted as 0
      Y_test, clf, X_test: held-out labels, fitted model, held-out features
    Despite their names, accuracy/accuracy2 are per-class recall values. Each
    is NaN when the held-out split contains no sample of that class; the
    previous code raised ZeroDivisionError in that situation.
    """
    X = input_feat
    Y = labels
    X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.1, random_state = 100000)
    clf = LogisticRegression(penalty='l2', dual=False, tol=0.0001, C=100,  class_weight={1 :4}, solver='lbfgs',max_iter = 500)
    clf.fit(X_train,Y_train)
    pred = clf.predict(X_test)
    accuracy = _class_recall(pred, Y_test, 1)
    accuracy2 = _class_recall(pred, Y_test, 0)

    return pred, accuracy,accuracy2, Y_test,clf,X_test
    

        

In [12]:
from sklearn.metrics import precision_score,recall_score

# Binary labels from the stacked valence / arousal scores.
labels = convert_scale(val_all, aro_all, num_classes=2)

# One SVM per signal: GSR features first, heart-rate features second.
pred, true, clf, x_test = svm_classifier(gsr_all, labels)
pred1, true1, clf1, x_test1 = svm_classifier(hr_all, labels)

# Per-class precision and recall of the GSR model only; the heart-rate
# results (pred1 / true1) are computed above but not reported in this cell.
print(precision_score(true, pred, average=None))
print(recall_score(true, pred, average=None))




[0.96050776 0.73394495]
[0.95915493 0.74074074]


In [18]:
import coremltools

# Convert the fitted GSR classifier `clf` to a Core ML model. The seven input
# names correspond, in order, to the columns produced by
# Features.get_features(): var, mean, rms, energy, aad, kurtosis, skew.
coreml_model = coremltools.converters.sklearn.convert(
    clf,
    [
        'GSR_feat1', 'GSR_feat2', 'GSR_feat3', 'GSR_feat4',
        'GSR_feat5', 'GSR_feat6', 'GSR_feat7',
    ],
    'emotion class',
)



In [20]:
# Write the converted model to 'anxiety.mlmodel' (a relative path).
coreml_model.save('anxiety.mlmodel')


In [21]:
# Add sliding-window features (window_size = 10) from every file to the
# arrays built by the loading cell.
# NOTE(review): this grows hr_all / gsr_all / val_all / aro_all in place, so
# running the cell again appends the same rows a second time, and the
# classifier trained and exported above never sees these rows -- confirm
# whether this cell is still needed.
for json_file in json_files:
    hr, gsr, val, aro = read_data(json_file, window_size = 10)
    hr_all = np.concatenate((hr_all, hr), axis = 0)
    gsr_all = np.concatenate((gsr_all, gsr), axis = 0)
    val_all = np.concatenate((val_all, val), axis = 0)
    aro_all = np.concatenate((aro_all, aro), axis = 0)