In [1]:
import glob
import itertools

import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split
from sklearn.model_selection import KFold

from sklearn.ensemble import BaggingClassifier

from sklearn.svm import LinearSVC

In [2]:
files = map(np.load, glob.glob("../export/data/feature_data_*.npz"))

In [3]:
n_subject = len(files)

skip_sample = 100
n_sample = 1500

signals = files[0]["signals"]
attributes = files[0]["attributes"]
print signals.tolist()
print attributes.tolist()

['ECG', 'EDA', 'Resp', 'SKT']
['mean', 'std', 'min', 'max', 'mean_diff', 'mean_abs_diff']


In [4]:
# return data and label for each signal in combination list
def load_data_from_combination_list(combination_list):

    data = pd.DataFrame()

    for f in files:

        session_info = f["session_info"].tolist()

        sid = int(session_info["sessionId"])
        label = int(session_info["feltVlnc"])

        d = pd.DataFrame()
        
        for signal in combination_list:
            d[signal] = f["valence"].tolist()[signal][skip_sample:skip_sample + n_sample]
            # add label for current series
            d["label"] = label
    
        # concat value
        data = pd.concat([data, d], ignore_index=True)
        
    # remove NaN values with median for each feature 
    data.fillna(data.median(), inplace=True)

    X = data.drop(["label"], axis=1)
    Y = data["label"]
    
    # reduce class numbers
    Y[Y==1] = 0
    Y[Y==2] = 0
    Y[Y==3] = 0
    Y[Y==5] = 0
    Y[Y==7] = 0
    Y[Y==12] = 0

    Y[Y==4] = 1
    Y[Y==6] = 1
    Y[Y==8] = 1
    Y[Y==9] = 1
    Y[Y==11] = 1
    
    return X, Y


def get_combinations(items):
    combinations = []
    for s in map(set, itertools.product(items, repeat=len(items))):
        if s not in combinations:
            combinations.append(s)
    return combinations


def generate_signal_and_attribute_combination(min_attribute_per_singal=3):

    signal_combination = get_combinations(signals.tolist())
    attribute_combination = get_combinations(attributes.tolist())
    
    for sig_comb in sorted(signal_combination, key=len):
                
        for attr_comb in sorted(attribute_combination, key=len):
            
            combination = []
            
            sig_key = { sc : 0 for sc in sig_comb }
            
            for sc in sig_comb:
                for ac in attr_comb:
                    
                    sig_key[sc] += 1
                
                    combination.append("{}_{}".format(sc, ac))
                
                if all([ v >= min_attribute_per_singal for v in sig_key.values() ]):
                
                    yield combination

                
def find_combination_with_best_accuracy(accuracy_list, k=1):
    
    def key_func(a):
        return a["scores"].mean()
        
    return sorted(accuracy_list, key=key_func, reverse=True)[:k]


# Create classifier and test prediction
def create_and_test_classifier(X, Y):
    
    clf = LinearSVC(C=1, tol=1e-4, dual=False, fit_intercept=False, class_weight='balanced', verbose=False)
    cv = KFold(n_splits=10, shuffle=False, random_state=0)
    
    n_estimators = 10
    bagging = BaggingClassifier(clf, max_samples=1.0 / n_estimators, n_estimators=n_estimators, n_jobs=-1, verbose=True)

    scores = cross_val_score(bagging, X, Y.values.ravel(), cv=cv, n_jobs=-1, verbose=True, scoring='average_precision')

    return scores

In [5]:
accuracy_list = [] 

for combination_list in generate_signal_and_attribute_combination(min_attribute_per_singal=6):
    X, Y = load_data_from_combination_list(combination_list)
    
    scores = create_and_test_classifier(X, Y)
    
    accuracy_list.append({
        "combination" : combination_list,
        "scores": scores
    })

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
A value is trying to be set on a copy of a slice from a

In [6]:
k_best_result = 300
for i in find_combination_with_best_accuracy(accuracy_list, k=k_best_result):
    print "combination : {}".format(i["combination"])
    print "\taccuracy: {:.2f} (+/- {:.2f})".format(i["scores"].mean(), i["scores"].std() * 2)
    print

combination : ['SKT_std', 'SKT_mean_diff', 'SKT_min', 'SKT_max', 'SKT_mean_abs_diff', 'SKT_mean']
	accuracy: 0.42 (+/- 0.60)

combination : ['SKT_std', 'SKT_mean_diff', 'SKT_min', 'SKT_max', 'SKT_mean_abs_diff', 'SKT_mean', 'Resp_std', 'Resp_mean_diff', 'Resp_min', 'Resp_max', 'Resp_mean_abs_diff', 'Resp_mean']
	accuracy: 0.42 (+/- 0.57)

combination : ['ECG_std', 'ECG_mean_diff', 'ECG_min', 'ECG_max', 'ECG_mean_abs_diff', 'ECG_mean', 'SKT_std', 'SKT_mean_diff', 'SKT_min', 'SKT_max', 'SKT_mean_abs_diff', 'SKT_mean']
	accuracy: 0.41 (+/- 0.57)

combination : ['ECG_std', 'ECG_mean_diff', 'ECG_min', 'ECG_max', 'ECG_mean_abs_diff', 'ECG_mean', 'SKT_std', 'SKT_mean_diff', 'SKT_min', 'SKT_max', 'SKT_mean_abs_diff', 'SKT_mean', 'Resp_std', 'Resp_mean_diff', 'Resp_min', 'Resp_max', 'Resp_mean_abs_diff', 'Resp_mean']
	accuracy: 0.39 (+/- 0.52)

combination : ['SKT_std', 'SKT_mean_diff', 'SKT_min', 'SKT_max', 'SKT_mean_abs_diff', 'SKT_mean', 'Resp_std', 'Resp_mean_diff', 'Resp_min', 'Resp_max', 

In [7]:
np.save("accuracy_list_mode_2_6_attr_per_signal", accuracy_list)