# Project 4

### Dependencies and Constants

In [1]:
import time
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.svm import SVC
from sklearn.metrics import balanced_accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from biosppy.signals import eeg
from biosppy.signals import emg

# Pipeline mode switch read by the cells below:
#   True  -> test-set feature extraction is skipped, a hold-out split of the
#            training data is used instead, and its balanced accuracy is printed.
#   False -> the full training set is used and predictions are made for the
#            real test set.
PROTOTYPING = False

### Read data

In [2]:
start = time.time()


def _signal_matrix(csv_path):
    """Read one CSV file, drop its 'Id' column and return the rest as an ndarray."""
    frame = pd.read_csv(csv_path)
    return frame.drop('Id', axis=1).values


# Training recordings: two EEG channels and one EMG channel.
train_eeg1_raw = _signal_matrix('files/train_eeg1.csv')
train_eeg2_raw = _signal_matrix('files/train_eeg2.csv')
train_emg_raw = _signal_matrix('files/train_emg.csv')

# Test recordings, same three channels.
test_eeg1_raw = _signal_matrix('files/test_eeg1.csv')
test_eeg2_raw = _signal_matrix('files/test_eeg2.csv')
test_emg_raw = _signal_matrix('files/test_emg.csv')

# Labels for the training recordings.
train_labels_raw = _signal_matrix('files/train_labels.csv')

# Shape summary: one line per data set.
print(*(m.shape for m in (train_eeg1_raw, train_eeg2_raw, train_emg_raw)))
print(*(m.shape for m in (test_eeg1_raw, test_eeg2_raw, test_emg_raw)))
print(train_labels_raw.shape)

print("Time: ", time.time() - start)

(64800, 512) (64800, 512) (64800, 512)
(43200, 512) (43200, 512) (43200, 512)
(64800, 1)
Time:  24.806116104125977


### Feature extraction

In [14]:
# Wall-clock reference for the elapsed-time line printed at the end of this cell.
start = time.time()

def calculate_statistics(list_values):
    """Compute NaN-aware summary statistics of a signal.

    Parameters
    ----------
    list_values : array-like
        Numeric samples. Any shape is accepted; every statistic is computed
        over all elements (no axis is selected), ignoring NaNs.

    Returns
    -------
    list
        ``[n5, n25, n75, n95, median, mean, std, var, rms]`` where ``nXX`` is
        the XX-th percentile and ``rms`` is the root mean square.
    """
    # Accept plain Python sequences as well as ndarrays.
    values = np.asarray(list_values, dtype=float)

    # One percentile call instead of five separate passes over the data.
    n5, n25, n75, n95, median = np.nanpercentile(values, [5, 25, 75, 95, 50])
    mean = np.nanmean(values)
    std = np.nanstd(values)
    var = np.nanvar(values)
    # Root mean square: sqrt of the mean of squares. The previous form,
    # mean(sqrt(x**2)), is the mean absolute value rather than the RMS.
    rms = np.sqrt(np.nanmean(np.square(values)))
    return [n5, n25, n75, n95, median, mean, std, var, rms]
 
def calculate_crossings(list_values):
    """Count how often the signal switches sides of zero and of its mean.

    Returns ``[no_zero_crossings, no_mean_crossings]``. The mean is the
    NaN-aware mean of all elements. ``np.diff`` works along the last axis, so
    for input with more than one dimension adjacent entries of the last axis
    are compared and the count is the number of differing positions.
    """
    samples = np.array(list_values)

    def count_side_changes(threshold):
        # A position counts when neighbouring samples lie on different sides
        # of the threshold ("> threshold" flips between them).
        above = samples > threshold
        return len(np.nonzero(np.diff(above))[0])

    around_zero = count_side_changes(0)
    around_mean = count_side_changes(np.nanmean(list_values))
    return [around_zero, around_mean]
 
def get_features(list_values):
    """Return the feature list for one signal.

    The two crossing counts from ``calculate_crossings`` come first, followed
    by the summary statistics from ``calculate_statistics``.
    """
    return [
        *calculate_crossings(list_values),
        *calculate_statistics(list_values),
    ]

def extract_features(eeg1, eeg2, emg):
    """Build a feature matrix from the band signals of two EEG channels.

    For every row ``i`` the two EEG rows are stacked into a two-column signal
    and passed to ``biosppy.signals.eeg.eeg`` (sampling rate 128). Features
    from ``get_features`` are then collected for the theta, alpha_low,
    alpha_high, beta and gamma entries of the result, in that order.

    Parameters
    ----------
    eeg1, eeg2 : ndarray
        First and second EEG channel; row ``i`` of both is processed together.
    emg : ndarray
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    ndarray or None
        Float array with one row per input row, or ``None`` when ``eeg1`` has
        no rows.

    Notes
    -----
    NOTE(review): theta/alpha/beta are passed to ``get_features`` unindexed,
    while gamma is restricted to column 0. The statistics therefore pool
    whatever columns the band arrays have, and the crossing counts use
    ``np.diff`` along the last axis. This looks unintentional; confirm before
    changing, since it alters the feature layout.
    """
    n_rows = eeg1.shape[0]
    # Bands that are handed to get_features as returned by the analysis.
    pooled_bands = ("theta", "alpha_low", "alpha_high", "beta")

    # Rows are collected in a list and converted once at the end; growing the
    # matrix with np.concatenate inside the loop copies it on every iteration.
    rows = []
    for i in range(n_rows):
        if i % 1000 == 0:
            print(i, "/", n_rows)

        # One column per EEG channel.
        signal = np.array([eeg1[i], eeg2[i]]).T
        analysis = eeg.eeg(signal=signal, sampling_rate=128, show=False)

        row = []
        for band in pooled_bands:
            row.extend(get_features(analysis[band]))
        # gamma: first column only (see NOTE in the docstring).
        row.extend(get_features(analysis["gamma"][:, 0]))
        rows.append(row)

    if not rows:
        # Same result as before for empty input.
        return None
    return np.array(rows, dtype=float)

# Feature matrix for the training recordings (always needed).
X_train = extract_features(train_eeg1_raw, train_eeg2_raw, train_emg_raw)

# Test-set features are only computed for a full run; when PROTOTYPING is
# True, X_test is not assigned in this cell.
if not PROTOTYPING:
    X_test = extract_features(test_eeg1_raw, test_eeg2_raw, test_emg_raw)
    print("X_test", X_test.shape)
print("X_train", X_train.shape)

# Elapsed wall-clock time since `start` was set at the top of this cell.
print("Time: ", time.time() - start)

0 / 64800
1000 / 64800
2000 / 64800
3000 / 64800
4000 / 64800
5000 / 64800
6000 / 64800
7000 / 64800
8000 / 64800
9000 / 64800
10000 / 64800
11000 / 64800
12000 / 64800
13000 / 64800
14000 / 64800
15000 / 64800
16000 / 64800
17000 / 64800
18000 / 64800
19000 / 64800
20000 / 64800
21000 / 64800
22000 / 64800
23000 / 64800
24000 / 64800
25000 / 64800
26000 / 64800
27000 / 64800
28000 / 64800
29000 / 64800
30000 / 64800
31000 / 64800
32000 / 64800
33000 / 64800
34000 / 64800
35000 / 64800
36000 / 64800
37000 / 64800
38000 / 64800
39000 / 64800
40000 / 64800
41000 / 64800
42000 / 64800
43000 / 64800
44000 / 64800
45000 / 64800
46000 / 64800
47000 / 64800
48000 / 64800
49000 / 64800
50000 / 64800
51000 / 64800
52000 / 64800
53000 / 64800
54000 / 64800
55000 / 64800
56000 / 64800
57000 / 64800
58000 / 64800
59000 / 64800
60000 / 64800
61000 / 64800
62000 / 64800
63000 / 64800
64000 / 64800
0 / 43200
1000 / 43200
2000 / 43200
3000 / 43200
4000 / 43200
5000 / 43200
6000 / 43200
7000 / 43200
80

### Splitting

In [15]:
start = time.time()


def split(X_train, y_train):
    """Split features and labels 90/10 without shuffling.

    Returns whatever ``train_test_split`` returns for the two inputs
    (train/test parts of ``X_train`` followed by train/test parts of
    ``y_train``).
    """
    split_options = dict(test_size=0.1, shuffle=False, random_state=0)
    return train_test_split(X_train, y_train, **split_options)


print(X_train.shape, train_labels_raw.shape)
if not PROTOTYPING:
    # Full run: train on every labelled row.
    y_train = train_labels_raw
else:
    # Prototyping: carve a hold-out set out of the training data; it takes
    # the place of X_test / y_test in the following cells.
    X_train, X_test, y_train, y_test = split(X_train, train_labels_raw)
    print(X_train.shape, y_train.shape, X_test.shape, y_test.shape)

print("Time: ", time.time() - start)

(64800, 55) (64800, 1)
Time:  0.00028014183044433594


### Feature scaling

In [16]:
start = time.time()


def scale(X_train, X_test):
    """Standardise both matrices with a scaler fitted on ``X_train`` only.

    Returns ``(X_train_scaled, X_test_scaled)``.
    """
    fitted_scaler = StandardScaler()
    fitted_scaler.fit(X_train)
    return fitted_scaler.transform(X_train), fitted_scaler.transform(X_test)


X_train, X_test = scale(X_train, X_test)

print("Time: ", time.time() - start)

Time:  0.16492390632629395


### Training

In [17]:
start = time.time()

classifier = SVC(class_weight="balanced", gamma="auto", decision_function_shape="ovo")
# The labels were loaded as a single-column 2-D array; flatten them for fit()
# so scikit-learn does not emit its column_or_1d DataConversionWarning.
# y_train itself is left untouched.
classifier.fit(X_train, np.ravel(y_train))
y_predict = classifier.predict(X_test)

# A hold-out score is only available when the prototyping split was made.
if PROTOTYPING:
    print(balanced_accuracy_score(y_test, y_predict))

print("Time: ", time.time() - start)

  y = column_or_1d(y, warn=True)


Time:  192.4114339351654


### Write result

In [13]:
start = time.time()

output = pd.read_csv('files/sample.csv')

# Fail fast on a row-count mismatch instead of crashing midway through the
# fill (too few predictions) or silently ignoring the surplus (too many).
# With PROTOTYPING = True, y_predict holds hold-out predictions and will not
# match the submission template.
if len(y_predict) != output.shape[0]:
    raise ValueError(
        "y_predict has %d rows but files/sample.csv has %d rows"
        % (len(y_predict), output.shape[0])
    )

# Fill the second column of the template in one assignment instead of a
# per-row Python loop.
output.iloc[:, 1] = y_predict
output.to_csv("files/SVC_OvO_eeg_only.csv", index=False)

print("Time: ", time.time() - start)

Time:  0.4031839370727539
