In [1]:
from preprocessing import * 
from sklearn import svm
from sklearn import pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import cohen_kappa_score
from joblib import dump, load

%load_ext autoreload
%autoreload 2

# Convert GDF files to numpy

In [3]:
%%capture
# Uncomment the next line to split the data folder into train and test
# convert_data()


# Read split data

In [4]:
# Subject number; it is formatted into the "A0<i>" identifier handed to read_file.
i = 7
subject = f"A0{i}"

# Load the train and test splits for the chosen subject.
x_train, y_train = read_file("action_train", subject)
x_test, y_test = read_file("action_test", subject)

# Report the array shapes: train split first, then test split.
for split_x, split_y in ((x_train, y_train), (x_test, y_test)):
    print("final shapes are: ", split_x.shape, split_y.shape)


final shapes are:  (230, 25, 750) (230,)
final shapes are:  (58, 25, 750) (58,)


# Extract features from test and train data

In [5]:
%%capture
# Apply wavelet decomposition to the training trials; the result is indexed
# by position and holds one entry per coefficient set.
train_coeff = featurize(x_train)
coeff_len = len(train_coeff)

# Fit one independent CSP model per coefficient set, using training data only.
csp = [mne.decoding.CSP(8) for _ in range(coeff_len)]
train_parts = [
    csp[idx].fit_transform(train_coeff[idx], y_train)
    for idx in range(coeff_len)
]
# Join the per-set CSP features along the last (feature) axis.
x_train_f = np.concatenate(train_parts, axis=-1)

# Push the test trials through the already-fitted CSP models (no refitting).
test_coeff = featurize(x_test)
test_parts = [
    csp[idx].transform(test_coeff[idx])
    for idx in range(coeff_len)
]
x_test_f = np.concatenate(test_parts, axis=-1)
  

# Train model and test

In [6]:
# Standardize the CSP features, then fit an SVC with default hyper-parameters.
clf = pipeline.make_pipeline(StandardScaler(), svm.SVC())
clf.fit(x_train_f, y_train)

# Predict once per split and reuse the result for every metric below;
# the fitted pipeline is not modified between calls, so repeating predict()
# for each metric only repeated the same work.
train_pred = clf.predict(x_train_f)
test_pred = clf.predict(x_test_f)

# Accuracy = number of correct predictions / number of trials.
print("Accuracy is ", sum(train_pred == y_train) / len(x_train))
print(len(x_train))

print("test Accuracy is ", sum(test_pred == y_test) / len(x_test))
print(len(x_test))

# Cohen's kappa between predicted and true labels.
print("kappa score on train is: ", cohen_kappa_score(train_pred, y_train))
print("kappa score on test is: ", cohen_kappa_score(test_pred, y_test))


Accuracy is  1.0
230
test Accuracy is  0.9482758620689655
58
kappa score on train is:  1.0
kappa score on test is:  0.9303721488595438


# Save the trained model

In [7]:
# Persist whatever `clf` and `csp` are bound to when this cell runs, using
# joblib's dump (imported at the top of the notebook).
# The classifier goes to "model.joblib" and the list of CSP objects to
# "csp.joblib"; both paths are relative to the current working directory.
dump(clf, "model.joblib")
dump(csp, "csp.joblib")

['csp.joblib']

# View classification report

In [8]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Per-class precision / recall / F1 of the current classifier on the test split.
test_predictions = clf.predict(x_test_f)
report_text = classification_report(y_test, test_predictions)
print(report_text)

              precision    recall  f1-score   support

         1.0       0.94      0.94      0.94        17
         2.0       1.00      0.93      0.97        15
         3.0       0.91      1.00      0.95        10
         4.0       0.94      0.94      0.94        16

    accuracy                           0.95        58
   macro avg       0.95      0.95      0.95        58
weighted avg       0.95      0.95      0.95        58



# Calculate average accuracy and kappa score over all subjects

In [9]:
%%capture
# Run the full pipeline (load -> featurize -> CSP -> scaler + SVC) for
# subjects 1..9, collecting log entries in `outputs` and summing the metrics.
#
# NOTE: the loop rebinds the notebook-level names x_train, y_train, x_test,
# y_test, train_coeff, test_coeff, csp, x_train_f, x_test_f and clf; once it
# finishes they all refer to the last subject processed.
outputs = []

# Running sums of the per-subject metrics. Despite the "_avg" suffix they
# only become averages once divided by the number of subjects.
acc_train_avg = 0
acc_test_avg = 0
kappa_train_avg = 0
kappa_test_avg = 0

for i in range(1, 10):
  x_train, y_train = read_file("action_train", f"A0{i}")
  x_test, y_test = read_file("action_test", f"A0{i}")

  outputs.append(("final shapes are: ", x_train.shape, y_train.shape))
  outputs.append(("final shapes are: ", x_test.shape, y_test.shape))

  # Decompose the training trials; one entry per coefficient set.
  train_coeff = featurize(x_train)
  coeff_len = len(train_coeff)

  # One CSP per coefficient set, fitted on this subject's training data only.
  csp = [mne.decoding.CSP(8) for _ in range(coeff_len)]
  x_train_f = np.concatenate(tuple(csp[x].fit_transform(train_coeff[x], y_train) for x in range(coeff_len)), axis=-1)

  # Test trials go through the already-fitted CSPs.
  test_coeff = featurize(x_test)
  x_test_f = np.concatenate(tuple(csp[x].transform(test_coeff[x]) for x in range(coeff_len)), axis=-1)

  clf = pipeline.make_pipeline(StandardScaler(), svm.SVC())
  clf.fit(x_train_f, y_train)

  # Predict once per split and compute each metric once; the same values
  # feed both the log entries and the running sums.
  train_pred = clf.predict(x_train_f)
  test_pred = clf.predict(x_test_f)
  acc_train = sum(train_pred == y_train) / len(x_train)
  acc_test = sum(test_pred == y_test) / len(x_test)
  kappa_train = cohen_kappa_score(train_pred, y_train)
  kappa_test = cohen_kappa_score(test_pred, y_test)

  outputs.append("*"*30)
  outputs.append(f"subject {i} is here babeeeeeeeeeee")
  outputs.append(("Accuracy is ", acc_train))
  acc_train_avg += acc_train
  outputs.append(len(x_train))

  outputs.append(("test Accuracy is ", acc_test))
  outputs.append(len(x_test))
  acc_test_avg += acc_test

  outputs.append(("kappa score on train is: ", kappa_train))
  outputs.append(("kappa score on test is: ", kappa_test))
  kappa_train_avg += kappa_train
  kappa_test_avg += kappa_test


# Print previous cell output

In [10]:
# Replay, in order, the log entries gathered in `outputs`.
for entry in outputs:
  print(entry)

# The four accumulators hold sums; dividing by the subject count gives means.
# Printed order: train accuracy, test accuracy, train kappa, test kappa.
n_subjects = 9
for total in (acc_train_avg, acc_test_avg, kappa_train_avg, kappa_test_avg):
  print(total / n_subjects)

('final shapes are: ', (230, 25, 750), (230,))
('final shapes are: ', (58, 25, 750), (58,))
******************************
subject 1 is here babeeeeeeeeeee
('Accuracy is ', 1.0)
230
('test Accuracy is ', 0.7931034482758621)
58
('kappa score on train is: ', 1.0)
('kappa score on test is: ', 0.7203696263559662)
('final shapes are: ', (230, 25, 750), (230,))
('final shapes are: ', (58, 25, 750), (58,))
******************************
subject 2 is here babeeeeeeeeeee
('Accuracy is ', 1.0)
230
('test Accuracy is ', 0.7931034482758621)
58
('kappa score on train is: ', 1.0)
('kappa score on test is: ', 0.7207062600321027)
('final shapes are: ', (230, 25, 750), (230,))
('final shapes are: ', (58, 25, 750), (58,))
******************************
subject 3 is here babeeeeeeeeeee
('Accuracy is ', 0.9956521739130435)
230
('test Accuracy is ', 0.7068965517241379)
58
('kappa score on train is: ', 0.9941977800201817)
('kappa score on test is: ', 0.6070147469111199)
('final shapes are: ', (230, 25, 750)