In [None]:
from preprocessing import * 
from sklearn import svm
from sklearn import pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import cohen_kappa_score
from joblib import dump, load


%load_ext autoreload
%autoreload 2

In [None]:
%%capture

# ssize = sample size: number of consecutive rows taken for every window
# that convert_file cuts out of a recording
ssize = 750
def convert_file(filename, test=False):
  """Slice one GDF recording into fixed-length windows and save them as .npy.

  Two kinds of windows are written, in this order along axis 0:

  * action windows (label 1): ``ssize`` rows starting at every event whose
    annotation is in the selected set ('783' when ``test`` is true,
    otherwise '769', '770', '771', '772');
  * idle windows (label 0): back-to-back ``ssize``-row windows covering the
    start of the recording up to the sample of the 7th event.

  The arrays are saved as ``binary_data/<stem>X.npy`` and
  ``binary_data/<stem>Y.npy``, where ``<stem>`` is ``filename`` without its
  directory and extension. The window length comes from the module-level
  ``ssize``.

  Args:
    filename: path of the .gdf file to read.
    test: selects the annotation set as described above.

  Raises:
    KeyError: if a selected annotation does not occur in the recording.
  """
  # load the gdf file
  data = mne.io.read_raw_gdf(filename)

  # samples as a 2-D array, one row per sample
  values = data.to_data_frame().values  # filter(dataframe.values.T, [8, 12], 250).T

  # events: one row per annotation (column 0 = sample, column 2 = id);
  # codes: annotation string -> id
  events, codes = mne.events_from_annotations(data)

  # Everything before this sample is cut into idle windows.
  # NOTE(review): assumes the 7th event ends the idle period in every
  # recording - confirm for files with a different event layout.
  idle_end_event = 6
  num_idle_samples = events[idle_end_event, 0] // ssize

  # convert annotations to mne codes
  annotations = ['783'] if test else ['769', '770', '771', '772']
  wanted_ids = [codes[name] for name in annotations]

  # keep only the events of the selected classes
  ev = events[np.isin(events[:, 2], wanted_ids)]
  num_actions = len(ev)

  # Size the arrays from the data instead of assuming 288 events and 26
  # columns, so no placeholder rows are left behind and nothing overflows.
  x = np.ones((num_actions + num_idle_samples, ssize, values.shape[1]))
  y = np.ones(num_actions + num_idle_samples)

  # action windows: label stays 1
  for row, start in enumerate(ev[:, 0]):
    x[row] = values[start:start + ssize]

  # idle windows: consecutive, non-overlapping, label 0
  for k in range(num_idle_samples):
    row = num_actions + k
    x[row] = values[k * ssize:(k + 1) * ssize]
    y[row] = 0

  # Create directory for numpy data (no error if it is already there)
  os.makedirs("binary_data", exist_ok=True)

  # get file name without extension or path
  new_name = os.path.splitext(os.path.basename(filename))[0]

  # Save data to numpy arrays
  np.save(f"binary_data/{new_name}X", x)
  np.save(f"binary_data/{new_name}Y", y)


def convert_data():
  """Convert every training/evaluation recording found in ``data/``.

  Files named exactly ``A0<digit>T.gdf`` are passed to ``convert_file`` with
  ``test=False`` and files named exactly ``A0<digit>E.gdf`` with
  ``test=True``; every other directory entry is ignored.

  Raises:
    FileNotFoundError: if the ``data`` directory does not exist.
  """
  if not os.path.exists("data"):
    # Raise instead of print + exit(): exit() would take the notebook kernel
    # down, and a printed message is invisible under %%capture.
    raise FileNotFoundError("data directory doesn't exist")

  # Escaped dot + fullmatch: names such as "A01T.gdf.bak" no longer qualify.
  recording = re.compile(r"A0[0-9]([TE])\.gdf")

  # sorted() only makes the processing order repeatable
  for file in sorted(os.listdir("data")):
    found = recording.fullmatch(file)
    if found:
      convert_file(os.path.join("data", file), found.group(1) == "E")

# Convert every matching recording under data/ now; the %%capture magic at
# the top of this cell hides whatever the conversion prints
convert_data()

In [33]:
# Index of the recording (A0<i>T) whose saved windows are loaded
i = 6

# Saved windows are indexed (window, row, column). Column 0 is dropped
# (presumably the time column written by to_data_frame - confirm) and the
# last two axes are swapped, giving (window, column, row).
x = np.swapaxes(np.load(f"binary_data/A0{i}TX.npy")[:, :, 1:], 1, 2)
y = np.load(f"binary_data/A0{i}TY.npy")

# Hold out 20% of the windows; the fixed seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=100,
)

print("final shapes are: ", X_train.shape, X_test.shape, y_train.shape, y_test.shape)


final shapes are:  (334, 25, 750) (84, 25, 750) (334,) (84,)


In [34]:
%%capture
# featurize returns an indexable collection; one CSP is fitted per entry
train_coeff = featurize(X_train)
coeff_len = len(train_coeff)

# Independent CSP estimator (constructed with 8) for every entry
csp = [mne.decoding.CSP(8) for _ in range(coeff_len)]

# Fit each estimator on its own entry and join the results on the last axis
X_train_f = np.concatenate(
    [estimator.fit_transform(train_coeff[k], y_train) for k, estimator in enumerate(csp)],
    axis=-1,
)



In [35]:
# Same featurisation for the held-out windows, then the already-fitted CSP
# estimators are applied entry by entry (transform only, no refitting)
test_coeff = featurize(X_test)
X_test_f = np.concatenate(
    [csp[k].transform(test_coeff[k]) for k in range(coeff_len)],
    axis=-1,
)


In [36]:


# Standardise the CSP features and fit an SVC with default settings
clf = pipeline.make_pipeline(StandardScaler(), svm.SVC()).fit(X_train_f, y_train)

# Accuracy on the windows the model was fitted on, with their count
train_accuracy = clf.score(X_train_f, y_train)
print("Accuracy is ", train_accuracy)
print(len(X_train))

# Accuracy on the held-out windows, with their count
test_accuracy = clf.score(X_test_f, y_test)
print("test Accuracy is ", test_accuracy)
print(len(X_test))

# Cohen's kappa between predictions and labels for both splits
train_kappa = cohen_kappa_score(clf.predict(X_train_f), y_train)
print("kappa score on train is: ", train_kappa)
test_kappa = cohen_kappa_score(clf.predict(X_test_f), y_test)
print("kappa score on test is: ", test_kappa)


Accuracy is  1.0
334
test Accuracy is  0.9761904761904762
84
kappa score on train is:  1.0
kappa score on test is:  0.9430508474576271


In [37]:
# Show the raw training labels (1 = action window, 0 = idle window)
print(y_train)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 0. 0. 0. 0.
 1. 1. 1. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 1. 0. 0. 1. 0.
 1. 0. 0. 1. 1. 0. 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1.
 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 1. 1. 1. 0. 0. 1. 1. 0. 1. 1. 0. 0.
 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 0. 1. 0. 1. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0.
 0. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 1. 0. 0. 0. 1. 0. 1. 1.
 0. 1. 1. 1. 1. 1. 0. 0. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1. 1. 0.
 1. 0. 0. 0. 1. 1. 0. 1. 0. 1. 0. 1. 0. 0. 0. 1. 1. 1. 1. 0. 0. 1. 0. 1.
 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1.
 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 0. 1. 1. 1. 0. 0. 1.
 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0.
 1. 0. 0. 0. 1. 0. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1.
 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 0. 0. 1. 0. 1. 1.
 1. 1. 0. 0. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 0. 0. 1.

In [38]:
# Show the raw held-out labels (1 = action window, 0 = idle window)
print(y_test)

[1. 1. 0. 1. 1. 1. 0. 1. 1. 1. 1. 1. 1. 0. 1. 0. 0. 1. 0. 1. 1. 1. 1. 0.
 0. 0. 1. 1. 1. 1. 0. 1. 0. 1. 1. 1. 1. 0. 1. 0. 1. 0. 1. 1. 1. 1. 1. 1.
 0. 1. 1. 1. 1. 1. 0. 1. 1. 1. 0. 1. 1. 0. 0. 1. 1. 0. 1. 1. 1. 1. 0. 1.
 0. 1. 0. 1. 0. 1. 1. 1. 0. 1. 1. 1.]


In [39]:
# Share of all loaded windows that carry the idle label (0)
idle_count = len(y[y == 0])
print(idle_count / len(y))

0.31100478468899523


In [26]:
# Number of loaded windows that carry the action label (1)
action_count = len(y[y == 1])
print(action_count)

288


In [32]:
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report

# Per-class precision / recall / F1 for the held-out windows
test_predictions = clf.predict(X_test_f)
print(classification_report(y_test, test_predictions))

              precision    recall  f1-score   support

         0.0       0.95      0.95      0.95        19
         1.0       0.98      0.98      0.98        64

    accuracy                           0.98        83
   macro avg       0.97      0.97      0.97        83
weighted avg       0.98      0.98      0.98        83

