# Training Model: SVM

## 1. Importing libraries ...

In [1]:
import pandas as pd
import numpy as np
import setup_jwlab
from jwlab.constants import cleaned_data_filepath
from jwlab.ml_prep import prep_ml
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

## 2. Importing Participants data set ...
Note: choose only one of the data set below

#### 2.1 All participants

In [2]:
participants = ["105", "106", "107", "109", "111", "904", "905", "906", "112", "909", "910", "115", "116", "912", "908"]

#### 2.2 All 13-month participants

In [2]:
participants = ["105", "106", "107", "109", "111", "112", "115", "116"]

#### 2.3 All 9-month participants

In [2]:
participants = ["904", "905", "906", "909", "910", "912", "908"]

## 3. Generating random subsets of the chosen participant list

In [3]:
participants_train, participants_test = train_test_split(participants,test_size=0.2)
print(len(participants_train), len(participants_test))

12 3


## 4. Preparing the data set for the ML model ...
Note: the dimensions are also verified

#### 4.1 No_averaging data

In [4]:
X_train_n,y_train_n,p_train_n,w_train_n,df_train_n = prep_ml(cleaned_data_filepath, participants_train, downsample_num=1000, averaging="no_averaging")
X_test_n, y_test_n, p_test_n, w_test_n, df_test_n = prep_ml(cleaned_data_filepath, participants_test, downsample_num=1000, averaging="no_averaging")

(n_train, d_train) = X_train_n.shape
assert n_train == y_train_n.shape[0]

(n_test, d_test) = X_test_n.shape
assert n_test == y_test_n.shape[0]

The number of bad trials of the participant - [105] that are removed is [10].
The number of bad channels that are removed is - [7].
The number of bad trials of the participant - [910] that are removed is [39].
The number of bad channels that are removed is - [11].
The number of bad trials of the participant - [908] that are removed is [7].
The number of bad channels that are removed is - [15].
The number of bad trials of the participant - [107] that are removed is [15].
The number of bad channels that are removed is - [34].
The number of bad trials of the participant - [909] that are removed is [17].
The number of bad channels that are removed is - [6].
The number of bad trials of the participant - [912] that are removed is [22].
The number of bad channels that are removed is - [10].
The number of bad trials of the participant - [904] that are removed is [30].
The number of bad channels that are removed is - [50].
The number of bad trials of the participant - [112] that are removed is 

#### 4.2 Average_trials

In [5]:
X_train_t, y_train_t, p_train_t, w_train_t,df_train_t = prep_ml(cleaned_data_filepath, participants_train, downsample_num=1000, averaging="average_trials")
X_test_t, y_test_t, p_test_t, w_test_t, df_test_t = prep_ml(cleaned_data_filepath, participants_test, downsample_num=1000, averaging="average_trials")

(n_train, d_train) = X_train_t.shape
assert n_train == y_train_t.shape[0]

(n_test, d_test) = X_test_t.shape
assert n_test == y_test_t.shape[0]

The number of bad trials of the participant - [105] that are removed is [10].
The number of bad channels that are removed is - [7].
The number of bad trials of the participant - [910] that are removed is [39].
The number of bad channels that are removed is - [11].
The number of bad trials of the participant - [908] that are removed is [7].
The number of bad channels that are removed is - [15].
The number of bad trials of the participant - [107] that are removed is [15].
The number of bad channels that are removed is - [34].
The number of bad trials of the participant - [909] that are removed is [17].
The number of bad channels that are removed is - [6].
The number of bad trials of the participant - [912] that are removed is [22].
The number of bad channels that are removed is - [10].
The number of bad trials of the participant - [904] that are removed is [30].
The number of bad channels that are removed is - [50].
The number of bad trials of the participant - [112] that are removed is 

#### 4.3 Average_trials_and_participants

In [6]:
X_train_tp, y_train_tp, p_train_tp, w_train_tp, df_train_tp = prep_ml(cleaned_data_filepath, participants_train, downsample_num=1000, averaging="average_trials_and_participants")
X_test_tp, y_test_tp, p_test_tp, w_test_tp, df_test_tp = prep_ml(cleaned_data_filepath, participants_test, downsample_num=1000, averaging="average_trials_and_participants")

(n_train, d_train) = X_train_tp.shape
assert n_train == y_train_tp.shape[0]

(n_test, d_test) = X_test_tp.shape
assert n_test == y_test_tp.shape[0]

The number of bad trials of the participant - [105] that are removed is [10].
The number of bad channels that are removed is - [7].
The number of bad trials of the participant - [910] that are removed is [39].
The number of bad channels that are removed is - [11].
The number of bad trials of the participant - [908] that are removed is [7].
The number of bad channels that are removed is - [15].
The number of bad trials of the participant - [107] that are removed is [15].
The number of bad channels that are removed is - [34].
The number of bad trials of the participant - [909] that are removed is [17].
The number of bad channels that are removed is - [6].
The number of bad trials of the participant - [912] that are removed is [22].
The number of bad channels that are removed is - [10].
The number of bad trials of the participant - [904] that are removed is [30].
The number of bad channels that are removed is - [50].
The number of bad trials of the participant - [112] that are removed is 

## 5. Setting up the SVM model ...

In [7]:
model = LinearSVC(C=1e-9, max_iter=1000)

## 6. Training and testing the model ...

### 6.1 Train on no_average, with error rate ...

In [8]:
model.fit(X_train_n, y_train_n)

np.mean(model.predict(X_test_n) != y_test_n)

0.49714285714285716

### 6.2 Train on average_trials, with error rate ...

In [9]:
model.fit(X_train_t, y_train_t)

np.mean(model.predict(X_test_t) != y_test_t)

0.5

### 6.3 Train on average_trials_and_participants, with error rate ...

In [10]:
model.fit(X_train_tp, y_train_tp)

np.mean(model.predict(X_test_tp) != y_test_tp)

0.5625

In [17]:
X_n.shape

(461, 60000)

In [18]:
X_tp.shape

(16, 60000)

In [19]:
X_t.shape

(128, 60000)