# Training Model: SVM

## 1. Importing libraries ...

In [1]:
import pandas as pd
import numpy as np
import setup_jwlab
from jwlab.constants import cleaned_data_filepath
from jwlab.ml_prep import prep_ml
from sklearn.svm import LinearSVC
from sklearn.model_selection import train_test_split

## 2. Importing Participants..

In [2]:
#participants = ["904", "905", "906", "909", "910", "912", "908", "913", "914", "916", "917", "919", "920", "921", "923", "924","927", "928", "929", "930", "932"]
#participants = [ "909", "910", "912", "908", "913", "914", "916", "917", "919", "920", "921", "923", "924","927", "928", "929", "930", "932"]
#9m with >40 trials
participants = [ "909", "912", "908", "913", "914", "916", "917", "919", "920", "921", "924","927", "930"]

#12m all
#participants = ["109", "111", "112", "115", "116", "117", "119", "121", "122", "120", "124"]
#12m with >40 trials
#participants = ["109", "111", "112", "115", "124"]



## 3. Set up averaging

In [3]:
X, y, good_trial_count = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="no_averaging")
Xt, yt, good_trial_count_t = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="average_trials")
Xa, ya, good_trial_count_a = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="average_trials_and_participants")


loaded
loaded
loaded


In [4]:
(n, d) = X[0][0].shape
assert n == y[0][0].shape[0]


In [5]:
unique, counts = np.unique(y[0][0], return_counts=True)
dict(zip(unique, counts))

{0.0: 443, 1.0: 455}

In [6]:
(n, d) = Xt[0][0].shape
assert n == yt[0][0].shape[0]


n

288

In [7]:
unique, counts = np.unique(yt[0][0], return_counts=True)
dict(zip(unique, counts))

{0.0: 144, 1.0: 144}

In [8]:
(n, d) = Xa[0][0].shape
assert n == ya[0][0].shape[0]
n

16

In [9]:
unique, counts = np.unique(ya[0][0], return_counts=True)
dict(zip(unique, counts))

{0.0: 8, 1.0: 8}

## 4. Setting up the SVM model ...

In [4]:
model = LinearSVC(C=1e-9, max_iter=5000)

## 5. Training and testing the model ...

### 5.1. Train on raw, test on raw (validation)

In [5]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(X[0][0]) != y[0][0])

0.2344559585492228

### 5.2. Train on raw, test on avg by trial (word repetition) 

In [6]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(Xt[0][0]) != yt[0][0])

0.4567307692307692

### 5.3. Train on raw, test on avg by word and ps 

In [7]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(Xa[0][0]) != ya[0][0])

0.375

# Alternate accuracy measurements

In [None]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(df_concat, y, test_size=0.2)
model = SVC(gamma='scale')

model.fit(X_train, y_train)

y_pred = model.predict(X_test)

from sklearn.metrics import accuracy_score,recall_score,precision_score,f1_score
print('Accuracy Score : ' + str(accuracy_score(y_test,y_pred)))
print('Precision Score : ' + str(precision_score(y_test,y_pred)))
print('Recall Score : ' + str(recall_score(y_test,y_pred)))
print('F1 Score : ' + str(f1_score(y_test,y_pred)))

from sklearn.metrics import confusion_matrix
print('Confusion Matrix : \n' + str(confusion_matrix(y_test,y_pred)))

# 2. Importing 13-Month-Olds..

In [2]:
#participants = ["105", "106", "107", "109", "111", "112", "115", "116", "117", "119", "121", "122", "120", "124"]
#participants = ["109", "111", "112", "115", "116", "117", "119", "121", "122", "120", "124"]
participants = ["109", "111", "112", "115", "124"]

## 3. Set up averaging

In [3]:
X, y, good_trial_count = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="no_averaging")
Xt, yt, good_trial_count_t = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="average_trials")
Xa, ya, good_trial_count_a = prep_ml(cleaned_data_filepath, participants, downsample_num=1000, averaging="average_trials_and_participants")


loaded
loaded
loaded


In [9]:
(n, d) = X[0][0].shape
assert n == y[0][0].shape[0]
n

409

In [13]:
unique, counts = np.unique(y[0][0], return_counts=True)
dict(zip(unique, counts))

{-5837.697689302085: 1,
 -5823.5781875480125: 1,
 -5745.890223444095: 1,
 -5702.329904348617: 1,
 -5554.267135358004: 1,
 -5477.609973662548: 1,
 -5272.739370393582: 1,
 -5156.799050742925: 1,
 -4914.693626355867: 1,
 -4753.147198508801: 1,
 -4497.089110593857: 1,
 -4479.989451410409: 1,
 -4469.1105633904435: 1,
 -4409.145463552375: 1,
 -4375.680902883818: 1,
 -4281.794089316381: 1,
 -4261.94566628182: 1,
 -4202.97324932638: 1,
 -4045.8406258570285: 1,
 -4036.616017084429: 1,
 -3992.6010965776923: 1,
 -3955.833099943023: 1,
 -3955.2081257349914: 1,
 -3913.545678819911: 1,
 -3866.2183786513287: 1,
 -3846.429812089109: 1,
 -3813.1482832538354: 1,
 -3812.150070924324: 1,
 -3774.2502344058853: 1,
 -3770.801604639757: 1,
 -3761.007291639704: 1,
 -3759.307179254542: 1,
 -3731.3104314889433: 1,
 -3705.649044160303: 1,
 -3687.293259819628: 1,
 -3652.6414851899626: 1,
 -3644.9891190153116: 1,
 -3639.521005573019: 1,
 -3601.3514167134244: 1,
 -3591.3086883699652: 1,
 -3553.6424502181603: 1,
 -35

In [11]:
(n, d) = Xt[0][0].shape
assert n == yt[0][0].shape[0]
n

80

In [12]:
(n, d) = Xa[0][0].shape
assert n == ya[0][0].shape[0]
n

16

## 4. Setting up the SVM model ...

In [4]:
model = LinearSVC(C=1e-9, max_iter=1000)

## 5. Training and testing the model ...

### 5.1. Train on raw, test on raw (validation)

In [5]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(X[0][0]) != y[0][0])

0.22982885085574573

### 5.2. Train on raw, test on avg by trial (word repetition) 

In [6]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(Xt[0][0]) != yt[0][0])

0.4375

### 5.3. Train on raw, test on avg by word and ps 

In [7]:
# from sklearn import preprocessing

# scaler = preprocessing.StandardScaler().fit(X)
# X,Xp = scaler.transform(X), scaler.transform(Xp)
model.fit(X[0][0], y[0][0])
np.mean(model.predict(Xa[0][0]) != ya[0][0])

0.375