In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.activations import linear, relu, sigmoid
import os
from sklearn.model_selection import train_test_split

In [2]:
# Defining input and output directories and loading behavioral data
os.chdir('C:/Users/silvia/seadrive_root/Silvia F/My Libraries/JointRSA2/JointDrawing_BIDS/')

pps_folders = os.listdir()
subjlist = [x for x in pps_folders if x.startswith('sub') ]

X = []
y = []

for subj in subjlist:
    data = pd.read_csv(subj + "/beh-processed/InterpolatedTrajectories.csv")
    data_keep = data.loc[np.all([data.catch == 0 , data.incomplete_traj == 0 , data.wrong_shape == 0 , data.gross_error == 0], axis = 0), :]
    
    # creating X and y
    X1 = np.empty([len(np.unique(data_keep.trial_sort)), np.max(data_keep.traj_point)+1, 2])
    
    y1 = np.zeros([len(np.unique(data_keep.trial_sort)), 4])
    
    for idx, t in enumerate(np.unique(data_keep.trial_sort)):
        X1[idx, :, :] = data_keep.loc[data_keep.trial_sort == t, ['x', 'y']].to_numpy()
    
        y1[idx, 0] = 1 if data_keep[data_keep.trial_sort == t]['shape'].iloc[0] == 'c' else 0
        y1[idx, 1] = 1 if data_keep[data_keep.trial_sort == t]['shape'].iloc[0] == 'd' else 0
        y1[idx, 2] = 1 if data_keep[data_keep.trial_sort == t]['shape'].iloc[0] == 's' else 0
        
        y1[idx, 3] = 1 if data_keep[data_keep.trial_sort == t]['cong'].iloc[0] == 'congruent' else 0
    
    # reshaping X to have all features in one dimension
    X1 = np.reshape(X1, [len(np.unique(data_keep.trial_sort)), -1])

    X.append(X1)
    y.append(y1)

In [3]:
X = np.concatenate(X, axis = 0)
y = np.concatenate(y, axis = 0)

print(f"Dimensions of X: {X.shape}")
print(f"Dimensions of y: {y.shape}")

Dimensions of X: (12976, 200)
Dimensions of y: (12976, 4)


In [4]:
# Get 60% of the dataset as the training set. Put the remaining 40% in temporary variables: x_ and y_.
x_train, x_, y_train, y_ = train_test_split(X, y, test_size=0.40, random_state=1)

# Split the 40% subset above into two: one half for cross validation and the other for the test set
x_cv, x_test, y_cv, y_test = train_test_split(x_, y_, test_size=0.50, random_state=1)

# Delete temporary variables
del x_, y_

print(f"the shape of the training set (input) is: {x_train.shape}")
print(f"the shape of the training set (target) is: {y_train.shape}\n")
print(f"the shape of the cross validation set (input) is: {x_cv.shape}")
print(f"the shape of the cross validation set (target) is: {y_cv.shape}\n")
print(f"the shape of the test set (input) is: {x_test.shape}")
print(f"the shape of the test set (target) is: {y_test.shape}")

the shape of the training set (input) is: (7785, 200)
the shape of the training set (target) is: (7785, 4)

the shape of the cross validation set (input) is: (2595, 200)
the shape of the cross validation set (target) is: (2595, 4)

the shape of the test set (input) is: (2596, 200)
the shape of the test set (target) is: (2596, 4)


In [17]:
tf.random.set_seed(1234) # for consistent results
model = Sequential(
    [   tf.keras.Input(shape=(200,)),
        Dense(50, activation = 'relu', name = 'L1'),
       Dense(15, activation = 'relu', name = 'L2'),
       Dense(4, activation = 'sigmoid', name = 'L3'),
    ], name = "my_model" 
)

model.summary()

In [18]:
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
)

In [19]:
history = model.fit(
    x_train,y_train,
    epochs=40
)

Epoch 1/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 1ms/step - loss: 0.3860
Epoch 2/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1819
Epoch 3/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1707
Epoch 4/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2ms/step - loss: 0.1664
Epoch 5/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1645
Epoch 6/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step - loss: 0.1618
Epoch 7/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1593
Epoch 8/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1577
Epoch 9/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - loss: 0.1567
Epoch 10/40
[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step - lo

In [20]:
predictions = model.predict(x_cv)

[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step


In [48]:
# Evaluating model
predictions = model.predict(x_train)
yhat = np.round(predictions, decimals = 0)

shape = np.zeros(yhat.shape[0])
shape[np.where(yhat[:, 1] == 1)] = 1
shape[np.where(yhat[:, 2] == 1)] = 2

shape_y = np.zeros(y_train.shape[0])
shape_y[np.where(y_train[:, 1] == 1)] = 1
shape_y[np.where(y_train[:, 2] == 1)] = 2
print('\n-----------\nModel performance on Training set')

train_shape_errors = len(shape_y) - np.sum(shape == shape_y) 
print(f"The model missclassified shape {train_shape_errors}/{len(shape_y)} times, {np.round(train_shape_errors/len(shape_y)*100, decimals =2)}%")

train_cong_errors = len(y_train) - np.sum(y_train[:, 3] == yhat[:, 3]) 
print(f"The model missclassified congruency {train_cong_errors}/{len(y_train)} times, {np.round(train_cong_errors/len(y_train)*100, decimals =2)}%")


predictions = model.predict(x_cv)
yhat_predictions = np.round(predictions, decimals = 0)

shape = np.zeros(yhat_predictions.shape[0])
shape[np.where(yhat_predictions[:, 1] == 1)] = 1
shape[np.where(yhat_predictions[:, 2] == 1)] = 2

shape_y = np.zeros(y_cv.shape[0])
shape_y[np.where(y_cv[:, 1] == 1)] = 1
shape_y[np.where(y_cv[:, 2] == 1)] = 2
print('\n-----------\nModel performance on CV set')

cv_shape_errors = len(shape_y) - np.sum(shape == shape_y) 
print(f"The model missclassified shape {cv_shape_errors}/{len(shape_y)} times, {np.round(cv_shape_errors/len(shape_y)*100, decimals =2)}%")

cv_cong_errors = len(y_cv) - np.sum(y_cv[:, 3] == yhat_predictions[:, 3]) 
print(f"The model missclassified congruency {cv_cong_errors}/{len(y_cv)} times, {np.round(cv_cong_errors/len(y_cv)*100, decimals =2)}%")


predictions = model.predict(x_test)
yhat = np.round(predictions, decimals = 0)

shape = np.zeros(yhat.shape[0])
shape[np.where(yhat[:, 1] == 1)] = 1
shape[np.where(yhat[:, 2] == 1)] = 2

shape_y = np.zeros(y_test.shape[0])
shape_y[np.where(y_test[:, 1] == 1)] = 1
shape_y[np.where(y_test[:, 2] == 1)] = 2
print('\n-----------\nModel performance on Testing set')

test_shape_errors = len(shape_y) - np.sum(shape == shape_y) 
print(f"The model missclassified shape {test_shape_errors}/{len(shape_y)} times, {np.round(test_shape_errors/len(shape_y)*100, decimals =2)}%")

test_cong_errors = len(y_test) - np.sum(y_test[:, 3] == yhat[:, 3]) 
print(f"The model missclassified congruency {test_cong_errors}/{len(y_test)} times, {np.round(test_cong_errors/len(y_test)*100, decimals =2)}%")


[1m244/244[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1ms/step

-----------
Model performance on Training set
The model missclassified shape 64/7785 times, 0.82%
The model missclassified congruency 1603/7785 times, 20.59%
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

-----------
Model performance on CV set
The model missclassified shape 21/2595 times, 0.81%
The model missclassified congruency 538/2595 times, 20.73%
[1m82/82[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 2ms/step

-----------
Model performance on Testing set
The model missclassified shape 20/2596 times, 0.77%
The model missclassified congruency 503/2596 times, 19.38%
