# Code

## Print libraries versions

In [None]:
# --- Interpreter and operating system ---
import sys
import platform

print("Python:", sys.version.split()[0])
print("Platform:", platform.platform())

# --- Scientific core ---
# These imports are not guarded: a missing package raises ImportError here.
import numpy as np
import pandas as pd
import matplotlib
import sklearn

print("Numpy:", np.__version__)
print("Pandas:", pd.__version__)
print("Matplotlib:", matplotlib.__version__)
print("Scikit-learn:", sklearn.__version__)

# --- PyTorch stack (also unguarded) ---
import torch
import torchvision

print("PyTorch:", torch.__version__)
print("Torchvision:", torchvision.__version__)
print("CUDA available:", torch.cuda.is_available())
print("CUDA runtime:", torch.version.cuda)
print("cuDNN:", torch.backends.cudnn.version())

# --- Gradient boosting libs (reported as "not installed" when absent) ---
try:
    import xgboost
except ImportError:
    print("XGBoost: not installed")
else:
    print("XGBoost:", xgboost.__version__)

try:
    import lightgbm
except ImportError:
    print("LightGBM: not installed")
else:
    print("LightGBM:", lightgbm.__version__)

# --- Transformers ---
try:
    import transformers
except ImportError:
    print("Transformers: not installed")
else:
    print("Transformers:", transformers.__version__)

# --- TensorFlow (optional) ---
try:
    import tensorflow as tf
except ImportError:
    print("TensorFlow: not installed")
else:
    print("TensorFlow:", tf.__version__)

# Optional video I/O libs.
# Report each one explicitly instead of silently swallowing every exception:
# a missing package, a broken install and a missing __version__ attribute are
# three different situations and are now distinguishable in the output.
for pkg in ["decord", "av"]:
    try:
        mod = __import__(pkg)
    except ImportError:
        print(f"{pkg}: not installed")
    except Exception as exc:
        # Best effort: an installed-but-broken package (e.g. a native library
        # that fails to load) must not abort the version report.
        print(f"{pkg}: import failed ({type(exc).__name__}: {exc})")
    else:
        print(f"{pkg}:", getattr(mod, "__version__", "unknown version"))


Python: 3.12.12
Platform: Linux-6.6.105+-x86_64-with-glibc2.35
Numpy: 2.0.2
Pandas: 2.2.2
Matplotlib: 3.10.0
Scikit-learn: 1.6.1
PyTorch: 2.8.0+cu126
Torchvision: 0.23.0+cu126
CUDA available: True
CUDA runtime: 12.6
cuDNN: 91002
XGBoost: 3.1.1
LightGBM: 4.6.0
Transformers: 4.57.1
TensorFlow: 2.19.0


## Remove an old code version
- if present

In [None]:
!rm -r SpecklesAI

rm: cannot remove 'SpecklesAI': No such file or directory


## Clone repository

In [1]:
# If you are cloning the public version of the repository, use:
!git clone https://github.com/natalyasegal/SpecklesAI.git

Cloning into 'SpecklesAI'...
remote: Enumerating objects: 943, done.[K
remote: Counting objects: 100% (52/52), done.[K
remote: Compressing objects: 100% (49/49), done.[K
remote: Total 943 (delta 23), reused 0 (delta 0), pack-reused 891 (from 3)[K
Receiving objects: 100% (943/943), 15.00 MiB | 10.21 MiB/s, done.
Resolving deltas: 100% (491/491), done.


## Copy config.py (for Broca area)

In [2]:
# Overwrite config.py in the cloned repository with the BCI variant (config_BCI.py).
# NOTE(review): presumably config.py is the file the SpecklesAI scripts load — confirm.
!cp SpecklesAI/config/config_BCI.py SpecklesAI/config/config.py

## Import functions that will be used directly

In [3]:
import sys

# Add the cloned package root to the module search path exactly once, so that
# re-running this cell does not keep appending duplicate entries.
SPECKLES_ROOT = '/content/SpecklesAI'
if SPECKLES_ROOT not in sys.path:
    sys.path.append(SPECKLES_ROOT)

# NOTE(review): the wildcard import hides which names come from utils.swap.
# The commented-out explicit import below is the preferred form once it is
# confirmed that swap_categories is the only name the notebook needs.
#from utils.swap import swap_categories
from utils.swap import *

# Mount google drive

In [4]:
# Mount Google Drive; the shell cells in this notebook read and write data
# through paths that start with gdrive/.
from google.colab import drive

GDRIVE_MOUNT_POINT = '/content/gdrive'
drive.mount(GDRIVE_MOUNT_POINT)

Mounted at /content/gdrive


## Copy configuration files
- add here the code that copies your actual configuration files with subjects and dates

# Dataset - speckle video

### Remove old data

Run this only if you have created this directory before in the same runtime and need to recreate it differently

In [None]:
!rm -r exp3

rm: cannot remove 'exp3': No such file or directory


### Unzip

In [None]:
!unzip gdrive/My\ Drive/__PHd_2025/code_and_data/data_BCI/your_file_name.zip
# add here more data files

Archive:  gdrive/My Drive/__PHd_2025/code_and_data/data_BCI/BCI_Adi_9June25_forehead.zip
   creating: BCI_Adi_9June25_forehead/
   creating: BCI_Adi_9June25_forehead/090625_day1_1/
   creating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/
   creating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/
   creating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/
  inflating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/acA1440-220um__40034984__20250609_113919482.avi  
  inflating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/acA1440-220um__40034984__20250609_113950238.avi  
  inflating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/acA1440-220um__40034984__20250609_114020231.avi  
  inflating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/acA1440-220um__40034984__20250609_114359069.avi  
  inflating: BCI_Adi_9June25_forehead/090625_day1_1/Adi/forehead/no/acA1440-220um__40034984__20250609_114431310.avi  
   creating: BCI_Adi_9June25_forehead

### Move
- add here moving the data to exp3

In [None]:
!mkdir exp3
# move here ...

In [None]:
# List the contents of exp3 to check the result of the move step.
!ls exp3/

In [None]:
# Sample of the path structure you will get under exp3
# (presumably <date>/<subject>/<area>/<class> — confirm against your data).
# Lists the recordings stored in the "yes" directory:
!ls exp3/090625_day1_1/SubjNameOrCode/Broca/yes
# Uncomment to list the "no" directory instead:
#!ls exp3/090625_day1_1/SubjNameOrCode/Broca/no

acA1440-220um__40034984__20250609_143942792.avi
acA1440-220um__40034984__20250609_144012680.avi
acA1440-220um__40034984__20250609_144042863.avi
acA1440-220um__40034984__20250609_144112971.avi
acA1440-220um__40034984__20250609_144145530.avi


# Create per subject test datasets
- for morning/midday/forehead, the dataset is chosen during the unzip/move stage
- changing the directory name to Broca, just for simplicity, in the forehead case as well
- changing the directory name to 1_1 at the end, for simplicity, in the case of the morning set, in order to use the test creation code as is

## Create for SampleSubj

#### copy config

In [None]:
# Copy the single-subject subjects/dates file from Drive over the repository's
# subjects_and_dates.yaml, then print it to verify which subject and dates are active.
!cp -r gdrive/MyDrive/your_input_config_location/subjects_and_dates_Zeev__2__only.yaml SpecklesAI/config/config_files/subjects_and_dates.yaml
!cat SpecklesAI/config/config_files/subjects_and_dates.yaml

#subjects_and_dates.yaml

subjects:
  1: 
    dates: ['090625_day1', '090625_day1_1']
    name: 'Zeev'
  

#### create

In [None]:
# Run the test-set preparation script for split 1 with random seed 9, using the
# given file-name prefix for the per-category test set.
!python -u SpecklesAI/prepare_test_sets.py --split_num 1 --random_seed 9  --test_set_per_category_file test_per_category_BCI_forehead__
# Rename the produced "_1" file to the "_5_2" name under which it is saved and used later.
# NOTE(review): presumably "5_2" encodes the subject/session — confirm.
!mv test_per_category_BCI_forehead___1.npy test_per_category_BCI_forehead___5_2.npy
!ls

 number_of_classes = 2
 binary_lables=[[0]
 [1]]
Splits list:
{1: {'test_mix': [1], 'model_name': 'AYYZDS_mix_1'}, 2: {'test_mix': [2], 'model_name': 'AYYZDS_mix_2'}, 3: {'test_mix': [3], 'model_name': 'AYYZDS_mix_3'}, 4: {'test_mix': [4], 'model_name': 'AYYZDS_mix_4'}, 5: {'test_mix': [5], 'model_name': 'AYYZDS_mix_5'}, 6: {'test_mix': [6], 'model_name': 'AYYZDS_mix_6'}, 7: {'test_mix': [7], 'model_name': 'AYYZDS_mix_7'}, 8: {'test_mix': [8], 'model_name': 'AYYZDS_mix_8'}, 9: {'test_mix': [7, 8], 'model_name': 'AYYZDS_mix_9'}}
The chosen split number is 1, test: [1], AYYZDS_mix_1
 subject number = 1, name = ['Zeev'],  dates=['090625_day1_1'] 
 test {'090625_day1_1'} {'Zeev'}
 number_of_classes = 2
 binary_lables=[[0]
 [1]]
Splits list:
{1: {'test_mix': [1], 'model_name': 'AYYZDS_mix_1'}, 2: {'test_mix': [2], 'model_name': 'AYYZDS_mix_2'}, 3: {'test_mix': [3], 'model_name': 'AYYZDS_mix_3'}, 4: {'test_mix': [4], 'model_name': 'AYYZDS_mix_4'}, 5: {'test_mix': [5], 'model_name': 'AYYZDS_m

#### save

In [None]:
!mkdir gdrive/MyDrive/your_location
!cp test_per_category_BCI_forehead___5_2.npy	gdrive/MyDrive/your_location/.
!ls -l gdrive/MyDrive/your_location

# Test forehead
- on per subj models that were trained/validated on the morning set and tested on midday set of the same subject

## copy test sets

In [None]:
!cp gdrive/MyDrive/ypur_location/*.npy .
!ls -l *.npy

-rw------- 1 root root 204800128 Nov 21 10:57 test_per_category_BCI_forehead___10.npy
-rw------- 1 root root 122880128 Nov 21 10:57 test_per_category_BCI_forehead___1.npy
-rw------- 1 root root 122880128 Nov 21 10:57 test_per_category_BCI_forehead___1_swapped.npy
-rw------- 1 root root 122880128 Nov 21 10:57 test_per_category_BCI_forehead___2.npy
-rw------- 1 root root 204800128 Nov 21 10:57 test_per_category_BCI_forehead___3.npy
-rw------- 1 root root 122880128 Nov 21 10:57 test_per_category_BCI_forehead___4.npy
-rw------- 1 root root 204800128 Nov 21 10:58 test_per_category_BCI_forehead___5_2.npy
-rw------- 1 root root 122880128 Nov 21 10:58 test_per_category_BCI_forehead___5.npy
-rw------- 1 root root 245760128 Nov 21 10:58 test_per_category_BCI_forehead___6.npy
-rw------- 1 root root 204800128 Nov 21 10:58 test_per_category_BCI_forehead___7_2.npy
-rw------- 1 root root 204800128 Nov 21 10:58 test_per_category_BCI_forehead___7.npy
-rw------- 1 root root 204800128 Nov 21 10:59 test_p

## sample subject

In [None]:
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_3/models .

# 25 chunks (40 frames in chunk, taken eith 1000 fps) correspond to 1 second of input
!python -u SpecklesAI/inference_and_eval.py --use_per_subj_config --num_of_chunks_to_aggregate 25 --read_stored_dataset  --test_set_per_category_file test_per_category_BCI_forehead___3.npy --split_num 1 #patch giving meaningless split number here

!mkdir gdrive/MyDrive/your_path/pBCI_control__test_forehead_per_subj/per_subj_3_25
!mv models gdrive/MyDrive/your_path/pBCI_control__test_forehead_per_subj/per_subj_3_25/.
!mv *.png gdrive/MyDrive/your_path/pBCI_control__test_forehead_per_subj/per_subj_3_25/.
!mv *.csv gdrive/MyDrive/your_path/pBCI_control__test_forehead_per_subj/per_subj_3_25/.
!ls -l gdrive/MyDrive/your_path/pBCI_control__test_forehead_per_subj/per_subj_3_25

2025-11-19 13:15:14.171955: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1763558114.186546   19007 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1763558114.190720   19007 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
W0000 00:00:1763558114.201762   19007 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1763558114.201806   19007 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.
W0000 00:00:1763558114.201808   19007 computation_placer.cc:177] computation placer alr

# Ensembled Inference

### copy datasets - midday

In [8]:
# Copy the two stored "__2" per-subject test sets (the no_beep variant and the
# regular one) from Drive into the working directory, then list it.
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/test_per_category__per_subj___2_no_beep.npy .
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/test_per_category__per_subj__2.npy .
!ls

gdrive					     train_set__per_subj__1.npy
sample_data				     train_set__per_subj__3.npy
SpecklesAI				     train_set__per_subj__4.npy
test_per_category__per_subj__1.npy	     train_set__per_subj__5.npy
test_per_category__per_subj___2_no_beep.npy  validation_set__per_subj__1.npy
test_per_category__per_subj__2.npy	     validation_set__per_subj__3.npy
test_per_category__per_subj__3.npy	     validation_set__per_subj__4.npy
test_per_category__per_subj__4.npy	     validation_set__per_subj__5.npy
test_per_category__per_subj__5.npy


In [7]:
#To copy preprecessed datasets for the current split run:
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_1.npy .
#!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_2.npy .
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_3.npy .
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_4.npy .
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_5.npy .
!ls

gdrive				    train_set__per_subj__3.npy
sample_data			    train_set__per_subj__4.npy
SpecklesAI			    train_set__per_subj__5.npy
test_per_category__per_subj__1.npy  validation_set__per_subj__1.npy
test_per_category__per_subj__3.npy  validation_set__per_subj__3.npy
test_per_category__per_subj__4.npy  validation_set__per_subj__4.npy
test_per_category__per_subj__5.npy  validation_set__per_subj__5.npy
train_set__per_subj__1.npy


## subj 2 in **test**
- example on 8 models minus 1 (of the subj on test)

In [None]:
!rm -r all_models
!mkdir all_models
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_1/models all_models/models_1
#!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split__2_no_beep/models all_models/models_2
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_3/models all_models/models_3
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_4/models all_models/models_4
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5/models all_models/models_5
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_6/models all_models/models_6
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_7/models all_models/models_7
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_8/models all_models/models_8
!ls all_models

#50 as there is a word every 2 sec in version of data with beep
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/test_per_category__per_subj__2.npy .

!python -u SpecklesAI/inference_and_eval.py  --need_ensemble --use_per_subj_config --num_of_chunks_to_aggregate 50 --read_stored_dataset  --test_set_per_category_file test_per_category__per_subj__2.npy --split_num 1 #patch giving meaningless split number here

models_1  models_3  models_4  models_5	models_6  models_7  models_8
2025-05-25 13:36:34.182220: I tensorflow/core/util/port.cc:153] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2025-05-25 13:36:34.200938: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1748180194.221669   45836 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1748180194.228056   45836 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2025-05-25 13:36:34.249003: I tensorflow/core/platform/cpu_feature_guard.cc

# Inference

## different subject
- test per subject models to see how they perform on another subject

### copy datasets

In [None]:
#To copy preprocessed datasets for the current split run:
!cp gdrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_*.npy .
!ls

gdrive				    train_set__per_subj__3.npy
sample_data			    train_set__per_subj__4.npy
SpecklesAI			    train_set__per_subj__5.npy
test_per_category__per_subj__1.npy  validation_set__per_subj__1.npy
test_per_category__per_subj__3.npy  validation_set__per_subj__3.npy
test_per_category__per_subj__4.npy  validation_set__per_subj__4.npy
test_per_category__per_subj__5.npy  validation_set__per_subj__5.npy
train_set__per_subj__1.npy


### model_1 on test_5

In [None]:
# Fetch the models of per-subject split 1 and list them.
!cp -r gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_1/models .
!ls -l models
# For now the config is per subject, so change the subject number in it to 1
# before running inference with these models on the "__5" test set.
!python -u SpecklesAI/inference_and_eval.py --use_per_subj_config --num_of_chunks_to_aggregate 25 --read_stored_dataset  --test_set_per_category_file test_per_category__per_subj__5.npy --split_num 1

# Train

In [None]:
!mkdir models

## sample subj

In [None]:
# Copy the stored dataset files whose names end in _5 (train, validation and
# test sets, per the listing below) from Drive into the working directory.
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/per_subj_splits/*_5.npy .
!ls

gdrive	     SpecklesAI				 train_set__per_subj__5.npy
sample_data  test_per_category__per_subj__5.npy  validation_set__per_subj__5.npy


### morning in train/val and midday in test

In [None]:
# If using the per-subject config with a single subject, leave --split_num at 1.
# Trains from the stored "__5" train/validation sets (--read_stored_dataset).
!python -u SpecklesAI/prep_and_train_tf.py --use_per_subj_config --batch_size 4100 --epochs 170 --sz_conv 8 --num_of_chunks_to_aggregate 50 --random_seed 9  --train_set_file train_set__per_subj__5.npy --validation_set_file validation_set__per_subj__5.npy --test_set_per_category_file test_per_category__per_subj__5.npy --split_num 1  --read_stored_dataset

#### save model

In [None]:
!mkdir gdrive/MyDrive/your_path/pBCI_models_per_subj/
!mkdir gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5
!mv models gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5/.
!mv *.png gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5/.
!mv *.csv gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5/.
!ls -l gdrive/MyDrive/your_path/pBCI_models_per_subj/per_subj_split_5

# Inference - for comprehension
- for first paper

Inference on already trained and saved models per gen split.

### Copy test sets

In [None]:
# Copy every stored test-set file (test*_*.npy) of the per-subject splits
# from Drive, then list the working directory.
!cp gdrive/MyDrive/your_path/data_exp3_prep/per_subj_splits/test*_*.npy .
!ls -l

### sample subj1

In [None]:
!cp -r gdrive/your_path/code_and_data/p1_models_per_subj/per_subj_split_1/models .
!ls -l models
!python -u SpecklesAI/inference_and_eval.py --use_per_subj_config --num_of_chunks_to_aggregate 25 --read_stored_dataset  --test_set_per_category_file test_per_category__per_subj__1.npy --split_num 1

### sample subj 2

In [None]:
# Fetch the models of per-subject split 2, list them, and run inference on the
# "__2" test set with --split_num 2.
!cp -r gdrive/MyDrive/your_path/p1_models_per_subj/per_subj_split_2/models .
!ls -l models
!python -u SpecklesAI/inference_and_eval.py --use_per_subj_config --num_of_chunks_to_aggregate 25 --read_stored_dataset  --test_set_per_category_file test_per_category__per_subj__2.npy --split_num 2

# Generalization

### Create gen split datasets

In [None]:
# Prepare the datasets for all generalization splits with random seed 9.
# The three file arguments are name prefixes.
# NOTE(review): presumably the script appends "_<split>.npy" to each prefix — confirm.
!python -u SpecklesAI/prepare_datasets_for_all_splits.py --random_seed 9  --train_set_file train_set_split_ --validation_set_file validation_set_split_ --test_set_per_category_file test_per_category_split_

In [None]:
# List the working directory to check the files present after dataset preparation.
!ls -l

#### Save

In [None]:
!mkdir gdrive/MyDrive/your_path/data_exp3_prep_BCI/gen_splits
!mv *.npy	gdrive/MyDrive/your_path/data_exp3_prep_BCI/gen_splits/.
!ls -l gdrive/MyDrive/your_path/data_exp3_prep_BCI/gen_splits

## Train

### Gen split 1
- example

In [None]:
# To copy the preprocessed datasets for the current split, run:
!cp gdrive/MyDrive/your_path/data_exp3_prep_BCI/gen_splits/*_1.npy .

# Train on generalization split 1 from the stored datasets (--read_stored_dataset).
!python -u SpecklesAI/prep_and_train_tf.py --batch_size 1000 --epochs 120 --num_of_chunks_to_aggregate 25 --random_seed 9  --train_set_file train_set_split__1.npy --validation_set_file validation_set_split__1.npy --test_set_per_category_file test_per_category_split__1.npy --split_num 1  --read_stored_dataset