In [None]:
import os
import numpy as np
from pathlib import Path
from tensorflow import keras
from tensorflow.keras.preprocessing.image import load_img
from tensorflow.keras.preprocessing.image import img_to_array
from tensorflow.keras.applications.xception import preprocess_input
from sklearn.model_selection import train_test_split
from tensorflow.keras.optimizers import Adam
from tqdm import tqdm
from scipy.stats import pearsonr as corr
from tensorflow.keras.models import load_model

from google.colab import drive
# Mount Google Drive into the Colab runtime so the dataset and the submission
# folder can be accessed as ordinary paths. force_remount=True is passed so an
# already-mounted drive is mounted again.
drive.mount('/content/drive/', force_remount=True)

# The dataset and the submission folder sit side by side in one Drive folder.
_project_root = '/content/drive/MyDrive/ColabNotebooks/NeuroAI:FINALPROJECT'
# Root of the Algonauts tutorial data (one 'subjXX' folder per subject).
data_dir = _project_root + '/algonauts_2023_tutorial_data'
# Root under which per-subject challenge predictions are written.
parent_submission_dir = _project_root + '/algonauts_2023_challenge_submission'

Mounted at /content/drive/


# Train/Validation Block
The Colab runtime is likely to run out of memory and crash if you loop over all eight subjects in one run; shorten the list in `for subj in [...]` to only a few subjects per run.

In [None]:
class argObj:
  """Per-subject locations of the Algonauts data and submission folders.

  Attributes set by __init__:
    subj: subject number formatted with format(subj, '02') (1 -> '01').
    data_dir: '<data_dir>/subj<subj>'.
    parent_submission_dir: the submission root, stored unchanged.
    subject_submission_dir: '<parent_submission_dir>/subj<subj>'; the folder
      is created when the object is constructed if it does not exist yet.
  """

  def __init__(self, data_dir, parent_submission_dir, subj):
    self.subj = format(subj, '02')
    self.data_dir = os.path.join(data_dir, 'subj'+self.subj)
    self.parent_submission_dir = parent_submission_dir
    self.subject_submission_dir = os.path.join(self.parent_submission_dir,
        'subj'+self.subj)

    # Create the submission directory if not existing
    os.makedirs(self.subject_submission_dir, exist_ok=True)


def _build_encoding_model(num_voxels):
  """Build an Xception-based regression model predicting `num_voxels` values.

  The model is: random flip/rotation augmentation -> ImageNet-pretrained
  Xception backbone (frozen) -> global average pooling -> dropout ->
  L2-regularized linear Dense layer with one output per voxel/vertex.

  Building the model inside a function means the intermediate tensors and
  layers are not kept alive by notebook-level variables afterwards.

  Args:
    num_voxels: number of output units of the final Dense layer.

  Returns:
    (model, base_model): the full (uncompiled) Keras model and its Xception
    backbone, so the caller can unfreeze the backbone later.
  """
  data_augmentation = keras.Sequential(
      [keras.layers.RandomFlip("horizontal"), keras.layers.RandomRotation(0.1),]
  )

  # Load the Xception model with its default pre-trained weights, without the
  # classification head
  base_model = keras.applications.Xception(include_top=False)

  # Freeze the convolutional layers of the Xception backbone
  base_model.trainable = False

  inputs = keras.Input(shape=(299, 299, 3))
  x = data_augmentation(inputs) # augment data to generalize
  # training=False keeps the backbone in inference mode even after it is
  # unfrozen for fine-tuning
  x = base_model(x, training=False)
  x = keras.layers.GlobalAveragePooling2D()(x) # pool
  x = keras.layers.Dropout(0.2)(x)  # regularize with dropout
  outputs = keras.layers.Dense(num_voxels, activation='linear', kernel_regularizer=keras.regularizers.l2(0.01))(x) # define voxels to predict
  return keras.Model(inputs, outputs), base_model


for subj in [1, 2, 3, 4, 5, 6, 7, 8]:
  print('---------- Subject '+str(subj)+' ----------')
  args = argObj(data_dir, parent_submission_dir, subj)

  # ----- LOAD fMRI DATA -----
  fmri_dir = os.path.join(args.data_dir, 'training_split', 'training_fmri')
  lh_fmri = np.load(os.path.join(fmri_dir, 'lh_training_fmri.npy'))
  rh_fmri = np.load(os.path.join(fmri_dir, 'rh_training_fmri.npy'))

  print('LH training fMRI data shape:')
  print(lh_fmri.shape)
  print('(Training stimulus images × LH vertices)')

  print('\nRH training fMRI data shape:')
  print(rh_fmri.shape)
  print('(Training stimulus images × RH vertices)')

  # ----- LOAD IMAGE LOCATION -----
  train_img_dir  = os.path.join(args.data_dir, 'training_split', 'training_images')

  # ----- LOAD IMAGE DATA & PREPROCESS -----
  # Sorted so that image i lines up with row i of the fMRI arrays.
  img_paths = sorted(Path(train_img_dir).iterdir())
  # Fail fast, before loading gigabytes of images, if rows and images disagree.
  if len(img_paths) != lh_fmri.shape[0]:
    raise ValueError('Found '+str(len(img_paths))+' entries in '+str(train_img_dir)
                     +' but '+str(lh_fmri.shape[0])+' fMRI rows')
  # Fill one pre-allocated array instead of building a list and copying it with
  # np.array(), which briefly needs twice the memory.
  # float32 matches img_to_array's output under the default Keras float type.
  train_imgs = np.empty((len(img_paths), 299, 299, 3), dtype=np.float32)
  for i, path in enumerate(img_paths):
    img = load_img(path, target_size=(299, 299))
    train_imgs[i] = preprocess_input(img_to_array(img))

  # ----- SPLIT DATA -----
  # One call splits images and both hemispheres with the same random 80/20
  # partition, so rows stay aligned across the three arrays.
  imgs_train, imgs_val, lh_fmri_train, lh_fmri_val, rh_fmri_train, rh_fmri_val = train_test_split(
      train_imgs, lh_fmri, rh_fmri, test_size=0.2, random_state=0)
  # The split arrays are copies; drop the unsplit originals to free memory.
  del train_imgs, lh_fmri, rh_fmri

  print('\nImage Shapes:')
  print(imgs_train.shape, imgs_val.shape)
  print('\nLeft fMRI Voxel Data Shapes:')
  print(lh_fmri_train.shape, lh_fmri_val.shape)
  print('\nRight fMRI Voxel Data Shapes:')
  print(rh_fmri_train.shape, rh_fmri_val.shape)

  # Make one model per hemisphere: 'L' = left, 'R' = right
  hemi_data = {'L': (lh_fmri_train, lh_fmri_val),
               'R': (rh_fmri_train, rh_fmri_val)}
  for hemi in ['L', 'R']:
    fmri_train, fmri_val = hemi_data[hemi]

    # Reset Keras' global state so models built in earlier iterations do not
    # keep accumulating memory.
    keras.backend.clear_session()

    # ----- DEFINE MODEL -----
    # Output size = number of voxels/vertices of the current hemisphere
    model, base_model = _build_encoding_model(fmri_train.shape[1])

    # Compile the encoding model
    model.compile(optimizer='adam', loss='mse')

    # Train the top layer (backbone frozen)
    model.fit(imgs_train, fmri_train, validation_data=(imgs_val, fmri_val), epochs=20, batch_size=32, verbose=1)
    print()

    # Unfreeze the whole Xception backbone for fine-tuning
    base_model.trainable = True

    # Recompile with a much lower learning rate so fine-tuning only nudges the
    # pre-trained weights
    model.compile(optimizer=Adam(learning_rate=1e-5), loss='mse')

    # Fine-tune the full model
    model.fit(imgs_train, fmri_train, validation_data=(imgs_val, fmri_val), epochs=10, batch_size=32, verbose=1)
    print()

    # Validation Prediction
    fmri_val_pred = model.predict(imgs_val)

    # Pearson correlation between predicted and measured responses, per voxel
    correlation = np.zeros(fmri_val_pred.shape[1])
    for v in tqdm(range(fmri_val_pred.shape[1])):
      correlation[v] = corr(fmri_val_pred[:,v], fmri_val[:,v])[0]
    print('\nMean Corr Val: Subj ' +str(subj)+' '+hemi)
    print(np.mean(correlation))
    print()

    # Save Model
    model.save('/content/drive/MyDrive/ColabNotebooks/NeuroAI:FINALPROJECT/model'+str(subj)+hemi+'.h5')

    # Drop the references so the weights can be freed before the next model
    del model, base_model

---------- Subject 7 ----------
LH training fMRI data shape:
(9841, 19004)
(Training stimulus images × LH vertices)

RH training fMRI data shape:
(9841, 20544)
(Training stimulus images × RH vertices)

Image Shapes:
(7872, 299, 299, 3) (1969, 299, 299, 3)

Left fMRI Voxel Data Shapes:
(7872, 19004) (1969, 19004)
Right fMRI Voxel Data Shapes:

(7872, 20544) (1969, 20544)
Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/xception/xception_weights_tf_dim_ordering_tf_kernels_notop.h5
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



100%|██████████| 19004/19004 [00:23<00:00, 795.03it/s]



Mean Corr Val: Subj 7 L
0.3399097441705209

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10



100%|██████████| 20544/20544 [00:26<00:00, 772.84it/s]


Mean Corr Val: Subj 7 R
0.3322917732492202






# Final Model Training Block
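Run this code when preparing a submission to the challenge. No validation set is held out here: the models are trained on all of the training data. <br/>
The Colab runtime is likely to run out of memory and crash if you loop over all eight subjects in one run; shorten the list in `for subj in [...]` to only a few subjects per run.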

In [None]:
class argObj:
  """Per-subject locations of the Algonauts data and submission folders.

  Attributes set by __init__:
    subj: subject number formatted with format(subj, '02') (1 -> '01').
    data_dir: '<data_dir>/subj<subj>'.
    parent_submission_dir: the submission root, stored unchanged.
    subject_submission_dir: '<parent_submission_dir>/subj<subj>'; the folder
      is created when the object is constructed if it does not exist yet.
  """

  def __init__(self, data_dir, parent_submission_dir, subj):
    self.subj = format(subj, '02')
    self.data_dir = os.path.join(data_dir, 'subj'+self.subj)
    self.parent_submission_dir = parent_submission_dir
    self.subject_submission_dir = os.path.join(self.parent_submission_dir,
        'subj'+self.subj)

    # Create the submission directory if not existing
    os.makedirs(self.subject_submission_dir, exist_ok=True)


def _build_encoding_model(num_voxels):
  """Build an Xception-based regression model predicting `num_voxels` values.

  The model is: random flip/rotation augmentation -> ImageNet-pretrained
  Xception backbone (frozen) -> global average pooling -> dropout ->
  L2-regularized linear Dense layer with one output per voxel/vertex.

  Building the model inside a function means the intermediate tensors and
  layers are not kept alive by notebook-level variables afterwards.

  Args:
    num_voxels: number of output units of the final Dense layer.

  Returns:
    (model, base_model): the full (uncompiled) Keras model and its Xception
    backbone, so the caller can unfreeze the backbone later.
  """
  data_augmentation = keras.Sequential(
      [keras.layers.RandomFlip("horizontal"), keras.layers.RandomRotation(0.1),]
  )

  # Load the Xception model with its default pre-trained weights, without the
  # classification head
  base_model = keras.applications.Xception(include_top=False)

  # Freeze the convolutional layers of the Xception backbone
  base_model.trainable = False

  inputs = keras.Input(shape=(299, 299, 3))
  x = data_augmentation(inputs) # augment data to generalize
  # training=False keeps the backbone in inference mode even after it is
  # unfrozen for fine-tuning
  x = base_model(x, training=False)
  x = keras.layers.GlobalAveragePooling2D()(x) # pool
  x = keras.layers.Dropout(0.2)(x)  # regularize with dropout
  outputs = keras.layers.Dense(num_voxels, activation='linear', kernel_regularizer=keras.regularizers.l2(0.01))(x) # define voxels to predict
  return keras.Model(inputs, outputs), base_model


for subj in [1, 2, 3, 4, 5, 6, 7, 8]:
  print('---------- Subject '+str(subj)+' ----------')
  args = argObj(data_dir, parent_submission_dir, subj)

  # ----- LOAD fMRI DATA -----
  fmri_dir = os.path.join(args.data_dir, 'training_split', 'training_fmri')
  lh_fmri = np.load(os.path.join(fmri_dir, 'lh_training_fmri.npy'))
  rh_fmri = np.load(os.path.join(fmri_dir, 'rh_training_fmri.npy'))

  print('LH training fMRI data shape:')
  print(lh_fmri.shape)
  print('(Training stimulus images × LH vertices)')

  print('\nRH training fMRI data shape:')
  print(rh_fmri.shape)
  print('(Training stimulus images × RH vertices)')

  # ----- LOAD IMAGE LOCATION -----
  train_img_dir  = os.path.join(args.data_dir, 'training_split', 'training_images')

  # ----- LOAD IMAGE DATA & PREPROCESS -----
  # Sorted so that image i lines up with row i of the fMRI arrays.
  img_paths = sorted(Path(train_img_dir).iterdir())
  # Fail fast, before loading gigabytes of images, if rows and images disagree.
  if len(img_paths) != lh_fmri.shape[0]:
    raise ValueError('Found '+str(len(img_paths))+' entries in '+str(train_img_dir)
                     +' but '+str(lh_fmri.shape[0])+' fMRI rows')
  # Fill one pre-allocated array instead of building a list and copying it with
  # np.array(), which briefly needs twice the memory.
  # float32 matches img_to_array's output under the default Keras float type.
  train_imgs = np.empty((len(img_paths), 299, 299, 3), dtype=np.float32)
  for i, path in enumerate(img_paths):
    img = load_img(path, target_size=(299, 299))
    train_imgs[i] = preprocess_input(img_to_array(img))

  print('\nImage Shapes:')
  print(train_imgs.shape)
  print('\nLeft fMRI Voxel Data Shapes:')
  print(lh_fmri.shape)
  print('\nRight fMRI Voxel Data Shapes:')
  print(rh_fmri.shape)

  # Make one model per hemisphere: 'L' = left, 'R' = right
  hemi_targets = {'L': lh_fmri, 'R': rh_fmri}
  for hemi in ['L', 'R']:
    fmri_target = hemi_targets[hemi]

    # Reset Keras' global state so models built in earlier iterations do not
    # keep accumulating memory.
    keras.backend.clear_session()

    # ----- DEFINE MODEL -----
    # Output size = number of voxels/vertices of the current hemisphere
    model, base_model = _build_encoding_model(fmri_target.shape[1])

    # Compile the encoding model
    model.compile(optimizer='adam', loss='mse')

    # Train the top layer (backbone frozen) on all training data
    model.fit(train_imgs, fmri_target, epochs=20, batch_size=32, verbose=1)

    # Unfreeze the whole Xception backbone for fine-tuning
    base_model.trainable = True

    # Recompile with a much lower learning rate so fine-tuning only nudges the
    # pre-trained weights
    model.compile(optimizer=Adam(learning_rate=1e-5), loss='mse')

    # Fine-tune the full model
    model.fit(train_imgs, fmri_target, epochs=10, batch_size=32, verbose=1)

    # Save Model
    model.save('/content/drive/MyDrive/ColabNotebooks/NeuroAI:FINALPROJECT/final_model'+str(subj)+hemi+'.h5')

    # Drop the references so the weights can be freed before the next model
    del model, base_model


---------- Subject 8 ----------
LH training fMRI data shape:
(8779, 18981)
(Training stimulus images × LH vertices)

RH training fMRI data shape:
(8779, 20530)
(Training stimulus images × RH vertices)

Image Shapes:
(8779, 299, 299, 3)

Left fMRI Voxel Data Shapes:
(8779, 18981)
Right fMRI Voxel Data Shapes:

(8779, 20530)
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


# Evaluate Final Models on Algonauts Test Set

In [None]:
class argObj:
  """Per-subject locations of the Algonauts data and submission folders.

  Attributes set by __init__:
    subj: subject number formatted with format(subj, '02') (1 -> '01').
    data_dir: '<data_dir>/subj<subj>'.
    parent_submission_dir: the submission root, stored unchanged.
    subject_submission_dir: '<parent_submission_dir>/subj<subj>'; the folder
      is created when the object is constructed if it does not exist yet.
  """

  def __init__(self, data_dir, parent_submission_dir, subj):
    self.subj = format(subj, '02')
    self.data_dir = os.path.join(data_dir, 'subj'+self.subj)
    self.parent_submission_dir = parent_submission_dir
    self.subject_submission_dir = os.path.join(self.parent_submission_dir,
        'subj'+self.subj)

    # Create the submission directory if not existing
    os.makedirs(self.subject_submission_dir, exist_ok=True)


for subj in [1, 2, 3, 4, 5, 6, 7, 8]:
  print('---------- Subject '+str(subj)+' ----------')
  args = argObj(data_dir, parent_submission_dir, subj)

  # ----- LOAD IMAGE LOCATION -----
  test_img_dir  = os.path.join(args.data_dir, 'test_split', 'test_images')

  # ----- LOAD IMAGE DATA & PREPROCESS -----
  # Sorted so that row i of the saved predictions corresponds to the i-th test
  # image in sorted filename order.
  img_paths = sorted(Path(test_img_dir).iterdir())
  # Fill one pre-allocated array instead of building a list and copying it.
  # float32 matches img_to_array's output under the default Keras float type.
  test_imgs = np.empty((len(img_paths), 299, 299, 3), dtype=np.float32)
  for i, path in enumerate(img_paths):
    img = load_img(path, target_size=(299, 299))
    test_imgs[i] = preprocess_input(img_to_array(img))

  # ----- RUN MODELS -----
  # Load, run and release one hemisphere model at a time so only one model is
  # held in memory at any moment.
  predictions = {}
  for hemi in ['L', 'R']:
    # Reset Keras' global state left over from previously loaded models
    keras.backend.clear_session()
    model = load_model('/content/drive/MyDrive/ColabNotebooks/NeuroAI:FINALPROJECT/final_model'+str(subj)+hemi+'.h5')
    # Predictions are stored as float32 before saving
    predictions[hemi] = model.predict(test_imgs).astype(np.float32)
    del model

  lh_fmri_test_pred = predictions['L']
  rh_fmri_test_pred = predictions['R']

  print(lh_fmri_test_pred.shape, rh_fmri_test_pred.shape)

  # One file per hemisphere in the subject's submission folder
  np.save(os.path.join(args.subject_submission_dir, 'lh_pred_test.npy'), lh_fmri_test_pred)
  np.save(os.path.join(args.subject_submission_dir, 'rh_pred_test.npy'), rh_fmri_test_pred)

---------- Subject 1 ----------
(159, 19004) (159, 20544)
---------- Subject 2 ----------
(159, 19004) (159, 20544)
---------- Subject 3 ----------
(293, 19004) (293, 20544)
---------- Subject 4 ----------
(395, 19004) (395, 20544)
---------- Subject 5 ----------
(159, 19004) (159, 20544)
---------- Subject 6 ----------
(293, 18978) (293, 20220)
---------- Subject 7 ----------
(159, 19004) (159, 20544)
---------- Subject 8 ----------
(395, 18981) (395, 20530)


# Citations

If you use the data provided for the Algonauts Project 2023 Challenge, please cite the following papers:
1. Gifford AT, Lahner B, Saba-Sadiya S, Vilas MG, Lascelles A, Oliva A, Kay K, Roig G, Cichy RM. 2023. *The Algonauts Project 2023 Challenge: How the Human Brain Makes Sense of Natural Scenes*. arXiv preprint, arXiv:2301.03198. DOI: https://doi.org/10.48550/arXiv.2301.03198

2. Allen EJ, St-Yves G, Wu Y, Breedlove JL, Prince JS, Dowdle LT, Nau M, Caron B, Pestilli F, Charest I, Hutchinson JB, Naselaris T, Kay K. 2022. *A massive 7T fMRI dataset to bridge cognitive neuroscience and computational intelligence*. Nature Neuroscience, 25(1):116–126. DOI: https://doi.org/10.1038/s41593-021-00962-x