In [15]:
import os
import numpy as np
from pathlib import Path
from PIL import Image
from tqdm import tqdm
import matplotlib
from matplotlib import pyplot as plt
from nilearn import datasets
from nilearn import plotting
import torch
from torch.utils.data import DataLoader, Dataset
from torchvision.models.feature_extraction import create_feature_extractor, get_graph_node_names
from torchvision import transforms
from sklearn.decomposition import IncrementalPCA
from sklearn.linear_model import LinearRegression
from scipy.stats import pearsonr as corr
import re
import shutil

In [16]:
# Get the list of all indices that belong to the shared 1000 images

In [17]:
# Names of every entry in the "shared1000" folder (os.listdir returns them in arbitrary order).
file_list = os.listdir("shared1000")

In [18]:
# Capture the digits between "_nsd" and ".png" in each shared-image filename.
pattern = r'_nsd(\d+)\.png'
# Entries that do not match the pattern (hidden files, checkpoint folders, ...)
# are skipped; calling .group(1) on a failed search would raise AttributeError.
matches = (re.search(pattern, filename) for filename in file_list)
nsd_parts = [match.group(1) for match in matches if match is not None]

In [19]:
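# The NSD ids in the shared1000 filenames are all 1 higher than the corresponding ids in the
# per-subject filenames (presumably 1-based vs 0-based numbering), so they are shifted down by one below.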

In [20]:
# Turn the captured digit strings into integers so they can be shifted.
nsd_parts = list(map(int, nsd_parts))

In [21]:
# Shift every id down by one.
# NOTE(review): this rebinds nsd_parts from itself, so running the cell a second
# time shifts the ids again.
nsd_parts = [i-1 for i in nsd_parts]

In [22]:
# Back to strings so the ids can be zero-padded.
nsd_parts = list(map(str, nsd_parts))

In [23]:
# Left-pad each id with zeros to a width of five characters.
nsd_parts = [s.zfill(5) for s in nsd_parts]

In [24]:
# Sort the padded id strings (lexicographic order).
nsd_parts = sorted(nsd_parts)

In [25]:
# Preview the first ten ids and the total count.
nsd_parts[:10], len(nsd_parts)

(['02950',
  '02990',
  '03049',
  '03077',
  '03146',
  '03157',
  '03164',
  '03171',
  '03181',
  '03386'],
 1000)

In [26]:
# Set of the shifted, zero-padded id strings, used for membership tests.
shared1000 = set(nsd_parts)

In [27]:
def filter_fmri_and_images(dir_path_img, dir_path_lh, dir_path_rh, shared1000):
    """Select the images whose NSD id is in ``shared1000`` and the matching fMRI rows.

    Row ``i`` of each fMRI array is taken to belong to the ``i``-th image file
    in sorted filename order -- TODO confirm against the dataset documentation.

    Args:
        dir_path_img: Directory with image files named ``..._nsd-<digits>.png``.
        dir_path_lh: Path to the left-hemisphere ``.npy`` array.
        dir_path_rh: Path to the right-hemisphere ``.npy`` array.
        shared1000: Container of id strings to keep; ids are compared as the
            exact digit strings captured from the filenames.

    Returns:
        Tuple ``(updated_img_files, lh_fmri, rh_fmri)``: the kept filenames in
        sorted order and the two arrays restricted to the same positions.

    Raises:
        ValueError: If the number of image files differs from the number of
            rows in either array, since positional selection would then pick
            the wrong rows.
    """
    pattern = re.compile(r'_nsd-(\d+)\.png')

    # Only entries that look like image files take part in the positional
    # alignment; anything else in the directory (hidden files, checkpoint
    # folders) is ignored instead of crashing on a failed regex search.
    images = []
    for filename in sorted(os.listdir(dir_path_img)):
        match = pattern.search(filename)
        if match is not None:
            images.append((filename, match.group(1)))

    lh_fmri = np.load(dir_path_lh)
    rh_fmri = np.load(dir_path_rh)

    n_images = len(images)
    if lh_fmri.shape[0] != n_images or rh_fmri.shape[0] != n_images:
        raise ValueError(
            f"{n_images} image files in {dir_path_img!r} but "
            f"{lh_fmri.shape[0]} lh rows and {rh_fmri.shape[0]} rh rows; "
            "images and fMRI rows can no longer be matched by position"
        )

    to_be_kept_indices = [
        idx for idx, (_, nsd_id) in enumerate(images) if nsd_id in shared1000
    ]
    updated_img_files = [images[idx][0] for idx in to_be_kept_indices]
    return updated_img_files, lh_fmri[to_be_kept_indices], rh_fmri[to_be_kept_indices]

In [30]:
def update_files(base_path, dest, shared1000):
    """Extract one subject's shared images and matching fMRI rows into ``dest``.

    Reads ``training_images`` and ``training_fmri/{lh,rh}_training_fmri.npy``
    under ``base_path``, writes ``lh_fmri.npy`` / ``rh_fmri.npy`` into ``dest``
    and moves the selected image files into ``dest/img``.

    Because the images are moved (not copied), the source directory is changed
    by this call; a second call on the same ``base_path`` sees a different
    directory listing.

    Args:
        base_path: Subject's training split directory, with or without a
            trailing path separator.
        dest: Output directory, with or without a trailing path separator;
            created if missing.
        shared1000: Container of NSD id strings to keep.
    """
    # os.path.join gives the same paths as plain concatenation when the caller
    # passes a trailing separator, and still works when they do not.
    img_path = os.path.join(base_path, "training_images")
    lh_fmri_path = os.path.join(base_path, "training_fmri", "lh_training_fmri.npy")
    rh_fmri_path = os.path.join(base_path, "training_fmri", "rh_training_fmri.npy")

    dest_img = os.path.join(dest, "img")
    os.makedirs(dest_img, exist_ok=True)  # also creates dest itself

    upd_img_files, lh_fmri, rh_fmri = filter_fmri_and_images(
        img_path, lh_fmri_path, rh_fmri_path, shared1000
    )

    # Save the arrays before touching the source images, so a failed save
    # leaves the original image directory intact.
    np.save(os.path.join(dest, "lh_fmri.npy"), lh_fmri)
    np.save(os.path.join(dest, "rh_fmri.npy"), rh_fmri)

    for img in upd_img_files:
        shutil.move(os.path.join(img_path, img), os.path.join(dest_img, img))

In [32]:
# Input (training split) and output (updated) directories for subject 1.
base_path = "algonauts_data/subj01/training_split/"
dest = "algonauts_data/subj01/updated/"

In [33]:
# Extract the shared images and fMRI rows for subject 1.
# NOTE(review): update_files moves image files out of the source folder, so this is meant to run once.
update_files(base_path, dest, shared1000)

In [122]:
# Run the same extraction for subjects 2 through 8, rebinding base_path and dest on each pass.
for subject_num in range(2,9):
    base_path = f"algonauts_data/subj0{subject_num}/training_split/"
    dest = f"algonauts_data/subj0{subject_num}/updated/"
    update_files(base_path, dest, shared1000)
    print(f"Subject {subject_num} done")

Subject 2 done
Subject 3 done
Subject 4 done
Subject 5 done
Subject 6 done
Subject 7 done
Subject 8 done


In [34]:
# Folder names for subjects 1 through 8.
all_sub = [f"subj0{i}" for i in range(1,9)]
all_sub

['subj01',
 'subj02',
 'subj03',
 'subj04',
 'subj05',
 'subj06',
 'subj07',
 'subj08']

In [70]:
# For every subject, list the .png files in its updated/img folder (sorted)
# and parse the NSD id out of each name. l_full collects the filename lists,
# l_nsd the parallel id lists, both in the order of all_sub.
l_nsd = []
l_full = []
pattern = r'_nsd-(\d+)\.png'
for subj in all_sub:
    img_dir_upd = f"algonauts_data/{subj}/updated/img"
    img_files = [
        filename
        for filename in sorted(os.listdir(img_dir_upd))
        if filename.lower().endswith('.png')
    ]
    nsd_img_files = [re.search(pattern, filename).group(1) for filename in img_files]
    l_full.append(img_files)
    l_nsd.append(nsd_img_files)

In [71]:
# First two parsed ids of the second entry in l_nsd.
l_nsd[1][:2]

['02950', '02990']

In [72]:
# Delete rows from lh_fmri according to the NSD number in the image filenames, keeping only the rows of the images I keep

In [89]:
# One id set per subject, built from the first eight entries of l_nsd.
set1, set2, set3, set4, set5, set6, set7, set8 = (
    set(l_nsd[subject_pos]) for subject_pos in range(8)
)

In [90]:
# Ids that appear in all eight per-subject sets.
common_elements2 = set1 & set2 & set3 & set4 & set5 & set6 & set7 & set8
len(common_elements2)

872

In [46]:
# Ids present for every subject: intersect the per-subject id sets.
sets = [set(lst) for lst in l_nsd]
common_elements = set.intersection(*sets)

# Sorted list form for a stable ordering; sorted() already returns a list.
common_elements_list = sorted(common_elements)

# Preview the first two ids (only a cell's final expression is displayed).
common_elements_list[:2]

['02950', '02990']

In [86]:
# Number of ids in the last (eighth) entry of l_nsd.
len(l_nsd[7])

872

In [157]:
# Number of ids shared by all subjects.
len(common_elements_list)

872

In [47]:
# For each subject, the positions in its id list whose id is not shared by all subjects.
non_common_indices = [
    [position for position, nsd_id in enumerate(subject_ids) if nsd_id not in common_elements]
    for subject_ids in l_nsd
]

In [91]:
# How many positions are flagged for removal in the first entry.
len(non_common_indices[0])

399

In [53]:
# First filename of the third entry in l_full.
l_full[2][0]

'train-0313_nsd-02950.png'

In [160]:
# For each subject keep the filenames whose position is not flagged in
# non_common_indices. The flagged positions are turned into a set once per
# subject so each membership test is constant-time instead of a list scan.
filtered_lists = []
for idx, lst in enumerate(l_full):
    dropped = set(non_common_indices[idx])
    filtered_lists.append([item for index, item in enumerate(lst) if index not in dropped])


In [165]:
# First ten kept filenames of the second entry.
filtered_lists[1][:10]

['train-0406_nsd-02950.png',
 'train-0412_nsd-02990.png',
 'train-0419_nsd-03049.png',
 'train-0423_nsd-03077.png',
 'train-0431_nsd-03146.png',
 'train-0432_nsd-03157.png',
 'train-0436_nsd-03164.png',
 'train-0439_nsd-03171.png',
 'train-0472_nsd-03386.png',
 'train-0483_nsd-03434.png']

In [181]:
# Subject 1: move every file in updated/img that is not in the keep list into updated/trash.
base_path = "algonauts_data/subj01/updated/"
filtered_list = filtered_lists[0]
img_path = base_path + "img"
trash_path = base_path + "trash"
os.makedirs(trash_path, exist_ok=True)
source_files = os.listdir(img_path)
# Set lookup avoids rescanning the whole keep list for every file on disk.
keep_names = set(filtered_list)
for filename in source_files:
    if filename not in keep_names:
        source_path = os.path.join(img_path, filename)
        destination_path = os.path.join(trash_path, filename)
        shutil.move(source_path, destination_path)

In [198]:
# Subject 1's updated folder; the trailing slash matters because later cells append file names directly.
base_path = "algonauts_data/subj01/updated/"

In [199]:
# Number of rows about to be dropped for subject 1.
# NOTE(review): the recorded value here differs from the one recorded earlier for the
# same expression, so it depends on when non_common_indices was last computed — verify
# it matches the current contents of lh_fmri.npy before deleting rows.
len(non_common_indices[0])

110

In [200]:
# Subject 1: drop the flagged rows from both hemispheres and save the results as *_upd.npy.
rows_to_drop = non_common_indices[0]
lh_fmri = np.load(base_path + "lh_fmri.npy")
rh_fmri = np.load(base_path + "rh_fmri.npy")
lh_fmri_upd, rh_fmri_upd = (
    np.delete(responses, rows_to_drop, axis=0) for responses in (lh_fmri, rh_fmri)
)
np.save(base_path + "lh_fmri_upd.npy", lh_fmri_upd)
np.save(base_path + "rh_fmri_upd.npy", rh_fmri_upd)

In [189]:
# Shape of the right-hemisphere array after dropping rows.
rh_fmri_upd.shape

(872, 20544)

In [193]:
def update_fmri_data(base_path, filtered_list, non_comm_idx):
    """Trash the images not in ``filtered_list`` and drop the given fMRI rows.

    Every file in ``base_path/img`` whose name is not in ``filtered_list`` is
    moved to ``base_path/trash``. The rows listed in ``non_comm_idx`` are then
    deleted from ``lh_fmri.npy`` and ``rh_fmri.npy``, and the results are
    written next to them as ``lh_fmri_upd.npy`` / ``rh_fmri_upd.npy``. The
    shape of the reduced left-hemisphere array is printed.

    Args:
        base_path: Subject's ``updated`` directory, with or without a trailing
            path separator.
        filtered_list: Filenames to keep in ``img``.
        non_comm_idx: Row indices to delete along axis 0 of both arrays.
    """
    # os.path.join matches plain concatenation when base_path ends with a
    # separator and still builds the right paths when it does not.
    img_path = os.path.join(base_path, "img")
    trash_path = os.path.join(base_path, "trash")
    os.makedirs(trash_path, exist_ok=True)

    # A set makes each membership test constant-time instead of a list scan.
    keep_names = set(filtered_list)
    for filename in os.listdir(img_path):
        if filename not in keep_names:
            shutil.move(
                os.path.join(img_path, filename),
                os.path.join(trash_path, filename),
            )

    lh_fmri = np.load(os.path.join(base_path, "lh_fmri.npy"))
    rh_fmri = np.load(os.path.join(base_path, "rh_fmri.npy"))
    lh_fmri_upd = np.delete(lh_fmri, non_comm_idx, axis=0)
    rh_fmri_upd = np.delete(rh_fmri, non_comm_idx, axis=0)
    print(lh_fmri_upd.shape)
    np.save(os.path.join(base_path, "lh_fmri_upd.npy"), lh_fmri_upd)
    np.save(os.path.join(base_path, "rh_fmri_upd.npy"), rh_fmri_upd)

In [192]:
# Rebinds all_sub to subjects 2 through 8 only, replacing the earlier eight-subject list.
all_sub = [f"subj0{i}" for i in range(2,9)]
all_sub

['subj02', 'subj03', 'subj04', 'subj05', 'subj06', 'subj07', 'subj08']

In [196]:
# all_sub starts at subject 2 here, while filtered_lists and non_common_indices
# are indexed from position 0, so counting starts at 1 to pick the matching entries.
for list_pos, sub in enumerate(all_sub, start=1):
    base_path = f"algonauts_data/{sub}/updated/"
    print(list_pos)
    filtered_list, non_common_idx = filtered_lists[list_pos], non_common_indices[list_pos]
    update_fmri_data(base_path, filtered_list, non_common_idx)

1
(872, 19004)
2
(872, 19004)
3
(872, 19004)
4
(872, 19004)
5
(872, 18978)
6
(872, 19004)
7
(872, 18981)


In [201]:
# List the files available in subject 1's roi_masks folder.
os.listdir("algonauts_data/subj01/roi_masks/")

['lh.streams_challenge_space.npy',
 'rh.all-vertices_fsaverage_space.npy',
 'lh.floc-words_challenge_space.npy',
 'rh.floc-places_challenge_space.npy',
 'rh.floc-faces_fsaverage_space.npy',
 'lh.floc-bodies_fsaverage_space.npy',
 'lh.floc-faces_fsaverage_space.npy',
 'rh.floc-words_challenge_space.npy',
 'lh.floc-faces_challenge_space.npy',
 'rh.floc-words_fsaverage_space.npy',
 'lh.floc-bodies_challenge_space.npy',
 'lh.all-vertices_fsaverage_space.npy',
 'rh.floc-places_fsaverage_space.npy',
 'mapping_prf-visualrois.npy',
 'lh.floc-words_fsaverage_space.npy',
 'rh.floc-faces_challenge_space.npy',
 'mapping_floc-bodies.npy',
 'mapping_floc-words.npy',
 'mapping_floc-places.npy',
 'lh.streams_fsaverage_space.npy',
 'mapping_floc-faces.npy',
 'rh.prf-visualrois_fsaverage_space.npy',
 'rh.streams_fsaverage_space.npy',
 'lh.floc-places_challenge_space.npy',
 'lh.prf-visualrois_fsaverage_space.npy',
 'rh.floc-bodies_fsaverage_space.npy',
 'mapping_streams.npy',
 'rh.floc-bodies_challenge_s

In [6]:
# Load the prf-visualrois mapping; .item() unwraps the object stored in the 0-d array.
# NOTE(review): allow_pickle=True unpickles the file contents, which can execute
# arbitrary code — only load this file from a trusted source.
roi_map = np.load("algonauts_data/subj01/roi_masks/mapping_prf-visualrois.npy", allow_pickle=True).item()

In [7]:
# Left-hemisphere prf-visualrois array in challenge space (per the filename).
challenge = np.load("algonauts_data/subj01/roi_masks/lh.prf-visualrois_challenge_space.npy")

In [8]:
# Left-hemisphere prf-visualrois array in fsaverage space (per the filename).
fsaverage = np.load("algonauts_data/subj01/roi_masks/lh.prf-visualrois_fsaverage_space.npy")

In [9]:
# Inspect the mapping's keys and the shapes of the two ROI arrays.
roi_map.keys(), challenge.shape, fsaverage.shape

(dict_keys([0, 1, 2, 3, 4, 5, 6, 7]), (19004,), (163842,))

In [2]:
# NOTE(review): the recorded output is a NameError — this cell needs the cell that
# builds lh_fmri_upd to have been run first in the current kernel.
lh_fmri_upd.shape

NameError: name 'lh_fmri_upd' is not defined

In [10]:
# Reverse lookup: find the key in roi_map whose value is "V1v".
roi_ids = list(roi_map.keys())
roi_names = list(roi_map.values())
roi_mapping = roi_ids[roi_names.index("V1v")]

In [11]:
# 1 where the challenge-space array equals the selected key, 0 elsewhere.
challenge_roi = (challenge == roi_mapping).astype(int)

In [12]:
# Check which values occur in the mask.
np.unique(challenge_roi)

array([0, 1])

In [263]:
# 1 where the fsaverage-space array equals the selected key, 0 elsewhere.
fsaverage_roi = (fsaverage == roi_mapping).astype(int)


In [264]:
# Zero-filled vector with one slot per entry of the fsaverage mask.
fsaverage_response = np.zeros(fsaverage_roi.shape[0])

In [5]:
# Reload subject 1's reduced left-hemisphere array from disk (rebinds lh_fmri).
lh_fmri = np.load("algonauts_data/subj01/updated/lh_fmri_upd.npy")

In [13]:
# Shape of the array restricted to the columns where the challenge-space mask is nonzero.
roi_columns = challenge_roi.nonzero()[0]
lh_fmri[:, roi_columns].shape

(872, 710)

In [None]:
# Copy row `img` of lh_fmri, restricted to the challenge-space ROI columns, into the
# fsaverage ROI positions of fsaverage_response.
# NOTE(review): `img` is not defined in any visible cell — presumably a row index into
# lh_fmri; define it before running.
# NOTE(review): assumes both masks select the same number of entries in corresponding
# order — TODO confirm.
fsaverage_response[np.where(fsaverage_roi)[0]] = lh_fmri[img,np.where(challenge_roi)[0]]

In [274]:
# Number of fsaverage positions selected by the mask.
fsaverage_response[np.where(fsaverage_roi)[0]].shape

(710,)