In [1]:
import os
import pandas as pd 
import librosa
import numpy as np
import matplotlib.pyplot as plt
import torch 
from IPython.display import Audio
from tqdm import tqdm

import nibabel as nib
from nilearn import plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_contrast_matrix

In [2]:
fmri_music_path = '/data01/data/fMRI_music_genre'
print(os.listdir(fmri_music_path))
stimuli_path = fmri_music_path + '/data_wav'
print(os.listdir(stimuli_path))

subject = "sub-001"
neural_path_deriv = fmri_music_path + '/ds003720-download/derivatives/' + subject
neural_path = fmri_music_path + '/ds003720-download/' + subject
print(os.listdir(neural_path_deriv))
# fmri_data = nib.load(event_path_train.replace('events.tsv', 'bold.nii'))

['data_wav', 'ds003720', 'glmsingle_outputs', 'ds003720-download', 'annex-uuid', 'fmri_preproc_data']
['brain2music-captions.csv', 'genres_original', '.DS_Store', 'features_30_sec.csv', 'images_original', 'features_3_sec.csv']
['anat', 'figures', 'log', 'func']


In [3]:
def is_training_events_file(filename, phase):
    return filename.startswith("sub-001_task-"+phase+"_run-") and filename.endswith("_events.tsv")

all_files = os.listdir(neural_path + '/func')
training_files = [filename for filename in all_files if is_training_events_file(filename, "Training")]
print(training_files)

['sub-001_task-Training_run-07_events.tsv', 'sub-001_task-Training_run-08_events.tsv', 'sub-001_task-Training_run-01_events.tsv', 'sub-001_task-Training_run-10_events.tsv', 'sub-001_task-Training_run-02_events.tsv', 'sub-001_task-Training_run-11_events.tsv', 'sub-001_task-Training_run-03_events.tsv', 'sub-001_task-Training_run-04_events.tsv', 'sub-001_task-Training_run-12_events.tsv', 'sub-001_task-Training_run-05_events.tsv', 'sub-001_task-Training_run-06_events.tsv', 'sub-001_task-Training_run-09_events.tsv']


In [4]:
def get_fmri_data(training_files, t_r):
    eff_map_array = []
    stimuli_array = []
    genre_track_array = []
    for event in tqdm(training_files):
        event_path = neural_path + '/func/' + event
        fmri_preproc_path = neural_path_deriv + '/func/' + event.replace("_events.tsv","_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz")
        events_df = pd.read_csv(event_path, sep='\t')

        for i in tqdm(range(len(events_df))):
            genre = events_df['genre'].iloc[i].strip("'")
            track = int(events_df['track'].iloc[i]) 
            start = float(events_df['start'].iloc[i])
            track_str = str(track).zfill(5)
            wav_path = os.path.join(stimuli_path, 'genres_original', genre, f"{genre}.{track_str}.wav")
            y_sound, sr = librosa.load(wav_path, sr=48000, offset=start, duration=15)
            genre_track_array.append(genre+track_str)
            stimuli_array.append(y_sound.reshape(-1,1))

        fmri_img = nib.load(fmri_preproc_path)
        events_df['trial_type'] = events_df['genre'].str.strip("'") + ' - ' + events_df['track'].astype(str)
        first_level_model = FirstLevelModel(t_r)
        first_level_model = first_level_model.fit(fmri_img, events=events_df)
        design_matrix_gentrack = first_level_model.design_matrices_[0]

        contrast_val_gentrack = np.eye(design_matrix_gentrack.shape[1])[0:-13]
        eff_map_gentrack = first_level_model.compute_contrast(contrast_val_gentrack, output_type="effect_size")
        data_gentrack = eff_map_gentrack.get_fdata()
        eff_map_array.append(data_gentrack)

    eff_map_stacked = np.concatenate(eff_map_array, axis=-1)
    stimuli_stacked = np.concatenate(stimuli_array, axis=-1)
    gtrack_stacked = np.array(genre_track_array)
    return eff_map_stacked, stimuli_stacked, gtrack_stacked

eff_map_stacked, stimuli_stacked, gtrack_stacked = get_fmri_data(training_files, 1.5)
print(eff_map_stacked.shape)
print(stimuli_stacked.shape)
print(gtrack_stacked.shape)


100%|██████████| 41/41 [00:00<00:00, 41.78it/s]
100%|██████████| 41/41 [00:00<00:00, 201.68it/s]
100%|██████████| 41/41 [00:00<00:00, 197.85it/s]
100%|██████████| 41/41 [00:00<00:00, 194.05it/s]
100%|██████████| 41/41 [00:00<00:00, 200.71it/s]
100%|██████████| 41/41 [00:00<00:00, 204.05it/s]
100%|██████████| 41/41 [00:00<00:00, 190.76it/s]
100%|██████████| 41/41 [00:00<00:00, 194.85it/s]
100%|██████████| 41/41 [00:00<00:00, 203.31it/s]
100%|██████████| 41/41 [00:00<00:00, 205.12it/s]
100%|██████████| 41/41 [00:00<00:00, 191.90it/s]
100%|██████████| 41/41 [00:00<00:00, 214.46it/s]
100%|██████████| 12/12 [03:12<00:00, 16.08s/it]


(77, 95, 82, 492)
(720001, 492)
(492,)


In [5]:
# event_path = neural_path + '/func/' + training_files[0]
# events_df = pd.read_csv(event_path, sep='\t')
# print(len(events_df))
# genre = events_df['genre'].iloc[0].strip("'") 
# print(genre)
# track = int(events_df['track'].iloc[0])
# print(track)
# start = float(events_df['start'].iloc[0])
# print(start)
# track_str = str(track).zfill(5)  
# print(track_str)
# get_path = os.path.join(stimuli_path, 'genres_original', genre, f"{genre}.{track_str}.wav")
# print(get_path)
# y, sr = librosa.load(get_path, sr=16000, offset=start, duration=15)
# print(y.reshape(-1,1).shape)