In [2]:
import os
import pandas as pd 
import librosa
import numpy as np
import matplotlib.pyplot as plt
import torch 
from IPython.display import Audio
from tqdm import tqdm

import nibabel as nib
from nilearn import plotting
from nilearn.glm.first_level import FirstLevelModel
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_contrast_matrix

In [3]:
fmri_music_path = '/data01/data/fMRI_music_genre'
print(os.listdir(fmri_music_path))
stimuli_path = fmri_music_path + '/data_wav'
print(os.listdir(stimuli_path))

subject = "sub-001"
neural_path_deriv = fmri_music_path + '/ds003720-download/derivatives/' + subject
neural_path = fmri_music_path + '/ds003720-download/' + subject
print(os.listdir(neural_path_deriv))
# fmri_data = nib.load(event_path_train.replace('events.tsv', 'bold.nii'))

['data_wav', 'ds003720', 'glmsingle_outputs', 'ds003720-download', 'annex-uuid', 'fmri_preproc_data']
['brain2music-captions.csv', 'genres_original', '.DS_Store', 'features_30_sec.csv', 'images_original', 'features_3_sec.csv']
['anat', 'figures', 'log', 'func']


In [4]:
def is_training_events_file(filename, phase):
    return filename.startswith("sub-001_task-"+phase+"_run-") and filename.endswith("_events.tsv")

all_files = os.listdir(neural_path + '/func')
training_files = [filename for filename in all_files if is_training_events_file(filename, "Training")]
print(training_files)

['sub-001_task-Training_run-07_events.tsv', 'sub-001_task-Training_run-08_events.tsv', 'sub-001_task-Training_run-01_events.tsv', 'sub-001_task-Training_run-10_events.tsv', 'sub-001_task-Training_run-02_events.tsv', 'sub-001_task-Training_run-11_events.tsv', 'sub-001_task-Training_run-03_events.tsv', 'sub-001_task-Training_run-04_events.tsv', 'sub-001_task-Training_run-12_events.tsv', 'sub-001_task-Training_run-05_events.tsv', 'sub-001_task-Training_run-06_events.tsv', 'sub-001_task-Training_run-09_events.tsv']


In [8]:
def get_fmri_data(training_files, t_r):
    eff_map_array = []
    stimuli_array = []
    genre_track_array = []
    for event in tqdm(training_files):
        event_path = neural_path + '/func/' + event
        fmri_preproc_path = neural_path_deriv + '/func/' + event.replace("_events.tsv","_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz")
        events_df = pd.read_csv(event_path, sep='\t')

        for i in tqdm(range(len(events_df))):
            genre = events_df['genre'].iloc[i].strip("'")
            track = int(events_df['track'].iloc[i]) 
            start = float(events_df['start'].iloc[i])
            track_str = str(track).zfill(5)
            wav_path = os.path.join(stimuli_path, 'genres_original', genre, f"{genre}.{track_str}.wav")
            y_sound, sr = librosa.load(wav_path, sr=16000, offset=start, duration=15)
            genre_track_array.append(genre+track_str)
            stimuli_array.append(y_sound.reshape(-1,1))

        fmri_img = nib.load(fmri_preproc_path)
        events_df['trial_type'] = events_df['genre'].str.strip("'") + ' - ' + events_df['track'].astype(str)
        first_level_model = FirstLevelModel(t_r)
        first_level_model = first_level_model.fit(fmri_img, events=events_df)
        design_matrix_gentrack = first_level_model.design_matrices_[0]

        contrast_val_gentrack = np.eye(design_matrix_gentrack.shape[1])[0:-13]
        eff_map_gentrack = first_level_model.compute_contrast(contrast_val_gentrack, output_type="effect_size")
        data_gentrack = eff_map_gentrack.get_fdata()
        eff_map_array.append(data_gentrack)

    eff_map_stacked = np.concatenate(eff_map_array, axis=-1)
    stimuli_stacked = np.concatenate(stimuli_array, axis=-1)
    gtrack_stacked = np.array(genre_track_array)
    return eff_map_stacked, stimuli_stacked, gtrack_stacked

eff_map_stacked, stimuli_stacked, gtrack_stacked = get_fmri_data(training_files, 1.5)
print(eff_map_stacked.shape)
print(stimuli_stacked.shape)
print(gtrack_stacked.shape)


  0%|          | 0/12 [00:00<?, ?it/s]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-07_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 302.25it/s]
  8%|▊         | 1/12 [00:16<03:03, 16.71s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-08_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 270.14it/s]
 17%|█▋        | 2/12 [00:34<02:50, 17.10s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-01_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 281.57it/s]
 25%|██▌       | 3/12 [00:51<02:33, 17.02s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-10_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 274.80it/s]
 33%|███▎      | 4/12 [01:07<02:15, 16.88s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-02_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 262.70it/s]
 42%|████▏     | 5/12 [01:24<01:58, 16.96s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-11_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 269.54it/s]
 50%|█████     | 6/12 [01:41<01:40, 16.76s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-03_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 270.83it/s]
 58%|█████▊    | 7/12 [01:56<01:22, 16.46s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-04_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 258.58it/s]
 67%|██████▋   | 8/12 [02:13<01:05, 16.38s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-12_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 271.23it/s]
 75%|███████▌  | 9/12 [02:29<00:49, 16.44s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-05_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 281.49it/s]
 83%|████████▎ | 10/12 [02:46<00:33, 16.51s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-06_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 264.67it/s]
 92%|█████████▏| 11/12 [03:03<00:16, 16.61s/it]

/data01/data/fMRI_music_genre/ds003720-download/derivatives/sub-001/func/sub-001_task-Training_run-09_space-MNI152NLin2009cAsym_desc-preproc_bold.nii.gz


100%|██████████| 41/41 [00:00<00:00, 275.10it/s]
100%|██████████| 12/12 [03:19<00:00, 16.62s/it]


(77, 95, 82, 492)
(240000, 492)
(492,)


In [9]:
gtrack_stacked

array(['jazz00034', 'pop00016', 'jazz00004', 'country00017',
       'classical00038', 'hiphop00002', 'disco00012', 'reggae00018',
       'metal00094', 'rock00062', 'blues00092', 'metal00091',
       'hiphop00094', 'rock00005', 'country00010', 'pop00023',
       'disco00063', 'classical00076', 'reggae00090', 'blues00033',
       'jazz00063', 'pop00048', 'reggae00027', 'classical00091',
       'jazz00083', 'rock00051', 'hiphop00006', 'blues00080',
       'disco00031', 'metal00015', 'country00073', 'reggae00042',
       'jazz00099', 'disco00074', 'rock00035', 'metal00040', 'blues00031',
       'classical00067', 'hiphop00080', 'pop00001', 'country00062',
       'country00062', 'reggae00059', 'metal00054', 'country00053',
       'pop00008', 'rock00040', 'hiphop00070', 'disco00098', 'blues00089',
       'jazz00026', 'classical00030', 'disco00052', 'classical00097',
       'country00090', 'blues00096', 'rock00072', 'jazz00033', 'pop00082',
       'hiphop00074', 'reggae00030', 'metal00031', 'j

In [15]:
event_path = neural_path + '/func/' + training_files[0]
events_df = pd.read_csv(event_path, sep='\t')
print(len(events_df))
genre = events_df['genre'].iloc[0].strip("'") 
print(genre)
track = int(events_df['track'].iloc[0])
print(track)
start = float(events_df['start'].iloc[0])
print(start)
track_str = str(track).zfill(5)  
print(track_str)
get_path = os.path.join(stimuli_path, 'genres_original', genre, f"{genre}.{track_str}.wav")
print(get_path)
y, sr = librosa.load(get_path, sr=16000, offset=start, duration=15)
print(y.reshape(-1,1).shape)

41
disco
9
4.21
00009
/data01/data/fMRI_music_genre/data_wav/genres_original/disco/disco.00009.wav
(240000, 1)


In [6]:
# def construct_wav_path(row):
#     genre = row['genre'].strip("'")  
#     track = int(row['track'])
#     track_str = str(track).zfill(5)  
#     return os.path.join(base_directory, genre, f"{genre}.{track_str}.wav")