In [61]:
import os, pickle
import numpy as np
import pandas as pd
import librosa

In [62]:
#paths
scale = 'MISTROI'
sub = 'sub-03'
dataset = 'mutemusic'
tracks_path = f'/home/maellef/DataBase/{dataset}/stimuli'
data_path = f'/home/maellef/DataBase/{dataset}/Preprocessed_fMRI'
output_path = '/home/maellef/git/MuteMusic_analysis/data/training_data'

#-----------------specific to muetmusic
silenced_tracks = True
stim_tracks = 'silenced' if silenced_tracks else 'full' 
tracks_path = os.path.join(tracks_path, stim_tracks)

sessions_data = './data/sub_session_data.tsv'
sessions_df = pd.read_csv(sessions_data, sep='\t')
silences_data = './data/silences_data.tsv'
silences_df = pd.read_csv(silences_data, sep='\t')

In [63]:
def find_label_value(name, label):
    label += '-' if label[-1] != '-' else label
    startval = name.find(label)+len(label)
    endval = startval
    for i in range(startval, len(name)):
        if name[i] in ['_', '.']:
            break
        endval+=1
    return name[startval:endval]

In [64]:
#structure to store all data pair (wav-fMRI)
df = pd.DataFrame({
})
wavbold = []
#general args
tr = 1.49
hrf_nb_tr = 0

sub_path = os.path.join(data_path, scale, sub)

for filename in os.listdir(sub_path):
    #load bold data
    runpath = os.path.join(sub_path, filename)
    with np.load(runpath) as data:
        x = data['X']
    data.close()
    
    #args necessary to extract infos from run
    sub = sub
    ses = int(find_label_value(filename, 'ses'))
    #------------different between friends and mutemusic
    run = int(find_label_value(filename, 'run'))

    #-----------specific to mutemusic
    run_data = sessions_df.loc[(sessions_df['sub']==sub)&
                                (sessions_df['session']==ses)&
                                (sessions_df['run']==run)]
    
    for i, track in run_data.iterrows():
        #track infos
        groupe = track['Groupe']
        category = track['category']
        score = track['value']
        repetition = track['repetition']
        title_silenced = track['title']
        title = title_silenced.replace('_silenced','')
        onset = round(track['onset'])
        duration = round(track['total_duration'])

        #load silenced track
        selected_title = title_silenced if silenced_tracks else title
        wav_path = os.path.join(tracks_path, category, selected_title)
        wav, sr = librosa.load(wav_path, sr=None)

        #bold data for track (WITH HRF delay !!!!!)
        tr_onset = round(onset/tr) + hrf_nb_tr
        tr_duration = round(duration/tr)
        track_fmri = x[tr_onset:tr_onset+tr_duration, :]
        print(title, duration, track_fmri.shape, sr)

        #define timestamps to extract both music and silence    
        sdf = silences_df.loc[silences_df['track']==title]
        sdf = sdf.drop(['category', 'track'], axis='columns').reset_index()
        for col in sdf.columns:
            if 'duration' not in col:
                sdf[col]=sdf[col]/sr
        

        #general----------------------------------------------------------
        #extract corresponding wav/bold
        bold_segment = track_fmri
        wav_segment = wav
        segment_df = pd.Series({
            'session':ses,
            'run':run,
            'category':category,
            'sr':sr,
            'groupe':groupe,
            'title':selected_title,
            'score':score,
            'repetition':repetition
        }).to_frame().T
                
        segment_df = pd.concat([segment_df, sdf], axis='columns')
        print(segment_df)
        df = pd.concat([df, segment_df], axis='rows', ignore_index=True)  
        wavbold.append((wav_segment, bold_segment))

df.to_csv(os.path.join(output_path, f'{dataset}_{sub}_{stim_tracks}_metadata.tsv'), sep='\t')
with open(os.path.join(output_path, f'{dataset}_{sub}_{stim_tracks}_pairWavBold'), 'wb') as f: 
    pickle.dump(wavbold, f) 

Super_Mario_64_BobOmb_Battlefield_(Extended)_Theme.wav 54 (36, 210) 48000
  session run   category     sr groupe  \
0       1   5  Synthetic  48000      U   

                                               title score repetition  \
0  Super_Mario_64_BobOmb_Battlefield_(Extended)_T...     0          1   

      index  S1_duration  ...    S1_stop  S2_duration   S2_start    S2_stop  \
0  0.000542          4.0  ...  53.435563          5.0  38.836958  43.836958   

   S3_duration   S3_start    S3_stop  S4_duration  S4_start  S4_stop  
0          5.0  20.406021  25.406021          NaN       NaN      NaN  

[1 rows x 21 columns]
Super_Mario_Bros_3_Underwater_Theme.wav 46 (31, 210) 48000
  session run   category     sr groupe  \
0       1   5  Synthetic  48000      F   

                                              title score repetition  \
0  Super_Mario_Bros_3_Underwater_Theme_silenced.wav     4          1   

      index  S1_duration  ...    S1_stop  S2_duration   S2_start    S2_stop  \
0 