In [1]:
import numpy as np
import scipy
import scipy.io as sio
import matplotlib.pyplot as plt
import nibabel as nib
import torch
import torchaudio

import sys
import os
from os.path import join as opj
from os.path import join, exists, split

import time
import urllib.request
import warnings
from tqdm import tqdm
from pprint import pprint
import zipfile
import glob
warnings.filterwarnings('ignore')

# from glmsingle.glmsingle import GLM_single
import pandas as pd
from nilearn import maskers
from nilearn import plotting
import tqdm
import nibabel as nib
from nilearn.glm.first_level import FirstLevelModel
from nilearn.image import concat_imgs, mean_img
import matplotlib.pyplot as plt
import nilearn
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_contrast_matrix

default_n_threads = 64
os.environ['OPENBLAS_NUM_THREADS'] = f"{default_n_threads}"
os.environ['MKL_NUM_THREADS'] = f"{default_n_threads}"
os.environ['OMP_NUM_THREADS'] = f"{default_n_threads}"

In [2]:
# === Path principali ===
base_dir = "/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6"
stimuli_metadata_path = os.path.join(base_dir, "stimuli", "face_videos_stimuli_category_description.tsv")
events_label_column = "emotion"  



In [3]:
# === Caricamento metadati stimoli video ===
metadata_df = pd.read_csv(stimuli_metadata_path, sep='\t', index_col=None)
metadata_df = metadata_df.reset_index() 
metadata_df = metadata_df.dropna(axis=1, how='all')

metadata_df.columns = [
    "stimulus", "emotion_label", "gender_label", "ethnicity_label",
    "actions", "expression", "description", 
    *metadata_df.columns[len(metadata_df.columns)-1:]  
]



In [4]:
metadata_df

Unnamed: 0,stimulus,emotion_label,gender_label,ethnicity_label,actions,expression,description,Unnamed: 7
0,stimtrn_1.mp4,1,1,1,"talking, smiling",smiling,This is a woman.She has black hair.She is talk...,
1,stimtrn_2.mp4,2,1,1,talking，wagging her head，crying,crying,This is a little girl.She has black hair.To ...,
2,stimtrn_3.mp4,1,1,1,smiling， her smile gradually grows larger.,smiling,This is a woman. She has brown hair.She is sm...,
3,stimtrn_4.mp4,2,2,2,talking，nodding his head,an angry emotion,This is a man. He has gray hair and a moustach...,
4,stimtrn_5.mp4,3,2,1,lifts his head up,a neutral emotion,This is a man. He has black hair. He gradually...,
...,...,...,...,...,...,...,...,...
1315,stimtst_1316.mp4,1,1,2,smiling,smiling,This is a woman. She has blond hair.She is smi...,
1316,stimtst_1317.mp4,2,2,2,talking,an angry emotion,This is a man.He has black hair and a moustac...,
1317,stimtst_1318.mp4,1,1,1,smiling,smiling,This is a woman. She has black hair.She is smi...,
1318,stimtst_1319.mp4,3,2,2,talking,a neutral emotion,This is a man.He has black hair.He is talking...,


In [20]:
import os
import nibabel as nib
import pandas as pd

# === Funzione per estrarre dati fMRI + label emozionale ===
def extract_fmri_and_labels(subject_id, session_id="02", task_name="face", base_dir="your_base_dir_here", metadata_df=None, events_label_column="expression"):
    func_dir = os.path.join(base_dir, f"sub-{subject_id}", f"ses-{session_id}", "func")
    
    all_runs_data = []

    for file in os.listdir(func_dir):
        if file.endswith("_bold.nii") and f"task-{task_name}" in file:
            # Estrai il run ID
            run_id = file.split("_run-")[1].split("_")[0]
            print(f"Processing subject {subject_id}, run {run_id}...")

            # === Carica file NIfTI ===
            nii_path = os.path.join(func_dir, file)
            fmri_img = nib.load(nii_path)
            fmri_data = fmri_img.get_fdata()  # shape: (x, y, z, timepoints)

            # === Costruisci path del file eventi ===
            events_file = f"sub-{subject_id}_ses-{session_id}_task-{task_name}_run-{run_id}_events.tsv"
            events_file_path = os.path.join(func_dir, events_file)

            if os.path.exists(events_file_path):
                events_df = pd.read_csv(events_file_path, sep="\t")

                for idx, row in events_df.iterrows():
                    stim_video_name = row["stim_file"].split("/")[-1]
                    
                    # Cerca metadati stimolo nella tabella generale
                    stim_meta = metadata_df[metadata_df["stimulus"].str.contains(stim_video_name)]

                    if not stim_meta.empty:
                        emotion = stim_meta.iloc[0][events_label_column]
                        all_runs_data.append({
                            "subject": subject_id,
                            "session": session_id,
                            "run": run_id,
                            "onset": row["onset"],
                            "duration": row["duration"],
                            "stim_file": stim_video_name,
                            "emotion": emotion,
                            "fmri_data": fmri_data 
                        })

    return all_runs_data


In [25]:
# === Esempio: estrazione per un soggetto ===
subject_id = "01"
data = extract_fmri_and_labels(subject_id=subject_id, session_id="02", task_name="face", 
                                  base_dir="/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6", metadata_df=metadata_df)

# === Visualizza estratti ===
for d in data[0:3]:
    print(f"Soggetto {d['subject']} - Run {d['run']} - Stimolo: {d['stim_file']} - Emozione: {d['emotion']}")
    print(f"fMRI shape: {d['fmri_data'].shape}\n")
print('len data: ',len(data))

Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...
Soggetto 01 - Run 01 - Stimolo: stimtrn_1.mp4 - Emozione: smiling
fMRI shape: (110, 110, 56, 428)

Soggetto 01 - Run 01 - Stimolo: stimtrn_2.mp4 - Emozione: crying
fMRI shape: (110, 110, 56, 428)

Soggetto 01 - Run 01 - Stimolo: stimtrn_3.mp4 - Emozione: smiling
fMRI shape: (110, 110, 56, 428)

len data:  384


In [30]:
import os
import nibabel as nib
import pandas as pd
import re

def extract_bold_and_timeseries(subject_id, session_id="01", task_name="face", base_dir="your_base_dir_here"):
    func_dir = os.path.join(base_dir, "derivatives", "pre-processed_volume_data", f"sub-{subject_id}", f"ses-{session_id}")

    all_runs_data = []

    for file in os.listdir(func_dir):
        if file.endswith("_bold.nii") and f"task-{task_name}" in file and "desc-volume" in file:
            # Estrai il run_id con regex
            match = re.search(r"run-(\d+)", file)
            if match:
                run_id = match.group(1)
                print(f"Processing subject {subject_id}, run {run_id}...")

                # === Carica file NIfTI ===
                nii_path = os.path.join(func_dir, file)
                try:
                    fmri_img = nib.load(nii_path)
                    fmri_data = fmri_img.get_fdata()
                except Exception as e:
                    print(f"[!] Errore caricamento NIfTI: {file} – {e}")
                    continue

                # === Path del file timeseries.tsv ===
                timeseries_file = f"sub-{subject_id}_ses-{session_id}_task-{task_name}_space-individual_desc-volume_run-{run_id}_timeseries.tsv"
                timeseries_path = os.path.join(func_dir, timeseries_file)

                if os.path.exists(timeseries_path):
                    try:
                        timeseries_df = pd.read_csv(timeseries_path, sep="\t")
                        all_runs_data.append({
                            "subject": subject_id,
                            "session": session_id,
                            "run": run_id,
                            "fmri_data": fmri_data,
                            "timeseries": timeseries_df
                        })
                    except Exception as e:
                        print(f"[!] Errore lettura timeseries TSV: {timeseries_path} – {e}")
                        
    return all_runs_data


In [31]:
results = extract_bold_and_timeseries(
    subject_id="01",
    session_id="02",
    task_name="face",
    base_dir="/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6"
)

Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...


In [None]:
results[1]["fmri_data"].shape

(110, 110, 56, 428)

In [39]:
results[1]["timeseries"]

Unnamed: 0,0,0.1,0.2,0.3,0.4,-1.7347235e-18
0,0.004314,-0.019380,0.022385,-0.000077,-0.000119,0.000121
1,0.013454,-0.010174,0.026303,0.000288,-0.000574,0.000304
2,0.021558,-0.001186,0.022593,-0.000266,-0.000419,0.000377
3,0.025934,0.022873,0.062510,-0.000555,-0.000398,0.000121
4,0.026588,0.071500,0.064442,-0.000281,-0.000397,-0.000177
...,...,...,...,...,...,...
422,0.042539,-0.009206,-0.233882,0.001633,-0.000421,0.000371
423,0.047924,0.027502,-0.218085,0.001373,-0.000455,0.000297
424,0.045156,-0.033883,-0.289497,0.001434,-0.000543,0.000589
425,0.049934,0.039238,-0.217001,0.001145,-0.000540,0.000292
