In [1]:
import numpy as np
import scipy
import scipy.io as sio
import matplotlib.pyplot as plt
import nibabel as nib
import torch
import torchaudio

import sys
import os
from os.path import join as opj
from os.path import join, exists, split

import time
import urllib.request
import warnings
from tqdm import tqdm
from pprint import pprint
import zipfile
import glob
warnings.filterwarnings('ignore')

# from glmsingle.glmsingle import GLM_single
import pandas as pd
from nilearn import maskers
from nilearn import plotting
import tqdm
import nibabel as nib
from nilearn.glm.first_level import FirstLevelModel
from nilearn.image import concat_imgs, mean_img
import matplotlib.pyplot as plt
import nilearn
from nilearn.plotting import plot_design_matrix
from nilearn.plotting import plot_contrast_matrix

default_n_threads = 64
os.environ['OPENBLAS_NUM_THREADS'] = f"{default_n_threads}"
os.environ['MKL_NUM_THREADS'] = f"{default_n_threads}"
os.environ['OMP_NUM_THREADS'] = f"{default_n_threads}"

In [2]:
# === Path principali ===
base_dir = "/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6"
stimuli_metadata_path = os.path.join(base_dir, "stimuli", "face_videos_stimuli_category_description.tsv")
events_label_column = "emotion"  



In [3]:
# === Caricamento metadati stimoli video ===
metadata_df = pd.read_csv(stimuli_metadata_path, sep='\t', index_col=None)
metadata_df = metadata_df.reset_index() 
metadata_df = metadata_df.dropna(axis=1, how='all')

metadata_df.columns = [
    "stimulus", "emotion_label", "gender_label", "ethnicity_label",
    "actions", "expression", "description", 
    *metadata_df.columns[len(metadata_df.columns)-1:]  
]



In [6]:
metadata_df

Unnamed: 0,stimulus,emotion_label,gender_label,ethnicity_label,actions,expression,description,Unnamed: 7
0,stimtrn_1.mp4,1,1,1,"talking, smiling",smiling,This is a woman.She has black hair.She is talk...,
1,stimtrn_2.mp4,2,1,1,talking，wagging her head，crying,crying,This is a little girl.She has black hair.To ...,
2,stimtrn_3.mp4,1,1,1,smiling， her smile gradually grows larger.,smiling,This is a woman. She has brown hair.She is sm...,
3,stimtrn_4.mp4,2,2,2,talking，nodding his head,an angry emotion,This is a man. He has gray hair and a moustach...,
4,stimtrn_5.mp4,3,2,1,lifts his head up,a neutral emotion,This is a man. He has black hair. He gradually...,
...,...,...,...,...,...,...,...,...
1315,stimtst_1316.mp4,1,1,2,smiling,smiling,This is a woman. She has blond hair.She is smi...,
1316,stimtst_1317.mp4,2,2,2,talking,an angry emotion,This is a man.He has black hair and a moustac...,
1317,stimtst_1318.mp4,1,1,1,smiling,smiling,This is a woman. She has black hair.She is smi...,
1318,stimtst_1319.mp4,3,2,2,talking,a neutral emotion,This is a man.He has black hair.He is talking...,


## Estrai raw data

In [98]:
import os
import nibabel as nib
import pandas as pd

# === Funzione per estrarre dati fMRI + label emozionale ===
def extract_fmri_and_labels(subject_id, session_id="02", task_name="face", base_dir="your_base_dir_here", metadata_df=None, events_label_column="expression"):
    func_dir = os.path.join(base_dir, f"sub-{subject_id}", f"ses-{session_id}", "func")
    
    all_runs_data = []

    for file in os.listdir(func_dir):
        if file.endswith("_bold.nii") and f"task-{task_name}" in file:
            # Estrai il run ID
            run_id = file.split("_run-")[1].split("_")[0]
            print(f"Processing subject {subject_id}, run {run_id}...")

            # === Carica file NIfTI ===
            nii_path = os.path.join(func_dir, file)
            fmri_img = nib.load(nii_path)
            fmri_data = fmri_img.get_fdata()  # shape: (x, y, z, timepoints)

            # === Costruisci path del file eventi ===
            events_file = f"sub-{subject_id}_ses-{session_id}_task-{task_name}_run-{run_id}_events.tsv"
            events_file_path = os.path.join(func_dir, events_file)

            if os.path.exists(events_file_path):
                events_df = pd.read_csv(events_file_path, sep="\t")
                print(events_df)
                break

                for idx, row in events_df.iterrows():
                    stim_video_name = row["stim_file"].split("/")[-1]    
                    # Cerca metadati stimolo nella tabella generale
                    stim_meta = metadata_df[metadata_df["stimulus"].str.contains(stim_video_name)]

                    if not stim_meta.empty:
                        expression = stim_meta.iloc[0][events_label_column]
                        all_runs_data.append({
                            "subject": subject_id,
                            "session": session_id,
                            "run": run_id,
                            "onset": row["onset"],
                            "duration": row["duration"],
                            "stim_file": stim_video_name,
                            "expression": expression,
                            "fmri_data": fmri_data 
                        })

    return all_runs_data


In [99]:
# === Esempio: estrazione per un soggetto ===
subject_id = "01"
data = extract_fmri_and_labels(subject_id=subject_id, session_id="02", task_name="face", 
                                  base_dir="/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6", metadata_df=metadata_df)

# === Visualizza estratti ===
for d in data[0:3]:
    print(f"Soggetto {d['subject']} - Run {d['run']} - Stimolo: {d['stim_file']} - Emozione: {d['emotion']}")
    print(f"fMRI shape: {d['fmri_data'].shape}\n")
print('len data: ',len(data))

Processing subject 01, run 01...
    onset  duration                    stim_file  sti_started  sti_stopped  \
0      12         3     face-video/stimtrn_1.mp4    82.555083    85.588482   
1      18         3     face-video/stimtrn_2.mp4    86.363600    89.380339   
2      24         3     face-video/stimtrn_3.mp4    92.347333    95.363945   
3      30         3     face-video/stimtrn_4.mp4    98.330823   101.347517   
4      36         3  face-video/stimtrn_1321.mp4   104.347884   107.356015   
..    ...       ...                          ...          ...          ...   
63    390         3  face-video/stimtrn_1322.mp4   458.322435   461.339104   
64    396         3    face-video/stimtrn_29.mp4   464.322800   467.339374   
65    402         3    face-video/stimtrn_20.mp4   470.356253   473.364553   
66    408         3     face-video/stimtrn_8.mp4   476.348308   479.356476   
67    414         3    face-video/stimtrn_28.mp4   482.356837   485.365074   

    key_fix  
0       NaN  
1 

## Estrati preprocessed

In [4]:
import os
import nibabel as nib
import pandas as pd
import re

def extract_fmri_and_labels_preproc(subject_id, session_id="01", task_name="face", base_dir="your_base_dir_here", metadata_df=None, events_label_column="expression"):
    func_dir = os.path.join(base_dir, "derivatives", "pre-processed_volume_data", f"sub-{subject_id}", f"ses-{session_id}")
    
    all_runs_data = []

    for file in os.listdir(func_dir):
        if file.endswith("_bold.nii") and f"task-{task_name}" in file and "desc-volume" in file:
            match = re.search(r"run[-_]?(\d+)", file)
            if match:
                run_id = match.group(1)
                print(f"Processing subject {subject_id}, run {run_id}...")

                # === Carica file NIfTI ===
                nii_path = os.path.join(func_dir, file)
                try:
                    fmri_img = nib.load(nii_path)
                    # fmri_data = fmri_img.get_fdata()
                except Exception as e:
                    print(f"[!] Errore caricamento NIfTI: {file} – {e}")
                    continue

                # === Carica events.tsv ===
                events_file = f"sub-{subject_id}_ses-{session_id}_task-{task_name}_run-{run_id}_events.tsv"
                events_path = os.path.join(func_dir.replace("derivatives/pre-processed_volume_data", ""), "func", events_file)
                run_events = []

                if os.path.exists(events_path):
                    try:
                        events_df = pd.read_csv(events_path, sep="\t")
                    except Exception as e:
                        print(f"[!] Errore lettura events.tsv: {events_path} – {e}")
                        events_df = None
                else:
                    events_df = None

                if events_df is not None and metadata_df is not None:
                    for idx, row in events_df.iterrows():
                        stim_file = row["stim_file"].split("/")[-1]
                        stim_meta = metadata_df[metadata_df["stimulus"].str.contains(stim_file, na=False)]

                        expression = stim_meta.iloc[0][events_label_column] if not stim_meta.empty else None

                        run_events.append({
                            "onset": row["onset"],
                            "duration": row["duration"],
                            "stim_file": stim_file,
                            "expression": expression
                        })

                all_runs_data.append({
                    "subject": subject_id,
                    "session": session_id,
                    "run": run_id,
                    "fmri_data": fmri_img,
                    "events": run_events
                })

    return all_runs_data


In [None]:
def create_masker_from_example_image(example_img, strategy="epi"):
    masker = maskers.NiftiMasker(mask_strategy=strategy)
    masker.fit(example_img)
    report = masker.generate_report()
    return masker, report

In [65]:
import pickle

# subj 01 --> ses-02, run-02
# subj 03 --> ses-02, run-01
# subj 04 --> ses-02, run-05
# subj 05 --> ses-02, run-01

subject_id = "01"
TR = 1.0
hrf_delay_sec = 3
window_sec = 3
hrf_delay_vol = int(hrf_delay_sec / TR)
window_vols = int(window_sec / TR)
base_dir = "/srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6"
example_img_path = os.path.join(base_dir, "derivatives", "pre-processed_volume_data", f"sub-{subject_id}", f"ses-02", f"sub-{subject_id}_ses-02_task-face_space-individual_desc-volume_run-02_bold.nii")
example_img = nib.load(example_img_path)
masker, report = create_masker_from_example_image(example_img)

subject_data = {}
save_dir = '/srv/nfs-data/sisko/matteoc/fmri_emo/data_save'

# === Ciclo sulle sessioni
for session_id in [f"{i:02d}" for i in range(2, 12)]:  # da 02 a 11
    print(f"\n== Subject {subject_id}, Session {session_id} ==")
    
    session_runs = extract_fmri_and_labels_preproc(
        subject_id=subject_id,
        session_id=session_id,
        task_name="face",
        base_dir=base_dir,
        metadata_df=metadata_df,
        events_label_column="expression"
    )

    for item in tqdm.tqdm(session_runs):
        fmri_segments = []
        run_id = int(item["run"])
        set_type = "train" if run_id <= 4 else "test"
        item["set_type"] = set_type

        if isinstance(item["fmri_data"], nib.Nifti1Image):
            try:
                masked_data = masker.transform_single_imgs(item["fmri_data"]).T
                cleaned_data = nilearn.signal.clean(masked_data.T, detrend=True, standardize=True, t_r=TR)
            except Exception as e:
                print(f"[!] Errore masking run-{item['run']} ses-{item['session']}: {e}")
                continue

            for event in item["events"]:
                onset = event["onset"]
                start_vol = int(onset // TR) + hrf_delay_vol
                end_vol = start_vol + window_vols
                if end_vol <= cleaned_data.shape[0]:
                    segment = cleaned_data[start_vol:end_vol, :]  # (3, num_voxels)
                    fmri_segments.append(segment)
                else:
                    print(f"⚠ Evento troppo vicino alla fine del run. Skippato.")

            item["fmri_data"] = fmri_segments

        else:
            print(f"⚠ Non-Nifti image found in item, skipping: {type(item['fmri_data'])}")
    
    session_key = f"ses-{session_id}"
    subject_data[session_key] = session_runs
        
with open(os.path.join(save_dir, f"data_{subject_id}.pkl"), "wb") as f:
    pickle.dump(subject_data, f)
print(f"Salvato: data_{subject_id}.pkl")





== Subject 01, Session 02 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...


100%|██████████| 5/5 [00:13<00:00,  2.78s/it]



== Subject 01, Session 03 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


100%|██████████| 6/6 [00:16<00:00,  2.78s/it]



== Subject 01, Session 04 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


100%|██████████| 6/6 [00:16<00:00,  2.79s/it]



== Subject 01, Session 05 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


100%|██████████| 6/6 [00:16<00:00,  2.77s/it]



== Subject 01, Session 06 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...


100%|██████████| 4/4 [00:11<00:00,  2.77s/it]



== Subject 01, Session 07 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


 17%|█▋        | 1/6 [00:00<00:02,  1.79it/s]

[!] Errore masking run-01 ses-07: Expected 1160051200 bytes, got 272250757 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-07/sub-01_ses-07_task-face_space-individual_desc-volume_run-01_bold.nii
 - could the file be damaged?


 33%|███▎      | 2/6 [00:01<00:03,  1.27it/s]

[!] Errore masking run-02 ses-07: Expected 1160051200 bytes, got 280948763 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-07/sub-01_ses-07_task-face_space-individual_desc-volume_run-02_bold.nii
 - could the file be damaged?


 50%|█████     | 3/6 [00:01<00:01,  1.73it/s]

[!] Errore masking run-03 ses-07: Expected 580025600 bytes, got 43370381 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-07/sub-01_ses-07_task-face_space-individual_desc-volume_run-03_bold.nii
 - could the file be damaged?


100%|██████████| 6/6 [00:13<00:00,  2.33s/it]



== Subject 01, Session 08 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


 17%|█▋        | 1/6 [00:00<00:03,  1.44it/s]

[!] Errore masking run-01 ses-08: Expected 580025600 bytes, got 198506903 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-08/sub-01_ses-08_task-face_space-individual_desc-volume_run-01_bold.nii
 - could the file be damaged?


 33%|███▎      | 2/6 [00:01<00:02,  1.39it/s]

[!] Errore masking run-02 ses-08: Expected 580025600 bytes, got 193981509 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-08/sub-01_ses-08_task-face_space-individual_desc-volume_run-02_bold.nii
 - could the file be damaged?


100%|██████████| 6/6 [00:17<00:00,  2.94s/it]



== Subject 01, Session 09 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


100%|██████████| 6/6 [00:26<00:00,  4.40s/it]



== Subject 01, Session 10 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


100%|██████████| 6/6 [00:26<00:00,  4.49s/it]



== Subject 01, Session 11 ==
Processing subject 01, run 01...
Processing subject 01, run 02...
Processing subject 01, run 03...
Processing subject 01, run 04...
Processing subject 01, run 05...
Processing subject 01, run 06...


 33%|███▎      | 2/6 [00:05<00:10,  2.63s/it]

[!] Errore masking run-02 ses-11: Expected 580025600 bytes, got 420393441 bytes from /srv/nfs-data/sisko/matteoc/fmri_emo/ds005047-1.0.6/derivatives/pre-processed_volume_data/sub-01/ses-11/sub-01_ses-11_task-face_space-individual_desc-volume_run-02_bold.nii
 - could the file be damaged?


100%|██████████| 6/6 [00:22<00:00,  3.70s/it]


Salvato: data_01.pkl


In [None]:
len(subject_data['ses-02'][0]['fmri_data'])    #  60 + OneBack + BlankTrial

68

In [None]:
subject_data['ses-02'][4]['set_type']   

'test'

In [96]:
for i in range(0, len(subject_data['ses-02'][4]['events'])):
    print(subject_data['ses-02'][4]['events'][i]['stim_file'])

stimtst_1201.mp4
stimtst_1202.mp4
stimtst_1202.mp4
stimtst_1203.mp4
stimtst_1204.mp4
stimtst_1205.mp4
stimtst_1206.mp4
stimtst_1207.mp4
stimtst_1208.mp4
stimtst_1209.mp4
stimtst_1322.mp4
stimtst_1211.mp4
stimtst_1212.mp4
stimtst_1213.mp4
stimtst_1214.mp4
stimtst_1215.mp4
stimtst_1216.mp4
stimtst_1217.mp4
stimtst_1218.mp4
stimtst_1219.mp4
stimtst_1219.mp4
stimtst_1220.mp4
stimtst_1221.mp4
stimtst_1222.mp4
stimtst_1223.mp4
stimtst_1224.mp4
stimtst_1225.mp4
stimtst_1226.mp4
stimtst_1227.mp4
stimtst_1228.mp4
stimtst_1229.mp4
stimtst_1321.mp4
stimtst_1230.mp4
stimtst_1210.mp4
stimtst_1223.mp4
stimtst_1212.mp4
stimtst_1203.mp4
stimtst_1204.mp4
stimtst_1205.mp4
stimtst_1209.mp4
stimtst_1207.mp4
stimtst_1202.mp4
stimtst_1206.mp4
stimtst_1218.mp4
stimtst_1222.mp4
stimtst_1321.mp4
stimtst_1213.mp4
stimtst_1213.mp4
stimtst_1214.mp4
stimtst_1225.mp4
stimtst_1216.mp4
stimtst_1226.mp4
stimtst_1210.mp4
stimtst_1219.mp4
stimtst_1230.mp4
stimtst_1230.mp4
stimtst_1221.mp4
stimtst_1211.mp4
stimtst_1201.m