In [1]:
# Stacking Period Data from MATLAB Files in Python
# ------------------------------------------------------------
# This script walks through how to load .mat files, extract structured timing data,
# and build a clean DataFrame summarizing all trials across multiple sessions.

import scipy.io
import pandas as pd
import numpy as np

In [2]:
# === STEP 1: Read the two MATLAB .mat files ===
# "periods.mat" supplies the per-session trial timing and "sessions.mat" the
# session metadata. Both are read with simplify_cells=True so the loaded
# structure is scipy's simplified (plain Python container) form.
periods_mat, sessions_mat = (
    scipy.io.loadmat(mat_name, simplify_cells=True)
    for mat_name in ("periods.mat", "sessions.mat")
)

In [3]:
# === STEP 2: Pull out the two variables used downstream ===
# periods_all   <- "periodsAll": one timing entry per session; the extraction
#                  step reads the first three values of each trial as
#                  [trial_num, stim_on, stim_off].
# sessions_list <- "sessions": per-session metadata records whose 'ID' / 'ID1'
#                  fields carry labels such as 'p6WV_CelebA_Sess1'.
periods_all, sessions_list = periods_mat["periodsAll"], sessions_mat["sessions"]

In [4]:
# === STEP 3: Define desired session order ===
# Session labels to extract, listed in the exact order they should appear in
# the stacked output. Grouped below by subject prefix for readability only.
desired_order = [
    # p6
    "p6WV_CelebA_Sess1",
    "p6WV_CelebA_Sess2",
    # p7
    "p7WV_CelebA_Sess1",
    "p7WV_CelebA_Sess2",
    "p7WV_CelebA_Sess3",
    "p7WV_CelebA_Sess4",
    # p9
    "p9WV_CelebA_Sess1",
    "p9WV_CelebA_Sess2",
    "p9WV_CelebA_Sess3",
    "p9WV_CelebA_Sess4",
    # p10
    "p10WV_CelebA_S2_FBI_S2",
    "p10WV_CelebA_Sess3",
    "p10WV_Loc2_S1_CelebA_S1_FBI_S1",
    # p11
    "p11WV_CelebA_S1_FBI_S1_Loc2_S1",
    "p11WV_CelebA_S2_FBI_S2_Loc2_S2",
    "p11WV_CelebA_S3_FBI_S3_Loc2_S3",
    "p11WV_CelebA_S4_FBI_S4_Loc2_S4",
    "p11WV_CelebA_Sess5",
    # p13
    "p13WV_CelebA_Sess1",
    # p14
    "p14WV_CelebA_S1_FBI_S1",
    "p14WV_CelebA_S2_FBI_S2",
    "p14WV_CelebA_S3_FBI_S3",
    "p14WV_CelebA_S4_FBI_S4",
    # p15
    "p15WV_CelebA_S1_FBI_S1",
    "p15WV_CelebA_S2_FBI_S2",
    # p16
    "p16WV_CelebA_S1",
    "p16WV_CelebA_S2_NavFace_S1",
    "p16WV_CelebA_S3_NavFace_S3",
    "p16WV_CelebA_S4_NavObj_S2",
    "p16WV_CelebA_S5_FBI_S1_NavFace_S4",
    "p16WV_CelebA_S6_NavFace_S5",
    # p18
    "p18WV_CelebA_S1_FBI_S1",
    "p18WV_CelebA_S2_NavFace_S1",
    "p18WV_CelebA_S3_NavFace_S2",
    "p18WV_CelebA_S4",
    # p19
    "p19WV_CelebA_S1_NavFace_S1",
    "p19WV_CelebA_S2",
    # p20
    "p20WV_CelebA_S1_NavFace_S1",
    "p20WV_CelebA_S2_NavFace_S2",
    "p20WV_CelebA_S3_FBI_S1",
]

In [5]:
# === STEP 4: Build a mapping from session ID to index ===
# The extraction step uses this to turn a session label into a position in
# `periods_all`.
def _build_session_id_map(sessions):
    """Map each usable session identifier to the position of its record.

    Parameters
    ----------
    sessions : list of dict, or dict
        Session metadata records. A lone dict is treated as a one-element
        list, because loadmat(simplify_cells=True) returns a single struct
        as a dict rather than a list of dicts.

    Returns
    -------
    dict
        ``{identifier: index}`` built from the 'ID' and 'ID1' fields of each
        record. When the same identifier occurs more than once, the last
        record wins.

    Notes
    -----
    Records that are not dicts, and fields that are absent or not strings
    (e.g. an empty MATLAB field), are skipped. The two fields are handled
    independently, so an unusable 'ID' no longer prevents 'ID1' from being
    registered.
    """
    if isinstance(sessions, dict):
        # Iterating a dict would walk its field names, not session records.
        sessions = [sessions]

    id_to_index = {}
    for position, record in enumerate(sessions):
        if not isinstance(record, dict):
            continue
        # 'ID1' is an alternate label for the same session; the exact naming
        # scheme of the variants is not visible in this notebook.
        for field in ("ID", "ID1"):
            label = record.get(field)
            if isinstance(label, str):
                id_to_index[label] = position
    return id_to_index


session_id_map = _build_session_id_map(sessions_list)

In [7]:
# === STEP 5: Extract trial rows for each session in desired order ===
# Each output row is [trial, stim_on, stim_off, session_label].
def _iter_trial_entries(session_data):
    """Yield one flattened 1-D array per trial of a session.

    loadmat(simplify_cells=True) squeezes unit dimensions, so a session with
    a single trial arrives as a 1-D numeric array instead of a 2-D one.
    Iterating that directly would yield scalars, so it is reshaped to a
    single row first. This assumes a 1-D numeric array is one trial row
    rather than a column of values — TODO confirm against the .mat layout.
    An empty array yields nothing.
    """
    if (
        isinstance(session_data, np.ndarray)
        and session_data.dtype != object
        and session_data.ndim == 1
        and session_data.size > 0
    ):
        session_data = session_data.reshape(1, -1)
    for entry in session_data:
        yield np.asarray(entry).flatten()


all_rows = []
missing_sessions = []

for session_label in desired_order:
    idx = session_id_map.get(session_label)
    if idx is None:
        # Label has no entry in the session map; reported after saving.
        missing_sessions.append(session_label)
        continue

    # periods_all is indexed directly by session position.
    for flat in _iter_trial_entries(periods_all[idx]):
        if flat.size < 3:
            raise ValueError(
                f"Session {session_label!r}: expected at least 3 values per "
                f"trial (trial, stim_on, stim_off), got {flat.size}"
            )
        # int() truncates toward zero and raises ValueError on NaN.
        trial_num, stim_on, stim_off = (int(value) for value in flat[:3])
        all_rows.append([trial_num, stim_on, stim_off, session_label])

In [8]:
# === STEP 6: Assemble the stacked table and write it to CSV ===
# One row per trial; the last column carries the session label the trial
# came from. The CSV is written without the DataFrame index.
df = pd.DataFrame(
    all_rows,
    columns=["Trial", "StimOn_us", "StimOff_us", "subject_session"],
)
df.to_csv("stacked_subjects_sessions_data.csv", index=False)
print("File saved as 'stacked_subjects_sessions_data.csv'")

# List any requested labels that had no entry in the session map.
if len(missing_sessions) > 0:
    print("The following sessions were not found:")
    for label in missing_sessions:
        print(f" - {label}")

File saved as 'stacked_subjects_sessions_data.csv'
