In [1]:
# In[ ]:

import torch
import numpy as np
import pandas as pd
import os

from omegaconf import OmegaConf
from utils.dataset import ZarrDataset
from models.CNN import CNN
from models.resnet import ResNet, BasicBlock

def build_offset_map(zarr_dataset):
    """
    Group the dataset's aligned label rows by time offset.

    Args:
        zarr_dataset: Object exposing an ``aligned_labels`` attribute that is
            either ``None`` or a pandas DataFrame with the columns
            ``time_offset``, ``file_index`` and ``row_index``.

    Returns:
        dict: Maps each ``time_offset`` value to the list of
        ``(file_index, row_index)`` tuples that carry it, in the row order of
        ``aligned_labels``. Empty when ``aligned_labels`` is ``None``.
    """
    offset_to_indices = {}
    labels = zarr_dataset.aligned_labels
    if labels is None:
        print("No aligned_labels found in ZarrDataset. Make sure 'aligned_labels' attribute is present.")
        return offset_to_indices

    # Read the three columns directly instead of using DataFrame.iterrows():
    # iterrows() builds a Series per row (slow on large tables) and upcasts
    # mixed numeric columns to one dtype, which turns integer indices into
    # floats. tolist() yields plain Python scalars of each column's own type.
    columns = (
        labels['time_offset'].tolist(),
        labels['file_index'].tolist(),
        labels['row_index'].tolist(),
    )
    for offset, vol_idx, t_idx in zip(*columns):
        offset_to_indices.setdefault(offset, []).append((vol_idx, t_idx))

    return offset_to_indices

def forward_model_on_offset(model, zarr_dataset, offset, offset_map, device):
    """
    Run the model on every labelled sample recorded at one time offset.

    Args:
        model: Network with an ``eval()`` method. It is called with a single
            batched tensor and its output is reduced with
            ``torch.max(output, dim=1)``, so class scores are expected along
            dimension 1.
        zarr_dataset: Indexable dataset exposing ``valid_indices``, a sequence
            of ``(volume_idx, row_idx)`` pairs whose position is the index
            accepted by ``zarr_dataset[...]``. Each item must be a mapping with
            the keys ``data_tensor``, ``label_tensor``, ``time_offset``,
            ``subject`` and ``session``.
        offset: Time offset to look up in ``offset_map``.
        offset_map: Mapping of time offset -> list of ``(volume_idx, row_idx)``
            pairs, e.g. the result of ``build_offset_map``.
        device: Torch device the input tensor is moved to before the forward
            pass.

    Returns:
        list[dict]: One record per evaluated sample with the keys
        ``volume_idx``, ``row_idx``, ``time_offset``, ``true_label_idx``,
        ``predicted_idx``, ``subject`` and ``session``. Empty when the offset
        is unknown or none of its pairs appear in ``valid_indices``.
    """
    if offset not in offset_map:
        print(f"No samples found for offset {offset}")
        return []

    # ZarrDataset.__getitem__ takes a flat index into valid_indices. Build the
    # pair -> flat index table once, so each sample costs a dict lookup rather
    # than a linear list.index() scan. setdefault keeps the first occurrence,
    # which is what list.index() would have returned.
    flat_index_of = {}
    for flat_idx, pair in enumerate(zarr_dataset.valid_indices):
        flat_index_of.setdefault(tuple(pair), flat_idx)

    results = []
    model.eval()
    with torch.no_grad():
        for (vol_idx, row_idx) in offset_map[offset]:
            dataset_index = flat_index_of.get((vol_idx, row_idx))
            if dataset_index is None:
                # The pair is not in valid_indices (it might be an unlabeled
                # sample), so there is nothing to evaluate.
                continue

            item = zarr_dataset[dataset_index]
            data_tensor = item["data_tensor"].unsqueeze(0).float().to(device)
            if data_tensor.dim() == 4:
                # Model expects (batch, channels, x, y, z)
                data_tensor = data_tensor.unsqueeze(1)

            output = model(data_tensor)
            # Index of the highest score along the class dimension.
            _, predicted_idx = torch.max(output, dim=1)
            predicted_idx = predicted_idx.item()

            results.append({
                "volume_idx": vol_idx,
                "row_idx": row_idx,
                "time_offset": item["time_offset"],
                "true_label_idx": item["label_tensor"].item(),
                "predicted_idx": predicted_idx,
                "subject": item["subject"],
                "session": item["session"]
            })
    return results

# -------------------- Example usage --------------------
# Loads the pooled Zarr dataset and a trained classifier, runs the classifier
# on every sample recorded at one movie time offset, and prints the true and
# predicted emotion for each of them.

# 1) Build an in-memory config (or load your real Hydra config)
PROJECT_ROOT = os.path.abspath("../../")
print(f"PROJECT_ROOT: {PROJECT_ROOT}")
cfg = OmegaConf.create({
    "project_root": PROJECT_ROOT,
    "verbose": True,
    "wandb": True,
    "sys_log": True,
    "model": "CNN",
    "CNN": {
        "c1": 16, "c2": 32, "c3": 64, "k1": 3, "k2": 3, "k3": 3,
        "pk": 2, "ps": 2, "kernel_size": 3, "stride": 1, "padding": 1
    },
    "train": {
        "epochs": 50, "batch_size": 20, "shuffle": True, "train_ratio": 0.8,
        "print_label_frequencies": True
    },
    "data": {
        "data_path": f"{PROJECT_ROOT}/data/raw/derivatives/non-linear_anatomical_alignment",
        "zarr_dir_path": f"{PROJECT_ROOT}/zarr_datasets",
        "zarr_path": f"{PROJECT_ROOT}/zarr_datasets/pool_emotions",
        "label_path": f"{PROJECT_ROOT}/data/updated_annotations/pooled_annotations_structured.tsv",
        "sessions": ["01", "02", "03", "04", "05", "06", "07", "08"],
        "file_pattern_template": "*_ses-forrestgump_task-forrestgump_rec-dico7Tad2grpbold7TadNL_run-{}_bold.nii.gz",
        "subjects": ["sub-20"],
        "session_offsets": [0, 902, 1784, 2660, 3636, 4560, 5438, 6522],
        "emotion_idx": {"NONE": 0, "HAPPINESS": 1, "FEAR": 2, "SADNESS": 3, "LOVE": 4, "ANGER": 5},
        "normalization": False,
        "weight_decay": 0,
        "learning_rate": 0.0001,
        "seed": 42,
        "save_model": True,
        "load_model": True,
        "save_model_path": "output/models",
        "load_model_path": f"{PROJECT_ROOT}/output/models/sub_ALL.pth",
        "output_csv_path": f"{PROJECT_ROOT}/output/inference/sub_ALL.csv"
    }
})

print(cfg.data.load_model_path)

# 2) Load dataset
zarr_dataset = ZarrDataset(cfg.data.zarr_path)

# 3) Build the offset map for quick lookups
offset_map = build_offset_map(zarr_dataset)

# 4) Instantiate the model and load weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
emotion_idx = cfg.data.emotion_idx
output_dim = len(emotion_idx)

# CNN, ResNet and BasicBlock are already imported at the top of the file, so
# the branches only need to construct the requested architecture.
if cfg.model == "CNN":
    model = CNN(cfg=cfg, output_dim=output_dim)
elif cfg.model == "ResNet":
    model = ResNet(BasicBlock, [1, 1, 1, 1], in_channels=1, num_classes=output_dim)
else:
    raise ValueError(f"Unsupported model: {cfg.model}")

model_path = cfg.data.load_model_path
if os.path.exists(model_path):
    # NOTE(review): torch.load unpickles the file; only load checkpoints from
    # a trusted source.
    model.load_state_dict(torch.load(model_path, map_location=device))
    print(f"Loaded model from {model_path}")
else:
    # Keep going (best effort), but make it obvious that the predictions
    # printed below do not come from the trained checkpoint.
    print(f"WARNING: no checkpoint found at {model_path}; running with untrained weights")
model.to(device)

# 5) Run inference for a chosen offset
# The value must match a 'time_offset' entry of the dataset exactly, because
# it is used as a dictionary key in offset_map.
sample_offset = 6760.0  # seconds into the movie, for instance
results = forward_model_on_offset(model, zarr_dataset, sample_offset, offset_map, device)

# 6) Map prediction indices back to emotion names
inv_idx = {v: k for k, v in emotion_idx.items()}

for r in results:
    pred_name = inv_idx[r["predicted_idx"]]
    true_name = inv_idx[r["true_label_idx"]]
    print(f"Time {r['time_offset']:.1f}s, volume_idx={r['volume_idx']}, row_idx={r['row_idx']} "
          f"Subject={r['subject']}, Session={r['session']} | "
          f"True={true_name}, Predicted={pred_name}")


PROJECT_ROOT: /home/paperspace/DeepEmotion
/home/paperspace/DeepEmotion/output/models/sub_ALL.pth
Loaded model from /home/paperspace/DeepEmotion/output/models/sub_ALL.pth
Time 6760.0s, volume_idx=133, row_idx=119 Subject=sub-01, Session=01 | True=SADNESS, Predicted=SADNESS
Time 6760.0s, volume_idx=134, row_idx=119 Subject=sub-02, Session=02 | True=SADNESS, Predicted=SADNESS
Time 6760.0s, volume_idx=135, row_idx=119 Subject=sub-03, Session=03 | True=SADNESS, Predicted=SADNESS
Time 6760.0s, volume_idx=136, row_idx=119 Subject=sub-04, Session=04 | True=SADNESS, Predicted=LOVE
Time 6760.0s, volume_idx=137, row_idx=119 Subject=sub-05, Session=05 | True=SADNESS, Predicted=SADNESS
Time 6760.0s, volume_idx=138, row_idx=119 Subject=sub-06, Session=06 | True=SADNESS, Predicted=SADNESS
Time 6760.0s, volume_idx=139, row_idx=119 Subject=sub-07, Session=07 | True=SADNESS, Predicted=LOVE
Time 6760.0s, volume_idx=140, row_idx=119 Subject=sub-08, Session=08 | True=SADNESS, Predicted=SADNESS
Time 6760.0

In [3]:
# In[ ]:

import torch
import numpy as np
import pandas as pd
import os
from sklearn.decomposition import IncrementalPCA

from utils.dataset import ZarrDataset
from models.CNN import CNN
from models.resnet import ResNet, BasicBlock
from omegaconf import OmegaConf

###############################################################################
# Example: Perform incremental PCA on the full dataset by directly iterating 
# over ZarrDataset. Produces a CSV with [PC1, PC2, EmotionLabel, time_offset, subject],
# sorted first by subject, then by time offset.
#
# Samples labelled NONE are excluded from both the PCA fit and the CSV.
# PROJECT_ROOT is expected to be defined already (it is set by the
# "Example usage" cell earlier in this file).
###############################################################################

# 1) Define or load a Hydra config
cfg = OmegaConf.create({
    "project_root": PROJECT_ROOT,
    "verbose": True,
    "wandb": True,
    "sys_log": True,
    "model": "CNN",
    "CNN": {
        "c1": 16, "c2": 32, "c3": 64, "k1": 3, "k2": 3, "k3": 3,
        "pk": 2, "ps": 2, "kernel_size": 3, "stride": 1, "padding": 1
    },
    "train": {
        "epochs": 50, "batch_size": 20, "shuffle": True, "train_ratio": 0.8,
        "print_label_frequencies": True
    },
    "data": {
        "data_path": f"{PROJECT_ROOT}/data/raw/derivatives/non-linear_anatomical_alignment",
        "zarr_dir_path": f"{PROJECT_ROOT}/zarr_datasets",
        "zarr_path": f"{PROJECT_ROOT}/zarr_datasets/pool_emotions",
        "label_path": f"{PROJECT_ROOT}/data/updated_annotations/pooled_annotations_structured.tsv",
        "sessions": ["01", "02", "03", "04", "05", "06", "07", "08"],
        "file_pattern_template": "*_ses-forrestgump_task-forrestgump_rec-dico7Tad2grpbold7TadNL_run-{}_bold.nii.gz",
        "subjects": ["sub-20"],
        "session_offsets": [0, 902, 1784, 2660, 3636, 4560, 5438, 6522],
        "emotion_idx": {"NONE": 0, "HAPPINESS": 1, "FEAR": 2, "SADNESS": 3, "LOVE": 4, "ANGER": 5},
        "normalization": False,
        "weight_decay": 0,
        "learning_rate": 0.0001,
        "seed": 42,
        "save_model": True,
        "load_model": True,
        "save_model_path": "output/models",
        "load_model_path": f"{PROJECT_ROOT}/output/models/sub_ALL.pth",
        "output_csv_path": f"{PROJECT_ROOT}/output/inference/sub_ALL_ao.csv"
    }
})

# 2) Load your ZarrDataset
zarr_dataset = ZarrDataset(cfg.data.zarr_path)
print(f"Total samples (including NONE): {len(zarr_dataset)}")

# 3) Build an inverse emotion mapping for easier label naming
inverse_emotion_idx = {v: k for k, v in cfg.data.emotion_idx.items()}
# Looked up once here instead of on every loop iteration below.
none_idx = cfg.data.emotion_idx["NONE"]

# 4) Instantiate the model and load weights
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

if cfg.model == "CNN":
    model = CNN(cfg=cfg, output_dim=len(cfg.data.emotion_idx))
elif cfg.model == "ResNet":
    model = ResNet(BasicBlock, [1,1,1,1], in_channels=1, num_classes=len(cfg.data.emotion_idx))
else:
    raise ValueError(f"Unsupported model: {cfg.model}")

if os.path.exists(cfg.data.load_model_path):
    model.load_state_dict(torch.load(cfg.data.load_model_path, map_location=device))
    print(f"Loaded model weights from {cfg.data.load_model_path}")
model.eval()
model.to(device)

# 5) Incremental PCA: partial_fit in one pass
n_components = 2
fit_batch_size = 10  # rows handed to each partial_fit call
ipca = IncrementalPCA(n_components=n_components)
buffer = []

print("Pass 1: perform PCA")
for i in range(len(zarr_dataset)):
    item = zarr_dataset[i]
    label_idx = item["label_tensor"].item()

    if label_idx == none_idx:
        continue

    data_tensor = item["data_tensor"].unsqueeze(0).float().to(device)
    with torch.no_grad():
        _, hidden = model(data_tensor, return_hidden=True)
    hidden_np = hidden.cpu().numpy()

    buffer.append(hidden_np)

    # partial_fit needs at least n_components rows per batch. Fit the oldest
    # fit_batch_size rows only once n_components further rows are queued, so
    # the final leftover batch can never be too small. Batch boundaries are
    # otherwise the same as flushing at exactly fit_batch_size rows.
    if len(buffer) >= fit_batch_size + n_components:
        ipca.partial_fit(np.vstack(buffer[:fit_batch_size]))
        del buffer[:fit_batch_size]

# Fit whatever is left over after the loop.
if len(buffer) >= n_components:
    stacked = np.vstack(buffer)
    ipca.partial_fit(stacked)
    buffer.clear()
elif buffer:
    # Only reachable when the whole dataset has fewer usable samples than
    # n_components, in which case no PCA can be fitted at all.
    raise ValueError(
        f"Need at least {n_components} samples not labelled NONE to fit the PCA, "
        f"found {len(buffer)}"
    )

# 6) Second pass: transform, collect (PC1, PC2) + [Emotion, time_offset, subject]
print("Pass 2: transform hidden states and store results")
pca_results = []
for i in range(len(zarr_dataset)):
    item = zarr_dataset[i]
    label_idx = item["label_tensor"].item()

    # Skip samples labeled as NONE
    if label_idx == none_idx:
        continue

    data_tensor = item["data_tensor"].unsqueeze(0).float().to(device)
    with torch.no_grad():
        _, hidden = model(data_tensor, return_hidden=True)

    hidden_np = hidden.cpu().numpy()
    pc = ipca.transform(hidden_np)[0]  # first (only) row -> shape (n_components,)
    pc1, pc2 = pc[0], pc[1]

    emotion_str = inverse_emotion_idx.get(label_idx, "UNKNOWN")
    time_offset = item["time_offset"]
    subject = item["subject"]

    pca_results.append([pc1, pc2, emotion_str, time_offset, subject])

df = pd.DataFrame(
    pca_results,
    columns=["PC1", "PC2", "EmotionLabel", "time_offset", "subject"]
)

# 7) Sort results: first by subject, then by time_offset
df_sorted = df.sort_values(["subject", "time_offset"]).reset_index(drop=True)

# 8) Save to CSV
# to_csv does not create missing parent directories, so make sure the target
# folder exists before writing.
output_dir = os.path.dirname(cfg.data.output_csv_path)
if output_dir:
    os.makedirs(output_dir, exist_ok=True)
df_sorted.to_csv(cfg.data.output_csv_path, index=False)
print(f"PCA results saved to {cfg.data.output_csv_path}")


Total samples (including NONE): 16682
Loaded model weights from /home/paperspace/DeepEmotion/output/models/sub_ALL.pth
Pass 1: perform PCA
Pass 2: transform hidden states and store results
PCA results saved to /home/paperspace/DeepEmotion/output/inference/sub_ALL_ao.csv
