In [2]:
import os
import sys

PROJECT_ROOT = os.path.abspath(os.path.join(os.getcwd(), '..'))
sys.path.append(PROJECT_ROOT)

from trial_class import *
from experiment_class import Experiment

import numpy as np
import pandas as pd
import tdt
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker

from scipy.signal import butter, filtfilt
from sklearn.linear_model import LinearRegression
from sp_extension import *

In [3]:
experiment_path = r"C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac"
csv_base_path = r"C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac_csvs"

# NAc: #15616F
# mPFC: #FFAF00

cups = r"C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\Social_Pref_sheet.csv"


In [4]:
# groups csv + experiment data into one variable
experiment = Experiment(experiment_path, csv_base_path)

# batch process the data, removing the specified time segments for subjects
experiment.default_batch_process()

Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n1-240522-072114\Notes.txt
read from t=0s to t=794.67s
Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n2-240522-084131\Notes.txt
read from t=0s to t=789.95s
Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n3-240523-073132\Notes.txt
read from t=0s to t=788.57s
Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n4-240523-084829\Notes.txt
read from t=0s to t=790.88s
Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n5-240826-083822\Notes.txt
read from t=0s to t=793.05s
Found Synapse note file: C:\Users\alber\OneDrive\Desktop\PC_Lab\Photometry\Pilot_2\Combined_Cohorts\Social_Pref\nac\n6-240826-094701\Notes.txt
re

In [5]:
bout_definitions = [
    {'prefix': 'Subject', 'introduced': 'Subject Introduced', 'removed': 'Subject Removed'},
]


experiment.group_extract_manual_annotations(bout_definitions,first_only=True)

Processing behaviors for n1-240522-072114...
Processing behaviors for n2-240522-084131...
Processing behaviors for n3-240523-073132...
Processing behaviors for n4-240523-084829...
Processing behaviors for n5-240826-083822...
Processing behaviors for n6-240826-094701...
Processing behaviors for n7-240827-072608...


## Dopamine Stuff (Need help with this)

In [6]:
def prep_combined_da_metrics(experiment, sniff_cup_csv_path, metric_list=None, first_only=False):
    import pandas as pd
    import re

    # Normalize behavior label spacing
    def normalize_behavior_label(label):
        return re.sub(r'\s+', ' ', label.strip().lower().replace('\u00a0', ' '))

    assign_df = pd.read_csv(sniff_cup_csv_path)
    assign_df['Subject'] = assign_df['Subject'].astype(str).str.lower()

    # Build subject -> behavior name -> agent identity mapping
    subject_to_behavior_to_agent = {}
    for _, row in assign_df.iterrows():
        subj = row['Subject']
        subject_to_behavior_to_agent[subj] = {}
        for col in row.index:
            col_norm = normalize_behavior_label(str(col))
            if col_norm.startswith("sniff cup"):
                agent_label = normalize_behavior_label(str(row[col]))
                subject_to_behavior_to_agent[subj][col_norm] = agent_label

    all_rows = []

    for trial_name, trial in experiment.trials.items():
        if not hasattr(trial, 'behaviors') or trial.behaviors.empty:
            continue

        df = trial.behaviors.copy()
        df['Behavior'] = df['Behavior'].astype(str).apply(normalize_behavior_label)

        subject_id = trial_name.lower()

        if subject_id not in subject_to_behavior_to_agent:
            continue

        mapping = subject_to_behavior_to_agent[subject_id]

        # Keep only sniff cup behaviors
        df = df[df["Behavior"].str.startswith("sniff cup")]

        # Map behaviors to agents
        df["Agent"] = df["Behavior"].apply(lambda b: mapping.get(b))
        df["Subject"] = subject_id
        df["Trial"] = trial_name

        unmatched = df[df["Agent"].isna()]
        if not unmatched.empty:
            print(f"‼️ Unmatched behaviors for subject '{subject_id}':")
            print("Behaviors that failed to map:", unmatched["Behavior"].unique())
            print("Available mapping keys:", list(mapping.keys()))

        df = df.dropna(subset=["Agent"])

        # Choose metrics
        known_cols = ["Behavior", "Agent", "Subject", "Trial"]
        if metric_list:
            metric_cols = [m for m in metric_list if m in df.columns]
        else:
            metric_cols = [c for c in df.columns if c not in known_cols and pd.api.types.is_numeric_dtype(df[c])]

        if not metric_cols:
            continue

        df = df[["Subject", "Agent"] + metric_cols]

        if first_only:
            df = df.groupby(["Subject", "Agent"], as_index=False).first()

        all_rows.append(df)

    if not all_rows:
        print("⚠️ No rows added to DataFrame. Check if behavior labels match and mapping keys are clean.")
        print(f"Subjects in experiment: {list(experiment.trials.keys())}")
        print(f"Subjects in assignments file: {assign_df['Subject'].tolist()}")
        print("Sample mapping dictionary:")
        for subj, mapping in subject_to_behavior_to_agent.items():
            print(f"{subj} -> {mapping}")
        return pd.DataFrame()

    combined_df = pd.concat(all_rows, ignore_index=True)

    # --- Aggregate by Subject-Agent pair ---
    if first_only:
        grouped = combined_df  # already one row per subject-agent
    else:
        grouped = combined_df.groupby(["Subject", "Agent"], as_index=False)[metric_cols].mean()

    # --- Ensure each subject has all 4 agent rows ---
    all_agents = ['nothing', 'short_term', 'long_term', 'novel']
    all_subjects = sorted(grouped['Subject'].unique())
    full_index = pd.MultiIndex.from_product([all_subjects, all_agents], names=['Subject', 'Agent'])

    final_df = (
        grouped.set_index(['Subject', 'Agent'])
               .reindex(full_index)
               .fillna(0)
               .reset_index()
    )

    print(f"✅ Final DA metrics DataFrame created with {len(final_df)} rows from {len(all_subjects)} subjects.")
    return final_df



In [7]:
experiment.compute_all_da_metrics(use_max_length=False,
                                  max_bout_duration=5, #total_avg_bout_duration
                                  mode='standard')

Computing DA metrics for n1-240522-072114 ...
Computing DA metrics for n2-240522-084131 ...
Computing DA metrics for n3-240523-073132 ...
Computing DA metrics for n4-240523-084829 ...
Computing DA metrics for n5-240826-083822 ...
Computing DA metrics for n6-240826-094701 ...
Computing DA metrics for n7-240827-072608 ...


In [8]:
import re

# --- helpers ---------------------------------------------------------------

def _norm(s: str) -> str:
    """lowercase, collapse whitespace, replace NBSPs"""
    return re.sub(r"\s+", " ", str(s).strip().lower().replace("\u00a0", " "))

def _canonical_agent(s: str) -> str:
    """map common variants to canonical tokens used in your code"""
    s = _norm(s).replace("-", " ")
    s = re.sub(r"\s+", "_", s)
    alias = {
        "empty": "nothing",
        "short_term": "short_term",
        "long_term": "long_term",
        "novel": "novel",
        "nothing": "nothing"
    }
    return alias.get(s, s)

def _build_agent_mapping(assign_csv: str) -> pd.DataFrame:
    """
    Returns long-form mapping with columns: subject, cupnum (Int64), agent
    """
    map_df = pd.read_csv(assign_csv)
    map_df.columns = [_norm(c) for c in map_df.columns]
    map_df["subject"] = map_df["subject"].astype(str).str.lower()

    cup_cols = [c for c in map_df.columns if c.startswith("sniff cup")]
    long = map_df.melt(
        id_vars=["subject"],
        value_vars=cup_cols,
        var_name="cup_label",
        value_name="agent"
    )
    long["cupnum"] = long["cup_label"].str.extract(r"(\d+)").astype("Int64")
    long["agent"] = long["agent"].map(_canonical_agent)
    return long.dropna(subset=["cupnum"])[["subject", "cupnum", "agent"]]


def _collect_sniff_events(experiment, metric_cols=None) -> pd.DataFrame:
    """
    Concatenate all blocks' sniff-cup events with requested metrics.
    Returns columns: Subject, cupnum, Event_Start (if present), <metric_cols...>
    """
    frames = []
    for block_name, block in experiment.trials.items():  # 'trials' container
        if not hasattr(block, "behaviors") or block.behaviors.empty:
            continue

        df = block.behaviors.copy()
        if "Behavior" not in df.columns:
            continue

        # normalize and keep only 'sniff cup n' rows
        beh = df["Behavior"].astype(str).map(_norm)
        mask = beh.str.match(r"^sniff\s*cup\s*\d+$")
        if not mask.any():
            continue

        df = df.loc[mask].copy()
        df["cupnum"] = beh.loc[mask].str.extract(r"(\d+)").astype("Int64")
        df["Subject"] = str(block_name).lower()

        # pick metrics (default: all numeric except bookkeeping)
        if metric_cols is None:
            numeric_cols = df.select_dtypes(include="number").columns.tolist()
            # keep common metrics; drop obvious non-metrics if present
            drop = {"event_end", "adjusted end", "time of max peak"}
            metric_cols_use = [c for c in numeric_cols if _norm(c) not in drop]
        else:
            metric_cols_use = [c for c in metric_cols if c in df.columns]

        keep = ["Subject", "cupnum"] + (["Event_Start"] if "Event_Start" in df.columns else []) + metric_cols_use
        frames.append(df[keep])

    return pd.concat(frames, ignore_index=True) if frames else pd.DataFrame(
        columns=["Subject", "cupnum"] + (metric_cols or [])
    )

# --- main API --------------------------------------------------------------

def associate_da_with_agents(
    experiment,
    assign_csv: str,
    metric_cols=None,          # e.g., ["Max Peak", "Mean Z-score", "AUC"]
    first_only: bool = True,   # first event per Subject×Agent (by Event_Start)
    ensure_all_agents=False,   # reindex to nothing/short_term/long_term/novel
    fill_value=np.nan
) -> pd.DataFrame:
    """
    Produces a tidy dataframe with DA metrics per Subject×Agent efficiently.
    """
    # gather events and build mapping
    events = _collect_sniff_events(experiment, metric_cols=metric_cols)
    if events.empty:
        return pd.DataFrame()

    mapping = _build_agent_mapping(assign_csv)

    # merge on Subject + cup number to attach Agent
    merged = events.merge(
        mapping, how="left",
        left_on=["Subject", "cupnum"],
        right_on=["subject", "cupnum"]
    ).drop(columns=["subject"])

    merged.rename(columns={"agent": "Agent"}, inplace=True)

    # choose first event or aggregate mean per Subject×Agent
    if first_only and "Event_Start" in merged.columns:
        merged = (merged.sort_values(["Subject", "Agent", "Event_Start"])
                        .drop_duplicates(subset=["Subject", "Agent"], keep="first"))
    else:
        agg = {c: "mean" for c in merged.columns if c not in ["Subject", "Agent", "cupnum", "Event_Start"]}
        merged = merged.groupby(["Subject", "Agent"], as_index=False).agg(agg)

    # optional: ensure each subject has all 4 agents
    if ensure_all_agents:
        agents = ["nothing", "short_term", "long_term", "novel"]
        merged["Agent"] = merged["Agent"].map(_canonical_agent)
        idx = pd.MultiIndex.from_product([sorted(merged["Subject"].unique()), agents],
                                         names=["Subject", "Agent"])
        merged = (merged.set_index(["Subject", "Agent"])
                        .reindex(idx)
                        .reset_index()
                        .fillna(fill_value))

    return merged


In [10]:
associate_da_with_agents(experiment=experiment,
                         assign_csv=cups)

ValueError: The column label 'cupnum' is not unique.