# Behavior Analysis: Comparison of Timepoints

Press SHIFT + ENTER to run code

### USER INPUT!

In [5]:
## Define project
project_name = ''

selected_groups = []
selected_conditions = []

### Main Code: Create Individual CSVs for Timepoint Comparisons (Fraction Time in Behavior; Bouts/Min; Mean Bout Duration) 

In [ ]:
### Continue to Press SHIFT + ENTER to run code ###

# Prompt user for input
num_timepoints = int(input("Enter the number of time ranges you want to compare (e.g., 2, 3, etc.): "))
time_ranges = [] # Enter the time ranges IN MINUTES (e.g., 0-10 for 0 to 10 minutes, then 11-30 for remaining 11-30 minutes)

for i in range(num_timepoints):
    time_range = input(f"Time range {i + 1}: ")
    try:
        start_min, end_min = map(int, time_range.split('-'))
        if start_min >= end_min:
            print(f"Error: Start time ({start_min}) must be less than end time ({end_min}).")
            exit()
        start_sec, end_sec = start_min * 60, end_min * 60
        time_ranges.append((start_sec, end_sec))
    except ValueError:
        print("Invalid input format. Please enter the time range as 'start-end' (e.g., 0-10).")
        exit()

### Individual File Comparisons

In [7]:
import os
import pandas as pd

# Define behavior label mapping
behavior_map = {
    0: 'still',
    1: 'walking',
    2: 'rearing',
    3: 'grooming',
    4: 'licking hindpaw L',
    5: 'licking hindpaw R'
}

print("time_ranges =", time_ranges)
print("len(time_ranges) =", len(time_ranges))
if len(time_ranges) < 2:
    print("Error: At least two time ranges are required for comparison.")
    exit()

bins = [start for start, end in time_ranges] + [time_ranges[-1][1]]
time_labels = [f"{start//60}-{end//60} min" for start, end in time_ranges]
print("Using bins:", bins)
print("Using time_labels:", time_labels)

input_dir = f'../processed_dataset/{project_name}/figures/behaviors_csv_raw-classification/seconds'
print("Input directory path (should contain subfolders and CSVs):", input_dir)

analysis_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison'
print("Analysis directory will be:", analysis_dir)
os.makedirs(analysis_dir, exist_ok=True)

def calculate_behavior_metrics(data, frame_rate=60):
    metrics = {}
    unique_behaviors = data['behavior'].unique()
    for behavior in unique_behaviors:
        behavior_data = data[data['behavior'] == behavior]
        fraction_time = len(behavior_data) / len(data)

        bout_starts = (behavior_data.index.to_series().diff() > 1).cumsum()
        bouts = behavior_data.groupby(bout_starts)
        bouts_per_minute = len(bouts) / (len(data) / frame_rate / 60)
        mean_bout_duration = bouts.size().mean() / frame_rate

        metrics[behavior] = {
            'Fraction Time': fraction_time,
            'Bouts per Minute': bouts_per_minute,
            'Mean Bout Duration (s)': mean_bout_duration
        }
    return metrics

found_any_csv = False
for root, dirs, files in os.walk(input_dir):
    for file_name in files:
        if not file_name.lower().endswith('.csv'):
            continue

        found_any_csv = True
        file_path = os.path.join(root, file_name)
        print("\n---")
        print(f"Processing file: {file_path}")
        df = pd.read_csv(file_path)

        if 'time_seconds' not in df.columns:
            print(f"  ERROR: 'time_seconds' column not found in {file_path}. Skipping.")
            continue

        max_time = df['time_seconds'].max()
        print(f"  max_time in this file = {max_time}")

        # Recompute bins if max_time < bins[-1]
        bins = [start for start, end in time_ranges] + [time_ranges[-1][1]]
        if max_time < bins[-1]:
            print(
                f"  WARNING: max_time ({max_time}s) < final bin end ({bins[-1]}s). "
                "Shrinking last bin to max_time."
            )
            bins[-1] = max_time
        print(f"  Using bins for pd.cut: {bins}")

        try:
            df['time_group'] = pd.cut(
                df['time_seconds'],
                bins=bins,
                labels=time_labels,
                right=False
            )
        except ValueError as e:
            print(f"  ValueError in pd.cut for file {file_path}: {e}")
            continue

        counts = df['time_group'].value_counts(dropna=False)
        print("  Row counts per time_group (NaN = didn’t fit any bin):")
        print(counts.to_dict())

        all_metrics = []
        for time_group, group_data in df.groupby('time_group', observed=False):
            if group_data.empty:
                continue
            metrics = calculate_behavior_metrics(group_data)
            for behavior, behavior_metrics in metrics.items():
                behavior_name = behavior_map.get(behavior, f"Unknown ({behavior})")
                all_metrics.append({
                    'Time Group': time_group,
                    'Behavior': behavior_name,
                    **behavior_metrics
                })

        analysis_df = pd.DataFrame(all_metrics)
        print(f"  Number of metric‐rows to save: {len(analysis_df)}")

        analysis_file_path = os.path.join(
            analysis_dir,
            f'analysis_{os.path.basename(file_name)}'
        )
        analysis_df.to_csv(analysis_file_path, index=False)
        print(f"  ⇒ Saved analysis CSV: {analysis_file_path}")

if not found_any_csv:
    print("No CSV files were found under", input_dir)

print("\nBehavior analysis completed for all files.")

Processing file: Combined_CSO19_CSO19_1weekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Bins: [0, 660, 1800]
Time Labels: ['0-10 min', '11-30 min']
Saved analysis for Combined_CSO19_CSO19_1weekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv to ../processed_dataset/project_ACC_MiniscopeSNI_Male_Psilo_1Week/figures/behavior_timepoint_comparison/analysis_Combined_CSO19_CSO19_1weekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Processing file: Combined_CSO17_CSO17_1WeekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Bins: [0, 660, 1800]
Time Labels: ['0-10 min', '11-30 min']
Saved analysis for Combined_CSO17_CSO17_1WeekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv to ../processed_dataset/project_ACC_MiniscopeSNI_Male_Psilo_1Week/figures/behavior_timepoint_comparison/analysis_Combined_CSO17_CSO17_1WeekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Processing file: Combined_MM98_MM98_1weekPsilDLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Bins: [0, 660, 842.9833333333333]
T

### Cohort Comparisons

In [8]:
import os
import pandas as pd

# Input and output directories
input_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison'
cohort_summary_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison/cohort_summaries'
os.makedirs(cohort_summary_dir, exist_ok=True)

behavior_map = {
    0: 'still',
    1: 'walking',
    2: 'rearing',
    3: 'grooming',
    4: 'licking hindpaw L',
    5: 'licking hindpaw R'
}

def aggregate_cohort_data(condition_name):
    all_metrics = []

    for file_name in os.listdir(input_dir):
        if file_name.endswith('.csv') and condition_name in file_name:
            file_path = os.path.join(input_dir, file_name)
            file_data = pd.read_csv(file_path)
            all_metrics.append(file_data)

    if not all_metrics:
        print(f"No matching files found for condition '{condition_name}'")
        return None

    combined_data = pd.concat(all_metrics, ignore_index=True)

    summary = combined_data.groupby(['Time Group', 'Behavior']).agg({
        'Fraction Time': ['mean', 'std'],
        'Bouts per Minute': ['mean', 'std'],
        'Mean Bout Duration (s)': ['mean', 'std']
    }).reset_index()

    summary.columns = ['Time Group', 'Behavior',
                       'Fraction Time (mean)', 'Fraction Time (std)',
                       'Bouts per Minute (mean)', 'Bouts per Minute (std)',
                       'Mean Bout Duration (mean)', 'Mean Bout Duration (std)']

    summary = summary.dropna(subset=[
        'Fraction Time (mean)',
        'Bouts per Minute (mean)',
        'Mean Bout Duration (mean)'
    ], how='all')

    # Map numeric behavior codes to names
    summary['Behavior'] = summary['Behavior'].apply(lambda x: behavior_map.get(x, x))

    return summary

# Loop through each group and each condition
for group_name in selected_groups:
    for condition_name in selected_conditions:
        summary = aggregate_cohort_data(condition_name)
        if summary is not None:
            output_filename = f'{group_name}_{condition_name}_summary.csv'
            summary_file_path = os.path.join(cohort_summary_dir, output_filename)
            summary.to_csv(summary_file_path, index=False)
            print(f"Saved cohort summary for group '{group_name}', condition '{condition_name}' to {summary_file_path}")

print("Cohort summaries created.")

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

input_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison'
cohort_summary_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison/cohort_summaries'
os.makedirs(cohort_summary_dir, exist_ok=True)

behavior_map = {
    0: 'still',
    1: 'walking',
    2: 'rearing',
    3: 'grooming',
    4: 'licking hindpaw L',
    5: 'licking hindpaw R'
}

# ——— Data aggregation with SEM ———
def aggregate_cohort_data_sem(group_name, condition_name):
    """Read CSVs matching group+condition, map behaviors, and compute mean, SD, n, SEM."""
    dfs = []
    pattern = f"{group_name}_{condition_name}"
    for fn in os.listdir(input_dir):
        if fn.endswith('.csv') and pattern in fn:
            df = pd.read_csv(os.path.join(input_dir, fn))
            df['Behavior'] = df['Behavior'].apply(lambda x: behavior_map.get(x, x))
            dfs.append(df)
    if not dfs:
        print(f"[!] No files for pattern '{pattern}'")
        return None

    combined = pd.concat(dfs, ignore_index=True)

    # aggregate
    agg = combined.groupby(['Time Group','Behavior']).agg({
        'Fraction Time':         ['mean','std','count'],
        'Bouts per Minute':      ['mean','std','count'],
        'Mean Bout Duration (s)': ['mean','std','count']
    })
    # flatten columns
    agg.columns = [
        'FT_mean','FT_std','FT_n',
        'BPM_mean','BPM_std','BPM_n',
        'MBD_mean','MBD_std','MBD_n'
    ]
    summary = agg.reset_index()

    # compute SEMs
    summary['FT_sem']  = summary['FT_std']  / np.sqrt(summary['FT_n'])
    summary['BPM_sem'] = summary['BPM_std'] / np.sqrt(summary['BPM_n'])
    summary['MBD_sem'] = summary['MBD_std'] / np.sqrt(summary['MBD_n'])

    return summary

# ——— Plotting (with save) ———
def plot_metric_side_by_side_sem(summaries, metric_mean, metric_sem, ylabel, title, save_name):
    """
    summaries: dict of {condition_name: summary_df}
    metric_mean/metric_sem: column names in summary_df
    save_name: filename (no path) to use when saving PNG
    """
    n = len(summaries)
    fig, axes = plt.subplots(1, n, figsize=(5*n, 4), sharey=True)
    if n == 1:
        axes = [axes]

    for ax, (cond, df) in zip(axes, summaries.items()):
        for beh in df['Behavior'].unique():
            d = df[df['Behavior']==beh]
            ax.errorbar(
                d['Time Group'], 
                d[metric_mean],
                yerr=d[metric_sem],
                marker='o',
                capsize=3,
                label=beh
            )
        ax.set_title(cond)
        ax.set_xlabel('Time Group')
        if ax is axes[0]:
            ax.set_ylabel(ylabel)
        ax.legend(title='Behavior', bbox_to_anchor=(1.05,1), loc='upper left')

    fig.suptitle(title, y=1.05)
    plt.tight_layout()

    # --- SAVE FIGURE ---
    fig_path = os.path.join(cohort_summary_dir, save_name)
    fig.suptitle(title, y=1.05)
    plt.tight_layout()
    plt.show()
    fig.savefig(fig_path, dpi=300)
    plt.close(fig)
    print(f"Saved figure: {fig_path}")

# ——— Main loop ———
for group in selected_groups:
    # aggregate & save CSVs
    condition_summaries = {}
    for cond in selected_conditions:
        summ = aggregate_cohort_data_sem(group, cond)
        if summ is None:
            continue
        # save CSV
        csv_name = f'{group}_{cond}_summary.csv'
        summ.to_csv(os.path.join(cohort_summary_dir, csv_name), index=False)
        print(f"Saved CSV: {csv_name}")
        condition_summaries[cond] = summ

    if not condition_summaries:
        continue

    # Plot & save each metric
    plot_metric_side_by_side_sem(
        condition_summaries,
        metric_mean='FT_mean', metric_sem='FT_sem',
        ylabel='Fraction Time (mean ± SEM)',
        title=f'Fraction Time — {group}',
        save_name=f'{group}_FractionTime.png'
    )
    plot_metric_side_by_side_sem(
        condition_summaries,
        metric_mean='BPM_mean', metric_sem='BPM_sem',
        ylabel='Bouts per Min (mean ± SEM)',
        title=f'Bouts per Minute — {group}',
        save_name=f'{group}_BoutsPerMinute.png'
    )
    plot_metric_side_by_side_sem(
        condition_summaries,
        metric_mean='MBD_mean', metric_sem='MBD_sem',
        ylabel='Bout Duration (s) (mean ± SEM)',
        title=f'Mean Bout Duration — {group}',
        save_name=f'{group}_MeanBoutDuration.png'
    )

print("All summaries and figures saved.")

Saved cohort summary for group 'Combined' to ../processed_dataset/project_ACC_MiniscopeSNI_Male_Psilo_1Week/figures/behavior_timepoint_comparison/cohort_summaries/Combined_cohort_summary.csv
Cohort summaries created.


# COMPLETE