# Behavior Analysis: Comparison of Timepoints

Press SHIFT + ENTER to run each code cell, in order from top to bottom (later cells rely on variables defined in earlier ones).

### USER INPUT!

In [13]:
# Project folder name; it is interpolated into the ../processed_dataset/<project>/... paths used below.
project_name = "project_ACC_Study3_FormalinMorphine"

# Groups and conditions to analyse. See meta.py for how they were defined for your project.
selected_groups = [
    "Male",
    "Female",
]
selected_conditions = [
    "Group1_0mgkg",
    "Group2_0.5mgkg",
    "Group3_1.0mgkg",
    "Group4_5mgkg",
    "Group5_10mgkg",
]


### Main Code: Create Individual CSVs for Timepoint Comparisons (Fraction Time in Behavior; Bouts/Min; Mean Bout Duration) 

In [14]:
### Continue to Press SHIFT + ENTER to run code ###

# Ask how many time ranges to compare, then read each one as 'start-end' IN MINUTES
# (e.g., 0-10 for 0 to 10 minutes, then 11-30 for remaining 11-30 minutes).
# Each accepted range is stored in `time_ranges` as a (start_sec, end_sec) tuple.
#
# Invalid input raises ValueError so the cell stops right away with a clear message.
# exit() is not used: it is a helper for the interactive interpreter, does not
# cleanly abort a notebook cell, and may shut the kernel down (losing all variables).
num_timepoints = int(input("Enter the number of time ranges you want to compare (e.g., 2, 3, etc.): "))
time_ranges = []  # rebuilt from scratch on every run of this cell

for i in range(num_timepoints):
    time_range = input(f"Time range {i + 1}: ")

    # Only the parsing step is guarded, so the ordering error raised below is
    # not mistaken for a formatting problem.
    try:
        start_min, end_min = map(int, time_range.split('-'))
    except ValueError:
        raise ValueError(
            "Invalid input format. Please enter the time range as 'start-end' (e.g., 0-10)."
        ) from None

    if start_min >= end_min:
        raise ValueError(f"Error: Start time ({start_min}) must be less than end time ({end_min}).")

    # Convert minutes to seconds.
    time_ranges.append((start_min * 60, end_min * 60))

### Individual File Comparisons

In [15]:
import os
import pandas as pd

# Validate the time ranges.
# Problems raise ValueError so the cell stops here; exit() is avoided because it
# does not cleanly abort a notebook cell and may shut the kernel down.
if len(time_ranges) < 2:
    raise ValueError("Error: At least two time ranges are required for comparison.")

# Bin edges = the start of every range plus the end of the last range.
# pd.cut (used below) needs strictly increasing edges, so check that once here
# instead of letting every file fail individually later.
bins = [start for start, end in time_ranges] + [time_ranges[-1][1]]
if any(upper <= lower for lower, upper in zip(bins, bins[1:])):
    raise ValueError(
        "Error: Time ranges must be entered in increasing order without repeated start times "
        f"(bin edges in seconds: {bins})."
    )

# One label per range, shown in minutes exactly as the ranges were entered.
time_labels = [f"{start//60}-{end//60} min" for start, end in time_ranges]

# Directory containing the per-second CSV files
input_dir = f'../processed_dataset/{project_name}/figures/behaviors_csv_raw-classification/seconds'

# Directory to save the analysis results
analysis_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison'
os.makedirs(analysis_dir, exist_ok=True)

def calculate_behavior_metrics(data, frame_rate=60):
    """Summarise every behavior label present in ``data``.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain a 'behavior' column. Rows of one behavior whose index
        values differ by at most 1 are counted as belonging to the same bout.
    frame_rate : int or float, default 60
        Number of rows per second, used to convert row counts into time.
        NOTE(review): the default assumes 60 rows per second of recording;
        confirm this matches the CSV files being analysed.

    Returns
    -------
    dict
        Maps each behavior label to a dict with the keys 'Fraction Time',
        'Bouts per Minute' and 'Mean Bout Duration (s)'.
    """
    total_rows = len(data)
    results = {}

    for label in data['behavior'].unique():
        rows = data[data['behavior'] == label]

        # An index jump larger than 1 marks the first row of a new bout; the
        # running sum of those jumps gives every row its bout number.
        row_positions = rows.index.to_series()
        bout_ids = (row_positions.diff() > 1).cumsum()
        bout_sizes = rows.groupby(bout_ids).size()

        results[label] = {
            'Fraction Time': len(rows) / total_rows,
            # Bout count divided by the window length expressed in minutes.
            'Bouts per Minute': len(bout_sizes) / (total_rows / frame_rate / 60),
            # Average rows per bout, converted to seconds.
            'Mean Bout Duration (s)': bout_sizes.mean() / frame_rate,
        }

    return results

# Processing each file.
# For every CSV in `input_dir`: label each row with its time range, compute the
# per-behavior metrics inside each range, and save one 'analysis_<file>' CSV.

# Output columns are fixed up front so that a file with no rows in any range
# still produces a CSV with a header (a header-less file cannot be read back).
result_columns = ['Time Group', 'Behavior', 'Fraction Time', 'Bouts per Minute', 'Mean Bout Duration (s)']

# sorted() keeps the processing order the same from run to run.
for file_name in sorted(os.listdir(input_dir)):
    if not file_name.endswith('.csv'):
        continue

    file_path = os.path.join(input_dir, file_name)
    df = pd.read_csv(file_path)

    max_time = df['time_seconds'].max()

    # Bin edges: the start of every range plus the end of the last range.
    # NOTE(review): because only the starts are used as edges, any gap between
    # consecutive ranges (e.g. 0-10 then 11-30) is counted in the earlier range
    # even though its label says otherwise -- confirm this is intended.
    bins = [start for start, end in time_ranges] + [time_ranges[-1][1]]
    if max_time < bins[-1]:
        # Only warn. The final edge is deliberately NOT shortened to max_time:
        # with right=False the last bin excludes its upper edge, so shortening it
        # would drop the rows at the final timestamp (and would make the edges
        # non-increasing for recordings shorter than the last range start).
        print(f"Warning: Maximum time ({max_time}s) in {file_name} is less than the final bin end ({bins[-1]}s).")

    print(f"Processing file: {file_name}")
    print(f"Bins: {bins}")
    print(f"Time Labels: {time_labels}")

    try:
        # Left-closed bins: a row at exactly a range start belongs to that range.
        df['time_group'] = pd.cut(df['time_seconds'],
                                  bins=bins,
                                  labels=time_labels,
                                  right=False)
    except ValueError as e:
        print(f"Error in pd.cut for file {file_name}: {e}")
        continue

    # Analyzing behaviors for each time group
    all_metrics = []
    for time_group, group_data in df.groupby('time_group', observed=False):
        # observed=False also yields ranges that contain no rows; skip those.
        if group_data.empty:
            continue
        metrics = calculate_behavior_metrics(group_data)
        for behavior, behavior_metrics in metrics.items():
            all_metrics.append({
                'Time Group': time_group,
                'Behavior': behavior,
                **behavior_metrics
            })

    # DataFrame + save results
    analysis_df = pd.DataFrame(all_metrics, columns=result_columns)
    analysis_file_path = os.path.join(analysis_dir, f'analysis_{file_name}')
    analysis_df.to_csv(analysis_file_path, index=False)
    print(f'Saved analysis for {file_name} to {analysis_file_path}')

print('Behavior analysis completed for all files.')

Processing file: Male_Group1_0mgkg_3_2088.1M_L3_acA2040-120um__23701299__20230726_120404770DLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Bins: [0, 660, 1800]
Time Labels: ['0-10 min', '11-30 min']
Saved analysis for Male_Group1_0mgkg_3_2088.1M_L3_acA2040-120um__23701299__20230726_120404770DLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv to ../processed_dataset/project_ACC_Study3_FormalinMorphine/figures/behavior_timepoint_comparison/analysis_Male_Group1_0mgkg_3_2088.1M_L3_acA2040-120um__23701299__20230726_120404770DLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Processing file: Male_Group1_0mgkg_3_2088.4M_L2_acA2040-120um__23701274__20230726_124045566DLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv
Bins: [0, 660, 1800]
Time Labels: ['0-10 min', '11-30 min']
Saved analysis for Male_Group1_0mgkg_3_2088.4M_L2_acA2040-120um__23701274__20230726_124045566DLC_resnet50_LUPE_MALEDec5shuffle1_350000.csv to ../processed_dataset/project_ACC_Study3_FormalinMorphine/figures/behavior_timepoint_comparison/ana

### Cohort Comparisons

In [16]:
import os
import pandas as pd

# The per-file analysis CSVs are read from `input_dir`; the cohort summaries are
# written to a 'cohort_summaries' folder inside it.
input_dir = f'../processed_dataset/{project_name}/figures/behavior_timepoint_comparison'
cohort_summary_dir = f'{input_dir}/cohort_summaries'
os.makedirs(cohort_summary_dir, exist_ok=True)

def aggregate_cohort_data(group_name, condition_list, source_dir=None):
    """Combine the per-file analysis CSVs of one group into a mean/std summary.

    Only files belonging to ``group_name`` AND to one of ``condition_list`` are
    used. A file belongs to a cohort when its name (after an optional
    'analysis_' prefix) starts with '<group>_<condition>' followed by '_' or
    by the '.csv' extension, e.g. 'analysis_Male_Group1_0mgkg_3_....csv'.
    Matching on the prefix rather than on a substring matters because 'Male'
    is itself a substring of 'Female'.

    Parameters
    ----------
    group_name : str
        Group to summarise (e.g. 'Male').
    condition_list : list of str
        Conditions to include (e.g. ['Group1_0mgkg', ...]).
    source_dir : str, optional
        Folder holding the per-file analysis CSVs. Defaults to the notebook's
        global ``input_dir``.

    Returns
    -------
    pandas.DataFrame or None
        One row per ('Time Group', 'Behavior') with the mean and standard
        deviation of each metric across files, or None when no file matches.
    """
    if source_dir is None:
        source_dir = input_dir

    def belongs_to_cohort(file_name):
        # Drop the 'analysis_' prefix added when the per-file CSVs were saved.
        stem = file_name[len('analysis_'):] if file_name.startswith('analysis_') else file_name
        for condition in condition_list:
            prefix = f'{group_name}_{condition}'
            if stem.startswith(prefix):
                remainder = stem[len(prefix):]
                # Require a name boundary so a condition never matches a longer one.
                if remainder.startswith('_') or remainder == '.csv':
                    return True
        return False

    # sorted() keeps the concatenation order the same from run to run.
    all_metrics = [
        pd.read_csv(os.path.join(source_dir, file_name))
        for file_name in sorted(os.listdir(source_dir))
        if file_name.endswith('.csv') and belongs_to_cohort(file_name)
    ]

    if not all_metrics:
        print(f"No matching files found for group '{group_name}' with conditions {condition_list}")
        return None

    combined_data = pd.concat(all_metrics, ignore_index=True)

    summary = combined_data.groupby(['Time Group', 'Behavior']).agg({
        'Fraction Time': ['mean', 'std'],  # Mean and standard deviation
        'Bouts per Minute': ['mean', 'std'],
        'Mean Bout Duration (s)': ['mean', 'std']
    }).reset_index()

    # Flatten the two-level (metric, statistic) header; the order follows the agg spec above.
    summary.columns = ['Time Group', 'Behavior',
                       'Fraction Time (mean)', 'Fraction Time (std)',
                       'Bouts per Minute (mean)', 'Bouts per Minute (std)',
                       'Mean Bout Duration (mean)', 'Mean Bout Duration (std)']

    # Drop rows where every mean is missing.
    summary = summary.dropna(subset=[
        'Fraction Time (mean)',
        'Bouts per Minute (mean)',
        'Mean Bout Duration (mean)'
    ], how='all')

    return summary


# Build one summary per selected group and save it as '<group>_cohort_summary.csv'.
for group_name in selected_groups:
    summary = aggregate_cohort_data(group_name, selected_conditions)

    # Nothing to save when no file matched this group.
    if summary is None:
        continue

    summary_file_path = os.path.join(cohort_summary_dir, f'{group_name}_cohort_summary.csv')
    summary.to_csv(summary_file_path, index=False)
    print(f"Saved cohort summary for group '{group_name}' to {summary_file_path}")

print("Cohort summaries created.")

Saved cohort summary for group 'Male' to ../processed_dataset/project_ACC_Study3_FormalinMorphine/figures/behavior_timepoint_comparison/cohort_summaries/Male_cohort_summary.csv
Saved cohort summary for group 'Female' to ../processed_dataset/project_ACC_Study3_FormalinMorphine/figures/behavior_timepoint_comparison/cohort_summaries/Female_cohort_summary.csv
Cohort summaries created.


# COMPLETE