In [None]:
from pathlib import Path
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import plotly.io as pio
import plotly.express as px
from plotly.subplots import make_subplots
from scipy.stats import mode
from scipy.integrate import cumulative_trapezoid
from scipy.signal import correlate
import json
%config Completer.use_jedi = False  # Fixes autocomplete issues
%config InlineBackend.figure_format = 'retina'  # Improves plot resolution

import gc # garbage collector for removing large variables from memory instantly 
import importlib #for force updating changed packages 

#import harp
import harp_resources.process
import harp_resources.utils
from harp_resources import process, utils # Reassign to maintain direct references for force updating 
#from sleap import load_and_process as lp

In [None]:
#-------------------------------
# data paths setup
#-------------------------------

#data_dir = Path('/Users/rancze/Documents/Data/vestVR/Cohort1/VestibularMismatch_day1')
data_dir = Path('/Users/rancze/Documents/Data/vestVR/Cohort1/Visual_mismatch_day3')
#data_dir = Path('/Users/rancze/Documents/Data/vestVR/Cohort1/Visual_mismatch_day4')

# Raw session folders are all sub-directories that are not "*_processedData" output folders.
# Path.iterdir() yields entries in arbitrary order, so the list is sorted to make
# `rawdata_paths[0]` select the same session on every run.
rawdata_paths = sorted(
    p for p in data_dir.iterdir()
    if p.is_dir() and not p.name.endswith('_processedData')
)
if not rawdata_paths:
    # Fail here with a readable message instead of an IndexError on rawdata_paths[0] below.
    raise FileNotFoundError(f"No raw session directories found in {data_dir}")

#-------------------------------
# initial variables setup
#-------------------------------
rawdata_path = rawdata_paths[0]  # session analysed by this notebook
time_window_start = -2  # s, FOR PLOTTING PURPOSES
time_window_end = 5  # s, FOR PLOTTING PURPOSES
baseline_window = (-2, 0) # s, FOR baselining averages 
event_name = "No halt"  # value matched against the "Event" column of experiment_events
vestibular_mismatch = False  # when True, halt times are taken from experiment events instead of the photodiode
common_resampled_rate = 1000 #in Hz


# Derived locations: processed output lives next to the raw session folder.
data_path = rawdata_path.parent / f"{rawdata_path.name}_processedData/downsampled_data"
save_path = rawdata_path.parent / f"{rawdata_path.name}_processedData"
# Last two path components of data_path joined with "_"
session_name = "_".join(data_path.parts[-2:])


In [None]:
#-------------------------------
# load downsampled data
#-------------------------------
def _read_downsampled(file_name):
    """Read one parquet file from ``data_path`` using the pyarrow engine."""
    return pd.read_parquet(data_path / file_name, engine="pyarrow")


photometry_tracking_encoder_data = _read_downsampled("photometry_tracking_encoder_data.parquet")
camera_photodiode_data = _read_downsampled("camera_photodiode_data.parquet")
experiment_events = _read_downsampled("experiment_events.parquet")
photometry_info = _read_downsampled("photometry_info.parquet")
session_settings = _read_downsampled("session_settings.parquet")

# The "metadata" column is passed element-wise through process.safe_from_json
decoded_metadata = session_settings["metadata"].apply(process.safe_from_json)
session_settings["metadata"] = decoded_metadata

print(f"✅ Finished loading all parquet files")

# Intervals (in seconds) between consecutive occurrences of `event_name`
is_selected_event = experiment_events["Event"] == event_name
event_times = experiment_events[is_selected_event].index
inter_event_intervals = event_times.to_series().diff().dropna()
time_diffs = inter_event_intervals.dt.total_seconds()

# To inspect the closest events, uncomment:
# print("5 shortest time differences between events:")
# print(time_diffs.nsmallest(5))

# Warn when any two consecutive events are closer than 10 s
if time_diffs.lt(10).any():
    print(f"⚠️ Warning: Some '{event_name}' events are less than 10 seconds apart. Consider applying a filter to events.")

mouse_name = process.check_exp_events(experiment_events, photometry_info, verbose = True)


In [None]:
#---------------------------------------------------
# PLOT FIGURE to ascertain everything is well loaded
#---------------------------------------------------

# Photodiode trace on the common, downsampled time grid.
# For troubleshooting, camera_photodiode_data["Photodiode"] (async raw values) can be used
# instead, but then the nearest indices on the common grid have to be looked up.
df_to_analyze = photometry_tracking_encoder_data["Photodiode_int"]

halts_from_events = vestibular_mismatch or event_name == "No halt"
if not halts_from_events:
    # Halt times and delay statistics come from the photodiode signal
    (
        photodiode_halts,
        photodiode_delay_min,
        photodiode_delay_avg,
        photodiode_delay_max,
    ) = process.analyze_photodiode(df_to_analyze, experiment_events, event_name, plot = True)
else:
    # Halt times come from the experiment events; their timestamps are not on the
    # downsampled grid, so each one is snapped to the nearest grid timestamp.
    matching_events = experiment_events["Event"] == event_name
    photodiode_halts = experiment_events[matching_events].index.tolist()
    nearest_indices = photometry_tracking_encoder_data.index.get_indexer(photodiode_halts, method='nearest')
    photodiode_halts = photometry_tracking_encoder_data.index[nearest_indices]
    print ("INFO: vestibular MM or 'No halt', no signal in the photodiode, using experiment events for MM times")

process.plot_figure_1(
    photometry_tracking_encoder_data,
    session_name,
    save_path,
    common_resampled_rate,
    photodiode_halts,
    save_figure = False,
    show_figure = True,
    downsample_factor=50,
)

# Drop the temporary reference and run the garbage collector
del df_to_analyze
gc.collect()
None

TODO: Look into the very heavy filtering applied in the velocity and acceleration calculations (the cutoff looks like about 1 Hz; to be confirmed).

In [None]:
# One window of data per halt event, collected here before concatenation
aligned_data = []

# Window edges relative to each halt (constant across events)
window_start_offset = pd.Timedelta(seconds=time_window_start)
window_end_offset = pd.Timedelta(seconds=time_window_end)

for halt_time in photodiode_halts:
    # Rows whose timestamp lies inside [halt + start, halt + end], both edges included
    sample_times = photometry_tracking_encoder_data.index
    after_start = sample_times >= halt_time + window_start_offset
    before_end = sample_times <= halt_time + window_end_offset
    window_data = photometry_tracking_encoder_data.loc[after_start & before_end].copy()

    # Time axis relative to the halt, plus a tag identifying the event
    window_data["Time (s)"] = (window_data.index - halt_time).total_seconds()
    window_data["Halt Time"] = halt_time

    aligned_data.append(window_data)

# Stack all windows into one long table
aligned_df = pd.concat(aligned_data, ignore_index=True)

# Across-event mean and standard error of the mean (SEM) at each relative time point
grouped_by_relative_time = aligned_df.groupby("Time (s)")
mean_df = grouped_by_relative_time.mean()
sem_df = grouped_by_relative_time.sem()

# Two side-by-side panels sharing the time axis
fig, axes = plt.subplots(1, 2, figsize=(14, 6), sharex=True)

### PLOT 1: Individual Traces - Photodiode, z_470, z_560 ###
ax1 = axes[0]      # left y-axis: photodiode
ax1_2 = ax1.twinx()  # right y-axis: fluorescence z-scores

# One pass over the events: photodiode on the left axis, both fluorescence channels on the twin
for halt_time in photodiode_halts:
    subset = aligned_df[aligned_df["Halt Time"] == halt_time]
    relative_time = subset["Time (s)"]
    ax1.plot(relative_time, subset["Photodiode_int"], color='grey', alpha=0.5)
    for fluorescence_column, trace_colour in (("z_470", 'green'), ("z_560", 'red')):
        ax1_2.plot(relative_time, subset[fluorescence_column], color=trace_colour, linestyle='-', alpha=0.5)

ax1.set_xlabel('Time (s) relative to halt')
ax1.set_ylabel('Photodiode')
ax1.set_title('Photodiode, z_470, and z_560')

ax1_2.set_ylabel('Fluorescence (z-score)', color='green')

### PLOT 2: Mean & SEM of All Signals ###
def _draw_mean_sem(axis, column, colour, scale=1, **line_kwargs):
    """Draw the across-event mean of `column` (from mean_df) with a shaded mean ± SEM band (from sem_df).

    `scale` multiplies both the line and the band; `line_kwargs` are forwarded to `axis.plot`.
    """
    centre = mean_df[column]
    spread = sem_df[column]
    axis.plot(mean_df.index, centre * scale, color=colour, alpha=0.8, **line_kwargs)
    axis.fill_between(mean_df.index, (centre - spread) * scale, (centre + spread) * scale,
                      color=colour, alpha=0.2)


def _offset_twin(parent_axis, outward_points):
    """Create a twin y-axis of `parent_axis` whose right spine is shifted outward by `outward_points`."""
    twin = parent_axis.twinx()
    twin.spines['right'].set_position(('outward', outward_points))
    return twin


ax2 = axes[1]

# Photodiode on the panel's primary axis
_draw_mean_sem(ax2, "Photodiode_int", 'grey')
ax2.set_xlabel('Time (s) relative to halt')
ax2.set_ylabel('Photodiode')
ax2.set_title('Mean & SEM of All Signals')

# Fluorescence signals (z_470 and z_560) share one twin axis
ax2_2 = ax2.twinx()
_draw_mean_sem(ax2_2, "z_470", 'green', linestyle='-')
_draw_mean_sem(ax2_2, "z_560", 'red', linestyle='-')
ax2_2.set_ylabel('Fluorescence (z-score)', color='green')

# Motor velocity
ax2_3 = _offset_twin(ax2, 50)
_draw_mean_sem(ax2_3, "Motor_Velocity", '#00008B', linestyle='-')
ax2_3.set_ylabel('Motor Velocity (Dark Blue)')
ax2_3.yaxis.label.set_color('#00008B')

# Running velocity (Velocity_0X), multiplied by 1000 for display
ax2_4 = _offset_twin(ax2, 100)
_draw_mean_sem(ax2_4, "Velocity_0X", 'orange', scale=1000, linestyle='-')
ax2_4.set_ylabel('Running velocity (mm/s²) WRONG SCALE?', color='orange')

# Turning velocity (Velocity_0Y)
ax2_5 = _offset_twin(ax2, 150)
_draw_mean_sem(ax2_5, "Velocity_0Y", '#4682B4', linestyle='-')
ax2_5.set_ylabel('Turning velocity (deg/s²) WRONG SCALE?', color='#4682B4')

# ---- Ellipse signals ----

# Ellipse.Diameter_1
ax2_6 = _offset_twin(ax2, 200)
_draw_mean_sem(ax2_6, "Ellipse.Diameter_1", 'purple', linestyle='-')
ax2_6.set_ylabel('Pupil Diameter', color='purple')

# Ellipse.Center.X_1
ax2_7 = _offset_twin(ax2, 250)
_draw_mean_sem(ax2_7, "Ellipse.Center.X_1", 'magenta', linestyle='-')
ax2_7.set_ylabel('Ellipse Center X', color='magenta')

# Finalise layout and render
fig.tight_layout()
plt.show()

# The list of per-event windows is no longer needed once aligned_df exists
del aligned_data
gc.collect()
None



In [None]:
# Width of the baselined figure, in inches
plot_width = 14  # Change this value to adjust the plot width

# Per-event baseline: mean of every column over the baseline window (both edges included)
in_baseline_window = aligned_df["Time (s)"].between(baseline_window[0], baseline_window[1])
baseline_df = aligned_df[in_baseline_window].groupby("Halt Time").mean()

# Signals that get a "<name>_Baseline" column; the photodiode is left uncorrected
signals_to_baseline = [
    "z_470", "z_560", "Motor_Velocity", "Velocity_0X", "Velocity_0Y",
    "Ellipse.Diameter_1", "Ellipse.Center.X_1",
]
for signal_name in signals_to_baseline:
    # Look up each row's event baseline through its "Halt Time" tag and subtract it
    event_baseline = aligned_df["Halt Time"].map(baseline_df[signal_name])
    aligned_df[f"{signal_name}_Baseline"] = aligned_df[signal_name] - event_baseline

# Across-event mean and SEM at each relative time point, now including the baselined columns
baselined_by_relative_time = aligned_df.groupby("Time (s)")
mean_baseline_df = baselined_by_relative_time.mean()
sem_baseline_df = baselined_by_relative_time.sem()

# Function to ensure zero is centered while covering SEM values
def get_symmetric_ylim(mean_data, sem_data):
    """Return y-limits ``(-m, m)`` centred on zero that enclose the mean ± SEM band.

    ``m`` is the largest finite ``|mean|`` plus the largest finite ``|sem|``, which
    bounds ``|mean ± sem|`` at every point.

    Parameters
    ----------
    mean_data, sem_data : array-like of numbers (e.g. pandas Series)
        Mean trace and its SEM. NaN/inf entries are ignored.

    Returns
    -------
    tuple of float
        ``(-m, m)``. If no finite, non-zero extent exists (all values NaN or zero),
        ``(-1.0, 1.0)`` is returned.
    """
    def _finite_peak(values):
        # Largest finite magnitude in `values`; 0.0 when nothing finite is present.
        magnitudes = np.abs(np.asarray(values, dtype=float)).ravel()
        magnitudes = magnitudes[np.isfinite(magnitudes)]
        return float(magnitudes.max()) if magnitudes.size else 0.0

    # An all-NaN SEM (e.g. only one event, so no spread can be estimated) contributes
    # zero width instead of turning the whole limit into NaN.
    max_abs_value = _finite_peak(mean_data) + _finite_peak(sem_data)
    if max_abs_value == 0.0:
        # Degenerate input: fall back to a usable range so the limits are finite and distinct.
        max_abs_value = 1.0
    return (-max_abs_value, max_abs_value)

# Single-panel figure for the baseline-corrected averages; width set by plot_width
def _draw_baselined_band(axis, column, colour, scale=1, **line_kwargs):
    """Draw the mean of `column` (from mean_baseline_df) with a shaded mean ± SEM band (from sem_baseline_df).

    `scale` multiplies both the line and the band; `line_kwargs` are forwarded to `axis.plot`.
    """
    centre = mean_baseline_df[column]
    spread = sem_baseline_df[column]
    axis.plot(mean_baseline_df.index, centre * scale, color=colour, alpha=0.8, **line_kwargs)
    axis.fill_between(mean_baseline_df.index, (centre - spread) * scale, (centre + spread) * scale,
                      color=colour, alpha=0.2)


def _shifted_twin(parent_axis, outward_points):
    """Create a twin y-axis of `parent_axis` whose right spine is shifted outward by `outward_points`."""
    twin = parent_axis.twinx()
    twin.spines['right'].set_position(('outward', outward_points))
    return twin


fig, ax = plt.subplots(figsize=(plot_width, 6))

### PLOT: Mean & SEM of Baseline-Corrected Signals ###
# The photodiode trace is plotted without baseline subtraction
_draw_baselined_band(ax, "Photodiode_int", 'grey')
ax.set_xlabel('Time (s) relative to halt')
ax.set_ylabel('Photodiode', color='grey')
ax.set_title(f'Baselined Mean & SEM of All Signals - {mouse_name}')

# Fluorescence axis (z_470 and z_560); limits cover both channels together
ax2 = ax.twinx()
_draw_baselined_band(ax2, "z_470_Baseline", 'green', linestyle='-')
_draw_baselined_band(ax2, "z_560_Baseline", 'red', linestyle='-')
ax2.set_ylabel('Fluorescence (z-score, red 560nm)', color='green')
fluorescence_columns = ["z_470_Baseline", "z_560_Baseline"]
ax2.set_ylim(get_symmetric_ylim(
    pd.concat([mean_baseline_df[name] for name in fluorescence_columns]),
    pd.concat([sem_baseline_df[name] for name in fluorescence_columns]),
))
ax2.yaxis.label.set_color('green')

# Motor Velocity axis
ax3 = _shifted_twin(ax, 50)
_draw_baselined_band(ax3, "Motor_Velocity_Baseline", '#00008B', linestyle='-')
ax3.set_ylabel('Motor Velocity (deg/s²)', color='#00008B')
ax3.set_ylim(get_symmetric_ylim(mean_baseline_df["Motor_Velocity_Baseline"],
                                sem_baseline_df["Motor_Velocity_Baseline"]))
ax3.yaxis.label.set_color('#00008B')

# Running Velocity axis (Velocity_0X), multiplied by 1000 for display
ax4 = _shifted_twin(ax, 100)
_draw_baselined_band(ax4, "Velocity_0X_Baseline", 'orange', scale=1000, linestyle='-')
ax4.set_ylabel('Running velocity (mm/s²) WRONG SCALE?', color='orange')
ax4.set_ylim(get_symmetric_ylim(mean_baseline_df["Velocity_0X_Baseline"]*1000,
                                sem_baseline_df["Velocity_0X_Baseline"]*1000))
ax4.yaxis.label.set_color('orange')

# Turning Velocity axis (Velocity_0Y)
ax5 = _shifted_twin(ax, 150)
_draw_baselined_band(ax5, "Velocity_0Y_Baseline", '#4682B4', linestyle='-')
ax5.set_ylabel('Turning velocity (deg/s²) WRONG SCALE?', color='#4682B4')
ax5.set_ylim(get_symmetric_ylim(mean_baseline_df["Velocity_0Y_Baseline"],
                                sem_baseline_df["Velocity_0Y_Baseline"]))
ax5.yaxis.label.set_color('#4682B4')

# ---- Ellipse signals ----

# Ellipse.Diameter_1 axis
ax6 = _shifted_twin(ax, 200)
_draw_baselined_band(ax6, "Ellipse.Diameter_1_Baseline", 'purple', linestyle='-')
ax6.set_ylabel('Ellipse Diameter', color='purple')
ax6.set_ylim(get_symmetric_ylim(mean_baseline_df["Ellipse.Diameter_1_Baseline"],
                                sem_baseline_df["Ellipse.Diameter_1_Baseline"]))

# Ellipse.Center.X_1 axis
ax7 = _shifted_twin(ax, 250)
_draw_baselined_band(ax7, "Ellipse.Center.X_1_Baseline", 'magenta', linestyle='-')
ax7.set_ylabel('Ellipse Center X', color='magenta')
ax7.set_ylim(get_symmetric_ylim(mean_baseline_df["Ellipse.Center.X_1_Baseline"],
                                sem_baseline_df["Ellipse.Center.X_1_Baseline"]))

# Finalise layout, render, then write the figure to save_path as a 1200 dpi PNG
fig.tight_layout()
plt.show()
fig.savefig(save_path / f"figure2_{event_name}.png", dpi=1200)


In [None]:
import importlib  # used to pick up edits made to the helper modules without restarting the kernel

# Re-execute both helper submodules, in the order process then utils
for submodule_name in ("process", "utils"):
    importlib.reload(getattr(harp_resources, submodule_name))

# Rebind the short aliases so they refer to the reloaded modules
process = harp_resources.process
utils = harp_resources.utils

In [None]:
# Memory usage report from the project helper, limited via top_n=10
# (presumably the ten largest objects; confirm against utils.get_pympler_memory_usage).
pympler_memory_df = utils.get_pympler_memory_usage(top_n=10)
# NOTE(review): this hardcoded ID overwrites the mouse_name returned earlier by
# process.check_exp_events(...). If a cell that uses mouse_name (e.g. the baselined
# figure title) is re-run after this one, it will show this value regardless of the
# loaded session. Looks like leftover debugging; confirm before keeping.
mouse_name = "B6J2717"