# Extract and align data from Onix, Harp, Sleap, and photometry
## Cohort 1 and 2 are working. Cohort 0: the onix_digital Clock column is 0; investigate why and/or use timestamps instead

In [None]:
import numpy as np
from pathlib import Path
import os
import matplotlib.pyplot as plt
import plotly.graph_objects as go
import pandas as pd
#import harp
import plotly.express as px
from scipy.stats import mode

import gc # garbage collector for removing large variables from memory instantly 
import importlib #for force updating changed packages 

import harp_resources.process
import harp_resources.utils
from harp_resources import process, utils # Reassign to maintain direct references for force updating 
#from sleap import load_and_process as lp

In [None]:
# ---- Default session flags (dataset-specific overrides follow below) ----
has_heartbeat = False
cohort0 = cohort2 = False
onix_analog_clock_downsampled = onix_analog_framecount_upsampled = False
common_resampled_rate = 10000  # Hz
unit_conversions = False  # set to True by the unit-conversion cell once it has run
save_full_asynchronous_data = False  # save all data before resampling
has_photometry = True  # processed photometry data is present
has_onix_harp = True
has_video = True

# ---- Dataset selection ----
# Cohort 2 Encoder Test recordings: no photometry, no VideoData1.
# Active session: ball rotations after motor rotation, first in Y, then in X.
data_path = Path('/Users/rancze/Documents/Data/vestVR/EncoderTest/2025-02-26T15-03-34')
# Alternative session:
#data_path = Path('/Users/rancze/Documents/Data/vestVR/EncoderTest/2025-02-26T14-57-04')

# Overrides for the Encoder Test sessions
has_heartbeat = True
has_photometry = has_onix_harp = has_video = False

# Processed photometry is expected in "<session>_processedData/photometry" next to the session folder
photometry_path = data_path.parent.joinpath(f"{data_path.name}_processedData", "photometry")

# ---- Readers (one per data stream) ----
session_settings_reader = utils.SessionData("SessionSettings")
experiment_events_reader = utils.TimestampedCsvReader(
    "ExperimentEvents",
    columns=["Event"],
)
onix_framecount_reader = utils.TimestampedCsvReader(
    "OnixAnalogFrameCount",
    columns=["Index"],
)
#photometry_reader = utils.PhotometryReader("Processed_fluorescence")
video_reader1 = utils.VideoReader("VideoData1")
video_reader2 = utils.VideoReader("VideoData2")
onix_digital_reader = utils.OnixDigitalReader(
    "OnixDigital",
    columns=["Value.Clock", "Value.HubClock", "Value.DigitalInputs", "Seconds"],
)
onix_harp_reader = utils.TimestampedCsvReader(
    "OnixHarp",
    columns=["Clock", "HubClock", "HarpTime"],
)

### Load all data 

In [None]:
# Load every data stream of the session selected in the configuration cell.
# Optional streams are guarded by the has_* / cohort flags set there.

print("Loading session settings")
# The loaded frame is not tidy, but downstream analysis code relies on this format
session_settings = utils.load_2(session_settings_reader, data_path)
print("Loading experiment events")
experiment_events = utils.load_2(experiment_events_reader, data_path)

if has_photometry:
    # Build file paths with pathlib instead of string concatenation
    print("Loading processed photometry")
    photometry_data = pd.read_csv(photometry_path / "Processed_fluorescence.csv")
    photometry_data.set_index("TimeStamp", inplace=True)
    photometry_data.index.name = 'Seconds'
    print("Loading processed photometry info")
    photometry_info = pd.read_csv(photometry_path / "Info.csv")
    print("Loading processed photometry events")
    photometry_events = pd.read_csv(photometry_path / "Events.csv")
    photometry_events["TimeStamp"] = photometry_events["TimeStamp"] / 1000  # convert to seconds from ms
    photometry_events.set_index("TimeStamp", inplace=True)
    photometry_events.index.name = 'Seconds'

if has_video:
    print("Loading video data 1")
    video_data1 = utils.load_2(video_reader1, data_path)
    print("Loading video data 2")
    video_data2 = utils.load_2(video_reader2, data_path)

# Read Onix data
if has_onix_harp:
    print("Loading OnixDigital")
    onix_digital = utils.load_2(onix_digital_reader, data_path)

if cohort0:
    print("Loading OnixAnalogFrameClock")
    onix_analog_framecount = utils.load_2(onix_framecount_reader, data_path)

print("Loading OnixAnalogClock")
onix_analog_clock = utils.read_OnixAnalogClock(data_path)
print("Loading OnixAnalogData and converting to boolean photodiode array")
# method is 'adaptive' or 'threshold' (hard threshold at 120);
# refractory avoids multiple detections of the same event
photodiode = utils.read_OnixAnalogData(
    data_path,
    channels=[0],
    binarise=True,
    method='adaptive',
    refractory=300,
    flip=True,
    verbose=False,
)

# Read HARP data
print("Loading H1 and H2 streams, AnalogInput removed")
# dataframe=True loads the registers as a DataFrame; False would return a dict
harp_streams = utils.load_registers(data_path, dataframe=True, has_heartbeat=has_heartbeat, verbose=False)
harp_streams.drop(columns=["AnalogInput(39)"], inplace=True)  # AnalogInput is not currently used
harp_streams = harp_streams.dropna(how="all")  # remove rows with all NaNs
# Columns listed here are cast to boolean; the list is currently empty, so the loop is a no-op
columns_to_convert = []
for col in columns_to_convert:
    harp_streams[col] = harp_streams[col].astype(bool)

# Read the synchronising signal between HARP and ONIX
if not cohort0 and has_onix_harp:
    print("Loading OnixHarp")
    onix_harp = utils.load_2(onix_harp_reader, data_path)
    onix_harp = utils.detect_and_remove_outliers(
        df=onix_harp,
        x_column="HarpTime",
        y_column="Clock",
        verbose=False,  # True prints all outliers
    )
    # Known issue with the current version of ONIX: harp timestamps lag by 1 second
    onix_harp["HarpTime"] = onix_harp["HarpTime"] + 1
    print("❗Reminder: HarpTime was increased by 1s to account for know issue with ONIX")

print("✅ Done Loading")

In [None]:
# Keep an untouched copy of the loaded HARP streams; harp_streams itself is
# modified by later cells, and the copy is used for before/after comparisons.
harp_streams_orig = harp_streams.copy()

# Range of the raw encoder readings
encoder_raw = harp_streams_orig["Encoder(38)"]
encoder_min, encoder_max = encoder_raw.min(), encoder_raw.max()

print(f"Minimum value in 'Encoder(38)': {encoder_min}")
print(f"Maximum value in 'Encoder(38)': {encoder_max}")

# Quick-look plot of the raw encoder trace
encoder_raw.plot()

Convert platform position and flow sensor streams to real-world units and forward fill

In [None]:
# Quick-look plot of the "OpticalTrackingRead0X(46)" column from harp_streams_orig,
# the copy of harp_streams taken right after loading.
harp_streams_orig["OpticalTrackingRead0X(46)"].plot()

In [None]:
# Quick-look plot of the "OpticalTrackingRead0Y(46)" column from harp_streams_orig,
# the copy of harp_streams taken right after loading.
harp_streams_orig["OpticalTrackingRead0Y(46)"].plot()

In [None]:
# Encoder readings at the 'Homing platform' event and at the experiment event that
# follows it; the latter is passed to the encoder conversion as the reference position.
homing_position, next_event_position = process.get_encoder_home_position(experiment_events, harp_streams)
print("Encoder values for homing and next event positions")
print(f"Encoder value at 'Homing platform': {homing_position}")
print(f"Encoder value at the next experiment event: {next_event_position}")
print("❗ Warning: home position is determined by the time of the experiment event after 'Homing platform'. It works for e.g. 'Waiting for run threshold' which starts immediately after homing, but may not work for other session types.")

# The unit_conversions flag guards against converting twice when the cell is re-run
if unit_conversions:
    print("❗ Flow sensor and encoder values already converted to real-world units, skipping")
else:
    # Optical flow sensor columns and the conversion applied to each (applied in this order)
    optical_converters = {
        "OpticalTrackingRead0X(46)": process.running_unit_conversion,  # m / s
        "OpticalTrackingRead0Y(46)": process.turning_unit_conversion,  # degrees / s
        "OpticalTrackingRead1X(46)": process.running_unit_conversion,
        "OpticalTrackingRead1Y(46)": process.turning_unit_conversion,
    }
    for column, convert in optical_converters.items():
        harp_streams[column] = convert(harp_streams[column].to_numpy())

    ## NaN filling (forward then backward) of the optical sensor columns is currently disabled:
    # columns_to_fill = [
    #     "OpticalTrackingRead0X(46)", "OpticalTrackingRead0Y(46)",
    #     "OpticalTrackingRead1X(46)", "OpticalTrackingRead1Y(46)"]
    # harp_streams[columns_to_fill] = harp_streams[columns_to_fill].ffill().bfill()

    # Fill gaps in the encoder stream before converting it
    encoder_forward_filled = harp_streams["Encoder(38)"].ffill()
    harp_streams["Encoder(38)"] = encoder_forward_filled.bfill()
    harp_streams["Encoder(38)"] = process.encoder_unit_conversion(
        harp_streams["Encoder(38)"],
        next_event_position,
    )

    unit_conversions = True
    print("✅ Unit conversions to real-life values done")


In [None]:
import plotly.graph_objects as go
import plotly.io as pio

# Compare the encoder column of harp_streams (converted, if the unit-conversion
# cell has run) with the untouched copy in harp_streams_orig, one y-axis each.

# Current harp_streams values: dashed line with markers on the left axis
converted_encoder_trace = go.Scatter(
    x=harp_streams.index,
    y=harp_streams["Encoder(38)"],
    mode="lines+markers",
    line={"dash": "dash"},
    name="Encoder(38)",
    yaxis="y1",
)

# Original values: red dots on the right axis
raw_encoder_trace = go.Scatter(
    x=harp_streams_orig.index,
    y=harp_streams_orig["Encoder(38)"],
    mode="markers",
    name="Encoder(38) (Original)",
    marker={"color": "red", "size": 8},
    yaxis="y2",
)

fig = go.Figure(data=[converted_encoder_trace, raw_encoder_trace])

# Secondary y-axis overlays the primary one
fig.update_layout(
    title="Encoder Values Over Time",
    xaxis_title="Time",
    yaxis={"title": "Encoder(38)", "side": "left"},
    yaxis2={
        "title": "Encoder(38) (Original)",
        "overlaying": "y",
        "side": "right",
    },
)

# Open the figure in the default web browser
pio.show(fig, renderer="browser")

In [None]:
# "OpticalTrackingRead0X(46)" from the untouched copy (harp_streams_orig), shown again
# for comparison with the same column of harp_streams plotted in the next cell.
harp_streams_orig["OpticalTrackingRead0X(46)"].plot()

In [None]:
# "OpticalTrackingRead0X(46)" from harp_streams; once the unit-conversion cell has run
# this column holds the output of process.running_unit_conversion.
harp_streams["OpticalTrackingRead0X(46)"].plot()

In [None]:
from scipy.integrate import cumulative_trapezoid
import plotly.graph_objects as go
import plotly.io as pio
# Smooth the "OpticalTrackingRead0X(46)" stream and integrate it over time.
# NOTE: this cell overwrites the column in harp_streams, so re-running it
# smooths the already-smoothed data again.
flow_column = "OpticalTrackingRead0X(46)"

# Fill missing values with 0, then apply a centred 3-sample moving average.
# - The result is assigned back explicitly: calling fillna(inplace=True) on the
#   selected column is chained assignment, which pandas deprecates and which
#   does not modify the DataFrame under Copy-on-Write.
# - min_periods=1 keeps the first and last samples finite; with the default
#   they become NaN and the leading NaN turns the whole cumulative integral
#   below into NaN.
harp_streams[flow_column] = (
    harp_streams[flow_column]
    .fillna(0)
    .rolling(window=3, center=True, min_periods=1)
    .mean()
)

# Values and elapsed time (seconds since the first sample) for the integration.
# Assumes harp_streams has a datetime index, so the subtraction yields timedeltas.
optical_tracking_values = harp_streams[flow_column].to_numpy()
time_values_seconds = (harp_streams.index - harp_streams.index[0]).total_seconds()

# Cumulative trapezoidal integral; initial=0 keeps the output the same length as the input
cumulative_integral = cumulative_trapezoid(optical_tracking_values, x=time_values_seconds, initial=0)

# Wrap the integral in a DataFrame that shares the original index
integral_df = pd.DataFrame(cumulative_integral, index=harp_streams.index, columns=["CumulativeIntegral_OpticalTrackingRead0X(46)"])

#------------------------------------------------
# Plot the smoothed stream (left axis) against its cumulative integral (right axis)
fig = go.Figure()

# Smoothed stream values
fig.add_trace(go.Scatter(
    x=harp_streams.index,
    y=harp_streams[flow_column],
    mode='lines+markers',
    name='Original Values',
    line=dict(color='red'),
    marker=dict(size=8),
    yaxis='y1'
))

# Cumulative integral
fig.add_trace(go.Scatter(
    x=integral_df.index,
    y=integral_df["CumulativeIntegral_OpticalTrackingRead0X(46)"],
    mode='lines+markers',
    name='Cumulative Integral',
    line=dict(color='blue'),
    marker=dict(size=8),
    yaxis='y2'
))

# Secondary y-axis overlays the primary one
fig.update_layout(
    title="Cumulative Integral and Original Values",
    xaxis_title="Time",
    yaxis=dict(
        title="Original Values",
        side='left'
    ),
    yaxis2=dict(
        title="Cumulative Integral",
        overlaying='y',
        side='right'
    )
)

# Open the figure in the default web browser
pio.show(fig, renderer='browser')


In [None]:
# Display the array that was passed to cumulative_trapezoid in the previous cell
optical_tracking_values