In [4]:
import sys, os
from pathlib import Path

# Locate the repository root: walk from the working directory up through its
# ancestors and take the first directory named after the repository.
current_path = Path(os.getcwd())
root = next(
    (
        candidate
        for candidate in (current_path, *current_path.parents)
        if candidate.name == "latent-neural-dynamics-modeling"
    ),
    None,
)

if root is None:
    # Fallback: no ancestor matched, so assume the notebook lives two
    # directories below the root (e.g. notebooks/<topic>/).
    root = current_path.parent.parent

# Put the root at the front of sys.path so project-local imports resolve.
# Any earlier copy of the entry is dropped first, so re-running this cell
# does not pile up duplicate sys.path entries.
root_entry = str(root)
sys.path[:] = [entry for entry in sys.path if entry != root_entry]
sys.path.insert(0, root_entry)

print(f"Repository root: {root}")
print(f"Current working directory: {Path.cwd()}")

Repository root: /home/bobby/repos/latent-neural-dynamics-modeling
Current working directory: /home/bobby/repos/latent-neural-dynamics-modeling/notebooks/data_analysis


In [5]:
import polars as pl
from pathlib import Path

from utils.motion import tracing_speed

In [6]:
# Reuse the repository root discovered by the setup cell rather than pinning a
# machine-specific absolute path (which breaks on any other checkout location).
# resolve() only normalises the value to an absolute path.
root = Path(root).resolve()

In [7]:
# Name of the dataset folder and the directory (under the repository root)
# that contains it; both are combined into a read path further down.
participants = "participants_at_150Hz"
resampled_data_path = root.joinpath("resampled_recordings")

In [8]:
# Selection of the recording to inspect: participant id, session and block.
participant, session, block = "PDI1", 2, 6

In [9]:
# Load the parquet data found at the selected participant/session path; the
# path segments follow the key=value folder naming used on disk.
# NOTE(review): `block` is set in the selection cell but is not part of this
# path — confirm whether a block filter was intended.
participants_df = pl.read_parquet(
    resampled_data_path.joinpath(
        participants,
        f"participant_id={participant}",
        f"session={session}",
    )
)

In [10]:
# Mean of the "original_length_ts" column across all loaded rows.
participants_df.get_column("original_length_ts").mean()

1350.0

In [11]:
# Take the "time_original" entry of the first loaded row.
time_original = participants_df.get_column("time_original")[0]

In [12]:
# Display the first row's "time_original" values extracted in the previous cell.
time_original

18.102864
18.10953
18.116197
18.122864
18.12953
…
27.06953
27.076197
27.082864
27.08953
27.096197


In [None]:
# Take the "ECOG_2" entry of the first loaded row.
ecog = participants_df.get_column("ECOG_2")[0]

In [None]:
# Display the first row's "ECOG_2" values extracted in the previous cell.
ecog

0.000007
0.000013
0.000018
0.000021
0.000025
…
0.000002
0.000007
0.000011
0.00001
0.000003


In [None]:
tracing_speed = participants_df["tracing_speed"][0]

In [None]:
tracing_speed

244.799357
244.799357
253.967235
263.41608
269.318998
…
202.180042
192.575355
187.025981
184.333399
174.281091


In [None]:
# Load the training split written under results/psid_behavioral/split.
train_split = pl.read_parquet(
    root.joinpath("results", "psid_behavioral", "split", "train.parquet")
)

In [None]:
# Show the "ECOG_2" entry of the first row of the training split.
train_split.get_column("ECOG_2")[0]

0.000006
0.000003
0.000005
0.000008
0.000009
…
0.000027
0.000028
0.000024
0.000018
0.000012


In [None]:
# Show the "tracing_speed" entry of the first row of the training split.
train_split.get_column("tracing_speed")[0]

73.709561
73.709561
76.481336
78.460672
79.253111
…
89.508908
87.409505
86.149365
85.449564
82.789822


In [None]:
# Reload the training split so this cell can be run on its own.
train_split = pl.read_parquet(
    root.joinpath("results", "psid_behavioral", "split", "train.parquet")
)

# Preview the identifying columns together with the margin for the first rows.
margin_columns = ["participant_id", "session", "block", "trial", "chunk_margin"]
print("Chunk margins across trials:")
print(train_split.head(10).select(margin_columns))

# List the distinct margin values to see whether they are consistent.
print(f"\nUnique chunk_margin values: {train_split['chunk_margin'].unique().to_list()}")

# For the first rows, report the ECOG_2 length and what remains once the
# margin is removed from both ends.
# NOTE(review): the preview above uses "chunk_margin" while this loop reads
# "chunk_margin_ts" — presumably two separate columns; confirm both exist.
print("\nExpected lengths after removing margins:")
for row in train_split.head(10).iter_rows(named=True):
    original_length = len(row["ECOG_2"])
    chunk_margin_ts = row["chunk_margin_ts"]
    after_margin = original_length - 2 * chunk_margin_ts
    print(
        f"Trial {row['trial']}: original={original_length}, margin={chunk_margin_ts}, after_slice={after_margin}"
    )