In [1]:
import pandas as pd

# Read the segment table; the code below expects a "TR" column plus six
# "segment_idx_0" .. "segment_idx_5" columns (LLM segment indices).
pieman_segments = pd.read_csv("pieman_segments.csv")

segment_cols = [f"segment_idx_{i}" for i in range(6)]


def _first_mode(row):
    """Return the first entry of ``row.mode()`` (modes come back sorted, so ties give the smallest)."""
    return row.mode().iloc[0]


# Consensus label per row: the most frequent value across the six segment columns.
pieman_segments["mode_segment"] = pieman_segments[segment_cols].apply(_first_mode, axis=1)

# Reduce to the two columns needed, then collapse to one row per TR by
# keeping the first consensus label found within each TR group.
tr_labels = pieman_segments[["TR", "mode_segment"]].copy()
clean_seg = tr_labels.groupby("TR").agg({"mode_segment": "first"}).reset_index()

# A TR is flagged as a boundary when its label differs from the previous TR's label.
previous_label = clean_seg["mode_segment"].shift()
clean_seg["segment_boundary"] = (clean_seg["mode_segment"] != previous_label).astype(int)
# shift() leaves NaN in the first slot, so the comparison above is True there;
# force it back to 0 because the first TR has no predecessor to differ from.
clean_seg.loc[0, "segment_boundary"] = 0

# 0/1 boundary series, one entry per TR, used as input to the cross-correlation.
event_boundaries = clean_seg["segment_boundary"]

In [None]:
import numpy as np

def get_state_transitions(state_vector):
    """Mark the positions at which a state sequence changes value.

    Parameters
    ----------
    state_vector : array-like
        Sequence of state labels, one per time point. Lists, tuples,
        pandas Series and NumPy arrays are all accepted.

    Returns
    -------
    numpy.ndarray
        Integer array with the same shape as the input. Element ``t`` is 1
        when ``state_vector[t] != state_vector[t - 1]`` and 0 otherwise; the
        first element is always 0 because it has no predecessor.
    """
    # Coerce to an ndarray before comparing: on a plain list
    # `x[1:] != x[:-1]` is a single whole-list bool (which would then be
    # broadcast to every position), and pandas Series compare by index label
    # rather than by position. Neither is the element-wise shifted comparison
    # intended here.
    states = np.asarray(state_vector)
    transitions = np.zeros(states.shape, dtype=int)
    transitions[1:] = states[1:] != states[:-1]
    return transitions

# Import HMM results
n_subjects = 82  # presumably subjects are numbered 1..82 — confirm against the HMM output files
state_vectors = []
for i in range(1, n_subjects + 1):
    # TODO: load the HMM state sequence for subject `i` and append it here, e.g.
    #   state_vectors.append(<state sequence for subject i>)
    # `pass` keeps the cell syntactically valid until the loader is filled in.
    pass

transitions_list = []
# Apply get_state_transitions to HMM results for each subject.
# Iterate over the loaded vectors themselves so that each call receives ONE
# subject's state sequence (not the whole list) and the loop length always
# matches what was actually loaded.
for subject_states in state_vectors:
    transitions = get_state_transitions(subject_states)
    transitions_list.append(transitions)

In [None]:
from scipy.signal import correlate

def cross_correlation(a, b, max_lag=10):
    """Cross-correlate two equal-length 1-D signals over a window of lags.

    Both signals are mean-centred and then passed to
    ``scipy.signal.correlate(..., mode='full')``. The values are raw sums of
    products (they are NOT divided by the signals' norms, so they are not
    bounded to [-1, 1]).

    Parameters
    ----------
    a, b : array-like
        Signals of identical length.
    max_lag : int, default 10
        Largest absolute lag to return. If it exceeds ``len(a) - 1`` (the
        largest lag that exists) it is reduced to that value instead of
        raising an IndexError.

    Returns
    -------
    lags : numpy.ndarray
        Integer lags ``-max_lag .. max_lag`` (after clamping).
    corr : numpy.ndarray
        Value at each lag. At lag ``k`` this is ``sum_t a[t] * b[t - k]`` of
        the centred signals, so a peak at a positive lag means the pattern in
        ``a`` occurs ``k`` samples after the matching pattern in ``b``.

    Raises
    ------
    AssertionError
        If ``a`` and ``b`` differ in length.
    """
    assert len(a) == len(b), "a and b must have the same length"
    n_samples = len(a)

    a = np.asarray(a)
    b = np.asarray(b)
    a = a - np.mean(a)
    b = b - np.mean(b)

    corr = correlate(a, b, mode='full')
    lags = np.arange(-n_samples + 1, n_samples)

    # Index of lag 0 in the 'full' output (which has 2 * n_samples - 1 entries).
    center = n_samples - 1
    # Clamp so the window never runs off either end of the output.
    max_lag = min(max_lag, center)
    window = slice(center - max_lag, center + max_lag + 1)
    return lags[window], corr[window]

# Parameters
n_permutations = 500
max_lag = 10
random_seed = 0  # fixed so the permutation p-values are identical on every Restart & Run All

# Dedicated, seeded generator: keeps this cell reproducible and independent of
# NumPy's global random state.
perm_rng = np.random.default_rng(random_seed)

results = {
    "Lags": [],
    "Correlations": [],
    "P_Values": []
}

# The event-boundary vector is the same for every subject, so convert it once.
event_vector = np.array(event_boundaries)
n_timepoints = len(event_vector)

# Iterate over the subjects actually present rather than a hard-coded count.
for transition_vector in transitions_list:
    transition_vector = np.array(transition_vector)

    # True correlation
    lags, true_corr = cross_correlation(transition_vector, event_vector, max_lag=max_lag)

    # Null distribution via circular shift: rolling the event vector keeps its
    # number of boundaries and their spacing but breaks the alignment with the
    # subject's transitions.
    # NOTE(review): shifts smaller than max_lag largely reproduce the real
    # alignment at a different lag — consider whether they should be excluded.
    null_corrs = np.zeros((n_permutations, len(true_corr)))

    for p in range(n_permutations):
        # integers() excludes the upper bound, so this draws 1 .. n_timepoints - 1
        # (never 0, which would be the unshifted vector).
        shift_amount = perm_rng.integers(1, n_timepoints)
        shifted = np.roll(event_vector, shift_amount)
        _, permuted_corr = cross_correlation(transition_vector, shifted, max_lag=max_lag)
        null_corrs[p] = permuted_corr

    # Two-tailed p-values, one per lag. The +1 in numerator and denominator
    # counts the observed statistic as part of the null sample, so a p-value
    # can never be exactly 0 with a finite number of permutations.
    n_as_extreme = np.sum(np.abs(null_corrs) >= np.abs(true_corr), axis=0)
    p_vals = (n_as_extreme + 1) / (n_permutations + 1)

    # Store results (one entry per subject, in transitions_list order)
    results["Lags"].append(lags)
    results["Correlations"].append(true_corr)
    results["P_Values"].append(p_vals)