# Load the movement dataset

In [None]:
# imports
import matplotlib.pyplot as plt
import numpy as np
from calibration_utils import (
    binning_based_calibration,
    compute_ece,
    plot_reliability_diagram,
)

from movement import sample_data
from movement.filtering import rolling_filter, savgol_filter

In [None]:
# Fetch the DLC sample predictions file through movement's sample-data helper
filename = "DLC_single-mouse_EPM.predictions.h5"
data = sample_data.fetch_dataset(filename)

# Show the dataset structure first, then the variables it exposes
print(data, data.variables, sep="\n")

## Extract confidence scores

In [None]:
# Pull the raw confidence array out of the dataset
confidence_values = data["confidence"].values

# Report how many entries are NaN next to the total number of entries
print("Total NaN values:", np.count_nonzero(np.isnan(confidence_values)))
print("Total confidence values:", confidence_values.size)

# Indices along axis 0 (frames) for which *no* entry over axes 1 and 2 is
# NaN, i.e. frames whose confidence scores are all valid
valid_confidence_frames = np.flatnonzero(
    ~np.isnan(confidence_values).any(axis=(1, 2))
)
print("Frames with valid confidence scores:", valid_confidence_frames)
confidence_values.shape

In [None]:
# Keep only the non-NaN entries; boolean-mask indexing yields a 1-D array
confidence_values = confidence_values[
    np.logical_not(np.isnan(confidence_values))
]

# Print each summary statistic formatted to four decimal places
for _label, _stat in (
    ("Mean", np.mean),
    ("Median", np.median),
    ("Min", np.min),
    ("Max", np.max),
):
    print(f"{_label}: {_stat(confidence_values):.4f}")

## Plot the distribution of confidence scores

In [None]:
# Re-read the confidence scores from the dataset as a flat 1-D array
confidence_values = data["confidence"].values.flatten()

# Histogram of the scores (20 bins) drawn on the current axes
ax = plt.gca()
ax.hist(confidence_values, bins=20, edgecolor="black")
ax.set_xlabel("Confidence Score")
ax.set_ylabel("Frequency")
ax.set_title("Distribution of Confidence Scores (DLC Data)")
plt.show()

In [None]:
# num_bins equal-width bins spanning [0, 1] require num_bins + 1 edges
num_bins = 10
bin_edges = np.linspace(0.0, 1.0, num=num_bins + 1)
bin_edges

In [None]:
# Count how many confidence scores fall into each bin
binned_confidence = np.histogram(confidence_values, bins=bin_edges)[0]
# NOTE(review): this is the mean of the per-bin *counts*, not a mean
# confidence score, and it is not used again in the visible cells — confirm
# whether the mean of confidence_values was intended.
avg_confidence_scores = binned_confidence.mean()
binned_confidence

In [None]:
# Draw one bar per bin, anchored at the bin's left edge. Bar widths come from
# the spacing of bin_edges so they stay correct when the number of bins
# changes (a fixed width of 0.1 is only right for exactly 10 bins on [0, 1]).
plt.bar(
    bin_edges[:-1],
    binned_confidence,
    width=np.diff(bin_edges),
    edgecolor="black",
    align="edge",
)
plt.xlabel("Confidence Score Bins")
plt.ylabel("Frequency")
plt.title("Binned Confidence Scores")
plt.show()

In [None]:
# Pull the raw NumPy arrays out of the dataset
positions = data["position"].values  # Extract (x, y) coordinates
confidences = data["confidence"].values  # Extract confidence scores

# Shapes first ...
print("Position shape:", positions.shape)
print("Confidence shape:", confidences.shape)
# ... then the array contents. These two lines print the values themselves,
# so they are labelled as values rather than as shapes.
print("Position values:", positions)
print("Confidence values:", confidences)

## Smoothing positions

In [None]:
# Window length in frames: 0.1 s worth of frames at the dataset's frame
# rate (read from the dataset attributes), truncated to an integer
fps = data.attrs["fps"]
window = int(fps * 0.1)

# Two-stage smoothing: a rolling median over the positions first, then a
# Savitzky-Golay filter applied to that result with the same window
smoothed_positions = savgol_filter(
    rolling_filter(data["position"], window, statistic="median"),
    window,
)
smoothed_positions

In [None]:
# First difference along axis 0: change in smoothed position between
# consecutive entries
velocity = np.diff(smoothed_positions, axis=0)

# Differencing once more gives the change in velocity
acceleration = np.diff(velocity, axis=0)

# Euclidean norm of the acceleration, reduced over axis 1
acceleration_magnitude = np.sqrt(np.square(acceleration).sum(axis=1))

print("Acceleration Shape:", acceleration.shape)
print("Sample Acceleration Magnitude:", acceleration_magnitude[:5])

## Identify Uncertain Frames

In [None]:
# Threshold at the 95th percentile of the acceleration magnitudes.
# np.nanpercentile ignores NaN entries; np.percentile would return NaN as
# soon as a single NaN is present (the smoothing filters may leave NaNs in
# the positions), and a NaN threshold makes every comparison below False.
threshold = np.nanpercentile(acceleration_magnitude, 95)

# Mark entries with high acceleration as "uncertain"; NaN magnitudes compare
# False and are therefore never flagged
uncertain_frames = acceleration_magnitude > threshold

print("Number of Uncertain Frames:", np.sum(uncertain_frames))
uncertain_frames

In [None]:
# Compare the shapes of the confidence array and the uncertainty mask
for _name, _array in (
    ("Confidence Values", confidence_values),
    ("Uncertain Frames", uncertain_frames),
):
    print(f"{_name} Shape:", _array.shape)

In [None]:
# Read the array dimensions from the dataset instead of hard-coding them, so
# the cell keeps working for recordings of a different length or with a
# different number of keypoints/individuals.
# NOTE(review): assumes the dataset uses the dimension names "time",
# "keypoints" and "individuals" — confirm against the loaded dataset.
num_frames = data.sizes["time"]
num_keypoints = data.sizes["keypoints"]
num_individuals = data.sizes["individuals"]

# Rebuild the (frames, keypoints, individuals) confidence array straight from
# the dataset, ordering the dimensions by name. This does not depend on
# confidence_values still being the complete flattened array (an earlier cell
# may have stripped NaNs from it, which would make a reshape fail).
confidence_values = (
    data["confidence"].transpose("time", "keypoints", "individuals").values
)
print("Reshaped Confidence Values Shape:", confidence_values.shape)

In [None]:
# Row indices (axis 0 of uncertain_frames) that have at least one flagged
# entry. np.unique drops the repeats that arise when several entries of the
# same row are flagged, so each frame contributes once to the statistics
# below instead of once per flagged keypoint.
flagged_rows = np.unique(np.nonzero(uncertain_frames)[0])

# Entry i of a second difference is built from frames i, i+1 and i+2, so it
# describes frame i+1; shift the indices to address the matching frames.
# NOTE(review): assumes uncertain_frames was derived from the twice-
# differenced positions, i.e. has two fewer rows than confidence_values.
uncertain_frame_indices = flagged_rows + 1
uncertain_confidences = confidence_values[uncertain_frame_indices, :, :]

# Print stats
print(
    f"Mean Confidence of Uncertain Frames: "
    f"{np.mean(uncertain_confidences):.4f}"
)
print(
    f"Median Confidence of Uncertain Frames: "
    f"{np.median(uncertain_confidences):.4f}"
)

## Plot histogram for overall confidence & uncertain confidence

In [None]:
# Define bins (same as before) and the centre of each bin for plotting
num_bins = 10
bin_edges = np.linspace(0, 1, num_bins + 1)
bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

# Compute histogram for overall confidence & uncertain confidence
overall_counts, _ = np.histogram(confidence_values.flatten(), bins=bin_edges)
uncertain_counts, _ = np.histogram(
    uncertain_confidences.flatten(), bins=bin_edges
)

# Normalize counts to fractions. max(..., 1) keeps an empty histogram (for
# example when no frame was flagged as uncertain) at all zeros instead of
# producing 0/0 = NaN.
overall_prob = overall_counts / max(overall_counts.sum(), 1)
uncertain_prob = uncertain_counts / max(uncertain_counts.sum(), 1)

# Plot both confidence distributions against the bin centres. This compares
# how confidence is distributed in the two groups; it is not a reliability
# diagram, which would plot accuracy against confidence.
plt.figure(figsize=(6, 4))
plt.plot(bin_centers, overall_prob, label="All Frames", marker="o")
plt.plot(
    bin_centers,
    uncertain_prob,
    label="Uncertain Frames",
    marker="o",
    linestyle="dashed",
    color="red",
)

plt.xlabel("Confidence Score Bins")
plt.ylabel("Probability")
plt.title("Confidence Distribution: Overall vs. Uncertain Frames")
plt.legend()
plt.show()

In [None]:
# Synthetic example: seed NumPy's global RNG so the draws are repeatable
np.random.seed(42)

# 1000 simulated confidence scores drawn uniformly from [0, 1)
confidences = np.random.rand(1000)

# Simulated correctness labels: True where the confidence plus Gaussian
# noise (mean 0, standard deviation 0.1) exceeds 0.5
labels = np.greater(
    confidences + np.random.normal(loc=0, scale=0.1, size=1000), 0.5
)

# Calibrate the simulated scores with the binning-based helper
(
    calibrated_confidences,
    bin_edges,
    bin_accuracies,
) = binning_based_calibration(confidences, labels)
print("Original Confidence Mean:", confidences.mean())
print("Calibrated Confidence Mean:", np.mean(calibrated_confidences))

In [None]:
# Plot the reliability diagram for the simulated example, passing the
# original confidences, the calibrated confidences, the correctness labels
# and the bin edges produced by the calibration cell
plot_reliability_diagram(
    confidences, calibrated_confidences, labels, bin_edges
)

In [None]:
# Compute the Expected Calibration Error (ECE) for the original and the
# calibrated confidences over the same bins, then print both values to four
# decimal places
ece_before, ece_after = compute_ece(
    confidences, calibrated_confidences, labels, bin_edges
)
print(f"Expected Calibration Error (ECE) Before Calibration: {ece_before:.4f}")
print(f"Expected Calibration Error (ECE) After Calibration: {ece_after:.4f}")