In [1]:
import os
import time
import json
from pathlib import Path
import numpy as np
import tables
import pandas as pd
import tensorflow as tf

2026-02-23 11:27:07.956887: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2 FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2026-02-23 11:27:08.132068: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcudart.so.11.0'; dlerror: libcudart.so.11.0: cannot open shared object file: No such file or directory
2026-02-23 11:27:08.132106: I tensorflow/compiler/xla/stream_executor/cuda/cudart_stub.cc:29] Ignore above cudart dlerror if you do not have a GPU set up on your machine.
2026-02-23 11:27:09.594918: W tensorflow/compiler/xla/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2026-

In [2]:
# Ensure TF1-style behavior (matches the original repo)
tf.compat.v1.disable_eager_execution()

# ----------------------------
# LinearModel (adapted)
# ----------------------------
def kaiming(shape, dtype=None, partition_info=None):
    """Kaiming initialization function compatible with tf.get_variable initializer.
    The original repo used a callable initializer; we keep the same behavior here.
    """
    # shape[0] used in original (fan_in). We guard for shape being tuple/list.
    fan_in = float(shape[0]) if len(shape) > 0 else 1.0
    std = np.sqrt(2.0 / fan_in)
    return tf.random.truncated_normal(shape, dtype=dtype) * std


class LinearModel(object):
    """Linear + ReLU model compatible with the original H36M linear model."""

    def __init__(self,
                 linear_size=1024,
                 num_layers=2,
                 residual=False,
                 batch_norm=False,
                 max_norm=False,
                 batch_size=64,
                 learning_rate=1e-3,
                 summaries_dir=None,
                 predict_14=False,
                 dtype=tf.float32):
        """
        Args mirror the original implementation. This version assumes the input
        is 16 joints x,y => 32 dims (HUMAN_2D_SIZE).
        """

        # Constants same as original repo
        self.HUMAN_2D_SIZE = 16 * 2
        self.HUMAN_3D_SIZE = 14 * 3 if predict_14 else 16 * 3

        self.input_size = self.HUMAN_2D_SIZE
        self.output_size = self.HUMAN_3D_SIZE

        # TF placeholders
        self.isTraining = tf.compat.v1.placeholder(tf.bool, name="isTrainingflag")
        self.dropout_keep_prob = tf.compat.v1.placeholder(tf.float32, name="dropout_keep_prob")

        # Summary writers (optional)
        if summaries_dir is not None:
            os.makedirs(os.path.join(summaries_dir, 'train'), exist_ok=True)
            os.makedirs(os.path.join(summaries_dir, 'test'), exist_ok=True)
            self.train_writer = tf.compat.v1.summary.FileWriter(os.path.join(summaries_dir, 'train'))
            self.test_writer = tf.compat.v1.summary.FileWriter(os.path.join(summaries_dir, 'test'))
        else:
            self.train_writer = None
            self.test_writer = None

        # Params
        self.linear_size = linear_size
        self.batch_size = batch_size
        self.learning_rate = tf.Variable(float(learning_rate), trainable=False, dtype=dtype, name="learning_rate")
        self.global_step = tf.Variable(0, trainable=False, name="global_step")

        # Learning rate decay (same defaults as original)
        decay_steps = 100000
        decay_rate = 0.96
        self.learning_rate = tf.compat.v1.train.exponential_decay(self.learning_rate, self.global_step, decay_steps, decay_rate)

        # Inputs placeholders
        with tf.compat.v1.variable_scope("inputs"):
            enc_in = tf.compat.v1.placeholder(dtype, shape=[None, self.input_size], name="enc_in")
            dec_out = tf.compat.v1.placeholder(dtype, shape=[None, self.output_size], name="dec_out")
            self.encoder_inputs = enc_in
            self.decoder_outputs = dec_out

        # Build model (linear + relu blocks)
        with tf.compat.v1.variable_scope("linear_model"):
            # First linear layer
            w1 = tf.compat.v1.get_variable(name="w1", initializer=kaiming, shape=[self.HUMAN_2D_SIZE, linear_size], dtype=dtype)
            b1 = tf.compat.v1.get_variable(name="b1", initializer=kaiming, shape=[linear_size], dtype=dtype)
            if max_norm:
                w1 = tf.clip_by_norm(w1, 1)
            y3 = tf.matmul(enc_in, w1) + b1

            if batch_norm:
                # tf.compat.v1.layers.batch_normalization
                y3 = tf.compat.v1.layers.batch_normalization(y3, training=self.isTraining, name="batch_normalization")
            y3 = tf.nn.relu(y3)
            y3 = tf.nn.dropout(y3, rate=1.0 - self.dropout_keep_prob)  # TF2-style dropout with rate

            # Two-linear blocks (num_layers times)
            for idx in range(num_layers):
                y3 = self.two_linear(y3, linear_size, residual, self.dropout_keep_prob, max_norm, batch_norm, dtype, idx)

            # Final linear -> output
            w4 = tf.compat.v1.get_variable(name="w4", initializer=kaiming, shape=[linear_size, self.HUMAN_3D_SIZE], dtype=dtype)
            b4 = tf.compat.v1.get_variable(name="b4", initializer=kaiming, shape=[self.HUMAN_3D_SIZE], dtype=dtype)
            if max_norm:
                w4 = tf.clip_by_norm(w4, 1)
            y = tf.matmul(y3, w4) + b4

        # Outputs & loss
        self.outputs = y
        self.loss = tf.reduce_mean(tf.square(y - dec_out))
        self.loss_summary = tf.compat.v1.summary.scalar('loss/loss', self.loss)

        # Error in mm placeholder for TB logging (keeps API similar)
        self.err_mm = tf.compat.v1.placeholder(tf.float32, name="error_mm")
        self.err_mm_summary = tf.compat.v1.summary.scalar("loss/error_mm", self.err_mm)

        # Optimizer and updates
        opt = tf.compat.v1.train.AdamOptimizer(self.learning_rate)
        update_ops = tf.compat.v1.get_collection(tf.compat.v1.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            gradients = opt.compute_gradients(self.loss)
            self.gradients = [[] if i is None else i for i in gradients]
            self.updates = opt.apply_gradients(gradients, global_step=self.global_step)

        self.learning_rate_summary = tf.compat.v1.summary.scalar('learning_rate/learning_rate', self.learning_rate)
        self.saver = tf.compat.v1.train.Saver(tf.compat.v1.global_variables(), max_to_keep=10)

    def two_linear(self, xin, linear_size, residual, dropout_keep_prob, max_norm, batch_norm, dtype, idx):
        """Two linear layers with optional residual (mirrors original)."""
        with tf.compat.v1.variable_scope("two_linear_%d" % idx):
            input_size = int(xin.get_shape()[1])

            # Linear 1
            w2 = tf.compat.v1.get_variable(name="w2_%d" % idx, initializer=kaiming, shape=[input_size, linear_size], dtype=dtype)
            b2 = tf.compat.v1.get_variable(name="b2_%d" % idx, initializer=kaiming, shape=[linear_size], dtype=dtype)
            if max_norm:
                w2 = tf.clip_by_norm(w2, 1)
            y = tf.matmul(xin, w2) + b2
            if batch_norm:
                y = tf.compat.v1.layers.batch_normalization(y, training=self.isTraining, name="batch_norm1_%d" % idx)
            y = tf.nn.relu(y)
            y = tf.nn.dropout(y, rate=1.0 - dropout_keep_prob)

            # Linear 2
            w3 = tf.compat.v1.get_variable(name="w3_%d" % idx, initializer=kaiming, shape=[linear_size, linear_size], dtype=dtype)
            b3 = tf.compat.v1.get_variable(name="b3_%d" % idx, initializer=kaiming, shape=[linear_size], dtype=dtype)
            if max_norm:
                w3 = tf.clip_by_norm(w3, 1)
            y = tf.matmul(y, w3) + b3
            if batch_norm:
                y = tf.compat.v1.layers.batch_normalization(y, training=self.isTraining, name="batch_norm2_%d" % idx)
            y = tf.nn.relu(y)
            y = tf.nn.dropout(y, rate=1.0 - dropout_keep_prob)

            # Residual (original only adds input every 2 blocks; we keep behavior same as input param)
            if residual:
                y = xin + y

            return y

    def step(self, session, encoder_inputs, decoder_outputs, dropout_keep_prob, isTraining=True):
        """Run a training or validation step. Returns similar tuple to original repo."""

        input_feed = {
            self.encoder_inputs: encoder_inputs,
            self.decoder_outputs: decoder_outputs,
            self.isTraining: isTraining,
            self.dropout_keep_prob: dropout_keep_prob
        }

        if isTraining:
            output_feed = [self.updates, self.loss, self.loss_summary, self.learning_rate_summary, self.outputs]
            outputs = session.run(output_feed, input_feed)
            # returns (loss, loss_summary, lr_summary, outputs)
            return outputs[1], outputs[2], outputs[3], outputs[4]
        else:
            output_feed = [self.loss, self.loss_summary, self.outputs]
            outputs = session.run(output_feed, input_feed)
            return outputs[0], outputs[1], outputs[2]


    # A helper batch maker for already-prepared numpy arrays
    def get_all_batches_from_arrays(self, X, Y, training=True):
        """Split X,Y arrays into batches matching self.batch_size.
           Returns lists of batches: (X_batches, Y_batches)"""
        assert X.shape[0] == Y.shape[0], "X and Y must have same number of rows"
        n = X.shape[0]
        if training:
            perm = np.random.permutation(n)
            X = X[perm]
            Y = Y[perm]
        # Trim extras so divisible by batch_size
        n_extra = n % self.batch_size
        if n_extra > 0:
            X = X[:-n_extra]
            Y = Y[:-n_extra]
        n_batches = X.shape[0] // self.batch_size
        if n_batches == 0:
            raise ValueError("Not enough examples for a single batch. Increase data or reduce batch_size.")
        X_batches = np.split(X, n_batches)
        Y_batches = np.split(Y, n_batches)
        return X_batches, Y_batches


# ----------------------------
# Training loop helper
# ----------------------------
def train_model_on_arrays(X_train, Y_train, X_val, Y_val,
                          linear_size=1024, num_layers=2, residual=False,
                          batch_norm=False, max_norm=False, batch_size=64,
                          learning_rate=1e-3, epochs=10, dropout=1.0,
                          summaries_dir=None, predict_14=False, use_gpu=True,
                          restore_checkpoint=None, save_dir='experiments'):
    """
    Train the LinearModel on arrays. This is the simple analog of the original train() flow.
    """
    device_count = {"GPU": 0} if not use_gpu else {"GPU": 1}
    config = tf.compat.v1.ConfigProto(device_count=device_count, allow_soft_placement=True)

    # Prepare training directory
    train_dir = save_dir
    summaries_dir = summaries_dir or os.path.join(train_dir, "log")
    os.makedirs(train_dir, exist_ok=True)
    os.makedirs(summaries_dir, exist_ok=True)

    sess = tf.compat.v1.Session(config=config)

    # Create model
    model = LinearModel(
        linear_size=linear_size,
        num_layers=num_layers,
        residual=residual,
        batch_norm=batch_norm,
        max_norm=max_norm,
        batch_size=batch_size,
        learning_rate=learning_rate,
        summaries_dir=summaries_dir,
        predict_14=predict_14,
        dtype=tf.float32
    )

    # Initialize or restore
    sess.run(tf.compat.v1.global_variables_initializer())
    if restore_checkpoint:
        ckpt = tf.train.get_checkpoint_state(restore_checkpoint)
        if ckpt and ckpt.model_checkpoint_path:
            model.saver.restore(sess, ckpt.model_checkpoint_path)

    train_writer = model.train_writer

    # Training loop
    current_step = 0
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")
        X_batches, Y_batches = model.get_all_batches_from_arrays(X_train, Y_train, training=True)
        nbatches = len(X_batches)
        epoch_loss = 0.0
        start_time = time.time()

        for i in range(nbatches):
            enc_in = X_batches[i]
            dec_out = Y_batches[i]
            step_loss, loss_summary, lr_summary, _ = model.step(sess, enc_in, dec_out, dropout_keep_prob=dropout, isTraining=True)

            if train_writer is not None and (current_step % 100 == 0):
                train_writer.add_summary(loss_summary, current_step)
                train_writer.add_summary(lr_summary, current_step)

            epoch_loss += step_loss
            current_step += 1

        epoch_loss /= nbatches
        print(f"  Train loss avg: {epoch_loss:.6f}  (time: {time.time()-start_time:.2f}s)")

        # Validation
        Xv_batches, Yv_batches = model.get_all_batches_from_arrays(X_val, Y_val, training=False)
        val_loss = 0.0
        for i in range(len(Xv_batches)):
            vl, vs, preds = model.step(sess, Xv_batches[i], Yv_batches[i], dropout_keep_prob=1.0, isTraining=False)
            val_loss += vl
        val_loss /= len(Xv_batches)
        print(f"  Val loss avg:   {val_loss:.6f}")

        # Save checkpoint
        ckpt_name = os.path.join(train_dir, 'checkpoint')
        model.saver.save(sess, ckpt_name, global_step=current_step)
        print(f"  Saved checkpoint at step {current_step}")

    print("Training finished.")
    # RETURN the open session (caller must close it when done)
    return model, sess

In [6]:
# Clear any existing graph (VERY important in notebooks)
tf.compat.v1.reset_default_graph()

# ----------------------------
# Synthetic Example Data
# ----------------------------
N_train = 2048
N_val = 512

batch_size = 64
predict_14 = False

input_dim = 16 * 2
output_dim = 14*3 if predict_14 else 16*3

# Generate synthetic data
X_train = np.random.randn(N_train, input_dim).astype(np.float32)
Y_train = np.random.randn(N_train, output_dim).astype(np.float32)

X_val = np.random.randn(N_val, input_dim).astype(np.float32)
Y_val = np.random.randn(N_val, output_dim).astype(np.float32)

# ----------------------------
# Train
# ----------------------------
model, sess = train_model_on_arrays(
    X_train, Y_train,
    X_val, Y_val,
    linear_size=1024,
    num_layers=2,
    residual=False,
    batch_norm=False,
    max_norm=False,
    batch_size=batch_size,
    learning_rate=1e-3,
    epochs=3,
    dropout=0.9,
    summaries_dir="./logs_example",
    predict_14=predict_14,
    use_gpu=True,
    save_dir="./example_experiments"
)

# ----------------------------
# Evaluate on ONE FULL BATCH
# ----------------------------
# Make sure we pass exactly one batch_size worth of samples
Xb = X_val[:batch_size]
Yb = Y_val[:batch_size]

loss_val, summary, preds = model.step(
    sess,
    Xb,
    Yb,
    dropout_keep_prob=1.0,
    isTraining=False
)

print("Example validation loss:", loss_val)
print("Predictions shape:", preds.shape)

# Close session when completely finished
sess.close()


E0000 00:00:1771347372.478110    2157 cuda_executor.cc:1309] INTERNAL: CUDA Runtime error: Failed call to cudaGetRuntimeVersion: Error loading CUDA libraries. GPU will not be used.: Error loading CUDA libraries. GPU will not be used.
W0000 00:00:1771347372.479353    2157 gpu_device.cc:2342] Cannot dlopen some GPU libraries. Please make sure the missing libraries mentioned above are installed properly if you would like to use GPU. Follow the guide at https://www.tensorflow.org/install/gpu for how to download and setup the required libraries for your platform.
Skipping registering GPU devices...


Epoch 1/3
  Train loss avg: 1.096895  (time: 1.26s)
  Val loss avg:   1.013701
  Saved checkpoint at step 32
Epoch 2/3
  Train loss avg: 1.002927  (time: 0.98s)
  Val loss avg:   1.013460
  Saved checkpoint at step 64
Epoch 3/3
  Train loss avg: 0.998458  (time: 0.82s)
  Val loss avg:   1.014558
  Saved checkpoint at step 96
Training finished.
Example validation loss: 1.0390238
Predictions shape: (64, 48)


In [4]:
def load_scene(scene_json_path: Path) -> list:
    with open(scene_json_path, "r") as f:
        data = json.load(f)

    cameras = []

    for cam in data["cameras"]:
        # Intrinsics
        K = np.array(cam["k"], dtype=np.float64).reshape(3, 3)

        # Distortion (not needed for DLT, but useful to keep)
        D = np.array(cam["d"], dtype=np.float64)

        # Extrinsics
        R = np.array(cam["r"], dtype=np.float64).reshape(3, 3)
        t = np.array(cam["t"], dtype=np.float64).reshape(3, 1)

        # Projection matrix: P = K [R | t]
        Rt = np.hstack([R, t])   # 3x4
        P = K @ Rt               # 3x4

        cameras.append({
            "K": K,
            "D": D,
            "R": R,
            "t": t,
            "P": P
        })

    return cameras

In [5]:
# Load data for 2d joint detections and camera projection matrices

cam3 = pd.read_hdf("/gws/nopw/j04/iecdt/cheetah/2017_08_29/bottom/phantom/flick2/filtered_2D/cam3DLC_resnet152_CheetahOct14shuffle1_500000.h5")
cam4 = pd.read_hdf("/gws/nopw/j04/iecdt/cheetah/2017_08_29/bottom/phantom/flick2/filtered_2D/cam4DLC_resnet152_CheetahOct14shuffle1_500000.h5")
cam5 = pd.read_hdf("/gws/nopw/j04/iecdt/cheetah/2017_08_29/bottom/phantom/flick2/filtered_2D/cam5DLC_resnet152_CheetahOct14shuffle1_500000.h5")
cam6 = pd.read_hdf("/gws/nopw/j04/iecdt/cheetah/2017_08_29/bottom/phantom/flick2/filtered_2D/cam6DLC_resnet152_CheetahOct14shuffle1_500000.h5")

cams = [cam3, cam4, cam5, cam6]

cam_path = Path("/gws/nopw/j04/iecdt/cheetah/2017_08_29/bottom/extrinsic_calib/4_cam_scene_sba.json")

cameras = load_scene(cam_path)

Ps = [cam["P"] for cam in cameras]

In [13]:
from typing import List, Dict, Tuple, Optional

def _find_column_triplet(df: pd.DataFrame, bodypart: str) -> Tuple[Tuple, Tuple, Tuple]:
    """
    Find the (x,y,likelihood) column tuples for a given bodypart in a DLC-style MultiIndex df.
    Returns (xcol, ycol, lcol) as column-key tuples suitable for df[...] indexing.
    Raises KeyError if not found.
    """
    # Accept both 2- or 3-level column index shapes, but DLC usually has 3-level: (scorer, bodypart, coord)
    cols = list(df.columns)
    xcol = ycol = lcol = None
    for c in cols:
        # treat c as tuple-like
        if len(c) >= 2 and c[1] == bodypart:
            coord = c[-1]  # assume final level is 'x','y','likelihood'
            if coord == 'x':
                xcol = c
            elif coord == 'y':
                ycol = c
            elif coord in ('likelihood','prob','score','p'):
                lcol = c
    if xcol is None or ycol is None:
        raise KeyError(f"couldn't find x/y columns for bodypart '{bodypart}'")
    # if likelihood missing, create a dummy column tuple (we'll fill with NaNs later)
    if lcol is None:
        # create a fake tuple with 'likelihood' appended (will error if used for indexing)
        # We prefer returning None to indicate missing
        lcol = None
    return xcol, ycol, lcol

def get_joint_from_df(df: pd.DataFrame, bodypart: str, frame: int) -> Tuple[Optional[float], Optional[float], Optional[float]]:
    """
    Return (u, v, likelihood) for a given bodypart at a specific frame index from a single camera DataFrame.
    If likelihood column is missing, returns np.nan for likelihood.
    """
    try:
        xcol, ycol, lcol = _find_column_triplet(df, bodypart)
    except KeyError:
        return (np.nan, np.nan, np.nan)
    u = df.at[frame, xcol] if xcol in df.columns else np.nan
    v = df.at[frame, ycol] if ycol in df.columns else np.nan
    if lcol is not None and lcol in df.columns:
        l = df.at[frame, lcol]
    else:
        l = np.nan
    # ensure numeric (could be pandas Series scalar)
    return float(u) if pd.notna(u) else np.nan, float(v) if pd.notna(v) else np.nan, float(l) if pd.notna(l) else np.nan

def triangulate_point_dlt(us_vs: List[Tuple[float,float]], Ps: List[np.ndarray]) -> Optional[np.ndarray]:
    """
    us_vs: list of (u,v) observations (pixel coords) for the same 3D point across cameras
    Ps: corresponding list of 3x4 camera projection matrices (same order)
    Returns: 3-element np.array [X, Y, Z] (float) on success, or None if failure.
    """
    if len(us_vs) < 2:
        return None

    # Build A (2*C x 4)
    A_rows = []
    for (u, v), P in zip(us_vs, Ps):
        p1 = P[0, :]  # 1x4
        p2 = P[1, :]
        p3 = P[2, :]
        A_rows.append(u * p3 - p1)
        A_rows.append(v * p3 - p2)
    A = np.vstack(A_rows)  # shape (2C, 4)

    # Solve by SVD: A x = 0 -> x is last col of V (Vt[-1])
    try:
        _, _, Vt = np.linalg.svd(A)
    except np.linalg.LinAlgError:
        return None
    X = Vt[-1, :]  # last row of Vt is last column of V
    if np.isclose(X[-1], 0):
        # cannot homogenize; ill-conditioned
        return None
    X = X / X[-1]
    return X[:3]  # X, Y, Z

def triangulate_frame_all_joints(
    cams_dfs: List[pd.DataFrame],
    P_matrices: List[np.ndarray],
    frame_idx: int,
    conf_thresh: float = 0.6,
    bodyparts: Optional[List[str]] = None
) -> Dict[str, Optional[np.ndarray]]:
    """
    Triangulate each bodypart for a single frame.
    - cams_dfs: list of pandas DataFrames, one per camera (same ordering as P_matrices)
    - P_matrices: list of 3x4 numpy arrays
    - frame_idx: integer row index in the DataFrames (same frame indexing across cams)
    - conf_thresh: min likelihood to count a detection
    - bodyparts: optional list of bodypart names to process; if None, infer from first df
    Returns: dict mapping bodypart -> np.array([X,Y,Z]) or None if triangulation not possible.
    """
    if len(cams_dfs) != len(P_matrices):
        raise ValueError("number of cameras (dfs) must match number of projection matrices")

    # infer bodyparts from first DataFrame if not supplied
    if bodyparts is None:
        first_df = cams_dfs[0]
        # find second level values assuming MultiIndex with bodypart as level 1
        if isinstance(first_df.columns, pd.MultiIndex) and first_df.columns.nlevels >= 2:
            bodyparts = list(dict.fromkeys(first_df.columns.get_level_values(1)))  # preserve order, unique
        else:
            # fallback: can't infer
            raise ValueError("Cannot infer bodyparts from the DataFrame columns. Provide bodyparts argument.")

    results = {}
    for bp in bodyparts:
        obs = []   # list of (u, v)
        Ps = []    # corresponding projection matrices
        for df, P in zip(cams_dfs, P_matrices):
            u, v, l = get_joint_from_df(df, bp, frame_idx)
            # treat NaN or small likelihood as missing
            if np.isfinite(u) and np.isfinite(v) and (np.isfinite(l) and l >= conf_thresh):
                obs.append((u, v))
                Ps.append(P)
            # if there is no likelihood column (l is NaN), you may optionally still use the point:
            # elif np.isfinite(u) and np.isfinite(v) and np.isnan(l):
            #     obs.append((u,v)); Ps.append(P)
        if len(obs) < 2:
            results[bp] = None
        else:
            X = triangulate_point_dlt(obs, Ps)
            results[bp] = X  # np.array([X,Y,Z]) or None
    return results

In [14]:
# Reset graph (important for repeated runs in notebooks)
tf.compat.v1.reset_default_graph()

# --- User-configurable options ---
conf_thresh = 0.6          # minimum detection confidence to accept 2D points
min_frac_joints = 1.00     # fraction of required joints that must be present in a frame (1.0 -> require all)
use_camera_index = 0       # which camera DataFrame to use as the input 2D view (0 = first camera in `cams`)
predict_14 = False         # match model setting (False -> predict 16 joints*3)
batch_size = 64
epochs = 3
dropout = 0.9

# --- Infer bodyparts list (take first 16 bodyparts from the first camera DF by default) ---
first_df = cams[use_camera_index]
if not (isinstance(first_df.columns, pd.MultiIndex) and first_df.columns.nlevels >= 2):
    raise RuntimeError("Expected DLC-style MultiIndex columns (scorer, bodypart, coord) in camera DataFrame.")

all_bodyparts = list(dict.fromkeys(first_df.columns.get_level_values(1)))
# Ensure we pick exactly 16 bodyparts for this model
n_joints_req = 16
if len(all_bodyparts) < n_joints_req:
    raise RuntimeError(f"Only found {len(all_bodyparts)} bodyparts; model expects {n_joints_req}.")
bodyparts = all_bodyparts[:n_joints_req]
print("Using bodyparts (order):", bodyparts)

# --- Build dataset (iterate frames, triangulate per-frame) ---
n_frames = min(df.shape[0] for df in cams)  # use smallest frame count across cameras
print("Frames available (min across cams):", n_frames)

X_list = []
Y_list = []
frames_used = []

for frame_idx in range(n_frames):
    # Triangulate all joints for this frame (uses DLT function you already have)
    tri3d = triangulate_frame_all_joints(cams, Ps, frame_idx, conf_thresh=conf_thresh, bodyparts=bodyparts)

    uvs = []
    valid_mask = []
    for bp in bodyparts:
        # get 2D from chosen camera
        u, v, l = get_joint_from_df(cams[use_camera_index], bp, frame_idx)
        # require finite coords and confidence threshold
        ok_2d = np.isfinite(u) and np.isfinite(v) and (np.isfinite(l) and l >= conf_thresh)
        ok_3d = (tri3d.get(bp) is not None) and (tri3d.get(bp) is not None and np.all(np.isfinite(tri3d.get(bp))))
        if ok_2d and ok_3d:
            uvs.append((u, v))
            valid_mask.append(True)
        else:
            uvs.append((np.nan, np.nan))
            valid_mask.append(False)

    n_valid = sum(valid_mask)
    if n_valid / float(n_joints_req) >= min_frac_joints:
        # require all joints present by default; change min_frac_joints if you want a looser filter
        # build input vector (flatten u,v for each joint in order)
        X_vec = np.array([coord for uv in uvs for coord in uv], dtype=np.float32)  # length = n_joints_req * 2

        # build target vector (flatten X,Y,Z for each joint in order)
        Y_vec = np.zeros((n_joints_req * 3,), dtype=np.float32)
        for i, bp in enumerate(bodyparts):
            pt3 = tri3d.get(bp)
            if pt3 is None:
                # should not happen due to min_frac_joints check; but set NaN to flag if it does
                Y_vec[3*i:3*i+3] = np.array([np.nan, np.nan, np.nan], dtype=np.float32)
            else:
                Y_vec[3*i:3*i+3] = np.array(pt3, dtype=np.float32)
        # final sanity: ensure no NaNs in target/input
        if np.all(np.isfinite(X_vec)) and np.all(np.isfinite(Y_vec)):
            X_list.append(X_vec)
            Y_list.append(Y_vec)
            frames_used.append(frame_idx)
    # else skip this frame

X = np.stack(X_list, axis=0) if len(X_list) > 0 else np.empty((0, n_joints_req*2), dtype=np.float32)
Y = np.stack(Y_list, axis=0) if len(Y_list) > 0 else np.empty((0, n_joints_req*3), dtype=np.float32)

print(f"Collected {X.shape[0]} samples (frames) with {n_joints_req} joints each.")
if X.shape[0] == 0:
    raise RuntimeError("No training samples found. Try lowering conf_thresh or min_frac_joints.")

# --- Train/Val split (random) ---
N = X.shape[0]
perm = np.random.permutation(N)
train_frac = 0.8
n_train = int(train_frac * N)
train_idx = perm[:n_train]
val_idx = perm[n_train:]

X_train = X[train_idx]
Y_train = Y[train_idx]
X_val = X[val_idx]
Y_val = Y[val_idx]

print("Train/Val shapes:", X_train.shape, Y_train.shape, X_val.shape, Y_val.shape)

# --- Cast to float32 (already done) and check dims match model expected dims ---
input_dim = n_joints_req * 2
output_dim = (14*3 if predict_14 else 16*3)
if X_train.shape[1] != input_dim:
    raise RuntimeError(f"Input dim mismatch: X has {X_train.shape[1]} but model expects {input_dim}.")
if Y_train.shape[1] != output_dim:
    raise RuntimeError(f"Output dim mismatch: Y has {Y_train.shape[1]} but model expects {output_dim}.")

# --- Train the model using the existing helper ---
model, sess = train_model_on_arrays(
    X_train, Y_train,
    X_val, Y_val,
    linear_size=1024,
    num_layers=2,
    residual=False,
    batch_norm=False,
    max_norm=False,
    batch_size=batch_size,
    learning_rate=1e-3,
    epochs=epochs,
    dropout=dropout,
    summaries_dir="./logs_realdata",
    predict_14=predict_14,
    use_gpu=True,
    save_dir="./realdata_experiments"
)

# ----------------------------
# Evaluate on ONE FULL BATCH from validation (same size as batch_size)
# ----------------------------
if X_val.shape[0] < batch_size:
    print("Warning: validation set has fewer than batch_size samples; using all validation samples for one evaluation batch.")
    Xb = X_val
    Yb = Y_val
else:
    Xb = X_val[:batch_size]
    Yb = Y_val[:batch_size]

loss_val, summary, preds = model.step(
    sess,
    Xb,
    Yb,
    dropout_keep_prob=1.0,
    isTraining=False
)

print("Validation loss:", loss_val)
print("Predictions shape:", preds.shape)

# Keep session open for further evaluation / saving outside this cell

Using bodyparts (order): ['r_eye', 'l_eye', 'r_shoulder', 'r_front_knee', 'r_front_ankle', 'r_front_paw', 'spine', 'r_hip', 'r_back_knee', 'r_back_ankle', 'r_back_paw', 'tail1', 'tail2', 'l_shoulder', 'l_front_knee', 'l_front_ankle']
Frames available (min across cams): 321
Collected 0 samples (frames) with 16 joints each.


RuntimeError: No training samples found. Try lowering conf_thresh or min_frac_joints.

In [None]:
sess.close()