In [1]:
import os 
import time
import json
from pathlib import Path
import pandas as pd
import torch.nn as nn
import numpy as np
import torch
from torch.utils.data import DataLoader
from torch import optim

- Design a neural network to predict 3D pose from 2D coordinates.
- Options:

1. Simple MLP:
    - input: a single frame with all camera views concatenated -> (C, J, 2), where C is the number of cameras and J is the number of joints
    - output: 3D coordinates (J, 3)



In [2]:
# Dimensions that size the network's input and output layers.
NUM_JOINTS = 20  # joints per pose; the flattened 3D target has NUM_JOINTS * 3 values
NUM_CAMS = 6     # camera views concatenated into a single input frame

# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

In [3]:
# For the metrics, each prediction needs to have shape (J, 3).

# As a test, run a sequence: the network outputs a length-60 vector for each frame in the sequence,
# which is then reshaped (60,) -> (20, 3).


In [33]:
# model
class SimpleMLP(nn.Module):
    """Three-layer fully connected network mapping multi-view 2D detections to a flat 3D pose.

    Every input sample is flattened to a single vector of
    ``num_joints * num_cams * in_channels`` values and passed through two
    hidden layers; the output is a vector of ``num_joints * 3`` values.

    Args:
        activation_function: Callable applied after the first and second
            linear layers (e.g. ``nn.ReLU()``).
        num_joints: Joints per pose. ``None`` (default) uses the notebook-level
            ``NUM_JOINTS`` constant.
        num_cams: Camera views per frame. ``None`` (default) uses the
            notebook-level ``NUM_CAMS`` constant.
        in_channels: Values per joint detection. Defaults to 3, the width the
            network is currently trained with (presumably x, y and a
            confidence score -- confirm against the data loader). Pass 2 to
            train on bare (x, y) coordinates.
        hidden_features: Width of both hidden layers.
    """

    def __init__(self, activation_function, num_joints=None, num_cams=None,
                 in_channels=3, hidden_features=512):
        super().__init__()
        # Resolve the notebook constants lazily so `SimpleMLP(activation)` keeps
        # its original layer sizes while explicit sizes remain possible.
        if num_joints is None:
            num_joints = NUM_JOINTS
        if num_cams is None:
            num_cams = NUM_CAMS

        self.fc1 = nn.Linear(in_features=num_joints * num_cams * in_channels,
                             out_features=hidden_features)
        self.fc2 = nn.Linear(in_features=hidden_features, out_features=hidden_features)
        self.fc3 = nn.Linear(in_features=hidden_features, out_features=num_joints * 3)

        self.activation_function = activation_function

    def forward(self, x: torch.Tensor):
        """Run a forward pass.

        Args:
            x: Batch of detections with the batch on dimension 0. All remaining
                dimensions are flattened, so their product must equal the
                input width of ``fc1``.

        Returns:
            Tensor of shape ``(batch, num_joints * 3)``.
        """
        # Protect the network from missing detections: NaNs become zeros.
        # This is done out-of-place so the caller's tensor is left untouched.
        x = x.masked_fill(torch.isnan(x), 0)
        # Keep the batch dimension and flatten the rest into one feature vector.
        # `flatten` (unlike `view`) also accepts non-contiguous input.
        x = x.flatten(start_dim=1)
        x = self.activation_function(self.fc1(x))
        x = self.activation_function(self.fc2(x))
        return self.fc3(x)



In [34]:
# Evaluation function
def evaluate_model(model, criterion, loader: DataLoader, device: str):
    """Compute the mean per-batch loss of ``model`` over ``loader``.

    The model is switched to evaluation mode and is left in that mode when
    the function returns. Gradients are disabled during the pass.

    Args:
        model: Module called as ``model(dets_2d)``.
        criterion: Loss called as ``criterion(preds, targets)``; must return a
            scalar tensor.
        loader: Iterable yielding ``(dets_2d, gt_3d)`` batches. It does not
            need to implement ``__len__``.
        device: Device the batches are moved to before the forward pass.

    Returns:
        The unweighted average of the batch losses, as a Python float.

    Raises:
        ValueError: If ``loader`` yields no batches.
    """
    model.eval()  # Set the model to evaluation mode
    total_loss = 0.0
    num_batches = 0

    with torch.no_grad():  # Disable gradient computation
        for dets_2d, gt_3d in loader:
            dets_2d = dets_2d.to(device)
            gt_3d = gt_3d.to(device)
            # Flatten the targets to (batch, -1) to match the model output.
            # `reshape` also handles non-contiguous targets, where `view` raises.
            gt_3d = gt_3d.reshape(gt_3d.size(0), -1)

            preds = model(dets_2d)
            total_loss += criterion(preds, gt_3d).item()
            num_batches += 1

    # Count batches while iterating instead of calling len(loader): this works
    # for loaders without __len__ and avoids a ZeroDivisionError on empty ones.
    if num_batches == 0:
        raise ValueError("evaluate_model: loader yielded no batches")

    return total_loss / num_batches

In [None]:
# Training function
def train_model(model ,criterion, optimizer, train_loader, val_loader, device, epochs=5):
    """Train ``model`` for ``epochs`` epochs, validating after every epoch.

    Each epoch runs one optimisation pass over ``train_loader``, then calls
    ``evaluate_model`` on ``val_loader`` and prints the mean training and
    validation losses.

    Args:
        model: Module called as ``model(dets_2d)``; updated in place.
        criterion: Loss called as ``criterion(preds, targets)``.
        optimizer: Optimizer over the model's parameters.
        train_loader: Iterable yielding ``(dets_2d, gt_3d)`` training batches.
        val_loader: Iterable of validation batches, passed to ``evaluate_model``.
        device: Device the batches are moved to.
        epochs: Number of passes over ``train_loader``; must be at least 1.

    Returns:
        The validation loss measured after the final epoch.

    Raises:
        ValueError: If ``epochs`` is less than 1 or ``train_loader`` yields no
            batches.
    """
    # Without at least one epoch there is no validation loss to return.
    if epochs < 1:
        raise ValueError(f"epochs must be at least 1, got {epochs}")

    for epoch in range(epochs):
        # Training phase
        model.train()  # evaluate_model leaves the model in eval mode, so reset it every epoch
        running_loss = 0.0
        num_batches = 0

        for dets_2d, gt_3d in train_loader:
            dets_2d = dets_2d.to(device)
            gt_3d = gt_3d.to(device)
            # Flatten the 3D ground truth to (batch, -1) to match the model output.
            # `reshape` also handles non-contiguous targets, where `view` raises.
            gt_3d = gt_3d.reshape(gt_3d.size(0), -1)

            optimizer.zero_grad()  # Clear previous gradients

            preds = model(dets_2d)  # Forward pass
            loss = criterion(preds, gt_3d)  # Compute loss
            loss.backward()  # Backpropagation
            optimizer.step()  # Update weights

            running_loss += loss.item()  # Accumulate batch loss
            num_batches += 1

        # Count batches while iterating so the average below never divides by zero.
        if num_batches == 0:
            raise ValueError("train_model: train_loader yielded no batches")

        # Validation phase (separate function)
        val_loss = evaluate_model(model, criterion, val_loader, device)

        # Print progress
        print(
            f"Epoch [{epoch+1}/{epochs}], "
            f"Train Loss: {running_loss/num_batches:.4f}, "
            f"Val Loss: {val_loss:.4f}"
        )

    return val_loss

In [32]:
# Build the train / validation / test loaders from the project's data_loader module.
from data_loader import get_data_loaders

train_loader, val_loader, test_loader = get_data_loaders()

# This cell relies on SimpleMLP and train_model, so their defining cells must run first.
model = SimpleMLP(activation_function=nn.ReLU()).to(device)
optimizer = optim.AdamW(model.parameters())

# Train with mean-squared-error loss; the last expression displays the final validation loss.
train_model(
    model=model,
    criterion=nn.MSELoss(),
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    device=device,
)

18 13 59
WE ARE IN EPOCH 0
THE FLATTENED X LOOKS LIKE
tensor([[0.0000e+00, 0.0000e+00, 1.9541e-04,  ..., 0.0000e+00, 0.0000e+00,
         7.7814e-05],
        [0.0000e+00, 0.0000e+00, 8.6474e-04,  ..., 1.7729e+03, 6.4366e+02,
         8.1789e-01],
        [2.2811e+03, 6.9873e+02, 9.1337e-01,  ..., 1.3383e+02, 8.7941e+02,
         5.2369e-01],
        ...,
        [0.0000e+00, 0.0000e+00, 2.1402e-02,  ..., 8.5177e+02, 6.0435e+02,
         8.5735e-01],
        [1.8500e+03, 7.9758e+02, 9.9654e-01,  ..., 6.8442e+02, 7.2013e+02,
         9.9328e-01],
        [0.0000e+00, 0.0000e+00, 5.2845e-04,  ..., 0.0000e+00, 0.0000e+00,
         1.1727e-04]])
THE ACTIVATED X LOOKS LIKE
tensor([[  0.0000, 326.7367,   0.0000,  ...,   0.0000,  24.0740,   0.0000],
        [106.2269,  90.1843,   0.0000,  ...,   0.0000,   0.0000,   0.0000],
        [225.7448,   0.0000,   0.0000,  ..., 280.6210, 155.2523,   0.0000],
        ...,
        [393.2144,  13.8206,   0.0000,  ...,   0.0000,  61.0215,  55.1585],
      

2803.0146730484503