In [2]:
import sys
import os


def import_modules(libpath):
    """Put the parent of ``dirname(libpath)`` on ``sys.path``.

    The directory that contains ``libpath`` is taken, one more level is
    climbed (``os.path.pardir``), and the result is made absolute and
    normalised. The resolved path is always printed; it is appended to
    ``sys.path`` only when it is not already present.

    :param libpath: path (relative or absolute) inside the package tree.
    """
    one_level_up = os.path.join(os.path.dirname(libpath), os.path.pardir)
    path2add = os.path.normpath(os.path.abspath(one_level_up))
    print(f'Adding path: {path2add}')
    if path2add in sys.path:
        # Already importable; avoid duplicate entries on cell re-runs.
        return
    sys.path.append(path2add)

In [3]:
# Third-party imports needed by the cells below.
# numpy is used by conf_func / corner_confidences and the confidence cell,
# so it has to be available before those cells run on a fresh kernel.
import numpy as np
import torch
from torch import nn
# The Unified FCN test cell calls both of these helpers, so the imports must
# be active (they were commented out, which raises NameError on Run All).
from torchvision.models.feature_extraction import create_feature_extractor
from torchvision.models.feature_extraction import get_graph_node_names

# Project-local imports: the source tree has to be put on sys.path first.
import_modules('../src/models/components')
from datamodules.components.frame_dataset import H2OFrameDataset
from models.components.unified_fcn import UnifiedFCNModule

Adding path: /data/sri.hegde/ptg-activity-recognition/activity_hydra/src


  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Test the datahandler/dataloader implementation
# Builds the frame dataset from the H2O root directory and the pose training
# split file, then fetches a single sample and lists the keys it exposes.
dataset = H2OFrameDataset(
    '../data/h2o/', '../data/h2o/label_split/pose_train.txt')
# `data` is read again by the confidence-computation cell further down
# (keys 'l_hand', 'r_hand', 'obj_label', 'obj_pose', 'verb').
data = dataset[2]
print(data.keys())

In [None]:
# Utility functions to create grid data

def conf_func(dist, alpha, dth):
    """Turn coordinate differences into a mean exponential confidence.

    The Euclidean norm of ``dist`` is taken over its last axis. Every norm
    strictly below ``dth`` scores ``exp(alpha * (1 - norm / dth))``; all
    other norms score 0. The scores are then averaged over the last
    remaining axis.

    :param dist: array of coordinate differences; the last axis holds the
        components of each difference vector.
    :param alpha: sharpness of the exponential.
    :param dth: distance threshold at and beyond which the score is zero.
    :return: array with the last two axes of ``dist`` reduced away.
    """
    norms = np.sqrt(np.square(dist).sum(axis=-1))
    within_threshold = norms < dth
    scores = np.exp(alpha * (1 - norms / dth)) * within_threshold
    return scores.mean(axis=-1)


def corner_confidences(cp_pred_np: np.ndarray, obj_pose: np.ndarray, l_hand: np.ndarray, r_hand: np.ndarray, alpha: float = 2.0, dth=(75, 75, 7.5)):
    """Confidence of predicted control points against the ground truth.

    The ground-truth object pose and both hand poses are stacked (in the
    order object, left hand, right hand) and, like the predictions, reshaped
    so that the last axis holds one 3-vector per control point. The
    difference between ground truth and prediction is scored separately in
    the image plane (first two components, threshold ``dth[0]``) and in depth
    (last component, threshold ``dth[-1]``) with :func:`conf_func`, and the
    two scores are averaged.

    Changes from the previous version:

    * the depth term now uses the absolute depth error and the same
      exponential confidence as the image-plane term. Previously the mask
      compared the *signed* difference to the threshold (so any negative
      error passed) and the raw mean error was returned as "confidence",
      which grows as the prediction gets worse.
      NOTE(review): confirm this matches the intended loss formulation.
    * ``dth`` defaults to a tuple instead of a shared mutable list.

    :param cp_pred_np: predicted control points; the last axis is a flat
        run of 3-vectors and the axis before it must line up with the
        (object, left hand, right hand) stacking.
    :param obj_pose: flat ground-truth object control points.
    :param l_hand: flat ground-truth left-hand control points.
    :param r_hand: flat ground-truth right-hand control points.
    :param alpha: sharpness of the exponential confidence.
    :param dth: thresholds; ``dth[0]`` is used for the image plane and
        ``dth[-1]`` for depth.
    :return: ``0.5 * (c_uv + c_z)``, one value per prediction and per
        stacked ground-truth entity.
    """
    cp_gt = np.stack([obj_pose, l_hand, r_hand])
    # Unflatten (..., 3 * n_points) -> (..., n_points, 3).
    cp_gt = cp_gt.reshape(cp_gt.shape[:-1] + (-1, 3))
    cp_pred_np = cp_pred_np.reshape(cp_pred_np.shape[:-1] + (-1, 3))
    dist = cp_gt - cp_pred_np
    c_uv = conf_func(dist[..., :2], alpha, dth[0])
    # Keep the last axis (slice, not index) so conf_func's norm reduces it to
    # the absolute depth error of each control point.
    c_z = conf_func(dist[..., -1:], alpha, dth[-1])
    return 0.5 * (c_uv + c_z)

In [None]:
# Test the working of Unified FCN
# Runs a random image through the backbone, projects the extracted feature
# map with a linear layer and slices the result into the individual heads.
ufcn = UnifiedFCNModule('convnext_tiny', 21, 9, 12)
net = ufcn.net
train_nodes, _ = get_graph_node_names(net)

# Expose a single intermediate node of the backbone under the key 'feat_out'.
net = create_feature_extractor(
    net, return_nodes={'features.7.2.block.4': 'feat_out'})
out = net(torch.rand(1, 3, 416, 416))
x = out['feat_out']

# Layout of one output vector:
#   [0, cp_end)           control-point coordinates
#   cp_end                confidence
#   [obj_start, obj_end)  object class scores
#   [obj_end, verb_end)   verb class scores
cp_end = 3 * ufcn.num_cpts
obj_start = cp_end + 1
obj_end = obj_start + ufcn.obj_classes
verb_end = obj_end + ufcn.verb_classes

out_channels = 5 * 3 * verb_end
lin = nn.Linear(x.shape[-1], out_channels)
x = lin(x)
# x = x.permute(0, 3, 1, 2)

bsize, _, h, w = x.size()
x_reshaped = x.contiguous().view(bsize, -1, 3, verb_end)
# print(x.shape)

# vector indices (at position 2): 0 -> object, 1 -> l_hand, 2 -> r_hand
cp_pred = torch.sigmoid(x_reshaped[:, :, :, 0:cp_end])
conf_pred = x_reshaped[:, :, :, cp_end].contiguous()
obj_pred = torch.sigmoid(x_reshaped[:, :, 0, obj_start:obj_end])
l_verb_pred = torch.sigmoid(x_reshaped[:, :, 1, obj_end:verb_end])
r_verb_pred = torch.sigmoid(x_reshaped[:, :, 2, obj_end:verb_end])

print(cp_pred.shape, conf_pred.shape, obj_pred.shape,
      l_verb_pred.shape, r_verb_pred.shape)

In [None]:
# Confidence computation
# Ground-truth annotations of the sample fetched from the dataset earlier.
l_hand = data['l_hand']
r_hand = data['r_hand']
obj_label = data['obj_label']
obj_pose = data['obj_pose']
verb = data['verb']

# Detach the predicted control points from the graph and score them on the CPU.
cp_pred_np = cp_pred.data.cpu().numpy()
conf = corner_confidences(cp_pred_np, obj_pose, l_hand, r_hand)
# print(conf.shape)

noho_scale = 0.1
ho_scale = 5
# Start from a mask that weights every entry with noho_scale.
conf_mask = noho_scale * np.ones_like(conf)
print(conf_mask.shape)

# Visualize predicted and GT bboxes

In [None]:
%matplotlib notebook
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np

In [None]:
# Data
# Hard-coded sample data for the 3D box visualisation below.
# `gt` holds one dict per sample; the plotting loop pairs gt[i] with results[i].
# Each 'gt_boxes_upright_depth' entry is a list with a single 6-value box; the
# plotting code reads the first three values as the box centre and the last
# three as its size.
gt = [{'gt_num': 1, 'gt_boxes_upright_depth': [[0.0608, 0.0426, 0.5437, 0.1157, 0.1120, 0.1126]], 'class': [8]}, {'gt_num': 1, 'gt_boxes_upright_depth': [[0.0367, 0.0234, 0.5902, 0.1157, 0.1120, 0.1126]], 'class': [8]}, {'gt_num': 1, 'gt_boxes_upright_depth': [[0.0276, 0.0230, 0.6005, 0.1157, 0.1120, 0.1126]], 'class': [8]}, {'gt_num': 1, 'gt_boxes_upright_depth': [[0.0098, 0.0136, 0.5946, 0.1157, 0.1120, 0.1126]], 'class': [8]}, {'gt_num': 1, 'gt_boxes_upright_depth': [[0.0230, 0.0301, 0.6039, 0.1157, 0.1120, 0.1126]], 'class': [8]}]
# `results` holds one dict per sample with 'boxes_3d' (7 values per box, of
# which the plotting loop uses only the first 6), 'scores_3d' and 'labels_3d'.
# NOTE(review): the 7th box value is presumably a heading/yaw angle - confirm.
results = [{'boxes_3d': [[ 4.2602e-03,  5.2471e-04,  4.9059e-02,  4.1665e-02,  6.8070e-02,
          3.5510e-02,  6.3585e+00],
        [ 2.7554e-03, -3.2314e-04,  3.0911e-02,  2.0331e-02,  4.1466e-02,
          1.7244e-02,  6.3778e+00],
        [ 1.4760e-03,  2.7081e-04,  2.2068e-02,  1.7085e-02,  3.7264e-02,
          1.5370e-02,  6.4259e+00],
        [ 2.5878e-03,  2.4019e-04,  3.7491e-02,  3.5269e-02,  6.2041e-02,
          2.9389e-02,  6.4114e+00],
        [ 1.8443e-03, -5.7145e-04,  3.1550e-02,  1.2864e-02,  3.4855e-02,
          1.2427e-02,  6.4068e+00],
        [ 4.8076e-03,  1.5290e-03,  4.0302e-02,  3.0070e-02,  5.4107e-02,
          2.4297e-02,  6.3619e+00],
        [ 1.2358e-02,  2.9496e-03,  6.4462e-02,  5.6419e-02,  8.6110e-02,
          4.8433e-02,  6.4080e+00],
        [ 5.3392e-03, -9.3258e-04,  4.8271e-02,  2.3491e-02,  4.8560e-02,
          2.0297e-02,  6.3691e+00],
        [-1.4695e-03,  6.3801e-04,  2.7495e-02,  2.1895e-02,  4.6697e-02,
          2.0390e-02,  6.3689e+00],
        [-3.0802e-03,  2.0738e-03,  3.9001e-02,  4.4559e-02,  7.7915e-02,
          3.7976e-02,  6.3573e+00],
        [-2.4542e-03, -1.1351e-03,  4.2858e-02,  1.8496e-02,  4.5568e-02,
          1.8163e-02,  6.3802e+00],
        [ 9.9102e-03,  2.5398e-02,  9.2619e-02,  9.9285e-02,  1.4596e-01,
          9.9848e-02,  6.3094e+00]], 'scores_3d': [0.2975, 0.2823, 0.2410, 0.2406, 0.1858, 0.1828, 0.1757, 0.1128, 0.1045,
        0.0875, 0.0748, 0.0644], 'labels_3d': [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]}, {'boxes_3d': [[ 3.5143e-03,  1.1140e-03,  4.5588e-02,  4.4120e-02,  7.2670e-02,
          3.8275e-02,  6.3660e+00],
        [ 2.2898e-03, -3.3071e-04,  2.5120e-02,  2.2491e-02,  4.4327e-02,
          1.8669e-02,  6.3837e+00],
        [ 2.4653e-03,  6.7453e-04,  3.4972e-02,  3.6590e-02,  6.1495e-02,
          3.0332e-02,  6.4175e+00],
        [ 1.2567e-03,  2.1678e-04,  2.1562e-02,  1.6043e-02,  3.5033e-02,
          1.3773e-02,  6.4222e+00],
        [ 1.3964e-03,  6.7389e-05,  3.0861e-02,  1.2121e-02,  3.3254e-02,
          1.1411e-02,  6.4274e+00],
        [-1.5366e-03,  6.7237e-04,  2.6178e-02,  2.0999e-02,  4.5943e-02,
          1.9971e-02,  6.3779e+00],
        [ 6.3550e-03,  1.2876e-03,  3.6886e-02,  3.2408e-02,  5.7179e-02,
          2.7509e-02,  6.4173e+00],
        [ 1.4005e-02,  4.3267e-03,  6.6092e-02,  5.8352e-02,  8.8441e-02,
          4.9612e-02,  6.4718e+00],
        [-2.4569e-03,  1.6528e-03,  4.0393e-02,  4.2257e-02,  7.3554e-02,
          3.5196e-02,  6.3795e+00],
        [ 7.5612e-03, -1.5335e-03,  5.1426e-02,  2.4108e-02,  5.0948e-02,
          2.1128e-02,  6.4236e+00],
        [ 9.2552e-03,  2.7353e-02,  9.4263e-02,  1.0368e-01,  1.5013e-01,
          9.8138e-02,  6.3014e+00]], 'scores_3d': [0.2935, 0.2829, 0.2604, 0.2549, 0.1933, 0.1420, 0.1240, 0.1172, 0.1156,
        0.0876, 0.0665], 'labels_3d': [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]}, {'boxes_3d': [[ 2.2215e-03,  1.2342e-03,  4.5963e-02,  4.5802e-02,  7.3616e-02,
          3.8791e-02,  6.3598e+00],
        [ 1.5422e-03,  1.9453e-04,  2.8852e-02,  2.2254e-02,  4.3009e-02,
          1.7833e-02,  6.3638e+00],
        [ 1.7672e-03,  2.2735e-04,  3.6136e-02,  3.7316e-02,  6.3868e-02,
          3.1232e-02,  6.4154e+00],
        [ 8.3405e-04,  1.8281e-04,  2.1335e-02,  1.7564e-02,  3.7569e-02,
          1.5221e-02,  6.4088e+00],
        [ 7.8722e-04,  3.1088e-04,  3.0859e-02,  1.2647e-02,  3.4672e-02,
          1.2047e-02,  6.4058e+00],
        [-1.8676e-03,  5.3797e-04,  2.4578e-02,  2.0148e-02,  4.3895e-02,
          1.9182e-02,  6.3849e+00],
        [-3.0614e-03,  1.5974e-03,  3.7432e-02,  4.2867e-02,  7.4040e-02,
          3.6636e-02,  6.3648e+00],
        [ 6.1169e-03,  2.0373e-03,  4.1623e-02,  3.1713e-02,  5.5458e-02,
          2.6231e-02,  6.4310e+00],
        [ 1.4327e-02,  4.4097e-03,  7.1036e-02,  6.0509e-02,  8.9436e-02,
          5.1614e-02,  6.4688e+00],
        [ 7.6242e-03, -1.4671e-03,  5.1131e-02,  2.4165e-02,  5.0639e-02,
          2.1695e-02,  6.4447e+00],
        [ 6.9473e-03,  2.8553e-02,  1.0006e-01,  1.0847e-01,  1.5243e-01,
          1.0024e-01,  6.2842e+00]], 'scores_3d': [0.2967, 0.2812, 0.2659, 0.2577, 0.1947, 0.1489, 0.1246, 0.1203, 0.1050,
        0.0831, 0.0618], 'labels_3d': [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]}, {'boxes_3d': [[ 2.0585e-03, -3.7776e-04,  2.9671e-02,  2.2234e-02,  4.3007e-02,
          1.7865e-02,  6.4333e+00],
        [ 3.7915e-03,  7.6625e-04,  4.8518e-02,  4.4233e-02,  7.3481e-02,
          3.7787e-02,  6.4017e+00],
        [ 2.1777e-03,  2.7639e-04,  3.5109e-02,  3.7682e-02,  6.4989e-02,
          3.1578e-02,  6.4383e+00],
        [ 1.3203e-03, -1.8006e-04,  2.1038e-02,  1.6179e-02,  3.5543e-02,
          1.4186e-02,  6.4761e+00],
        [ 1.1089e-03, -4.8889e-04,  3.0157e-02,  1.2362e-02,  3.4118e-02,
          1.1559e-02,  6.4289e+00],
        [-1.1203e-03,  1.4172e-04,  2.3592e-02,  1.9768e-02,  4.4702e-02,
          1.9158e-02,  6.4054e+00],
        [ 6.6975e-03,  9.2305e-04,  3.9167e-02,  3.2537e-02,  5.7294e-02,
          2.6770e-02,  6.4495e+00],
        [-2.8697e-03,  1.5351e-03,  3.8210e-02,  4.4426e-02,  7.5343e-02,
          3.7498e-02,  6.4079e+00],
        [-1.3871e-03, -1.2734e-03,  3.8744e-02,  1.6143e-02,  4.1936e-02,
          1.6370e-02,  6.4024e+00],
        [ 1.5768e-02,  4.9125e-03,  7.1725e-02,  6.4920e-02,  9.5957e-02,
          5.5656e-02,  6.4698e+00],
        [ 7.2098e-03, -9.0235e-04,  4.8401e-02,  2.3512e-02,  4.8862e-02,
          2.0625e-02,  6.4217e+00],
        [ 9.3729e-03,  3.0865e-02,  1.0556e-01,  1.0813e-01,  1.5269e-01,
          1.0352e-01,  6.3016e+00]], 'scores_3d': [0.2836, 0.2812, 0.2607, 0.2582, 0.1879, 0.1539, 0.1176, 0.1167, 0.1031,
        0.1023, 0.0912, 0.0576], 'labels_3d': [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]}, {'boxes_3d': [[ 2.3924e-03,  1.0150e-03,  4.5687e-02,  4.5438e-02,  7.3256e-02,
          3.8520e-02,  6.3666e+00],
        [ 1.5279e-03,  6.0028e-04,  2.7723e-02,  2.2595e-02,  4.4020e-02,
          1.8334e-02,  6.3806e+00],
        [ 1.4771e-03,  1.4006e-05,  3.5330e-02,  3.5814e-02,  6.1235e-02,
          2.9381e-02,  6.4096e+00],
        [ 6.3927e-04,  3.8336e-04,  2.0594e-02,  1.6960e-02,  3.6159e-02,
          1.4450e-02,  6.4069e+00],
        [ 4.2850e-04,  5.8526e-04,  2.9921e-02,  1.2704e-02,  3.5004e-02,
          1.2056e-02,  6.3828e+00],
        [-2.0349e-03,  5.9369e-04,  2.3466e-02,  1.9317e-02,  4.2250e-02,
          1.8173e-02,  6.3765e+00],
        [-3.0615e-03,  1.4443e-03,  3.4656e-02,  4.2257e-02,  7.3143e-02,
          3.5784e-02,  6.3724e+00],
        [ 6.5856e-03,  2.2203e-03,  4.2721e-02,  3.2581e-02,  5.6871e-02,
          2.6687e-02,  6.4282e+00],
        [ 1.4278e-02,  4.6393e-03,  7.1333e-02,  6.3113e-02,  9.1646e-02,
          5.4669e-02,  6.4552e+00],
        [ 8.4494e-03, -1.5283e-03,  5.5426e-02,  2.5070e-02,  5.2540e-02,
          2.2768e-02,  6.4366e+00],
        [ 7.5856e-03,  2.7500e-02,  9.6643e-02,  1.0626e-01,  1.5090e-01,
          9.9937e-02,  6.2948e+00]], 'scores_3d': [0.2976, 0.2846, 0.2724, 0.2543, 0.1893, 0.1558, 0.1299, 0.1096, 0.1009,
        0.0812, 0.0598], 'labels_3d': [8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8]}]

In [None]:
def cuboid_data(center, size):
    """Build the coordinate grids used to draw an axis-aligned cuboid.

    :param center: centre of the cuboid, triple ``(x, y, z)``.
    :type center: tuple, numpy.array, list
    :param size: extent of the cuboid, triple ``(x_length, y_width, z_height)``.
    :type size: tuple, numpy.array, list
    :return: three ``4 x 5`` arrays ``(x, y, z)``; the rows describe, in
        order, the bottom, upper, outside and inside surfaces, each as a
        closed loop of five points.
    """
    # Axis convention: x to the left, y to the inside, z upwards.
    # `origin` is the (left, outside, bottom) corner of the cuboid.
    origin = [c - s / 2 for c, s in zip(center, size)]
    l, w, h = size
    x0, y0, z0 = origin[0], origin[1], origin[2]
    x1, y1, z1 = x0 + l, y0 + w, z0 + h

    # Closed 5-point loops reused by several surfaces.
    loop_x = [x0, x1, x1, x0, x0]
    loop_y = [y0, y0, y1, y1, y0]
    loop_z = [z0, z0, z1, z1, z0]

    # Every surface shares the same x loop.
    x = np.array([loop_x, loop_x, loop_x, loop_x])
    # Bottom/upper follow the y loop; outside sits at y0, inside at y1.
    y = np.array([loop_y, loop_y, [y0] * 5, [y1] * 5])
    # Bottom sits at z0, upper at z1; outside/inside follow the z loop.
    z = np.array([[z0] * 5, [z1] * 5, loop_z, loop_z])
    return x, y, z


In [None]:
# One 3D figure per sample: the ground-truth box in red, all predictions in blue.
N = len(gt)
for i in range(N):
    gt_box = np.array(gt[i]['gt_boxes_upright_depth'])
    # Only the first six values (centre + size) of each prediction are drawn.
    pred_box = np.array(results[i]['boxes_3d'])[:, :6]

    # Ground truth goes first so that row 0 can be coloured differently
    # (each gt entry in this notebook holds exactly one box).
    all_boxes = np.concatenate([gt_box, pred_box])

    fig = plt.figure()
    # Figure.gca(projection=...) was deprecated in Matplotlib 3.4 and later
    # removed; add_subplot is the supported way to create 3D axes.
    ax = fig.add_subplot(111, projection='3d')
    for b in range(len(all_boxes)):
        X, Y, Z = cuboid_data(all_boxes[b][:3], (all_boxes[b][3], all_boxes[b][4], all_boxes[b][5]))
        col = 'r' if b == 0 else 'b'
        ax.plot_surface(X, Y, Z, color=col, rstride=1, cstride=1, alpha=0.1)
    ax.set_xlabel('X')
    ax.set_ylabel('Y')
    ax.set_zlabel('Z')
    ax.set_title(f'Sample {i}: GT (red) vs. predictions (blue)')
    plt.show()

# Pre-computation of features

In [4]:
import tqdm
import torch
from torch import nn
from torchvision.models import resnext50_32x4d
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import transforms

# Switch torch.multiprocessing to the 'file_system' tensor sharing strategy.
# NOTE(review): presumably set to avoid running out of file descriptors with
# the 12-worker DataLoader used in the feature pre-computation cell - confirm.
import torch.multiprocessing
torch.multiprocessing.set_sharing_strategy('file_system')

In [6]:
# Run every frame of the split through the network once and cache the
# resulting features together with the frame's labels in a sibling 'feat'
# directory (one .pk file per frame).
device = 'cuda'
net = UnifiedFCNModule("resnext", 21, 9, 12).to(device)
# Modules are created in training mode; switch to eval so that layers such as
# batch norm / dropout behave deterministically while extracting features.
net.eval()
# NOTE(review): mean/std look like the usual ImageNet statistics - confirm
# they match what the backbone was trained with.
trans = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ]
)
root_path = "../data/h2o/"
dataset = H2OFrameDataset(root_path, '../data/h2o/label_split/pose_test.txt', trans)
dataloader = DataLoader(
    dataset=dataset,
    batch_size=1,
    num_workers=12,
    pin_memory=False,
    shuffle=False,
)

print(len(dataloader))
# Pure inference: no autograd graph is needed, which also keeps the saved
# feature tensors free of graph references.
with torch.no_grad():
    for data in tqdm.tqdm(dataloader):
        for key in ("frm", "obj_label", "verb"):
            data[key] = data[key].to(device)
        feats, _, _ = net(data)
        # Everything except the image itself is stored alongside the features.
        labels = {k: data[k] for k in data if k != 'frm'}
        sample_info = dict(feats=feats, labels=labels)

        # <...>/<frame_dir>/<name>.<ext>  ->  <...>/feat/<name>.pk
        fsplit = data['fname'][0].strip().split('/')
        feat_dir = '/'.join(fsplit[:-2] + ['feat'])
        # exist_ok avoids the check-then-create race of isdir() + mkdir().
        os.makedirs(feat_dir, exist_ok=True)
        fpath = '/'.join([feat_dir, f'{fsplit[-1].split(".")[0]}.pk'])
        # NOTE(review): tensors are saved on their current device, so loading
        # the file later needs that device or an explicit map_location.
        torch.save(sample_info, fpath)

print("All training image features computed and saved.")

23391


  0%|                                                                                                                                                                            | 0/23391 [00:00<?, ?it/s]


FileNotFoundError: Caught FileNotFoundError in DataLoader worker process 0.
Original Traceback (most recent call last):
  File "/home/khq.kitware.com/sri.hegde/miniconda3/envs/myenv/lib/python3.8/site-packages/torch/utils/data/_utils/worker.py", line 287, in _worker_loop
    data = fetcher.fetch(index)
  File "/home/khq.kitware.com/sri.hegde/miniconda3/envs/myenv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in fetch
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/home/khq.kitware.com/sri.hegde/miniconda3/envs/myenv/lib/python3.8/site-packages/torch/utils/data/_utils/fetch.py", line 49, in <listcomp>
    data = [self.dataset[idx] for idx in possibly_batched_index]
  File "/data/sri.hegde/ptg-activity-recognition/activity_hydra/src/datamodules/components/frame_dataset.py", line 102, in __getitem__
    verb = self._load_verb(os.path.join(path_list, "verb_label", f"{fname}.txt"))
  File "/data/sri.hegde/ptg-activity-recognition/activity_hydra/src/datamodules/components/frame_dataset.py", line 75, in _load_verb
    with open(annotation_file) as f:
FileNotFoundError: [Errno 2] No such file or directory: '../data/h2o/subject4/h1/0/cam4/verb_label/000000.txt'


In [19]:
# Sanity check: reload a saved feature file and display its contents.
# `fpath` is whatever the feature pre-computation loop assigned last, so this
# cell only works after that loop has saved at least one file in this kernel.
all_feats = torch.load(fpath)
all_feats

{'feats': tensor([[0.4777, 0.3931, 0.3964,  ..., 0.4264, 0.5056, 0.4327]],
        device='cuda:0'),
 'labels': {'l_hand': tensor([[-0.0434,  0.0190,  0.3661, -0.0119,  0.0065,  0.3457,  0.0163, -0.0064,
            0.3423,  0.0362, -0.0238,  0.3421,  0.0633, -0.0375,  0.3498, -0.0033,
           -0.0620,  0.3441,  0.0158, -0.0903,  0.3511,  0.0359, -0.0929,  0.3627,
            0.0589, -0.0922,  0.3715, -0.0127, -0.0746,  0.3658,  0.0128, -0.0926,
            0.3777,  0.0354, -0.0928,  0.3867,  0.0596, -0.0870,  0.3936, -0.0208,
           -0.0648,  0.3900,  0.0048, -0.0833,  0.3989,  0.0296, -0.0832,  0.4082,
            0.0551, -0.0802,  0.4134, -0.0236, -0.0512,  0.4085, -0.0038, -0.0578,
            0.4166,  0.0145, -0.0588,  0.4215,  0.0331, -0.0617,  0.4271]],
         dtype=torch.float64),
  'r_hand': tensor([[ 0.1707,  0.0855,  0.4396,  0.1569,  0.0549,  0.4156,  0.1446,  0.0255,
            0.4153,  0.1471, -0.0008,  0.4222,  0.1413, -0.0291,  0.4407,  0.1880,
            0.0