In [None]:
import numpy as np
import cv2
import os, sys
import matplotlib.pyplot as plt

sys.path.append(os.path.join("/data0/wxy/3d_pose/stereo-estimation/"))
from lib.dataset.human36m import Human36MMultiViewDataset


# Where the label table and the preprocessed Human3.6M data live on disk.
labels_stereo_npy_path = "/data1/share/dataset/human36m_multi-view/extra/human36m-stereo-labels-GTbboxes.npy"
h36m_root = os.path.join("/data1/share/dataset/human36m_multi-view/", "processed")

# One dataset spanning everything the loader offers, with images left as
# untouched as possible so the saved visualisations show the raw frames.
dataset = Human36MMultiViewDataset(
    h36m_root,
    labels_stereo_npy_path,
    kind="mpii",
    # Data selection: both splits, every test frame, damaged actions included.
    train=True,
    test=True,
    retain_every_n_frames_in_test=1,
    with_damaged_actions=True,
    # Image handling: no resize, no crop, no normalisation; undistortion is on.
    image_shape=None,
    crop=False,
    norm_image=False,
    undistort_images=True,
)
print("Dataset length:", len(dataset))

def test_rectificate(idx):
    """Save a 2D-keypoint overlay for every available camera view of one sample.

    For each view, the image is shown with its 2D keypoints drawn as red dots
    and the figure is written to ``<camera_name>/img_<idx + 1, 6 digits>.png``
    relative to the current working directory.

    Args:
        idx: index used for both ``dataset[idx]`` and
            ``dataset.labels['table'][idx]`` (module-level ``dataset``).
    """
    sample = dataset[idx]
    shot = dataset.labels['table'][idx]
    camera_names = dataset.labels['camera_names']

    # A bbox whose entries 0 and 2 coincide is empty, which means that the
    # camera is missing for this shot.
    missing_cameras = {
        camera_idx
        for camera_idx, bbox in enumerate(shot['bbox_by_camera_tlbr'])
        if bbox[2] == bbox[0]
    }
    # Camera indices must be enumerated over the camera list; the previous
    # version sized this list by the number of actions.
    available_cameras = [
        camera_idx
        for camera_idx in range(len(camera_names))
        if camera_idx not in missing_cameras
    ]

    for camera_idx, image, keypoints_2d in zip(available_cameras, sample['images'], sample['keypoints_2d']):
        camera_name = camera_names[camera_idx]

        output_image_folder = camera_name
        output_image_path = os.path.join(output_image_folder, 'img_%06d.png' % (idx + 1))
        os.makedirs(output_image_folder, exist_ok=True)

        fig = plt.figure()
        # test_bbox in this notebook reverses the channel axis before imshow for
        # images coming from the same dataset class; do the same here so the
        # colours match. NOTE(review): assumes the loader yields BGR - confirm.
        plt.imshow(image[:, :, ::-1])
        plt.scatter(keypoints_2d[:, 0], keypoints_2d[:, 1], s=1, c='red')
        plt.savefig(output_image_path)
        # Close this specific figure so repeated calls do not accumulate figures.
        plt.close(fig)

# Visualise every 5000th sample. The upper bound is capped at the dataset size
# so that a dataset shorter than 160000 samples does not raise an IndexError.
for idx in range(0, min(160000, len(dataset)), 5000):
    test_rectificate(idx)

In [4]:
import numpy as np
import cv2
import os, sys
import matplotlib.pyplot as plt

sys.path.append(os.path.join("/data0/wxy/3d_pose/stereo-estimation/"))
from lib.dataset.human36m import Human36MMultiViewDataset
from lib.utils import camera


# Where the label table, the preprocessed data and the detector boxes live.
labels_stereo_npy_path = "/data1/share/dataset/human36m_multi-view/extra/human36m-stereo-labels-GTbboxes.npy"
h36m_root = os.path.join("/data1/share/dataset/human36m_multi-view/", "processed")
bbox_npy_path = "/data1/share/dataset/human36m_multi-view/extra/bboxes-Human36M-yolo.npy"

# The .npy file wraps a single pickled object; .item() unwraps it. It is indexed
# below as bboxes[subject][action][camera_name].
# NOTE: allow_pickle=True unpickles arbitrary objects - only load trusted files.
bboxes = np.load(bbox_npy_path, allow_pickle=True).item()

# One dataset spanning everything the loader offers. Unlike a raw-frame dump,
# cropping is switched ON here.
dataset = Human36MMultiViewDataset(
    h36m_root,
    labels_stereo_npy_path,
    kind="mpii",
    # Data selection: both splits, every test frame, damaged actions included.
    train=True,
    test=True,
    retain_every_n_frames_in_test=1,
    with_damaged_actions=True,
    # Image handling: no resize, no normalisation; undistortion and crop are on.
    image_shape=None,
    crop=True,
    norm_image=False,
    undistort_images=True,
)
print("Dataset length:", len(dataset))

def test_bbox(idx):
    """Print camera intrinsics and save a keypoint overlay per view of one sample.

    Returns immediately when the sample carries no ``'images'`` entry (or it is
    ``None``). Otherwise prints the sample index, subject, action and frame
    index and then, for every available camera, prints ``getK`` of the
    camera object and writes the image with its 2D keypoints drawn in red to
    ``<camera_name>GT_label/img_<idx + 1, 6 digits>.png``.

    Args:
        idx: index used for both ``dataset[idx]`` and
            ``dataset.labels['table'][idx]`` (module-level ``dataset``).
    """
    sample = dataset[idx]
    if sample.get('images') is None:
        return

    labels = dataset.labels
    shot = labels['table'][idx]
    subject_idx, action_idx, frame_idx = (
        shot[field] for field in ('subject_idx', 'action_idx', 'frame_idx')
    )
    subject = labels['subject_names'][subject_idx]
    action = labels['action_names'][action_idx]
    print(idx, " ", subject," ", action, " ", frame_idx)

    # Detector boxes for this subject/action; looked up but not used further below.
    boxes_by_camera = bboxes[subject][action]
    bbox_left = boxes_by_camera[labels['camera_names'][1]]
    bbox_right = boxes_by_camera[labels['camera_names'][0]]

    # Start from all cameras and drop those whose bbox is empty, which means
    # that the camera is missing for this shot.
    available_cameras = list(range(len(labels['camera_names'])))
    for camera_idx, bbox in enumerate(shot['bbox_by_camera_tlbr']):
        if bbox[2] != bbox[0]:
            continue
        available_cameras.remove(camera_idx)

    views = zip(available_cameras, sample['images'], sample['keypoints_2d'], sample['cameras'])
    for camera_idx, image, keypoit_2d, camera_i in views:
        print("K:", camera_i.getK)

        output_image_folder = labels['camera_names'][camera_idx] + 'GT_label'
        os.makedirs(output_image_folder, exist_ok=True)
        output_image_path = os.path.join(output_image_folder, 'img_%06d.png' % (idx+1))

        fig = plt.figure()
        axes = fig.gca()
        # Reverse the channel axis before display.
        axes.imshow(image[:, :, ::-1])
        axes.scatter(keypoit_2d[:, 0], keypoit_2d[:, 1], s=1, c='red')
        fig.savefig(output_image_path)
        plt.close(fig)

# Visualise every 5000th sample. The upper bound is capped at the dataset size
# so that a dataset shorter than 160000 samples does not raise an IndexError.
for idx in range(0, min(160000, len(dataset)), 5000):
    test_bbox(idx)

Dataset length: 161348
0   S1   Directions-1   0
K: [[413.9933    33.021763 535.4441  ]
 [  0.       538.8118   226.2185  ]
 [  0.         0.         1.      ]]
K: [[6.4181537e+02 2.9713616e-02 1.3105852e+02]
 [0.0000000e+00 5.3896320e+02 2.2626797e+02]
 [0.0000000e+00 0.0000000e+00 1.0000000e+00]]
5000   S1   Greeting-1   419
K: [[413.9933    33.021763 568.4441  ]
 [  0.       538.8118   238.2185  ]
 [  0.         0.         1.      ]]
K: [[6.4181537e+02 2.9713616e-02 1.5905852e+02]
 [0.0000000e+00 5.3896320e+02 2.3826797e+02]
 [0.0000000e+00 0.0000000e+00 1.0000000e+00]]
10000   S1   Sitting-2   1928
K: [[413.9933    33.021763 524.4441  ]
 [  0.       538.8118   194.2185  ]
 [  0.         0.         1.      ]]
K: [[6.4181537e+02 2.9713616e-02 1.0805852e+02]
 [0.0000000e+00 5.3896320e+02 1.9426797e+02]
 [0.0000000e+00 0.0000000e+00 1.0000000e+00]]
15000   S1   Walking-1   587
K: [[413.9933    33.021763 575.4441  ]
 [  0.       538.8118   233.2185  ]
 [  0.         0.         1.      ]

KeyboardInterrupt: 

In [9]:
import cv2
import numpy as np
import os, sys
import io
import argparse
import torch
from torchvision import transforms
from PIL import Image
from rembg.bg import remove 

sys.path.append(os.path.join("/data0/wxy/3d_pose/stereo-estimation/"))
from lib.dataset.human36m import Human36MMultiViewDataset

def load_img(img_file):
    """Read an image file with OpenCV and return it as a 3-channel array.

    Single-channel images are expanded to three channels first. The first and
    third channels are then swapped; for files whose name ends in ``png`` the
    conversion code used also drops an alpha channel if one is present.

    Args:
        img_file: path of the image file to read.

    Returns:
        The converted image array.

    Raises:
        FileNotFoundError: if OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file, cv2.IMREAD_UNCHANGED)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with a message that names the file.
        raise FileNotFoundError("image file is missing or unreadable: %s" % img_file)

    if len(img.shape) == 2:
        img = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
    if not img_file.endswith("png"):
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    else:
        # NOTE(review): imread delivers BGR(A), so this "RGBA2BGR" code acts as
        # a channel swap plus alpha drop, presumably yielding RGB like the
        # branch above - confirm.
        img = cv2.cvtColor(img, cv2.COLOR_RGBA2BGR)

    return img

def get_image_mask(img_ori):
    """Compute a binary foreground mask for an image via rembg's ``remove``.

    The image is PNG-encoded in memory, handed to ``remove``, and the last
    channel (alpha) of the RGBA result is thresholded at 0.5.

    Args:
        img_ori: array accepted by ``PIL.Image.fromarray``.

    Returns:
        Float tensor holding 0.0 where the alpha value is below 0.5 and 1.0
        elsewhere. Callers in this file squeeze axis 0 of the result.
    """
    alpha_to_tensor = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.0, ), (1.0, )),
    ])

    with torch.no_grad():
        png_buffer = io.BytesIO()
        Image.fromarray(img_ori).save(png_buffer, format='png')
        cutout_bytes = remove(png_buffer.getvalue())
        cutout = Image.open(io.BytesIO(cutout_bytes)).convert("RGBA")

    alpha = alpha_to_tensor(cutout.split()[-1])
    background = (alpha < torch.tensor(0.5)).float()
    return torch.tensor(1.0) - background

def apply_mask(image, mask):
    """Paint every pixel where ``mask`` equals 0 white (255), in place.

    Only the first three channels of ``image`` are touched. The array that was
    passed in is modified and is also the return value.
    """
    for channel in range(3):
        plane = image[:, :, channel]  # view into ``image``; writes go through
        plane[...] = np.where(mask == 0, 255, plane)
    return image


def test_mask(idx):
    """Save a silhouette mask for every available camera view of one sample.

    Returns immediately when the sample carries no ``'images'`` entry (or it is
    ``None``). For each remaining view the mask from ``get_image_mask`` is
    scaled to 0/255 and written to
    ``<camera_name>silhoutte/silhoutte_<idx + 1, 6 digits>.png``.

    Args:
        idx: index used for both ``dataset[idx]`` and
            ``dataset.labels['table'][idx]`` (module-level ``dataset``).
    """
    sample = dataset[idx]
    if sample.get('images') is None:
        return

    shot = dataset.labels['table'][idx]
    camera_names = dataset.labels['camera_names']

    # A bbox whose entries 0 and 2 coincide is empty, which means that the
    # camera is missing for this shot.
    missing_cameras = {
        camera_idx
        for camera_idx, bbox in enumerate(shot['bbox_by_camera_tlbr'])
        if bbox[2] == bbox[0]
    }
    available_cameras = [
        camera_idx
        for camera_idx in range(len(camera_names))
        if camera_idx not in missing_cameras
    ]

    for camera_idx, image in zip(available_cameras, sample['images']):
        # Channel conversion before the image goes through PIL inside
        # get_image_mask. NOTE(review): on a 3-channel input this code acts as
        # a first/third channel swap, presumably BGR -> RGB - confirm.
        img = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
        mask = get_image_mask(img)

        output_image_folder = camera_names[camera_idx] + 'silhoutte'
        output_image_path = os.path.join(output_image_folder, 'silhoutte_%06d.png' % (idx + 1))
        os.makedirs(output_image_folder, exist_ok=True)

        # Drop the leading axis and convert to 8-bit explicitly instead of
        # relying on imwrite's implicit handling of a float array.
        mask_u8 = (np.squeeze(mask.numpy(), 0) * 255).astype(np.uint8)
        cv2.imwrite(output_image_path, mask_u8)


# Where the preprocessed Human3.6M data and the stereo label table live on disk.
h36m_root = os.path.join("/data1/share/dataset/human36m_multi-view/", "processed")
labels_stereo_npy_path = "/data1/share/dataset/human36m_multi-view/extra/human36m-stereo-labels-GTbboxes.npy"

dataset = Human36MMultiViewDataset(
    h36m_root,
    labels_stereo_npy_path,
    train=True,                       # include all possible data
    test=True,
    image_shape=None,                 # don't resize
    retain_every_n_frames_in_test=1,  # keep every test frame
    with_damaged_actions=True,        # keep damaged actions as well
    kind="mpii",
    norm_image=False,                 # don't do unnecessary image processing
    undistort_images=True,
    crop=True)                        # cropping is enabled for the mask test
print("Dataset length:", len(dataset))

# Write masks for every 5000th sample. The upper bound is capped at the dataset
# size so that a dataset shorter than 160000 samples does not raise IndexError.
for idx in range(0, min(160000, len(dataset)), 5000):
    test_mask(idx)

# Downscale one demo frame to a third of its size, save it, and save its mask.
demo_image_path = "/data0/wxy/stereo_matching/aanet/demo/right/img_000002.png"
image = cv2.imread(demo_image_path)
if image is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising; fail here with a message that names the file.
    raise FileNotFoundError("image file is missing or unreadable: %s" % demo_image_path)
h,w = image.shape[:2]
image = cv2.resize(image, (w//3, h//3))
cv2.imwrite("/data0/wxy/stereo_matching/aanet/demo/right/img_000003.png", image)
# Channel conversion before the image goes through PIL inside get_image_mask.
# NOTE(review): on the 3-channel imread result this code acts as a first/third
# channel swap, presumably BGR -> RGB - confirm.
img = cv2.cvtColor(image, cv2.COLOR_RGBA2BGR)
mask = get_image_mask(img)
output_image_path = "/data0/wxy/stereo_matching/aanet/demo/right/silhoutte_000003.png"
# Left as the final expression so the cell displays imwrite's return value.
cv2.imwrite(output_image_path, np.squeeze(mask.numpy(),0)*255)

Dataset length: 161348


True