In [None]:
import cv2
import trimesh
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from pathlib import Path
from scipy.spatial.transform import Rotation

from renderer import QuickRenderer

# Seed NumPy's global RNG so the randomly drawn samples in the cells below
# are the same on every fresh run.
seed = 777
np.random.seed(seed)
# Print floats in fixed-point notation rather than scientific notation.
np.set_printoptions(suppress=True)

# Prerequisite

First, set the root directory of your dataset.

In [None]:
# Root directory of the dataset -- edit this to match your local copy.
# A raw string is used so the backslash is taken literally: '\W' is not a
# valid escape sequence and triggers a warning in a normal string literal.
data_root = Path(r'E:\WCPA_track2')

Read the list of the training instances.

In [None]:
# Index of the training instances; later cells read its `subject_id`,
# `facial_action` and `img_id` columns to build file paths.
csv_path = data_root / 'list' / 'WCPA_track2_train.csv'

# Force both ID columns to be read as strings, since they are used verbatim
# as path components.
df = pd.read_csv(csv_path, dtype=dict(subject_id=str, img_id=str))

# Shape first, then a preview of the first rows.
print(df.shape, df.head(), sep='\n')

Read the global projection matrix (OpenGL format), which is used to convert vertices from camera space to NDC space.

In [None]:
# Projection matrix shared by all images, stored as plain text.
txt_path = data_root / 'resources' / 'projection_matrix.txt'
M_proj = np.loadtxt(txt_path, dtype=np.float32)

print(M_proj)

We provide an example 3D mesh. You can open it in MeshLab for more details. Note that the left side of the face points to the $+x$ direction, the top of the face points to the $+y$ direction, and the face looks to the $+z$ direction.

In [None]:
# Load the example mesh. `process=False` is passed through to trimesh so the
# file is loaded without trimesh's processing step -- presumably to keep the
# vertex order intact for index-based lookups (TODO confirm).
obj_path = data_root / 'resources' / 'example.obj'
mesh = trimesh.load(obj_path, process=False)

# Vertex positions as float32, triangle vertex indices as int32.
verts_template = np.array(mesh.vertices, dtype=np.float32)
tris = np.array(mesh.faces, dtype=np.int32)

print(verts_template.shape, tris.shape)

We also provide the indices of the 68 landmarks among the 1,220 vertices.

In [None]:
# Indices that pick the landmark vertices out of a per-vertex array.
npy_path = data_root / 'resources' / 'kpt_ind.npy'
kpt_ind = np.load(npy_path)

print(kpt_ind.shape)

A quick renderer is implemented for visualization.

In [None]:
# All images are the same size.
img_h = img_w = 800

# Note the argument order: width first, then height, followed by the
# projection matrix and the triangle index array.
renderer = QuickRenderer(img_w, img_h, M_proj, tris)

# Visualization

Render the template mesh with different rotations and translations.

In [None]:
# Render the template mesh under one pose perturbation per panel, so the
# effect of each rotation / translation component can be read off directly.
fig, axes = plt.subplots(3, 5, figsize=(26, 16))

# Baseline translation shared by every panel.
default_t = np.array([0, 0, -0.45])

# One entry per panel:
#   (title, rotation as (euler axis, degrees) or None, offset added to default_t)
pose_cases = [
    ('zero R/t',         None,       (0.0, 0.0, 0.0)),
    ('R_x > 0',          ('x', 30),  (0.0, 0.0, 0.0)),
    ('R_x < 0',          ('x', -30), (0.0, 0.0, 0.0)),
    ('R_y > 0',          ('y', 30),  (0.0, 0.0, 0.0)),
    ('R_y < 0',          ('y', -30), (0.0, 0.0, 0.0)),
    ('R_z > 0',          ('z', 30),  (0.0, 0.0, 0.0)),
    ('R_z < 0',          ('z', -30), (0.0, 0.0, 0.0)),
    ('t_x > 0',          None,       (0.12, 0.0, 0.0)),
    ('t_x < 0',          None,       (-0.12, 0.0, 0.0)),
    ('t_y > 0',          None,       (0.0, 0.12, 0.0)),
    ('t_y < 0',          None,       (0.0, -0.12, 0.0)),
    ('|t_z| is larger',  None,       (0.0, 0.0, 0.2)),
    ('|t_z| is smaller', None,       (0.0, 0.0, -0.2)),
]
titles = [title for title, _, _ in pose_cases]

all_axes = axes.ravel()

for ax, (title, euler, t_offset) in zip(all_axes, pose_cases):
    # 4x4 pose matrix with the translation stored in the LAST ROW.
    R_t = np.identity(4)
    R_t[3, :3] = default_t + np.asarray(t_offset)

    if euler is not None:
        euler_axis, angle_deg = euler
        # The rotation block is written transposed, matching the layout in
        # which the translation occupies the last row instead of the last column.
        R_t[:3, :3] = Rotation.from_euler(euler_axis, angle_deg, degrees=True).as_matrix().T

    img_render = renderer(verts_template, R_t)
    ax.imshow(img_render)
    ax.axis('off')
    ax.set_title(title)

# The grid has more cells than there are cases; hide the unused axes so they
# do not show up as empty framed plots.
for ax in all_axes[len(pose_cases):]:
    ax.axis('off')

plt.show()

Plot some images with corresponding 2D 68 landmarks.

In [None]:
# Show a 5x5 grid of randomly chosen training images with their 2D landmarks.
fig, axes = plt.subplots(5, 5, figsize=(32, 32))

for ax in axes.flat:
    # Random row position; drawn from NumPy's global RNG.
    index = np.random.randint(len(df))
    sample = df.iloc[index]

    subject_id = sample['subject_id']
    facial_action = sample['facial_action']
    img_id = sample['img_id']

    img_path = data_root / 'image' / subject_id / facial_action / f'{img_id}_ar.jpg'
    txt_path = data_root / '68landmarks' / subject_id / facial_action / f'{img_id}_68landmarks.txt'

    # cv2.imread does not raise on failure, it returns None; fail here with a
    # message that names the file instead of a cryptic error in cvtColor.
    img = cv2.imread(str(img_path))
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # One landmark per row; the first two columns are used as (x, y) pixels.
    pts68 = np.loadtxt(txt_path, dtype=np.int32)

    for p in pts68:
        # Plain Python ints for the centre, as in the projection cell below.
        cv2.circle(img, (int(p[0]), int(p[1])), radius=4, color=(0, 255, 0), thickness=-1)

    ax.imshow(img)
    ax.axis('off')

plt.show()

Read the images and their ground-truth 3D vertices and face poses.

In [None]:
# Show a 5x5 grid of randomly chosen training images with the ground-truth
# mesh rendered on top.
fig, axes = plt.subplots(5, 5, figsize=(32, 32))

for ax in axes.flat:
    # Random row position; drawn from NumPy's global RNG.
    index = np.random.randint(len(df))
    sample = df.iloc[index]

    subject_id = sample['subject_id']
    facial_action = sample['facial_action']
    img_id = sample['img_id']

    img_path = data_root / 'image' / subject_id / facial_action / f'{img_id}_ar.jpg'
    npz_path = data_root / 'info' / subject_id / facial_action / f'{img_id}_info.npz'

    # cv2.imread does not raise on failure, it returns None; fail here with a
    # message that names the file instead of a cryptic error in cvtColor.
    img = cv2.imread(str(img_path))
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # An .npz archive keeps its file open until closed, so read what is
    # needed inside a `with` block instead of leaking one handle per sample.
    with np.load(npz_path) as M:
        verts3d, R_t = M['verts'], M['R_t']

    img_render = renderer(verts3d, R_t, overlap=img)
    ax.imshow(img_render)
    ax.axis('off')

plt.show()

Understand the transformation pipeline from world space to image space, and index the 68 landmarks from the 1,220 vertices.

In [None]:
# For random training samples, project the ground-truth 3D vertices into the
# image by hand and draw them: all vertices on the left copy, only the
# landmark vertices (selected with `kpt_ind`) on the right copy.
fig, axes = plt.subplots(5, 3, figsize=(32, 32))

for ax in axes.flat:
    # Random row position; drawn from NumPy's global RNG.
    index = np.random.randint(len(df))
    sample = df.iloc[index]

    subject_id = sample['subject_id']
    facial_action = sample['facial_action']
    img_id = sample['img_id']

    img_path = data_root / 'image' / subject_id / facial_action / f'{img_id}_ar.jpg'
    npz_path = data_root / 'info' / subject_id / facial_action / f'{img_id}_info.npz'

    # cv2.imread does not raise on failure, it returns None; fail here with a
    # message that names the file instead of a cryptic error in cvtColor.
    img = cv2.imread(str(img_path))
    if img is None:
        raise FileNotFoundError(f'Cannot read image: {img_path}')
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Local names on purpose: the notebook-level img_h / img_w that were used
    # to build the renderer are left untouched.
    cur_h, cur_w, _ = img.shape

    # An .npz archive keeps its file open until closed, so read what is
    # needed inside a `with` block instead of leaking one handle per sample.
    with np.load(npz_path) as M:
        verts3d, R_t = M['verts'], M['R_t']

    # Homogeneous coordinates: append a column of ones -> (num_verts, 4).
    ones = np.ones([verts3d.shape[0], 1])
    verts_homo = np.concatenate([verts3d, ones], axis=1)

    assert R_t[3, 2] < 0    # tz is always negative

    # Viewport matrix for row vectors: scales x / y by half the image size
    # and, through the last row, shifts them by half the image size once the
    # result is divided by w.
    M1 = np.array([
        [cur_w/2,       0, 0, 0],
        [      0, cur_h/2, 0, 0],
        [      0,       0, 1, 0],
        [cur_w/2, cur_h/2, 0, 1]
    ])

    # world space -> camera space -> NDC space -> image space
    verts = verts_homo @ R_t @ M_proj @ M1
    # Perspective divide by the homogeneous w component.
    w_ = verts[:, [3]]
    verts = verts / w_

    # image space: x grows to the right, y grows downwards, so flip y.
    points2d = verts[:, :2]
    points2d[:, 1] = cur_h - points2d[:, 1]

    # Left panel: every vertex (small dots). Right panel: landmarks only.
    panels = []
    for pts, radius in ((points2d, 2), (points2d[kpt_ind], 4)):
        canvas = img.copy()
        for p in pts:
            cv2.circle(canvas, (int(p[0]), int(p[1])), radius=radius, color=(0, 255, 0), thickness=-1)
        panels.append(canvas)

    img_display = np.hstack(panels)

    ax.imshow(img_display)
    ax.axis('off')

plt.show()