論文  
https://arxiv.org/abs/2202.12555<br>
<br>  
GitHub  
https://github.com/thohemp/6drepnet<br>
<br>
<a href="https://colab.research.google.com/github/kaz12tech/ai_demos/blob/master/6DRepNet_demo.ipynb" target="_blank"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 環境セットアップ

## GPU確認

In [None]:
# Show the GPU attached to this runtime; the inference cell later in this notebook calls .cuda().
!nvidia-smi

## GitHubからコード取得

In [None]:
%cd /content

!git clone https://github.com/thohemp/6DRepNet

## ライブラリのインストール

In [None]:
# Work from the repository checkout for the rest of the notebook.
%cd /content/6DRepNet

# gdown is used further down to fetch the pretrained checkpoint from Google Drive.
!pip install --upgrade gdown
# Face detector package installed from its master branch
# (presumably the source of the `face_detection.RetinaFace` import below - confirm).
!pip install git+https://github.com/elliottzheng/face-detection.git@master

## ライブラリのインポート

In [None]:
from model import SixDRepNet
import math
import re
from matplotlib import pyplot as plt
import sys
import os
import argparse

import numpy as np
import cv2
from google.colab.patches import cv2_imshow
import matplotlib.pyplot as plt
from numpy.lib.function_base import _quantile_unchecked

import torch
import torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import transforms
import torch.backends.cudnn as cudnn
import torchvision
import torch.nn.functional as F
import utils
import matplotlib
from PIL import Image
import time
from face_detection import RetinaFace

import glob
from google.colab import files
from tqdm import tqdm

## 学習済みモデルのダウンロード

In [None]:
%cd /content/6DRepNet
!mkdir pretrained

#https://drive.google.com/file/d/1vPNtVu_jg2oK-RiIWakxYyfLPA9rU4R4/view?usp=sharing
pretrained_ckpt = 'pretrained/6DRepNet_300W_LP_AFLW2000.pth'
if not os.path.exists(pretrained_ckpt):
  !gdown --id 1vPNtVu_jg2oK-RiIWakxYyfLPA9rU4R4 \
          -O {pretrained_ckpt}

snapshot_path = os.path.join("/content/6DRepNet/", "pretrained/6DRepNet_300W_LP_AFLW2000.pth")

# テスト動画のセットアップ

## 動画のアップロード
使用動画<br>
https://www.pexels.com/ja-jp/video/3201691/

In [None]:
%cd /content/6DRepNet
!rm -rf upload
!mkdir -p upload/frames
%cd upload

uploaded = files.upload()
uploaded = list(uploaded.keys())
file_name = uploaded[0]

upload_path = os.path.join("/content/6DRepNet/upload", file_name)
print("upload file here:", upload_path)

## 動画をフレーム画像に分割

In [None]:
%cd /content/6DRepNet/upload

!ffmpeg -i {upload_path} frames/%06d.png

frames = glob.glob("/content/6DRepNet/upload/frames/*.png")

# Head Pose Estimation

In [None]:
%cd /content/6DRepNet

!rm -rf output
!mkdir -p output/frames

cudnn.enabled = True
gpu = 0

print("Start model setup...")
# Modelのビルド
model = SixDRepNet(
    backbone_name='RepVGG-B1g2',
    backbone_file='',
    deploy=True,
    pretrained=False)

detector = RetinaFace(gpu_id=gpu)

# Modelのロード
saved_state_dict = torch.load(os.path.join(snapshot_path), map_location='cpu')

if 'model_state_dict' in saved_state_dict:
  model.load_state_dict(saved_state_dict['model_state_dict'])
else:
  model.load_state_dict(saved_state_dict)    
  model.cuda(gpu)

# Test the Model
model.eval()  # Change model to 'eval' mode (BN uses moving mean/var).

print("Complete model setup.")

print("loading ", len(frames), " frames...")

process_start = time.time()
with torch.no_grad():
  for i in tqdm( range(len(frames)) ):
    img_path = frames[i]
    frame = np.array(Image.open(img_path))

    faces = detector(frame)

    for box, landmarks, score in faces:
      # Print the location of each face in this image
      if score < .95:
          continue
      x_min = int(box[0])
      y_min = int(box[1])
      x_max = int(box[2])
      y_max = int(box[3])         
      bbox_width = abs(x_max - x_min)
      bbox_height = abs(y_max - y_min)

      x_min = max(0,x_min-int(0.2*bbox_height))
      y_min = max(0,y_min-int(0.2*bbox_width))
      x_max = x_max+int(0.2*bbox_height)
      y_max = y_max+int(0.2*bbox_width)

      img = frame[y_min:y_max,x_min:x_max]

      img = cv2.resize(img, (244, 244))/255.0
      img = img.transpose(2, 0, 1)
      img = torch.from_numpy(img).type(torch.FloatTensor)
      img = torch.Tensor(img).cuda(gpu)
      img=img.unsqueeze(0)

      start = time.time()
      R_pred = model(img)
      end = time.time()
      #print('Head pose estimation: %2f ms'% ((end - start)*1000.))

      euler = utils.compute_euler_angles_from_rotation_matrices(R_pred)*180/np.pi
      p_pred_deg = euler[:, 0].cpu()
      y_pred_deg = euler[:, 1].cpu()
      r_pred_deg = euler[:, 2].cpu()

      utils.plot_pose_cube(frame,  y_pred_deg, p_pred_deg, r_pred_deg, x_min + int(.5*(x_max-x_min)), y_min + int(.5*(y_max-y_min)), size = bbox_width)

    # 1フレーム完了毎に表示する場合はコメントアウト解除
    #cv2_imshow(frame)
    cv2.imwrite( os.path.join("/content/6DRepNet/output/frames", os.path.basename(img_path)), frame)

process_end = time.time()
print('Complete All Head pose estimation: %2f s'% (process_end - process_start))
print('Average %2f ms/ %06d frames'% (((process_end - process_start)*1000.)/len(frames), len(frames)))

## フレーム画像を動画に変換

In [None]:
!ffmpeg -i "/content/6DRepNet/output/frames/%06d.png" -c:v libx264 -vf "format=yuv420p" "/content/6DRepNet/output/result.mp4"

## Head Pose Estimationの結果を表示

In [None]:
# Import only the name that is used instead of `from moviepy.editor import *`,
# which floods the notebook namespace and can shadow earlier names.
from moviepy.editor import VideoFileClip
from moviepy.video.fx.resize import resize

# Load the encoded result, scale it to 420 px height and play it inline.
clip = VideoFileClip("/content/6DRepNet/output/result.mp4")
clip = resize(clip, height=420)
clip.ipython_display()