# 人物识别

基于Google MediaPipe实现人物动作识别/定位

In [None]:
# Path of the source video; the processing cell opens it with cv2.VideoCapture.
VID_PATH = 'input.mp4'
# Path of the annotated result; the processing cell writes it with
# cv2.VideoWriter and then re-encodes it with ffmpeg.
OUT_PATH = 'output.mp4'

# IPython shell escape: install MediaPipe into the notebook environment.
!pip install mediapipe

In [None]:
import sys, os, time, math
import cv2
import numpy as np
import matplotlib.pyplot as plt
import mediapipe as mp

from IPython.display import HTML
from base64 import b64encode

In [None]:
'''
Reference: https://github.com/AnanthaKannan/ai-media-pipe
'''

class poseDetector():
  """Thin wrapper around MediaPipe Pose for per-frame landmark detection.

  Typical use: call ``findPose`` on a BGR frame, then ``getPosition`` to
  obtain pixel coordinates, and optionally ``findAngle`` on three of the
  returned landmark indices.
  """

  def __init__(self, mode=False, smooth=True, detectionCon=0.5, trackCon=0.5):
    """Create the underlying ``mp.solutions.pose.Pose`` object.

    Args:
      mode: Forwarded as ``static_image_mode``.
      smooth: Forwarded as ``smooth_landmarks``.
      detectionCon: Forwarded as ``min_detection_confidence``.
      trackCon: Forwarded as ``min_tracking_confidence``.
    """
    self.mode = mode
    self.smooth = smooth
    self.detectionCon = detectionCon
    self.trackCon = trackCon
    # Timestamp of the previous showFps call (0 until the first call).
    self.pTime = 0
    # Initialised up front so getPosition/findAngle never hit an
    # AttributeError when they are called before findPose.
    self.results = None
    self.lmList = []

    self.mpDraw = mp.solutions.drawing_utils
    self.mpPose = mp.solutions.pose
    self.pose = self.mpPose.Pose(static_image_mode=self.mode,
                                 smooth_landmarks=self.smooth,
                                 min_detection_confidence=self.detectionCon,
                                 min_tracking_confidence=self.trackCon)

  def findPose(self, img, draw=True):
    """Run pose estimation on a BGR frame and cache the result.

    Args:
      img: Frame in BGR channel order; converted to RGB before processing.
      draw: When true and landmarks were found, draw the skeleton on ``img``
        in place.

    Returns:
      The same ``img`` object that was passed in.
    """
    imgRGB = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    self.results = self.pose.process(imgRGB)

    if draw and self.results.pose_landmarks:
      self.mpDraw.draw_landmarks(img, self.results.pose_landmarks,
                                 self.mpPose.POSE_CONNECTIONS)
    return img

  def getPosition(self, img):
    """Convert the cached landmarks to pixel coordinates.

    Args:
      img: Image whose height and width scale the landmark ``x``/``y``
        values. Only the first two entries of ``img.shape`` are used, so
        both 2-D and 3-D arrays are accepted.

    Returns:
      A list of ``[index, x_px, y_px]`` entries, one per landmark, or an
      empty list when no landmarks are cached. The list is also stored in
      ``self.lmList``.
    """
    self.lmList = []
    pose_landmarks = getattr(self.results, "pose_landmarks", None)
    if pose_landmarks:
      # The image size is constant for the call, so read it once.
      h, w = img.shape[:2]
      self.lmList = [[idx, int(lm.x * w), int(lm.y * h)]
                     for idx, lm in enumerate(pose_landmarks.landmark)]
    return self.lmList

  def showFps(self, img):
    """Draw the frame rate since the previous call onto ``img`` in place.

    The rate is ``1 / (now - previous call time)`` truncated to an integer;
    0 is drawn when no time has elapsed between two calls.
    """
    cTime = time.time()
    elapsed = cTime - self.pTime
    # Two calls can land on the same clock tick; avoid ZeroDivisionError.
    fps = 1 / elapsed if elapsed > 0 else 0
    self.pTime = cTime
    cv2.putText(img, str(int(fps)), (70, 80), cv2.FONT_HERSHEY_PLAIN, 3,
                (255, 0, 0), 3)

  def findAngle(self, img, p1, p2, p3, draw=True):
    """Return the angle at ``p2`` between the rays p2->p1 and p2->p3.

    Args:
      img: Image to draw on when ``draw`` is true; unused otherwise.
      p1, p2, p3: Indices into ``self.lmList`` (as filled by
        ``getPosition``).
      draw: When true, draw the two segments and the three joint markers.

    Returns:
      The angle in degrees, normalised to the range [0, 360).

    Raises:
      IndexError: If ``self.lmList`` has no entry for one of the indices,
        e.g. because no pose was detected in the current frame.
    """
    x1, y1 = self.lmList[p1][1:]
    x2, y2 = self.lmList[p2][1:]
    x3, y3 = self.lmList[p3][1:]

    angle = math.degrees(math.atan2(y3 - y2, x3 - x2)
                         - math.atan2(y1 - y2, x1 - x2))
    # The difference of two atan2 results can be negative; wrap it so the
    # result is always non-negative.
    if angle < 0:
      angle += 360

    if draw:
      cv2.line(img, (x1, y1), (x2, y2), (255, 255, 255), 3)
      cv2.line(img, (x3, y3), (x2, y2), (255, 255, 255), 3)
      # Each joint gets a filled dot plus a thin outer ring.
      for point in ((x1, y1), (x2, y2), (x3, y3)):
        cv2.circle(img, point, 10, (0, 0, 255), cv2.FILLED)
        cv2.circle(img, point, 15, (0, 0, 255), 1)
    return angle

In [None]:
def videoPlayer(filename):
  """Return an inline HTML5 player for a video file.

  The file is embedded as a base64 ``data:`` URI. Files larger than 5 MiB
  are first re-encoded with ffmpeg into ``compressed.mp4`` in the current
  directory; if that step fails the original file is embedded instead.

  Args:
    filename: Path of the video file to show.

  Returns:
    An ``IPython.display.HTML`` object, or ``None`` when ``filename`` does
    not exist.
  """
  import subprocess

  if not os.path.exists(filename):
    return None

  if os.path.getsize(filename) > 5 * 1024 ** 2:
    compressed = 'compressed.mp4'
    # Argument list instead of a shell string: paths with spaces or shell
    # metacharacters are passed through verbatim.
    # -y overwrites a compressed.mp4 left over from an earlier run rather
    # than waiting on an interactive prompt.
    # -b:v takes a single bitrate; the previous "100k-200k" is not a range
    # syntax, so the upper bound is used.
    cmd = ["ffmpeg", "-y", "-i", filename,
           "-vcodec", "libx264", "-b:v", "200k", compressed]
    try:
      succeeded = subprocess.run(cmd, check=False).returncode == 0
    except OSError:
      # ffmpeg is not installed or not executable.
      succeeded = False
    if succeeded:
      filename = compressed

  with open(filename, 'rb') as fh:
    payload = fh.read()
  data_enc = "data:video/mp4;base64," + b64encode(payload).decode()
  return HTML("""
    <video width=400 controls>
          <source src="%s" type="video/mp4">
    </video>
    """ % data_enc)

In [None]:
%%capture

if not os.path.exists(VID_PATH):
  raise f"Video file not found: {VID_PATH}"

detector = poseDetector()
vid = cv2.VideoCapture(VID_PATH)
out = cv2.VideoWriter(OUT_PATH,
                      cv2.VideoWriter_fourcc(*'MP4V'),
                      vid.get(cv2.CAP_PROP_FPS),
                      (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))))

lmList = []

while vid.isOpened():
  ret, img = vid.read()
  if not ret:
    break
  img = detector.findPose(img)
  detector.showFps(img)
  lmList.append(detector.getPosition(img))
  out.write(img)

vid.release()
out.release()

lmList = np.array(lmList)

# 由于OpenCV视频编码选项较少，调用ffmpeg重新编码
os.system(f"mv {OUT_PATH} tmp_{OUT_PATH}")
os.system(f"ffmpeg -i tmp_{OUT_PATH} -vcodec libx264 {OUT_PATH}")
os.system(f"rm tmp_{OUT_PATH}")

In [None]:
# Show the processed video inline; as the last expression of the cell, the
# value returned by videoPlayer is what the notebook renders.
videoPlayer(OUT_PATH)

In [None]:
'''
坐标数据
(视频帧, 坐标编号, 坐标id-x-y)
'''

# Coordinate data gathered by the processing cell. Intended layout:
# (video frame, landmark index, [landmark id, x, y]).
# NOTE(review): that 3-D shape presumably only holds when every frame
# produced landmarks — confirm against the input video.
lmList.shape

# 视频加工

根据识别结果，对视频进行裁切、超分等后处理