<a href="https://colab.research.google.com/github/kacychou/multi-person-pose-estimation/blob/main/Multi_person_Pose_Estimation.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
!pip install tensorflow==2.7.0
!pip install tensorflow-gpu==2.7.0
!pip install tensorflow-hub opencv-python matplotlib

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import cv2
from matplotlib import pyplot as plt
import numpy as np
from google.colab.patches import cv2_imshow

In [None]:
# Turn on memory growth so TensorFlow allocates GPU memory as it is needed at
# runtime instead of reserving it all up front (helps avoid out-of-memory errors).
gpus=tf.config.experimental.list_physical_devices('GPU')
try:
  for gpu in gpus:
    tf.config.experimental.set_memory_growth(gpu,True)
except RuntimeError as err:
  # Memory growth has to be configured before the GPUs are initialised. If this
  # cell is re-run later in the session, report the problem instead of crashing.
  print(err)

1. Load Model

In [None]:
# Load the MoveNet multipose "lightning" model from TensorFlow Hub.
MOVENET_MULTIPOSE_URL = 'https://tfhub.dev/google/movenet/multipose/lightning/1'
model = hub.load(MOVENET_MULTIPOSE_URL)

In [None]:
# Select the model's 'serving_default' signature; `movenet` is the callable
# that the detection loop below invokes on each frame.
movenet=model.signatures['serving_default']

2. Make Detections

In [None]:
import glob
# Mount Google Drive at /content/drive so files stored on Drive can be accessed
# through ordinary file paths.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Collect every .mp4 video in the "Colab Notebooks" folder of the mounted Drive.
# glob returns paths in arbitrary (filesystem-dependent) order, so sort them to
# make data_files[0] refer to the same video on every run.
data_files = sorted(glob.glob("/content/drive/My Drive/Colab Notebooks/*.mp4"))

In [None]:
def loop_through_people(frame, keypoints_with_scores, edges, confidence_threshold):
    """Render every detected person onto ``frame``.

    Args:
        frame: image to draw on; passed straight to the drawing helpers.
        keypoints_with_scores: iterable with one keypoint set per person.
        edges: joint connections, forwarded to ``draw_connections``.
        confidence_threshold: minimum score, forwarded to both helpers.
    """
    for detection in keypoints_with_scores:
        # Skeleton lines first, then the joint markers on top of them.
        draw_connections(frame, detection, edges, confidence_threshold)
        draw_keypoints(frame, detection, confidence_threshold)

In [None]:
def draw_keypoints(frame, keypoints, confidence_threshold):
    """Mark each sufficiently confident keypoint of one person on ``frame``.

    Args:
        frame: image array of shape (height, width, channels); drawn on in place.
        keypoints: one person's keypoints as rows of (y, x, score), where y and
            x are multiplied by the frame height and width to obtain pixels.
        confidence_threshold: a keypoint is drawn only when its score is
            strictly greater than this value (e.g. 0.25 hides weak detections).
    """
    height, width, _ = frame.shape
    # Scale y by the frame height and x by the frame width; keep the score as is.
    pixel_kps = np.squeeze(np.multiply(keypoints, (height, width, 1)))

    for row, col, score in pixel_kps:
        if not score > confidence_threshold:
            continue
        # Filled circle (thickness -1) of radius 6 at (x, y).
        cv2.circle(frame, (int(col), int(row)), 6, (0,255,0), -1)


In [None]:
# Skeleton definition: each key is a pair of keypoint indices that should be
# joined by a line, each value is a single-letter colour tag ('m', 'c' or 'y').
# NOTE: draw_connections() does not read the colour tag; it draws every edge in
# one fixed colour.
# Indices presumably follow MoveNet's 17-keypoint order (0 nose, 1/2 eyes,
# 3/4 ears, 5/6 shoulders, 7/8 elbows, 9/10 wrists, 11/12 hips, 13/14 knees,
# 15/16 ankles) - confirm against the model card.
EDGES = {
    (0, 1): 'm',
    (0, 2): 'c',
    (1, 3): 'm',
    (2, 4): 'c',
    (0, 5): 'm',
    (0, 6): 'c',
    (5, 7): 'm',
    (7, 9): 'm',
    (6, 8): 'c',
    (8, 10): 'c',
    (5, 6): 'y',
    (5, 11): 'm',
    (6, 12): 'c',
    (11, 12): 'y',
    (11, 13): 'm',
    (13, 15): 'm',
    (12, 14): 'c',
    (14, 16): 'c'
}



In [None]:
def draw_connections(frame, keypoints, edges, confidence_threshold):
    """Draw the skeleton lines of one person on ``frame``.

    ``edges`` says which joints connect to which, e.g. (0, 1) joins the nose to
    the left eye and (0, 2) joins the nose to the right eye.

    Args:
        frame: image array of shape (height, width, channels); drawn on in place.
        keypoints: one person's keypoints as rows of (y, x, score), where y and
            x are multiplied by the frame height and width to obtain pixels.
        edges: mapping of (index_a, index_b) pairs to a colour tag; the tag is
            not used, every line is drawn with the same fixed colour.
        confidence_threshold: a line is drawn only when the scores of both of
            its endpoints are strictly greater than this value.
    """
    height, width, _ = frame.shape
    pixel_kps = np.squeeze(np.multiply(keypoints, (height, width, 1)))

    for (start_idx, end_idx), _tag in edges.items():
        start_y, start_x, start_score = pixel_kps[start_idx]
        end_y, end_x, end_score = pixel_kps[end_idx]

        both_confident = (start_score > confidence_threshold) and (end_score > confidence_threshold)
        if not both_confident:
            continue
        cv2.line(frame, (int(start_x), int(start_y)), (int(end_x), int(end_y)), (0,0,255), 4)

In [None]:
from google.colab.patches import cv2_imshow
import cv2

# Tunable settings for this cell.
MODEL_INPUT_SIZE = 256      # frames are letterboxed to a square of this size for the model
CONFIDENCE_THRESHOLD = 0.3  # keypoints scoring at or below this are not drawn
FALLBACK_FPS = 20           # used when the source video does not report a frame rate
OUTPUT_PATH = '/content/drive/My Drive/Colab Notebooks/final1.avi'


def _undo_letterbox(keypoints, frame_height, frame_width, target_size):
    """Map keypoints from the padded square model input back to the frame.

    tf.image.resize_with_pad shrinks the frame to fit a target_size square and
    pads the shorter side equally on both ends, so the (y, x) values the model
    returns are fractions of the padded square. The drawing helpers multiply by
    the real frame size, so the padding is removed here first.

    Args:
        keypoints: array whose last axis is (y, x, score), with y and x as
            fractions of the padded square.
        frame_height: height of the original frame in pixels.
        frame_width: width of the original frame in pixels.
        target_size: side length of the square the frame was padded to.

    Returns:
        A new float array of the same shape with y and x expressed as fractions
        of the original frame; scores are unchanged.
    """
    ratio = max(frame_height, frame_width) / float(target_size)
    content_h = max(int(frame_height / ratio), 1)
    content_w = max(int(frame_width / ratio), 1)
    pad_top = max(int((target_size - frame_height / ratio) // 2), 0)
    pad_left = max(int((target_size - frame_width / ratio) // 2), 0)

    restored = np.array(keypoints, dtype=np.float32)  # copy; the input stays untouched
    restored[..., 0] = (restored[..., 0] * target_size - pad_top) / content_h
    restored[..., 1] = (restored[..., 1] * target_size - pad_left) / content_w
    return restored


if not data_files:
    raise FileNotFoundError("no .mp4 files found in /content/drive/My Drive/Colab Notebooks/")

cap = cv2.VideoCapture(data_files[0]) # reads the video from the file path
result = None
try:
    if not cap.isOpened():
        raise IOError("could not open video: " + data_files[0])

    frame_width = int(cap.get(cv2.CAP_PROP_FRAME_WIDTH))
    frame_height = int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
    size = (frame_width, frame_height)

    # Write the output at the source frame rate so playback speed is preserved.
    fps = cap.get(cv2.CAP_PROP_FPS)
    if not fps > 0:
        fps = FALLBACK_FPS

    result = cv2.VideoWriter(OUTPUT_PATH,
                             cv2.VideoWriter_fourcc(*'MJPG'),
                             fps, size)

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret or frame is None:
            print("no image passed")
            break

        # OpenCV decodes frames as BGR, while the model expects RGB input.
        rgb = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        # Resize to the model input size, padding the shorter side with zeros.
        img = tf.image.resize_with_pad(tf.expand_dims(rgb,axis=0),MODEL_INPUT_SIZE,MODEL_INPUT_SIZE)
        input_img = tf.cast(img,dtype=tf.int32) # the model takes 32-bit integers

        # Detection: keep only the first 51 values per person and reshape them
        # to 6 people x 17 keypoints x (y, x, score). The raw model output is
        # kept in `results` / `keypoints_with_scores` for the inspection cells below.
        results = movenet(input_img)
        keypoints_with_scores = results['output_0'].numpy()[:,:,:51].reshape((6,17,3))

        # Rendering: remove the letterbox padding, then draw on the original frame.
        frame_keypoints = _undo_letterbox(keypoints_with_scores, frame.shape[0], frame.shape[1], MODEL_INPUT_SIZE)
        loop_through_people(frame, frame_keypoints, EDGES, CONFIDENCE_THRESHOLD)
        result.write(frame)
        cv2_imshow(frame)
        # No cv2.waitKey / destroyAllWindows here: Colab has no OpenCV window,
        # so a key press can never be received and there is nothing to close.
finally:
    # Always release the reader and finalise the output file, even on errors.
    cap.release()
    if result is not None:
        result.release()

In [None]:
keypoints_with_scores[0] 
# Keypoints of the first person (from the last frame that was processed).
# The y and x values are not yet scaled to the size of the image in pixels;
# the drawing helpers multiply them by the frame height and width.
# The 3rd value shows how confident the MoveNet model is in that particular keypoint.
# The 17 keypoints cover both the upper body and the lower body.

In [None]:
results
# The model output: results['output_0'] is a set of arrays wrapped inside a single array.
# - 6 people
# - 56 values for each person
# - the first 51 values are (y, x, score) * 17 keypoints
# - the remaining 5 values are the bounding box values

In [None]:
# Convert to a NumPy array and slice: the whole first axis, all 6 people, and
# only the first 51 values of each (the keypoints, without the trailing 5).
results['output_0'].numpy()[:,:,:51]