# This notebook runs the MPII test set (24984 images) and the SURREAL video dataset through the MPII model, and the COCO test data through the COCO model, recording the time each run takes as an evaluation metric.

In [1]:
import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
import glob
import os
from scipy.io import loadmat
import scipy.io
%matplotlib inline


# if not os.path.exists(output_dir_images):
#     os.makedirs(output_dir_images)
    
# if not os.path.exists(output_dir_videos):
#     os.makedirs(output_dir_videos)

# Specify the model to be used:
## Let's go with MPII first 
COCO and MPII are body pose estimation models. The COCO model outputs 18 keypoints and the MPII model outputs 15.
### Load the network and specify the image dimensions


In [2]:
def load_mpii_model():
    """Load the MPII pose network and publish its configuration as globals.

    The image/video helpers in this notebook read ``net``, ``nPoints``,
    ``POSE_PAIRS``, ``colors``, ``inWidth`` and ``inHeight`` from module scope,
    so these names are declared ``global`` here; as plain locals they would be
    discarded when the function returns.
    """
    global net, nPoints, POSE_PAIRS, colors, inWidth, inHeight

    protoFile = "pose/mpi/pose_deploy_linevec_faster_4_stages.prototxt"
    weightsFile = "pose/mpi/pose_iter_160000.caffemodel"

    # Number of keypoints read from the network output.
    nPoints = 15
    # Keypoint index pairs joined by a limb when drawing the skeleton.
    POSE_PAIRS = [[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13] ]
    # One drawing colour per entry of POSE_PAIRS (same order).
    colors = [ [255,255,0],[0,100,255], [0,255,255], [0,100,255],[0,100,255], [0,255,255], [0,100,255],
         [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255],
         [0,255,0]]

    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

    # Spatial size every frame is resized to before being fed to the network.
    inWidth = 368
    inHeight = 368

### Find the keypoints for an image with only single person

In [3]:
def image_mpii_model(data_dir_images, output_dir_images, file):
    """Run single-person pose estimation on every ``.jpg`` in a directory.

    For each image, the location of the global maximum of each per-part
    confidence map is taken as that part's keypoint (kept only when its
    confidence exceeds the threshold), the skeleton is drawn on the image, the
    annotated image is saved under the same file name in
    ``output_dir_images``, and one ``<image name>, <keypoint list>`` line is
    appended to ``file``.

    Reads the module-level names ``net``, ``nPoints``, ``POSE_PAIRS``,
    ``colors``, ``inWidth`` and ``inHeight``; they must be defined before the
    call.

    Args:
        data_dir_images: Directory containing the input ``.jpg`` images.
        output_dir_images: Directory for annotated images; created if missing.
        file: Path of the text file that keypoints are appended to; its parent
            directory is created if missing.
    """
    threshold = 0.1

    # cv2.imwrite and open() do not create missing directories.
    os.makedirs(output_dir_images, exist_ok=True)
    keypoint_dir = os.path.dirname(file)
    if keypoint_dir:
        os.makedirs(keypoint_dir, exist_ok=True)

    for img_file in os.listdir(data_dir_images):
        if not img_file.endswith(".jpg"):
            continue

        img_path = os.path.join(data_dir_images, img_file)
        frame = cv2.imread(img_path)
        if frame is None:
            # Unreadable or corrupt file: skip it instead of aborting the run.
            print(f"Skipping unreadable image: {img_path}")
            continue

        frameHeight, frameWidth = frame.shape[:2]

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        net.setInput(inpBlob)
        output = net.forward()
        H = output.shape[2]
        W = output.shape[3]

        # One entry per body part: (x, y) in image pixels, or None if not detected.
        points = []
        for i in range(nPoints):
            # Confidence map of the corresponding body part.
            probMap = output[0, i, :, :]

            # Global maximum of the confidence map and its location.
            _, prob, _, point = cv2.minMaxLoc(probMap)

            if prob > threshold:
                # Scale the point from map coordinates to the original image.
                x = int((frameWidth * point[0]) / W)
                y = int((frameHeight * point[1]) / H)
                cv2.circle(frame, (x, y), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                points.append((x, y))
            else:
                points.append(None)

        # Draw the skeleton: colors[idx] belongs to POSE_PAIRS[idx].
        for idx, (partA, partB) in enumerate(POSE_PAIRS):
            if points[partA] is None or points[partB] is None:
                continue
            color = colors[idx]
            cv2.line(frame, points[partA], points[partB], color, 3)
            cv2.circle(frame, points[partA], 8, color, thickness=-1)
            cv2.circle(frame, points[partB], 8, color, thickness=-1)

        # Save the processed image to the output directory.
        cv2.imwrite(os.path.join(output_dir_images, img_file), frame)

        # Re-open in append mode per image so results already written survive
        # an interrupted run.
        with open(file, 'a') as f:
            f.write(f'{img_file}, {points}\n')



### Find the keypoints for a video with only single person

In [4]:
def video_mpii_model(data_dir_videos, output_dir_videos, file):
    """Run single-person pose estimation on every ``.mp4`` in a directory.

    Each video is read frame by frame, the skeleton is drawn on every frame,
    and the annotated frames are written to a video of the same name in
    ``output_dir_videos`` (``mp4v`` codec, 30 fps, source frame size).

    Reads the module-level names ``net``, ``nPoints``, ``POSE_PAIRS``,
    ``colors``, ``inWidth`` and ``inHeight``; they must be defined before the
    call.

    Args:
        data_dir_videos: Directory containing the input ``.mp4`` videos.
        output_dir_videos: Directory for annotated videos; created if missing.
        file: Unused; kept so the signature matches the image helpers.
    """
    threshold = 0.1
    os.makedirs(output_dir_videos, exist_ok=True)

    for vid_file in os.listdir(data_dir_videos):
        if not vid_file.endswith(".mp4"):
            continue

        vid_path = os.path.join(data_dir_videos, vid_file)
        cap = cv2.VideoCapture(vid_path)
        if not cap.isOpened():
            cap.release()
            print(f"Skipping unreadable video: {vid_path}")
            continue

        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        output_path = os.path.join(output_dir_videos, vid_file)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, 30.0, frame_size)

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frameHeight, frameWidth = frame.shape[:2]

                inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                                (0, 0, 0), swapRB=False, crop=False)
                net.setInput(inpBlob)
                output = net.forward()
                H = output.shape[2]
                W = output.shape[3]

                # One entry per body part: (x, y) in frame pixels, or None.
                points = []
                for i in range(nPoints):
                    # Confidence map of the corresponding body part.
                    probMap = output[0, i, :, :]

                    # Global maximum of the confidence map and its location.
                    _, prob, _, point = cv2.minMaxLoc(probMap)

                    if prob > threshold:
                        # Scale the point from map coordinates to the frame.
                        x = int((frameWidth * point[0]) / W)
                        y = int((frameHeight * point[1]) / H)
                        cv2.circle(frame, (x, y), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                        points.append((x, y))
                    else:
                        points.append(None)

                # Draw the skeleton: colors[idx] belongs to POSE_PAIRS[idx].
                for idx, (partA, partB) in enumerate(POSE_PAIRS):
                    if points[partA] is None or points[partB] is None:
                        continue
                    color = colors[idx]
                    cv2.line(frame, points[partA], points[partB], color, 3)
                    cv2.circle(frame, points[partA], 8, color, thickness=-1)
                    cv2.circle(frame, points[partB], 8, color, thickness=-1)

                out.write(cv2.resize(frame, frame_size))
        finally:
            # Release per video (also on error/interrupt) so every output file
            # is finalised and no capture handle is leaked.
            cap.release()
            out.release()


## Inference with the COCO model
### Load the network and specify the image dimensions

In [5]:
def load_coco_model():
    """Load the COCO pose network and publish its configuration as globals.

    ``image_coco_model`` reads ``net``, ``nPoints``, ``POSE_PAIRS``,
    ``colors``, ``inWidth`` and ``inHeight`` from module scope, so these names
    are declared ``global`` here; as plain locals they would be discarded when
    the function returns.
    """
    global net, nPoints, POSE_PAIRS, colors, inWidth, inHeight

    protoFile = "pose/coco/pose_deploy_linevec.prototxt"
    weightsFile = "pose/coco/pose_iter_440000.caffemodel"

    # Number of keypoints read from the network output.
    nPoints = 18
    # Keypoint index pairs joined by a limb when drawing the skeleton.
    POSE_PAIRS = [ [1,0],[1,2],[1,5],[2,3],[3,4],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13],[0,14],[0,15],[14,16],[15,17]]
    # One drawing colour per entry of POSE_PAIRS (same order).
    colors = [ [0,0,255],[0,100,255], [0,100,255], [0,255,255], [0,100,255], [0,255,255], [0,100,255],
         [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255],
         [255,0,0], [255,0,0],[200,200,0],[0,0,0]]

    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

    # Spatial size every frame is resized to before being fed to the network.
    inWidth = 368
    inHeight = 368

### Find the keypoints for an image with only single person

In [6]:
def image_coco_model(data_dir_images, output_dir_images, file):
    """Run single-person pose estimation on every ``.jpg`` in a directory.

    For each image, the location of the global maximum of each per-part
    confidence map is taken as that part's keypoint (kept only when its
    confidence exceeds the threshold), the skeleton is drawn on the image, the
    annotated image is saved under the same file name in
    ``output_dir_images``, and one ``<image name>, <keypoint list>`` line is
    appended to ``file``.

    Reads the module-level names ``net``, ``nPoints``, ``POSE_PAIRS``,
    ``colors``, ``inWidth`` and ``inHeight``; they must be defined before the
    call.

    Args:
        data_dir_images: Directory containing the input ``.jpg`` images.
        output_dir_images: Directory for annotated images; created if missing.
        file: Path of the text file that keypoints are appended to; its parent
            directory is created if missing.
    """
    threshold = 0.1

    # cv2.imwrite and open() do not create missing directories.
    os.makedirs(output_dir_images, exist_ok=True)
    keypoint_dir = os.path.dirname(file)
    if keypoint_dir:
        os.makedirs(keypoint_dir, exist_ok=True)

    for img_file in os.listdir(data_dir_images):
        if not img_file.endswith(".jpg"):
            continue

        img_path = os.path.join(data_dir_images, img_file)
        frame = cv2.imread(img_path)
        if frame is None:
            # Unreadable or corrupt file: skip it instead of aborting the run.
            print(f"Skipping unreadable image: {img_path}")
            continue

        frameHeight, frameWidth = frame.shape[:2]

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)
        net.setInput(inpBlob)
        output = net.forward()
        H = output.shape[2]
        W = output.shape[3]

        # One entry per body part: (x, y) in image pixels, or None if not detected.
        points = []
        for i in range(nPoints):
            # Confidence map of the corresponding body part.
            probMap = output[0, i, :, :]

            # Global maximum of the confidence map and its location.
            _, prob, _, point = cv2.minMaxLoc(probMap)

            if prob > threshold:
                # Scale the point from map coordinates to the original image.
                x = int((frameWidth * point[0]) / W)
                y = int((frameHeight * point[1]) / H)
                cv2.circle(frame, (x, y), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                points.append((x, y))
            else:
                points.append(None)

        # Draw the skeleton: colors[idx] belongs to POSE_PAIRS[idx].
        for idx, (partA, partB) in enumerate(POSE_PAIRS):
            if points[partA] is None or points[partB] is None:
                continue
            color = colors[idx]
            cv2.line(frame, points[partA], points[partB], color, 3)
            cv2.circle(frame, points[partA], 8, color, thickness=-1)
            cv2.circle(frame, points[partB], 8, color, thickness=-1)

        # Save the processed image to the output directory.
        cv2.imwrite(os.path.join(output_dir_images, img_file), frame)

        # Re-open in append mode per image so results already written survive
        # an interrupted run.
        with open(file, 'a') as f:
            f.write(f'{img_file}, {points}\n')



### Find the keypoints for a video with only single person

In [8]:
def video_coco_model(data_dir_videos, output_dir_videos, file):
    """Run single-person COCO pose estimation on every ``.mp4`` in a directory.

    The COCO network is loaded inside this function. Each video is read frame
    by frame, the skeleton is drawn on every frame, and the annotated frames
    are written to a video of the same name in ``output_dir_videos``
    (``mp4v`` codec, 30 fps, source frame size).

    Args:
        data_dir_videos: Directory containing the input ``.mp4`` videos.
        output_dir_videos: Directory for annotated videos; created if missing.
        file: Unused; kept so the signature matches the image helpers.
    """
    protoFile = "pose/coco/pose_deploy_linevec.prototxt"
    weightsFile = "pose/coco/pose_iter_440000.caffemodel"
    nPoints = 18
    # Keypoint index pairs joined by a limb when drawing the skeleton.
    POSE_PAIRS = [ [1,0],[1,2],[1,5],[2,3],[3,4],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13],[0,14],[0,15],[14,16],[15,17]]
    # One drawing colour per entry of POSE_PAIRS (same order).
    colors = [ [0,0,255],[0,100,255], [0,100,255], [0,255,255], [0,100,255], [0,255,255], [0,100,255],
         [0,255,0], [255,200,100], [255,0,255], [0,255,0], [255,200,100], [255,0,255],
         [255,0,0], [255,0,0],[200,200,0],[0,0,0]]

    net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

    inWidth = 368
    inHeight = 368
    threshold = 0.1

    os.makedirs(output_dir_videos, exist_ok=True)

    for vid_file in os.listdir(data_dir_videos):
        if not vid_file.endswith(".mp4"):
            continue

        vid_path = os.path.join(data_dir_videos, vid_file)
        cap = cv2.VideoCapture(vid_path)
        if not cap.isOpened():
            cap.release()
            print(f"Skipping unreadable video: {vid_path}")
            continue

        frame_size = (int(cap.get(cv2.CAP_PROP_FRAME_WIDTH)),
                      int(cap.get(cv2.CAP_PROP_FRAME_HEIGHT)))
        output_path = os.path.join(output_dir_videos, vid_file)
        fourcc = cv2.VideoWriter_fourcc(*'mp4v')
        out = cv2.VideoWriter(output_path, fourcc, 30.0, frame_size)

        try:
            while True:
                ret, frame = cap.read()
                if not ret:
                    break

                frameHeight, frameWidth = frame.shape[:2]

                inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                                (0, 0, 0), swapRB=False, crop=False)
                net.setInput(inpBlob)
                output = net.forward()
                H = output.shape[2]
                W = output.shape[3]

                # One entry per body part: (x, y) in frame pixels, or None.
                points = []
                for i in range(nPoints):
                    # Confidence map of the corresponding body part.
                    probMap = output[0, i, :, :]

                    # Global maximum of the confidence map and its location.
                    _, prob, _, point = cv2.minMaxLoc(probMap)

                    if prob > threshold:
                        # Scale the point from map coordinates to the frame.
                        x = int((frameWidth * point[0]) / W)
                        y = int((frameHeight * point[1]) / H)
                        cv2.circle(frame, (x, y), 8, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
                        points.append((x, y))
                    else:
                        points.append(None)

                # Draw the skeleton: colors[idx] belongs to POSE_PAIRS[idx].
                for idx, (partA, partB) in enumerate(POSE_PAIRS):
                    if points[partA] is None or points[partB] is None:
                        continue
                    color = colors[idx]
                    cv2.line(frame, points[partA], points[partB], color, 3)
                    cv2.circle(frame, points[partA], 8, color, thickness=-1)
                    cv2.circle(frame, points[partB], 8, color, thickness=-1)

                out.write(cv2.resize(frame, frame_size))
        finally:
            # Release per video (also on error/interrupt) so every output file
            # is finalised and no capture handle is leaked.
            cap.release()
            out.release()


## Now we test the models
### We run the MPII and COCO models on the MPII image dataset, the COCO image dataset, and the SURREAL video dataset

### First we run the MPII model on MPII dataset

In [None]:
# Benchmark: MPII model over the MPII single-person image set.
data_dir_images = "dataset/single_person/mpii_dataset"
output_dir_images = "results/single_person/mpii_model/mpii_dataset"
file = "predicted_keypoints/mpii_model_mpii_dataset.txt"

# The model is loaded before the clock starts, so only inference is timed.
load_mpii_model()

t = time.time()
image_mpii_model(data_dir_images, output_dir_images, file)
print(f"Time Taken for MPII model running MPII dataset = {time.time() - t}")

# Time Taken = 11689.12948679924

### Running MPII model on COCO dataset

In [None]:
# Benchmark: MPII model over the COCO single-person image set.
data_dir_images = "dataset/single_person/coco_dataset"
output_dir_images = "results/single_person/mpii_model/coco_dataset"
# Keypoints go under predicted_keypoints/ like the other image benchmarks in
# this notebook.
file = "predicted_keypoints/mpii_model_coco_dataset.txt"

# Load the MPII model here so this cell does not depend on an earlier cell
# having been run; loading happens before the clock starts.
load_mpii_model()

t = time.time()
image_mpii_model(data_dir_images, output_dir_images, file)
print("Time Taken for MPII model running COCO dataset = {}".format(time.time() - t))

# Time taken = 19398.382776021957

### Running MPII model on SURREAL dataset

In [None]:
# FIX PATHS
# data_dir_videos = "dataset/single_person/surreal_dataset"
# output_dir_videos = "results/single_person/mpii_model/coco_dataset"
# file = "mpii_model_coco_dataset.txt"
# t = time.time()
# video_mpii_model(data_dir_videos, output_dir_videos, file):
# print("Time Taken for MPII model running COCO dataset = {}".format(time.time() - t))

# 9:22 to 1:32, 362 videos

### Running COCO model on MPII dataset

In [None]:
# Benchmark: COCO model over the MPII single-person image set.
data_dir_images = "dataset/single_person/mpii_dataset"
output_dir_images = "results/single_person/coco_model/mpii_dataset"
file = "predicted_keypoints/coco_model_mpii_dataset.txt"

# Load the COCO model first so this run does not reuse whatever model an
# earlier cell left loaded; loading happens before the clock starts.
load_coco_model()

t = time.time()
image_coco_model(data_dir_images, output_dir_images, file)
print("Time Taken for COCO model running MPII dataset = {}".format(time.time() - t))

# Time Taken = 16239.520328998566

### Running COCO model on COCO dataset

In [None]:
# Benchmark: COCO model over the COCO single-person image set.
data_dir_images = "dataset/single_person/coco_dataset"
output_dir_images = "results/single_person/coco_model/coco_dataset"
file = "predicted_keypoints/coco_model_coco_dataset.txt"

# Load the COCO model here so this cell does not depend on an earlier cell
# having been run; loading happens before the clock starts.
load_coco_model()

t = time.time()
image_coco_model(data_dir_images, output_dir_images, file)
print("Time Taken for COCO model running COCO dataset = {}".format(time.time() - t))

# Time Taken = 36547.80410313606	

### Running COCO model on SURREAL dataset

In [9]:
# Benchmark: COCO model over the SURREAL single-person video set.
data_dir_videos = "dataset/single_person/surreal_dataset"
output_dir_videos = "results/single_person/coco_model/surreal_dataset"
# Name the keypoint file after this model/dataset pair rather than the
# copy-pasted MPII/COCO label.
file = "predicted_keypoints/coco_model_surreal_dataset.txt"

t = time.time()
video_coco_model(data_dir_videos, output_dir_videos, file)
print("Time Taken for coco model running surreal dataset = {}".format(time.time() - t))

KeyboardInterrupt: 