In [None]:
# look at https://modelzoo.co/model/keras-realtime-multi-person-pose-estimation
# Environment setup: fetch the keras-openpose-reproduce repository.
!git clone https://github.com/kevinlin311tw/keras-openpose-reproduce.git
# Move the repository contents into the current directory and create model/keras
# (the folder that weights_path points into further down).
!mv keras-openpose-reproduce/* ./    &&    mkdir model/keras       
# Run the repository's model download script; cd is chained with && so it runs from inside model/.
!cd model   && sh get_keras_model.sh
# Install the Python packages listed below (quiet mode).
!pip3 -q install Cython scikit-image pandas zmq h5py opencv-python configobj

Cloning into 'keras-openpose-reproduce'...
remote: Enumerating objects: 132, done.[K
remote: Total 132 (delta 0), reused 0 (delta 0), pack-reused 132[K
Receiving objects: 100% (132/132), 9.28 MiB | 10.28 MiB/s, done.
Resolving deltas: 100% (45/45), done.
--2023-04-03 20:06:20--  https://www.dropbox.com/s/llpxd14is7gyj0z/model.h5
Resolving www.dropbox.com (www.dropbox.com)... 162.125.64.18, 2620:100:6031:18::a27d:5112
Connecting to www.dropbox.com (www.dropbox.com)|162.125.64.18|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: /s/raw/llpxd14is7gyj0z/model.h5 [following]
--2023-04-03 20:06:21--  https://www.dropbox.com/s/raw/llpxd14is7gyj0z/model.h5
Reusing existing connection to www.dropbox.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://uc23ccd1acb7afefc52a6d1aa14d.dl.dropboxusercontent.com/cd/0/inline/B5ehTXNvN3o-dbyv8AvHMroeBmvdBKMgz3Jkhz8gS8-DjaeLs_rBHtjJ1x64uQmEAh161khzjCm7N6zCYXZtmqHUiZaygW4Hb4rT0AZNp3qyZ6Jg71h1sAxRSVp0D

In [None]:
from tensorflow.keras import Sequential
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense, Activation, Lambda
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from tensorflow.keras.layers import BatchNormalization
from tensorflow.keras.layers import Concatenate
from config_reader import config_reader
import scipy
import math
%matplotlib inline
import cv2
import matplotlib
import pylab as plt
import numpy as np
import util
import os

def relu(x):
    """Pass ``x`` through a fresh ReLU ``Activation`` layer and return the result."""
    activation = Activation('relu')
    return activation(x)

def conv(x, nf, ks, name):
    """Apply a named, 'same'-padded ``Conv2D`` to ``x``.

    nf   -- number of filters
    ks   -- side length of the square kernel
    name -- layer name given to the Conv2D layer
    """
    layer = Conv2D(filters=nf, kernel_size=(ks, ks), padding='same', name=name)
    return layer(x)

def pooling(x, ks, st, name):
    """Apply a named ``MaxPooling2D`` to ``x``.

    ks   -- side length of the square pooling window
    st   -- stride used in both spatial directions
    name -- layer name given to the pooling layer
    """
    layer = MaxPooling2D(pool_size=(ks, ks), strides=(st, st), name=name)
    return layer(x)

def vgg_block(x):
    """Stack the feature-extraction layers on top of ``x`` and return the output tensor.

    Every convolution is 3x3 and is followed by a ReLU. The first three groups
    end with a 2x2, stride-2 max-pool; the fourth group and the two trailing
    "CPM" convolutions (the additional non-VGG layers) have no pooling.
    """
    # (conv layer names, filters per conv, trailing pool name or None)
    layer_groups = (
        (("conv1_1", "conv1_2"), 64, "pool1_1"),
        (("conv2_1", "conv2_2"), 128, "pool2_1"),
        (("conv3_1", "conv3_2", "conv3_3", "conv3_4"), 256, "pool3_1"),
        (("conv4_1", "conv4_2"), 512, None),
        (("conv4_3_CPM",), 256, None),
        (("conv4_4_CPM",), 128, None),
    )

    for conv_names, filters, pool_name in layer_groups:
        for conv_name in conv_names:
            x = relu(conv(x, filters, 3, conv_name))
        if pool_name is not None:
            x = pooling(x, 2, 2, pool_name)

    return x

def stage1_block(x, num_p, branch):
    """Build one first-stage branch on top of ``x``.

    Three 3x3 convolutions with 128 filters and one 1x1 convolution with 512
    filters (each followed by ReLU), then a 1x1 convolution with ``num_p``
    filters and no activation. ``branch`` is substituted into the layer names.
    """
    # (layer-name template, filters, kernel size) for the ReLU-activated layers
    hidden_layers = (
        ("conv5_1_CPM_L%d", 128, 3),
        ("conv5_2_CPM_L%d", 128, 3),
        ("conv5_3_CPM_L%d", 128, 3),
        ("conv5_4_CPM_L%d", 512, 1),
    )
    for name_template, filters, kernel in hidden_layers:
        x = relu(conv(x, filters, kernel, name_template % branch))

    # Final projection to num_p channels, left without activation.
    return conv(x, num_p, 1, "conv5_5_CPM_L%d" % branch)

def stageT_block(x, num_p, stage, branch):
    """Build one refinement-stage branch on top of ``x``.

    Five 7x7 convolutions and one 1x1 convolution, all with 128 filters and
    followed by ReLU, then a 1x1 convolution with ``num_p`` filters and no
    activation. ``stage`` and ``branch`` are substituted into the layer names.
    """
    # (layer-name template, kernel size) for the ReLU-activated layers
    hidden_layers = (
        ("Mconv1_stage%d_L%d", 7),
        ("Mconv2_stage%d_L%d", 7),
        ("Mconv3_stage%d_L%d", 7),
        ("Mconv4_stage%d_L%d", 7),
        ("Mconv5_stage%d_L%d", 7),
        ("Mconv6_stage%d_L%d", 1),
    )
    for name_template, kernel in hidden_layers:
        x = relu(conv(x, 128, kernel, name_template % (stage, branch)))

    # Final projection to num_p channels, left without activation.
    return conv(x, num_p, 1, "Mconv7_stage%d_L%d" % (stage, branch))

# Assemble the multi-stage, two-branch network and load its weights.
weights_path = "model/keras/model.h5" # original weights converted from caffe
#weights_path = "training/weights.best.h5" # weights trained from scratch

# Height and width are left as None so images of any size can be fed in; 3 channels.
input_shape = (None,None,3)

img_input = Input(shape=input_shape)

stages = 6       # total number of stages (stage 1 plus the refinement stages built in the loop below)
np_branch1 = 38  # output channels of branch 1 (model output 0, read as PAFs by the inference cell)
np_branch2 = 19  # output channels of branch 2 (model output 1, read as heatmaps by the inference cell)

# Rescale the input inside the graph: x / 256 - 0.5.
img_normalized = Lambda(lambda x: x / 256 - 0.5)(img_input)  # roughly [-0.5, 0.5) for 8-bit pixel values

# VGG
stage0_out = vgg_block(img_normalized)

# stage 1
stage1_branch1_out = stage1_block(stage0_out, np_branch1, 1)
stage1_branch2_out = stage1_block(stage0_out, np_branch2, 2)
# Each later stage receives both branch outputs concatenated with the shared features.
x = Concatenate()([stage1_branch1_out, stage1_branch2_out, stage0_out])

# stage t >= 2
for sn in range(2, stages + 1):
    stageT_branch1_out = stageT_block(x, np_branch1, sn, 1)
    stageT_branch2_out = stageT_block(x, np_branch2, sn, 2)
    # The last stage's outputs are the model outputs, so they are not concatenated again.
    if (sn < stages):
        x = Concatenate()([stageT_branch1_out, stageT_branch2_out, stage0_out])

# After the loop the stageT_* names refer to the final stage's branch outputs.
model = Model(img_input, [stageT_branch1_out, stageT_branch2_out])
model.load_weights(weights_path)

# Keypoint-index pairs to connect, in the order the connections are searched
# (original note: center 29 is in the position 15).
limbSeq = [
    [2, 3], [2, 6], [3, 4], [4, 5], [6, 7],
    [7, 8], [2, 9], [9, 10], [10, 11], [2, 12],
    [12, 13], [13, 14], [2, 1], [1, 15], [15, 17],
    [1, 16], [16, 18], [3, 17], [6, 18],
]

# The middle joints heatmap correspondence: one index pair per limbSeq entry
# (presumably the map channels belonging to that limb -- confirm against the decoder).
mapIdx = [
    [31, 32], [39, 40], [33, 34], [35, 36], [41, 42],
    [43, 44], [19, 20], [21, 22], [23, 24], [25, 26],
    [27, 28], [29, 30], [47, 48], [49, 50], [53, 54],
    [51, 52], [55, 56], [37, 38], [45, 46],
]

# Palette of 18 three-component colour values.
colors = [
    [255, 0, 0], [255, 85, 0], [255, 170, 0],
    [255, 255, 0], [170, 255, 0], [85, 255, 0],
    [0, 255, 0], [0, 255, 85], [0, 255, 170],
    [0, 255, 255], [0, 170, 255], [0, 85, 255],
    [0, 0, 255], [85, 0, 255], [170, 0, 255],
    [255, 0, 255], [255, 0, 170], [255, 0, 85],
]
# matplotlib's 'hsv' colormap object.
cmap = matplotlib.colormaps['hsv']

# Frames are read from image_folder; per-frame keypoint arrays are written to
# pedestrian_output_folder.
image_folder = '/content/sample_images'
pedestrian_output_folder = '/content/pedestrian_output'
# exist_ok=True tolerates an already-existing folder while still surfacing
# real failures (e.g. permission errors) instead of silently ignoring them.
os.makedirs(pedestrian_output_folder, exist_ok=True)

### Applying the model to an image

In [None]:
# Before running the next cell, add the input images (.jpg files whose names contain the frame number) to /content/sample_images

In [None]:
import os
import re
import json
from numpy import ma
from scipy.ndimage import gaussian_filter


# Multi-scale pose inference over every .jpg frame in image_folder.
#
# For each frame, the local maxima of the first NUM_KEYPOINTS averaged heatmap
# channels are collected and saved to pedestrian_output_folder as
# pedestrian<frame_number>.npy with shape (NUM_KEYPOINTS, n, 4); the last axis
# holds (x, y, score, peak_id).
NUM_KEYPOINTS = 18  # heatmaps have 19 channels; the last one is not searched for peaks

# The configuration does not depend on the frame, so it is read once.
param, model_params = config_reader()

for frame_name in sorted(os.listdir(image_folder)):

    if not frame_name.endswith('.jpg'):
        continue

    digit_groups = re.findall(r'\d+', frame_name)
    if not digit_groups:
        print(f'Skipping {frame_name}: no frame number in the file name')
        continue
    frame_number = int(digit_groups[0])

    # Open the file that was actually listed; rebuilding the name from the
    # parsed number breaks on zero-padded or differently prefixed names.
    image_path = os.path.join(image_folder, frame_name)
    oriImg = cv2.imread(image_path)
    if oriImg is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f'Skipping {frame_name}: could not be read as an image')
        continue
    print(oriImg.shape)
    # oriImg = cv2.resize(oriImg, (512, 256))

    image_height, image_width = oriImg.shape[:2]

    # One resize factor per search scale, relative to the image height.
    multiplier = [x * model_params['boxsize'] / image_height for x in param['scale_search']]

    heatmap_avg = np.zeros((image_height, image_width, 19))
    paf_avg = np.zeros((image_height, image_width, 38))

    for scale in multiplier:
        imageToTest = cv2.resize(oriImg, (0,0), fx=scale, fy=scale, interpolation=cv2.INTER_CUBIC)
        imageToTest_padded, pad = util.padRightDownCorner(imageToTest, model_params['stride'], model_params['padValue'])

        # Add a leading batch axis: (1, height, width, channels).
        input_img = np.transpose(np.float32(imageToTest_padded[:,:,:,np.newaxis]), (3,0,1,2))

        output_blobs = model.predict(input_img)

        # extract outputs, resize, and remove padding
        heatmap = np.squeeze(output_blobs[1]) # output 1 is heatmaps
        heatmap = cv2.resize(heatmap, (0,0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        heatmap = heatmap[:imageToTest_padded.shape[0]-pad[2], :imageToTest_padded.shape[1]-pad[3], :]
        heatmap = cv2.resize(heatmap, (image_width, image_height), interpolation=cv2.INTER_CUBIC)

        paf = np.squeeze(output_blobs[0]) # output 0 is PAFs
        paf = cv2.resize(paf, (0,0), fx=model_params['stride'], fy=model_params['stride'], interpolation=cv2.INTER_CUBIC)
        paf = paf[:imageToTest_padded.shape[0]-pad[2], :imageToTest_padded.shape[1]-pad[3], :]
        paf = cv2.resize(paf, (image_width, image_height), interpolation=cv2.INTER_CUBIC)

        # Running average over all scales.
        heatmap_avg = heatmap_avg + heatmap / len(multiplier)
        paf_avg = paf_avg + paf / len(multiplier)

    all_peaks = []
    peak_counter = 0

    for part in range(NUM_KEYPOINTS):
        map_ori = heatmap_avg[:,:,part]
        smoothed = gaussian_filter(map_ori, sigma=3)

        # Copies of the smoothed map shifted by one pixel along each axis
        # direction, zero-filled at the border.
        map_left = np.zeros(smoothed.shape)
        map_left[1:,:] = smoothed[:-1,:]
        map_right = np.zeros(smoothed.shape)
        map_right[:-1,:] = smoothed[1:,:]
        map_up = np.zeros(smoothed.shape)
        map_up[:,1:] = smoothed[:,:-1]
        map_down = np.zeros(smoothed.shape)
        map_down[:,:-1] = smoothed[:,1:]

        # A peak is >= all four neighbours and above the configured threshold.
        peaks_binary = np.logical_and.reduce((
            smoothed >= map_left,
            smoothed >= map_right,
            smoothed >= map_up,
            smoothed >= map_down,
            smoothed > param['thre1'],
        ))
        rows, cols = np.nonzero(peaks_binary)
        peaks = list(zip(cols, rows))  # note reverse: stored as (x, y)

        # Score comes from the unsmoothed map; ids are consecutive across all parts.
        peaks_with_score_and_id = [
            (px, py, map_ori[py, px], peak_counter + offset)
            for offset, (px, py) in enumerate(peaks)
        ]

        all_peaks.append(peaks_with_score_and_id)
        peak_counter += len(peaks)

    # Truncate every keypoint's peak list to the shortest one so the result is
    # a rectangular array.
    # NOTE(review): peaks are not associated across keypoints here, so index k
    # of one keypoint is not guaranteed to be the same person as index k of
    # another -- confirm this is acceptable for the downstream use.
    min_pedestrian = min(len(row) for row in all_peaks)
    all_peaks = [row[:min_pedestrian] for row in all_peaks]

    print(frame_number)
    all_peaks = np.array(all_peaks).reshape(NUM_KEYPOINTS, min_pedestrian, 4)
    print(all_peaks.shape)
    np.save(os.path.join(pedestrian_output_folder, f'pedestrian{frame_number}.npy'), all_peaks)

(960, 1280, 3)


KeyboardInterrupt: ignored

In [None]:
# Zip every file in the pedestrian output folder into pedestrian_numpy.zip
# (the archive is created inside that same folder).
%cd /content/pedestrian_output
!zip 'pedestrian_numpy.zip' *
# Change back to /content afterwards.
%cd /content

/content/pedestrian_output
  adding: pedestrian275.npy (deflated 49%)
  adding: pedestrian276.npy (deflated 51%)
  adding: pedestrian277.npy (deflated 42%)
  adding: pedestrian278.npy (deflated 52%)
  adding: pedestrian279.npy (deflated 52%)
  adding: pedestrian280.npy (deflated 52%)
  adding: pedestrian281.npy (deflated 51%)
  adding: pedestrian282.npy (deflated 54%)
  adding: pedestrian283.npy (deflated 54%)
  adding: pedestrian284.npy (deflated 55%)
  adding: pedestrian285.npy (deflated 55%)
  adding: pedestrian286.npy (deflated 56%)
  adding: pedestrian287.npy (deflated 55%)
  adding: pedestrian288.npy (deflated 55%)
  adding: pedestrian289.npy (deflated 55%)
  adding: pedestrian290.npy (deflated 55%)
  adding: pedestrian291.npy (deflated 55%)
  adding: pedestrian292.npy (deflated 55%)
  adding: pedestrian293.npy (deflated 51%)
  adding: pedestrian294.npy (deflated 42%)
  adding: pedestrian295.npy (deflated 42%)
  adding: pedestrian296.npy (deflated 42%)
  adding: pedestrian297.npy

# MiDaS

In [None]:
%cd /content/
!pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu117cu102/torch-1.5.1-cp36-cp36m-win_amd64.whl
!pip install timm

/content
Looking in indexes: https://download.pytorch.org/whl/cu117cu102/torch-1.5.1-cp36-cp36m-win_amd64.whl, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Collecting timm
  Downloading timm-0.6.13-py3-none-any.whl (549 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m549.1/549.1 KB[0m [31m40.4 MB/s[0m eta [36m0:00:00[0m
Collecting huggingface-hub
  Downloading huggingface_hub-0.13.3-py3-none-any.whl (199 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m199.8/199.8 KB[0m [31m29.5 MB/s[0m eta [36m0:00:00[0m
Installing collected packages: huggingface-hub, timm
Successfully installed huggingface-hub-0.13.3 timm-0.6.13


In [None]:
import os
import re
import cv2
import torch
import urllib.request
import numpy as np
import matplotlib.pyplot as plt

# MiDaS setup: input/output locations, model, device and input transform.

# MiDaS runs on the same frames as the pose model.
frame_data_for_midas = image_folder

ped_depth_path = "/content/ped_depth_output"
midas_results = '/content/midas_outputs'
# exist_ok=True tolerates existing folders but still reports real failures.
for output_dir in (ped_depth_path, midas_results):
    os.makedirs(output_dir, exist_ok=True)

model_type = "DPT_Large"     # MiDaS v3 - Large     (highest accuracy, slowest inference speed)
#model_type = "DPT_Hybrid"   # MiDaS v3 - Hybrid    (medium accuracy, medium inference speed)
#model_type = "MiDaS_small"  # MiDaS v2.1 - Small   (lowest accuracy, highest inference speed)

midas = torch.hub.load("intel-isl/MiDaS", model_type)

# NOTE(review): the recorded run of this cell ended in an OutOfMemoryError;
# presumably the pose model loaded earlier still occupies GPU memory -- confirm,
# and consider a smaller model_type or a fresh runtime if it recurs.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
midas.to(device)
midas.eval()

midas_transforms = torch.hub.load("intel-isl/MiDaS", "transforms")

# The DPT models use dpt_transform; any other model_type falls back to small_transform.
if model_type in ("DPT_Large", "DPT_Hybrid"):
    transform = midas_transforms.dpt_transform
else:
    transform = midas_transforms.small_transform

# Run MiDaS on every .jpg frame and save the prediction, resized to the frame's
# height and width, as frame_midas<image_number>.npy in midas_results.
for image_name in sorted(os.listdir(frame_data_for_midas)):

    if not image_name.endswith('.jpg'):
        continue

    digit_groups = re.findall(r'\d+', image_name)
    if not digit_groups:
        print(f'Skipping {image_name}: no frame number in the file name')
        continue
    image_number = int(digit_groups[0])
    img_path = os.path.join(frame_data_for_midas, image_name)

    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals failure by returning None rather than raising.
        print(f'Skipping {image_name}: could not be read as an image')
        continue
    # img = cv2.resize(img, (512, 256))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    input_batch = transform(img).to(device)

    # Inference only: no gradients are needed.
    with torch.no_grad():
        prediction = midas(input_batch)

        # Resize the prediction back to the original frame resolution.
        prediction = torch.nn.functional.interpolate(
            prediction.unsqueeze(1),
            size=img.shape[:2],
            mode="bicubic",
            align_corners=False,
        ).squeeze()

    output = prediction.cpu().numpy()
    # Save through midas_results so the output location is defined in one place.
    np.save(os.path.join(midas_results, f'frame_midas{image_number}.npy'), output)

Using cache found in /root/.cache/torch/hub/intel-isl_MiDaS_master


OutOfMemoryError: ignored

# HumanPose and Depth

In [None]:
import os

# Combine the saved keypoint peaks with the MiDaS output of the same frame and
# write one ped_depth<frame_number>.json per frame. Each entry holds a
# pedestrian index, an image position (u, v) and the MiDaS value z at that pixel.
for numpy_name in sorted(os.listdir(pedestrian_output_folder)):

    # The folder may also contain other files (e.g. the zip archive).
    if not numpy_name.endswith('.npy'):
        continue

    frame_number = int(re.findall(r'\d+', numpy_name)[0])
    midas_data = np.load(f'/content/midas_outputs/frame_midas{frame_number}.npy')
    all_peaks = np.load(os.path.join(pedestrian_output_folder, numpy_name))

    print(all_peaks.shape)
    # Saved as (keypoint, pedestrian, 4); reorder to (pedestrian, keypoint, 4).
    all_peaks = all_peaks.transpose([1, 0, 2])
    obj_list = []

    # With zero pedestrians the loop body simply never runs.
    for ped_idx in range(all_peaks.shape[0]):
        # Midpoint of keypoints 10 and 13 (presumably the two ankles -- confirm
        # against the keypoint ordering of the pose model).
        u = int((all_peaks[ped_idx, 10, 0] + all_peaks[ped_idx, 13, 0]) / 2)
        v = int((all_peaks[ped_idx, 10, 1] + all_peaks[ped_idx, 13, 1]) / 2)
        obj_list.append({
            'ped_id': ped_idx,
            'u': u,
            'v': v,
            # u is the x (column) and v the y (row) coordinate, so the array is
            # indexed [v, u]; float() gives a plain Python number whose text
            # form does not depend on the NumPy version.
            'z': float(midas_data[v, u]),
        })

    # NOTE(review): the file holds a JSON-encoded *string* containing the
    # Python repr of obj_list, not a JSON array. Kept as-is because existing
    # readers may rely on it; json.dumps(obj_list) would produce real JSON.
    json_object = json.dumps(str(obj_list))

    with open(os.path.join('/content/ped_depth_output', f'ped_depth{frame_number}.json'), "w") as outfile:
        outfile.write(json_object)

(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 0, 4)
(18, 1, 4)


TypeError: ignored

In [None]:
# NOTE(review): `json_data` is not assigned in any visible cell, so this cell
# relies on hidden kernel state -- confirm where it is loaded.
json_data

"[{'ped_id': 0, 0: {'u': 869, 'v': 421}, 1: {'u': 871, 'v': 433}, 2: {'u': 861, 'v': 433}, 3: {'u': 902, 'v': 449}, 4: {'u': 906, 'v': 446}, 5: {'u': 883, 'v': 432}, 6: {'u': 890, 'v': 448}, 7: {'u': 912, 'v': 449}, 8: {'u': 911, 'v': 465}, 9: {'u': 911, 'v': 489}, 10: {'u': 909, 'v': 509}, 11: {'u': 881, 'v': 465}, 12: {'u': 882, 'v': 489}, 13: {'u': 920, 'v': 511}, 14: {'u': 867, 'v': 418}, 15: {'u': 870, 'v': 418}, 16: {'u': 865, 'v': 422}, 17: {'u': 876, 'v': 419}}, {'ped_id': 0, 0: {'u': 869, 'v': 421}, 1: {'u': 871, 'v': 433}, 2: {'u': 861, 'v': 433}, 3: {'u': 902, 'v': 449}, 4: {'u': 906, 'v': 446}, 5: {'u': 883, 'v': 432}, 6: {'u': 890, 'v': 448}, 7: {'u': 912, 'v': 449}, 8: {'u': 911, 'v': 465}, 9: {'u': 911, 'v': 489}, 10: {'u': 909, 'v': 509}, 11: {'u': 881, 'v': 465}, 12: {'u': 882, 'v': 489}, 13: {'u': 920, 'v': 511}, 14: {'u': 867, 'v': 418}, 15: {'u': 870, 'v': 418}, 16: {'u': 865, 'v': 422}, 17: {'u': 876, 'v': 419}}, {'ped_id': 0, 0: {'u': 869, 'v': 421}, 1: {'u': 871,