<center>

*******************************************************************************************
    
### Pose Detection and Tracking in YouTube Video
### with AlphaPose

##### 30 March 2025

##### Juan Ignacio Mendoza Garay  

*******************************************************************************************

</center>

##### INFORMATION:

* Tested using:

    * AlphaPose
        * Fork: https://github.com/juigmend/AlphaPose
    * Python 3.11
    * Windows 11 operating system
    * Intel 64-bit CPU
>
* Dependencies:

    * YouTube video downloader: https://pypi.org/project/yt-dlp/
    * cython_bbox:
        1) install Desktop Development with C++ from the Visual Studio Installer
        2) type in the command prompt: \
           a) set DISTUTILS_USE_SDK=1 \
           b) pip install cython_bbox  \
           NOTE: Pip was used for installation but other installers could be used.
    * Other packages might be prompted for installation.
>
* References:
    * https://github.com/MVIG-SJTU/AlphaPose
    * https://github.com/tugstugi/dl-colab-notebooks/blob/master/notebooks/AlphaPoseV0_3_0.ipynb
>
* Instructions:

    Edit the values indicated with an arrow like this: <---  
    Comment/uncomment or change values as suggested by the comments.  
    Run the program, close your eyes and hope for the best.  

*******************************************************************************************

In [None]:
import os
from os.path import exists
from IPython.display import YouTubeVideo
import io
import base64
from IPython.display import HTML
import time
from datetime import timedelta
import subprocess

***
#### Set paths and parameters:

In [None]:
# <--- folder of AlphaPose code (full path)
AP_code_path = r"C:\Users\Trevor_Philips\Documents\Python\AlphaPose"
# <--- folder where results will be saved (full path)
AP_results_path = r"C:\Users\Trevor_Philips\Documents\AlphaPose_results"

# Files located inside the AlphaPose code folder.
# Most probably there is no need to alter the following:
yolo_pretrained_model_path = rf'{AP_code_path}\detector\yolo\data\yolov3-spp.weights'
pretrained_model_path = rf'{AP_code_path}\pretrained_models\fast_421_res152_256x192.pth'
pretrained_model_config_path = rf'{AP_code_path}\configs\coco\resnet\256x192_res152_lr1e-3_1x-duc.yaml'
tracker_weights_path = rf'{AP_code_path}\trackers\weights\osnet_ain_x1_0_msmt17_256x128_amsgrad_ep50_lr0.0015_coslr_b64_fb10_softmax_labsmth_flip_jitter.pth'

>
##### Information about the YouTube video to process:

In [None]:
# The Youtube ID follows this string in the URL: "www.youtube.com/watch?v="

# Each preset is a tuple (YT_video_ID, lbl, trim):
#   YT_video_ID : this string is at the end of the video's Youtube page URL
#   lbl         : name for the resulting files, '' or [] = use Youtube video ID
#   trim        : [start,end] in seconds or 'end', '' or [] = no trim
video_presets = {
    'Rammstein_Sonne': ('bqpRoVxH2jc', 'Rammstein_Sonne_Kids_120s', [0, 60 * 2]),
    'Bee_Gees_medley': ('D-7SA6rJoWc', 'Bee_Gees_medley_120s', [0, 120]),
    'Bach_Art_of_the_Fugue': ('bPwA4gVobJ0', 'JS_Bach_Art_of_the_Fugue', [0, 'end']),
    'Pachelbel_Wedding': ('es_3F3TLJS0', 'Pachelbel_Wedding_Str_Qt_608_06_01', [4, 'end']),
    'Pachelbel_Vetta': ('Xu5TlSXEzzs', 'Pachelbel_Vetta_Str_Qt_608_06_01', []),
}

selected_preset = 'Pachelbel_Wedding' # <--- key of the preset to process (edit or add presets above)

YT_video_ID, lbl, trim = video_presets[selected_preset]

***
#### Get pre-trained models:

In [None]:
if not exists(yolo_pretrained_model_path):
    ! mkdir {AP_code_path}\detector\yolo\data
    ! gdown -O {yolo_pretrained_model_path} https://drive.google.com/uc?id=1D47msNOOiJKvPOXlnpyzdKA3k6E97NTC

if not exists(pretrained_model_path):  
    ! gdown -O {pretrained_model_path} https://drive.google.com/uc?id=1kfyedqyn8exjbbNmYq8XGd2EooQjPtF9

if not exists(tracker_weights_path):
    ! mkdir {AP_code_path}\trackers\weights
    ! gdown -O {tracker_weights_path} https://drive.google.com/uc?id=1myNKfr2cXqiHZVXaaG8ZAq_U2UpeOLfG

***
#### Preview Youtube video:

In [None]:
# Embedded player for the selected video; being the last expression, it is rendered as the cell output.
YouTubeVideo(id=YT_video_ID)

***
#### Download and trim video:

In [None]:
# Download video:

video_in_ffn = AP_results_path + '\\video_in_TMP.mp4'
if exists(video_in_ffn):
    ! del {video_in_ffn}
! yt-dlp -f "b[height<=720][ext=mp4]" --output "{video_in_ffn}" https://www.youtube.com/watch?v={YT_video_ID} -q

In [None]:
# Trim video:

if not trim or ((trim[0] == 0) & (trim[1] == 'end')):    
    video_trimmed_ffn = AP_results_path + '\\video_in_TMP.mp4'
    sfx = lbl
else:
    video_trimmed_ffn = AP_results_path + '\\video_trimmed_TMP.mp4'
    trim_start = trim[0]
    if trim[1] == 'end':
        video_duration = int(float(subprocess.run(["ffprobe", "-v", "error", "-show_entries",
                                                   "format=duration", "-of",
                                                   "default=noprint_wrappers=1:nokey=1", video_in_ffn],
                                                  stdout=subprocess.PIPE,
                                                  stderr=subprocess.STDOUT).stdout))
        trim_end = video_duration + 1 # overshoot to ensure its the end of the video
    else:
        trim_end = trim[1]
    sfx = lbl + f'_{trim[0]}-{trim[1]}'

    ! ffmpeg -y -loglevel error -i {video_in_ffn} -ss {trim_start} -to {trim_end} -acodec copy -vcodec copy{video_trimmed_ffn}

print(sfx)

***
#### Run AlphaPose demo:

In [None]:
tic = time.time()

! cd {AP_code_path} && python scripts\demo_inference.py --sp --video {video_trimmed_ffn} --outdir {AP_results_path} --save_video --checkpoint {pretrained_model_path} --cfg {pretrained_model_config_path} --pose_track --suffix {sfx}

print(f"\ncomputing time = {timedelta(seconds = time.time() - tic)} (H:M:S)\n")

#  On an i5-1235U CPU (10-core, 1.3 GHz, x64) and 16 GB RAM:

# [INP_DIM, NMS_THRES, CONFIDENCE]
# File_name (duration of trimmed video M:S): Computing time (H:M:S)

# [608, 0.6, 0.1]
# Pachelbel_Wedding_Str_Qt_608_06_01_4-end (5:19): 18:53:50
# Pachelbel_Vetta_Str_Qt_608_06_01 (6:59): 15:31:00

***
#### Display video with overlaid pose sticks:

In [None]:
# Extract audio from trimmed video:

audio_ext = ((subprocess.run(["ffprobe","-v","error","-select_streams","a","-show_entries",
                              "stream=codec_name","-of",
                              "default=nokey=1:noprint_wrappers=1",
                             video_in_ffn],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.STDOUT).stdout).strip()).decode("utf-8")

audio_trimmed_ffn = AP_results_path + '\\audio_' + sfx + '.' + audio_ext

! ffmpeg -y -loglevel error -i {video_trimmed_ffn} -vn -acodec copy {audio_trimmed_ffn}

In [None]:
# Combine AlphaPose video with audio:

if not trim:
    video_AP_ffn = AP_results_path + '\\AlphaPose_video_in_TMP.mp4'
else:
    video_AP_ffn = AP_results_path + '\\AlphaPose_video_trimmed_TMP.mp4'

if not lbl:
    lbl = YT_video_ID

av_AP_ffn = AP_results_path + '\\AlphaPose_' + sfx + '.mp4'

! ffmpeg -y -loglevel error -i {video_AP_ffn} -i {audio_trimmed_ffn} -c copy {av_AP_ffn}

In [None]:
# Re-encode and display:

av_AP_DISP_ffn = AP_results_path + '\\AlphaPose_' + sfx + '_DISP_TMP.mp4'

! ffmpeg -y -loglevel error -i {av_AP_ffn} {av_AP_DISP_ffn}

width = 400
height = 300
video_encoded = base64.b64encode(io.open(av_AP_DISP_ffn, 'rb').read())
HTML(data='''<video width = "{0}" height="{1}" alt="test" controls>
             <source src="data:video/mp4;base64,{2}" type="video/mp4" />
             </video>'''.format(width, height, video_encoded.decode('ascii')))