In [1]:
from google.colab import drive
import natsort
import glob
import pathlib
import os
from os.path import exists, join, basename, splitext
import re
import json
import numpy as np
import pickle
import shutil

In [3]:
# Directories

# Video files with subdirectories for 2018 and 2020
video_folder = "/content/drive/MyDrive/projects/pose_detection/videos/*/"
# Final results folder on GDrive
pose_detection_results_folder = "/content/drive/MyDrive/projects/pose_detection/results/pose_detection/"
# Results will first be written to the Colab hard drive, and then copied to GDrive
# This is much faster since there are millions of these files
intermediate_results_folder = '/content/results'

In [2]:
# Install openpose. This will take about 15 minutes.
git_repo_url = 'https://github.com/CMU-Perceptual-Computing-Lab/openpose.git'
project_name = splitext(basename(git_repo_url))[0]
if not exists(project_name):
  # see: https://github.com/CMU-Perceptual-Computing-Lab/openpose/issues/949
  # install new CMake becaue of CUDA10
  !wget -q https://cmake.org/files/v3.13/cmake-3.13.0-Linux-x86_64.tar.gz
  !tar xfz cmake-3.13.0-Linux-x86_64.tar.gz --strip-components=1 -C /usr/local
  # clone openpose
  !git clone -q --depth 1 $git_repo_url
  !sed -i 's/execute_process(COMMAND git checkout master WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/execute_process(COMMAND git checkout f019d0dfe86f49d1140961f8c7dec22130c83154 WORKING_DIRECTORY ${CMAKE_SOURCE_DIR}\/3rdparty\/caffe)/g' openpose/CMakeLists.txt
  # install system dependencies
  !apt-get -qq install -y libatlas-base-dev libprotobuf-dev libleveldb-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler libgflags-dev libgoogle-glog-dev liblmdb-dev opencl-headers ocl-icd-opencl-dev libviennacl-dev
  # install python dependencies
  !pip install -q youtube-dl
  # build openpose
  !cd openpose && rm -rf build || true && mkdir build && cd build && cmake .. && make -j`nproc`

Selecting previously unselected package libgflags2.2.
(Reading database ... 155222 files and directories currently installed.)
Preparing to unpack .../00-libgflags2.2_2.2.1-1_amd64.deb ...
Unpacking libgflags2.2 (2.2.1-1) ...
Selecting previously unselected package libgflags-dev.
Preparing to unpack .../01-libgflags-dev_2.2.1-1_amd64.deb ...
Unpacking libgflags-dev (2.2.1-1) ...
Selecting previously unselected package libgoogle-glog0v5.
Preparing to unpack .../02-libgoogle-glog0v5_0.3.5-1_amd64.deb ...
Unpacking libgoogle-glog0v5 (0.3.5-1) ...
Selecting previously unselected package libgoogle-glog-dev.
Preparing to unpack .../03-libgoogle-glog-dev_0.3.5-1_amd64.deb ...
Unpacking libgoogle-glog-dev (0.3.5-1) ...
Selecting previously unselected package libhdf5-serial-dev.
Preparing to unpack .../04-libhdf5-serial-dev_1.10.0-patch1+docs-4_all.deb ...
Unpacking libhdf5-serial-dev (1.10.0-patch1+docs-4) ...
Selecting previously unselected package libleveldb1v5:amd64.
Preparing to unpack ...

In [4]:
# List all the wmv files in the 2018 and 2020 subdirectories
video_files = []
for file in glob.glob(video_folder + "*.wmv"):
    video_files.append(file)

In [5]:
# Number of videos
len(video_files)

7480

In [6]:
def create_poselist(posedir):

    # List all files in the intermediate results folder
    # There will be one file for each frame
    posefiles = sorted(os.listdir(posedir))

    # Loop over them and put the contents in one list
    poselist = []
    for i, posefile in enumerate(posefiles):

        with open(posedir + "/" + posefile) as f:
            data = json.load(f)

        # Every frame can have multiple people
        # Loop over them, and assign their values to the inner list
        frame_poselist = []
        for people in data['people']:
            # Turn 0s into np.nan
            keypoints = [np.nan if i == 0 else i for i in people['pose_keypoints_2d']]
            # Remove every third value (i.e. the probabilities)
            del keypoints[3-1::3]
            # append to the current frame's list of keypoints
            frame_poselist.append(keypoints)

        # Append the current frame's keypoints people list to the outer list
        poselist.append(frame_poselist)

    return(poselist)

In [7]:
# Components of the system command that are used for every video
system_p1 = "cd '/content/openpose' && '/content/openpose/build/examples/openpose/openpose.bin' --video '"
system_p3 = "' --net_resolution -1x128 --write_json '"
system_p5 = "' --display 0 --render_pose 0"

In [8]:
for counter, videofile in enumerate(video_files):

  # Year of the video
  videofile_year = os.path.split(os.path.dirname(videofile))[1]
  # Basename of the video without extension
  videofile_base_noending = os.path.splitext(basename(videofile))[0]
  # Path of the file the final results are saved to
  # Format:
  # /path/to/file/20**_basename.pkl
  final_result_file =  pose_detection_results_folder + videofile_year + "_" + videofile_base_noending + ".pkl"

  # Process the video if the final output file on GDrive doesn't already exist
  if not os.path.exists(final_result_file):
    # Delete and remake the intermediate output folder on Colab
    if os.path.exists(intermediate_results_folder):
      shutil.rmtree(intermediate_results_folder)
    os.mkdir(intermediate_results_folder)
    # Run the pose detection
    os.system(system_p1 + videofile + system_p3 + intermediate_results_folder + system_p5)
    # Combine the results into a single file
    posel = create_poselist("results")
    # Save the results to GDrive
    with open(final_result_file, 'wb') as f:
        pickle.dump(posel, f)
  else:
    print('Skipping', videofile_base_noending, "- already done.")

  print(counter)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
4883
Skipping GOV_NE_RICKETTS_SMALLER_THAN_EVER - already done.
4884
Skipping GOV_IA_HUBBELL_MEDICAID_PRIVATIZATION - already done.
4885
Skipping GOV_ME_HAYES_DO_THINGS_DIFFERENTLY - already done.
4886
Skipping GOV_NV_SISOLAK_A_GREAT_EDUCATION - already done.
4887
Skipping GOV_WI_EVERS_TIME - already done.
4888
Skipping HOUSE_MI07_WALBERG_BIPARTISAN_SOLUTIONS - already done.
4889
Skipping GOV_GA_ABRAMS_GEORGIA_REPUBLICANS - already done.
4890
Skipping HOUSE_NC11_PRICE_COMING_FOR_MY_GUN - already done.
4891
Skipping HOUSE_NY24_KATKO_NEVER_FORGET - already done.
4892
Skipping GOV_AL_IVEY_CINDY - already done.
4893
Skipping HOUSE_NC11_PRICE_JULIAN_SECOND_AMENDMENT - already done.
4894
Skipping HOUSE_VA10_WEXTON_REAR_VIEW_MIRROR - already done.
4895
Skipping HOUSE_CA22_NUNES_JOE_POW - already done.
4896
Skipping HOUSE_AZ09_STANTON_KICK_THE_CAN - already done.
4897
Skipping HOUSE_WI01_STEIL_PLAYGROUND - already done.
4898
Skip

In [14]:
video_files

[]