In [11]:
import os
from pathlib import Path
import xml.etree.ElementTree as ET
import json
import cv2
import glob

In [12]:
# padding of leading zeros on video names
VIDEO_NAME_ZERO_PADDING = 4

# padding of leading zeros on static images names
IMG_NAME_ZERO_PADDING = 5


# Static Paths
# notebook is inside notebook folder (not in root)
ROOT_DIR = Path().resolve().parent
DATA_DIR = ROOT_DIR / 'data'
ANNOTATIONS_DIR = DATA_DIR / 'annotations'

# static image dir
IMG_DIR = DATA_DIR / 'images'

# OPENPOSE_PROCESSING_DIR
OP_PROCESSING_DIR = ROOT_DIR / 'openpose_processing'
# Make dir if it doesnt exist, don't complain if it does
OP_PROCESSING_DIR.mkdir(exist_ok=True, parents=True)

# BBOX_METADATA FILENAME
BBOX_METADATA_FILENAME = 'bbox_metadata.json'

# CROPS_METADATA FILENAME
CROPS_METADATA_FILENAME = 'crops_metadata.json'

In [13]:
# Define video for testing, we dont want to use all of them, just one
VIDEO_PREFIX = "video_"
VIDEO_IX = [i+1 for i in range(346)]



In [10]:
# create list of videos to process using JAAD file formatting video_000\d.mp4
queue = [VIDEO_PREFIX + str(ix).zfill(VIDEO_NAME_ZERO_PADDING) for ix in VIDEO_IX]

# get list of all videos in data folder
xml_files = glob.glob(str(ANNOTATIONS_DIR / '*.xml'))
# get list of files that appear in queue
queue_path = {xml.stem: xml for xml in xml_files if xml.stem in queue}

StopIteration: 

## Simplify obtention of bounding box coordinates
Will save in openpose_processing folder file: `bbox_metadata.json`

In [7]:
def parse_tracks(filepath):
    # here will go the data that will output to json
    tracks_dict = dict()
    
    # run xml parser
    tree = ET.parse(filepath)
    root = tree.getroot()
    
    # find all tracks in the xml file
    tracks = root.findall('track')
    
    # for each track, find all the bounding boxes and their metadata "items", we also need the id which
    # is the id of each pedestrian
    for i, track in enumerate(tracks):
        # the id is inside the box, so we need to retrieve it later, we start with id=None
        # to check later if id = None, else set it on the first iteration
        id = None
        boxes = track.findall('.//box')
        for box in boxes:
            if id is None:
                id = box.findall(".//attribute/[@name='id']")[0].text
                tracks_dict[id] = {}
            occlusion = box.findall(".//attribute/[@name='occlusion']")[0].text
            items = dict(box.items())
            items['occlusion'] = occlusion
            frame = items['frame']
            tracks_dict[id][frame] = items
    return tracks_dict
                

In [8]:
def save_bbox_metadata(path, video_name, name, data):
    
    path_dir = path / video_name
    filepath_dir = path_dir / name
    
    # make dir if it doesnt exist, don't complain if it does
    Path.mkdir(path_dir, exist_ok=True, parents=True)
    
    with open(filepath_dir, 'w') as f:
        f.write(json.dumps(data, indent=4))
    return 1

In [7]:
for video_name, filepath in queue_path.items():
    data = parse_tracks(filepath)
    save_bbox_metadata(path=OP_PROCESSING_DIR, video_name=video_name, name=BBOX_METADATA_FILENAME, data=data)
    

## Crop images

In [8]:
from PIL import Image
from dotenv import load_dotenv

load_dotenv("../openpose_path.env")



True

In [9]:
OP_EXECUTABLE = os.environ['OP_EXECUTABLE']


# folder name for the cropped images
CROPPED_DIR = 'cropped' 

# how much padding to leave around bounding box when cropping images
BORDER_PADDING = 0.3

In [10]:

for video_name in queue_path.keys():
    # set required paths
    image_dir = IMG_DIR / video_name
    metadata_filepath = OP_PROCESSING_DIR / video_name / BBOX_METADATA_FILENAME
    
    crops_metadata_filepath = OP_PROCESSING_DIR / video_name / CROPS_METADATA_FILENAME
    crops_metadata_dict = dict()
    
    with open(metadata_filepath, 'r') as f:
        data = json.load(f)
    
    tracks = list(data.keys())
    
    for track in tracks:
        crops_metadata_dict[track] = dict()
        # DIR to save cropped images
        cropped_im_dir = OP_PROCESSING_DIR / video_name / CROPPED_DIR / track
        Path.mkdir(cropped_im_dir, exist_ok=True, parents=True)
        
        frames = list(data[track].keys())
        
        
        # for each frame theres an image, let's get the image full path for the frame and crop it using the bbox, save it
        for frame in frames:
            frame_filename = str(frame).zfill(IMG_NAME_ZERO_PADDING) + '.png'
            frame_filepath = image_dir / frame_filename
            
            frame_data = data[track][frame]
            
            img = cv2.imread(str(frame_filepath))
            
            (left, top, right, bottom) = (int(float(frame_data['xtl'])), int(float(frame_data['ytl'])), int(float(frame_data['xbr'])), int(float(frame_data['ybr'])))
            
            left = max(0, left - int(BORDER_PADDING * (right - left)))
            right = min(img.shape[1], right + int(BORDER_PADDING * (right - left)))
            top = max(0, top - int(BORDER_PADDING * (bottom - top)))
            bottom = min(img.shape[0], bottom + int(BORDER_PADDING * (bottom - top)))
            
            crops_metadata_dict[track][frame] = {'left': left, 'top': top, 'right': right, 'bottom': bottom}
            
            # crop image
            cropped_img = img[top:bottom, left:right]
            (height, width, filters) = cropped_img.shape
            
            cv2.imwrite(str(cropped_im_dir / f'{frame}.png'), cropped_img)
    
    with open(crops_metadata_filepath, 'w') as f:
        json.dump(crops_metadata_dict, f, indent=4)