In [36]:
import os
import json
import glob
import mmcv
import pickle
from tqdm import tqdm
import xml.etree.ElementTree as ET

**Extract frames**

In [37]:
# Root folder that is searched for the raw validation videos.
path2videos = "videos/raw/validation"
# Recursively collect every .mp4 file below that folder (any nesting depth).
videopaths = glob.glob(f"{path2videos}/**/*.mp4", recursive=True)

In [38]:
# For every video: dump its frames to videos/frames/validation/<videoname>/ and
# rename them from frame_<n>.jpg to <videoname>_frame_<n+1>.jpg.
for video in videopaths:
    # Video name = file name up to its first dot ("dir/abc.mp4" -> "abc").
    videoname = video.split("/")[-1].split(".")[0]
    frame_dir = f"videos/frames/validation/{videoname}"

    # Write one frame_<n>.jpg per frame into frame_dir; the assertion below
    # expects the numbering to start at 0.
    open_video = mmcv.VideoReader(video)
    open_video.cvt2frames(f"{frame_dir}/", filename_tmpl="frame_{:d}.jpg")

    # Only pick up the freshly written frame_<n>.jpg files. Matching every
    # *.jpg would also return <videoname>_frame_<n>.jpg files left behind by a
    # previous run of this cell and make the index assertion fail on re-runs.
    frames = glob.glob(f"{glob.escape(frame_dir)}/frame_*.jpg")
    frames.sort(key=lambda x: int(x.split("/")[-1].split(".")[0].split("_")[-1]))

    # Rename image files and increase idx by 1 (1-based frame numbers)
    for idx, frame in enumerate(frames):
        if not os.path.isfile(frame):
            raise ValueError(f"{frame} is not a file...")

        # Parse the index once; the message reports the parsed number so a
        # failing assertion surfaces as AssertionError rather than as a
        # ValueError from int("frame_<n>").
        frame_number = int(frame.split("/")[-1].split(".")[0].split("_")[-1])
        assert frame_number == idx, f"{frame_number}, {idx}"
        os.rename(frame, f"{frame_dir}/{videoname}_frame_{idx+1}.jpg")

[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 104.2 task/s, elapsed: 3s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 105.4 task/s, elapsed: 3s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 102.4 task/s, elapsed: 4s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 104.1 task/s, elapsed: 3s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 101.7 task/s, elapsed: 4s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 107.4 task/s, elapsed: 3s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 79.0 task/s, elapsed: 5s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 65.7 task/s, elapsed: 5s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 86.8 task/s, elapsed: 4s, ETA:     0s
[>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>] 360/360, 82.3 task/s, elapsed: 4s, ETA:  

In [29]:
# Count the .jpg frames found (recursively) for whatever `videoname` currently holds.
# NOTE(review): this looks under videos/frames/test/, while the extraction cell
# writes to videos/frames/validation/ — confirm which partition is intended.
len(glob.glob(f"videos/frames/test/{videoname}/**/*.jpg", recursive=True))

360

**Get video dimensions**

In [None]:
# Map each .mp4 file name in videos/ to its frame width and height.
video_details = {}
videos = [name for name in os.listdir("videos/") if name.endswith(".mp4")]
for v in tqdm(videos):
    reader = mmcv.VideoReader(f"videos/{v}")
    video_details[v] = {"width": reader.width, "height": reader.height}

In [None]:
# Inspect the collected mapping: video file name -> {"width": ..., "height": ...}.
video_details

In [None]:
# Every entry of annotations/ is treated as one video name by the conversion
# loop, which lists annotations/<video>/ for its per-frame XML files.
# NOTE(review): any other entry in annotations/ would be picked up as well —
# confirm the folder only holds per-video directories.
videos = os.listdir("annotations/")

In [None]:
def get_ape_coordinates(ape):
    """Return the values stored under the ape's ``bndbox`` element as floats.

    Args:
        ape: XML element that has a ``bndbox`` child.

    Returns:
        list[float]: One value per child of ``bndbox``, in document order.
    """
    return [float(value.text) for value in ape.find("bndbox")]


def get_ape_id(ape):
    """Return the text of the ape's ``id`` element converted to ``int``.

    Args:
        ape: XML element that has an ``id`` child.

    Returns:
        int: The parsed identifier.
    """
    id_element = ape.find("id")
    return int(id_element.text)


def get_species(ape):
    """Return the text of the ape's ``name`` element.

    Args:
        ape: XML element that has a ``name`` child.

    Returns:
        The element's text (``None`` when the element is empty).
    """
    name_element = ape.find("name")
    return name_element.text


def get_behaviour(ape):
    """Return the text of the ape's ``activity`` element.

    Args:
        ape: XML element that has an ``activity`` child.

    Returns:
        The element's text (``None`` when the element is empty).
    """
    activity_element = ape.find("activity")
    return activity_element.text

In [None]:
# Convert the per-frame XML annotations of every video into one JSON file
# (<video>.json, written to the current working directory).
for video in tqdm(videos):
    entry = {"video": video, "annotations": []}

    # Frame count = number of XML files; the loop below relies on them being
    # named <video>_frame_1.xml ... <video>_frame_<frames>.xml without gaps.
    frames = len([x for x in os.listdir(f"annotations/{video}") if x.endswith(".xml")])

    for i in range(1, frames + 1):
        xmlfile = f"annotations/{video}/{video}_frame_{i}.xml"

        # Parse straight from the path: ElementTree opens and closes the file
        # itself, so no handle is left open per frame.
        root = ET.parse(xmlfile).getroot()

        item = {"frame_id": int(root.find("frameid").text), "detections": []}

        # One detection record per <object> element in the frame.
        for ape in root.findall("object"):
            item["detections"].append(
                {
                    "bbox": get_ape_coordinates(ape),
                    "ape_id": get_ape_id(ape),
                    "species": get_species(ape),
                    "behaviour": get_behaviour(ape),
                }
            )

        entry["annotations"].append(item)

    with open(f"{video}.json", "w") as handle:
        json.dump(entry, handle)

In [None]:
# Keys of the entry built for the last video of the conversion loop
# ("video" and "annotations").
entry.keys()

**Restructure Dataset**

In [None]:
import shutil
from tqdm import tqdm

In [None]:
# Read the whitespace-separated video names of the training split. The
# context manager closes the file; split() without arguments already ignores
# leading/trailing whitespace, so no separate strip() is needed.
with open("splits/traindata.txt") as handle:
    videos = handle.read().split()

In [None]:
# Copy the densepose pickle of every training video into the train folder.
dense_train_dir = "restructure/densepose_annotations/train"
# Without an existing directory shutil.copy would write a *file* called
# "train" instead of copying into a folder, so make sure it exists first.
os.makedirs(dense_train_dir, exist_ok=True)

# Best-effort copy: videos whose pickle cannot be copied are skipped, but
# recorded so that missing files do not go unnoticed.
copy_failures = []
for v in tqdm(videos):
    try:
        shutil.copy(
            f"restructure/densepose_annotations/{v}_dense.pkl",
            dense_train_dir,
        )
    except OSError as err:
        # Covers missing sources, permission problems, etc. — unlike a bare
        # except, KeyboardInterrupt and programming errors still propagate.
        copy_failures.append((v, err))

print(
    f"Copied {len(videos) - len(copy_failures)} of {len(videos)} densepose files; "
    f"{len(copy_failures)} skipped (see copy_failures)"
)

In [None]:
# Copy the densepose pickle of video TsFCsskzig into the validation folder.
validation_source = "restructure/densepose_annotations/TsFCsskzig_dense.pkl"
validation_target = "restructure/densepose_annotations/validation"
shutil.copy(validation_source, validation_target)

In [None]:
import pickle

In [None]:
# Load the densepose annotations of video p3feC1D6cK from the train folder.
# NOTE: pickle.load can execute arbitrary code — only use it on trusted files.
dense_pickle = "restructure/densepose_annotations/train/p3feC1D6cK_dense.pkl"
with open(dense_pickle, "rb") as handle:
    data = pickle.load(handle)

In [None]:
# Inspect the loaded object (renders it in full as the cell output).
data

In [None]:
# Relabel, in place, every detection whose ape_id is 2 so that it carries
# ape_id 1 instead.
all_detections = (
    det for frame in data["annotations"] for det in frame["detections"]
)
for det in all_detections:
    if det["ape_id"] == 2:
        det["ape_id"] = 1

In [None]:
# Write `data` back to the pickle of video p3feC1D6cK in the train folder,
# using the highest pickle protocol available.
dense_pickle = "restructure/densepose_annotations/train/p3feC1D6cK_dense.pkl"
with open(dense_pickle, "wb") as handle:
    pickle.dump(data, handle, protocol=pickle.HIGHEST_PROTOCOL)

**Find videos with duplicate apes**

In [None]:
# Recursively collect every .json file below restructure/annotations/standard.
# Note that `videos` now holds file paths rather than bare video names.
videos = glob.glob("restructure/annotations/standard/**/*.json", recursive=True)

In [None]:
# Scan every annotation file for frames in which the same ape_id occurs more
# than once; the file name is recorded once per offending frame.
duplicates, apes = [], []

for v in tqdm(videos):
    with open(v, "rb") as handle:
        data = json.load(handle)

    video_file = v.split("/")[-1]
    for frame in data["annotations"]:
        apes = [det["ape_id"] for det in frame["detections"]]
        # A set drops repeated ids, so a shorter set means a repeated ape_id.
        has_repeated_id = len(apes) > len(set(apes))
        if has_repeated_id:
            duplicates.append(video_file)

In [None]:
# Ape ids of the last frame visited by the duplicate scan (`apes` is
# overwritten for every frame, so this is not an aggregate).
apes

In [None]:
# Unique annotation file names with at least one frame containing a repeated ape_id.
list(set(duplicates))

In [None]:
# Number of offending frames, not videos: the scan appends the file name once
# per frame that contains a repeated ape_id.
len(duplicates)

# Remove these videos and see if everything works...

**Checking all partitions are the same...**

In [None]:
import os
from glob import glob

In [None]:
def _json_file_names(root):
    """Return the file names of all .json files found recursively below ``root``."""
    return [path.split("/")[-1] for path in glob(f"{root}/**/*.json", recursive=True)]


# File names of the obfuscated and the standard annotation sets, for comparison.
videos1 = _json_file_names("annotations/json/obfu/all")
videos2 = _json_file_names("restructure/annotations/standard/all")