# Convert CVAT Video annotations to AVA format

### Unzip File
Set `path_to_zip` to the path of the zip file exported from CVAT.

In [1]:
def unzip_file(path_to_zip, extract_to="./"):
    """Extract every member of a zip archive into a directory.

    Args:
        path_to_zip: Path to the zip archive to read.
        extract_to: Directory the archive members are extracted into
            (defaults to "./").

    Returns:
        None. The extracted files on disk are the only result.
    """
    # Imported locally because this cell sits above the notebook's import cell.
    from zipfile import ZipFile

    with ZipFile(path_to_zip, mode='r') as archive:
        archive.extractall(path=extract_to)

# Zip archive exported from CVAT; its contents are extracted into "./".
path_to_zip = "./Escape_Room_Small.zip"
unzip_file(path_to_zip, "./")


After unzipping, you should have a file named `annotations.xml`. Rename it to something appropriate and set `PATH_TO_XML` in the next section to the new file name.

### Imports and Setup
Make sure to set `PATH_TO_XML` and `SAVE_FILE_PATH`.  
`NAME_TO_ID` maps each action name to its id as listed in `Action_Detection/data/actions_list.txt`.  
You can adjust it if you need to, but it must stay consistent with `actions_list.txt`.  


In [None]:
import numpy as np
import os 
import xmltodict
import pprint

# Report the CPU count seen by the kernel; `cores` is not read by any other
# cell visible in this notebook.
cores = os.cpu_count()
print("Number of cores: " + str(cores))


# ---- User configuration ----------------------------------------------------
# CVAT annotation XML to convert (the renamed "annotations.xml").
# NOTE(review): the file name is spelled "annotaions" -- it must match the
# file on disk exactly, so confirm before correcting the spelling.
PATH_TO_XML = "./escape_room_small_annotaions.xml"

# Text file that receives the converted annotation rows.
SAVE_FILE_PATH = "./ava_annotations.txt"

# Action label (as it appears in the CVAT attribute text) -> integer action id.
# NOTE(review): id 24 is not assigned below; confirm against actions_list.txt
# whether that gap is intentional.
NAME_TO_ID = {
    "walk": 0,
    "sit": 1,
    "stand": 2,
    "bend/bow (at the waist)": 3,
    "run/jog": 4,
    "hand wave": 5,
    "get up": 6,
    "paired(standing together with one other person)": 7,
    "huddle (standing together with 2 or more people)": 8,
    "lift/pick up": 9,
    "carry/hold (an object)": 10,
    "point to (an object)": 11,
    "write": 12,
    "read": 13,
    "put down": 14,
    "watch (object)": 15,
    "talk to": 16,
    "listen": 17,
    "watch (person)": 18,
    "gesture (a person)": 19,
    "give/serve (an object) to (a person)": 20,
    "take (an object) from (a person)": 21,
    "face to face": 22,
    "hi 5": 23,
    "laugh": 25,
    "smile": 26,
    "stressed": 27,
    "annoyed/frustrated": 28,
    "take (an object) from (robot)": 29,
    "give/serve (an object) to (robot)": 30,
    "talk to (robot)": 31,
    "listen to (robot)": 32,
    "watch (robot)": 33,
}


Number of cores: 32


### Convert annotations to AVA format

#### Load XML file

In [7]:
def get_xml_dic(xml_path):
    """Parse the XML file at ``xml_path`` into nested dictionaries.

    The file is opened in binary mode and handed to ``xmltodict.parse`` as a
    stream, so the bytes are decoded by the XML parser instead of with the
    platform's default text encoding (which is not UTF-8 on every system).

    Args:
        xml_path: Path to the XML file to read.

    Returns:
        The mapping produced by ``xmltodict.parse`` for the whole document.

    Raises:
        OSError: If the file cannot be opened.
    """
    with open(xml_path, "rb") as fd:
        return xmltodict.parse(fd)


# Keep only the document's top-level 'annotations' entry; the cells below read
# its 'meta' and 'track' children.
annotations = get_xml_dic(PATH_TO_XML)['annotations']


#### Meta Data

In [24]:
# Lookup tables between task ids and task (video) names, filled from the
# 'meta' section of the parsed export.
video_name_to_task_id = {}
task_id_to_video_name = {}
height = None
width = None

# xmltodict returns a dict for a single child element and a list only when the
# element repeats, so a project with exactly one task must be wrapped in a list
# before iterating.
tasks = annotations['meta']['project']['tasks']['task']
if not isinstance(tasks, list):
    tasks = [tasks]

frame_sizes = set()
for task in tasks:
    task_id = int(task['id'])
    video_name = task['name']
    video_name_to_task_id[video_name] = task_id
    task_id_to_video_name[task_id] = video_name
    # `height` / `width` end up holding the size of the last task listed.
    height = int(task['original_size']['height'])
    width = int(task['original_size']['width'])
    frame_sizes.add((width, height))

# Same single-vs-repeated normalisation for tracks; the key is missing
# entirely when the export contains no track.
tracks = annotations.get('track') or []
if not isinstance(tracks, list):
    tracks = [tracks]

print("Number of videos: ", len(video_name_to_task_id))
assert height is not None and width is not None
if len(frame_sizes) > 1:
    # A single global height/width cannot describe every video in this case.
    print("WARNING: tasks have different frame sizes (width, height):", sorted(frame_sizes))
print("height: ", height)
print("width: ", width)
print("tracks: ", len(tracks))


Number of videos:  2
height:  360
width:  640
tracks:  6


In [34]:
def _as_list(node):
    """Return ``node`` as a list: [] for None, the list itself, or [node]."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


# Only boxes whose frame number is a non-zero multiple of this value are
# exported. NOTE(review): presumably one sample per second of 30 fps video --
# confirm against the source videos.
KEYFRAME_STRIDE = 30

# Frame size (width, height) per task id, so each box is normalised with the
# size of the video it belongs to rather than the size of the last task.
# xmltodict yields a dict for a single child and a list for repeated children,
# hence the _as_list() wrapping here and in the loops below.
task_frame_size = {
    int(task['id']): (
        int(task['original_size']['width']),
        int(task['original_size']['height']),
    )
    for task in _as_list(annotations['meta']['project']['tasks']['task'])
}

# One output row per (box, action):
#   video_name,frame,xtl,ytl,xbr,ybr,action_id,track_id
# with box coordinates divided by the frame size and rounded to 3 decimals.
with open(SAVE_FILE_PATH, "w") as f:
    for track in _as_list(annotations.get('track')):
        track_id = track['@id']
        task_id = int(track['@task_id'])
        video_name = task_id_to_video_name[task_id]
        frame_width, frame_height = task_frame_size[task_id]

        for box in _as_list(track.get('box')):
            frame_num = int(box["@frame"])
            if frame_num == 0 or frame_num % KEYFRAME_STRIDE != 0:
                continue
            # Any '@outside' value other than '0' marks the box as outside.
            if box['@outside'] != '0':
                continue

            xtl = round(float(box['@xtl']) / frame_width, 3)
            ytl = round(float(box['@ytl']) / frame_height, 3)
            xbr = round(float(box['@xbr']) / frame_width, 3)
            ybr = round(float(box['@ybr']) / frame_height, 3)

            for attribute in _as_list(box.get('attribute')):
                # An attribute element with no text has no '#text' key.
                action_name = attribute.get('#text')
                if action_name is None or action_name == "None":
                    continue
                if attribute['@name'] == "Person ID":
                    continue
                action_id = NAME_TO_ID[action_name]
                row_str = f"{video_name},{frame_num},{xtl},{ytl},{xbr},{ybr},{action_id},{track_id}"
                f.write(row_str + "\n")

print("Done")
    

Done
