In [1]:
# create csv files from xml
import xml.etree.ElementTree as ET
import os

In [2]:
tree = ET.parse("train/annotations/img_0001.xml")
root = tree.getroot()
root

<Element 'annotation' at 0x10777c720>

In [18]:
# keys: image_id, image_path, width, height, class/name, xmin, xmax, ymin, ymax

def extract_metadata(annotation_dir_path):
    metadata = {
                'filename': [],
                'filepath': [],
                'width': [],
                'height': [],
                'depth': [],
                'label': [],
                'xmin': [],
                'ymin': [],
                'xmax': [],
                'ymax': []}

    for filename in os.listdir(annotation_dir_path):
        tree = ET.parse(annotation_dir_path + "/" + filename)
        root = tree.getroot()

        folder = annotation_dir_path.split("/")[0] + "/" + root.find('folder').text
        filename = root.find('filename').text
        filepath = folder + '/' + filename
        width = int(root.find('size/width').text)
        height = int(root.find('size/height').text)
        depth = int(root.find('size/depth').text)

        for obj in root.findall('object'):
            # TODO account for multiple bboxes in one image.
            label = obj.find('name').text
            bbox = obj.find('bndbox')
            xmin = int(bbox.find('xmin').text)
            ymin = int(bbox.find('ymin').text)
            xmax = int(bbox.find('xmax').text)
            ymax = int(bbox.find('ymax').text)

            metadata["label"].append(label)
            metadata["xmin"].append(xmin)
            metadata["ymin"].append(ymin)
            metadata["xmax"].append(xmax)
            metadata["ymax"].append(ymax)

        # metadata["folder"].append(folder)
            metadata["filename"].append(filename.split(".")[0])
            metadata["filepath"].append(filepath)
            metadata["width"].append(width)
            metadata["height"].append(height)
            metadata["depth"].append(depth)

    return metadata


In [19]:
# extract train metadata
train_metadata = extract_metadata("train/annotations")
print(train_metadata)

{'filename': ['img_0028', 'img_1336', 'img_0996', 'img_0996', 'img_2159', 'img_2159', 'img_0982', 'img_1444', 'img_1322', 'img_0014', 'img_0014', 'img_2165', 'img_2165', 'img_0772', 'img_0772', 'img_1478', 'img_2171', 'img_2171', 'img_2617', 'img_2824', 'img_0955', 'img_1493', 'img_1493', 'img_0799', 'img_0941', 'img_1487', 'img_2818', 'img_0969', 'img_0969', 'img_1652', 'img_2429', 'img_1120', 'img_0558', 'img_1646', 'img_1646', 'img_0570', 'img_0570', 'img_2367', 'img_1108', 'img_1108', 'img_0216', 'img_0202', 'img_2415', 'img_2415', 'img_2373', 'img_0564', 'img_1691', 'img_1691', 'img_1849', 'img_1685', 'img_1875', 'img_1875', 'img_1861', 'img_1726', 'img_1726', 'img_0438', 'img_1040', 'img_2549', 'img_1054', 'img_1732', 'img_1732', 'img_2213', 'img_2213', 'img_2575', 'img_0362', 'img_1068', 'img_1068', 'img_2561', 'img_2207', 'img_0410', 'img_0410', 'img_1083', 'img_1083', 'img_1929', 'img_1915', 'img_1915', 'img_1524', 'img_2039', 'img_1530', 'img_1530', 'img_1256', 'img_0148', 'i

In [20]:
train_metadata.keys()

dict_keys(['filename', 'filepath', 'width', 'height', 'depth', 'label', 'xmin', 'ymin', 'xmax', 'ymax'])

In [21]:
# check for consistency
for key in train_metadata.keys():
    print(key, len(train_metadata[key]))

filename 3199
filepath 3199
width 3199
height 3199
depth 3199
label 3199
xmin 3199
ymin 3199
xmax 3199
ymax 3199


In [22]:
import pandas as pd

train_df = pd.DataFrame(train_metadata)
train_df.head()

Unnamed: 0,filename,filepath,width,height,depth,label,xmin,ymin,xmax,ymax
0,img_0028,train/images/img_0028.jpg,640,640,3,Jett,292,283,384,445
1,img_1336,train/images/img_1336.jpg,640,640,3,Jett,374,292,397,366
2,img_0996,train/images/img_0996.jpg,640,640,3,Phoenix,294,304,307,349
3,img_0996,train/images/img_0996.jpg,640,640,3,Jett,250,245,267,289
4,img_2159,train/images/img_2159.jpg,640,640,3,Phoenix,94,109,281,639


In [23]:
train_df.to_csv("train.csv", index=False)

In [24]:
train_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3199 entries, 0 to 3198
Data columns (total 10 columns):
 #   Column    Non-Null Count  Dtype 
---  ------    --------------  ----- 
 0   filename  3199 non-null   object
 1   filepath  3199 non-null   object
 2   width     3199 non-null   int64 
 3   height    3199 non-null   int64 
 4   depth     3199 non-null   int64 
 5   label     3199 non-null   object
 6   xmin      3199 non-null   int64 
 7   ymin      3199 non-null   int64 
 8   xmax      3199 non-null   int64 
 9   ymax      3199 non-null   int64 
dtypes: int64(7), object(3)
memory usage: 250.1+ KB
