In [10]:
import os
import xml.etree.ElementTree as ET
import math
import shutil
import pandas as pd
from PIL import Image

In [3]:
# Copy all image files and annotation files to combined folder
# path to source directory
countries = ['Czech', 'Japan', 'United_States', 'China_Motorbike']
for gov in countries:
    
    src_dir = os.path.join("RDD2022_all_countries",gov,gov,'train','images')
    # path to destination directory
    dest_dir = os.path.join("combined",'images')

    shutil.copytree(src_dir, dest_dir, dirs_exist_ok=True)
    
    src_dir = os.path.join("RDD2022_all_countries",gov,gov,'train','annotations','xmls')
    # path to destination directory
    dest_dir = os.path.join("combined",'annotations')

    shutil.copytree(src_dir, dest_dir, dirs_exist_ok=True)

In [4]:
# Read in train test val csv to get train test val file names
train = pd.read_csv('train.csv')
validation = pd.read_csv('validation.csv')
test = pd.read_csv('test.csv')

In [12]:
# Based on train csv copy image files and format labels to respective train folders
# (while changing size of image)
# (formatting labels)
classes = {"D00": 0, "D10": 2, "D20": 4, "D40": 5, 'D43':6, 'D01':1, 'D11':3, 'D50':8, 'D44':7, 'Repair':9}

directory = os.path.join("combined",'images')
for filename in train["Filename"]:
    image = Image.open(os.path.join(directory, filename + ".jpg"))
    image.thumbnail((640,640))
    image.save(os.path.join('train','images',filename + ".jpg"))
    
    tree = ET.parse(os.path.join("combined",'annotations',filename + ".xml"))
    root = tree.getroot()
    object_row = []
    for i in range(len(root)):
        if root[i].tag == 'size':
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'width':
                    image_width = int(root[i][j1].text)
                    break
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'height':
                    image_height= int(root[i][j1].text)
                    break
    for i in range(len(root)):
        if root[i].tag == 'object':
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'name':
                    CL = root[i][j2].text
                    break
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'bndbox':
                    x_centre_l = list(map(int,[root[i][j2][0].text,root[i][j2][2].text]))
                    y_centre_l = list(map(int,[root[i][j2][1].text,root[i][j2][3].text])) 
                    width = abs(x_centre_l[0]-x_centre_l[1])/image_width
                    height= abs(y_centre_l[0]-y_centre_l[1])/image_height
                    x_centre= sum(x_centre_l)/2/image_width
                    y_centre= sum(y_centre_l)/2/image_height 
                    
                    x= f"{classes[CL]} {x_centre} {y_centre} {width} {height}"
                    object_row.append(x)
    with open(os.path.join("train",'labels',filename + ".txt"),"w") as f:   # Opens file and casts as f 
        for i in range(len(object_row)):
            f.write(f"{object_row[i]}\n")

    

In [13]:
# Based on test csv copy image files and format labels to respective train folders
# (while changing size of image)
# (formatting labels)

directory = os.path.join("combined",'images')
for filename in test["Filename"]:
    image = Image.open(os.path.join(directory, filename + ".jpg"))
    image.thumbnail((640,640))
    image.save(os.path.join('test','images',filename + ".jpg"))
    
    tree = ET.parse(os.path.join("combined",'annotations',filename + ".xml"))
    root = tree.getroot()
    object_row = []
    for i in range(len(root)):
        if root[i].tag == 'size':
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'width':
                    image_width = int(root[i][j1].text)
                    break
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'height':
                    image_height= int(root[i][j1].text)
                    break
    for i in range(len(root)):
        if root[i].tag == 'object':
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'name':
                    CL = root[i][j2].text
                    break
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'bndbox':
                    x_centre_l = list(map(int,[root[i][j2][0].text,root[i][j2][2].text]))
                    y_centre_l = list(map(int,[root[i][j2][1].text,root[i][j2][3].text])) 
                    width = abs(x_centre_l[0]-x_centre_l[1])/image_width
                    height= abs(y_centre_l[0]-y_centre_l[1])/image_height
                    x_centre= sum(x_centre_l)/2/image_width
                    y_centre= sum(y_centre_l)/2/image_height 
                    
                    x= f"{classes[CL]} {x_centre} {y_centre} {width} {height}"
                    object_row.append(x)
    with open(os.path.join("test",'labels',filename + ".txt"),"w") as f:   # Opens file and casts as f 
        for i in range(len(object_row)):
            f.write(f"{object_row[i]}\n")

In [14]:
# Based on val csv copy image files and format labels to respective train folders
# (while changing size of image)
# (formatting labels)
directory = os.path.join("combined",'images')
for filename in validation["Filename"]:
    image = Image.open(os.path.join(directory, filename + ".jpg"))
    image.thumbnail((640,640))
    image.save(os.path.join('validation','images',filename + ".jpg"))
    
    tree = ET.parse(os.path.join("combined",'annotations',filename + ".xml"))
    root = tree.getroot()
    object_row = []
    for i in range(len(root)):
        if root[i].tag == 'size':
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'width':
                    image_width = int(root[i][j1].text)
                    break
            for j1 in range(len(root[i])):
                if root[i][j1].tag == 'height':
                    image_height= int(root[i][j1].text)
                    break
    for i in range(len(root)):
        if root[i].tag == 'object':
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'name':
                    CL = root[i][j2].text
                    break
            for j2 in range(len(root[i])):
                if root[i][j2].tag == 'bndbox':
                    x_centre_l = list(map(int,[root[i][j2][0].text,root[i][j2][2].text]))
                    y_centre_l = list(map(int,[root[i][j2][1].text,root[i][j2][3].text])) 
                    width = abs(x_centre_l[0]-x_centre_l[1])/image_width
                    height= abs(y_centre_l[0]-y_centre_l[1])/image_height
                    x_centre= sum(x_centre_l)/2/image_width
                    y_centre= sum(y_centre_l)/2/image_height 
                    
                    x= f"{classes[CL]} {x_centre} {y_centre} {width} {height}"
                    object_row.append(x)
    with open(os.path.join("validation",'labels',filename + ".txt"),"w") as f:   # Opens file and casts as f 
        for i in range(len(object_row)):
            f.write(f"{object_row[i]}\n")

In [20]:

# the number of each class labels for each country
#check classes
for gov in countries:
    cls_names = []
    total_images = 0
    file_list = [filename for filename in os.listdir(os.path.join('combined', 'annotations'))]
    
    for file in file_list:

        total_images = total_images + 1
      
        infile_xml = open(os.path.join('combined', 'annotations', file))
        tree = ET.parse(infile_xml)
        root = tree.getroot()
        for obj in root.iter('object'):
            cls_name = obj.find('name').text
            cls_names.append(cls_name)
    print('Country : ',gov)
    print("\nTotal images in {} ： {}".format(gov , total_images))
    print("Total labels in {} ： {}\n".format(gov , len(cls_names)))
    
    count_dict = collections.Counter(cls_names)
    cls_count = []
    for damageType in damageTypes:
        print(str(damageType) + ' : ' + str(count_dict[damageType]))
        cls_count.append(count_dict[damageType])
    print('***************************************************************************************\n')


Country :  Czech

Total images in Czech ： 20117
Total labels in Czech ： 42440



NameError: name 'collections' is not defined