# Convert labels from .xml to .txt

In order to augment our images we first need to convert .xml files into .txt files. Here is the code to do that.

In [7]:
# Convert to txt file for augmentation
#
# Every .xml annotation in xml_dir becomes one line of labels.txt:
#   <image path> <xmin>,<ymin>,<xmax>,<ymax> <class name> ...
# A line is written for every annotation file, even when it contains no
# object of the wanted class (the line then holds only the image path).

import xml.etree.ElementTree as ET
import os

xml_dir = 'D:/data/ChestXray/xml/'#path to xml file folder
image_path_prefix = 'D:/data/ChestXray/X_Ray/'  # path to image file folder
wanted_class = 'chest_x_ray'  # only objects with this <name> are exported

# Keep annotation files only, so a stray non-xml file in the folder cannot
# break ET.parse; sorted() makes the line order reproducible.
xmls = sorted(name for name in os.listdir(xml_dir) if name.lower().endswith('.xml'))

all_labels = []
for xml in xmls:
    # splitext drops only the final extension, so names containing extra
    # dots (e.g. "scan.v2.xml") keep their full stem.
    image_name = os.path.splitext(xml)[0]+'.jpeg'
    line = image_path_prefix+image_name+' '
    root = ET.parse(os.path.join(xml_dir, xml)).getroot()
    for objct in root.iter('object'):
        obj_name = objct.find('name').text
        if obj_name != wanted_class:
            # Check the class first: objects of other classes are never
            # inspected any further.
            continue
        node = objct.find('bndbox')
        coords = [node.find(tag).text.strip() for tag in ('xmin', 'ymin', 'xmax', 'ymax')]
        line += ",".join(coords)+" "+obj_name+' '

    all_labels.append(line)

with open('D:/data/ChestXray/labels.txt','w') as f:   #generated txt file
    f.writelines(line+'\n' for line in all_labels)

# Open labels.txt

Check the content of the .txt file. The number of lines should be equal to the number of .xml files.

In [8]:
# Load every line of labels.txt into `contents` and show how many there are

import os
with open('D:/data/ChestXray/labels.txt','r') as labels_file:
    # Iterating the file object yields the same newline-terminated lines
    # that readlines() returns.
    contents = list(labels_file)
len(contents)

260

# Making directory for augmented images

1. Make a folder **(in this case aug_x_ray)** to store the augmented images.

In [9]:
# Locations used by the augmentation step.
image_dir = 'D:/data/ChestXray/X_Ray/'#IMAGE DIR
save_dir = 'D:/data/ChestXray/aug_x_ray/'#AUGMENT IMAGES DIR
labels_txt = 'D:/data/ChestXray/labels.txt'#ANNOTATION TEXT DIR
output_file= 'D:/data/ChestXray/augment.txt'#AUGMENT TEXT

# Augmented images are written into save_dir later on, so actually create
# the folder here; exist_ok keeps the cell safe to re-run.
os.makedirs(save_dir, exist_ok=True)

images = os.listdir(image_dir)
len(images)

260

# Augmenting images and saving their annotations to augment.txt

## Steps 

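1. Change the index number in **image_name = line.split(" ")[0].split("/")[4]** according to your directory structure, so that it selects the image file name.
2. Assign a unique name to the augmented images by changing the **'AdditiveGaussianNoise'** prefix in **f.write("D:/data/ChestXray/aug_x_ray/"+'AdditiveGaussianNoise'+image_name+" "+cont+'\n')** and in the matching **cv2.imwrite** calls.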
3. This will also create augment.txt. This will be used in next step to produce .xml files for augmented images.

In [25]:
# Augmentation
#
# For every line of labels.txt (held in `contents`) this cell
#   1. loads the source image from image_dir,
#   2. runs the imgaug pipeline on the image and on its bounding box,
#   3. saves the augmented image into save_dir, and
#   4. appends "<augmented image path> <xmin>,<ymin>,<xmax>,<ymax> <class>"
#      to output_file.
# Only the first "<box> <class>" pair of a line is used.

import imgaug as ia
from imgaug import augmenters as iaa

import cv2
import os, shutil

ia.seed(1)

# Prefix that keeps augmented file names unique. Change it whenever a
# different augmenter is enabled below.
aug_prefix = 'AdditiveGaussianNoise'

for line in contents:
    fields = line.split()
    if len(fields) < 3:
        # The line holds no "<box> <class>" pair (image without a labelled
        # object); there is nothing to augment.
        print("Skipping line without a bounding box: %r" % line.strip())
        continue

    # basename works for any directory depth, unlike a fixed split("/") index.
    image_name = os.path.basename(fields[0])
    print(image_name)#image name
    image = cv2.imread(image_dir+image_name)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        print("Skipping unreadable image: "+image_dir+image_name)
        continue

    obj = fields[2]#object name
    xmin, ymin, xmax, ymax = [float(value) for value in fields[1].split(",")][:4]
    bbs = ia.BoundingBoxesOnImage(
        [ia.BoundingBox(x1=xmin, y1=ymin, x2=xmax, y2=ymax)],
        shape=image.shape)

    # Alternative augmenters are kept as comments; enable what you need and
    # update aug_prefix accordingly.
    seq = iaa.Sequential(
    [
#        iaa.Fliplr(1),
#        iaa.Affine(rotate=10),
        iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
#        iaa.Multiply((0.5, 1.5), per_channel=0.5),
#        iaa.Add((-10, 10), per_channel=0.5),
#        iaa.Affine(translate_px={"x": 40, "y": 60}, rotate=(-0.5,2.75)),
#        iaa.Flipud(0.2)

    ])
    # A deterministic copy applies the same random draw to the image and to
    # its bounding box.
    seq_det = seq.to_deterministic()
    image_aug = seq_det.augment_images([image])[0]
    bbs_aug = seq_det.augment_bounding_boxes([bbs])[0]

    before = bbs.bounding_boxes[0]
    after = bbs_aug.bounding_boxes[0]
    # Coordinates are truncated to whole pixels for the annotation file.
    cont = ",".join(str(int(value)) for value in (after.x1, after.y1, after.x2, after.y2))
    cont += " "+str(obj)
    print(image_name)

    print("BB %d: (%.4f, %.4f, %.4f, %.4f) -> (%.4f, %.4f, %.4f, %.4f)" % (
        0,
        before.x1, before.y1, before.x2, before.y2,
        after.x1, after.y1, after.x2, after.y2)
    )
    print(cont)

    # Write the image once and fail loudly: an annotation line pointing at
    # an image that was never saved would break the txt -> xml step.
    aug_path = save_dir+aug_prefix+image_name
    if not cv2.imwrite(aug_path, image_aug):#IMAGES
        raise OSError("Could not write augmented image: "+aug_path)

    # Append mode: lines accumulate across runs of this cell.
    with open(output_file,"a") as f:
        f.write(aug_path+" "+cont+'\n')#TEXT

1.jpeg
1.jpeg
BB 0: (29.0000, 31.0000, 1214.0000, 1209.0000) -> (29.0000, 31.0000, 1214.0000, 1209.0000)
29,31,1214,1209 chest_x_ray
10.jpeg
10.jpeg
BB 0: (12.0000, 29.0000, 1008.0000, 995.0000) -> (12.0000, 29.0000, 1008.0000, 995.0000)
12,29,1008,995 chest_x_ray
101.jpeg
101.jpeg
BB 0: (29.0000, 28.0000, 1273.0000, 1094.0000) -> (29.0000, 28.0000, 1273.0000, 1094.0000)
29,28,1273,1094 chest_x_ray
102.jpeg
102.jpeg
BB 0: (19.0000, 13.0000, 1145.0000, 1074.0000) -> (19.0000, 13.0000, 1145.0000, 1074.0000)
19,13,1145,1074 chest_x_ray
103.jpeg
103.jpeg
BB 0: (13.0000, 16.0000, 748.0000, 640.0000) -> (13.0000, 16.0000, 748.0000, 640.0000)
13,16,748,640 chest_x_ray
104.jpeg
104.jpeg
BB 0: (11.0000, 11.0000, 751.0000, 617.0000) -> (11.0000, 11.0000, 751.0000, 617.0000)
11,11,751,617 chest_x_ray
105.jpeg
105.jpeg
BB 0: (13.0000, 13.0000, 744.0000, 588.0000) -> (13.0000, 13.0000, 744.0000, 588.0000)
13,13,744,588 chest_x_ray
106.jpeg
106.jpeg
BB 0: (57.0000, 14.0000, 709.0000, 875.0000) -> (5

157.jpeg
BB 0: (29.0000, 26.0000, 1358.0000, 1237.0000) -> (29.0000, 26.0000, 1358.0000, 1237.0000)
29,26,1358,1237 chest_x_ray
158.jpeg
158.jpeg
BB 0: (21.0000, 15.0000, 1657.0000, 1272.0000) -> (21.0000, 15.0000, 1657.0000, 1272.0000)
21,15,1657,1272 chest_x_ray
159.jpeg
159.jpeg
BB 0: (13.0000, 18.0000, 1450.0000, 1154.0000) -> (13.0000, 18.0000, 1450.0000, 1154.0000)
13,18,1450,1154 chest_x_ray
16.jpeg
16.jpeg
BB 0: (26.0000, 25.0000, 1476.0000, 1240.0000) -> (26.0000, 25.0000, 1476.0000, 1240.0000)
26,25,1476,1240 chest_x_ray
160.jpeg
160.jpeg
BB 0: (21.0000, 12.0000, 1581.0000, 1405.0000) -> (21.0000, 12.0000, 1581.0000, 1405.0000)
21,12,1581,1405 chest_x_ray
161.jpeg
161.jpeg
BB 0: (28.0000, 22.0000, 1659.0000, 1351.0000) -> (28.0000, 22.0000, 1659.0000, 1351.0000)
28,22,1659,1351 chest_x_ray
162.jpeg
162.jpeg
BB 0: (66.0000, 66.0000, 2642.0000, 2405.0000) -> (66.0000, 66.0000, 2642.0000, 2405.0000)
66,66,2642,2405 chest_x_ray
163.jpeg
163.jpeg
BB 0: (29.0000, 29.0000, 1268.0000

22.jpeg
BB 0: (54.0000, 33.0000, 1484.0000, 1226.0000) -> (54.0000, 33.0000, 1484.0000, 1226.0000)
54,33,1484,1226 chest_x_ray
220.jpeg
220.jpeg
BB 0: (15.0000, 14.0000, 1364.0000, 1035.0000) -> (15.0000, 14.0000, 1364.0000, 1035.0000)
15,14,1364,1035 chest_x_ray
221.jpeg
221.jpeg
BB 0: (9.0000, 11.0000, 964.0000, 798.0000) -> (9.0000, 11.0000, 964.0000, 798.0000)
9,11,964,798 chest_x_ray
23.jpeg
23.jpeg
BB 0: (29.0000, 24.0000, 1977.0000, 1252.0000) -> (29.0000, 24.0000, 1977.0000, 1252.0000)
29,24,1977,1252 chest_x_ray
24.jpeg
24.jpeg
BB 0: (16.0000, 14.0000, 1505.0000, 974.0000) -> (16.0000, 14.0000, 1505.0000, 974.0000)
16,14,1505,974 chest_x_ray
25.jpeg
25.jpeg
BB 0: (51.0000, 26.0000, 1931.0000, 1455.0000) -> (51.0000, 26.0000, 1931.0000, 1455.0000)
51,26,1931,1455 chest_x_ray
26.jpeg
26.jpeg
BB 0: (28.0000, 23.0000, 1495.0000, 1122.0000) -> (28.0000, 23.0000, 1495.0000, 1122.0000)
28,23,1495,1122 chest_x_ray
27.jpeg
27.jpeg
BB 0: (38.0000, 38.0000, 2017.0000, 1350.0000) -> (38.0

9.jpeg
BB 0: (12.0000, 98.0000, 1014.0000, 919.0000) -> (12.0000, 98.0000, 1014.0000, 919.0000)
12,98,1014,919 chest_x_ray
m1.jpeg
m1.jpeg
BB 0: (7.0000, 7.0000, 618.0000, 472.0000) -> (7.0000, 7.0000, 618.0000, 472.0000)
7,7,618,472 chest_x_ray
m10.jpeg
m10.jpeg
BB 0: (14.0000, 10.0000, 614.0000, 472.0000) -> (14.0000, 10.0000, 614.0000, 472.0000)
14,10,614,472 chest_x_ray
m11.jpeg
m11.jpeg
BB 0: (14.0000, 14.0000, 546.0000, 534.0000) -> (14.0000, 14.0000, 546.0000, 534.0000)
14,14,546,534 chest_x_ray
m14.jpeg
m14.jpeg
BB 0: (27.0000, 26.0000, 984.0000, 987.0000) -> (27.0000, 26.0000, 984.0000, 987.0000)
27,26,984,987 chest_x_ray
m15.jpeg
m15.jpeg
BB 0: (16.0000, 8.0000, 489.0000, 491.0000) -> (16.0000, 8.0000, 489.0000, 491.0000)
16,8,489,491 chest_x_ray
m2.jpeg
m2.jpeg
BB 0: (8.0000, 7.0000, 541.0000, 538.0000) -> (8.0000, 7.0000, 541.0000, 538.0000)
8,7,541,538 chest_x_ray
m3.jpeg
m3.jpeg
BB 0: (8.0000, 4.0000, 638.0000, 464.0000) -> (8.0000, 4.0000, 638.0000, 464.0000)
8,4,638,464

r4.jpeg
BB 0: (9.0000, 7.0000, 239.0000, 200.0000) -> (9.0000, 7.0000, 239.0000, 200.0000)
9,7,239,200 chest_x_ray
r5.jpeg
r5.jpeg
BB 0: (4.0000, 6.0000, 207.0000, 228.0000) -> (4.0000, 6.0000, 207.0000, 228.0000)
4,6,207,228 chest_x_ray
r6.jpeg
r6.jpeg
BB 0: (7.0000, 5.0000, 236.0000, 205.0000) -> (7.0000, 5.0000, 236.0000, 205.0000)
7,5,236,205 chest_x_ray
r7.jpeg
r7.jpeg
BB 0: (11.0000, 5.0000, 211.0000, 224.0000) -> (11.0000, 5.0000, 211.0000, 224.0000)
11,5,211,224 chest_x_ray
r8.jpeg
r8.jpeg
BB 0: (25.0000, 5.0000, 233.0000, 189.0000) -> (25.0000, 5.0000, 233.0000, 189.0000)
25,5,233,189 chest_x_ray
r9.jpeg
r9.jpeg
BB 0: (9.0000, 6.0000, 223.0000, 216.0000) -> (9.0000, 6.0000, 223.0000, 216.0000)
9,6,223,216 chest_x_ray


# Creating augmented .xml file

## Steps

1. Set **src_dir = "D:/data/ChestXray/aug_x_ray/"** to the folder that holds the augmented images generated in the step above.
2. Change the index in **image_name = splitted[0].split("/")[4]** so that it selects the image file name for your directory structure.
3. Change the path in **ET.SubElement(root, "path").text = 'D:/data/ChestXray/aug_x_ray/'+image_name** to the path of your augmented image folder.
4. Change the path in **tree.write("D:/data/ChestXray/aug_xml/"+file_name+".xml")**. This is where the .xml files for the augmented images are stored.

**NOTE** The augmentation step appends to **augment.txt**, so delete the contents of that file every time you create new .xml files.

In [22]:
# Read augment.txt with plain Python instead of the `! cat` shell escape,
# which needs a `cat` executable that is not guaranteed on Windows.
# `labels` is a list of lines without trailing newlines; blank lines are dropped.
with open('D:/data/ChestXray/augment.txt') as f:
    labels = [text for text in f.read().splitlines() if text.strip()]

In [23]:
# Converting from txt to xml
#
# Each line of `labels` has the form
#   <augmented image path> <xmin>,<ymin>,<xmax>,<ymax> <class> [<box> <class> ...]
# and produces one annotation .xml file in xml_out_dir, named after the image.

src_dir = "D:/data/ChestXray/aug_x_ray/"
xml_out_dir = "D:/data/ChestXray/aug_xml/"
# Value of the <folder> tag. The earlier assignment (folder_name = "allimages")
# was commented out, leaving the name undefined on a fresh kernel; the name of
# the image folder is used instead.
folder_name = os.path.basename(os.path.normpath(src_dir))
os.makedirs(xml_out_dir, exist_ok=True)

for img in labels:
    fields = img.split()
    if len(fields) < 3:
        # No "<box> <class>" pair on this line, so there is nothing to annotate.
        print("Skipping line without a bounding box: %r" % img)
        continue

    # basename works for any directory depth, unlike a fixed split("/") index.
    image_name = os.path.basename(fields[0])#image name
    # Boxes and class names alternate after the path; pair them up.
    labelled_boxes = list(zip(fields[1::2], fields[2::2]))

    print(src_dir+image_name)
    image = cv2.imread(src_dir+image_name)
    print(image_name)
    if image is None:
        # cv2.imread reports a missing or unreadable file by returning None.
        print("Skipping unreadable image: "+src_dir+image_name)
        continue
    image_info = image.shape
    print(image_info)
    height, width, depth = image_info[0], image_info[1], image_info[2]

    root = ET.Element("annotation")
    ET.SubElement(root, "folder").text = folder_name
    ET.SubElement(root, "filename").text = image_name
    ET.SubElement(root, "path").text = src_dir+image_name
    ET.SubElement(ET.SubElement(root, "source"), "database").text = "Unknown"
    size = ET.SubElement(root, "size")
    ET.SubElement(size, "width").text = str(width)
    ET.SubElement(size, "height").text = str(height)
    ET.SubElement(size, "depth").text = str(depth)
    ET.SubElement(root, "segmented").text = '0'

    for box_text, obj_name in labelled_boxes:
        # Coordinates are stored as whole pixels.
        xmin, ymin, xmax, ymax = [str(int(float(value))) for value in box_text.split(',')][:4]
        obj = ET.SubElement(root, "object")
        ET.SubElement(obj, "name").text = obj_name
        ET.SubElement(obj, "pose").text = "Unspecified"
        ET.SubElement(obj, "truncated").text = "0"
        ET.SubElement(obj, "difficult").text = "0"
        bndbox = ET.SubElement(obj, "bndbox")
        ET.SubElement(bndbox, "xmin").text = xmin
        ET.SubElement(bndbox, "ymin").text = ymin
        ET.SubElement(bndbox, "xmax").text = xmax
        ET.SubElement(bndbox, "ymax").text = ymax

    # splitext drops only the final extension, so dotted names keep their stem.
    file_name = os.path.splitext(image_name)[0]
    ET.ElementTree(root).write(xml_out_dir+file_name+".xml")

D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise1.jpeg
AdditiveGaussianNoise1.jpeg
(1243, 1249, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise10.jpeg
AdditiveGaussianNoise10.jpeg
(1024, 1024, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise101.jpeg
AdditiveGaussianNoise101.jpeg
(1127, 1300, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise102.jpeg
AdditiveGaussianNoise102.jpeg
(1094, 1169, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise103.jpeg
AdditiveGaussianNoise103.jpeg
(648, 760, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise104.jpeg
AdditiveGaussianNoise104.jpeg
(626, 760, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise105.jpeg
AdditiveGaussianNoise105.jpeg
(596, 760, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise106.jpeg
AdditiveGaussianNoise106.jpeg
(891, 760, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise107.jpeg
AdditiveGaussianNoise107.jpeg
(567, 760, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise108.jpeg
AdditiveGaussianNoise1

D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise174.jpeg
AdditiveGaussianNoise174.jpeg
(1083, 1242, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise175.jpeg
AdditiveGaussianNoise175.jpeg
(1959, 2358, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise176.jpeg
AdditiveGaussianNoise176.jpeg
(1453, 1588, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise177.jpeg
AdditiveGaussianNoise177.jpeg
(1198, 1430, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise178.jpeg
AdditiveGaussianNoise178.jpeg
(1399, 1530, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise179.jpeg
AdditiveGaussianNoise179.jpeg
(1178, 1700, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise18.jpeg
AdditiveGaussianNoise18.jpeg
(847, 1024, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise180.jpeg
AdditiveGaussianNoise180.jpeg
(1248, 1588, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise181.jpeg
AdditiveGaussianNoise181.jpeg
(1175, 1438, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise182.jpeg
AdditiveG

AdditiveGaussianNoise57.jpeg
(1175, 1536, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise58.jpeg
AdditiveGaussianNoise58.jpeg
(1272, 1536, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise59.jpeg
AdditiveGaussianNoise59.jpeg
(1360, 1536, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise6.jpeg
AdditiveGaussianNoise6.jpeg
(657, 657, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise60.jpeg
AdditiveGaussianNoise60.jpeg
(1272, 1536, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise61.jpeg
AdditiveGaussianNoise61.jpeg
(1024, 1024, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise62.jpeg
AdditiveGaussianNoise62.jpeg
(400, 552, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise63.jpeg
AdditiveGaussianNoise63.jpeg
(1437, 1522, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise64.jpeg
AdditiveGaussianNoise64.jpeg
(1536, 1515, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoise65.jpeg
AdditiveGaussianNoise65.jpeg
(1536, 1429, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussia

AdditiveGaussianNoiser19.jpeg
(234, 216, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser2.jpeg
AdditiveGaussianNoiser2.jpeg
(225, 225, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser20.jpeg
AdditiveGaussianNoiser20.jpeg
(211, 239, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser21.jpeg
AdditiveGaussianNoiser21.jpeg
(416, 416, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser3.jpeg
AdditiveGaussianNoiser3.jpeg
(240, 210, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser4.jpeg
AdditiveGaussianNoiser4.jpeg
(205, 246, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser5.jpeg
AdditiveGaussianNoiser5.jpeg
(237, 213, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser6.jpeg
AdditiveGaussianNoiser6.jpeg
(210, 240, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser7.jpeg
AdditiveGaussianNoiser7.jpeg
(232, 217, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser8.jpeg
AdditiveGaussianNoiser8.jpeg
(194, 259, 3)
D:/data/ChestXray/aug_x_ray/AdditiveGaussianNoiser9.