In [1]:
import os
import glob
import pandas as pd
import xml.etree.ElementTree as ET
import numpy as np

# taken from https://github.com/datitran/raccoon_dataset/blob/master/xml_to_csv.py

def xml_to_csv(path):
    xml_list = []
    for xml_file in glob.glob(path + '/*.xml'):
        tree = ET.parse(xml_file)
        root = tree.getroot()
        for member in root.findall('object'):
            value = (root.find('filename').text,
                     int(root.find('size')[0].text),
                     int(root.find('size')[1].text),
                     member[0].text,
                     int(member[4][0].text),
                     int(member[4][1].text),
                     int(member[4][2].text),
                     int(member[4][3].text)
                     )
            xml_list.append(value)
    column_name = ['filename', 'width', 'height', 'class', 'xmin', 'ymin', 'xmax', 'ymax']
    xml_df = pd.DataFrame(xml_list, columns=column_name)
    return xml_df

In [2]:
path = "/work/MA490_DeepLearning/Share/Dice/Dice data set/"

In [3]:
df = xml_to_csv(path)
df

Unnamed: 0,filename,width,height,class,xmin,ymin,xmax,ymax
0,20211022_002758.jpg,4032,3024,"d10,2",860,730,1464,1252
1,20211022_002758.jpg,4032,3024,"d10,10",1601,718,2176,1246
2,20211022_002758.jpg,4032,3024,"d10,7",2245,609,2714,1149
3,20211022_002758.jpg,4032,3024,"d%,8",1654,1965,2201,2471
4,20211022_002758.jpg,4032,3024,"d10,3",2082,1752,2579,2334
...,...,...,...,...,...,...,...,...
2763,IMG_20211022_000811939.jpg,4000,3000,"d6,2",1414,641,1795,1080
2764,IMG_20211022_000811939.jpg,4000,3000,"d20,2",2111,835,2566,1283
2765,IMG_20211022_000811939.jpg,4000,3000,"d%,10",2482,1493,2960,1915
2766,IMG_20211022_000811939.jpg,4000,3000,"d10,10",1863,1157,2221,1576


In [4]:
import cv2
import matplotlib.pyplot as plt

In [5]:
filenames = list(set(df.filename))
len(filenames)

563

In [6]:
def show_images(images, labels, columns):
    plt.figure(figsize=(20,20))
    for i, image in enumerate(images):
        plt.subplot(len(images) / columns + 1, columns, i + 1)
        plt.imshow(image)
        plt.title(labels[i])

In [7]:
def show_bounding_box_pictures_with_labels(i):
    file =  filenames[i]
    image = cv2.imread(path + file)
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    new_images = []
    classes = []
    rows = df[df["filename"] == file].iterrows()
    for i, row in rows:
        classes.append(row["class"])
        new_image = image[row["ymin"]:row["ymax"], row["xmin"]:row["xmax"]]
        new_images.append(new_image)

    show_images([image] + new_images, ["original"] + classes, 4)

In [8]:
pixels = 64

In [9]:
def get_images_and_labels_for_all_bounding_boxes():
    new_images = []
    classes = []
    for file in filenames:
        print("getting", file)
        image = cv2.imread(path + file)
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

        rows = df[df["filename"] == file].iterrows()
        for i, row in rows:
            classes.append(row["class"])
            new_image = image[row["ymin"]:row["ymax"], row["xmin"]:row["xmax"]]
            new_image = cv2.resize(new_image, (pixels, pixels))
            new_images.append(new_image)
    return {"images": np.array(new_images, dtype=int), "labels": np.array(classes, dtype=str)}

In [10]:
images_and_labels = get_images_and_labels_for_all_bounding_boxes()

getting 20211022_001614.jpg
getting IMG_20211022_001520296.jpg
getting 20211022_003109.jpg
getting 20211022_001332.jpg
getting IMG_20211022_000733123.jpg
getting 20211022_001830.jpg
getting 20211022_003000.jpg
getting IMG_20211022_000853538.jpg
getting IMG_20211022_002535290.jpg
getting IMG_20211022_001600286.jpg
getting IMG_20211022_002243288.jpg
getting IMG_20211022_002538870.jpg
getting 20211022_001918.jpg
getting IMG_20211022_001901704.jpg
getting 20211022_002915.jpg
getting 20211022_003051.jpg
getting 20211022_000934.jpg
getting IMG_20211022_002203890.jpg
getting IMG_20211022_001810226.jpg
getting IMG_20211022_000711227.jpg
getting 20211022_001330.jpg
getting IMG_20211022_001856888.jpg
getting 20211022_001550.jpg
getting 20211022_003241.jpg
getting IMG_20211022_001632497.jpg
getting 20211022_000924.jpg
getting 20211022_002613.jpg
getting 20211022_001434.jpg
getting 20211022_002039.jpg
getting IMG_20211022_001208353.jpg
getting 20211022_000617.jpg
getting 20211022_001533.jpg
gettin

getting IMG_20211022_002331191.jpg
getting 20211022_002252.jpg
getting 20211022_000856.jpg
getting IMG_20211022_002456121.jpg
getting IMG_20211022_001938202.jpg
getting 20211022_001927.jpg
getting IMG_20211022_002023330.jpg
getting 20211022_002904.jpg
getting 20211022_002302.jpg
getting 20211022_002533.jpg
getting 20211022_000726.jpg
getting 20211022_000950.jpg
getting IMG_20211022_002234677.jpg
getting 20211022_001209.jpg
getting 20211022_002052.jpg
getting IMG_20211022_002214246.jpg
getting 20211022_003011.jpg
getting IMG_20211022_003302100.jpg
getting IMG_20211022_001733043.jpg
getting IMG_20211022_002939976.jpg
getting 20211022_001728.jpg
getting 20211022_002438.jpg
getting 20211022_000906.jpg
getting 20211022_001153.jpg
getting 20211022_000655.jpg
getting 20211022_001707.jpg
getting 20211022_001558.jpg
getting 20211022_002535.jpg
getting IMG_20211022_003247249.jpg
getting 20211022_003117.jpg
getting 20211022_001231.jpg
getting IMG_20211022_002102466.jpg
getting 20211022_003225.jpg

getting 20211022_002031.jpg
getting IMG_20211022_001131460.jpg
getting IMG_20211022_002528577.jpg
getting 20211022_001511.jpg
getting IMG_20211022_001700575.jpg
getting 20211022_001353.jpg
getting 20211022_001513.jpg
getting 20211022_002007.jpg
getting 20211022_002953.jpg
getting 20211022_002803.jpg
getting 20211022_001958.jpg
getting 20211022_000633.jpg
getting 20211022_001258.jpg
getting 20211022_002143.jpg
getting IMG_20211022_002301332.jpg
getting 20211022_001355.jpg
getting 20211022_001945.jpg
getting 20211022_002919.jpg
getting 20211022_002525.jpg
getting IMG_20211022_002419170.jpg
getting IMG_20211022_001951070.jpg
getting 20211022_002958.jpg
getting IMG_20211022_002310144.jpg
getting 20211022_002415.jpg
getting IMG_20211022_002524077.jpg
getting IMG_20211022_001803120.jpg
getting 20211022_002126.jpg
getting IMG_20211022_001529574.jpg
getting 20211022_001804.jpg
getting IMG_20211022_001926433.jpg
getting 20211022_000823.jpg
getting 20211022_001141.jpg
getting 20211022_001303.jpg

In [11]:
saveFileName = "CleanedDiceData" + str(pixels) + ".npz"

In [12]:
np.savez_compressed(saveFileName, images=images_and_labels["images"], labels=images_and_labels["labels"])

In [13]:
data = np.load(saveFileName, allow_pickle=True)
data.files

['images', 'labels']

In [14]:
images_loaded = data["images"]
labels_loaded = data["labels"]