In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses, initializers
from tensorflow.keras.utils import plot_model
from IPython.display import Image
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import pandas as pd
import cv2
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
# import utils
import xmltodict

In [None]:
# Root folder of the dataset, relative to the notebook's working directory.
dataset_dir = "dataset"
# Sub-folders created under dataset_dir; each one gets a folder per label.
data_dirs = ("original", "train", "valid", "test")
# Class names, used verbatim as folder names.
labels = ("without_mask", "with_mask", "mask_weared_incorrect")

# Create dataset/<data_dir>/<label>/ for every combination.
# exist_ok=True keeps the cell safe to re-run and avoids the check-then-create
# race of calling os.path.isdir() before os.makedirs().
for data_dir in data_dirs:
    for label in labels:
        os.makedirs(os.path.join(dataset_dir, data_dir, label), exist_ok=True)

In [None]:
def fileToDict(path):
    """Read the XML file at ``path`` and return ``xmltodict.parse`` of its text.

    The file is opened in text mode with the platform's default encoding.
    """
    with open(path, "r") as handle:
        xml_text = handle.read()
    return xmltodict.parse(xml_text)

def cv2_imshow(image):
    """Show ``image`` with matplotlib after a BGR-to-RGB colour conversion.

    The conversion uses ``cv2.COLOR_BGR2RGB``, so ``image`` is expected to be
    in OpenCV's BGR channel order.
    """
    rgb_image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    plt.imshow(rgb_image)
    plt.show()

In [None]:
# Build one row per annotation file: the image path, its (height, width, depth)
# shape, and two parallel lists holding each object's label and bounding box.
annotations_path = os.path.join(dataset_dir, "annotations")
images_path = os.path.join(dataset_dir, "images")
# os.listdir returns entries in arbitrary order; sorting keeps the row order
# (and anything numbered from it later) reproducible. Non-XML entries such as
# editor or OS metadata files are ignored instead of crashing the parser.
annotations_datas = sorted(
    name for name in os.listdir(annotations_path) if name.lower().endswith(".xml")
)

total_num = 0  # total number of annotated objects across all files
records = []
for annotations_data in annotations_datas:
    xml_path = os.path.join(annotations_path, annotations_data)
    data = fileToDict(xml_path)["annotation"]
    file = os.path.join(images_path, data["filename"])
    size = data["size"]
    shape = (int(size["height"]), int(size["width"]), int(size["depth"]))

    # xmltodict yields a single mapping for one <object> element and a list for
    # several; a missing key means the file has no objects. Normalise all three
    # cases to a list. isinstance (rather than type(...) == dict) also accepts
    # dict subclasses such as the OrderedDict some xmltodict versions return.
    objects = data.get("object") or []
    if isinstance(objects, dict):
        objects = [objects]

    status = [obj["name"] for obj in objects]
    # Each box is stored as (xmin, ymin, xmax, ymax) in integer pixels.
    bboxs = [
        tuple(int(obj["bndbox"][key]) for key in ("xmin", "ymin", "xmax", "ymax"))
        for obj in objects
    ]

    records.append(
        {"image_path": file, "shape": shape, "status": status, "bboxs": bboxs}
    )
    total_num += len(status)

# Explicit columns keep the schema stable even when no annotation was found.
dataset = pd.DataFrame(records, columns=["image_path", "shape", "status", "bboxs"])
dataset.head()


In [None]:
# Report how many annotated objects a 10% / 10% hold-out would assign to the
# validation and test sets (rounded down), with the remainder left for training.
print(f"total_num = {total_num}")
valid_split_num = test_split_num = int(total_num * 0.1)
train_split_num = total_num - (valid_split_num + test_split_num)
print(
    f"train_split_num = {train_split_num}",
    f"valid_split_num = {valid_split_num}",
    f"test_split_num = {test_split_num}",
    sep="\n",
)

In [None]:
# Crop every annotated bounding box out of its source image, resize the crop and
# save it as dataset/original/<label>/<n>.jpg, where <n> is a per-label counter.
data_path = os.path.join(dataset_dir, data_dirs[0])
CROP_SIZE = (64, 64)  # (width, height) passed to cv2.resize
img_num = [0] * len(labels)  # crops written so far, indexed like `labels`

for image_path, status, bboxs in zip(
    dataset["image_path"], dataset["status"], dataset["bboxs"]
):
    img = cv2.imread(image_path)
    if img is None:
        # cv2.imread reports a missing or undecodable file by returning None.
        print(f"skipping unreadable image: {image_path}")
        continue

    for y_data, (xmin, ymin, xmax, ymax) in zip(status, bboxs):
        label_idx = labels.index(y_data)  # raises ValueError on an unknown label
        # Clamp to 0 so a negative coordinate cannot wrap around as a negative
        # slice index; NumPy already truncates slices that run past the image.
        img_crop = img[max(ymin, 0):max(ymax, 0), max(xmin, 0):max(xmax, 0)]
        if img_crop.size == 0:
            # A zero-area box would make cv2.resize raise.
            print(f"skipping empty box {(xmin, ymin, xmax, ymax)} in {image_path}")
            continue

        x_data = cv2.resize(img_crop, CROP_SIZE)
        img_path = os.path.join(data_path, y_data, f"{img_num[label_idx]}.jpg")
        if not cv2.imwrite(img_path, x_data):
            # cv2.imwrite returns False instead of raising when the write fails.
            print(f"failed to write {img_path}")
            continue
        img_num[label_idx] += 1

# One summary line instead of printing every written path.
print(dict(zip(labels, img_num)))


In [None]:
# Fractions of each class for (train, valid, test).
split_rate = (0.8, 0.1, 0.1)

In [None]:
# Move the cropped images out of dataset/original/<label>/ into the valid, test
# and train folders, label by label.
#
# NOTE(review): os.rename replaces an existing destination file on POSIX and
# raises on Windows. If the crop cell is re-run (its numbering restarts at 0)
# and this cell is run again, same-named files may collide with an earlier
# split -- clear the split folders first.
SPLIT_SEED = 42
split_rng = np.random.default_rng(SPLIT_SEED)
# Pairs ("valid", rate) and ("test", rate); the train rate is implicit because
# train receives whatever is left.
holdout_splits = tuple(zip(data_dirs[2:], split_rate[1:]))

# Iterate the known labels rather than os.listdir(data_path), so stray entries
# (e.g. .DS_Store, .ipynb_checkpoints) are never treated as class folders.
for label in labels:
    _path = os.path.join(data_path, label)
    if not os.path.isdir(_path):
        continue
    print(f"{_path}:")

    # os.listdir order is arbitrary: sort for a stable baseline, then shuffle
    # with a fixed seed so the split is both random and reproducible.
    file_list = sorted(os.listdir(_path))
    split_rng.shuffle(file_list)
    file_total_num = len(file_list)

    # Slice the shuffled list once instead of re-listing the folder per split.
    assignments = []
    start = 0
    for split_name, rate in holdout_splits:
        file_spile_num = int(file_total_num * rate)
        print(f"file_spile_num = {file_spile_num}")
        assignments.append((split_name, file_list[start:start + file_spile_num]))
        start += file_spile_num
    train_files = file_list[start:]
    assignments.append((data_dirs[1], train_files))

    for split_name, split_files in assignments:
        target_dir = os.path.join(dataset_dir, split_name, label)
        os.makedirs(target_dir, exist_ok=True)
        for file in split_files:
            os.rename(os.path.join(_path, file), os.path.join(target_dir, file))
    # Number of files that went to the training folder.
    print(f"file_spile_num = {len(train_files)}")
    