In [1]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, optimizers, losses, initializers
from tensorflow.keras.utils import plot_model
from IPython.display import Image
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
import pandas as pd
import cv2
import os
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import glob
# import utils
import xmltodict

In [2]:
dataset_dir = "dataset"
data_dirs = ("original", "train", "valid", "test")
labels = ("without_mask", "with_mask", "mask_weared_incorrect")
for data_dir in data_dirs:
    _data_path = os.path.join(dataset_dir, data_dir)
    for label in labels:
        _path = os.path.join(_data_path, label)
        if not os.path.isdir(_path):
            os.makedirs(_path)

In [3]:
def fileToDict(path):
    with open(path, "r") as file:
        _dict = xmltodict.parse(file.read())
    return _dict

def cv2_imshow(image):
    plt.imshow(cv2.cvtColor(image, cv2.COLOR_BGR2RGB))
    plt.show()

In [4]:
total_num = 0
dataset = {
    "image_path": [],
    "shape": [],
    "status": [],
    "bboxs": []
}
annotations_path = os.path.join(dataset_dir, "annotations")
images_path = os.path.join(dataset_dir, "images")
annotations_datas = os.listdir(annotations_path)
for annotations_data in annotations_datas:
    xml_path = os.path.join(annotations_path, annotations_data)
    data = fileToDict(xml_path)["annotation"]
    file = os.path.join(images_path, data["filename"])
    shape = (int(data["size"]["height"]), int(data["size"]["width"]), int(data["size"]["depth"]))
    bboxs = []
    status = []
    if type(data["object"]) != list:
        obj = data["object"]
        status.append(obj["name"])
        xmin = int(obj["bndbox"]["xmin"])
        ymin = int(obj["bndbox"]["ymin"])
        xmax = int(obj["bndbox"]["xmax"])
        ymax = int(obj["bndbox"]["ymax"])
        bboxs.append((xmin, ymin, xmax, ymax))
    else:
        for obj in data["object"]:
            status.append(obj["name"])
            xmin = int(obj["bndbox"]["xmin"])
            ymin = int(obj["bndbox"]["ymin"])
            xmax = int(obj["bndbox"]["xmax"])
            ymax = int(obj["bndbox"]["ymax"])
            bboxs.append((xmin, ymin, xmax, ymax))
    dataset["image_path"].append(file)
    dataset["shape"].append(shape)
    dataset["status"].append(status)
    dataset["bboxs"].append(bboxs)
    total_num += len(status)
dataset = pd.DataFrame(dataset)
dataset.head()

Unnamed: 0,image_path,shape,status,bboxs
0,dataset\images\maksssksksss0.png,"(366, 512, 3)","[without_mask, with_mask, without_mask]","[(79, 105, 109, 142), (185, 100, 226, 144), (3..."
1,dataset\images\maksssksksss1.png,"(156, 400, 3)","[with_mask, with_mask, with_mask, with_mask, w...","[(321, 34, 354, 69), (224, 38, 261, 73), (299,..."
2,dataset\images\maksssksksss10.png,"(400, 301, 3)",[with_mask],"[(98, 267, 194, 383)]"
3,dataset\images\maksssksksss100.png,"(226, 400, 3)","[with_mask, with_mask, with_mask]","[(189, 30, 245, 88), (387, 54, 400, 75), (118,..."
4,dataset\images\maksssksksss101.png,"(400, 301, 3)",[with_mask],"[(48, 294, 164, 400)]"


In [5]:
print(f"total_num = {total_num}")
valid_split_num = int(total_num * 0.1)
test_split_num = int(total_num * 0.1)
train_split_num = total_num - valid_split_num - test_split_num
print(f"train_split_num = {train_split_num}")
print(f"valid_split_num = {valid_split_num}")
print(f"test_split_num = {test_split_num}")

total_num = 4072
train_split_num = 3258
valid_split_num = 407
test_split_num = 407


In [6]:
data_path = os.path.join(dataset_dir, data_dirs[0])
img_num = [0] * 3
for i, image_path in enumerate(dataset["image_path"]):
    status = dataset["status"][i]
    bboxs = dataset["bboxs"][i]
    img = cv2.imread(image_path)
    img_draw = img.copy()
    for info in zip(status, bboxs):
        y_data = info[0]
        # cv2.rectangle(img_draw, bbox[:2], bbox[2:], (0, 255, 0), 2)
        img_crop = img.copy()[info[1][1]:info[1][3], info[1][0]:info[1][2]]
        x_data = cv2.resize(img_crop, (64, 64))
        # cv2_imshow(x_data)
        path = os.path.join(data_path, y_data)
        img_path = os.path.join(path, f"{img_num[labels.index(y_data)]}.jpg")
        print(img_path)
        cv2.imwrite(img_path, x_data)
        # tra_data = {"image": x_data, "label": y_data}
        img_num[labels.index(y_data)] += 1
    # cv2_imshow(img)
    # break


dataset\original\without_mask\0.jpg
dataset\original\with_mask\0.jpg
dataset\original\without_mask\1.jpg
dataset\original\with_mask\1.jpg
dataset\original\with_mask\2.jpg
dataset\original\with_mask\3.jpg
dataset\original\with_mask\4.jpg
dataset\original\with_mask\5.jpg
dataset\original\with_mask\6.jpg
dataset\original\with_mask\7.jpg
dataset\original\with_mask\8.jpg
dataset\original\without_mask\2.jpg
dataset\original\with_mask\9.jpg
dataset\original\with_mask\10.jpg
dataset\original\with_mask\11.jpg
dataset\original\with_mask\12.jpg
dataset\original\with_mask\13.jpg
dataset\original\with_mask\14.jpg
dataset\original\with_mask\15.jpg
dataset\original\with_mask\16.jpg
dataset\original\with_mask\17.jpg
dataset\original\with_mask\18.jpg
dataset\original\without_mask\3.jpg
dataset\original\without_mask\4.jpg
dataset\original\without_mask\5.jpg
dataset\original\with_mask\19.jpg
dataset\original\with_mask\20.jpg
dataset\original\without_mask\6.jpg
dataset\original\without_mask\7.jpg
dataset\

In [7]:
split_rate = 0.8, 0.1, 0.1

In [8]:
for dir in os.listdir(data_path):
    _path = os.path.join(data_path, dir)
    print(f"{_path}:")
    file_total_num = len(os.listdir(_path))
    for i, spile in enumerate(split_rate[1:]):
        file_list = os.listdir(_path)
        file_num = len(file_list)
        file_spile_num = int(file_total_num * spile)
        print(f"file_spile_num = {file_spile_num}")
        for file in file_list[:file_spile_num]:
            _file_path = os.path.join(_path, file)
            _new_file_path = os.path.join(dataset_dir, data_dirs[i + 2], dir, file)
            os.rename(_file_path, _new_file_path)
            # print(_file_path, _new_file_path)
    file_list = os.listdir(_path)
    for file in file_list:
        _file_path = os.path.join(_path, file)
        _new_file_path = os.path.join(dataset_dir, data_dirs[1], dir, file)
        os.rename(_file_path, _new_file_path)
    print(f"file_spile_num = {len(file_list)}")
    # break
    

dataset\original\mask_weared_incorrect:
file_spile_num = 12
file_spile_num = 12
file_spile_num = 99
dataset\original\without_mask:
file_spile_num = 71
file_spile_num = 71
file_spile_num = 575
dataset\original\with_mask:
file_spile_num = 323
file_spile_num = 323
file_spile_num = 2586
