## Set up

In [None]:
# Mount Google Drive into the Colab runtime at /content/drive; the dataset
# paths used by this notebook all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# *------- Basic setup -------*
import numpy as np
import pandas as pd
import os, random
import matplotlib.pyplot as plt
from tqdm.notebook import tqdm
import cv2
import glob
from PIL import Image

# *------- tensorflow & keras -------*
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers

# *------- sklearn -------*
from sklearn.model_selection import train_test_split

# *------- path & file -------*
# Root folder (on the mounted drive) that holds every dataset variant.
dataset_path = "/content/drive/MyDrive/KT_3차_미프/Datasets"

# Each split folder has one sub-folder per class: 'normal' and 'abnormal'.
train_path = os.path.join(dataset_path, 'Car_Images_train')
tr_n_path, tr_ab_path = (
    os.path.join(train_path, class_dir) for class_dir in ('normal', 'abnormal')
)

test_path = os.path.join(dataset_path, 'Car_Images_test')
te_n_path, te_ab_path = (
    os.path.join(test_path, class_dir) for class_dir in ('normal', 'abnormal')
)

val_path = os.path.join(dataset_path, 'Car_Images_validation')
val_n_path, val_ab_path = (
    os.path.join(val_path, class_dir) for class_dir in ('normal', 'abnormal')
)

# Destination folder for the augmented images.
aug_dataset_path = os.path.join(dataset_path, 'aug_Dataset')

## Prepare the dataset

In [None]:
import os

def createDirectory(directory):
    """Create ``directory`` (including missing parents) if it does not exist.

    Creation is best-effort: a failure is reported on stdout instead of being
    raised, so the caller keeps running.

    Args:
        directory: Path of the directory to create.
    """
    try:
        # exist_ok=True removes the check-then-create race and still raises
        # when the path exists but is not a directory.
        os.makedirs(directory, exist_ok=True)
    except OSError as err:
        print(f"Error: Failed to create the directory. ({directory}: {err})")

In [None]:
# Create the output directory for the augmented images (no-op if it exists).

createDirectory(aug_dataset_path)

In [None]:
from tensorflow.keras.backend import clear_session
from tensorflow.keras.preprocessing.image import img_to_array

# Side length in pixels; images are resized to (img_size, img_size) on load.
img_size= 280

# Clear the Keras backend session before doing any work in this cell.
clear_session()

# Build the image array and the matching label array.
def glos(folder):
    """Load every PNG under ``copy_images/<folder>`` together with its label.

    Args:
        folder: Sub-folder name under ``{dataset_path}/copy_images``. It is
            interpolated into a glob pattern, so wildcards such as ``'*'``
            are expanded.

    Returns:
        A ``(data, label)`` pair of NumPy arrays. ``data`` stacks each image
        after resizing it to ``(img_size, img_size)``; ``label`` holds 1 for
        files whose name starts with ``'ab'`` and 0 for all others.

    Raises:
        ValueError: If no file matches the pattern.
    """
    pattern = f'{dataset_path}/copy_images/{folder}/*.png'
    files = glob.glob(pattern)
    if not files:
        # Fail with the offending pattern instead of an opaque unpack error.
        raise ValueError(f'No PNG files found for pattern: {pattern}')

    data, label = [], []
    for f in files:
        # Close each file as soon as its pixels are copied out, so loading
        # hundreds of images does not leave hundreds of open handles behind.
        with Image.open(f) as img:
            data.append(img_to_array(img.resize((img_size, img_size))))
        label.append(1 if os.path.basename(f).startswith('ab') else 0)
    return np.array(data), np.array(label)

# '*' is substituted for the folder name in the glob pattern, so this single
# call loads the PNGs from every sub-folder of copy_images.
entire_data, entire_label = glos('*')
# Per-split alternatives, kept for reference:
# train_data, train_label = glos('trainset')
# validation_data, validation_label = glos('validset')
# test_data, test_label = glos('testset')

In [None]:
# Divide the pixel values by 255 (presumably 8-bit input, giving [0, 1]) and
# turn the integer class ids into 2-way one-hot vectors.
entire_data = np.divide(entire_data.astype("float32"), 255.0)
entire_label = tf.one_hot(entire_label, depth=2)

In [None]:
# Display the shape of the loaded image array.
entire_data.shape

(605, 280, 280, 3)

In [None]:
# Let tf.data choose the parallelism level for dataset `map` calls.
AUTO = tf.data.AUTOTUNE
# Batch size of the datasets built from the loaded images.
BATCH_SIZE = 256
# NOTE(review): EPOCHS is not referenced in the visible part of this notebook.
EPOCHS = 10

In [None]:
# Hold out the leading samples as the test set; the rest is used for mixup.
# NOTE(review): the split is positional (no shuffling), so its class balance
# depends on the order in which the files were listed - confirm intended.
test_samples = 121

new_x_train, new_y_train = entire_data[test_samples:], entire_label[test_samples:]
x_test, y_test = entire_data[:test_samples], entire_label[:test_samples]

# Because we will be mixing up the images and their corresponding labels, we
# build two independently shuffled and batched datasets from the same
# training data and zip them, so each step yields a pair of batches.
train_ds_one, train_ds_two = (
    tf.data.Dataset.from_tensor_slices((new_x_train, new_y_train))
    .shuffle(BATCH_SIZE * 100)
    .batch(BATCH_SIZE)
    for _ in range(2)
)
train_ds = tf.data.Dataset.zip((train_ds_one, train_ds_two))

test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).batch(BATCH_SIZE)

In [None]:
def sample_beta_distribution(size, concentration_0=0.2, concentration_1=0.2):
    """Draw ``size`` Beta-distributed values from two Gamma draws.

    For X ~ Gamma(concentration_1) and Y ~ Gamma(concentration_0), the ratio
    X / (X + Y) follows a Beta distribution, which is what is returned.

    Args:
        size: Number of samples to draw.
        concentration_0: Shape parameter of the second Gamma draw.
        concentration_1: Shape parameter of the first Gamma draw.

    Returns:
        A 1-D tensor with ``size`` entries.
    """
    first_draw = tf.random.gamma(shape=[size], alpha=concentration_1)
    second_draw = tf.random.gamma(shape=[size], alpha=concentration_0)
    total = first_draw + second_draw
    return first_draw / total


def mix_up(ds_one, ds_two, alpha=0.2):
    """Blend two batches into one mixup batch.

    Args:
        ds_one: ``(images, labels)`` pair for the first batch.
        ds_two: ``(images, labels)`` pair for the second batch.
        alpha: Concentration passed for both Beta parameters when sampling
            the per-sample mixing weight.

    Returns:
        ``(images, labels)`` where every sample is
        ``weight * first + (1 - weight) * second`` with one weight per sample.
    """
    (images_one, labels_one), (images_two, labels_two) = ds_one, ds_two
    n_samples = tf.shape(images_one)[0]

    # One mixing weight per sample, reshaped so that it broadcasts over the
    # image axes and over the label axis respectively.
    lam = sample_beta_distribution(n_samples, alpha, alpha)
    image_weight = tf.reshape(lam, (n_samples, 1, 1, 1))
    label_weight = tf.reshape(lam, (n_samples, 1))

    # Combine each pair (one sample from each batch) into a single sample.
    mixed_images = images_one * image_weight + images_two * (1 - image_weight)
    mixed_labels = labels_one * label_weight + labels_two * (1 - label_weight)
    return (mixed_images, mixed_labels)

In [None]:
# Apply mixup to every pair of batches coming out of the zipped dataset.
train_ds_mu = train_ds.map(
    lambda ds_one, ds_two: mix_up(ds_one, ds_two, alpha=0.2),
    num_parallel_calls=AUTO,
)

# Only the first mixed batch is materialised here.
sample_images, sample_labels = next(iter(train_ds_mu))

# Column 0 of the soft label is the weight of class 0 (normal). Compare in
# float64 so the 0.95 threshold is not rounded to float32.
normal_weight = sample_labels.numpy()[:, 0].astype(np.float64)

# A sample stays "normal" (0) only when its normal weight is at least 0.95;
# everything else is treated as "abnormal" (1). Labels keep shape (N, 1).
mix_data = sample_images.numpy()
mix_label = np.where(normal_weight >= 0.95, 0, 1).reshape(-1, 1)

In [None]:
# Display the shape of the mixed image array.
mix_data.shape

(256, 280, 280, 3)

In [None]:
# Display the shape of the hard-label array.
mix_label.shape

(256, 1)

In [None]:
# Count how many mixed samples received each label (0 = normal, 1 = abnormal).
np.unique(mix_label, return_counts = True)

(array([0, 1]), array([ 88, 168]))

In [None]:
from PIL import Image

# Write every mixed sample to the augmentation folder as a JPEG. Abnormal
# samples get the 'ab_' file-name prefix, normal samples do not.
for i, (data, label) in enumerate(zip(mix_data, mix_label)):
    # Pixels are floats in [0, 1]; convert to 8-bit explicitly rather than
    # relying on imwrite's implicit handling of float input.
    pixels = np.clip(np.rint(data * 255), 0, 255).astype(np.uint8)
    # The images were loaded through PIL (RGB order) while cv2.imwrite
    # expects BGR; without this swap red and blue end up exchanged on disk.
    pixels = cv2.cvtColor(pixels, cv2.COLOR_RGB2BGR)

    prefix = 'ab_mixup' if label == 1 else 'mixup'
    out_path = f'{aug_dataset_path}/{prefix}_{i}.jpg'
    # imwrite signals failure through its return value, not an exception.
    if not cv2.imwrite(out_path, pixels):
        print(f'Error: Failed to write {out_path}')

In [None]:
# Number of entries currently in the augmented-image directory.
len(os.listdir(aug_dataset_path))

256

In [None]:
# Visualization
# Draw a fresh mixup batch and preview its first 9 samples on a 3x3 grid,
# printing each sample's soft label as it is plotted.
sample_images, sample_labels = next(iter(train_ds_mu))
plt.figure(figsize=(10, 10))
preview_pairs = zip(sample_images[:9], sample_labels[:9])
for slot, (image, label) in enumerate(preview_pairs, start=1):
    plt.subplot(3, 3, slot)
    plt.imshow(image.numpy().squeeze())
    plt.axis("off")
    print(label.numpy().tolist())
    print("*******************************************")