In [3]:
import os
from pathlib import Path
import random

import tensorflow as tf
import cv2
import numpy as np
from sklearn.model_selection import train_test_split

from model_def import Generator, Discriminator

2024-05-12 19:20:49.178128: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [4]:
def jpg_to_jpeg(dir_path):
    for file_name in os.listdir(dir_path):
        file_path = os.path.join(dir_path, file_name)

        if file_path.endswith('.jpg') or file_path.endswith('.JPG'):
            img = cv2.imread(file_path)
            cv2.imwrite(file_path[:-4] + ".jpeg", img)

            os.remove(file_path)

In [5]:
DATASET_PATH = "../training_dataset"

# Tranform images from jpg format to jpeg format
jpg_to_jpeg(DATASET_PATH)

# save the photo in the form numpy array


training_set = []

for img_name in os.listdir(TRAIN_DATASET_PATH):
    img_path = os.path.join(TRAIN_DATASET_PATH, img_name)
    
    training_set.append(cv2.imread(img_path))

training_set = tf.convert_to_tensor(training_set)

print(type(training_set), training_set.shape)

np.save(os.path.join(TRAIN_DATASET_PATH, 'photo_tensor.npy'), training_set.numpy())


In [6]:
TRAIN_DATASET_PATH = os.path.join(DATASET_PATH, "photo_jpg", "photo_tensor.npy")
MONET_DATASET_PATH = os.path.join(DATASET_PATH, "monet_jpg", "monet_tensor.npy")

In [7]:
# photo_set is for the picture which we would like to convert it to Monet
photo_set = tf.convert_to_tensor(np.load(TRAIN_DATASET_PATH), dtype=tf.float32)[:5000]

# monet_set is for the Monet
monet_set = tf.convert_to_tensor(np.load(MONET_DATASET_PATH), dtype=tf.float32)

2024-05-12 19:21:04.099493: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-12 19:21:04.321860: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-12 19:21:04.322030: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

# PLAN

### Train generator through PHOTO_SET  
### Train discriminator through both the result of generator and some images in MONET_SET 

In [8]:
CONFIG = dict({
    "EPOCHS": 10,
    "BATCH_SIZE": 16,
})

In [9]:
generator = Generator(
    origin_output_shape=(32, 32, 3),
    num_of_conv=5,
    filters=[32, 64, 128, 64, 3],
    kernel_size=[(3, 3), (3, 3), (3, 3), (3, 3), (3, 3)],
    strides=[1, 1, 1, 1, 1],
)

discriminator = Discriminator(
    num_of_dense=3,
    dense_units=[64, 64, 32],
    num_of_conv=4,
    filters=[64, 128, 128, 64],
    kernel_size=[(3, 3), (3, 3), (3, 3), (3, 3)],
    strides=[2, 2, 2, 1]
)

In [10]:
discriminator.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss='binary_crossentropy',
    metrics=["accuracy"]
)

In [11]:
discriminator.trainable = False

model_input = tf.keras.Input(shape=(256, 256, 3), name="model_input")
model_output = discriminator(generator(model_input))

model = tf.keras.models.Model(model_input, model_output)

model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.0001),
    loss="binary_crossentropy",
    metrics=["accuracy"]
)

In [12]:
def train_discriminator(real_images, fake_images):
    real = np.ones(shape=(CONFIG["BATCH_SIZE"], 1), dtype=np.float32)
    fake = np.ones(shape=(CONFIG["BATCH_SIZE"], 1), dtype=np.float32)

    # Train on real images
    real_history = discriminator.train_on_batch(x=real_images, y=real)

    # Train on fake images
    fake_history = discriminator.train_on_batch(x=fake_images, y=fake)

    return real_history, fake_history


def train_generator(images):
    return model.train_on_batch(x=images, y=np.ones(shape=(CONFIG["BATCH_SIZE"], 1), dtype=np.float32))

In [13]:
# we do not need to train the discriminator with mixture of fake and real monet
# train it seperately instead.

discriminator_history = []
generator_history = []

for _ in range(CONFIG["EPOCHS"]):
    monet_idx = random.sample(range(monet_set.shape[0]), k=CONFIG["BATCH_SIZE"])
    monet_batch = tf.gather(monet_set, indices=monet_idx)

    photo_idx = random.sample(range(photo_set.shape[0]), k=CONFIG["BATCH_SIZE"])
    photo_batch = tf.gather(photo_set, indices=photo_idx)

    discriminator_history.append(train_discriminator(real_images=monet_batch, fake_images=generator(photo_batch)))
    generator_history.append(train_generator(images=photo_batch))


2024-05-12 19:21:10.668832: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8600


2024-05-12 19:21:15.313421: I tensorflow/compiler/xla/stream_executor/cuda/cuda_blas.cc:606] TensorFloat-32 will be used for the matrix multiplication. This will only be logged once.
2024-05-12 19:21:15.402786: I tensorflow/compiler/xla/service/service.cc:168] XLA service 0x7fefb00beaf0 initialized for platform CUDA (this does not guarantee that XLA will be used). Devices:
2024-05-12 19:21:15.402811: I tensorflow/compiler/xla/service/service.cc:176]   StreamExecutor device (0): NVIDIA GeForce RTX 3060, Compute Capability 8.6
2024-05-12 19:21:15.486503: I tensorflow/compiler/mlir/tensorflow/utils/dump_mlir_util.cc:255] disabling MLIR crash reproducer, set env var `MLIR_CRASH_REPRODUCER_DIRECTORY` to enable.
2024-05-12 19:21:15.841839: I ./tensorflow/compiler/jit/device_compiler.h:186] Compiled cluster using XLA!  This line is logged at most once for the lifetime of the process.
2024-05-12 19:21:17.277009: W tensorflow/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of 

In [14]:
discriminator_history

[([0.6993177533149719, 0.0], [0.6676900386810303, 1.0]),
 ([0.6498976349830627, 1.0], [0.5918108820915222, 1.0]),
 ([0.5767498016357422, 1.0], [0.4929863214492798, 1.0]),
 ([0.44746917486190796, 1.0], [0.3384184241294861, 1.0]),
 ([0.25374096632003784, 1.0], [0.19456419348716736, 1.0]),
 ([0.12731899321079254, 1.0], [0.08763424307107925, 1.0]),
 ([0.034163814038038254, 1.0], [0.03254415839910507, 1.0]),
 ([0.013902666978538036, 1.0], [0.010343027301132679, 1.0]),
 ([0.002575263846665621, 1.0], [0.008592531085014343, 1.0]),
 ([0.00012343781418167055, 1.0], [0.0012969749514013529, 1.0])]

In [15]:
generator_history

[[0.6452574133872986, 1.0],
 [0.561002790927887, 1.0],
 [0.41489189863204956, 1.0],
 [0.23869256675243378, 1.0],
 [0.09501928836107254, 1.0],
 [0.021592263132333755, 1.0],
 [0.003141624853014946, 1.0],
 [0.00040362394065596163, 1.0],
 [0.00014220745651982725, 1.0],
 [4.4261992115934845e-06, 1.0]]