In [10]:
import os
import tensorflow as tf

from layers import *

In [12]:
# Number of examples per batch; read by prepare_for_training() when it batches a dataset.
BATCH_SIZE = 32
# Side length (pixels) of the square model input; also the size get_img() crops/pads images to.
IMG_SIZE = 512

def make_model(input_shape=(IMG_SIZE, IMG_SIZE, 3), num_type2=5, dropout_rate=0):
    """Build and compile the 3-way classifier, spreading its stages over two GPUs.

    The stages alternate between "/GPU:0" and "/GPU:1". The block builders
    (layer_type1 .. layer_type4) and the `L` namespace are not defined in this
    notebook -- presumably they come from `from layers import *`.

    Args:
        input_shape: Shape of one input image, without the batch dimension.
        num_type2: How many `layer_type2` blocks to stack.
        dropout_rate: Forwarded to every block; when > 0 a Dropout layer is
            also inserted after the dense head.

    Returns:
        A compiled `tf.keras.Model` whose output is 3 logits (the loss is
        configured with `from_logits=True`).
    """
    # Image size, filter counts and depth are reduced compared with the
    # original network, which was way too big.
    inputs = L.Input(shape=input_shape)

    with tf.device("/GPU:0"):
        net = layer_type1(inputs, filters=64, kernel_size=(8, 8), stride=8, dropout_rate=dropout_rate)
        net = layer_type1(net, filters=64, dropout_rate=dropout_rate)

    with tf.device("/GPU:1"):
        for _ in range(num_type2):
            net = layer_type2(net, filters=64, dropout_rate=dropout_rate)

    with tf.device("/GPU:0"):
        # Four type-3 blocks with a doubling filter count.
        for n_filters in (16, 32, 64, 128):
            net = layer_type3(net, filters=n_filters, dropout_rate=dropout_rate)

    with tf.device("/GPU:1"):
        net = layer_type4(net, filters=256, dropout_rate=dropout_rate)

        # Dense head: Dense -> BatchNorm -> ReLU (-> optional Dropout) -> logits.
        net = L.Dense(256)(net)
        net = L.BatchNormalization()(net)
        net = L.ReLU()(net)
        if dropout_rate > 0:
            net = L.Dropout(dropout_rate)(net)

        predictions = L.Dense(3)(net)

    model = tf.keras.Model(inputs=inputs, outputs=predictions)
    model.compile(
        optimizer='adam',
        loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['acc'],
    )
    return model
# Build the network with two layer_type2 blocks and 20% dropout, then print its layer table.
model = make_model(
    num_type2=2,
    dropout_rate=0.2,
)
model.summary()

Model: "functional_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            [(None, 512, 512, 3) 0                                            
__________________________________________________________________________________________________
conv2d_14 (Conv2D)              (None, 64, 64, 64)   12352       input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_15 (BatchNo (None, 64, 64, 64)   256         conv2d_14[0][0]                  
__________________________________________________________________________________________________
re_lu_12 (ReLU)                 (None, 64, 64, 64)   0           batch_normalization_15[0][0]     
_______________________________________________________________________________________

In [15]:
def get_label(file_path):
    """Derive the integer class label from the parent directory of `file_path`.

    The second-to-last path component selects the class: 'JUNIWARD' -> 1,
    'UERD' -> 2, anything else -> 0.
    """
    folder = tf.strings.split(file_path, os.path.sep)[-2]
    if folder == 'UERD':
        label = 2
    elif folder == 'JUNIWARD':
        label = 1
    else:
        label = 0
    return label

def get_img(file_path):
    """Load the JPEG at `file_path` as a float32, 3-channel, IMG_SIZE x IMG_SIZE tensor.

    The decoded image is converted with `tf.image.convert_image_dtype` and then
    centrally cropped or zero-padded (never rescaled) to the target size.
    """
    raw_bytes = tf.io.read_file(file_path)
    decoded = tf.io.decode_jpeg(raw_bytes, channels=3)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    return tf.image.resize_with_crop_or_pad(as_float, IMG_SIZE, IMG_SIZE)

def process_path(file_path):
    """Turn the path of an embedded image into a `(difference, label)` training pair.

    The cover image is looked up under the relative directory 'Cover/' using
    the same file name as `file_path`. The returned tensor is the element-wise
    difference `embedded - cover`; the label comes from `get_label`.
    """
    # Embedded image and its class label, both derived from the path.
    label = get_label(file_path)
    embedded = get_img(file_path)

    # Matching cover image: same file name, fixed 'Cover/' directory.
    file_name = tf.strings.split(file_path, os.path.sep)[-1]
    cover = get_img('Cover/' + file_name)

    return embedded - cover, label

def split_dataset(dataset: "tf.data.Dataset", validation_data_fraction: float):
    """
    Splits a dataset of type tf.data.Dataset into a training and validation dataset using the given ratio.

    The fraction is rounded to the nearest whole percent. The split is positional: within every run of
    100 consecutive elements, the first `percent` elements go to validation and the remaining ones go
    to training. Because assignment depends only on an element's index, the two results are disjoint
    only if `dataset` yields its elements in the same order every time it is iterated.

    @param dataset: the input dataset to split.
    @param validation_data_fraction: the fraction of the validation data as a float between 0 and 1.
    @return: a tuple of two tf.data.Datasets as (training, validation)
    @raise ValueError: if the fraction, once rounded to a whole percent, lies outside [0, 100].
    """

    validation_data_percent = round(validation_data_fraction * 100)
    if not (0 <= validation_data_percent <= 100):
        raise ValueError("validation data fraction must be ∈ [0,1]")

    indexed = dataset.enumerate()

    # `f % 100` takes the values 0..99, so exactly `validation_data_percent` of them satisfy the
    # strict `<`. (Using `<=` here would select one extra element per hundred, e.g. 3% for 0.02
    # and 1% for 0.0.)
    train_dataset = indexed.filter(lambda f, data: f % 100 >= validation_data_percent)
    validation_dataset = indexed.filter(lambda f, data: f % 100 < validation_data_percent)

    # remove enumeration
    train_dataset = train_dataset.map(lambda f, data: data)
    validation_dataset = validation_dataset.map(lambda f, data: data)

    return train_dataset, validation_dataset

def prepare_for_training(ds, cache=True, shuffle_buffer_size=1000):
    """Turn `ds` into an endlessly repeating, shuffled, batched and prefetched dataset.

    Args:
        ds: Dataset to prepare.
        cache: Falsy -> no caching. A non-empty string -> cache to that file
            name (for preprocessing results that do not fit in memory). Any
            other truthy value -> cache in memory.
        shuffle_buffer_size: Number of elements held in the shuffle buffer.
            NOTE(review): every buffered element is a full dataset element, so
            memory use grows with this value -- worth checking for large images.

    Returns:
        The prepared dataset, batched with the module-level BATCH_SIZE. It
        repeats forever, so consumers must bound the number of steps themselves.
    """
    if cache:
        ds = ds.cache(cache) if isinstance(cache, str) else ds.cache()

    # Shuffle (fixed seed) -> repeat forever -> batch -> prefetch, so that
    # batches are fetched in the background while the model is training.
    return (
        ds.shuffle(buffer_size=shuffle_buffer_size, seed=42)
        .repeat()
        .batch(BATCH_SIZE)
        .prefetch(buffer_size=AUTOTUNE)
    )

In [14]:
# Directory name -> class id. (get_label() hard-codes the same mapping.)
LABELS = {'JMiPOD': 0, 'JUNIWARD': 1, 'UERD': 2}
AUTOTUNE = tf.data.experimental.AUTOTUNE
BATCH_SIZE = 32
IMG_SIZE = 512

# split_dataset() assigns elements to train/validation by their index, so the file order must be
# identical every time the dataset is iterated. list_files() shuffles by default and reshuffles on
# each iteration, which would let the same file land in both splits. List in a deterministic order
# instead, then shuffle once over the whole file list with a fixed, non-reshuffling order.
list_ds = tf.data.Dataset.list_files("data/*/*", shuffle=False)
list_ds = list_ds.shuffle(
    tf.data.experimental.cardinality(list_ds),
    seed=42,
    reshuffle_each_iteration=False,
)

# Set `num_parallel_calls` so multiple images are loaded/processed in parallel.
labeled_ds = list_ds.map(process_path, num_parallel_calls=AUTOTUNE)
data_train, data_val = split_dataset(labeled_ds, 0.02)

# Train on the training split only (not on `labeled_ds`, which still contains the validation data).
# NOTE(review): cache files left over from an earlier run under these names are presumably reused
# as-is by tf.data; delete them so the corrected split takes effect.
train_ds = prepare_for_training(data_train, cache="cache_train")
test_ds = prepare_for_training(data_val, cache="cache_test")

In [5]:
# Rebind `model` to the network previously saved under "pre_trained_on_diff".
model = tf.keras.models.load_model("pre_trained_on_diff")

In [None]:
# Train for 5 epochs of 2000 steps each, validating on 100 batches per epoch, then save the model.
# Explicit step counts are required because both datasets repeat forever.
model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
    steps_per_epoch=2000,
    validation_steps=100,
    verbose=1,
)
model.save("pre_trained_on_diff")

In [14]:
# Same training run as the previous cell: 5 epochs x 2000 steps, 100 validation batches per epoch.
# NOTE(review): the recorded output of this cell is a ResourceExhaustedError raised while fetching
# the first batch; input-pipeline memory (shuffle buffer size, batch size) is the likely place to look.
model.fit(
    train_ds,
    validation_data=test_ds,
    epochs=5,
    steps_per_epoch=2000,
    validation_steps=100,
    verbose=1,
)
model.save("pre_trained_on_diff")

Epoch 1/5

ResourceExhaustedError: 2 root error(s) found.
  (0) Resource exhausted:  Failed to allocate memory for the batch of component 0
	 [[node IteratorGetNext (defined at <ipython-input-14-dd1bf11e5579>:1) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

  (1) Resource exhausted:  Failed to allocate memory for the batch of component 0
	 [[node IteratorGetNext (defined at <ipython-input-14-dd1bf11e5579>:1) ]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

	 [[IteratorGetNext/_2]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info.

0 successful operations.
0 derived errors ignored. [Op:__inference_train_function_51462]

Function call stack:
train_function -> train_function
