In [1]:
import numpy as np
import os
import json
import skimage.io
import random
import tensorflow as tf

In [2]:
from distutils.version import LooseVersion


def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True,
           preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None):
    """A wrapper for Scikit-Image resize().

    Scikit-Image generates warnings on every call to resize() if it doesn't
    receive the right parameters. The right parameters depend on the version
    of skimage. This solves the problem by using different parameters per
    version. And it provides a central place to control resizing defaults.
    """
    if LooseVersion(skimage.__version__) >= LooseVersion("0.14"):
        # New in 0.14: anti_aliasing. Default it to False for backward
        # compatibility with skimage 0.13.
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range, anti_aliasing=anti_aliasing,
            anti_aliasing_sigma=anti_aliasing_sigma)
    else:
        return skimage.transform.resize(
            image, output_shape,
            order=order, mode=mode, cval=cval, clip=clip,
            preserve_range=preserve_range)

In [25]:
class OneHotEncoder(object):
    def __init__(self, tags):
        self.tags = tags
        self.dict = {}
        for i, tag in enumerate(tags):
            self.dict[tag] = i

    def encode(self, tags, max_batch_length=64):
        result = [0] * max_batch_length
        for tag in tags:
            result[self.dict[tag]] = 1
        result = tf.convert_to_tensor(result, dtype=tf.dtypes.int64)
        return result

    def decode(self, encoded):
        result = []
        encoded = encoded.numpy().tolist()
        for i, true in enumerate(encoded):
            if true:
                result.append(self.tags[i])
        return result

In [4]:
base_metadata_path = "../input/tagged-anime-illustrations/danbooru-metadata/danbooru-metadata"
base_image_path = "../input/tagged-anime-illustrations/danbooru-images/danbooru-images"
tags = ['1boy', '1girl', '2girls', '3girls', 'ahoge', 'animal_ears', 'bangs', 'bare_shoulders', 'black_legwear',
        'blush', 'boots', 'bow', 'braid', 'breasts', 'cleavage', 'closed_eyes', 'detached_sleeves', 'dress', 'flower',
        'food', 'full_body', 'glasses', 'gloves', 'hat', 'heart', 'holding', 'jacket', 'japanese_clothes', 'jewelry',
        'large_breasts', 'long_hair', 'long_sleeves', 'male_focus', 'medium_breasts', 'multiple_boys', 'multiple_girls',
        'navel', 'necktie', 'one_eye_closed', 'open_mouth', 'panties', 'pantyhose', 'ponytail', 'ribbon',
        'school_uniform', 'shirt', 'shoes', 'short_hair', 'simple_background', 'sitting', 'skirt', 'smile', 'solo',
        'standing', 'swimsuit', 'sword', 'tail', 'thighhighs', 'twintails', 'underwear', 'very_long_hair', 'weapon',
        'white_background', 'wings']
encoder = OneHotEncoder(tags)

In [6]:
def fix_dim(img):
    if len(img.shape) == 3:
        return img
    w, h = img.shape
    ret = np.empty((w, h, 3), dtype=np.uint8)
    ret[:, :, 0] = img
    ret[:, :, 1] = img
    ret[:, :, 2] = img
    return ret

In [17]:
def load_image(path):
    x = tf.convert_to_tensor(resize(fix_dim(skimage.io.imread(path)), (380, 380)), dtype=tf.dtypes.float32)
    return fix_dim(x)

In [18]:
def data(training=True):
    random.seed(175069818)
    for file in os.listdir(base_metadata_path):
        with open(os.path.join(base_metadata_path, file), 'r') as f:
            for i, line in enumerate(f):
                num = random.randint(0, 19)
                if num % 20 == 0 and training: continue
                if num % 20 != 0 and (not training): continue
                j = json.loads(line)

                # get json fields
                image_id = j['id']
                ext = j['file_ext']
                tags = j['tags']

                # get tag names and ids
                tag_names = list(map(lambda t: t['name'], tags))

                # dir of the image
                image_path = str(int(image_id) % 1000).zfill(4)

                # path to image
                path = os.path.join(base_image_path, image_path, image_id) + f'.{ext}'
                # due to the smaller subset, not all images are available
                if os.path.exists(path):
                    x = load_image(path)
                    y = tag_names
                    y = encoder.encode(y)
                    yield x, y

In [19]:
train_dataset = tf.data.Dataset.from_generator(data,
                                               output_signature=(
                                                   tf.TensorSpec(shape=(512, 512, 3)),
                                                   tf.TensorSpec(shape=[64])
                                               )).batch(batch_size=10)
valid_dataset = tf.data.Dataset.from_generator(lambda: data(False),
                                               output_signature=(
                                                   tf.TensorSpec(shape=(512, 512, 3)),
                                                   tf.TensorSpec(shape=[64])
                                               )).batch(batch_size=10)

In [22]:
model = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
    include_top=True,
    weights=None,
    input_tensor=None,
    input_shape=None,
    pooling="avg",
    include_preprocessing=True,
    classifier_activation="sigmoid"
)

In [23]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2))

In [24]:
model.compile(loss=tf.keras.losses.BinaryCrossentropy(), optimizer=tf.keras.optimizers.Adam(learning_rate=1e-2))


ValueError: in user code:

    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\engine\training.py", line 1021, in train_function  *
        return step_function(self, iterator)
    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\engine\training.py", line 1010, in step_function  **
        outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\engine\training.py", line 1000, in run_step  **
        outputs = model.train_step(data)
    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\engine\training.py", line 859, in train_step
        y_pred = self(x, training=True)
    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\utils\traceback_utils.py", line 67, in error_handler
        raise e.with_traceback(filtered_tb) from None
    File "E:\Anaconda\envs\Project-Fugu-Manga-Translator\lib\site-packages\keras\engine\input_spec.py", line 264, in assert_input_compatibility
        raise ValueError(f'Input {input_index} of layer "{layer_name}" is '

    ValueError: Input 0 of layer "efficientnetv2-s" is incompatible with the layer: expected shape=(None, 384, 384, 3), found shape=(None, 512, 512, 3)
