In [1]:
!pip install --upgrade pip

In [2]:
!pip install tensorflow==2.7.0

In [3]:
!pip install --upgrade scikit-image

In [4]:
import numpy as np
import os
import json
import skimage.io
import random
import tensorflow as tf

In [5]:
class OneHotEncoder(object):
    """Multi-hot encoder between tag names and fixed-length indicator vectors.

    Position ``i`` of an encoded vector corresponds to ``tags[i]`` of the
    vocabulary given to the constructor.
    """

    def __init__(self, tags):
        """Store the vocabulary and build the tag -> index lookup.

        Args:
            tags: ordered sequence of tag names; a tag's position here is its
                position in every encoded vector.
        """
        self.tags = tags
        self.dict = {tag: index for index, tag in enumerate(tags)}

    def encode(self, tags, max_batch_length=64):
        """Encode an iterable of tag names as a multi-hot ``int64`` tensor.

        Tags that are not in the vocabulary are ignored instead of raising
        ``KeyError``, so a full tag list can be passed in directly.

        Args:
            tags: iterable of tag names to mark as present.
            max_batch_length: length of the returned vector.

        Returns:
            A 1-D ``int64`` tensor of length ``max_batch_length`` holding 1 at
            the index of every known tag in ``tags`` and 0 elsewhere.

        Raises:
            IndexError: if a known tag's index is ``>= max_batch_length``.
        """
        result = [0] * max_batch_length
        for tag in tags:
            index = self.dict.get(tag)
            if index is not None:
                result[index] = 1
        return tf.convert_to_tensor(result, dtype=tf.dtypes.int64)

    def decode(self, encoded):
        """Return the tag names whose positions are set in ``encoded``.

        Args:
            encoded: object exposing ``.numpy()`` whose result converts to a
                flat list via ``.tolist()`` (e.g. the tensor from ``encode``).

        Returns:
            List of tag names, in vocabulary order, for every truthy position.
            Positions beyond the vocabulary length (padding) are ignored.
        """
        flags = encoded.numpy().tolist()
        # zip stops at the shorter sequence, so padded positions never index
        # past the end of the vocabulary.
        return [tag for tag, flag in zip(self.tags, flags) if flag]

In [6]:
# Dataset locations, relative to the notebook's working directory.
base_metadata_path = (
    "../input/tagged-anime-illustrations"
    "/danbooru-metadata/danbooru-metadata"
)
base_image_path = (
    "../input/tagged-anime-illustrations"
    "/danbooru-images/danbooru-images"
)

# Tag vocabulary; a tag's position in this list is its index in the label vector.
tags = [
    '1boy', '1girl', '2girls', '3girls',
    'ahoge', 'animal_ears', 'bangs', 'bare_shoulders',
    'black_legwear', 'blush', 'boots', 'bow',
    'braid', 'breasts', 'cleavage', 'closed_eyes',
    'detached_sleeves', 'dress', 'flower', 'food',
    'full_body', 'glasses', 'gloves', 'hat',
    'heart', 'holding', 'jacket', 'japanese_clothes',
    'jewelry', 'large_breasts', 'long_hair', 'long_sleeves',
    'male_focus', 'medium_breasts', 'multiple_boys', 'multiple_girls',
    'navel', 'necktie', 'one_eye_closed', 'open_mouth',
    'panties', 'pantyhose', 'ponytail', 'ribbon',
    'school_uniform', 'shirt', 'shoes', 'short_hair',
    'simple_background', 'sitting', 'skirt', 'smile',
    'solo', 'standing', 'swimsuit', 'sword',
    'tail', 'thighhighs', 'twintails', 'underwear',
    'very_long_hair', 'weapon', 'white_background', 'wings',
]

# Shared encoder used to turn per-image tag lists into label vectors.
encoder = OneHotEncoder(tags)

In [7]:
def fix_dim(img):
    """Return ``img`` as a 3-channel ``(height, width, 3)`` image.

    Args:
        img: array-like image exposing ``.shape``, either 2-D ``(H, W)`` or
            3-D ``(H, W, C)``.

    Returns:
        * ``(H, W, 3)`` input: ``img`` itself, unchanged.
        * ``(H, W)`` or ``(H, W, 1)`` input: a NumPy array with the single
          channel replicated three times. The input dtype is preserved
          (no forced cast to ``uint8``).
        * ``(H, W, 4)`` input: the first three channels (alpha dropped).
        * any other 3-D input: ``img`` itself, unchanged.

    Raises:
        ValueError: if ``img`` is neither 2-D nor 3-D.
    """
    ndim = len(img.shape)
    if ndim == 3:
        channels = img.shape[2]
        if channels == 4:
            # RGBA: discard the alpha channel.
            return img[:, :, :3]
        if channels == 1:
            return np.repeat(np.asarray(img), 3, axis=2)
        return img
    if ndim != 2:
        raise ValueError(
            f"expected a 2-D or 3-D image, got shape {tuple(img.shape)}"
        )
    gray = np.asarray(img)
    # Replicate the grayscale plane into three identical channels.
    return np.repeat(gray[:, :, np.newaxis], 3, axis=2)

In [8]:
def load_image(path):
    """Read an image file and return it as a ``float32`` tensor.

    The pixel array is passed through ``fix_dim`` before conversion, so the
    result is always a tensor regardless of how many channels the file has.

    Args:
        path: path of the image file to read with ``skimage.io.imread``.

    Returns:
        ``tf.Tensor`` of dtype ``float32`` holding the output of ``fix_dim``.
    """
    pixels = fix_dim(skimage.io.imread(path))
    return tf.convert_to_tensor(pixels, dtype=tf.dtypes.float32)

In [9]:
def data(training=True, seed=175069818):
    """Yield ``(image, labels)`` pairs for the training or validation split.

    Every metadata line gets one pseudo-random draw in ``[0, 19]``. Lines that
    draw 0 form the validation split; all others form the training split. The
    draw comes from a private ``random.Random(seed)``, so the global ``random``
    state is untouched and train/validation generators cannot disturb each
    other. Metadata files are visited in sorted order so the split is
    reproducible.

    Args:
        training: truthy to yield the training split, falsy for validation.
        seed: seed for the split assignment; use the same value for both
            splits so they stay disjoint.

    Yields:
        Tuples ``(x, y)`` where ``x`` is ``load_image(path)`` and ``y`` is
        ``encoder.encode(...)`` of the image's tags that are in the encoder's
        vocabulary. Records whose image file does not exist are skipped.
    """
    rng = random.Random(seed)
    want_training = bool(training)
    known_tags = encoder.dict
    for file in sorted(os.listdir(base_metadata_path)):
        with open(os.path.join(base_metadata_path, file), 'r', encoding='utf-8') as f:
            for line in f:
                # Draw for every line, before any filtering, so both splits
                # see the same assignment sequence.
                held_out = rng.randint(0, 19) == 0
                if held_out == want_training:
                    continue
                record = json.loads(line)

                # get json fields
                image_id = str(record['id'])
                ext = record['file_ext']

                # keep only tag names the encoder knows about
                tag_names = [
                    tag['name'] for tag in record['tags']
                    if tag['name'] in known_tags
                ]

                # dir of the image: id modulo 1000, zero-padded to 4 digits
                image_dir = str(int(image_id) % 1000).zfill(4)

                # path to image
                path = os.path.join(base_image_path, image_dir, image_id) + f'.{ext}'
                # due to the smaller subset, not all images are available
                if os.path.exists(path):
                    yield load_image(path), encoder.encode(tag_names)

In [10]:
# Element structure produced by the generator: an image and its label vector.
_example_signature = (
    tf.TensorSpec(shape=(512, 512, 3)),
    tf.TensorSpec(shape=[64]),
)


def _batched_dataset(generator):
    """Wrap a generator callable in a tf.data pipeline batched by 10."""
    dataset = tf.data.Dataset.from_generator(
        generator,
        output_signature=_example_signature,
    )
    return dataset.batch(batch_size=10)


train_dataset = _batched_dataset(data)
valid_dataset = _batched_dataset(lambda: data(False))

In [11]:
class Model(tf.keras.Model):
    """EfficientNetV2-S backbone followed by a 64-way sigmoid tag classifier."""

    def __init__(self):
        """Create the randomly initialised backbone and the classification head."""
        super(Model, self).__init__()
        self.feature_extractor = tf.keras.applications.efficientnet_v2.EfficientNetV2S(
                                     include_top=False,
                                     weights=None,
                                     input_tensor=None,
                                     input_shape=None,
                                     pooling="avg",
                                     include_preprocessing=True,
                                 )
        # Not named `predict`: that would shadow tf.keras.Model.predict and
        # break inference through the standard Keras API.
        self.classifier = tf.keras.layers.Dense(64)

    def call(self, inputs, training=None):
        """Compute per-tag probabilities for a batch of images.

        Args:
            inputs: batch of images fed to the feature extractor.
            training: optional flag forwarded to the feature extractor.

        Returns:
            Tensor with 64 sigmoid outputs per input example.
        """
        x = self.feature_extractor(inputs, training=training)
        x = self.classifier(x)
        return tf.keras.activations.sigmoid(x)

In [12]:
model = Model()
# The model emits an independent sigmoid per tag (multi-label), so each output
# is scored with binary cross-entropy rather than categorical cross-entropy.
model.compile(
    loss=tf.keras.losses.BinaryCrossentropy(),
    optimizer=tf.keras.optimizers.Adadelta(),
)
model.build(input_shape=(None, 512, 512, 3))
model.summary()