In [None]:
%%bash
# Install/upgrade the extra packages imported further down in this notebook
# (tensorflow_addons and comet_ml). Versions are not pinned.
pip install --quiet --upgrade tensorflow-addons comet-ml

In [None]:
from pathlib import Path

# Root directory of the competition's input files.
data_root = Path("/kaggle/input/h-and-m-personalized-fashion-recommendations")
# Display the entries directly under the data root.
[str(entry) for entry in data_root.glob("*")]

In [None]:
import pandas as pd

# Article metadata table, read from the competition data directory.
articles = pd.read_csv(data_root / "articles.csv")
# Map product type name -> product type number, then keep only the names,
# ordered by their number. The position of a name in `label_names` is used
# as its class index later on.
label_names = articles.set_index("product_type_name")["product_type_no"].to_dict()
label_names = sorted(label_names, key=label_names.__getitem__)
# Re-key the table as {article_id: row-as-dict} for direct lookup by id.
articles = dict(
    (record["article_id"], record)
    for record in articles.to_dict(orient="records")
)

In [None]:
import tensorflow as tf
import tensorflow_addons as tfa

# Fix TensorFlow's global random seed for this notebook run.
tf.random.set_seed(42)

def build_decoder(latent_dim):
    """Build the linear classification head applied to encoder embeddings.

    Args:
        latent_dim: size of the embedding vectors fed to the decoder.

    Returns:
        A `tf.keras.Sequential` with a single `Dense` layer producing one
        logit per entry of `label_names` (no activation).
    """
    decoder = tf.keras.Sequential()
    decoder.add(tf.keras.layers.Input([latent_dim]))
    decoder.add(tf.keras.layers.Dense(len(label_names)))
    return decoder

def build_encoder(latent_dim):
    """Build the image encoder: augmentation -> frozen MobileNetV3 -> MLP head.

    Args:
        latent_dim: width of the final ("projection_head") dense layer, i.e.
            the dimensionality of the returned embeddings.

    Returns:
        A `tf.keras.Model` mapping a batch of RGB images of any spatial size
        to a `(batch, latent_dim)` tensor of embeddings.
    """
    inputs = tf.keras.layers.Input([None, None, 3])
    # Pretrained backbone, used as a fixed feature extractor.
    spine = tf.keras.applications.MobileNetV3Small(include_top=False)
    spine.trainable = False

    # Resize to the working resolution, then apply the random augmentations.
    batch = inputs
    for augment in [
        tf.keras.layers.Resizing(64, 64),
        tf.keras.layers.RandomFlip("horizontal"),
        tf.keras.layers.RandomContrast(0.25),
        tf.keras.layers.RandomRotation(0.0625),
        tf.keras.layers.RandomTranslation(0.125, 0.125),
        tf.keras.layers.RandomZoom(-0.125),
    ]:
        batch = augment(batch)
    # The backbone always runs in inference mode, whatever mode the encoder is in.
    vectors = spine(batch, training=False)
    vectors = tf.keras.layers.GlobalAvgPool2D()(vectors)

    # MLP head. The last width follows `latent_dim` (previously hard-coded to
    # 128, which silently ignored the argument and only matched the decoder
    # when latent_dim happened to be 128).
    dims = [512, 256, latent_dim]
    for i, d in enumerate(dims):
        # Only the final layer is named; the training loop matches weights by
        # the "projection_head/" name prefix.
        name = "projection_head" if i == len(dims) - 1 else None
        vectors = tf.keras.layers.Activation(tf.nn.silu)(vectors)
        vectors = tf.keras.layers.Dropout(0.1)(vectors)
        vectors = tf.keras.layers.Dense(d, name=name)(vectors)

    return tf.keras.Model(inputs, vectors)

def build_optimizer(steps, min_lr, max_lr):
    """Create the optimizer used for both the encoder and the decoder.

    Args:
        steps: number of optimizer steps; half of it is passed as the third
            positional argument (the step size) of the triangular schedule.
        min_lr: first positional argument of the cyclical schedule.
        max_lr: second positional argument of the cyclical schedule.

    Returns:
        An AdaBelief optimizer (with `rectify=True`) driven by the triangular
        cyclical learning-rate schedule and wrapped in Lookahead.
    """
    half_cycle = steps / 2
    schedule = tfa.optimizers.TriangularCyclicalLearningRate(
        min_lr, max_lr, half_cycle
    )
    return tfa.optimizers.Lookahead(
        tfa.optimizers.AdaBelief(schedule, rectify=True)
    )

In [None]:
def unitwise_norm(x):
    """Compute the L2 norm of `x` separately for each output unit.

    The reduction axes depend on the rank of `x`:
      * rank 0 or 1 (scalars, vectors): one norm over all values, as a scalar;
      * rank 2 or 3 (linear / multi-head linear kernels): reduce axis 0,
        keeping the reduced dimension so the result broadcasts against `x`;
      * rank 4 (conv kernels, HWIO): reduce axes 0, 1 and 2, keeping dims.

    Args:
        x: a tensor or variable exposing `get_shape()`.

    Returns:
        A tensor of norms (scalar for rank <= 1, otherwise broadcastable
        against `x`).

    Raises:
        ValueError: if `x` has rank greater than 4.
    """
    rank = len(x.get_shape())
    if rank <= 1:  # Scalars and vectors
        axis = None
        keepdims = False
    elif rank in [2, 3]:  # Linear layers of shape IO or multihead linear
        axis = 0
        keepdims = True
    elif rank == 4:  # Conv kernels of shape HWIO
        axis = [0, 1, 2]
        keepdims = True
    else:
        raise ValueError(f"Got a parameter with rank not in [0, 1, 2, 3, 4]! {x}")
    # Spell the Euclidean norm out explicitly: tf.linalg.norm only documents
    # `axis` as None, an int or a 2-tuple, so the 3-axis reduction needed for
    # conv kernels cannot be expressed through it.
    return tf.sqrt(tf.reduce_sum(tf.square(x), axis=axis, keepdims=keepdims))


def adaptive_clip_grad(parameters, gradients, clip_factor=0.01,
                       eps=1e-3):
    """Clip each gradient unit-wise relative to the norm of its parameter.

    For every (parameter, gradient) pair, a gradient unit whose norm is not
    below `clip_factor * max(||param||, eps)` is rescaled to exactly that
    norm; smaller gradients are returned unchanged.

    Args:
        parameters: iterable of parameter tensors/variables.
        gradients: iterable of gradients aligned with `parameters`. Entries
            may be `None` (e.g. for variables not connected to the loss).
        clip_factor: maximum allowed ratio of gradient norm to parameter norm.
        eps: lower bound applied to the parameter norm, so that parameters at
            (or near) zero can still receive updates.

    Returns:
        A list of gradients in the same order as the inputs; `None` entries
        are passed through as `None`.
    """
    new_grads = []
    for (params, grads) in zip(parameters, gradients):
        if grads is None:
            # Nothing to clip; keep the slot so the output stays aligned.
            new_grads.append(None)
            continue
        p_norm = unitwise_norm(params)
        max_norm = tf.math.maximum(p_norm, eps) * clip_factor
        grad_norm = unitwise_norm(grads)
        # The 1e-6 floor guards the division when a gradient unit is all zeros.
        clipped_grad = grads * (max_norm / tf.math.maximum(grad_norm, 1e-6))
        new_grad = tf.where(grad_norm < max_norm, grads, clipped_grad)
        new_grads.append(new_grad)
    return new_grads

In [None]:
class SupConLoss(tf.keras.losses.Loss):
    """Supervised contrastive loss computed over all pairs within a batch.

    Embeddings are L2-normalised and their pairwise dot products, divided by
    the temperature `tau`, serve as logits. The target distribution for each
    row is the row-normalised label co-occurrence matrix `y @ y^T`. The
    diagonal (each sample paired with itself) is included in both.
    """

    def __init__(self, tau=1.0, name=None):
        """Store the softmax temperature `tau`; `name` is forwarded to Keras."""
        super().__init__(name=name)
        self.tau = tau

    def __call__(self, y, z, sample_weight=None):
        """Return the per-sample loss.

        Note that this overrides `__call__` directly, so the Keras reduction
        is not applied and `sample_weight` is ignored.

        Args:
            y: label matrix, one row per sample (one-hot rows in this notebook).
            z: embedding matrix, one row per sample.
            sample_weight: accepted for signature compatibility; unused.

        Returns:
            A 1-D tensor with one cross-entropy value per sample.
        """
        z, _ = tf.linalg.normalize(z, ord=2, axis=-1)
        u = z @ tf.transpose(z) / self.tau
        v = y @ tf.transpose(y)
        # A row of `y` that is all zeros (what tf.one_hot yields for an
        # out-of-range index such as -1) makes the row sum zero; divide_no_nan
        # turns the resulting 0/0 into a zero target row instead of NaN.
        v = tf.math.divide_no_nan(v, tf.math.reduce_sum(v, axis=-1, keepdims=True))
        return tf.nn.softmax_cross_entropy_with_logits(v, u)

In [None]:
from tqdm.notebook import tqdm

bsize = 1024
# Number of leading entries of `paths` that are held out of training. Keep in
# sync with the `paths[2048:]` / `paths[:2048]` slices used to build the
# training and test datasets.
holdout = 2048
# The glob is lazy, so `total` is only a hint for the progress bar.
paths = list(tqdm(data_root.joinpath("images").glob("**/*.jpg"), total=105100))
# Class index of every image: file stem -> article id -> product type name ->
# position of that name in `label_names`.
types = [
    label_names.index(articles[int(path.stem)]["product_type_name"])
    for path in paths
]
paths = list(map(str, paths))
# In-graph lookup table from path string to class index; unknown paths map to -1.
types = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(
        tf.constant(paths), tf.constant(types)
    ),
    default_value=-1
)
# Batches per training epoch. Only the paths after the hold-out slice are
# trained on, and batching without dropping the remainder yields
# ceil(n / bsize) batches. (Rounding len(paths) / bsize over *all* paths
# over-counted, which skewed the LR schedule length, the progress total and
# the epoch-boundary check.)
steps = max(1, -(-(len(paths) - holdout) // bsize))

def deserialize(path):
    """Load one training example from an image path.

    Args:
        path: scalar string tensor holding the path of a JPEG file.

    Returns:
        A pair `(image, label)`: the decoded 3-channel image resized to
        64x64, and the one-hot encoding (over `label_names`) of the class
        index that the `types` lookup table holds for `path`.
    """
    raw = tf.io.read_file(path)
    image = tf.image.resize(tf.io.decode_jpeg(raw, channels=3), (64, 64))
    label = tf.one_hot(types.lookup(path), len(label_names))
    return image, label

def dataset(paths, training=True):
    """Build the tf.data input pipeline for a list of image paths.

    Args:
        paths: sequence of image path strings, each decoded by `deserialize`.
        training: when True, examples are cached, shuffled (buffer of `bsize`)
            and grouped into batches of `bsize`. When False, the dataset
            yields individual, unbatched `(image, label)` pairs in the order
            of `paths`.

    Returns:
        A `tf.data.Dataset` of `(image, one_hot_label)` elements or batches.
    """
    ds = tf.data.Dataset.from_tensor_slices(paths).map(
        deserialize, num_parallel_calls=tf.data.AUTOTUNE
    )
    if training:
        # Cache the decoded examples *before* shuffling. Caching after the
        # shuffle stores the first epoch's order and replays it verbatim on
        # every later epoch, so batches would never be re-mixed.
        ds = ds.cache().shuffle(bsize)
        ds = ds.batch(bsize)
    # Prefetch last so that the finished elements/batches are what overlaps
    # with the consumer.
    return ds.prefetch(tf.data.AUTOTUNE)

# The first 2048 paths form the unbatched held-out set; everything after them
# is the batched training set.
train_ds = dataset(paths[2048:], training=True)
test_ds = dataset(paths[:2048], training=False)

In [None]:
# Materialise the held-out set as two stacked tensors: images and one-hot labels.
embed_images, embed_labels = (tf.stack(column) for column in zip(*test_ds))
# Replace each one-hot row with the name of its class.
embed_labels = [
    label_names[index]
    for index in tf.argmax(embed_labels, axis=-1).numpy()
]
embed_images.shape

In [None]:
from comet_ml import Experiment
from kaggle_secrets import UserSecretsClient
# Client for Kaggle's secret store; the Comet API key is read from it below
# instead of being written into the notebook.
user_secrets = UserSecretsClient()

# Comet experiment object; all parameter, metric and embedding logging in this
# notebook goes through `monitor`.
monitor = Experiment(
    api_key=user_secrets.get_secret("comet-api-key"),
    project_name="hmrec",
    workspace="kavorite",
)

# Training hyperparameters.
clip_factor, temperature, latent_dims, epochs = 0.01, 0.05, 128, 24
# Peak learning rate scales linearly with the batch size (0.02 at 256);
# the floor of the schedule is 1/1000 of the peak.
max_lr = 0.02 * bsize / 256
min_lr = max_lr * 1e-3
# Record the configuration with the experiment.
monitor.log_parameters(dict(
    temperature=temperature,
    latent_dims=latent_dims,
    max_lr=max_lr,
    min_lr=min_lr,
    clip_factor=clip_factor,
    epochs=epochs,
    batch_size=bsize,
))

# Register the held-out images with Comet once; the second element of the
# result is kept and later passed as `image_data` to `monitor.log_embedding`.
_, embed_image_url = monitor.create_embedding_image(
    embed_images, image_size=tf.shape(embed_images)[-3:].numpy().tolist()
)
# Contrastive loss for the encoder, using the configured temperature.
sup_con = SupConLoss(temperature)
# Encoder produces embeddings; decoder is the linear classifier on top of them.
encoder = build_encoder(latent_dims)
decoder = build_decoder(latent_dims)
# Both optimizers share the same schedule settings: total step count over all
# epochs plus the learning-rate bounds.
opt_config = (steps * epochs, min_lr, max_lr)
encoder_opt = build_optimizer(*opt_config)
decoder_opt = build_optimizer(*opt_config)
# Running top-1 / top-5 accuracy of the decoder's predictions.
acc1 = tf.keras.metrics.CategoricalAccuracy()
acc5 = tf.keras.metrics.TopKCategoricalAccuracy(k=5)

In [None]:
import time

def current_lr():
    """Return the encoder optimizer's learning rate at its current iteration.

    If the optimizer's `lr` is callable (a schedule) it is evaluated at the
    optimizer's iteration counter; otherwise `lr` itself is returned.
    """
    schedule_or_value = encoder_opt.lr
    iteration = encoder_opt.iterations
    if not callable(schedule_or_value):
        return schedule_or_value
    return schedule_or_value(iteration)


# Wall-clock time of the last embedding upload; 0.0 forces one on the first step.
last_viz = 0.0
with tqdm(total=steps * epochs) as progress:
    for x, y in train_ds.repeat(epochs):
        # --- Encoder step: supervised contrastive loss on the embeddings. ---
        with tf.GradientTape() as tape:
            z = encoder(x, training=True)
            encoder_loss = tf.reduce_mean(sup_con(y, z))

        grads = tape.gradient(encoder_loss, encoder.trainable_weights)
        clipped = adaptive_clip_grad(encoder.trainable_weights, grads, clip_factor)
        # Weights of the "projection_head" layer get the raw gradient; all
        # other weights get the adaptively clipped one.
        encoder_opt.apply_gradients(
            (g, w) if w.name.startswith("projection_head/") else (c, w)
            for g, c, w in zip(grads, clipped, encoder.trainable_weights)
        )

        # --- Decoder step: linear classifier on the embeddings computed above.
        # Only decoder weights are differentiated here, so the encoder is not
        # updated by this loss.
        with tf.GradientTape() as tape:
            y_hat = decoder(z)
            decoder_loss = tf.reduce_mean(
                tf.nn.softmax_cross_entropy_with_logits(y, y_hat / temperature)
            )
        grads = tape.gradient(decoder_loss, decoder.trainable_weights)
        decoder_opt.apply_gradients(zip(grads, decoder.trainable_weights))

        # --- Progress reporting. ---
        acc1.update_state(y, y_hat)
        acc5.update_state(y, y_hat)
        progress.set_description(
            f"loss = {encoder_loss.numpy():.3g}"
            + f" acc@1 = {acc1.result():.3g}"
            + f" acc@5 = {acc5.result():.3g}"
        )
        progress.update()

        # Number of encoder updates applied so far, as a plain Python int.
        step = int(encoder_opt.iterations.numpy())
        # TODO: slow-cook encoder backbone during final epochs
        epoch = step // steps
        if step % steps == 0:
            # Report the epoch that just finished (previously the constant
            # total `epochs` was logged at every boundary).
            monitor.log_epoch_end(epoch)
        monitor.log_metrics({
            "decoder_acc1": acc1.result(),
            "decoder_acc5": acc5.result(),
            "encoder_loss": encoder_loss,
            "learning_rate": current_lr(),
        }, step=step)

        # Upload embeddings of the held-out images at most once every 300 s.
        now = time.time()
        if now - last_viz > 300:
            last_viz = now
            monitor.log_embedding(
                vectors=encoder(embed_images, training=False).numpy().tolist(),
                labels=embed_labels,
                image_data=embed_image_url,
                image_size=tf.shape(embed_images)[-3:].numpy().tolist(),
                title=f"Step {step} Encodings"
            )

In [None]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import tensorflow_probability as tfp

def pca(mat, k=2):
    """Project the rows of `mat` onto their top-`k` principal components.

    Args:
        mat: 2-D float tensor with one sample per row and one feature per
            column.
        k: number of principal components to keep.

    Returns:
        A tensor with one row per sample and `k` columns (fewer if `mat` has
        fewer than `k` features), ordered by decreasing explained variance.
    """
    # Standardise each feature (column): subtract its mean and divide by its
    # standard deviation. The floor keeps constant features from dividing by 0.
    loc, var = tf.nn.moments(mat, 0, keepdims=True)
    mat = (mat - loc) / tf.math.maximum(tf.sqrt(var), 1e-12)
    cov = tfp.stats.covariance(mat)
    # eigh returns eigenvalues in ascending order with the matching
    # eigenvectors stored as *columns*, so select the columns belonging to the
    # k largest eigenvalues.
    eigvals, eigvecs = tf.linalg.eigh(cov)
    top = tf.argsort(eigvals, direction="DESCENDING")[:k]
    return mat @ tf.gather(eigvecs, top, axis=-1)

# Scatter the embeddings of the last training batch (`y`, `z` left over from
# the training loop) in their first two principal components, coloured by class.
label = tf.argmax(y, -1)
# Sort samples by class so that each class occupies a contiguous run of rows.
order = tf.argsort(label)
label = tf.gather(label, order)
preds = pca(tf.gather(z, order))
# Classes present in the batch (in sorted order) and the size of each run.
present, _, group = tf.unique_with_counts(label)
# One colour per present class; float endpoints are required by tf.linspace.
color = cm.nipy_spectral(tf.linspace(0.0, 1.0, len(group)).numpy())
plt.figure(figsize=(12, 8))
plt.title("Predictions")
for index, rgba, vectors in zip(present.numpy(), color, tf.split(preds, group)):
    # Classes with a single sample in the batch are skipped.
    if len(vectors) > 1:
        points = vectors.numpy()
        # Column 0 / column 1 are the first / second principal component;
        # the legend entry is the name of the class actually plotted.
        plt.scatter(points[:, 0], points[:, 1], color=rgba, label=label_names[index])
plt.legend()

In [None]:
# Difference between the target and predicted pairwise distributions for the
# last training batch, computed the same way SupConLoss does:
#   u: softmax over temperature-scaled cosine similarities of the embeddings;
#   v: label co-occurrence matrix, normalised so each row sums to one.
u, _ = tf.linalg.normalize(z, ord=2, axis=-1)
u = tf.nn.softmax(u @ tf.transpose(u) / temperature)
v = y @ tf.transpose(y)
v = v / tf.reduce_sum(v, axis=-1, keepdims=True)
plt.imshow(v - u)

In [None]:
# Write the trained encoder to "encoder.h5", leaving the optimizer state out.
encoder.save("encoder.h5", include_optimizer=False)