In [None]:
!pip install keras-cv
!pip install tf-models-official
!pip install focal-loss

In [None]:
import os
import glob

from focal_loss import sparse_categorical_focal_loss
import keras_cv
import numpy as np
import pandas as pd
from sklearn.metrics import f1_score, precision_recall_curve
import tensorflow as tf
from tensorflow import keras
import tensorflow_datasets as tfds
import tensorflow_models as tfm
from tqdm import tqdm


# Fix the global random seed up front so repeated runs are comparable.
tf.keras.utils.set_random_seed(42)

In [None]:
class GeMPooling(keras.layers.Layer):
    """Generalized-mean (GeM) pooling over a 2D feature map.

    Computes ``gap((x + eps) ** p) ** (1 / p)`` where ``gap`` is Keras'
    ``GlobalAveragePooling2D``.

    Args:
        p: exponent of the generalized mean.
        eps: small constant added to the input before exponentiation.
        **kwargs: forwarded to ``keras.layers.Layer`` (e.g. ``name``).
    """

    def __init__(self, p=3, eps=1e-8, **kwargs):
        super().__init__(**kwargs)
        self.p = p
        self.eps = eps
        self.gap = keras.layers.GlobalAveragePooling2D()

    def call(self, x):
        """Return the GeM-pooled tensor for feature map ``x``."""
        # NOTE(review): assumes non-negative inputs (e.g. post-ReLU activations);
        # a negative pooled value raised to 1/p would produce NaN — confirm callers.
        powered = tf.math.pow(x + self.eps, self.p)
        return tf.math.pow(self.gap(powered), 1 / self.p)

    def get_config(self):
        """Return the layer config including ``p`` and ``eps`` so the layer can be serialized."""
        config = super().get_config()
        config.update({'p': self.p, 'eps': self.eps})
        return config


class Contrastive(keras.Model):
    """Contrastive (SimCLR-style) model: EfficientNetV2-B2 backbone plus a projection head.

    During training and validation each batch is augmented twice; the two views
    are embedded and scored with a symmetric focal loss over the
    temperature-scaled similarity matrix, where the matching view is the target.

    Args:
        model_dim: number of filters of the 1x1 projection convolution in the head.
        temperature: divisor applied to the similarity matrix before the loss.
    """

    def __init__(self, model_dim=256, temperature=0.1):
        super().__init__()

        self.temperature = temperature

        self.feature_extractor = keras.applications.EfficientNetV2B2(include_top=False, weights='imagenet')
        # The pipeline rescales by 1/255 first, clips to [0, 1] and multiplies by 255
        # at the end, so it presumably expects and returns images in the [0, 255] range.
        self.augmentation = keras.Sequential(
            [
                keras.layers.Rescaling(1 / 255.),
                keras.layers.RandomFlip('horizontal'),
                keras.layers.Lambda(lambda batch: tf.map_fn(lambda x: tf.image.random_jpeg_quality(x, 50, 100), batch)),
                keras.layers.RandomRotation(0.15, fill_mode='constant', fill_value=1.),
                keras.layers.RandomTranslation(0.15, 0.15, fill_mode='constant', fill_value=1.),
                keras.layers.RandomZoom((-0.3, 0.3), fill_mode='constant', fill_value=1.),
                keras.layers.RandomBrightness(0.35, value_range=(0, 1)),
                keras_cv.layers.RandomHue(0.35, value_range=(0, 1)),
                keras_cv.layers.RandomSharpness(0.35, value_range=(0, 1)),
                keras_cv.layers.RandomSaturation((0.35, 0.65)),
                keras_cv.layers.RandomCutout(0.4, 0.4, fill_mode='gaussian_noise'),
                keras.layers.Lambda(lambda batch: tf.clip_by_value(batch, 0., 1.)),
                keras.layers.Rescaling(255.)
            ]
        )

        # Projection head: 1x1 conv -> GeM pooling -> L2 normalization.
        self.head = keras.Sequential(
            [
                keras.layers.Conv2D(model_dim, kernel_size=1, activation='relu', name='embedding_projection'),
                GeMPooling(name='gem_pooling'),
                keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=-1), name='l2_normalization')
            ]
        )

    def compile(self, optimizer, **kwargs):
        """Store the optimizer and create the running-mean trackers for loss and accuracy.

        Args:
            optimizer: optimizer used by ``train_step`` to apply gradients.
            **kwargs: forwarded to ``keras.Model.compile``.
        """
        super().compile(**kwargs)

        self.optimizer = optimizer
        self.loss_tracker = keras.metrics.Mean(name='loss')
        self.val_loss_tracker = keras.metrics.Mean(name='val_loss')
        self.acc_tracker = keras.metrics.Mean(name='acc')
        # Previously named 'val_loss' (copy-paste), which duplicated the name of
        # the validation-loss tracker above.
        self.val_acc_tracker = keras.metrics.Mean(name='val_acc')

    def call(self, x):
        """Embed a batch of images: backbone features followed by the projection head."""
        features = self.feature_extractor(x)
        return self.head(features)

    def train_step(self, data):
        """Run one optimization step on two augmented views of the batch.

        Args:
            data: ``(images, labels)`` tuple; the labels are ignored.

        Returns:
            Dict with the running means of the training loss and accuracy.
        """
        x, _ = data
        augmented_1 = self.augmentation(x, training=True)
        augmented_2 = self.augmentation(x, training=True)
        # NOTE(review): the backbone and head are called without an explicit
        # `training` flag — confirm whether BatchNorm/dropout are meant to run
        # in inference mode during training.
        with tf.GradientTape() as tape:
            features_1 = self.feature_extractor(augmented_1)
            features_2 = self.feature_extractor(augmented_2)
            embeddings_1 = self.head(features_1)
            embeddings_2 = self.head(features_2)
            loss, acc = self.calculate_loss(embeddings_1, embeddings_2)

        # Only the backbone and head are optimized; build the weight list once.
        trainable_weights = self.feature_extractor.trainable_weights + self.head.trainable_weights
        gradients = tape.gradient(loss, trainable_weights)
        self.optimizer.apply_gradients(zip(gradients, trainable_weights))
        self.loss_tracker.update_state(loss)
        self.acc_tracker.update_state(acc)
        return {'loss': self.loss_tracker.result(), 'acc': self.acc_tracker.result()}

    def test_step(self, data):
        """Evaluate the contrastive loss and accuracy on two augmented views of the batch.

        Args:
            data: ``(images, labels)`` tuple; the labels are ignored.

        Returns:
            Dict with the running means of the validation loss and accuracy.
        """
        x, _ = data
        # Augmentation stays active here: the loss is defined on a pair of
        # different views of the same image.
        augmented_1 = self.augmentation(x, training=True)
        augmented_2 = self.augmentation(x, training=True)
        embeddings_1 = self.call(augmented_1)
        embeddings_2 = self.call(augmented_2)
        loss, acc = self.calculate_loss(embeddings_1, embeddings_2)
        self.val_loss_tracker.update_state(loss)
        self.val_acc_tracker.update_state(acc)
        return {'loss': self.val_loss_tracker.result(), 'acc': self.val_acc_tracker.result()}

    def calculate_loss(self, emb_1, emb_2):
        """Compute the symmetric contrastive loss and accuracy for two embedding batches.

        Row ``i`` of ``emb_1`` and row ``i`` of ``emb_2`` are treated as the
        positive pair; every other row in the batch acts as a negative.

        Args:
            emb_1: embeddings of the first augmented view.
            emb_2: embeddings of the second augmented view.

        Returns:
            ``(loss, accuracy)``, each the average of the view-1 -> view-2 and
            view-2 -> view-1 directions.
        """
        similarities = tf.matmul(emb_1, emb_2, transpose_b=True) / self.temperature
        # The correct "class" for row i is column i of the similarity matrix.
        contrastive_labels = tf.range(tf.shape(emb_1)[0])
        # Third positional argument is presumably the focal-loss gamma — verify against focal_loss docs.
        loss_12 = sparse_categorical_focal_loss(contrastive_labels, similarities, 2, from_logits=True)
        loss_21 = sparse_categorical_focal_loss(contrastive_labels, tf.transpose(similarities), 2, from_logits=True)
        acc_12 = tf.keras.metrics.sparse_categorical_accuracy(contrastive_labels, similarities)
        acc_21 = tf.keras.metrics.sparse_categorical_accuracy(contrastive_labels, tf.transpose(similarities))
        return (loss_12 + loss_21) / 2, (acc_12 + acc_21) / 2

In [None]:
def get_connected_components(adjacency_dict):
    """Group the nodes of a graph into connected components.

    Args:
        adjacency_dict: mapping ``node -> iterable of neighbouring nodes``.
            Nodes that appear only as neighbours (never as keys) are treated
            as having no neighbours of their own. Components are only
            meaningful as "connected components" when edges are stored in
            both directions.

    Returns:
        List of components. Each component is a list of nodes in depth-first
        discovery order, starting with the first of its nodes encountered
        while iterating over ``adjacency_dict``.
    """
    visited = set()
    components = []
    # Iterate over the argument itself, not a module-level graph.
    for start in adjacency_dict:
        if start in visited:
            continue

        visited.add(start)
        component = [start]
        # Explicit stack of neighbour iterators: yields the same pre-order as a
        # recursive DFS but cannot hit Python's recursion limit on large components.
        stack = [iter(adjacency_dict.get(start, ()))]
        while stack:
            for neighbor in stack[-1]:
                if neighbor not in visited:
                    visited.add(neighbor)
                    component.append(neighbor)
                    stack.append(iter(adjacency_dict.get(neighbor, ())))
                    break
            else:
                # Current node has no unvisited neighbours left: backtrack.
                stack.pop()

        components.append(component)

    return components


# Load the labelled image pairs and reduce each URL to its file name.
train_df = pd.read_csv('train.csv')
train_df['image_name1'] = train_df['image_url1'].map(os.path.basename)
train_df['image_name2'] = train_df['image_url2'].map(os.path.basename)

# Symmetric adjacency: every listed pair links both images to each other.
adjacency = {}
for left, right in zip(train_df['image_name1'], train_df['image_name2']):
    adjacency.setdefault(left, set()).add(right)
    adjacency.setdefault(right, set()).add(left)

# Each connected component gets one integer label shared by all of its images.
components = get_connected_components(adjacency)
image_labels = {
    name: label
    for label, members in enumerate(components)
    for name in members
}

print(len(components))

# In-graph lookup from file name to component label; unknown names map to -1.
keys_tensor = tf.constant(list(image_labels))
vals_tensor = tf.constant(list(image_labels.values()))
image_labels_table = tf.lookup.StaticHashTable(
    tf.lookup.KeyValueTensorInitializer(keys_tensor, vals_tensor),
    default_value=-1,
)

# for now we will only use one image from each scene
selected_images = {members[0] for members in components}

In [None]:
# Image size fed to the network, batch size and the epoch budget.
HEIGHT, WIDTH = 256, 256
BATCH = 512
EPOCHS = 50

# Keep only the files chosen as the representative of their component.
images_list = [
    path
    for path in glob.glob(os.path.join('train', '*'))
    if os.path.basename(path) in selected_images
]

# Components with a label below this threshold are held out for validation.
n_val_components = 7500
train_images, val_images = [], []
for path in images_list:
    if image_labels[os.path.basename(path)] < n_val_components:
        val_images.append(path)
    else:
        train_images.append(path)

# Extra images from the two additional folders are appended to the training set only.
train_images = train_images + glob.glob('kaggle_room_street_data/*/*jpg') + glob.glob('home_bro/*/*jpg')
print(len(train_images), len(val_images))

# Training paths are reshuffled every epoch; validation paths are shuffled once.
train_dataset = tf.data.Dataset.from_tensor_slices(train_images).shuffle(
    len(train_images), reshuffle_each_iteration=True
)
val_dataset = tf.data.Dataset.from_tensor_slices(val_images).shuffle(
    len(val_images), reshuffle_each_iteration=False
)


def process_path(image_path):
    """Load one training image and look up its component label.

    Args:
        image_path: scalar string tensor holding the path of a JPEG file.

    Returns:
        ``(image, label)``: the decoded 3-channel image resized with padding to
        ``HEIGHT`` x ``WIDTH``, and the value stored in ``image_labels_table``
        for the file name (the table's default value when the name is unknown).
    """
    file_name = tf.strings.split(image_path, os.path.sep)[-1]
    label = image_labels_table.lookup(file_name)

    decoded = tf.io.decode_jpeg(tf.io.read_file(image_path), channels=3)
    image = tf.image.resize_with_pad(decoded, HEIGHT, WIDTH)
    return image, label


# Decode images in parallel, group them into batches and prefetch upcoming batches.
train_dataset = (
    train_dataset
    .map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)
val_dataset = (
    val_dataset
    .map(process_path, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

In [None]:
# Base learning rate of 0.03 per 256 samples, scaled linearly with the batch size.
learning_rate = 0.03 * BATCH / 256
# len(train_dataset) is the number of batches per epoch, so the cosine schedule
# is sized for EPOCHS full passes over the training data.
n_steps = EPOCHS * len(train_dataset)
cosine_decay_scheduler = tf.keras.optimizers.schedules.CosineDecay(learning_rate, decay_steps=n_steps, alpha=0.05)

model = Contrastive()
model.compile(optimizer=tfm.optimization.lars_optimizer.LARS(learning_rate=cosine_decay_scheduler))
# Train for the same number of epochs the schedule was sized for. This was a
# hard-coded 20, which stopped training before the learning rate finished decaying.
history = model.fit(train_dataset, epochs=EPOCHS, validation_data=val_dataset)

In [None]:
# Use the configured input size instead of repeating the literal 256 so this
# cell cannot drift from HEIGHT/WIDTH.
# NOTE(review): presumably called so the subclassed model has known input
# shapes before saving — confirm.
model.compute_output_shape((None, HEIGHT, WIDTH, 3))
model.save('models/simclr_efficientnetv2b2')

In [None]:
# Collect every file in the local `test` directory. This rebinds `images_list`,
# which previously held the training files.
images_list = glob.glob(os.path.join('test', '*'))


def process_path_test(image_path):
    """Load one test image and return it together with its file name.

    Args:
        image_path: scalar string tensor holding the path of a JPEG file.

    Returns:
        ``(image, file_name)``: the decoded 3-channel image resized with padding
        to ``HEIGHT`` x ``WIDTH``, and the last path component of ``image_path``.
    """
    file_name = tf.strings.split(image_path, os.path.sep)[-1]
    decoded = tf.io.decode_jpeg(tf.io.read_file(image_path), channels=3)
    return tf.image.resize_with_pad(decoded, HEIGHT, WIDTH), file_name


test_dataset = (
    tf.data.Dataset.from_tensor_slices(images_list)
    .map(process_path_test, num_parallel_calls=tf.data.AUTOTUNE)
    .batch(BATCH)
    .prefetch(buffer_size=tf.data.AUTOTUNE)
)

# Map each test file name to its embedding vector.
test_embeddings = {}
for images, names in tqdm(test_dataset):
    # Invoke the model through __call__ (not .call) with explicit inference mode,
    # so Keras handles the call bookkeeping and sub-layers run with training=False.
    embeddings = model(images, training=False)
    for name, embedding in zip(names.numpy(), embeddings.numpy()):
        # File names come back from TensorFlow as bytes.
        test_embeddings[name.decode()] = embedding

In [None]:
# Sanity check: evaluates to True when the number of embedding entries (keyed by
# file name) equals the number of test files found on disk.
len(images_list) == len(test_embeddings)

In [None]:
# Load the test pairs and reduce each URL to its file name.
test_df = pd.read_csv('test-data.csv')
test_df['image_name1'] = test_df['image_url1'].apply(lambda x: os.path.basename(x))
test_df['image_name2'] = test_df['image_url2'].apply(lambda x: os.path.basename(x))

# Score every pair with the dot product of the two embeddings. Pairs with a
# missing embedding keep the default score of 0.0. Scores are collected in a
# list and assigned once as floats: this avoids writing floats into an integer
# column row by row, which relies on deprecated implicit upcasting and is slow.
simclr_scores = []
pair_names = zip(test_df['image_name1'], test_df['image_name2'])
for image_1, image_2 in tqdm(pair_names, total=len(test_df)):
    if image_1 in test_embeddings and image_2 in test_embeddings:
        score = float(np.dot(test_embeddings[image_1], test_embeddings[image_2]))
    else:
        score = 0.0
    simclr_scores.append(score)

test_df['simclr'] = simclr_scores

In [None]:
# Persist the scored pairs; with pandas defaults the DataFrame index is written as the first column.
test_df.to_csv('test_simclr.csv')