In [1]:
import os
import random as rnd

import albumentations as A
import annoy
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.core.interactiveshell import InteractiveShell
from sklearn.model_selection import StratifiedGroupKFold
from tensorflow import keras
from tqdm import tqdm

from _data.artist_data.ny_baseline import eval_submission as ev_sub
from _data.artist_data.ny_baseline import get_ranked_list
from src.utils import make_callbacks

InteractiveShell.ast_node_interactivity = "all"

In [2]:
class CFG:
    """Run settings for the ranking experiment.

    All values are plain instance attributes, so a notebook cell can override
    any of them after construction (for example ``cfg.fold = 3``).
    """

    def __init__(self):
        height, width = 512, 60

        # Seed and cross-validation layout.
        self.seed = 39
        self.img_size = (height, width)
        self.n_folds = 8
        self.fold = 0
        # Input handling and embedding settings.
        self.norm = False
        self.emb_len = 1024
        self.kernel_size = (5, 2)
        # Training schedule and list size.
        self.n_epochs = 30
        self.n_pairs_in_batch = 30
        # img_size with a trailing single-channel axis.
        self.input_shape = (*self.img_size, 1)

In [3]:
# Single configuration instance read by the cells below.
cfg = CFG()

In [4]:
# Dataset root: train.csv, the test metadata and the test features live here.
root_dir = "/app/_data/artist_data/"
# Per-fold model directory, derived from root_dir so the two cannot drift
# apart. The trailing "" keeps the trailing slash of the original value.
mod_dir = os.path.join(root_dir, "models", "test_arch", f"rank_dist_{cfg.fold}", "")
mod_dir

'/app/_data/artist_data/models/test_arch/rank_dist_0/'

In [5]:
# Training metadata; rows whose artist has exactly one track are dropped.
train_csv_path = os.path.join(root_dir, "train.csv")
train = pd.read_csv(train_csv_path)
has_several_tracks = train["artistid_count"] != 1
train = train[has_several_tracks].reset_index(drop=True)

# Test metadata is tab-separated; resolve each feature file to a full path.
test_meta_path = os.path.join(root_dir, "test_meta.tsv")
test = pd.read_csv(test_meta_path, sep="\t")
test_features_dir = os.path.join(root_dir, "test_features")
test["path"] = test["archive_features_path"].apply(
    lambda rel_path: os.path.join(test_features_dir, rel_path)
)

## train_val_split

In [6]:
# Tag every row with the index of the fold in which it is a validation row.
# Splits are grouped by artist and stratified on the artist's track count.
gkf = StratifiedGroupKFold(n_splits=cfg.n_folds, shuffle=True, random_state=cfg.seed)
fold_splits = gkf.split(
    X=train[["artistid", "artistid_count"]],
    y=train["artistid_count"],
    groups=train["artistid"],
)
for fold_idx, (_, val_ids) in enumerate(fold_splits):
    train.loc[val_ids, "fold"] = fold_idx

In [7]:
# Rows of the selected fold are held out for validation; the rest train.
in_val_fold = train["fold"] == cfg.fold
train_df = train[~in_val_fold].reset_index(drop=True)
val_df = train[in_val_fold].reset_index(drop=True)

## DataGenerator

In [8]:
class DataGenerator:
    """Generator of ranking lists: one anchor track plus labelled candidates.

    Calling an instance iterates once over all artists. For each artist it
    yields ``(tracks, labels)`` where ``tracks`` stacks
    ``n_pairs_in_batch + 1`` arrays (the anchor first) and ``labels`` has
    ``n_pairs_in_batch`` entries: 0 for a candidate by the anchor's artist,
    1 for a candidate by any other artist.

    Args:
        data: DataFrame with at least ``artistid`` and ``path`` columns;
            ``path`` points to arrays readable by ``np.load``.
        img_size: ``(height, width)`` of the crop taken from each array.
        n_pairs_in_batch: number of candidates per anchor.
        norm: min-max scale each array before cropping.
        shuffle: shuffle the artist order at construction and on every call.
        transpose: swap the two array axes after cropping.
        augment: random crop plus augmentations instead of a centre crop.
    """

    def __init__(
        self,
        data,
        img_size,
        n_pairs_in_batch=50,
        norm=False,
        shuffle=True,
        transpose=True,
        augment=True,
    ):
        self.data = data.reset_index(drop=True)
        self.img_size = img_size
        self.n_pairs_in_batch = n_pairs_in_batch
        self.norm = norm
        self.shuffle = shuffle
        self.transpose = transpose
        self.augment = augment
        self.artist_ids = self.data["artistid"].unique().tolist()
        # Attribute name (including its spelling) is kept for existing users.
        self.artis2path = self.data.groupby("artistid").agg(list)["path"].to_dict()
        self.paths = self.data["path"].tolist()
        # Augmentation pipeline, built on first use so that non-augmenting
        # generators never touch albumentations.
        self._transform = None

        if self.shuffle:
            np.random.shuffle(self.artist_ids)

    def __len__(self):
        """Number of artists, i.e. lists produced by one pass of ``__call__``."""
        return len(self.artist_ids)

    def augment_fn(self, img):
        """Random-crop ``img`` to ``img_size`` and apply the augmentations."""
        if getattr(self, "_transform", None) is None:
            # Crop size follows img_size instead of a hard-coded 512x60, and
            # the pipeline is built once rather than for every image.
            self._transform = A.Compose(
                [
                    A.RandomCrop(
                        always_apply=True,
                        p=1.0,
                        height=self.img_size[0],
                        width=self.img_size[1],
                    ),
                    A.Flip(p=0.2),
                    A.PixelDropout(p=0.1, dropout_prob=0.01),
                    A.CoarseDropout(
                        p=0.1,
                        max_holes=11,
                        max_height=5,
                        max_width=3,
                        min_holes=1,
                        min_height=2,
                        min_width=2,
                    ),
                    A.RandomGridShuffle(p=0.3, grid=(1, 6)),
                ]
            )
        return self._transform(image=img)["image"]

    def load_img(self, path):
        """Load one array, crop it to ``img_size`` and add a channel axis."""
        img = np.load(path).astype("float32")
        if self.norm:
            img -= img.min()
            peak = img.max()
            # A constant array has peak 0 after the shift; dividing would
            # fill it with NaN, so it is left as all zeros.
            if peak > 0:
                img /= peak
        if self.augment:
            img = self.augment_fn(img)
        else:
            # Deterministic centre crop along the second axis.
            wpad = (img.shape[1] - self.img_size[1]) // 2
            img = img[:, wpad : wpad + self.img_size[1]]
        if self.transpose:
            img = img.transpose(1, 0)
        img = np.expand_dims(img, -1)
        return img

    def get_list(self, artist_id):
        """Build one ranking list for ``artist_id``.

        Returns:
            ``(imgs, labels)``: ``imgs`` holds the anchor followed by
            ``n_pairs_in_batch`` shuffled candidates; ``labels`` (float32) is
            0 for same-artist candidates and 1 for the others.

        Raises:
            ValueError: from ``random.sample`` when the data holds fewer
                other-artist tracks than negatives are needed.
        """
        # Work on a copy so the shared artist -> paths mapping is not reordered.
        artist_paths = list(self.artis2path[artist_id])
        np.random.shuffle(artist_paths)
        anchor_path = artist_paths[0]
        # Cap the positives so at least one slot is left for a negative.
        # Without the cap, an artist with n_pairs_in_batch or more other
        # tracks made the negative sample size zero or negative and crashed.
        max_positives = max(self.n_pairs_in_batch - 1, 0)
        positive_paths = artist_paths[1:][:max_positives]

        # Set membership keeps the filter linear in the dataset size.
        own_paths = set(artist_paths)
        negative_pool = [p for p in self.paths if p not in own_paths]
        negative_paths = rnd.sample(
            negative_pool, self.n_pairs_in_batch - len(positive_paths)
        )

        candidate_paths = positive_paths + negative_paths
        labels = np.zeros(self.n_pairs_in_batch, dtype="float32")
        labels[len(positive_paths) :] = 1

        perm = np.random.permutation(self.n_pairs_in_batch)
        ordered_paths = [anchor_path] + [candidate_paths[i] for i in perm]
        # Stacking one flat list also covers artists with no positives,
        # where concatenating an empty array used to fail.
        imgs = np.stack([self.load_img(p) for p in ordered_paths])
        return imgs, labels[perm]

    def __call__(self):
        """Yield ``(tracks, labels)`` tensors, one list per artist."""
        # Honour the shuffle flag; the order used to be reshuffled always.
        if self.shuffle:
            np.random.shuffle(self.artist_ids)
        for artist_id in self.artist_ids:
            tracks, labels = self.get_list(artist_id)
            yield tf.convert_to_tensor(tracks), tf.convert_to_tensor(labels)

In [9]:
# Settings shared by both generators; only the data split and the
# augmentation flag differ between training and validation.
_shared_gen_kwargs = dict(
    img_size=cfg.img_size,
    n_pairs_in_batch=cfg.n_pairs_in_batch,
    norm=False,
    shuffle=True,
    transpose=False,
)
train_gen = DataGenerator(data=train_df, augment=True, **_shared_gen_kwargs)
val_gen = DataGenerator(data=val_df, augment=False, **_shared_gen_kwargs)

In [10]:
def _as_repeating_dataset(generator):
    """Wrap a list generator in an endlessly repeating ``tf.data.Dataset``.

    Both datasets share one signature: ``n_pairs_in_batch + 1`` single-channel
    arrays of ``cfg.img_size`` (anchor first) and ``n_pairs_in_batch`` labels.
    """
    signature = (
        tf.TensorSpec(
            shape=(cfg.n_pairs_in_batch + 1, *cfg.img_size, 1),
            dtype=tf.float32,
            name="tracks",
        ),
        tf.TensorSpec(
            # Explicit 1-tuple; "(n)" without the comma is just an int.
            shape=(cfg.n_pairs_in_batch,),
            dtype=tf.float32,
            name="labels",
        ),
    )
    return tf.data.Dataset.from_generator(
        generator, output_signature=signature
    ).repeat()


train_ds = _as_repeating_dataset(train_gen)
val_ds = _as_repeating_dataset(val_gen)

In [11]:
def embNet(
    input_shape,
    kernel_size=3,
    dropout_rate=0.1,
    embedding_len=1024,
    activation_fn="relu",
    padding="same",
):
    """Build the embedding network as a Keras functional model named "embedding".

    The stack is: two single-filter Conv1D layers, dropout, a stride-1 2x2 max
    pool, two more single-filter Conv1D layers, flatten, then two ReLU dense
    layers of ``embedding_len * 2`` and ``embedding_len`` units.

    Args:
        input_shape: per-sample input shape handed to the first layer.
        kernel_size: kernel size shared by all four convolutions.
        dropout_rate: rate of the dropout layer.
        embedding_len: size of the output embedding.
        activation_fn: activation of the convolutions.
        padding: padding mode of the convolutions.

    Returns:
        ``keras.Model`` mapping an input batch to its embeddings.
    """

    def conv(name, **extra):
        # All convolutions share every setting except their name; the first
        # one additionally receives the input shape.
        return keras.layers.Conv1D(
            filters=1,
            kernel_size=kernel_size,
            activation=activation_fn,
            padding=padding,
            name=name,
            **extra,
        )

    layer_stack = [
        conv("conv_1", input_shape=input_shape),
        conv("conv_2"),
        keras.layers.Dropout(rate=dropout_rate, name="dropout1"),
        keras.layers.MaxPooling2D(
            pool_size=(2, 2), strides=1, padding="same", name="max_1"
        ),
        conv("conv_3"),
        conv("conv_4"),
        keras.layers.Flatten(name="flatten_base"),
        keras.layers.Dense(embedding_len * 2, activation="relu", name="dense_1"),
        keras.layers.Dense(embedding_len, activation="relu", name="dense_base_2"),
    ]
    base_model = tf.keras.models.Sequential(layer_stack)
    return keras.Model(
        inputs=base_model.input, outputs=base_model.output, name="embedding"
    )

In [12]:
# Input shape and embedding length come from cfg so the network stays in sync
# with the generators and the dataset signature (same values as the former
# literals (512, 60, 1) and 1024).
# kernel_size stays a literal: cfg.kernel_size is a 2-tuple, which does not
# fit the Conv1D layers used by embNet.
embedding_net = embNet(
    input_shape=cfg.input_shape,
    kernel_size=5,
    dropout_rate=0.1,
    embedding_len=cfg.emb_len,
    activation_fn="relu",
    padding="valid",
)

In [13]:
class RankingModel(keras.Model):
    """Scores the candidates of one list by their distance to the anchor.

    The input is a stack whose first element is the anchor and whose remaining
    elements are candidates. The output is one squared Euclidean distance per
    candidate, measured between embeddings.
    """

    def __init__(self, emb_model):
        super().__init__()
        # The embedding network is shared, not copied: training this model
        # updates emb_model's weights.
        self.embeddings = emb_model

    def distance(self, y_true, y_pred):
        """Squared Euclidean distance along the last axis (inputs broadcast)."""
        return tf.reduce_sum(tf.square(y_true - y_pred), axis=-1)

    def call(self, features):
        """Return the anchor-to-candidate distances for one list."""
        embeddings = self.embeddings(features)
        anchor_embedding = embeddings[:1, ...]
        candidate_embeddings = embeddings[1:, ...]
        # The single anchor row broadcasts against every candidate row, so no
        # explicit tf.repeat (and no statically known list length) is needed.
        return self.distance(anchor_embedding, candidate_embeddings)

In [14]:
def make_callbacks(
    path, monitor="val_loss", mode="min", reduce_patience=3, stop_patience=20
):
    """Build the list of Keras callbacks used for training.

    Note: this definition shadows the ``make_callbacks`` imported from
    ``src.utils`` at the top of the notebook.

    Args:
        path: run directory; the best weights go to ``<path>/best`` and the
            backup state to ``<path>/backup``. Its last component names the
            TensorBoard log directory.
        monitor: metric watched by early stopping, checkpointing and LR decay.
        mode: ``"min"`` or ``"max"``, direction in which ``monitor`` improves.
        reduce_patience: epochs without improvement before the LR is reduced.
        stop_patience: epochs without improvement before training stops.

    Returns:
        List of callbacks: early stopping, checkpoint, LR reduction,
        TensorBoard, backup-and-restore, terminate-on-NaN.
    """
    # Last path component, with or without a trailing slash. Splitting on "/"
    # and taking [-2] picked the parent directory for "a/b/run" and raised
    # IndexError for a bare name.
    run_name = os.path.basename(os.path.normpath(path))
    callbacks = [
        keras.callbacks.EarlyStopping(
            monitor=monitor,
            patience=stop_patience,
            restore_best_weights=True,
            verbose=1,
            mode=mode,
        ),
        keras.callbacks.ModelCheckpoint(
            os.path.join(path, "best"),
            monitor=monitor,
            verbose=1,
            save_best_only=True,
            save_weights_only=True,
            mode=mode,
            save_freq="epoch",
        ),
        keras.callbacks.ReduceLROnPlateau(
            monitor=monitor,
            factor=0.9,
            patience=reduce_patience,
            verbose=1,
            mode=mode,
            min_delta=1e-4,
            min_lr=0.00000001,
        ),
        keras.callbacks.TensorBoard(
            log_dir=f"/app/.tensorboard/{run_name}/", histogram_freq=0
        ),
        keras.callbacks.BackupAndRestore(os.path.join(path, "backup")),
        keras.callbacks.TerminateOnNaN(),
    ]
    return callbacks

In [15]:
# NOTE(review): the name is misspelled ("calbacks"), but the model.fit cell
# refers to it by this name, so both must be renamed together.
# Checkpoints and backups go under /app/_data/rank_mod/, not under mod_dir.
calbacks = make_callbacks(
    "/app/_data/rank_mod/",
    monitor="val_loss",
    mode="min",
    reduce_patience=2,
    stop_patience=4,
)

In [16]:
# RankingModel returns anchor-to-candidate distances; the loss treats them as
# logits against the generator's labels (0 = same artist, 1 = other artist).
model = RankingModel(embedding_net)
model.compile(
    loss=keras.losses.BinaryCrossentropy(from_logits=True),
    optimizer=tf.keras.optimizers.Adam(0.001),
)

In [17]:
# Both datasets repeat forever, so steps_per_epoch / validation_steps define
# the length of an epoch and of each validation pass.
# shuffle, max_queue_size, workers and use_multiprocessing were dropped: Keras
# ignores them for tf.data.Dataset input, and newer Keras releases no longer
# accept the last three.
# NOTE(review): the recorded output below shows "Epoch N/10", so it presumably
# came from a run with epochs=10 rather than this setting.
history = model.fit(
    x=train_ds,
    epochs=100,
    verbose="auto",
    callbacks=calbacks,
    validation_data=val_ds,
    steps_per_epoch=3000,
    validation_steps=1000,
    validation_freq=1,
)

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.48655, saving model to /app/_data/rank_mod/best
Epoch 2/10
Epoch 2: val_loss improved from 0.48655 to 0.45806, saving model to /app/_data/rank_mod/best
Epoch 3/10
Epoch 3: val_loss improved from 0.45806 to 0.44825, saving model to /app/_data/rank_mod/best
Epoch 4/10
Epoch 4: val_loss improved from 0.44825 to 0.44139, saving model to /app/_data/rank_mod/best
Epoch 5/10
Epoch 5: val_loss improved from 0.44139 to 0.43088, saving model to /app/_data/rank_mod/best
Epoch 6/10
Epoch 6: val_loss improved from 0.43088 to 0.42525, saving model to /app/_data/rank_mod/best
Epoch 7/10
Epoch 7: val_loss did not improve from 0.42525
Epoch 8/10
Epoch 8: val_loss did not improve from 0.42525

Epoch 8: ReduceLROnPlateau reducing learning rate to 0.0009000000427477062.
Epoch 9/10
Epoch 9: val_loss did not improve from 0.42525
Epoch 10/10
Epoch 10: val_loss improved from 0.42525 to 0.42341, saving model to /app/_data/rank_mod/best


In [18]:
class TestGenerator(keras.utils.Sequence):
    """Keras ``Sequence`` serving centre-cropped arrays for inference.

    Tracks are served in the row order of ``data``, so stacked predictions
    line up with ``data["trackid"]``.

    Args:
        data: DataFrame with ``trackid`` and ``path`` columns.
        img_size: ``(height, width)``; only the width is used, for the crop.
        batch_size: tracks per batch. The default of 1 reproduces the
            original one-track-per-step behaviour.
    """

    def __init__(
        self,
        data,
        img_size,
        batch_size=1,
    ):
        super().__init__()
        if batch_size < 1:
            raise ValueError("batch_size must be a positive integer")
        self.data = data.reset_index(drop=True)
        self.img_size = img_size
        self.batch_size = batch_size
        self.track2path = self.data.set_index("trackid")["path"].to_dict()
        self.tracks = self.data["trackid"].tolist()

    def __len__(self):
        """Number of batches (ceiling of tracks / batch_size)."""
        return -(-len(self.tracks) // self.batch_size)

    def load_img(self, path):
        """Load one array, centre-crop its width, add batch and channel axes."""
        img = np.load(path).astype("float32")
        wpad = (img.shape[1] - self.img_size[1]) // 2
        img = img[:, wpad : wpad + self.img_size[1]]
        img = np.expand_dims(img, [0, -1])
        return img

    def __getitem__(self, ix):
        """Return batch ``ix`` as one array with the batch on the first axis."""
        n_batches = len(self)
        if ix < 0:
            ix += n_batches
        if not 0 <= ix < n_batches:
            # Mirror list indexing, which the single-track version relied on.
            raise IndexError("batch index out of range")
        start = ix * self.batch_size
        batch_tracks = self.tracks[start : start + self.batch_size]
        # load_img already returns a leading batch axis of length 1.
        return np.concatenate(
            [self.load_img(self.track2path[track]) for track in batch_tracks],
            axis=0,
        )

In [19]:
# Despite the name, this serves the validation fold (val_df).
# The crop size is a literal with the same value as cfg.img_size.
testgen = TestGenerator(val_df, (512, 60))

In [20]:
# NOTE(review): model was built as RankingModel(embedding_net) and stores that
# same object as model.embeddings, so this presumably copies the weights onto
# themselves and is a no-op — confirm before relying on it.
embedding_net.set_weights(model.weights)

In [21]:
# Embeddings for the validation tracks served by testgen; rows are expected to
# follow val_df order (one track per step) — the next cells rely on that.
predictions = embedding_net.predict(testgen)



In [82]:
def position_discounter(position):
    """Return the DCG weight of a 1-based rank: ``1 / log2(position + 1)``."""
    log_rank = np.log2(position + 1)
    return 1.0 / log_rank


def get_ideal_dcg(relevant_items_count, top_size):
    """Return the best achievable DCG for a query.

    This is the sum of the position discounts of the first
    ``min(top_size, relevant_items_count)`` ranks; 0.0 when that is not
    positive.
    """
    n_scored = min(top_size, relevant_items_count)
    total = 0.0
    for position in range(1, n_scored + 1):
        total += position_discounter(position)
    return total


def compute_dcg(query_trackid, ranked_list, track2artist_map, top_size):
    """Return the DCG of ``ranked_list`` for ``query_trackid``.

    A result counts as relevant when its artist equals the query's artist.
    Only the first ``top_size`` results are scored; the query itself must not
    appear among them.
    """
    target_artist = track2artist_map[query_trackid]
    dcg = 0.0
    for position, result_trackid in enumerate(ranked_list[:top_size], start=1):
        assert result_trackid != query_trackid
        if track2artist_map[result_trackid] != target_artist:
            continue
        dcg += position_discounter(position)
    return dcg


def eval_submission(submission, gt_meta_info, top_size=100):
    """Return the mean nDCG@``top_size`` over the queries of ``submission``.

    Args:
        submission: mapping of query track id to its ranked list of track ids.
        gt_meta_info: DataFrame with ``trackid`` and ``artistid`` columns.
        top_size: number of leading results scored per query.

    Queries whose nDCG division raises ``ZeroDivisionError`` are left out of
    the mean.
    """
    track2artist_map = gt_meta_info.set_index("trackid")["artistid"].to_dict()
    artist2tracks_map = (
        gt_meta_info.groupby("artistid")["trackid"].agg(list).to_dict()
    )
    ndcg_list = []
    for query_trackid in tqdm(submission.keys()):
        dcg = compute_dcg(
            query_trackid,
            submission[query_trackid],
            track2artist_map,
            top_size=top_size,
        )
        same_artist_tracks = artist2tracks_map[track2artist_map[query_trackid]]
        # The query itself is not a valid result, hence the "- 1".
        ideal_dcg = get_ideal_dcg(len(same_artist_tracks) - 1, top_size=top_size)
        try:
            score = dcg / ideal_dcg
        except ZeroDivisionError:
            continue
        ndcg_list.append(score)
    return np.mean(ndcg_list)

In [22]:
# Map each validation track id to its embedding row; predictions are indexed
# by the position of the track in val_df.
tracks = val_df["trackid"].values
embeds = {track_id: predictions[row] for row, track_id in enumerate(tracks)}

In [25]:
def get_ranked_list(embeds, top_size, annoy_num_trees=128):
    """Rank, for every track, its nearest neighbours by Euclidean distance.

    Note: this definition shadows the ``get_ranked_list`` imported from the
    baseline module at the top of the notebook.

    Args:
        embeds: mapping of track id to its embedding vector.
        top_size: maximum number of neighbours returned per track.
        annoy_num_trees: number of trees of the Annoy index.

    Returns:
        Dict of track id to a list of up to ``top_size`` other track ids,
        nearest first. Empty when ``embeds`` is empty.
    """
    if not embeds:
        # Nothing to index; building would fail on a missing index object.
        return dict()

    # A track's position in this list is its integer Annoy item id.
    annoy2id = list(embeds.keys())
    annoy_index = None
    for annoy_id, track_id in enumerate(tqdm(annoy2id)):
        track_embed = embeds[track_id]
        if annoy_index is None:
            annoy_index = annoy.AnnoyIndex(len(track_embed), "euclidean")
        annoy_index.add_item(annoy_id, track_embed)
    annoy_index.build(annoy_num_trees, n_jobs=-1)

    ranked_list = dict()
    for annoy_id, track_id in enumerate(tqdm(annoy2id)):
        neighbours = annoy_index.get_nns_by_item(annoy_id, top_size + 1)
        # Remove the query wherever it appears, then trim. Dropping the first
        # neighbour unconditionally lost a real neighbour whenever the query
        # was not ranked first (ties, approximate search).
        candidates = [c for c in neighbours if c != annoy_id][:top_size]
        ranked_list[track_id] = [annoy2id[c] for c in candidates]
    return ranked_list

In [26]:
# Top-100 neighbours per validation track, using a 256-tree Annoy index.
submission = get_ranked_list(embeds, 100, 256)

100% 20837/20837 [00:04<00:00, 4398.25it/s]
100% 20837/20837 [00:45<00:00, 453.66it/s]


In [27]:
# Scores the validation rankings with ev_sub, the evaluator imported from the
# baseline module, not the eval_submission defined in this notebook.
ndcg = ev_sub(submission, gt_meta_info=val_df, top_size=100)
ndcg

100% 20837/20837 [00:07<00:00, 2966.61it/s]


0.07843049426700441