# PyTorch Metric Learning
### Example for the MetricLossOnly trainer
See the documentation [here](https://kevinmusgrave.github.io/pytorch-metric-learning/)

## Install the necessary packages

In [None]:
# !pip install -q pytorch-metric-learning
# !pip install -q faiss-gpu
# !pip install -q umap-learn
# !pip install -q pynndescent
# !pip install -q record-keeper

## Import the packages

In [1]:
%matplotlib inline
from sklearn.metrics import pairwise
from pytorch_metric_learning import losses, miners, samplers, trainers, testers
from pytorch_metric_learning.utils import common_functions as c_f
from pytorch_metric_learning.utils import common_functions
import pytorch_metric_learning.utils.logging_presets as logging_presets
import numpy as np
import torchvision
from torchvision import datasets, transforms
import torch
import torch.nn as nn
from PIL import Image
import logging
import matplotlib.pyplot as plt
import umap
from cycler import cycler
import record_keeper
import pytorch_metric_learning
import os
import pandas as pd
from tqdm import tqdm
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
# Let INFO-level records through the root logger, then log the installed library version.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
logging.info("VERSION %s", pytorch_metric_learning.__version__)

INFO:root:VERSION 0.9.95


In [None]:
# Output width of the embedder head (passed as the last layer size of the MLP below).
emb_size = 64
# When True, old logs/checkpoints are deleted and trainer.train() is executed further down.
retrain = True

## Simple model definition

In [None]:
from img_embedder import *

## Initialize models, optimizers and image transforms

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the trunk from a pretrained ResNet-50.
trunk = torchvision.models.resnet50(pretrained=True)

# The width of the features entering the final fully connected layer must be read
# *before* that layer is replaced: afterwards `fc` is an Identity with no `in_features`,
# and the DataParallel wrapper does not expose `fc` at all.
trunk_output_size = trunk.fc.in_features

# Replace the final fully connected (classification) layer with an identity function
# so the trunk outputs raw feature vectors.
trunk.fc = c_f.Identity()
trunk = nn.DataParallel(trunk.to(device))

print(trunk_output_size, ">", emb_size)

# Set embedder model. This takes in the output of the trunk and outputs 'emb_size' dimensional embeddings.
# NOTE(review): MLP is presumably provided by `from img_embedder import *` — confirm.
embedder = torch.nn.DataParallel(MLP([trunk_output_size, emb_size]).to(device))

# Set optimizers (the pretrained trunk gets a 10x smaller learning rate than the new embedder head)
trunk_optimizer = torch.optim.Adam(trunk.parameters(), lr=0.00001, weight_decay=0.0001)
embedder_optimizer = torch.optim.Adam(embedder.parameters(), lr=0.0001, weight_decay=0.0001)

# Channel-wise normalisation applied as the last step of both pipelines.
norm = transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])

# Side length of the square input images (196 was an earlier setting).
size = 128

# Training pipeline: resize, random rotation / flips for augmentation, then tensor + normalise.
train_steps = [
    transforms.Resize((size, size)),
    transforms.RandomRotation(degrees=(-30, 30)),
    transforms.RandomHorizontalFlip(0.5),
    transforms.RandomVerticalFlip(0.5),
    transforms.ToTensor(),
    norm,
]
train_transform = transforms.Compose(train_steps)

# Evaluation pipeline: same resize and normalisation, no random augmentation.
val_steps = [
    transforms.Resize((size, size)),
    transforms.ToTensor(),
    norm,
]
val_transform = transforms.Compose(val_steps)

In [None]:
# Replace the final classification layer of a fresh pretrained ResNet-50 with an identity function.
trunk = torchvision.models.resnet50(pretrained=True)
trunk.fc = c_f.Identity()
trunk = nn.DataParallel(trunk.to(torch.device("cuda" if torch.cuda.is_available() else "cpu")))

# Rebinding `trunk` creates a brand-new set of parameters. An optimizer built earlier
# would still reference the previous model and the trunk handed to the trainer would
# never be updated, so the optimizer is re-created for the new parameters here.
# Keep these hyper-parameters in sync with the optimizer cell above.
trunk_optimizer = torch.optim.Adam(trunk.parameters(), lr=0.00001, weight_decay=0.0001)

In [None]:
embedder

## Create the dataset and class-disjoint train/val splits

In [None]:
# Root folder that contains the 'train', 'validation' and 'test' sub-folders.
input_path = "datasets/workers"

# One ImageFolder per split, created in the order train -> validation -> test.
original_train, original_val, original_test = (
    datasets.ImageFolder(os.path.join(input_path, split_dir))
    for split_dir in ("train", "validation", "test")
)

# Thin dataset wrapper around the ImageFolder splits loaded above.
# NOTE: despite its name, this class does not enforce class-disjointness itself;
# it simply serves whichever of the two sources is selected by `train`.
class ClassDisjoint(torch.utils.data.Dataset):
    """Serve (image, target) pairs from one of two ImageFolder-like sources.

    Args:
        original_train: object exposing `imgs` (sequence of (path, class_index)
            pairs) and `targets`; used when `train` is truthy.
        original_val: object with the same attributes; used when `train` is falsy.
        train: selects `original_train` (truthy) or `original_val` (falsy).
        transform: optional callable applied to the loaded PIL image, or None.

    Attributes:
        data: list of image file paths.
        targets: numpy array of the source's targets, aligned with `data`.
    """

    def __init__(self, original_train, original_val, train, transform):
        source = original_train if train else original_val
        # `imgs` holds (path, class_index) pairs; only the paths are kept here,
        # the class indices come from `targets`.
        self.data = [path for path, _ in source.imgs]
        self.targets = np.array(source.targets)
        self.transform = transform

    def __len__(self):
        """Return the number of images in the selected source."""
        return len(self.data)

    def __getitem__(self, index):
        """Load the image at `index` and return `(image, target)`.

        The file is opened in a context manager so its handle is released, and the
        image is converted to RGB so grayscale / RGBA files yield three channels
        (the pipelines defined in this notebook normalise with 3-channel statistics).
        """
        path, target = self.data[index], self.targets[index]
        with Image.open(path) as handle:
            img = handle.convert("RGB")
        if self.transform is not None:
            img = self.transform(img)
        return img, target

# Wrap each ImageFolder split: augmentation for training, the plain pipeline for validation and test.
train_dataset = ClassDisjoint(original_train, original_val, train=True, transform=train_transform)
val_dataset = ClassDisjoint(original_train, original_val, train=False, transform=val_transform)
# The test split is passed in the "validation" slot and selected with train=False.
test_dataset = ClassDisjoint(original_train, original_test, train=False, transform=val_transform)

In [None]:
original_train

In [None]:
original_val

In [None]:
original_test

In [None]:
train_dataset[10][0].shape

## Create the loss, miner, sampler, and package them into dictionaries


In [None]:
# Training hyper-parameters
batch_size = 32
num_epochs = 100

# Loss and tuple miner
loss = losses.TripletMarginLoss(margin=0.1)
miner = miners.MultiSimilarityMiner(epsilon=0.1)

# Dataloader sampler built from the training targets (m=4)
sampler = samplers.MPerClassSampler(
    train_dataset.targets,
    m=4,
    length_before_new_iter=len(train_dataset),
)

# Bundle everything into the dictionaries the trainer receives.
models = dict(trunk=trunk, embedder=embedder)
optimizers = dict(trunk_optimizer=trunk_optimizer, embedder_optimizer=embedder_optimizer)
loss_funcs = dict(metric_loss=loss)
mining_funcs = dict(tuple_miner=miner)

In [None]:
# The checkpoint folder name encodes the input resolution: 'large_img' above 140 px, else 'small_img'.
size_tag = 'large_img' if size > 140 else 'small_img'
model_folder = "image_embedder_model_" + size_tag

In [None]:
if retrain:
    # Start from a clean slate: remove the previous run's logs, tensorboard data and saved models.
    # NOTE(review): destructive and irreversible; `model_folder` is interpolated into the shell command.
    !rm -rf image_embedder_logs/ image_embedder_tensorboard/ {model_folder}/ 

## Create the training and testing hooks

In [None]:
val_dataset[0]

In [None]:
# Record keeper writing to the log folder and the tensorboard folder named here.
# NOTE(review): this rebinds `record_keeper`, shadowing the module imported at the top.
record_keeper, _, _ = logging_presets.get_record_keeper(
    "image_embedder_logs",
    "image_embedder_tensorboard",
)
# Hook container built on that record keeper; it supplies the hooks used below.
hooks = logging_presets.get_hook_container(record_keeper)
# Splits evaluated by the tester, keyed by split name.
dataset_dict = dict(val=val_dataset)

def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname, *args):
    """Draw a scatter plot of 2-D embeddings, one colour per distinct label.

    Args:
        umapper: unused here; accepted to match the hook signature.
        umap_embeddings: array indexed as [row, 0] / [row, 1] for the x / y coordinates.
        labels: array compared element-wise against each distinct label to select rows.
        split_name: name of the split, used in the log message.
        keyname: name of the label set, used in the log message.
        *args: ignored.
    """
    logging.info("UMAP plot for the {} split and label set {}".format(split_name, keyname))
    label_set = np.unique(labels)
    num_classes = len(label_set)
    fig = plt.figure(figsize=(20,15))
    # Spread the class colours evenly over the first 90% of the colormap.
    colors = [plt.cm.nipy_spectral(i) for i in np.linspace(0, 0.9, num_classes)]
    plt.gca().set_prop_cycle(cycler("color", colors))
    for label in label_set:
        idx = labels == label
        plt.plot(umap_embeddings[idx, 0], umap_embeddings[idx, 1], ".", markersize=1)
    plt.show()
    # The hook is invoked repeatedly during training; release the figure so they do not pile up.
    plt.close(fig)

# Tester: reports through the hook container and plots embeddings via visualizer_hook.
tester = testers.GlobalEmbeddingSpaceTester(
    end_of_testing_hook=hooks.end_of_testing_hook,
    visualizer=umap.UMAP(),
    visualizer_hook=visualizer_hook,
    dataloader_num_workers=32,
)

# End-of-epoch hook: runs the tester on dataset_dict and uses model_folder for saved models.
# NOTE(review): test_interval=1 / patience=1 presumably mean "evaluate every epoch, stop after
# one epoch without improvement" — confirm against the logging_presets documentation.
end_of_epoch_hook = hooks.end_of_epoch_hook(
    tester,
    dataset_dict,
    model_folder,
    test_interval=1,
    patience=1,
)

## Create the trainer

In [None]:
# Every argument is passed by keyword so the call does not depend on the positional
# order of the trainer's parameters.
# NOTE(review): the relative position of `mining_funcs` and `dataset` is believed to differ
# between library releases — confirm against the installed version's trainer signature.
trainer = trainers.MetricLossOnly(
    models=models,
    optimizers=optimizers,
    batch_size=batch_size,
    loss_funcs=loss_funcs,
    mining_funcs=mining_funcs,
    dataset=train_dataset,
    sampler=sampler,
    dataloader_num_workers=32,
    end_of_iteration_hook=hooks.end_of_iteration_hook,
    end_of_epoch_hook=end_of_epoch_hook,
)

## Start Tensorboard
(Turn off adblock and other shields)

In [None]:
%load_ext tensorboard
%tensorboard --logdir example_tensorboard

## Train the model

In [None]:
%%time
# Train only when `retrain` is set; otherwise the cells below presumably rely on
# checkpoints already present in `model_folder`.
if retrain:
    trainer.train(num_epochs=num_epochs)

In [None]:
saved_models = os.path.join(os.getcwd(), model_folder)

# Take only the files directly inside the checkpoint folder (first os.walk entry).
# If the folder does not exist os.walk yields nothing; defaulting to an empty list
# keeps `files` from being left undefined or stale from an earlier run.
files = next(os.walk(saved_models), (saved_models, [], []))[2]

# Show the checkpoints flagged as "best".
[file for file in files if 'best' in file]

### Анализ моделей

In [None]:
# First file whose name contains 'trunk_best' / 'embedder_best' (IndexError if none exists).
trunk_candidates = [name for name in files if 'trunk_best' in name]
path_model = os.path.join(saved_models, trunk_candidates[0])
embedder_candidates = [name for name in files if 'embedder_best' in name]
path_embedder = os.path.join(saved_models, embedder_candidates[0])

In [None]:
# Ordered (unshuffled) loader over the test split, so outputs line up with test_dataset.data.
dataloader = torch.utils.data.DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=32)

In [None]:
# Inference wrapper built from the best trunk / embedder checkpoints and the evaluation transform.
# NOTE(review): ImageEmbedder is presumably provided by `from img_embedder import *` — confirm.
emb = ImageEmbedder(path_model, path_embedder, val_transform)

#### Поиск наилучшей границы

In [None]:
files = []
persons = dict()
input_path = "datasets/workers"

# Walk every split and group [file path, embedding] pairs by the name of the folder
# that contains the image (used as the person identifier).
for split_name in ['test','train','validation']:
    split_root = os.path.join(input_path, split_name)
    for folder, _subfolders, filenames in os.walk(split_root):
        for filename in filenames:
            person = os.path.basename(folder)
            filepath = os.path.join(folder, filename)
            embedding = emb.img2vect(Image.open(filepath))
            persons.setdefault(person, list()).append([filepath, embedding])

In [None]:
# Flatten the per-person groups into a list of [person, embedding] entries.
files = [
    [person, entry[1]]
    for person, entries in persons.items()
    for entry in entries
]

In [None]:
# Embeddings only, in the same order as `files`.
vec_arr = [embedding for _person, embedding in files]

In [None]:
# Pairwise cosine similarity of every embedding against every embedding;
# rows and columns follow the order of `vec_arr`.
cos_sim = pairwise.cosine_similarity(vec_arr, vec_arr)

In [None]:
# Build one row per ordered pair of embeddings (self-pairs included):
#   'same' -> 1 when both images belong to the same person, else 0
#   'cos'  -> cosine similarity of the pair
# Row k = a * n + b describes the pair (files[a], files[b]) and holds cos_sim[b][a].
# This is the same order and the same values as the nested-loop version, without
# creating n*n Python dicts.
person_names = np.array([entry[0] for entry in files])
same_person = person_names[:, None] == person_names[None, :]
out = pd.DataFrame({
    'same': same_person.astype(int).ravel(),
    'cos': np.asarray(cos_sim).T.ravel(),
})

In [None]:
# Mean similarity between *different* images of the same person.
# Self-pairs are excluded by position rather than by testing `cos != 1`: a float
# self-similarity is not guaranteed to be exactly 1, and distinct images may legitimately
# reach 1. `out` has one row per ordered pair in row-major order, so self-pairs sit on the
# diagonal, i.e. every (n + 1)-th row.
n_items = cos_sim.shape[0]
assert len(out) == n_items ** 2, "expected one row per ordered pair of embeddings"
not_self_pair = np.ones(len(out), dtype=bool)
not_self_pair[::n_items + 1] = False
out.loc[not_self_pair & (out['same'] == 1).to_numpy(), 'cos'].mean()

In [None]:
# Mean similarity between images of different persons.
different_person = out['same'] == 0
out.loc[different_person, 'cos'].mean()

In [None]:
# Index of the query image inside the test dataset (an item index, not a class label).
main_class = 1
query_path = test_dataset.data[main_class]
img = Image.open(query_path)
main_vector = emb.img2vect(img)

# The first element of the dataloader2vect result is taken as the test embeddings.
embedded_test_set = emb.dataloader2vect(dataloader)
test_embeddings = embedded_test_set[0]
l = len(test_embeddings)

In [None]:
def cos(img1, img2):
    """Return the cosine similarity of two inputs.

    Each argument may be a PIL image, which is first embedded with `emb.img2vect`,
    or an already computed vector, which is used unchanged.
    """
    vec_a, vec_b = (
        emb.img2vect(item) if isinstance(item, Image.Image) else item
        for item in (img1, img2)
    )
    norm_a = np.linalg.norm(vec_a)
    norm_b = np.linalg.norm(vec_b)
    # Divide by the two norms one after the other.
    return np.dot(vec_a, vec_b) / norm_a / norm_b
    
fig = plt.figure(figsize=(20, 3))

# Copies of the query image rotated by 90, 180 and 270 degrees.
img90, img180, img270 = (img.rotate(angle, expand=True) for angle in (90, 180, 270))

# Panels 1-4: the original image followed by its rotations.
for panel, picture in enumerate((img, img90, img180, img270), start=1):
    plt.subplot(1, 5, panel)
    plt.imshow(picture)

# Panel 5: similarity of the original to itself (fixed text) and to each rotation.
plt.subplot(1, 5, 5)
plt.text(0.1, 0.75, '1.0000000', fontsize=20)
for text_y, rotated in zip((0.55, 0.35, 0.15), (img90, img180, img270)):
    plt.text(0.1, text_y, cos(img, rotated), fontsize=20)

In [None]:
# Candidate query indices into the test dataset; the fourth one is selected.
candidate_indices = [0, 5, 7, 11, 8]
main_class = candidate_indices[3]
query_path = test_dataset.data[main_class]
img = Image.open(query_path)
main_vector = emb.img2vect(img)

# The first element of the dataloader2vect result is taken as the test embeddings.
embedded_test_set = emb.dataloader2vect(dataloader)
test_embeddings = embedded_test_set[0]
l = len(test_embeddings)
img

#### Визуализация

In [None]:
fig = plt.figure(figsize=(10,40))
# Similarity above this value is reported as a match.
treshold = 0.92

# One row per test image: query image | test image | verdict and similarity value.
for row in range(l):
    similarity = cos(main_vector, test_embeddings[row])
    if similarity > treshold:
        verdict, verdict_color = 'схожи', 'g'
    else:
        verdict, verdict_color = 'различны', 'r'

    first_cell = row * 3 + 1
    plt.subplot(l, 3, first_cell)
    plt.imshow(img)

    plt.subplot(l, 3, first_cell + 1)
    plt.imshow(Image.open(test_dataset.data[row]))

    plt.subplot(l, 3, first_cell + 2)
    plt.text(0.2, 0.6, verdict, fontsize=20, color=verdict_color)
    plt.text(0.05, 0.4, 'коэф.схожести = ' + str(round(similarity, 3)), fontsize=12)