In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

import sklearn
from sklearn.model_selection import train_test_split
import torchvision
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import transforms
from torch.utils.data import Dataset, DataLoader
import multiprocessing

from kaggle_secrets import UserSecretsClient

# from skimage.io import imread
import cv2

import imgaug as ia
import imgaug.augmenters as iaa

# Seed imgaug's global random state so the augmentation draws are repeatable.
ia.seed(1)

from skimage.transform import resize
import numpy as np
import math

In [None]:
# Single seed shared by the random number generators used in this notebook.
GLOBAL_SEED = 42

np.random.seed(GLOBAL_SEED)
# Layer initialisation and dropout draw from torch's generator, not NumPy's,
# so torch has to be seeded separately for runs to be repeatable.
torch.manual_seed(GLOBAL_SEED)

In [None]:
!pip install faiss-gpu

In [None]:
# Number of CPUs reported by the multiprocessing module; printed for information.
num_cores = multiprocessing.cpu_count()
print(f"CPU Cores: {num_cores}")

In [None]:
# Print the version of the CUDA compiler available in this environment.
!nvcc --version

In [None]:
# Load the competition's training metadata table.
# The cells below read its `hotel_id`, `chain` and `image` columns.
train_df = pd.read_csv("../input/hotel-id-2021-fgvc8/train.csv")

In [None]:
# Number of distinct hotels, i.e. the number of target classes.
n_classes = train_df['hotel_id'].nunique()
n_classes

In [None]:
from sklearn import preprocessing

# Encode the raw hotel ids as integer class labels; `le` stays fitted so the
# mapping can be inverted later.
le = preprocessing.LabelEncoder()
train_df['label'] = le.fit_transform(train_df['hotel_id'])

# Image path layout: <train_images>/<chain>/<image>.
kaggle_path = "/kaggle/input/hotel-id-2021-fgvc8/train_images/"
chain_dirs = train_df['chain'].astype(str)
image_names = train_df['image'].astype(str)
train_df['full_filepath'] = kaggle_path + chain_dirs + "/" + image_names

train_df

In [None]:
# Lookup table from encoded label back to the original hotel id.
label_hotel_pairs = train_df[['label', 'hotel_id']].values.tolist()
label_hotel_pairs.sort()
class_map = dict(label_hotel_pairs)

In [None]:
# Every label must end up with at least `min_value` rows. Labels that have
# fewer are padded with copies of their first row.
min_value = 7

# Collect all padding rows first and concatenate once: DataFrame.append is
# deprecated/removed in recent pandas, and growing a frame inside a loop copies
# the whole frame on every iteration.
padding_rows = []
for label, count in train_df['label'].value_counts().items():
    if count >= min_value:
        continue
    # `.iloc[[0]]` keeps a one-row DataFrame, so column dtypes are preserved.
    first_row = train_df[train_df['label'] == label].iloc[[0]]
    padding_rows.extend([first_row] * (min_value - count))

if padding_rows:
    train_df = pd.concat([train_df, *padding_rows])

In [None]:
# Shuffle so the rows kept per label below are a random subset. The explicit
# seed makes the selection the same every time this cell is run.
train = train_df.sample(frac=1.0, random_state=GLOBAL_SEED)

# Keep at most `min_value` rows for each label.
train = train.groupby("label").head(min_value)

# Order the result by label.
train = train.sort_values("label")

print(train)

Subsample

In [None]:
# Show how many rows each label has after subsampling.
train['label'].value_counts()

In [None]:
# NOTE(review): same expression as the previous cell — looks like a duplicate; confirm whether it is still needed.
train['label'].value_counts()

In [None]:
# Hold out 30% of the subsampled rows for validation, stratified by label.
X_train, X_val = train_test_split(
    train,
    test_size=0.30,
    stratify=train['label'],
    random_state=GLOBAL_SEED,
    shuffle=True,
)

In [None]:
# Batch and epoch settings.
BATCH_SIZE = 64
EPOCHS = 50
STEPS_PER_EPOCH = len(X_train) // BATCH_SIZE

# Input image size.
IMG_HEIGHT, IMG_WIDTH = 224, 224
IMG_SIZE = (IMG_HEIGHT, IMG_WIDTH)

n_classes

## PyTorch Dataset Class - Faster Approach

In [None]:
class HotelDataset(torch.utils.data.Dataset):
    """Map-style dataset that reads hotel images from disk with OpenCV.

    Args:
        x_set: Indexable collection of image file paths.
        y_set: Indexable collection of labels aligned with ``x_set``.
        training: When True, the random imgaug pipeline is applied to every image.
        img_size: Target size handed directly to ``cv2.resize`` as ``dsize``,
            which OpenCV interprets as ``(width, height)``.
        transform: Optional callable applied to the image after augmentation.
    """
    def __init__(self, x_set, y_set, training = True, img_size = (224, 224), transform = None):
        super().__init__()

        self.x_set = x_set
        self.y_set = y_set

        # Augmenters are applied in a random order for each image.
        self.aug = iaa.Sequential([
            iaa.Fliplr(0.5),
            iaa.Crop(percent=(0, 0.1)),
            iaa.Sometimes(
                0.5,
                iaa.GaussianBlur(sigma=(0, 0.5))
            ),
            iaa.LinearContrast((0.75, 1.5)),
            iaa.AdditiveGaussianNoise(loc=0, scale=(0.0, 0.05*255), per_channel=0.5),
            iaa.Multiply((0.8, 1.2), per_channel=0.2),
            iaa.Affine(
                scale={"x": (0.8, 1.2), "y": (0.8, 1.2)},
                translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)},
                rotate=(-25, 25),
                shear=(-8, 8)
            )
        ], random_order=True)

        self.transform = transform
        self.training = training
        self.img_size = img_size

    def __getitem__(self, index):
        """Return ``(image, label)`` for the sample at ``index``.

        Raises:
            FileNotFoundError: If OpenCV cannot read the image file.
        """
        path = self.x_set[index]
        image = cv2.imread(path)
        if image is None:
            # cv2.imread signals failure by returning None rather than raising;
            # fail here with the offending path instead of inside cv2.resize.
            raise FileNotFoundError(f"Could not read image: {path}")

        # OpenCV decodes to BGR channel order; convert to RGB so per-channel
        # statistics in a downstream transform line up with the right channels.
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, dsize = self.img_size)

        if self.training:
            image = self.aug(image = image)

        if self.transform is not None:
            image = self.transform(image)

        return (image, self.y_set[index])

    def __len__(self):
        """Number of samples, i.e. the number of file paths."""
        return len(self.x_set)

In [None]:
!pip install efficientnet_pytorch

In [None]:

from efficientnet_pytorch import EfficientNet
from torch import nn
class MLP(nn.Module):
    """Fully connected network built from a list of layer widths.

    Each hidden layer is ``Linear -> ReLU -> Dropout``, followed by a final
    ``Linear``. By default the final layer's raw output is returned: a loss
    such as ``CrossEntropyLoss`` expects unnormalised scores, so a trailing
    softmax here would be applied twice.

    Args:
        layers_size: Layer widths, input width first and output width last.
        dropout_rates: Dropout probabilities; hidden layer ``i`` uses
            ``dropout_rates[i]`` (index 0 and the last index are not read).
        final_relu: Append a ReLU after the final linear layer.
        type: Unused; kept so existing callers keep working.
        final_softmax: Append ``Softmax(dim=1)`` after the final linear layer.
            Ignored when ``final_relu`` is True.
    """
    def __init__(self, layers_size, dropout_rates, final_relu=False, type = "embedding", final_softmax=False):
        super().__init__()
        layers_list = []
        for i in range(1, len(layers_size) - 1):
            layers_list.append(nn.Linear(layers_size[i - 1], layers_size[i]))
            layers_list.append(nn.ReLU())
            layers_list.append(nn.Dropout(p = dropout_rates[i]))
        layers_list.append(nn.Linear(layers_size[-2], layers_size[-1]))
        if final_relu:
            layers_list.append(nn.ReLU())
        elif final_softmax:
            layers_list.append(nn.Softmax(dim = 1))
        self.net = nn.Sequential(*layers_list)

    def forward(self, x):
        """Run ``x`` through the stacked layers."""
        return self.net(x)
        
    

In [None]:
# Ask nvidia-smi to reset the GPU before the models are created.
# NOTE(review): this command may need elevated privileges — confirm it actually succeeds in this environment.
!nvidia-smi --gpu-reset


In [None]:
# Use the GPU when one is available, otherwise the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# Trunk: pretrained EfficientNet-B0. Its final linear layer is left in place,
# so the trunk's output width is that layer's `out_features`.
trunk = EfficientNet.from_pretrained('efficientnet-b0')
trunk_output_size = trunk._fc.out_features
trunk = torch.nn.DataParallel(trunk.to(device))

# Embedder: maps the trunk output to a 512-wide embedding.
embedder_net = MLP([trunk_output_size, 512, 512], dropout_rates = [0, 0.2, 0.2])
embedder = torch.nn.DataParallel(embedder_net.to(device))

# Classifier: maps a 512-wide embedding to one output per hotel class.
classifier_net = MLP([512, 512, n_classes], dropout_rates = [0, 0.2, 0.2])
classifier = torch.nn.DataParallel(classifier_net).to(device)

# One Adam optimizer per model; the pretrained trunk gets a much smaller
# learning rate than the freshly initialised heads.
trunk_optimizer = torch.optim.Adam(
    trunk.parameters(), lr=0.00001, weight_decay=0.0001
)
embedder_optimizer = torch.optim.Adam(
    embedder.parameters(), lr=0.005, weight_decay=0.0001
)
classifier_optimizer = torch.optim.Adam(
    classifier.parameters(), lr=0.005, weight_decay=0.0001
)


def _to_normalized_tensor():
    """Build the ToTensor + per-channel normalisation pipeline."""
    return transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])


# Train and validation currently use the same steps, as separate instances.
train_transform = _to_normalized_tensor()
val_transform = _to_normalized_tensor()





In [None]:
# Training split: random augmentation enabled.
train_dataset = HotelDataset(X_train.full_filepath.values, torch.tensor(X_train.label.values), training = True, transform = train_transform)
# Validation split: no augmentation, and the validation transform, so a later
# change to the training transform cannot leak into evaluation.
val_dataset = HotelDataset(X_val.full_filepath.values, torch.tensor(X_val.label.values), training = False, transform = val_transform)

In [None]:
!pip install pytorch_metric_learning

In [None]:
from pytorch_metric_learning import losses, miners, distances, reducers, testers, samplers
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
### MNIST code originally from https://github.com/pytorch/examples/blob/master/mnist/main.py ### 
from torchvision import datasets
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
import numpy as np


# Metric-learning loss on the embeddings, plus a classification loss on the
# classifier output.
loss = losses.TripletMarginLoss(margin=0.1)
classification_loss = torch.nn.CrossEntropyLoss()

# Mining function used to select tuples for the metric loss.
miner = miners.MultiSimilarityMiner(epsilon=0.1)

# Sampler configured with m=4 samples per class.
sampler = samplers.MPerClassSampler(
    train_dataset.y_set,
    m=4,
    length_before_new_iter=len(train_dataset),
)

# Other training parameters.
batch_size = 8
num_epochs = 4

# Dictionaries in the layout the trainer expects.
models = {
    "trunk": trunk,
    "embedder": embedder,
    "classifier": classifier,
}
optimizers = {
    "trunk_optimizer": trunk_optimizer,
    "embedder_optimizer": embedder_optimizer,
    "classifier_optimizer": classifier_optimizer,
}
loss_funcs = {
    "metric_loss": loss,
    "classifier_loss": classification_loss,
}
mining_funcs = {"tuple_miner": miner}

# Optional relative weights of the two losses.
loss_weights = {
    "metric_loss": 1,
    "classifier_loss": 0.5,
}

In [None]:
# Remove logs if you want to train with new parameters
# Delete logs, saved models and tensorboard data from earlier runs (irreversible).
!rm -rf example_logs/ example_saved_models/ example_tensorboard/

In [None]:
!pip install umap-learn
!pip install record-keeper

In [None]:
# Show which device (cuda or cpu) was selected earlier.
print(device)

In [None]:
%matplotlib inline
from pytorch_metric_learning import losses, miners, samplers, trainers, testers
from pytorch_metric_learning.utils import common_functions
import pytorch_metric_learning.utils.logging_presets as logging_presets
from pytorch_metric_learning.utils.accuracy_calculator import AccuracyCalculator
import numpy as np
import torchvision
from torchvision import datasets, transforms
import torch
import torch.nn as nn
from PIL import Image
import logging
import matplotlib.pyplot as plt
import umap
from cycler import cycler
import record_keeper
import pytorch_metric_learning
# Let INFO-level messages through the root logger and record the version of
# the metric-learning library in use.
root_logger = logging.getLogger()
root_logger.setLevel(logging.INFO)
logging.info("VERSION %s", pytorch_metric_learning.__version__)
    

In [None]:
# Create the record keeper from the logging presets, given the log and tensorboard folder names.
# NOTE(review): this rebinds `record_keeper`, shadowing the module of the same name imported earlier.
record_keeper, _, _ = logging_presets.get_record_keeper("example_logs", "example_tensorboard")
# Container of hooks built on that record keeper.
hooks = logging_presets.get_hook_container(record_keeper)
# Splits handed to the tester; only the validation set is used here.
dataset_dict = {"val": val_dataset}
# Folder name passed to the end-of-epoch hook below.
model_folder = "example_saved_models"

def visualizer_hook(umapper, umap_embeddings, labels, split_name, keyname, *args):
    """Plot the first two columns of the UMAP embeddings, one colour per label.

    ``umapper`` and any extra positional arguments are accepted but not used.
    """
    logging.info("UMAP plot for the {} split and label set {}".format(split_name, keyname))
    label_set = np.unique(labels)
    num_classes = len(label_set)

    plt.figure(figsize=(20,15))
    # Spread the classes evenly over the colormap, one colour per class.
    palette = [plt.cm.nipy_spectral(frac) for frac in np.linspace(0, 0.9, num_classes)]
    plt.gca().set_prop_cycle(cycler("color", palette))

    for label in label_set:
        mask = labels == label
        plt.plot(umap_embeddings[mask, 0], umap_embeddings[mask, 1], ".", markersize=1)
    plt.show()

# Create the tester: it is given a UMAP instance as visualizer and
# `visualizer_hook` (defined above) to draw the plot.
tester = testers.GlobalEmbeddingSpaceTester(end_of_testing_hook = hooks.end_of_testing_hook, 
                                            visualizer = umap.UMAP(), 
                                            visualizer_hook = visualizer_hook,
                                            dataloader_num_workers = 2,
                                            accuracy_calculator=AccuracyCalculator(k="max_bin_count"))

# End-of-epoch hook built from the tester, the evaluation splits and the model folder.
# NOTE(review): test_interval=1 / patience=1 presumably mean "evaluate every epoch, stop after one epoch without improvement" — confirm against the library docs.
end_of_epoch_hook = hooks.end_of_epoch_hook(tester, 
                                            dataset_dict, 
                                            model_folder, 
                                            test_interval = 1,
                                            patience = 1)

In [None]:
# Assemble the trainer from the models, optimizers, losses, miner, sampler and
# hooks defined in the cells above.
trainer = trainers.TrainWithClassifier(models,
                                optimizers,
                                batch_size,
                                loss_funcs,
                                mining_funcs,
                                train_dataset,
                                sampler=sampler,
                                dataloader_num_workers = 2,
                                loss_weights = loss_weights,
                                end_of_iteration_hook = hooks.end_of_iteration_hook,
                                end_of_epoch_hook = end_of_epoch_hook)

In [None]:
# Load the TensorBoard notebook extension and point it at the tensorboard log folder.
%load_ext tensorboard
%tensorboard --logdir example_tensorboard

In [None]:
# Start training.
# NOTE(review): the `num_epochs = 4` variable set earlier is not used here; the literal 5 is what takes effect — confirm which is intended.
trainer.train(num_epochs=5)

<a href="./trunk_best3.pth"> Download File </a>

In [None]:
# Bundle the saved-models folder into a single archive for download.
!zip -r saved_model.zip example_saved_models/

<a href="./trunk_optimizer_best3.pth"> Download File </a>