In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

import os

import sklearn
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow.keras as keras

import multiprocessing

from tensorflow.keras.layers import Dense, Input, Conv2D
from tensorflow.keras.applications import EfficientNetB0

from kaggle_secrets import UserSecretsClient

# from skimage.io import imread
import cv2

from skimage.transform import resize
import numpy as np
import math

import wandb

In [None]:
# Read the Weights & Biases credentials from Kaggle's secret store so that no
# key is hard-coded in the notebook.
user_secrets = UserSecretsClient()
wandb_api = user_secrets.get_secret("wandb_api")
# NOTE(review): wandb_user is fetched but not used in any visible cell.
wandb_user = user_secrets.get_secret("wandb_user")

# Authenticate, then start a run in the 'hotel-id' project; the value returned
# by wandb.init is kept in `init`.
wandb.login(key = wandb_api)
init = wandb.init(project = 'hotel-id')

In [None]:
# Single seed reused for NumPy, TensorFlow and the train/validation split.
GLOBAL_SEED = 42

# Seed the global NumPy and TensorFlow random generators.
np.random.seed(GLOBAL_SEED)
tf.random.set_seed(GLOBAL_SEED)

In [None]:
# Number of CPU cores reported by the OS; later passed to model.fit as `workers`.
num_cores = multiprocessing.cpu_count()
print("CPU Cores:", num_cores)

In [None]:
# Load the competition's training metadata table from the Kaggle input directory.
train = pd.read_csv("../input/hotel-id-2021-fgvc8/train.csv")

# Disabled: up-front cast of `image` to str (the path-building cell casts inline instead).
# train.image = train.image.astype(str)

In [None]:
# Preview the first rows of the freshly loaded metadata.
train.head()

In [None]:
# Root directory of the training images; files live under <root>/<chain>/<image>.
kaggle_path = "../input/hotel-id-2021-fgvc8/train_images/"
# Build "<chain>/<image>" per row, then prefix it with the image root.
train["full_filepath"] = kaggle_path + train["chain"].astype(str).str.cat(
    train["image"].astype(str), sep="/"
)

In [None]:
# Preview again to check the newly added `full_filepath` column.
train.head()

In [None]:
# Show the first generated image path. The column is selected by name so the
# check does not depend on how many columns train.csv has or on their order.
train["full_filepath"].iloc[0]

## Subsample: keep chains 0, 1 and 2, then draw a random sample of rows

In [None]:
# Restrict the data to hotel chains 0, 1 and 2, then display the remaining size.
train = train.loc[train["chain"].isin([0, 1, 2])]
train.shape

In [None]:
# Upper bound on the number of rows kept for this experiment.
n_subsample = 5000
# Cap the request at the rows actually available (DataFrame.sample raises when
# asked for more rows than exist without replacement) and pin the seed so the
# draw does not depend on how much of NumPy's global random state was consumed
# by earlier cells or by re-running this one.
train = train.sample(n = min(n_subsample, len(train)), random_state = GLOBAL_SEED)

In [None]:
# Shuffled 70/30 hold-out split, stratified on `chain` and seeded with GLOBAL_SEED.
X_train, X_val = train_test_split(
    train,
    test_size = 0.30,
    shuffle = True,
    stratify = train['chain'],
    random_state = GLOBAL_SEED,
)

In [None]:
# (rows, columns) of the training split, then of the validation split.
print(X_train.shape, X_val.shape, sep = "\n")

In [None]:
# Number of distinct hotel chains present in the training split; used as the
# size of the classifier head.
n_classes = X_train.chain.nunique()

BATCH_SIZE = 64
# Round up (instead of flooring) so the step count matches the number of
# batches HotelBatchSequence reports via ceil(n / batch_size); one Keras epoch
# is then exactly one pass over the training Sequence.
STEPS_PER_EPOCH = math.ceil(len(X_train) / BATCH_SIZE)
# NOTE(review): the visible model.fit call passes epochs = 2 and does not read
# this constant — confirm which value is intended.
EPOCHS = 50

# Target image size fed to the network.
IMG_HEIGHT = 224
IMG_WIDTH = 224
IMG_SIZE = (IMG_HEIGHT, IMG_WIDTH)

## TF Sequence Class - Faster Approach

In [None]:
# Based on https://www.tensorflow.org/api_docs/python/tf/keras/utils/Sequence
# https://github.com/keras-team/keras/issues/12847
# https://stanford.edu/~shervine/blog/keras-how-to-generate-data-on-the-fly
# https://keunwoochoi.wordpress.com/2017/08/24/tip-fit_generator-in-keras-how-to-parallelise-correctly/

class HotelBatchSequence(tf.keras.utils.Sequence):
    """Keras `Sequence` that reads and resizes images from disk one batch at a time.

    Images are loaded with OpenCV only when a batch is requested, so the whole
    image set never has to be held in memory.
    """

    def __init__(self, x_set, y_set, batch_size,
                 img_size = (224, 224),
                 augment = False,
                 rescale_to_unit = False):
        """
        Args:
            x_set: iterable of image file paths (list, array or pandas Series).
            y_set: labels aligned with `x_set` (e.g. a one-hot array).
            batch_size: number of samples per batch.
            img_size: (height, width) every image is resized to.
            augment: stored on the instance; no augmentation is implemented
                in this class yet.
            rescale_to_unit: when True, pixel values are scaled to [0, 1]
                (the previous behaviour of this class). The default keeps the
                [0, 255] range, which is what Keras' EfficientNet models
                expect because they rescale their inputs internally.

        Raises:
            ValueError: if `x_set` and `y_set` have different lengths.
        """
        super().__init__()

        # Materialise the inputs so that batch slicing below is purely
        # positional, whatever index a pandas Series passed in may carry.
        self.x = list(x_set)
        self.y = np.asarray(y_set)
        if len(self.x) != len(self.y):
            raise ValueError(
                f"x_set and y_set differ in length: {len(self.x)} != {len(self.y)}"
            )

        self.batch_size = batch_size
        self.img_size = img_size
        self.augment = augment
        self.rescale_to_unit = rescale_to_unit

    def __len__(self):
        """Denotes the number of batches per epoch (the last one may be partial)."""
        return math.ceil(len(self.x) / self.batch_size)

    def _load_image(self, file_name):
        """Read one image from disk and resize it to `self.img_size`."""
        image = cv2.imread(str(file_name))
        if image is None:
            # cv2.imread signals an unreadable/missing file by returning None;
            # fail here with the path rather than deep inside `resize`.
            raise FileNotFoundError(f"Could not read image: {file_name}")
        # NOTE(review): OpenCV returns channels in BGR order; this is left
        # unchanged so the network keeps seeing the channel order it always saw.
        return resize(image, self.img_size,
                      preserve_range = not self.rescale_to_unit)

    def __getitem__(self, idx):
        """Generate one batch of data as an `(images, labels)` tuple."""
        first_id = idx * self.batch_size
        last_id = first_id + self.batch_size

        batch_x = self.x[first_id:last_id]
        batch_y = self.y[first_id:last_id]

        # float32 is what the model computes in; building the batch in that
        # dtype halves its memory footprint compared with skimage's float64.
        images = np.array([self._load_image(file_name) for file_name in batch_x],
                          dtype = np.float32)

        return images, np.array(batch_y)


In [None]:
# Batch loaders for the two splits: image paths as inputs, one-hot encoded
# chain ids as targets.
TrainGenerator = HotelBatchSequence(
    x_set = X_train["full_filepath"],
    y_set = tf.keras.utils.to_categorical(X_train["chain"]),
    batch_size = BATCH_SIZE,
)

ValidGenerator = HotelBatchSequence(
    x_set = X_val["full_filepath"],
    y_set = tf.keras.utils.to_categorical(X_val["chain"]),
    batch_size = BATCH_SIZE,
)

In [None]:
# EfficientNetB0 built without pretrained weights, keeping its own
# classification head sized to `n_classes`.
efficientnet = EfficientNetB0(
    weights = None,
    include_top = True,
    classes = n_classes,
    input_shape = (*IMG_SIZE, 3),
)

# efficientnet.summary()

In [None]:
# `model` is an alias for the EfficientNet instance built above.
model = efficientnet

# Targets are one-hot encoded, hence categorical cross-entropy. `metrics` is
# passed as a list, the documented form for Model.compile, rather than as a
# bare string.
model.compile(optimizer = 'adam',
              loss = 'categorical_crossentropy',
              metrics = ['accuracy'])


In [None]:
# Source: https://gist.github.com/Callidior/747eb767862c9d48f9d900a6373b16d1
# Author: Callidior

# Also: https://gist.github.com/jeremyjordan/5a222e04bb78c242f5763ad40626c452

class SGDR(tf.keras.callbacks.Callback):
    """

    # Source: https://gist.github.com/Callidior/747eb767862c9d48f9d900a6373b16d1
    # Author: Callidior

    This callback implements the learning rate schedule for
    Stochastic Gradient Descent with warm Restarts (SGDR),
    as proposed by Loshchilov & Hutter (https://arxiv.org/abs/1608.03983).

    The learning rate at each epoch is computed as:
    lr(i) = min_lr + 0.5 * (max_lr - min_lr) * (1 + cos(pi * i/num_epochs))

    Here, num_epochs is the number of epochs in the current cycle, which starts
    with base_epochs initially and is multiplied by mul_epochs after each cycle.

    # Example
        ```python
            sgdr = SGDR(min_lr=0.0, max_lr=0.05,
                        base_epochs=10, mul_epochs=2)
            model.compile(optimizer=keras.optimizers.SGD(decay=1e-4, momentum=0.9),
                          loss=loss)
            model.fit(X_train, Y_train, callbacks=[sgdr])
        ```

    # Arguments
        min_lr: minimum learning rate reached at the end of each cycle.
        max_lr: maximum learning rate used at the beginning of each cycle.
        base_epochs: number of epochs in the first cycle.
        mul_epochs: factor with which the number of epochs is multiplied
                after each cycle.
    """

    def __init__(self, min_lr=0.0, max_lr=0.05, base_epochs=10, mul_epochs=2):
        super(SGDR, self).__init__()

        self.min_lr = min_lr
        self.max_lr = max_lr
        self.base_epochs = base_epochs
        self.mul_epochs = mul_epochs

        # Number of completed cycles, epochs elapsed in the current cycle and
        # total epochs seen by this callback.
        self.cycles = 0.
        self.cycle_iterations = 0.
        self.trn_iterations = 0.

        self._reset()

    def _reset(self, new_min_lr=None, new_max_lr=None,
               new_base_epochs=None, new_mul_epochs=None):
        """Resets cycle iterations, optionally replacing schedule parameters."""

        if new_min_lr is not None:
            self.min_lr = new_min_lr
        if new_max_lr is not None:
            self.max_lr = new_max_lr
        if new_base_epochs is not None:
            self.base_epochs = new_base_epochs
        if new_mul_epochs is not None:
            self.mul_epochs = new_mul_epochs
        self.cycles = 0.
        self.cycle_iterations = 0.

    def _set_lr(self, value):
        """Write `value` into the optimizer's learning rate."""
        # The Keras backend is referenced through `tf` because no `K` alias is
        # imported anywhere in this notebook.
        tf.keras.backend.set_value(self.model.optimizer.lr, value)

    def _get_lr(self):
        """Read the optimizer's current learning rate."""
        return tf.keras.backend.get_value(self.model.optimizer.lr)

    def sgdr(self):
        """Return the cosine-annealed learning rate for the next epoch of the cycle."""

        cycle_epochs = self.base_epochs * (self.mul_epochs ** self.cycles)
        # `cycle_iterations + 1` is used as the position within the cycle, so
        # the last epoch of a cycle is run at exactly min_lr.
        return self.min_lr + 0.5 * (self.max_lr - self.min_lr) * (1 + np.cos(np.pi * (self.cycle_iterations + 1) / cycle_epochs))

    def on_train_begin(self, logs=None):
        """Start at max_lr for a fresh cycle, else resume the schedule."""

        if self.cycle_iterations == 0:
            self._set_lr(self.max_lr)
        else:
            self._set_lr(self.sgdr())

    def on_epoch_end(self, epoch, logs=None):
        """Record the learning rate just used, then set the one for the next epoch."""

        logs = logs or {}
        logs['lr'] = self._get_lr()

        self.trn_iterations += 1
        self.cycle_iterations += 1
        if self.cycle_iterations >= self.base_epochs * (self.mul_epochs ** self.cycles):
            # Cycle finished: warm restart at max_lr; the next cycle is
            # mul_epochs times longer.
            self.cycles += 1
            self.cycle_iterations = 0
            self._set_lr(self.max_lr)
        else:
            self._set_lr(self.sgdr())

In [None]:
# wandb_callback = wandb.keras.WandbCallback(log_weights=True)

# Checkpoint callback; the epoch number and validation loss are formatted into
# the file name.
model_checkpoint = tf.keras.callbacks.ModelCheckpoint(
    filepath = "tfmodels/weights.{epoch:02d}-{val_loss:.2f}.hdf5"
)

# Cosine-annealing schedule with warm restarts: first cycle of 10 epochs, each
# following cycle twice as long, learning rate moving between 0.05 and 0.0.
cosine_annealing_lr = SGDR(
    min_lr = 0.0,
    max_lr = 0.05,
    base_epochs = 10,
    mul_epochs = 2,
)


In [None]:
# Train on the training Sequence and validate on the hold-out Sequence; the
# value returned by fit is kept in `history`.
# NOTE(review): `epochs` is hard-coded to 2 although EPOCHS = 50 is defined in
# the configuration cell — confirm which value is intended.
# Only the checkpoint callback is active; the W&B and SGDR callbacks are
# commented out in the list below.
history = model.fit(TrainGenerator,
                    steps_per_epoch = STEPS_PER_EPOCH,
                    validation_data = ValidGenerator,
                    workers = num_cores,
                    epochs = 2,
                    use_multiprocessing = False,
                    max_queue_size = 10,
                    callbacks=[
#                        wandb_callback, 
                         model_checkpoint,
#                         cosine_annealing_lr
                    ])