In [None]:
#Numpy
import numpy as np
from numpy import loadtxt
#Pandas
import pandas as pd
#Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import Sequence
from keras.models import Sequential
from keras.layers import Dense
from tensorflow.keras import layers
from keras.preprocessing.image import array_to_img 
from keras.preprocessing.image import ImageDataGenerator
#OS
import os
from os import listdir
from os.path import isfile, join
#albumentations
import albumentations as A
from albumentations import Compose, HorizontalFlip, RandomBrightnessContrast, ToFloat, RGBShift,VerticalFlip
from albumentations import RandomGamma,GaussNoise,GaussianBlur, RandomSizedCrop, Rotate
#Other
import cv2 
import matplotlib.pyplot as plt
from sklearn import preprocessing
from PIL import Image
from tqdm.notebook import tqdm

In [None]:
# Root directory of the dataset. The trailing slash matters: the sub-paths
# below are built by plain string concatenation.
path = "../input/hotel-id-to-combat-human-trafficking-2022-fgvc9/"

# Locations derived from the root, in the order train / test / validation.
train_dir, test_dir, validation_dir = (
    path + suffix for suffix in ('train_images/', 'test_images/', 'train_masks')
)

# Side length (pixels) every image is resized to, and images per batch.
IM_SIZE, BATCH_SIZE = 224, 32

In [None]:
#Finds all training images and links them to their hotel number.
#Rows are collected in plain Python lists and converted once at the end:
#np.append copies the whole array on every call, which made the original
#nested loop quadratic in the number of images.
hotels = []
images = []
# os.listdir returns entries in arbitrary order; sorting keeps the frame reproducible.
for hotel in tqdm(sorted(os.listdir(train_dir))):
    files = sorted(os.listdir(train_dir + hotel))
    hotels.extend([hotel] * len(files))
    images.extend(files)
hotels = np.array(hotels)
images = np.array(images)

#Puts all hotels into a dataframe (one row per image)
d = {'image': images, 'hotel_id': hotels}
train = pd.DataFrame(data=d)

#And then encodes the hotel ids as integer class labels
le = preprocessing.LabelEncoder()
train['label'] = le.fit_transform(train['hotel_id'])

#And adds a couple of other variables
# Path of the image relative to train_dir: "<hotel_id>/<file name>".
train['chain_image'] = train['hotel_id'].astype(str) + '/' + train['image']
train['augmented'] = False
train.head()

In [None]:
#Per-hotel weights that compensate for the differing number of images per hotel:
#weight[label] = total_images / (number_of_labels * images_with_that_label).
images_per_label = train.groupby(['label']).size().to_numpy()
n_labels = train['label'].unique().size
balanced_weights = images_per_label.sum() / (n_labels * images_per_label)
# Keyed by position in label order, i.e. {0: w0, 1: w1, ...}.
hotel_weight = {position: weight for position, weight in enumerate(balanced_weights)}


In [None]:
#Augmentation of the images is done using the augmentor1 function
def augmentor1(imgArr):
    """Apply a random albumentations pipeline to a single image.

    The image is divided by 255 before the pipeline runs and multiplied by 255
    afterwards, so every transform below operates on 0-1 scaled values.

    Args:
        imgArr: image array whose first two axes are height and width, with
            pixel values on a 0-255 scale.

    Returns:
        The augmented image, scaled back to 0-255.
    """
    # Shorter image side: upper bound for the height of the random crop window.
    sizes = np.min(imgArr.shape[0:2])
    augmentation1 = A.Compose([
                        HorizontalFlip(p = 0.5), 
                        Rotate(), #might be bad for the network??
                        RandomBrightnessContrast(p = 1), 
                        RandomSizedCrop(min_max_height= [int(sizes*0.6),sizes], height= IM_SIZE, width = IM_SIZE, p = 0.5),
                        GaussianBlur(p = 0.25),
                        GaussNoise(p=0.25, var_limit=10/255), 
                        A.CoarseDropout(p=0.5, min_holes=1, max_holes=6, 
                                        min_height=IM_SIZE//16, max_height=IM_SIZE//4,
                                        min_width=IM_SIZE//16,  max_width=IM_SIZE//4), # normal coarse dropout
                        # One large patch with the first channel saturated (red for
                        # RGB input). The fill value is expressed on the 0-1 scale
                        # the pipeline runs on: the previous 255 turned into
                        # 255*255 once the result was scaled back below.
                        A.CoarseDropout(p=0.75, max_holes=1, 
                                        min_height=IM_SIZE//4, max_height=IM_SIZE//2,
                                        min_width=IM_SIZE//4,  max_width=IM_SIZE//2, 
                                        fill_value=(1.0, 0.0, 0.0))
                        ])
    return augmentation1(image = imgArr/255)['image']*255

# Preview: the original image next to two independent random augmentations.
image_path = train_dir+'10054/000039550.jpg'
image_bgr = cv2.imread(image_path)
# cv2.imread signals an unreadable file by returning None instead of raising.
if image_bgr is None:
    raise FileNotFoundError(image_path)
# OpenCV loads channels as BGR; matplotlib expects RGB.
image1 = np.array(cv2.cvtColor(image_bgr, cv2.COLOR_BGR2RGB), dtype = 'float32')
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(15,15))
ax1.imshow(image1/255)
ax1.set_title('original')
# Brightness/noise transforms can leave the 0-1 range, so clip before display.
ax2.imshow(np.clip(augmentor1(image1)/255, 0, 1))
ax2.set_title('augmented (sample 1)')
ax3.imshow(np.clip(augmentor1(image1)/255, 0, 1))
ax3.set_title('augmented (sample 2)');

In [None]:
# Code used from https://stackoverflow.com/questions/51355524/use-multiple-directories-for-flow-from-directory-in-keras

class MergedGenerators(Sequence):
    """Keras Sequence that joins the batches of several indexable generators.

    Batch `index` of the merged sequence is built by taking batch
    `index % len(generator)` from every wrapped generator and concatenating
    the results in generator order.
    """

    def __init__(self, batch_size, generators=None, sub_batch_size=None):
        """Store the wrapped generators.

        Args:
            batch_size: nominal size of a merged batch; only used by __len__.
            generators: indexable batch generators that support len() and
                expose a `class_mode` attribute. Defaults to no generators.
            sub_batch_size: batch size of each generator, in the same order
                as `generators`. Defaults to an empty list.
        """
        super().__init__()
        # None rather than [] as default: a mutable default list would be
        # shared by every instance created without that argument.
        self.generators = [] if generators is None else generators
        self.sub_batch_size = [] if sub_batch_size is None else sub_batch_size
        self.batch_size = batch_size

    def __len__(self):
        """Number of merged batches: total sample capacity / batch_size, truncated."""
        total_samples = sum(
            len(self.generators[idx]) * self.sub_batch_size[idx]
            for idx in range(len(self.sub_batch_size)))
        return int(total_samples / self.batch_size)

    def __getitem__(self, index):
        """Getting items from the generators and packing them"""

        X_batch = []
        Y_batch = []
        for generator in self.generators:
            # Wrap around so shorter generators are reused.
            batch = generator[index % len(generator)]
            if generator.class_mode is None:
                # Unlabelled generator: the batch is the images themselves.
                X_batch.extend(batch)
            else:
                x1, y1 = batch
                X_batch.extend(x1)
                Y_batch.extend(y1)

        # The first generator decides whether labels are returned at all.
        if self.generators[0].class_mode is None:
            return np.array(X_batch)
        # NOTE(review): only labelled batches go through ResNet50's
        # preprocess_input here; unlabelled batches above are returned raw.
        # make_model applies preprocess_input as well - confirm which is intended.
        return tf.keras.applications.resnet50.preprocess_input(np.array(X_batch)), np.array(Y_batch)

In [None]:
def two_image_generator(generator, 
                        directory, 
                        batch_size,
                        subset,
                        shuffle = False,
                        ): 
    """Endlessly yield `([images, labels], labels)` batches read from a directory.

    The labels appear both as a model input and as the target, which is the
    input layout of a model that takes `[images, labels]`.

    Because this is a generator function, the directory iterator is only
    created when the first batch is requested.

    Args:
        generator: object providing `flow_from_directory` (e.g. an ImageDataGenerator).
        directory: root directory passed to `flow_from_directory`.
        batch_size: number of images per batch.
        subset: subset name forwarded to `flow_from_directory`.
        shuffle: whether the iterator shuffles the images.

    Yields:
        `[images, labels], labels` for every batch the iterator produces.
    """
    flow_options = dict(
        subset = subset,
        target_size = (IM_SIZE, IM_SIZE),
        batch_size = batch_size,
        class_mode = 'categorical',
        shuffle = shuffle,
        seed = 7,
    )
    directory_batches = generator.flow_from_directory(directory, **flow_options)

    while True:
        batch = directory_batches.next()
        batch_images, batch_labels = batch[0], batch[1]
        yield [batch_images, batch_labels], batch_labels
        
        
# Both generators hold out the same 20% of the images for validation.
# Only the training generator augments its images.
train_image_generator = ImageDataGenerator(validation_split = 0.2, preprocessing_function = augmentor1)  
validation_image_generator = ImageDataGenerator(validation_split = 0.2, preprocessing_function = None)

train_generator = two_image_generator(train_image_generator, 
                                      train_dir,
                                      batch_size = BATCH_SIZE,  
                                      shuffle = True,
                                      subset = 'training')

# Validation batches come from the non-augmenting generator. This was
# previously built from train_image_generator, so the validation images were
# randomly augmented and validation_image_generator was never used.
validation_generator = two_image_generator(validation_image_generator, 
                                      train_dir,
                                      batch_size = BATCH_SIZE,  
                                      shuffle = True, 
                                      subset = 'validation')

#Creates the two data generators and merges them afterwards.
#This code previously referenced data_generator1, data_generator2,
#train_generator2 and validation_generator2, none of which is defined in any
#cell visible here, so it raised NameError on a fresh kernel. The names are
#mapped to the generators created right below and to the directory iterators
#built from them.
train_image_generator = ImageDataGenerator(validation_split = 0.2, preprocessing_function = augmentor1)
validation_image_generator = ImageDataGenerator(validation_split = 0.2, preprocessing_function = None)

# NOTE(review): the training iterator is created with shuffle=False - confirm
# that a fixed image order is intended for training.
train_images = train_image_generator.flow_from_directory(train_dir,
        target_size = (IM_SIZE, IM_SIZE),
        subset="training",
        batch_size = BATCH_SIZE,
        shuffle= False,
        class_mode = 'categorical')

validation_images = validation_image_generator.flow_from_directory(train_dir,
        target_size = (IM_SIZE, IM_SIZE),
        subset="validation",
        batch_size = BATCH_SIZE,
        class_mode = 'categorical')

# Second, independent iterator over the same training subset.
train_labels = train_image_generator.flow_from_directory(train_dir,
        target_size = (IM_SIZE, IM_SIZE),
        subset="training",
        batch_size = BATCH_SIZE,
        shuffle= False,
        class_mode = 'categorical')


# MergedGenerators needs indexable iterators exposing class_mode, which the
# directory iterators above provide.
train_gen = MergedGenerators(
        BATCH_SIZE,
        generators=[ train_images],
        sub_batch_size=[ BATCH_SIZE])

val_gen = MergedGenerators(
        BATCH_SIZE,
        generators=[ validation_images],
        sub_batch_size=[ BATCH_SIZE])

In [None]:
#https://github.com/4uiiurz1/keras-arcface
from keras import regularizers
from keras import backend as K

class ArcFace(tf.keras.layers.Layer):
    """ArcFace classification head.

    Called on `[features, labels]`. Features and class weights are
    L2-normalised so their product is a cosine; the angular margin `m` is
    added to the angle wherever `labels` is 1, the result is scaled by `s`
    and passed through a softmax.
    """

    def __init__(self, n_classes=10, s=30.0, m=0.50, regularizer=None, **kwargs):
        """
        Args:
            n_classes: number of output classes (columns of the weight matrix).
            s: scale applied to the logits before the softmax.
            m: angular margin added to the angle of the labelled class.
            regularizer: optional weight regularizer, in any form accepted by
                `tf.keras.regularizers.get`.
            **kwargs: forwarded to `tf.keras.layers.Layer`.
        """
        super(ArcFace, self).__init__(**kwargs)
        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.regularizer = tf.keras.regularizers.get(regularizer)

    def build(self, input_shape):
        """Create the class weight matrix; `input_shape[0]` is the feature shape."""
        super(ArcFace, self).build(input_shape[0])
        self.W = self.add_weight(name='W',
                                shape=(input_shape[0][-1], self.n_classes),
                                initializer='glorot_uniform',
                                trainable=True,
                                regularizer=self.regularizer)

    def call(self, inputs):
        """Return softmax probabilities for `inputs = [features, labels]`."""
        x, y = inputs
        # normalize feature
        x = tf.nn.l2_normalize(x, axis=1)
        # normalize weights
        W = tf.nn.l2_normalize(self.W, axis=0)
        # dot product of unit vectors = cosine of the angle to each class
        logits = x @ W
        # add margin
        # clip logits to prevent zero division when backward
        eps = tf.keras.backend.epsilon()
        theta = tf.acos(tf.clip_by_value(logits, -1.0 + eps, 1.0 - eps))
        target_logits = tf.cos(theta + self.m)
        # margin-adjusted cosine where y is 1, plain cosine elsewhere
        logits = logits * (1 - y) + target_logits * y
        # feature re-scale
        logits *= self.s
        return tf.nn.softmax(logits)

    def compute_output_shape(self, input_shape):
        """One probability per class for every sample in the batch."""
        return (None, self.n_classes)
    
    def get_config(self):
        """Return the constructor arguments in serialisable form."""
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            # Store the serialised form: a raw regularizer object cannot be
            # written to a saved model config. __init__ turns it back into an
            # object through regularizers.get.
            'regularizer': tf.keras.regularizers.serialize(self.regularizer),
        })
        return config


In [None]:
#The neural network and its compile step
def make_model(name, n_classes=3116):
    """Build and compile an ArcFace classifier on a frozen ImageNet backbone.

    The model takes two inputs, `[images, labels]`, because the ArcFace head
    needs the labels to apply its margin.

    Args:
        name: backbone to use, either 'ResNet50' or 'efficientnet_b0'.
        n_classes: number of classes; sizes both the label input and the
            ArcFace head.

    Returns:
        A `(model, callbacks)` tuple: the compiled model and a list holding a
        ModelCheckpoint callback that writes "save_at_{epoch}.h5".

    Raises:
        ValueError: if `name` is not one of the supported backbones.
    """
    inputs = tf.keras.layers.Input(shape=(IM_SIZE, IM_SIZE, 3))
    labels = tf.keras.layers.Input(shape=(n_classes,))

    # Each backbone gets its own preprocessing. Previously the ResNet50
    # preprocessing was applied for both, the EfficientNet result was thrown
    # away, and a trailing comma wrapped the preprocessed tensor in a tuple.
    if name == 'ResNet50':
        preprocessed = tf.keras.applications.resnet50.preprocess_input(inputs)
        embedding = keras.applications.ResNet50(include_top = False,
                                                weights='imagenet', 
                                                pooling = 'max',
                                                input_shape = (IM_SIZE, IM_SIZE, 3),
                                                )
    elif name == 'efficientnet_b0':
        preprocessed = tf.keras.applications.efficientnet.preprocess_input(
            tf.cast(inputs, tf.float32))
        embedding = keras.applications.EfficientNetB0(include_top = False,
                                                weights='imagenet', 
                                                pooling = 'avg',
                                                )
    else:
        raise ValueError(
            "Unknown backbone %r; expected 'ResNet50' or 'efficientnet_b0'" % (name,))

    # Freeze the pretrained backbone; only the ArcFace weights are trained.
    embedding.trainable = False

    x = embedding(preprocessed)
    x = tf.keras.layers.Flatten()(x)
    # ArcFace.call already ends in a softmax, so no further Softmax layer is
    # added: a second softmax would flatten the probabilities fed to the loss.
    outputs = ArcFace(n_classes=n_classes)([x, labels])
    model = tf.keras.Model([inputs, labels], outputs)

    callbacks = [
        keras.callbacks.ModelCheckpoint("save_at_{epoch}.h5"),
    ]

    model.compile(optimizer=tf.keras.optimizers.Adam(),
                  loss='categorical_crossentropy', 
                  # NOTE(review): the metric named 'MAP@5' is Precision(top_k=5)
                  # - confirm that this is the quantity that should be reported.
                  metrics=['accuracy', tf.metrics.Precision(top_k=5, name = 'MAP@5')],
                 )
    model.summary()

    return model, callbacks



# Build and compile the ResNet50-backed model. `callbacks` receives the
# checkpoint callback list that make_model returns alongside the model.
model, callbacks = make_model('ResNet50')



In [None]:
# Train for 30 epochs of 100 batches each, validating on 10 batches per epoch.
# With save_best_only=True the checkpoint only overwrites model.hdf5 when the
# monitored quantity improves. `a` holds the value returned by fit.
a = model.fit(train_generator,  epochs=30,
          # Keras documents `callbacks` as a list of Callback instances, so
          # the checkpoint is wrapped in a list instead of being passed bare.
          callbacks=[tf.keras.callbacks.ModelCheckpoint('model.hdf5',
                     verbose=1, save_best_only=True)],
          validation_data=validation_generator , 
          # Left disabled in the original notebook:
          #class_weight = hotel_weight,)
           # use_multiprocessing = True)
          steps_per_epoch =100, 
          validation_steps = 10)

In [None]:
#np.argwhere(train_gen[0][1][0]==1)#

In [None]:
#plt.imshow(train_gen[0][0][10]/255)
#print(train_gen[0][1][10])205.63286