# Hotel Recognition to Combat Human Trafficking
![Hotel Trafficking](https://polarisproject.org/wp-content/uploads/2019/01/800x640-marriott-blog.jpg)
## Description

Victims of human trafficking are often photographed in hotel rooms, as in the examples below. Identifying these hotels is vital to trafficking investigations but poses particular challenges due to the low quality of the images and uncommon camera angles. Even without victims in the images, hotel identification in general is a challenging fine-grained visual recognition task with a huge number of classes and potentially high intraclass and low interclass variation. In order to support research into this challenging task and create image search tools for human trafficking investigators, we created the TraffickCam mobile application, which allows everyday travelers to submit photos of their hotel rooms. Read more about [TraffickCam on TechCrunch](https://techcrunch.com/2016/06/25/traffickcam/).

## Task
In this contest, competitors are tasked with identifying the hotel seen in test images from the TraffickCam dataset, based on a large gallery of training images with known hotel IDs.
Our team currently supports an image search system used at the National Center for Missing and Exploited Children in human trafficking investigations. Novel and interesting approaches have the potential to be incorporated in this search system.

In [None]:
!/opt/conda/bin/python3.7 -m pip install --upgrade pip
! pip install -q efficientnet

In [None]:
#importing necessary libraries
import tensorflow as tf
from kaggle_datasets import KaggleDatasets
import efficientnet.tfkeras as efn
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import PIL

In [None]:
# Let tf.data pick parallelism and prefetch buffer sizes automatically.
AUTO = tf.data.experimental.AUTOTUNE

# Probe for a TPU; a ValueError from the resolver is treated as "no TPU".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU found: fall back to the strategy returned by tf.distribute.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# Training images are read from the GCS bucket of the attached Kaggle dataset.
GCS_DS_PATH = KaggleDatasets().get_gcs_path()
TRAIN_PATH = f"{GCS_DS_PATH}/train_images/"

train_df = pd.read_csv("../input/hotel-id-2021-fgvc8/train.csv")

print("Number of unique chains: ", train_df.chain.nunique())
print("Number of unique hotel_ids: ", train_df.hotel_id.nunique())

# NOTE: the names read inverted relative to what they hold:
#   label2id: contiguous class index -> original hotel_id
#   id2label: original hotel_id      -> contiguous class index
unique_hotel_ids = train_df.hotel_id.unique()
label2id = dict(enumerate(unique_hotel_ids))
id2label = {hotel_id: index for index, hotel_id in label2id.items()}

# Replace the raw hotel ids by their contiguous class index.
train_df["hotel_id"] = train_df["hotel_id"].map(id2label)

# Relative image location: "<chain>/<image file name>".
chain_dir = train_df["chain"].astype(str)
train_df["path"] = chain_dir + "/" + train_df["image"]

# Order rows by path, then keep only the first row for each image name.
train_df = train_df.sort_values("path").drop_duplicates(subset=["image"])

train_df.head()

# Data Visualization

In [None]:
# Image counts per chain and per hotel id (value_counts sorts them by frequency).
chain_dict = train_df.chain.value_counts()
chains = list(chain_dict.index)
chains_count = list(chain_dict.values)

id_dict = train_df.hotel_id.value_counts()
ids = list(id_dict.index)
ids_count = list(id_dict.values)

# One entry per subplot: (x values, bar heights, title, x-axis label).
panels = [
    (chains, chains_count, "Number of instances per chain", "Chains"),
    (ids, ids_count, "Number of instances per each ID", "Unique Id's"),
]

plt.figure(figsize=(20, 10))
for position, (categories, counts, title, xlabel) in enumerate(panels, start=1):
    plt.subplot(1, 2, position)
    plt.bar(categories, counts)
    plt.title(title, fontweight="bold", fontsize=20)
    plt.xlabel(xlabel, fontsize=30)
    # A single call sets both rotation and weight; the former second
    # xticks(fontweight="bold") call per panel repeated the same setting.
    plt.xticks(rotation=20, fontweight="bold")
    plt.ylabel("Count", fontsize=30)
plt.show()

In [None]:
# Number of images for every (chain, hotel_id) pair.
# GroupBy.size() yields the counts directly; the previous version overwrote
# the entries of the groupby object's `groups` mapping in place to obtain them.
temp_df = train_df.groupby(["chain", "hotel_id"])
group_sizes = temp_df.size()

# Rows of [chain, encoded hotel id, image count].
count_ls = [
    [chain, hotel, int(count)]
    for (chain, hotel), count in group_sizes.items()
]
# label2id turns the encoded class index back into the original hotel id.
print("The chain with label '{}' has count of '{}' that belongs to hotel id '{}'".format(count_ls[0][0],
                                                                                    count_ls[0][2],
                                                                                    label2id[count_ls[0][1]]))
ls1 = [row[0] for row in count_ls]
ls2 = [row[1] for row in count_ls]
ls3 = [row[2] for row in count_ls]

fig = plt.figure(figsize=(10, 10))
ax = plt.axes(projection="3d")
ax.scatter3D(ls1, ls2, ls3, color="green")
ax.set_xlabel('Chain', fontweight="bold", fontsize=20)
ax.set_ylabel('Unique Id', fontweight="bold", fontsize=20)
ax.set_zlabel('Count', fontweight="bold", fontsize=20)
ax.set_title("Varition of count with Chain and unique Id", fontweight="bold", fontsize=25)
#ax.view_init(40,0)
plt.show()

### We can see that many images fall in the first and last chains (0-10, 60-88), spread almost equally among the unique IDs

# Data Pipeline

In [None]:
# --- Model / input geometry ---
NUM_CLASSES = train_df.hotel_id.nunique()   # one output unit per encoded hotel id
HEIGHT = WIDTH = 256                        # images are resized to HEIGHT x WIDTH
CHANNELS = 3

# --- Batching ---
BATCH_SIZE = strategy.num_replicas_in_sync * 8   # 8 samples per replica
SEED = 143
AUTO = tf.data.experimental.AUTOTUNE

# --- Train / validation split (first 80% of the rows are used for training) ---
SPLIT = int(len(train_df) * 0.8)
STEPS_PER_EPOCH = SPLIT // BATCH_SIZE
VALID_STEPS = (len(train_df) - SPLIT) // BATCH_SIZE

In [None]:
def process_img(filepath, label):
    """Read the JPEG at ``filepath`` and return ``(image, label)``.

    The file is decoded with ``CHANNELS`` channels, converted to float32 via
    ``tf.image.convert_image_dtype`` and resized to ``HEIGHT`` x ``WIDTH``.
    ``label`` is passed through untouched.
    """
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, [HEIGHT, WIDTH])
    return resized, label


def data_augment(image, label):
    """Apply random flips, rotations, colour jitter and crops to ``image``.

    Six independent uniform draws in [0, 1) decide which transformations are
    applied. The result is always resized back to ``HEIGHT`` x ``WIDTH``;
    ``label`` is returned unchanged.
    """
    # One draw per decision, taken up front.
    transpose_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    rotate_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    saturation_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    contrast_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    brightness_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    crop_roll = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

    # Spatial: random mirror on both axes, plus a transpose for draws above .75.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)
    if transpose_roll > .75:
        image = tf.image.transpose(image)

    # Rotation: 270 / 180 / 90 degrees for the top three quarters of the draw,
    # no rotation for the lowest quarter.
    if rotate_roll > .75:
        image = tf.image.rot90(image, k=3)
    elif rotate_roll > .5:
        image = tf.image.rot90(image, k=2)
    elif rotate_roll > .25:
        image = tf.image.rot90(image, k=1)

    # Pixel-level jitter: each effect is applied when its draw is >= .4.
    if saturation_roll >= .4:
        image = tf.image.random_saturation(image, lower=.7, upper=1.3)
    if contrast_roll >= .4:
        image = tf.image.random_contrast(image, lower=.8, upper=1.2)
    if brightness_roll >= .4:
        image = tf.image.random_brightness(image, max_delta=.1)

    # Cropping: central crops of 70% / 80% / 90% for draws above .9 / .8 / .7,
    # a random square crop for draws in (.4, .7], otherwise no crop.
    if crop_roll > .9:
        image = tf.image.central_crop(image, central_fraction=.7)
    elif crop_roll > .8:
        image = tf.image.central_crop(image, central_fraction=.8)
    elif crop_roll > .7:
        image = tf.image.central_crop(image, central_fraction=.9)
    elif crop_roll > .4:
        crop_size = tf.random.uniform([], int(HEIGHT*.8), HEIGHT, dtype=tf.int32)
        image = tf.image.random_crop(image, size=[crop_size, crop_size, CHANNELS])

    # Crops change the spatial size, so bring everything back to the model size.
    image = tf.image.resize(image, [HEIGHT, WIDTH])
    return image, label

In [None]:
# Shuffle the rows once (reproducibly) before splitting. train_df is sorted by
# path, so an unshuffled 80/20 cut would put only the last chains in validation.
shuffled_df = train_df.sample(frac=1.0, random_state=SEED)

# Take paths and labels from the same rows so each image keeps its own label.
# A separate glob of the bucket is not guaranteed to match train_df in order
# or in length (train_df has had duplicate image names dropped).
files_ls = (TRAIN_PATH + shuffled_df["path"]).tolist()
# int32 rather than uint8: uint8 would wrap every class index >= 256.
labels = shuffled_df["hotel_id"].values.astype("int32")

dataset = tf.data.Dataset.from_tensor_slices((files_ls, labels))
dataset = dataset.map(process_img, num_parallel_calls=AUTO)

train_ds = dataset.take(SPLIT)
val_ds = dataset.skip(SPLIT)

# Augment after cache() so new random transformations are drawn on every pass
# (caching augmented images would freeze them), and only on the training split.
# NOTE(review): cache() keeps the decoded images in memory - confirm it fits.
train_ds = (
    train_ds.cache()
    .repeat()
    .shuffle(2048)
    .map(data_augment, num_parallel_calls=AUTO)
    .batch(BATCH_SIZE)
    .prefetch(AUTO)
)
# Validation images are only decoded and resized, never augmented.
val_ds = val_ds.cache().repeat().batch(BATCH_SIZE).prefetch(AUTO)
print("Data Pipeline")

# Model Function

In [None]:
def create_model():
    """Build the classifier: EfficientNetB4 backbone plus a softmax head.

    The backbone is created without its top layers and with the
    'noisy-student' weights. Its output is globally average-pooled and fed to
    a float32 dense softmax layer with ``NUM_CLASSES`` units.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    backbone = efn.EfficientNetB4(
        include_top=False,
        weights='noisy-student',
        input_shape=[HEIGHT, WIDTH, 3],
    )

    pooled = tf.keras.layers.GlobalAveragePooling2D()(backbone.output)
    class_probs = tf.keras.layers.Dense(
        NUM_CLASSES, activation="softmax", dtype='float32'
    )(pooled)

    return tf.keras.Model(inputs=backbone.input, outputs=class_probs)

# Build one model instance at module level; uncomment the summary call below
# to inspect its layers.
# NOTE(review): this instance is created outside strategy.scope(), and the
# training cell builds a fresh model inside the scope - confirm whether this
# extra instance is still needed.
model = create_model()
#model.summary()

# Compiling Function

In [None]:
import tensorflow_addons as tfa

def compile_model(model, lr=0.0001):
    """Compile ``model`` for sparse-label classification and return it.

    Args:
        model: The Keras model to compile (compiled in place).
        lr: Learning rate passed to the Adam optimizer.

    Returns:
        The same ``model`` object, compiled with Adam, sparse categorical
        cross-entropy and a sparse categorical accuracy metric that is
        reported under the name 'categorical_accuracy'.
    """
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = tf.keras.optimizers.Adam(learning_rate=lr)

    # Labels are integer class indices, hence the sparse loss and metric.
    loss = tf.keras.losses.SparseCategoricalCrossentropy()
    # Alternative loss left disabled by the author:
    #loss = tfa.losses.TripletSemiHardLoss()
    metrics = [
        tf.keras.metrics.SparseCategoricalAccuracy(name='categorical_accuracy')
    ]

    model.compile(optimizer=optimizer, loss=loss, metrics=metrics)

    return model

# Callbacks Function

In [None]:
def create_callbacks():
    """Return the training callbacks: checkpoint, LR reduction, early stop.

    All three watch 'val_categorical_accuracy' in 'max' mode:
      * ModelCheckpoint writes the best model so far to './best_model.h5'.
      * ReduceLROnPlateau multiplies the learning rate by 0.1 after a
        patience of 3.
      * EarlyStopping stops training after a patience of 10.
    """
    # Shared monitoring settings so the three callbacks cannot drift apart.
    watch = {"monitor": 'val_categorical_accuracy', "mode": 'max'}

    return [
        tf.keras.callbacks.ModelCheckpoint(
            filepath='./best_model.h5',
            save_best_only=True,
            verbose=1,
            **watch,
        ),
        tf.keras.callbacks.ReduceLROnPlateau(
            factor=0.1,
            patience=3,
            verbose=0,
            **watch,
        ),
        tf.keras.callbacks.EarlyStopping(
            patience=10,
            verbose=1,
            **watch,
        ),
    ]

# Training

In [None]:
EPOCHS = 1
VERBOSE = 1

# Reset Keras global state before building the model that will be trained.
tf.keras.backend.clear_session()

# Build, compile and fit inside the distribution strategy's scope.
with strategy.scope():

    model = compile_model(create_model(), lr=0.0001)
    callbacks = create_callbacks()

    # Both datasets repeat, so the step counts delimit an epoch / a
    # validation pass.
    history = model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        steps_per_epoch=STEPS_PER_EPOCH,
        validation_steps=VALID_STEPS,
        callbacks=callbacks,
        verbose=VERBOSE,
    )

# History Plotting

In [None]:
# Per-epoch curves recorded by model.fit.
recorded = history.history
acc = recorded['categorical_accuracy']
val_acc = recorded['val_categorical_accuracy']
loss = recorded['loss']
val_loss = recorded['val_loss']
epochs_range = range(len(recorded['val_loss']))

# One entry per subplot:
# (training curve, training label, validation curve, validation label,
#  legend position, title).
curve_panels = [
    (acc, 'Training Categorical Accuracy',
     val_acc, 'Validation Categorical Accuracy',
     'lower right', 'Training and Validation Categorical Accuracy'),
    (loss, 'Training Loss',
     val_loss, 'Validation Loss',
     'upper right', 'Training and Validation Loss'),
]

plt.figure(figsize=(8, 8))
for slot, (train_curve, train_label, val_curve, val_label, legend_loc, title) in enumerate(curve_panels, start=1):
    plt.subplot(1, 2, slot)
    plt.plot(epochs_range, train_curve, label=train_label)
    plt.plot(epochs_range, val_curve, label=val_label)
    plt.legend(loc=legend_loc)
    plt.title(title)
plt.show()