## Starter notebook for data loading, preprocessing / augmentation and applying a simple CNN
### Acknowledgments: 
####    **https://www.kaggle.com/vstepanenko/batch-image-viewer**
####    This excellent notebook for sampling and viewing batches of images

In [None]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator 
import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np 
import random 
import cv2
import os 

In [None]:
# Locations of the competition's training images and of the label table
train_dir = '/kaggle/input/landmark-recognition-2021/train'
train_df = pd.read_csv('/kaggle/input/landmark-recognition-2021/train.csv')

# Basic data exploration: dataset size and number of distinct landmarks
n_images = len(train_df)
n_landmarks = len(train_df["landmark_id"].unique())
print(f'There are {n_images} training images and {n_landmarks} classes.\n')

# Most frequent classes (value_counts orders by count, largest first)
print('The top 10 classes are as follows:\n')
print(train_df['landmark_id'].value_counts().head(10))

# Relative path of every image below the train directory: the first three
# characters of the id name the nested folders, e.g. 'abcd' -> 'a/b/c/abcd.jpg'
train_df['id_path'] = [
    '/'.join(image_id[:3]) + f'/{image_id}.jpg' for image_id in train_df['id']
]

# Store each label as a list of strings (a one-element list per row unless
# the stringified id contains a comma)
train_df["landmark_id"] = train_df["landmark_id"].astype(str).str.split(",")

In [None]:
# Plot n random images
def plot_imgs(n):
    """Show ``n`` randomly chosen training images in a single row.

    Each image is located through its ``id_path`` entry in ``train_df``
    (relative to ``train_dir``), resized to 224x224 and titled with the
    first entry of its ``landmark_id`` list.

    Args:
        n: Number of images to display. Rows are drawn independently, so
            the same image may appear more than once. Nothing is drawn
            when ``n`` is smaller than 1.

    Raises:
        FileNotFoundError: If OpenCV cannot read a selected image file.
    """
    if n < 1:
        return

    for position in range(n):
        plt.subplot(1, n, position + 1)

        # randrange excludes the upper bound, so every row (including row 0)
        # can be picked and the index can never run past the last row.
        row = train_df.iloc[random.randrange(len(train_df))]

        path = os.path.join(train_dir, row['id_path'])
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals an unreadable file by returning None
            raise FileNotFoundError(f'Could not read image: {path}')

        img = cv2.resize(img, (224, 224))
        # OpenCV decodes to BGR channel order; matplotlib expects RGB
        plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
        plt.title(row['landmark_id'][0])

    # Render once, after all subplots exist, so the images share one row
    plt.show()


plot_imgs(3)

In [None]:
# Data preprocessing and augmentation
# Pixel values are rescaled by 1/255; the remaining entries configure the
# random zoom, shift, flip, rotation and brightness ranges of the generator.
augmentation_settings = {
    'rescale': 1./255,
    'zoom_range': 0.2,
    'width_shift_range': 0.4,
    'height_shift_range': 0.4,
    'horizontal_flip': True,
    'vertical_flip': True,
    'rotation_range': 60,
    'brightness_range': [0.8, 1.1],
}
image_generator = ImageDataGenerator(**augmentation_settings)

# Load data from dataframe: a random subset of 20000 rows, served as
# shuffled batches of 32 images resized to 224x224
train_subset = train_df.sample(n=20000)
train_batches = image_generator.flow_from_dataframe(
    dataframe=train_subset,
    directory=train_dir,
    x_col='id_path',
    y_col='landmark_id',
    class_mode='categorical',
    target_size=(224, 224),
    batch_size=32,
    shuffle=True,
)

# Get the number of classes present in the sampled subset
classes = len(train_batches.class_indices)
 


In [None]:
def _conv_stage(filters, dropout=False, **conv_kwargs):
    """Build one convolutional stage as a list of layers.

    The stage is Conv2D (4x4 kernel, stride 2, 'same' padding) -> ReLU ->
    BatchNormalization -> 2x2 MaxPool, optionally followed by Dropout(0.3).
    Extra keyword arguments are forwarded to the Conv2D layer.
    """
    stage = [
        tf.keras.layers.Conv2D(filters=filters,
                               kernel_size=(4, 4),
                               strides=(2, 2),
                               padding='same',
                               **conv_kwargs),
        tf.keras.layers.Activation(activation='relu'),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.MaxPool2D(pool_size=(2, 2), strides=2),
    ]
    if dropout:
        stage.append(tf.keras.layers.Dropout(0.3))
    return stage


model = tf.keras.models.Sequential([
    # Four convolutional stages; only the first declares the input shape
    *_conv_stage(64, input_shape=(224, 224, 3)),
    *_conv_stage(128, dropout=True),
    *_conv_stage(64, dropout=True),
    *_conv_stage(32),

    # Fully connected classifier head
    tf.keras.layers.Flatten(),
    tf.keras.layers.Dense(4096, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(512, activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.3),
    # One softmax output per class found by the data generator
    tf.keras.layers.Dense(classes, activation='softmax'),
])




# Compile with categorical cross-entropy and the Adam optimizer.
# `learning_rate` is the supported argument name; the old `lr` alias is
# deprecated and rejected by newer Keras releases.
model.compile(loss=tf.keras.losses.CategoricalCrossentropy(),
              optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
              metrics=['accuracy'])

model.summary()
# NOTE(review): print() only shows the repr of whatever plot_model returns;
# it does not display the diagram itself - confirm this is intended.
print(tf.keras.utils.plot_model(model))

# Train for 10 epochs; the returned history feeds the plotting cell below
model_history = model.fit(train_batches,
                          epochs=10)

In [None]:
def plot_loss_curves(history):
    """Plot loss and accuracy curves from a training history.

    When the history contains validation metrics (a "val_loss" entry),
    loss and accuracy are drawn in two separate figures, each comparing the
    training and validation curves. Otherwise training accuracy and
    training loss share one figure with twin y-axes.

    Args:
        history: Object whose ``history`` attribute is a dict with
            per-epoch "loss" and "accuracy" sequences and, optionally,
            "val_loss" and "val_accuracy" (e.g. the value returned by
            ``model.fit``).

    Returns:
        None. The curves are drawn on matplotlib figures.
    """
    metrics = history.history
    loss = metrics["loss"]
    accuracy = metrics["accuracy"]
    epochs = range(len(loss))  # number of epochs

    if "val_loss" in metrics:
        val_loss = metrics["val_loss"]
        val_accuracy = metrics["val_accuracy"]

        # Plot losses. The figure size must be passed to plt.figure();
        # assigning plt.figsize has no effect on the plot.
        plt.figure(figsize=(10, 7))
        plt.plot(epochs, loss, label='training_loss')
        plt.plot(epochs, val_loss, label='val_loss')
        plt.title('loss')
        plt.xlabel('epochs')
        plt.legend()

        # Plot accuracy in its own figure of the same size
        plt.figure(figsize=(10, 7))
        plt.plot(epochs, accuracy, label='training_accuracy')
        plt.plot(epochs, val_accuracy, label='val_accuracy')
        plt.title('accuracy')
        plt.xlabel('epochs')
        plt.legend()

    else:
        # Plot training loss and accuracy together on twin y-axes
        fig, ax1 = plt.subplots(figsize=(11, 9))
        accuracy_lines = ax1.plot(epochs, accuracy, label='training_accuracy')
        ax1.set_xlabel('epochs')
        ax1.set_ylabel('Training Accuracy')

        ax2 = ax1.twinx()
        loss_lines = ax2.plot(epochs, loss, label='training_loss',
                              color='tab:red')
        ax2.set_ylabel('Training Loss')

        # One shared legend so the labels of both axes are actually shown
        all_lines = accuracy_lines + loss_lines
        ax1.legend(all_lines, [line.get_label() for line in all_lines])


plot_loss_curves(model_history)

## TO DO:
* Split to train and validation batches 
* Apply transfer learning 
* Experiment with different class selections and class-weights 
* Submit results

### An updated version will follow soon  