In [None]:
# Random seed constant for the notebook (presumably meant for reproducible sampling/shuffling - confirm).
seed = 61299
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import matplotlib.pyplot as plt
import os

In [None]:
#Added by Chirag
from IPython.display import Image, display

# Locations of the competition data, all relative to the dataset root.
BASE_PATH = '../input/landmark-recognition-2020'
TRAIN_DIR = BASE_PATH + '/train'
TEST_DIR = BASE_PATH + '/test'

# Load the training index and the sample submission in one pass.
print('Reading data...')
train, submission = (
    pd.read_csv(BASE_PATH + '/' + csv_name)
    for csv_name in ('train.csv', 'sample_submission.csv')
)
print('Reading data completed')

train.head()

In [None]:
#Human-readable label for each of the three landmark ids of interest.
#This table is the single source of truth for the ids used below.
labels = pd.DataFrame({"landmark_id": [113209, 177870, 194914],
                       "label": ["mountain", "city", "boat"]})

#Keep only the training rows that belong to those three landmarks
landmark = train[train["landmark_id"].isin(labels["landmark_id"])]

#Attach the label to every row, then shuffle the rows.
#The shuffle is seeded so the row order (and anything that later splits the
#frame by position) is the same on every run.
landmark = (
    pd.merge(landmark, labels, on="landmark_id", how="left")
    .sample(frac=1, random_state=seed)
)

#Set full path to the image
def get_image_path(image_id):
    """Return the path of a training image.

    The file lives under TRAIN_DIR in three nested folders named after the
    first, second and third character of ``image_id``.
    """
    first, second, third = image_id[0], image_id[1], image_id[2]
    relative_path = f'{first}/{second}/{third}/{image_id}.jpg'
    return os.path.join(TRAIN_DIR, relative_path)
#Resolve every image id to the location of its file
landmark["file_path"] = landmark["id"].map(get_image_path)

#Report how many images each label has, then preview the frame
label_counts = landmark["label"].value_counts()
print(label_counts)
landmark.head()


In [None]:
#Visualize images
import PIL
from PIL import Image, ImageDraw

def display_images(images,title=None):
    """Show training images in a 5-column grid, each captioned with its ids and label.

    Parameters
    ----------
    images : iterable of str
        Image ids. Each id must be present in the module-level ``landmark``
        frame, which is used to look up its landmark id, and that landmark id
        must be present in ``labels``.
    title : str, optional
        Figure-level title; omitted when falsy.

    The grid grows by rows to fit any number of images (25 images give the
    usual 5x5 layout); slots without an image are left blank.
    """
    image_ids = list(images)
    n_cols = 5
    #Enough rows for every image, and at least one so the figure is valid
    n_rows = max(1, (len(image_ids) + n_cols - 1) // n_cols)
    #squeeze=False keeps ax two-dimensional even when there is a single row
    f, ax = plt.subplots(n_rows, n_cols, figsize=(18, 22 * n_rows / 5), squeeze=False)
    if title:
        f.suptitle(title, fontsize = 30)

    for i, image_id in enumerate(image_ids):
        #Row by floor division, column by remainder
        cell = ax[i // n_cols, i % n_cols]

        #Close the file as soon as it has been drawn so handles are not leaked
        with Image.open(get_image_path(image_id)) as image:
            cell.imshow(image)
        cell.axis('off')

        #Add titles
        image_key = image_id.split('.')[0]
        landmark_id = landmark[landmark.id==image_key].landmark_id.values[0]
        this_label = labels[labels["landmark_id"] == landmark_id]["label"].values[0]
        cell.set_title(f"ID: {image_key}\nLandmark_id: {landmark_id}\nLabel: {this_label}", fontsize="12")

    #Hide the frames of any grid slots that did not receive an image
    for cell in ax.ravel()[len(image_ids):]:
        cell.axis('off')

    plt.show()

In [None]:
# pick up to 25 random images from the dataset and show them
#Capped at the frame size so a small frame does not make sample() raise,
#and seeded so the same preview is drawn on every run.
n_preview = min(25, len(landmark))
samples = landmark.sample(n_preview, random_state=seed).id.values
display_images(samples)

In [None]:
#Preview the first rows of the landmark frame
landmark.head()

In [None]:
#Now ready for keras flow from dataframe

from tensorflow.keras.preprocessing.image import ImageDataGenerator

#One generator object scales pixel values by 1/255 and reserves 25% of the rows for validation
imagegen = ImageDataGenerator(rescale=1/255., validation_split=0.25)

#Settings shared by the training and validation flows; only `subset` differs
flow_kwargs = dict(
    dataframe=landmark,
    directory=None,
    x_col="file_path",
    y_col="label",
    target_size=(256, 256),
    class_mode="sparse",
    batch_size=128,
    shuffle=True,
    seed=42,
)

#Training data
train_generator = imagegen.flow_from_dataframe(subset="training", **flow_kwargs)
#Validation data
val_generator = imagegen.flow_from_dataframe(subset="validation", **flow_kwargs)

In [None]:
#Pull a single batch and report its shapes and the value range of one image channel
batch_data, batch_labels = next(iter(train_generator))
print(f"data batch shape: {batch_data.shape}")
print(f"labels batch shape: {batch_labels.shape}")
#Channel 0 of the second image in the batch
first_channel = batch_data[1, :, :, 0]
print(f"max value: {np.max(first_channel)}")
print(f"min value: {np.min(first_channel)}")


In [None]:
#Develop a simple linear classifier (this is a multinomial logistic model)
from tensorflow import keras
from tensorflow.keras import layers

#Functional API model
#Functional API: flatten the 256x256x3 input and feed a single 3-way softmax layer
inputs = keras.Input(shape=(256, 256, 3))
flattened = layers.Flatten()(inputs)
outputs = layers.Dense(units=3, activation="softmax")(flattened)

linear_model = keras.Model(inputs=inputs, outputs=outputs, name="linear_model")
linear_model.summary()


In [None]:
#Draw the linear model's layer graph, with shapes shown
keras.utils.plot_model(linear_model,show_shapes=True)

In [None]:
import matplotlib.pyplot as plt

def plot_history(history, log = False):
    """Plot loss and accuracy per epoch, side by side.

    Parameters
    ----------
    history : object with a ``history`` dict (e.g. the return value of ``fit``)
        Must contain "loss" and "accuracy". The "val_loss" and "val_accuracy"
        curves are drawn only when those keys are present, so a run without
        validation data can still be plotted.
    log : bool, default False
        When true, both panels use a logarithmic y-axis.
    """
    metrics = history.history
    #Epochs are numbered from 1 on the x-axis
    epc = np.arange(len(metrics["loss"]))+1

    fig, ax = plt.subplots(1,2, figsize = (16,8))

    #(axis, metric key, panel title, training colour, validation colour)
    panels = (
        (ax[0], "loss", "Loss", "blue", "red"),
        (ax[1], "accuracy", "Accuracy", "black", "green"),
    )
    for axis, key, panel_title, train_color, val_color in panels:
        axis.plot(epc, metrics[key], color = train_color, label = "train")

        val_key = "val_" + key
        if val_key in metrics:
            axis.plot(epc, metrics[val_key], color = val_color, label = "validation")

        axis.set_title(panel_title)
        axis.set_xlabel("Epoch")
        #Legend tells the training and validation curves apart
        axis.legend()

        if log:
            axis.set_yscale('log')
    

In [None]:
#Compile and train
linear_model.compile(loss = "sparse_categorical_crossentropy",
                    optimizer = keras.optimizers.Adam(learning_rate = 1e-4),
                    metrics = ["accuracy"])

#use_multiprocessing is deliberately not passed: Keras 3 removed it from
#Model.fit (passing it raises TypeError), and leaving it out keeps the call
#valid on both older and newer Keras versions.
history = linear_model.fit(x = train_generator,
                           validation_data = val_generator,
                           epochs = 30,
                           verbose = 1)


plot_history(history)

In [None]:
#Fit a neural network with two hidden layers (a multilayer perceptron or MLP)
#Functional API model
drop_prob = 0.3 #dropout probability

inputs = keras.Input(shape = (256,256,3))
x = layers.Flatten()(inputs)
#Dense defaults to no activation; without a nonlinearity the stacked layers
#would collapse into a single linear map, so each hidden layer uses ReLU.
x = layers.Dense(512, activation = "relu")(x)
x = layers.Dropout(drop_prob)(x)
x = layers.Dense(512, activation = "relu")(x)
x = layers.Dropout(drop_prob)(x)
outputs = layers.Dense(3, activation = "softmax")(x)

mlp_model = keras.Model(inputs = inputs, outputs = outputs, name = "mlp_model")
mlp_model.summary()

In [None]:
#Draw the MLP's layer graph, with shapes shown
keras.utils.plot_model(mlp_model,show_shapes=True)

In [None]:
mlp_model.compile(loss = "sparse_categorical_crossentropy",
                    optimizer = keras.optimizers.Adam(learning_rate = 3e-4),
                    metrics = ["accuracy"])

#Training is switched off by default. An explicit flag replaces the previous
#triple-quoted string, which was echoed as the cell's output.
TRAIN_MLP = False

if TRAIN_MLP:
    #use_multiprocessing is not passed: Keras 3 removed it from Model.fit
    history = mlp_model.fit(x = train_generator,
                            validation_data = val_generator,
                            epochs = 50,
                            verbose = 1)

    plot_history(history)