# Load Metadata from CSV

In [1]:
import numpy as np
import pandas as pd

In [2]:
def load_df(path):
    """Read the metadata CSV and pull out the image-id and label columns.

    Parameters
    ----------
    path : str
        Location of a CSV file that contains at least the columns ``id``
        and ``landmark_id``.

    Returns
    -------
    tuple
        ``(frame, ids, labels)`` where ``frame`` is the complete DataFrame,
        ``ids`` is its ``id`` column and ``labels`` is its ``landmark_id``
        column.
    """
    metadata = pd.read_csv(path)
    ids, labels = metadata['id'], metadata['landmark_id']
    return metadata, ids, labels

# Metadata CSV for the sampled training set (path is relative to this notebook).
path = '../data/metadata/train_sample_temp.csv'

# train_df: full metadata frame, X: image ids, y: landmark ids (labels).
train_df, X, y = load_df(path)

In [3]:
# Number of distinct landmark ids in the metadata.
NUM_CLASSES = len(train_df['landmark_id'].unique())
NUM_CLASSES

6

# Separating into Training, Validation, and Test Sets

In [4]:
# Give every selected landmark id a contiguous class index:
# {landmark_id: class}, e.g. {995: 0, 12345: 1, ...}
landmarks = train_df['landmark_id'].unique()
landmark_to_idx = {
    landmark_id: class_idx
    for class_idx, landmark_id in enumerate(landmarks)
}

In [5]:
from sklearn.model_selection import StratifiedShuffleSplit

# First split: hold out 22% of the samples for (validation + test),
# stratified on the landmark id.
split_rule = StratifiedShuffleSplit(n_splits=1, test_size=0.22, random_state=9)
train_id, test_id = next(split_rule.split(X, y))
X_train, y_train = X.iloc[train_id], y.iloc[train_id]
X_val_test, y_val_test = X.iloc[test_id], y.iloc[test_id]

# Second split: cut the held-out part in half -> validation and test.
split_rule2 = StratifiedShuffleSplit(n_splits=1, test_size=0.5, random_state=9)
train_id, test_id = next(split_rule2.split(X_val_test, y_val_test))
X_valid, y_valid = X_val_test.iloc[train_id], y_val_test.iloc[train_id]
X_test, y_test = X_val_test.iloc[test_id], y_val_test.iloc[test_id]

In [6]:
# Report the size of every split (ids and labels should match pairwise).
for name, part in (('X_train', X_train), ('y_train', y_train),
                   ('X_valid', X_valid), ('y_valid', y_valid),
                   ('X_test', X_test), ('y_test', y_test)):
    print(name + ' shape: ', part.shape)

X_train shape:  (1275,)
y_train shape:  (1275,)
X_valid shape:  (180,)
y_valid shape:  (180,)
X_test shape:  (180,)
y_test shape:  (180,)


In [7]:
# Pair every image id with its landmark id: [(image_id, landmark_id), ...]
train_id_list = list(zip(X_train, y_train))
valid_id_list = list(zip(X_valid, y_valid))

# Data Generator
- Loads image data on the fly, according to the train/validation split defined above

In [8]:
import os
import random
import shutil
import tarfile
import cv2
import numpy as np
#from keras.utils import Sequence
from tensorflow.python.keras.utils.data_utils import Sequence
#import keras



class DataGen(Sequence):
    """Keras ``Sequence`` that reads image batches from disk on the fly.

    Every entry of ``id_list`` is an ``(image_id, landmark_id)`` pair. The
    image is read from ``../train/<landmark_id>/<image_id>.jpg``, converted
    from BGR to RGB, and its label is one-hot encoded through
    ``landmark_to_idx``. Entries whose image cannot be read, or whose
    landmark id is missing from the mapping, are skipped, so a batch may
    contain fewer than ``batch_size`` samples.

    NOTE(review): images are stacked with ``np.array`` without resizing, so
    this presumably relies on the files on disk already sharing one shape
    (the one the model expects) -- confirm.

    Parameters
    ----------
    id_list : sequence of (image_id, landmark_id)
        Samples this generator serves. The sequence itself is never mutated.
    landmark_to_idx : dict
        Maps a landmark id to its class index (column of the one-hot label).
    batch_size : int, default 128
        Number of entries taken from ``id_list`` per batch.
    verbose : int, default 1
        Accepted for backward compatibility; currently unused.
    num_classes : int, optional
        Width of the one-hot label matrix. Defaults to
        ``len(landmark_to_idx)``.

    Raises
    ------
    ValueError
        If ``batch_size`` is smaller than 1.
    """

    def __init__(self, id_list, landmark_to_idx, batch_size=128, verbose=1,
                 num_classes=None):
        if batch_size < 1:
            raise ValueError('batch_size must be a positive integer')
        self.batch_size = batch_size
        self.id_list = id_list
        self.landmark_to_idx = landmark_to_idx
        # Take the label width from the mapping instead of a notebook global
        # so the generator is self-contained.
        self.num_classes = (len(landmark_to_idx) if num_classes is None
                            else num_classes)
        # Visiting order of the samples. Shuffling this index list (rather
        # than id_list) leaves the caller's list untouched.
        self._order = list(range(len(id_list)))
        random.shuffle(self._order)

    def __getitem__(self, index):
        """Return batch number ``index`` as ``(x, y)``.

        ``x`` is the array of successfully loaded RGB images and ``y`` the
        matching one-hot labels of shape ``(len(x), num_classes)``.
        """
        # Slice by index so that every batch of an epoch is distinct, even
        # when several workers request batches concurrently.
        start = index * self.batch_size
        batch_positions = self._order[start:start + self.batch_size]

        images = []
        label_idx = []
        for position in batch_positions:
            entry = self.id_list[position]
            img_id = entry[0]
            ldmk_id = entry[1]

            # Resolve the label first: an image must never be appended
            # without its label, otherwise x and y get out of step.
            if ldmk_id not in self.landmark_to_idx:
                continue
            ldmk_idx = self.landmark_to_idx[ldmk_id]

            path = '../train/'+str(ldmk_id)+'/'+img_id+'.jpg'
            try:
                im = cv2.imread(path)
                # imread signals a missing/unreadable file by returning None.
                if im is None or im.size == 0:
                    continue
                im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)
            except cv2.error:
                # Best effort: skip images OpenCV cannot decode or convert.
                continue

            images.append(im)
            label_idx.append(ldmk_idx)

        x = np.array(images)
        y = np.zeros((len(images), self.num_classes))
        for row, class_idx in enumerate(label_idx):
            y[row, class_idx] = 1.

        return x, y

    def on_epoch_end(self):
        """Reshuffle the sample order so the next epoch sees new batches."""
        random.shuffle(self._order)

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is dropped)."""
        return len(self.id_list) // self.batch_size

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [9]:
# One generator per split; both use the default batch size.
training_generator = DataGen(id_list=train_id_list, landmark_to_idx=landmark_to_idx)
validation_generator = DataGen(id_list=valid_id_list, landmark_to_idx=landmark_to_idx)

# Modeling

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, AveragePooling2D, Flatten, Dense

In [11]:
# Small LeNet-style CNN: two conv/average-pool stages followed by three
# dense layers, ending in a softmax over the landmark classes.
model = Sequential([
    # Feature extraction on 128x128 RGB inputs
    Conv2D(filters=6, kernel_size=(3, 3), activation='relu', input_shape=(128,128,3)),
    AveragePooling2D(),
    Conv2D(filters=16, kernel_size=(3, 3), activation='relu'),
    AveragePooling2D(),
    # Classifier head
    Flatten(),
    Dense(units=120, activation='relu'),
    Dense(units=84, activation='relu'),
    Dense(units=NUM_CLASSES, activation = 'softmax'),
])

In [12]:
# Labels are one-hot encoded, hence the categorical loss and metric.
model.compile(
    loss='categorical_crossentropy',
    metrics=['categorical_accuracy'],
    optimizer='adam',
)

In [13]:
# Train for 10 epochs, loading batches in 8 worker processes.
# NOTE(review): newer TensorFlow releases deprecate fit_generator in favour
# of fit -- confirm against the installed version before switching.
model.fit_generator(
    generator=training_generator,
    validation_data=validation_generator,
    epochs=10,
    workers=8,
    use_multiprocessing=True,
    verbose=1,
)

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f60cc271d10>