### Overall findings about data:
#### Total 27 logos in data
#### 809 Training images and 270 testing images out of which only 135 are labeled
#### training set has 30 images per class but one image is missing
#### test set has 5 images per class

# Localizer model

In [1]:
import os
import cv2
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

%matplotlib inline

#### subset column is dropped since it is of no use for the predictions
#### also dropping the duplicate rows

In [None]:
# Read the space-separated training annotations, discard the unused
# 'subset' column, remove repeated rows and renumber the index from 0.
data = (
    pd.read_table(
        'flickr_logos_27_dataset_training_set_annotation.txt',
        sep=' ',
        names=['filename', 'logo', 'subset', 'x1', 'y1', 'x2', 'y2'],
        index_col=False,
    )
    .drop('subset', axis=1)
    .drop_duplicates(subset=['filename', 'logo', 'x1', 'y1', 'x2', 'y2'], keep='first')
    .reset_index(drop=True)
)

### The data is split into 6 arrays: X_train, y_train, X_valid, y_valid, X_test, y_test

In [3]:
size = (224, 224)


def process_image(img_file, target_size=size):
    """Load an image file and return it as a centred, scaled RGB array.

    Parameters
    ----------
    img_file : str
        Path of the image to read.
    target_size : tuple of int, optional
        Size handed to ``cv2.resize``. The default is captured when the
        function is defined, so rebinding the module-level ``size`` later
        (the classifier section sets it to ``(64, 64)``) no longer changes
        what this function produces.

    Returns
    -------
    numpy.ndarray
        The resized RGB image after subtracting its own mean and dividing
        by 255.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None;
        # fail here with the path instead of deep inside cvtColor.
        raise FileNotFoundError("could not read image: {}".format(img_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR
    img = cv2.resize(img, target_size)  # bring every image to a common size
    img = img - img.mean()  # centre on the per-image mean
    img = img / 255  # scale
    return img


In [None]:
# Preprocess every annotated training image (rows are visited in index
# order, which is 0..n-1 after the earlier reset_index) and stack them.
# NOTE(review): process_image resizes each image, while the box columns are
# used exactly as read from the annotation file - confirm whether the
# coordinates should be rescaled to the resized image.
X = np.array([
    process_image('flickr_logos_27_dataset_images/' + image_name)
    for image_name in data['filename']
])
y = data[['x1', 'y1', 'x2', 'y2']]

#### Approaching the problem in the standard way, rather than just doing the minimum needed to get a result.
#### Hence, a separate validation set is also used.

In [None]:
from sklearn.model_selection import train_test_split
# Split images and box coordinates into training and validation parts using
# train_test_split's default proportions. No random_state is passed, so the
# split differs from run to run.
X_train, X_valid, y_train, y_valid = train_test_split(X ,y )

### We have less data than is needed to train a deep neural network, so we would normally perform data augmentation.
### However, augmentation does not help much here: the targets are bounding-box coordinates, so we cannot move the objects around. Only normalisation and whitening can be applied, so the augmentation is limited to those.

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# data generator
def create_datagen():
    """Return the ImageDataGenerator used for the localizer images.

    Only the dataset-wide normalisation options are switched on; every
    geometric or colour transformation is disabled.
    """
    options = dict(
        featurewise_center=True,  # zero the input mean over the dataset
        featurewise_std_normalization=True,  # divide by the dataset std
        zca_whitening=True,  # apply ZCA whitening
        rotation_range=0,  # no random rotations
        width_shift_range=0,  # no random horizontal shifts
        height_shift_range=0,  # no random vertical shifts
        shear_range=0,  # no shear
        zoom_range=0,  # no random zoom
        channel_shift_range=0,  # no random channel shifts
        fill_mode='nearest',  # how points outside the input boundaries are filled
        horizontal_flip=False,  # no random horizontal flips
        vertical_flip=False,  # no random vertical flips
    )
    return ImageDataGenerator(**options)


# instantiate a data generator
datagen = create_datagen()

In [None]:
from keras.models import Sequential
from keras.layers.convolutional import Convolution2D, MaxPooling2D
from keras.optimizers import SGD, Adam
from keras.utils import np_utils
from keras.models import model_from_json
from sklearn.metrics import log_loss
from keras.layers import Dense, Dropout, Flatten, Activation, BatchNormalization, regularizers

#### creating a regression model

### For object localisation we could use state-of-the-art pretrained models such as YOLO, Faster R-CNN, or Google's SSD (Single Shot Detector)
##### (Facebook has also recently released such a model).
### However, I could not find an official implementation of these models in the Keras documentation. So, instead of using models that other people have created and open-sourced, I have chosen to create my own model.

In [None]:
def createmodel():
    """Build and compile the CNN that regresses the four box coordinates.

    The network is five blocks of 3x3 convolution -> 2x2 max-pooling ->
    batch normalisation (16, 64, 64, 64, 64 filters), followed by two
    dropout-regularised dense layers and a 4-unit linear output. It is
    compiled with Adam (lr=1e-4) and mean squared error.

    Returns
    -------
    keras.models.Sequential
        The compiled model.
    """
    model = Sequential()

    conv_filters = (16, 64, 64, 64, 64)
    for block_index, filters in enumerate(conv_filters):
        # ReLU on every convolution: previously only the second block had an
        # activation, leaving the other convolutions purely linear.
        conv_kwargs = dict(padding='same', kernel_initializer='he_normal',
                           activation='relu')
        if block_index == 0:
            # Images built by process_image are (height, width, channels)
            # arrays, so the input is declared channels-last. The former
            # (3, 224, 224) did not match them.
            # NOTE(review): assumes Keras' image_data_format is the default
            # 'channels_last' - confirm.
            conv_kwargs['input_shape'] = (224, 224, 3)
        model.add(Convolution2D(filters, (3, 3), **conv_kwargs))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        model.add(BatchNormalization())

    model.add(Flatten())
    # Hidden dense layers need a non-linearity; without one the three stacked
    # Dense layers collapse into a single linear map.
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(0.5))
    model.add(Dense(4))

    # As opposed to classification, a regression output stays linear.
    model.add(Activation('linear'))

    model.compile(Adam(lr=1e-4), loss='mean_squared_error',
                  metrics=['mean_squared_error'])
    # summary() prints by itself and returns None, so it is not wrapped in print().
    model.summary()
    return model

In [None]:
# Build the localizer and train it for 50 epochs on batches produced by
# `datagen`, evaluating on the held-out validation arrays after each epoch.
# NOTE(review): `datagen` enables featurewise_center,
# featurewise_std_normalization and zca_whitening, but no `datagen.fit(...)`
# call is visible in this file - confirm whether those statistics are ever
# computed, and whether validation/test inputs get the same treatment.
logo_localizer = createmodel()
hist = logo_localizer.fit_generator( datagen.flow( X_train, y_train), samples_per_epoch=len(X_train) * 2, epochs=50,
                            validation_data=(X_valid,y_valid)) #augmenting data by a factor of 2


### plotting loss curve

In [None]:
# Draw the localizer's training and validation loss per epoch on one axis.
plt.figure(figsize=(14, 3))
plt.title('Optimizer : Adam', fontsize=10)
plt.ylabel('Loss', fontsize=16)
for history_key, line_style, legend_label in (
    ('loss', 'b', 'Training Loss'),
    ('val_loss', 'r', 'Validation Loss'),
):
    plt.plot(hist.history[history_key], line_style, label=legend_label)
plt.legend(loc='upper right')

### saving model

In [None]:
from keras.models import model_from_json
# Persist the localizer: architecture as JSON text, weights as a separate
# HDF5 file.
model_json=logo_localizer.to_json()
with open("logo_localizer.json",'w')as json_file:
    json_file.write(model_json)
    
logo_localizer.save_weights("logo_localizer.h5")

### predictions

In [None]:
# Read the tab-separated query-set annotations, treating the literal
# 'none' as missing, keep only rows without missing values and renumber
# the index from 0.
test_data = (
    pd.read_table(
        'flickr_logos_27_dataset_query_set_annotation.txt',
        sep='\t',
        names=['filename', 'logo'],
        na_values='none',
        index_col=False,
    )
    .dropna()
    .reset_index(drop=True)
)


In [None]:
# Preprocess every labelled query image (in index order) and let the
# localizer predict one 4-value box vector per image.
X_test = np.array([
    process_image('flickr_logos_27_dataset_images/' + image_name)
    for image_name in test_data['filename']
])
y_pred = logo_localizer.predict(X_test)

#### Hence, y_pred will contain all the predicted bounding box coordinates as vectors

##### Setting the coordinates to lie within the image range, i.e., 0 <= coordinates <= 224. Simple thresholding worked for me.

In [None]:
# Keep every predicted coordinate inside the image range 0..224 by simple
# thresholding, modifying y_pred in place.
# NOTE(review): 224 is the size of the resized network input; confirm it is
# the right bound for boxes that are later used to crop the test images.
y_pred[y_pred < 0] = 0
y_pred[y_pred > 224] = 224

## hence y_pred has predicted coordinates for all testing classes

#### Now, crop the training images using the annotated logo coordinates, which gives the classifier model a cleaner training set.
#### For testing, we first find the logo coordinates using the logo_localizer model and then crop the test images before passing them to the classifier model.

# Classifier model (using localizer model)

In [6]:
# Load the training annotations again for the classifier: drop the unused
# 'subset' column, remove repeated rows and renumber the index from 0.
crop_data = (
    pd.read_table(
        'flickr_logos_27_dataset_training_set_annotation.txt',
        sep=' ',
        names=['filename', 'logo', 'subset', 'x1', 'y1', 'x2', 'y2'],
        index_col=False,
    )
    .drop('subset', axis=1)
    .drop_duplicates(subset=['filename', 'logo', 'x1', 'y1', 'x2', 'y2'], keep='first')
    .reset_index(drop=True)
)

### crop_data is split into 6 arrays: cx_train, cy_train, cx_valid, cy_valid, cx_test, cy_test

In [None]:
size = (64, 64)


def load_crop(img_file, x1, y1, x2, y2, target_size=size):
    """Load an image, crop it to a bounding box and normalise the crop.

    Parameters
    ----------
    img_file : str
        Path of the image to read.
    x1, y1, x2, y2 : int or float
        Box corners. Values are rounded to integers (predicted boxes are
        floats) and clamped to the image so they are valid slice bounds.
    target_size : tuple of int, optional
        Size handed to ``cv2.resize``. The default is captured at
        definition time, so it does not depend on later rebinding of the
        module-level ``size``.

    Returns
    -------
    numpy.ndarray
        The resized RGB crop after subtracting its own mean and dividing
        by 255. When the box has no area (width or height <= 0 after
        clamping) the whole image is used instead of an empty crop.

    Raises
    ------
    FileNotFoundError
        If OpenCV cannot read ``img_file``.
    """
    img = cv2.imread(img_file)
    if img is None:
        # cv2.imread reports a missing/unreadable file by returning None.
        raise FileNotFoundError("could not read image: {}".format(img_file))
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    # Slice bounds must be integers inside the image.
    x1, y1, x2, y2 = (int(round(float(v))) for v in (x1, y1, x2, y2))
    height, width = img.shape[:2]
    x1, x2 = max(0, min(x1, width)), max(0, min(x2, width))
    y1, y2 = max(0, min(y1, height)), max(0, min(y2, height))

    # Some annotations are degenerate (e.g. x1 == x2); cropping them would
    # hand an empty array to cv2.resize, so fall back to the full image.
    if x2 > x1 and y2 > y1:
        img = img[y1:y2, x1:x2]

    img = cv2.resize(img, target_size)  # common size for all crops
    img = img - img.mean()
    img = img / 255
    return img

In [None]:
# Crop every annotated training image to its logo box, visiting the rows in
# index order (0..n-1 after the earlier reset_index).
cx = [
    load_crop('flickr_logos_27_dataset_images/' + image_name, left, top, right, bottom)
    for image_name, left, top, right, bottom in zip(
        crop_data['filename'],
        crop_data.x1.values,
        crop_data.y1.values,
        crop_data.x2.values,
        crop_data.y2.values,
    )
]

## Example of a corrupt entry in the dataset, i.e., the coordinates are invalid for a bounding box

In [8]:
crop_data.values[893]

array(['2662264721.jpg', 'RedBull', 3, 197, 3, 197], dtype=object)

In [None]:
# The cropped training images were collected in the list `cx`; stack them
# into one array (the name `crop_imgs` was previously used before being
# assigned).
crop_imgs = np.array(cx)
# Labels come from the same frame the crops were built from, so rows and
# labels stay aligned.
crop_labels = crop_data['logo']
from sklearn.preprocessing import LabelEncoder
le = LabelEncoder()
crop_labels = le.fit_transform(crop_labels)  # Keras one-hot encoding needs integer classes

from keras.utils import np_utils
crop_labels = np_utils.to_categorical(crop_labels, 27)


In [None]:
from sklearn.model_selection import train_test_split
# Split crops and one-hot labels into training and validation parts,
# stratifying on the labels. No random_state is passed, so the split
# differs from run to run.
cx_train, cx_valid, cy_train, cy_valid = train_test_split(crop_imgs, crop_labels , stratify = crop_labels)

## Here, too, data augmentation is limited: the crops contain only the logo, so we cannot shift them, but we can rotate or flip them (in addition to the normalisation used before)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# data generator
def crop_datagen():
    """Return the ImageDataGenerator used for the cropped logo images.

    Besides the dataset-wide normalisation options it enables random
    rotations, channel shifts and horizontal/vertical flips; shifts, shear
    and zoom stay disabled.
    """
    options = dict(
        featurewise_center=True,  # zero the input mean over the dataset
        featurewise_std_normalization=True,  # divide by the dataset std
        zca_whitening=True,  # apply ZCA whitening
        rotation_range=30,  # degree range for random rotations
        width_shift_range=0,  # no random horizontal shifts
        height_shift_range=0,  # no random vertical shifts
        shear_range=0,  # no shear
        zoom_range=0,  # no random zoom
        channel_shift_range=0.2,  # range for random channel shifts
        fill_mode='nearest',  # how points outside the input boundaries are filled
        horizontal_flip=True,  # randomly flip inputs horizontally
        vertical_flip=True,  # randomly flip inputs vertically
    )
    return ImageDataGenerator(**options)


# instantiate a data generator
c_datagen = crop_datagen()

In [None]:
from keras.applications.vgg16 import VGG16
from keras.models import Model
from keras.applications.resnet50 import ResNet50
from keras.layers import GlobalAveragePooling2D, Dense, Dropout
from keras import optimizers

# Dropout rate applied after each dense layer of the classifier head.
dropout_rate = 0.5

## As we have only a small number of images, transfer learning is a good fit. Hence we pick the pre-trained ResNet-50 model and fine-tune it

In [None]:
# ImageNet-pretrained ResNet-50 without its classification top.
# NOTE(review): no input_shape is given and the crops are 64x64 - confirm
# the installed Keras version accepts that size for ResNet50.
base_classifier = ResNet50(weights='imagenet', include_top=False)

# New head: global average pooling followed by two dropout-regularised
# dense layers.
x = base_classifier.output
x = GlobalAveragePooling2D()(x)
x = Dense(1024, activation='relu')(x)
x = Dropout(dropout_rate)(x)
x = Dense(512, activation='relu')(x)
x = Dropout(dropout_rate)(x)

# a softmax layer for 27 classes
predictions = Dense(27, activation='softmax')(x)

# Wrap base + head with keras.models.Model (the previous code called the
# not-yet-defined name `classifier` here).
classifier = Model(inputs=base_classifier.input, outputs=predictions)

# freezing layers except top ones
for layer in base_classifier.layers:
    layer.trainable = False
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
classifier.compile(optimizer=adam, loss='categorical_crossentropy')

# train only the top layers for, say, 30 epochs; the epoch size is derived
# from the classifier's own training set (cx_train), not the localizer's.
classifier.fit_generator(c_datagen.flow(cx_train, cy_train),
                         samples_per_epoch=len(cx_train) * 5, epochs=30, verbose=2,
                         validation_data=(cx_valid, cy_valid))


In [None]:
# make all layers trainable
for layer in classifier.layers:
    layer.trainable = True

# Compile again after changing the trainable flags.
adam = optimizers.Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0)
classifier.compile(optimizer=adam, loss='categorical_crossentropy')

# train the whole net; the epoch size is derived from the classifier's own
# training set (cx_train) rather than the localizer's X_train.
hist2 = classifier.fit_generator(c_datagen.flow(cx_train, cy_train),
                                 samples_per_epoch=len(cx_train) * 5, epochs=100, verbose=2,
                                 validation_data=(cx_valid, cy_valid))



### plotting loss curve for classifier model

In [None]:
# Draw the classifier's fine-tuning loss and validation loss per epoch.
plt.figure(figsize=(14, 3))
plt.title('Optimizer : Adam', fontsize=10)
plt.ylabel('Loss', fontsize=16)
for history_key, line_style, legend_label in (
    ('loss', 'b', 'Training Loss'),
    ('val_loss', 'r', 'Validation Loss'),
):
    plt.plot(hist2.history[history_key], line_style, label=legend_label)
plt.legend(loc='upper right')

### saving classifer model

In [None]:
from keras.models import model_from_json
# Persist the classifier: architecture as JSON text, weights as a separate
# HDF5 file.
model_json=classifier.to_json()
with open("logo_classifier.json",'w')as json_file:
    json_file.write(model_json)
    
classifier.save_weights("logo_classifier.h5")

### final Predictions on testing data

In [None]:
# Read the tab-separated query-set annotations for the classifier, treating
# the literal 'none' as missing, keep only complete rows and renumber the
# index from 0.
test_crop_data = (
    pd.read_table(
        'flickr_logos_27_dataset_query_set_annotation.txt',
        sep='\t',
        names=['filename', 'logo'],
        na_values='none',
        index_col=False,
    )
    .dropna()
    .reset_index(drop=True)
)


### Test images are first cropped using the predicted coordinates (bounding box) and then fed into the model for the final logo classification

In [None]:
# The localizer outputs floating-point coordinates, which cannot be used as
# slice bounds; round them to the nearest integer pixel before cropping.
predicted_boxes = np.rint(y_pred).astype(int)

# Crop each labelled query image to its predicted box (rows of y_pred follow
# the same order as the rows of the query annotations) and stack the crops.
cx_test = np.array([
    load_crop('flickr_logos_27_dataset_images/' + image_name, *box)
    for image_name, box in zip(test_crop_data['filename'], predicted_boxes)
])

In [None]:
cy_test = test_crop_data['logo']
from sklearn.preprocessing import LabelEncoder
# Reuse `le`, the encoder fitted on the training logos, so every class keeps
# the integer index the classifier was trained with. Fitting a fresh encoder
# on the test labels could assign different indices; transform() instead
# raises ValueError if a test label was never seen in training.
cy_test = le.transform(cy_test)

from keras.utils import np_utils
cy_test = np_utils.to_categorical(cy_test, 27)

In [None]:
# Class-probability predictions (softmax output) for the cropped test images.
cy_pred = classifier.predict(cx_test)

In [None]:
# Cross-entropy loss score between the one-hot test labels and the
# predicted class probabilities.
score = log_loss(cy_test, cy_pred)

### P.S.: I have not trained the model and hence have not been able to see the results. If the localizer performs badly, it should not be merged with or used for classification. In that case, the classifier model alone will give better accuracy, or other experiments can be tried, such as training on cropped logos and testing on full images (with noise)