In [None]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
import cv2
import skimage
import os
import random
from imgaug.imgaug import augmenters as iaa
from densenet121 import DenseNet
from sklearn import decomposition
from sklearn.neighbors import KNeighborsClassifier
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import SGD
from keras.models import model_from_json

In [None]:
# Define the data augmentation pipeline.
# The augmenters below are wrapped in a single iaa.Sequential; with imgaug's
# default settings they are applied in the order listed.
seq = iaa.Sequential([
    iaa.Crop(px=(0, 16)), # crop images from each side by 0 to 16px (randomly chosen)
    iaa.Fliplr(0.5), # horizontally flip 50% of the images
    iaa.GaussianBlur(sigma=(0, 3.0)), # blur images with a sigma of 0 to 3.0
    iaa.CropAndPad(percent=(-0.25, 0.25)), # crop (negative) or pad (positive) by up to 25% of the image size
    iaa.Add((-30, 30)), # add a random value between -30 and 30 to the pixel intensities
    iaa.Fliplr(0.5), # NOTE(review): second horizontal flip with p=0.5 -- looks redundant with the one above, confirm it is intended
    iaa.Flipud(0.5), # vertically flip 50% of the images
    iaa.Superpixels(p_replace=0.5, n_segments=64), # split into ~64 superpixels and replace each with probability 0.5
    iaa.Dropout(p=(0, 0.2)), # zero out a random fraction (0 to 20%) of the pixels
    iaa.Affine(rotate=(-45, 45)) # rotate by a random angle between -45 and 45 degrees
])


In [None]:
class image_util:
    """Load photos from disk in batches together with their multi-hot labels.

    Every ``.jpg`` file in ``data_dir`` (except names starting with ``._``) is
    one sample. The file name without its extension is the photo id; it is
    mapped to a business id through ``photo_biz_file_name`` and from there to a
    label vector through ``biz_label_file_name``.

    Attributes:
        batch_index: index of the batch the next ``next_batch`` call serves.
        image_paths: shuffled list of image file paths.
        image_len: number of entries in ``image_paths``.
        images: list initialised empty; this class never adds to it.
        labels: label vectors of every sample served so far (one entry is
            appended per sample on each ``next_batch`` call).
        one_hot: dict mapping business id -> label vector.
        photo_biz: dict mapping photo id -> business id.
    """

    # Length of the multi-hot label vector.
    NUM_LABELS = 9
    # Side length, in pixels, of the square images handed to the network.
    INPUT_SIZE = 224
    # Per-channel offsets subtracted from channels 0, 1 and 2 before scaling.
    # NOTE(review): presumably BGR channel means matching cv2.imread -- confirm.
    CHANNEL_MEANS = (103.94, 116.78, 123.68)
    # Factor applied to every channel after the offset is subtracted.
    PIXEL_SCALE = 0.017

    def __init__(self, data_dir, biz_label_file_name, photo_biz_file_name):
        """Index the images in ``data_dir`` and load both lookup tables.

        Args:
            data_dir: directory containing the ``.jpg`` photos.
            biz_label_file_name: CSV of ``business id,space separated labels``.
            photo_biz_file_name: CSV of ``photo id,business id``.
        """
        self.batch_index = 0
        self.image_paths = [
            os.path.join(data_dir, name)
            for name in os.listdir(data_dir)
            if name.endswith('.jpg') and not name.startswith("._")
        ]
        random.shuffle(self.image_paths)
        self.images = []
        self.labels = []
        self.image_len = len(self.image_paths)
        self.one_hot = self.read_csv_one_hot(biz_label_file_name)
        self.photo_biz = self.photo_to_biz_id(photo_biz_file_name)

    def next_batch(self, batch_size):
        """Read the next batch from disk, wrapping around at the end.

        Batches are consecutive, non-overlapping slices of ``image_paths``.
        When fewer than ``batch_size`` paths remain, the remainder is skipped
        and the batch is taken from the start of the list again, so the method
        can be called indefinitely.

        Args:
            batch_size: number of samples per batch.

        Returns:
            Tuple ``(images, labels)`` of numpy arrays: the normalised float32
            images and the matching label vectors.

        Raises:
            IOError: if an image file cannot be decoded.
            KeyError: if a photo id or its business id is missing from the
                lookup tables.
        """
        start = self.batch_index * batch_size
        # Wrap only when a full batch no longer fits. The previous strict
        # comparison dropped the final full batch, and the wrap branch did not
        # advance batch_index, so the first batch was served twice in a row.
        if start + batch_size > self.image_len:
            self.batch_index = 0
            start = 0
        end = start + batch_size

        images = []
        labels = []
        for path in self.image_paths[start:end]:
            photo_id = os.path.basename(path).split(".")[0]
            label = self.one_hot[self.photo_biz[photo_id]]
            images.append(self._preprocess(self._load_image(path)))
            labels.append(label)
            # Kept for backward compatibility: running log of served labels.
            self.labels.append(label)

        self.batch_index += 1
        return np.asarray(images), np.asarray(labels)

    def _load_image(self, path):
        """Read one image file and resize it to INPUT_SIZE x INPUT_SIZE."""
        img = cv2.imread(path)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise IOError("Could not read image file: %s" % path)
        size = (self.INPUT_SIZE, self.INPUT_SIZE)
        return cv2.resize(img, size, interpolation=cv2.INTER_AREA)

    def _preprocess(self, img):
        """Return a float32 copy of ``img`` with each channel shifted and scaled."""
        normalised = np.asarray(img).astype(np.float32)
        for channel, mean in enumerate(self.CHANNEL_MEANS):
            normalised[:, :, channel] = (normalised[:, :, channel] - mean) * self.PIXEL_SCALE
        return normalised

    def read_csv_one_hot(self, file_name):
        """Parse the business-label CSV into multi-hot vectors.

        The first line is treated as a header and skipped. Each remaining line
        is ``business id,labels`` where ``labels`` is a whitespace separated
        list of integer label indices; an empty label field yields an all-zero
        vector. Blank lines are ignored.

        Args:
            file_name: path of the CSV file.

        Returns:
            Dict mapping business id (str) to a numpy vector of length
            ``NUM_LABELS`` with 1 at every listed label index.

        Raises:
            ValueError: if a label is not an integer in ``[0, NUM_LABELS)``.
        """
        biz_id_to_label = {}
        with open(file_name, "r") as f:
            next(f, None)  # skip the header row
            for line in f:
                fields = line.rstrip("\r\n").split(",")
                if not any(field.strip() for field in fields):
                    continue  # blank line
                encoded = np.zeros(self.NUM_LABELS)
                label_field = fields[1] if len(fields) > 1 else ""
                for token in label_field.split():
                    label = int(token)
                    if not 0 <= label < self.NUM_LABELS:
                        raise ValueError(
                            "Label %d for business %r is outside [0, %d)"
                            % (label, fields[0], self.NUM_LABELS)
                        )
                    encoded[label] = 1
                biz_id_to_label[fields[0]] = encoded
        return biz_id_to_label

    def photo_to_biz_id(self, file_name):
        """Parse the photo-to-business CSV.

        The first line is treated as a header and skipped; lines without a
        second comma separated field (for example blank lines) are ignored.

        Args:
            file_name: path of the CSV file.

        Returns:
            Dict mapping photo id (str) to the business id (str) in the second
            column.
        """
        photo_to_biz = {}
        with open(file_name, "r") as f:
            next(f, None)  # skip the header row
            for line in f:
                fields = line.split(",")
                if len(fields) < 2:
                    continue
                photo_to_biz[fields[0]] = fields[1].rstrip()
        return photo_to_biz
        


In [None]:
## Point these three paths at your own copy of the data set:
## the photo directory, the business-label CSV and the photo-to-business CSV.
util = image_util(
    data_dir='./train_photos',
    biz_label_file_name='./train.csv',
    photo_biz_file_name='./train_photo_to_biz_ids.csv',
)

In [None]:
## This step is optional, and if needed should be implemented in the util.next_batch
## NOTE(review): image_util initialises util.images to [] and next_batch only fills
## local lists, so as written this cell appears to augment an empty list -- confirm
## whether images are meant to be loaded into util.images first.
ims = util.images
# do image augmentation: append 10 augmented copies of util.images to `ims`
for idx in range(10):
    # 'images' should be either a 4D numpy array of shape (N, height, width, channels)
    # or a list of 3D numpy arrays, each having shape (height, width, channels).
    # Grayscale images must have shape (height, width, 1) each.
    # All images must have numpy's dtype uint8. Values are expected to be in
    # range 0-255.
    images_aug = seq.augment_images(util.images)
    # stack the new augmented copy below everything collected so far
    ims = np.concatenate((ims, images_aug), axis=0)

In [None]:
# Normalise every image in `ims`: resize to 224x224, convert to float32, then
# shift each channel by its offset and scale by 0.017.
# NOTE(review): the offsets look like per-channel means in BGR order -- confirm.
channel_means = (103.94, 116.78, 123.68)
images_input = []
for im in ims:
    im = cv2.resize(im, (224, 224)).astype(np.float32)
    for channel, mean in enumerate(channel_means):
        im[:, :, channel] = (im[:, :, channel] - mean) * 0.017
    images_input.append(im)
# collect the per-image results into a single array
images_input = np.asarray(images_input)

In [None]:
## construct the Pre-trained DenseNet
## and reduce the number of feature maps in the transition block to save some compute power
## (DenseNet is imported from the local densenet121 module; the weights are read from weights_path)
model = DenseNet(reduction=0.5, classes=1000, weights_path='./densenet121_weights_tf.h5')
## output shape of the last layer before the classifier is removed
print(model.layers[-1].output_shape)
## remove Pre-trained Classifer at the end (the last two entries of model.layers)
## NOTE(review): this relies on model.layers being the model's own mutable list; if the
## installed Keras returns a copy, the pops have no effect and both prints show the
## same shape -- confirm against the Keras version in use.
model.layers.pop()
model.layers.pop()
## output shape of what is now the last layer
print(model.layers[-1].output_shape)
## tensor the new classification layers are attached to in the next cell
output = model.layers[-1].output

In [None]:
## add our fully connected layers at the end of the model to do classification
## The base tensor is re-read from the model (the same tensor the previous cell stored
## in `output`) instead of chaining `output = Dense(...)(output)`, so running this cell
## again rebuilds the same head rather than stacking extra Dense layers on top of it.
features = model.layers[-1].output
hidden = Dense(100, activation='elu', name='fully_last')(features)
## use sigmoid as activation for the last layer to generate an independent probability
## for each of the 9 bits of the label
output = Dense(9, activation='sigmoid', name='final')(hidden)
new_model = Model(model.input, output)
## use momentum to get a smoother convergence
sgd = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
## use binary-crossentropy to get multilabel classification score.
new_model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
## start training
epoch = 4
batch_size = 20
display_step = 20      # report loss/accuracy every `display_step` batches
checkpoint_step = 100  # save a checkpoint every `checkpoint_step` batches
checkpoint_path = './new_model_checkpoint.h5'
## save once before training starts
new_model.save(checkpoint_path)
## integer division: range() needs an int, and `/` yields a float on Python 3
batches_per_epoch = util.image_len // batch_size
## for each epoch, train all the images in the training set
for i in range(epoch):
    for j in range(batches_per_epoch):
        images, labels = util.next_batch(batch_size)
        ## the model is compiled with metrics=['accuracy'], so Keras returns [loss, accuracy]
        loss, train_acc = new_model.train_on_batch(images, labels)
        ## the cadence depends on the batch index only; multiplying by (i+1) made it
        ## vary from epoch to epoch
        if j % display_step == 0:
            ## evaluate on the next batch from the loader; note that this batch is
            ## consumed here and is not used for a training step
            images, labels = util.next_batch(batch_size)
            eval_loss, acc = new_model.test_on_batch(images, labels)
            print('Epoch:'+str(i)+'  '+'Batch:'+str(j)+'  '+'Loss:'+str(loss)+'   '+'Accuracy:'+str(acc))
        ## for every 100 batches, save a check point
        if j % checkpoint_step == 0:
            new_model.save(checkpoint_path)