In [None]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
import cv2
import skimage
import os
from imgaug.imgaug import augmenters as iaa
from densenet121 import DenseNet
from sklearn import decomposition
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import SGD

In [None]:
 # Image augmentation pipeline built from imgaug augmenters, listed in the
 # order they are given to iaa.Sequential.
 # NOTE(review): the assignment below starts with one leading space. IPython
 # appears to tolerate this, but a plain .py export would presumably raise an
 # IndentationError -- confirm before converting the notebook to a script.
 seq = iaa.Sequential([
    iaa.Crop(px=(0, 16)), # crop each side by a randomly chosen 0 to 16 px
    iaa.Fliplr(0.5), # horizontally flip 50% of the images
    iaa.GaussianBlur(sigma=(0, 3.0)), # blur with a sigma between 0 and 3.0
    iaa.CropAndPad(percent=(-0.25, 0.25)), # presumably crops (negative) or pads (positive) by up to 25% -- confirm in imgaug docs
    iaa.Add((-30, 30)), # add a value between -30 and 30 to the pixel intensities
    iaa.Fliplr(0.5), # second horizontal flip; duplicates the Fliplr entry above
    iaa.Flipud(0.5), # vertically flip 50% of the images
    iaa.Superpixels(p_replace=0.5, n_segments=64), # superpixel effect; exact meaning of p_replace/n_segments per imgaug docs
    iaa.Dropout(p=(0, 0.2)), # drop a fraction of pixels chosen between 0 and 0.2
    iaa.Affine(rotate=(-45, 45)) # rotate by an angle between -45 and 45 degrees
])



In [None]:
class image_util:
    """Load training photos and group them by one-hot label vector.

    Every readable ``.jpg`` in ``data_dir`` is resized, mapped to a business id
    through the photo-to-business CSV, and then to that business's one-hot
    label vector through the label CSV. Photos are grouped by label vector, so
    photos of different businesses that share identical labels end up in the
    same group.

    Attributes:
        labels: array with one row per distinct label vector.
        images: 1-D object array; ``images[k]`` is the list of resized images
            whose label vector is ``labels[k]``.
    """

    # Length of the one-hot label vector (label ids 0..NUM_LABELS-1).
    NUM_LABELS = 9

    def __init__(self, data_dir, biz_label_file_name, photo_biz_file_name,
                 image_size=(299, 299), max_label_groups=100):
        """Read the CSV metadata, then load and group the photos.

        Args:
            data_dir: directory containing the ``.jpg`` photos.
            biz_label_file_name: CSV of ``business_id,labels`` (header skipped).
            photo_biz_file_name: CSV of ``photo_id,business_id`` (header skipped).
            image_size: size passed to ``cv2.resize`` for every photo.
            max_label_groups: stop loading once this many distinct label
                vectors have been seen; ``None`` loads every photo.

        Raises:
            KeyError: if a photo id or its business id is missing from the
                CSV-derived lookup tables.
        """
        # Sorted so that the subset loaded before the cap is hit is the same
        # on every run (os.listdir order is arbitrary).
        image_paths = [os.path.join(data_dir, name)
                       for name in sorted(os.listdir(data_dir))
                       if name.endswith('.jpg') and not name.startswith("._")]
        one_hot = self.read_csv_one_hot(biz_label_file_name)
        photo_biz = self.photo_to_biz_id(photo_biz_file_name)

        label_photos = {}
        for path in image_paths:
            img = cv2.imread(path)
            if img is None:
                # Unreadable or corrupt file: skip it.
                continue
            photo_id = os.path.basename(path).split(".")[0]
            img = cv2.resize(img, image_size, interpolation=cv2.INTER_AREA)
            # Tuples are hashable, so the label vector can be a dict key.
            label = tuple(one_hot[photo_biz[photo_id]])
            label_photos.setdefault(label, []).append(img)
            if max_label_groups is not None and len(label_photos) >= max_label_groups:
                break

        # Materialise keys and values together so they stay aligned, and as
        # real lists: on Python 3 np.asarray(dict.keys()) is a 0-d object array.
        groups = list(label_photos.items())
        self.labels = np.asarray([label for label, _ in groups])
        # Groups hold different numbers of photos, so build the object array
        # explicitly instead of letting NumPy guess at a ragged shape.
        images = np.empty(len(groups), dtype=object)
        for idx, (_, photos) in enumerate(groups):
            images[idx] = photos
        self.images = images
        print(self.labels.shape)

    def read_csv_one_hot(self, file_name):
        """Map each business id in the CSV to a one-hot label vector.

        The first line is treated as a header. Each remaining row is
        ``business_id,<space-separated label ids>``. A business with an empty
        label field gets an all-zero vector. Blank rows are skipped, and
        tokens that are not integers in ``0..NUM_LABELS-1`` are ignored.

        Args:
            file_name: path of the label CSV.

        Returns:
            dict mapping business id (str) to a float array of length
            ``NUM_LABELS``.
        """
        with open(file_name, "r") as f:
            lines = f.readlines()[1:]
        biz_id_to_label = {}
        for line in lines:
            fields = line.rstrip("\r\n").split(",")
            if len(fields) < 2:
                # Blank or malformed row: nothing to record.
                continue
            vector = np.zeros(self.NUM_LABELS)
            # split() without arguments also copes with repeated spaces.
            for token in fields[1].split():
                try:
                    index = int(token)
                except ValueError:
                    continue
                if 0 <= index < self.NUM_LABELS:
                    vector[index] = 1
            biz_id_to_label[fields[0]] = vector
        return biz_id_to_label

    def photo_to_biz_id(self, file_name):
        """Map each photo id in the CSV to its business id.

        The first line is treated as a header; rows without a comma (for
        example blank lines) are skipped.

        Args:
            file_name: path of the ``photo_id,business_id`` CSV.

        Returns:
            dict mapping photo id (str) to business id (str).
        """
        with open(file_name, "r") as f:
            lines = f.readlines()[1:]
        photo_to_biz = {}
        for line in lines:
            fields = line.split(",")
            if len(fields) < 2:
                continue
            photo_to_biz[fields[0]] = fields[1].rstrip()
        return photo_to_biz
    

In [None]:
# Load the training photos together with the two CSV lookup tables.
inputs = image_util(
    data_dir='./train_photos',
    biz_label_file_name='./train.csv',
    photo_biz_file_name='./train_photo_to_biz_ids.csv'
)

In [None]:
# Network input size and normalisation constants: a per-channel mean is
# subtracted and the result is scaled.
MODEL_INPUT_SIZE = (224, 224)
# float32 so the arithmetic stays in float32, as with the original scalars.
CHANNEL_MEANS = np.array([103.94, 116.78, 123.68], dtype=np.float32)
PIXEL_SCALE = 0.017


def preprocess_image(im):
    """Resize one image to the network input size and normalise its channels.

    Args:
        im: image array with the channel axis last (three channels).

    Returns:
        float32 array of shape MODEL_INPUT_SIZE + (3,), where channel c holds
        (pixel - CHANNEL_MEANS[c]) * PIXEL_SCALE.
    """
    resized = cv2.resize(im, MODEL_INPUT_SIZE).astype(np.float32)
    # Broadcasting over the last axis applies each mean to its own channel.
    return (resized - CHANNEL_MEANS) * PIXEL_SCALE


# One batch per label group; batch k stacks the preprocessed images of
# inputs.images[k].
processed_groups = [
    np.asarray([preprocess_image(im) for im in images])
    for images in inputs.images
]

# Groups hold different numbers of images, so fill a 1-D object array
# explicitly rather than asking NumPy to infer a shape from ragged input.
processed_img_list = np.empty(len(processed_groups), dtype=object)
for idx, group in enumerate(processed_groups):
    processed_img_list[idx] = group

# TODO: augmentation with `seq` is not applied yet. The previous placeholder
# loop never ran (range(0)) and referenced an undefined `images_input`.

In [None]:
# Build the DenseNet model from the local densenet121 module, passing the
# weights file path. The call returns the model plus a second value
# (feature_list) that is not used in this cell.
model, feature_list = DenseNet(reduction=0.5, classes=1000, weights_path='./densenet121_weights_tf.h5')
# Output shape of the last layer before trimming.
print(model.layers[-1].output_shape)
# Remove the last two entries from the layer list so a different layer
# becomes "last".
# NOTE(review): this relies on model.layers being a mutable list. In Keras
# versions where it is a read-only view or copy, pop() would have no effect --
# confirm against the installed Keras version.
model.layers.pop()
model.layers.pop()
# Output shape of whatever layer is last after the two pops.
print(model.layers[-1].output_shape)

In [None]:
# Re-wrap the network so that the current last layer is the model output.
output = model.layers[-1].output
model = Model(model.input, output)

# A 9-unit softmax head on top of `output` was sketched here and is disabled.

# Compile with Nesterov-momentum SGD.
sgd = SGD(
    lr=1e-2,
    momentum=0.9,
    decay=1e-6,
    nesterov=True
)
model.compile(
    loss='categorical_crossentropy',
    optimizer=sgd,
    metrics=['accuracy']
)

In [None]:
# Display the output shape of the model's last layer as the cell result.
model.layers[-1].output_shape

In [None]:
# business features
# Mean pooling: run the model on each group's image batch and average the
# predictions over axis 0 (assumes predict returns one row per image).
biz_features_mean = np.asarray(
    [np.mean(model.predict(batch), axis=0) for batch in processed_img_list]
)

In [None]:
# Max pooling: run the model on each group's image batch and take the
# element-wise maximum of the predictions over axis 0.
biz_features_max = np.asarray(
    [np.max(model.predict(batch), axis=0) for batch in processed_img_list]
)

In [None]:
# Hold out 20% of the max-pooled feature rows (and their label rows) for
# testing; the seed is fixed so the split is repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    biz_features_max,
    inputs.labels,
    random_state=42,
    test_size=0.2
)

In [None]:
# Display the shape of the training feature matrix as the cell result.
X_train.shape

In [None]:
from sklearn.ensemble import AdaBoostClassifier
from sklearn.model_selection import cross_val_score


In [None]:
# max - biz feature extraction
# One binary AdaBoost classifier per label column. Each is scored with 3-fold
# cross-validation on the training split and with accuracy on the held-out
# split.
max_score = []
test_score = []
for i in range(y_train.shape[1]):
    # Fixed seed so repeated runs give the same scores.
    ABC = AdaBoostClassifier(random_state=42)
    ABC.fit(X_train, y_train[:, i])
    # cv is passed explicitly: the library default is not 3 in every
    # scikit-learn release, and the comment above promises 3 folds.
    max_score.append(cross_val_score(ABC, X_train, y_train[:, i], cv=3).mean())
    test_score.append(ABC.score(X_test, y_test[:, i]))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(max_score)
print(test_score)

In [None]:
# Bind the current test_score list to a second name, max_test_score, and
# display it. This is an alias of the same list object, not a copy.
max_test_score = test_score
max_test_score

In [None]:
# mean - biz feature extraction
# Same split settings (20% held out, seed 42) applied to the mean-pooled
# features. NOTE: this rebinds X_train / X_test / y_train / y_test, so cells
# that read those names afterwards see the mean-feature split.
X_train, X_test, y_train, y_test = train_test_split(biz_features_mean, inputs.labels, test_size=0.2, random_state=42)
mean_score = []
mean_test_score = []
for i in range(y_train.shape[1]):
    # Fixed seed so repeated runs give the same scores.
    ABC = AdaBoostClassifier(random_state=42)
    ABC.fit(X_train, y_train[:, i])
    # Explicit 3-fold CV: the library default differs between scikit-learn
    # releases.
    mean_score.append(cross_val_score(ABC, X_train, y_train[:, i], cv=3).mean())
    mean_test_score.append(ABC.score(X_test, y_test[:, i]))
# print() with a single argument behaves the same on Python 2 and Python 3.
print(mean_score)
print(mean_test_score)

In [None]:
# Element-wise difference of the cross-validation scores (max minus mean);
# positive entries mean the max-pooled features scored higher for that label.
np.asarray(max_score) - np.asarray(mean_score)

In [None]:
# Element-wise difference of the held-out scores (max minus mean); positive
# entries mean the max-pooled features scored higher for that label.
np.asarray(max_test_score) - np.asarray(mean_test_score)