In [None]:
import os

import cv2
import numpy as np
import skimage
import tensorflow as tf
from imgaug.imgaug import augmenters as iaa
from keras.models import Model
from keras.models import Sequential
from sklearn.multiclass import OneVsRestClassifier
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.svm import LinearSVC
from sklearn.svm import SVC

from densenet121 import DenseNet

In [None]:
# Image-augmentation pipeline. The augmenters are handed to iaa.Sequential
# in exactly the order they are listed here.
augmentation_steps = [
    iaa.Crop(px=(0, 16)),  # crop images from each side by 0 to 16px (randomly chosen)
    iaa.Fliplr(0.5),  # horizontally flip 50% of the images
    iaa.GaussianBlur(sigma=(0, 3.0)),  # blur images with a sigma of 0 to 3.0
    iaa.CropAndPad(percent=(-0.25, 0.25)),
    iaa.Add((-30, 30)),
    iaa.Fliplr(0.5),  # NOTE(review): second horizontal flip, same probability as the first
    iaa.Flipud(0.5),
    iaa.Superpixels(p_replace=0.5, n_segments=64),
    iaa.Dropout(p=(0, 0.2)),
    iaa.Affine(rotate=(-45, 45)),
]
seq = iaa.Sequential(augmentation_steps)


In [None]:
class image_util:
    """Load training photos and group them by their business's label vector.

    Photos are read from ``data_dir``, resized, and bucketed by the multi-hot
    label vector of the business each photo belongs to. Note that photos of
    *different* businesses that share an identical label vector end up in the
    same bucket.

    Attributes:
        labels: ``np.ndarray`` of shape ``(n_groups, num_classes)`` holding one
            multi-hot label vector per group (shape ``(0,)`` if nothing loaded).
        images: 1-D object ``np.ndarray`` of length ``n_groups``; element ``i``
            is the list of resized images whose label vector is ``labels[i]``.
    """

    def __init__(self, data_dir, biz_label_file_name, photo_biz_file_name,
                 max_images=100, image_size=(299, 299), num_classes=9):
        """Read, resize and group the photos.

        Args:
            data_dir: Directory containing the ``.jpg`` photos. The file name
                up to the first ``.`` is used as the photo id.
            biz_label_file_name: CSV mapping business id -> label indices
                (see ``read_csv_one_hot``).
            photo_biz_file_name: CSV mapping photo id -> business id
                (see ``photo_to_biz_id``).
            max_images: Upper bound on the number of photo files considered;
                ``None`` means no limit. Defaults to 100.
            image_size: ``(width, height)`` passed to ``cv2.resize``.
            num_classes: Length of each multi-hot label vector.
        """
        # os.listdir returns entries in arbitrary order; sorting makes the
        # subset selected by ``max_images`` reproducible across runs.
        image_paths = sorted(
            os.path.join(data_dir, name)
            for name in os.listdir(data_dir)
            if name.endswith('.jpg') and not name.startswith("._")
        )
        one_hot = self.read_csv_one_hot(biz_label_file_name, num_classes)
        photo_biz = self.photo_to_biz_id(photo_biz_file_name)

        label_photos = {}
        for path in image_paths[:max_images]:
            img = cv2.imread(path)
            # cv2.imread signals failure by returning None. The identity test
            # is required: `img == None` on an ndarray compares elementwise
            # and cannot be used as an `if` condition.
            if img is None:
                continue
            photo_id = os.path.basename(path).split(".")[0]
            # Photos without a known business / label entry are skipped, the
            # same way unreadable images are.
            label = one_hot.get(photo_biz.get(photo_id))
            if label is None:
                continue
            img = cv2.resize(img, image_size, interpolation=cv2.INTER_AREA)
            # ndarrays are unhashable, so the tuple form is the dict key.
            label_photos.setdefault(tuple(label), []).append(img)

        # dict views must be materialised first: np.asarray(d.keys()) yields a
        # useless 0-d object array on Python 3.
        self.labels = np.asarray(list(label_photos.keys()))
        # Groups generally hold different numbers of photos, so they are stored
        # in an explicit object array instead of letting NumPy try to pack a
        # ragged nested sequence.
        self.images = np.empty(len(label_photos), dtype=object)
        for idx, photos in enumerate(label_photos.values()):
            self.images[idx] = photos
        print(self.labels.shape)

    def read_csv_one_hot(self, file_name, num_classes=9):
        """Parse the business-label CSV into multi-hot vectors.

        The first line is treated as a header and skipped. Each remaining line
        is ``<business_id>,<space separated label indices>``; the label field
        may be empty, which yields an all-zero vector.

        Args:
            file_name: Path of the CSV file.
            num_classes: Length of each label vector.

        Returns:
            dict mapping business id (str) -> ``np.ndarray`` of ``num_classes``
            floats with 1 at every listed label index.

        Raises:
            ValueError: If a label is not an integer in ``[0, num_classes)``.
        """
        with open(file_name, "r") as f:
            lines = f.readlines()[1:]
        biz_id_to_label = {}
        for line in lines:
            fields = line.rstrip().split(",")
            if len(fields) < 2:
                # Blank line or line without a label column: nothing to record.
                continue
            vector = np.zeros(num_classes)
            for token in fields[1].split():
                index = int(token)
                if not 0 <= index < num_classes:
                    raise ValueError(
                        "label %d out of range for %d classes in line %r"
                        % (index, num_classes, line)
                    )
                vector[index] = 1
            biz_id_to_label[fields[0]] = vector
        return biz_id_to_label

    def photo_to_biz_id(self, file_name):
        """Parse the photo-to-business CSV.

        The first line is treated as a header and skipped. Each remaining line
        is ``<photo_id>,<business_id>``; lines without a comma are ignored.

        Args:
            file_name: Path of the CSV file.

        Returns:
            dict mapping photo id (str) -> business id (str).
        """
        with open(file_name, "r") as f:
            lines = f.readlines()[1:]
        photo_to_biz = {}
        for line in lines:
            fields = line.split(",")
            if len(fields) < 2:
                continue
            photo_to_biz[fields[0]] = fields[1].rstrip()
        return photo_to_biz
    

In [None]:
# Load the training photos together with the two CSV lookup tables.
inputs = image_util(
    data_dir='./train_photos',
    biz_label_file_name='./train.csv',
    photo_biz_file_name='./train_photo_to_biz_ids.csv',
)

In [None]:
# Normalisation constants: one mean per channel (cv2 images are BGR) and a
# common scale factor, applied as (pixel - mean) * scale.
CHANNEL_MEANS = np.array([103.94, 116.78, 123.68], dtype=np.float32)
PIXEL_SCALE = 0.017
MODEL_INPUT_SIZE = (224, 224)


def preprocess_image(im):
    """Resize one image to the network input size and normalise it.

    Args:
        im: Image array as returned by cv2 (height, width, 3 channels).

    Returns:
        float32 array of shape ``MODEL_INPUT_SIZE + (3,)`` with each channel
        mean-subtracted and scaled.
    """
    im = cv2.resize(im, MODEL_INPUT_SIZE).astype(np.float32)
    # Broadcasting applies each channel's mean along the last axis.
    return (im - CHANNEL_MEANS) * PIXEL_SCALE


# One float32 batch of shape (n_images, 224, 224, 3) per image group. Groups
# usually differ in size, so the batches are kept in an object array rather
# than being packed into a single (ragged) ndarray.
processed_img_list = np.empty(len(inputs.images), dtype=object)
for group_idx, images in enumerate(inputs.images):
    processed_img_list[group_idx] = np.asarray(
        [preprocess_image(im) for im in images]
    )

# Augmentation is intentionally not applied yet. The previous placeholder loop
# ran zero times and referenced an undefined `images_input`, so it has been
# removed.
# TODO: when enabling augmentation, run `seq.augment_images` on the raw images
# of each group before `preprocess_image` and append the results to the group.

In [None]:
model = DenseNet(reduction=0.5, classes=1000, weights_path='./densenet121_weights_tf.h5')
print(model.layers[-1].output_shape)
# Turn the classifier into a feature extractor by cutting off its last two
# layers. Popping entries from `model.layers` does not rewire the graph, so
# `model.predict` would keep returning the original 1000-way output; a new
# Model ending at the desired layer has to be built instead.
feature_layer = model.layers[-3]
# Positional (inputs, outputs) arguments are used for Keras 1/2 compatibility.
model = Model(model.input, feature_layer.output)
print(model.layers[-1].output_shape)

In [None]:
# Business features: run every image group through the network and average
# the per-image predictions into a single vector per group.
biz_features = np.asarray([
    np.mean(model.predict(img_list), axis=0)
    for img_list in processed_img_list
])

In [None]:
# `bfs` was never defined in this notebook; the feature matrix is `biz_features`.
biz_features.shape

In [None]:
# One linear SVM per label, trained on the averaged group features.
# The label vectors live on the loader object (`inputs.labels`);
# `biz_features` is a plain ndarray and has no `.labels` attribute.
clf = OneVsRestClassifier(LinearSVC(random_state=0))
clf.fit(biz_features, inputs.labels)

In [None]:
# Per-image network outputs for group 1, classified image by image
# (not averaged over the group).
group_1_image_features = model.predict(processed_img_list[1])
p = clf.predict(group_1_image_features)

In [None]:
# Ground-truth label vector of group 1. It is stored on the loader object;
# `biz_features` is an ndarray without a `.labels` attribute.
inputs.labels[1]

In [None]:
# Display the predictions stored in `p` by the `clf.predict` cell above.
p

In [None]:
# Score the per-image predictions of group 1 against the group's label vector.
# The label row is repeated once per image; the count is taken from the data
# instead of a hard-coded 13, and the labels come from `inputs`, not from the
# `biz_features` ndarray.
group_features = model.predict(processed_img_list[1])
clf.score(group_features, np.tile(inputs.labels[1], (len(group_features), 1)))

In [None]:
# Small scratch vector; no cell visible in this notebook reads it.
a = np.array((1, 0, 1))