In [None]:
import tensorflow as tf
import numpy as np
from keras.models import Sequential
import cv2
import skimage
import random
import os
from imgaug.imgaug import augmenters as iaa
from densenet121 import DenseNet
from sklearn import decomposition
from sklearn.neighbors import KNeighborsClassifier
from keras.layers import Dense
from keras.models import Model
from keras.optimizers import SGD

In [None]:
class image_util:
    """Batch loader that pairs .jpg photos in a directory with multi-hot labels.

    Labels are resolved in two steps: the photo id (file name up to the first
    ".") is mapped to a business id through ``photo_biz``, and the business id
    is mapped to a label vector through ``one_hot``.

    Attributes:
        batch_index: index of the next batch ``next_batch`` will return.
        image_paths: shuffled list of image file paths found in ``data_dir``.
        image_len: number of entries in ``image_paths``.
        labels: running list of every label vector handed out so far.
        one_hot: dict of business id -> multi-hot numpy vector.
        photo_biz: dict of photo id -> business id.
    """

    def __init__(self, data_dir, biz_label_file_name, photo_biz_file_name):
        """Index the images in ``data_dir`` and load both CSV lookup tables.

        Args:
            data_dir: directory scanned (non-recursively) for ``.jpg`` files;
                names starting with "._" are ignored.
            biz_label_file_name: CSV of ``business id,space separated labels``.
            photo_biz_file_name: CSV of ``photo id,business id``.
        """
        self.batch_index = 0
        self.image_paths = [os.path.join(data_dir,i) for i in os.listdir(data_dir) if i.endswith('.jpg') and not i.startswith("._")]
        random.shuffle(self.image_paths)
        self.images = []
        self.labels = []
        self.image_len = len(self.image_paths)
        self.one_hot = self.read_csv_one_hot(biz_label_file_name)
        self.photo_biz = self.photo_to_biz_id(photo_biz_file_name)

    def next_batch(self, batch_size):
        """Return the next non-overlapping batch of images and labels.

        Batch ``k`` covers ``image_paths[k * batch_size:(k + 1) * batch_size]``;
        the last batch of a pass may hold fewer than ``batch_size`` items.

        Args:
            batch_size: positive number of images per batch.

        Returns:
            ``(images, labels, flag)``. While data remains, ``flag`` is 1 and
            the arrays hold the next batch. Once every image has been handed
            out, the internal position is reset and the call returns the first
            batch again with ``flag`` 0, signalling the end of the pass.

        Raises:
            ValueError: if ``batch_size`` is not positive.
            IOError: if an image file cannot be decoded.
            KeyError: if a photo or its business has no entry in the tables.
        """
        if batch_size <= 0:
            raise ValueError("batch_size must be positive, got %r" % (batch_size,))
        # Position is counted in whole batches, so scale it to an image offset.
        start = self.batch_index * batch_size
        if start < self.image_len:
            images, labels = self._load_batch(start, start + batch_size)
            self.batch_index += 1
            return images, labels, 1
        # Pass exhausted: rewind and hand back the first batch with flag 0.
        self.batch_index = 0
        images, labels = self._load_batch(0, batch_size)
        return images, labels, 0

    def _load_batch(self, start, end):
        """Read, resize to 224x224 and label the images in ``image_paths[start:end]``."""
        images = []
        labels = []
        for path in self.image_paths[start:end]:
            img = cv2.imread(path)
            if img is None:
                # cv2.imread signals failure by returning None, not by raising.
                raise IOError("could not read image: %s" % path)
            photo_id = os.path.basename(path).split(".")[0]
            label = self.one_hot[self.photo_biz[photo_id]]
            # Kept for compatibility: self.labels records every label served.
            self.labels.append(label)
            img = cv2.resize(img,(224,224),interpolation = cv2.INTER_AREA)
            images.append(img)
            labels.append(label)
        return np.asarray(images), np.asarray(labels)

    def read_csv_one_hot(self, file_name, num_classes=9):
        """Parse a ``business id,labels`` CSV into multi-hot vectors.

        The first line is treated as a header and skipped. Labels are
        whitespace-separated integers used as indices into the vector.

        Args:
            file_name: path of the CSV file.
            num_classes: length of each label vector.

        Returns:
            dict of business id -> numpy vector of length ``num_classes`` with
            1 at every listed label index. A business with an empty label
            field maps to an all-zero vector. Lines without a comma are
            skipped, as are label tokens that are not integers or do not fit
            the vector.
        """
        with open(file_name,"r") as f:
            lines = f.readlines()[1:]
        biz_id_to_label = {}
        for line in lines:
            fields = line.split(",")
            if len(fields) < 2:
                # Blank or malformed line: nothing to record.
                continue
            vector = np.zeros(num_classes)
            biz_id_to_label[fields[0]] = vector
            for label in fields[1].split():
                try:
                    vector[int(label)] = 1
                except (ValueError, IndexError):
                    # Best effort: ignore one bad token, keep the rest.
                    continue
        return biz_id_to_label

    def photo_to_biz_id(self, file_name):
        """Parse a ``photo id,business id`` CSV into a dict.

        The first line is treated as a header and skipped, and lines without
        a comma are ignored. If a photo id occurs more than once, the last
        occurrence wins.

        Args:
            file_name: path of the CSV file.

        Returns:
            dict of photo id -> business id (both strings).
        """
        with open(file_name,"r") as f:
            lines = f.readlines()[1:]
        photo_to_biz = {}
        for line in lines:
            fields = line.split(",")
            if len(fields) < 2:
                continue
            photo_to_biz[fields[0]] = fields[1].rstrip()
        return photo_to_biz
        

In [None]:
# Build the batch loader over the test photos and their two lookup CSVs.
util = image_util(
    data_dir='/home/rendaxuan/Documents/workspace/4032/test_photos',
    biz_label_file_name='/home/rendaxuan/Documents/workspace/4032/test.csv',
    photo_biz_file_name='/home/rendaxuan/Documents/workspace/4032/test_photo_to_biz_ids.csv',
)

In [None]:
# Load the DenseNet-121 network with its pretrained weights.
model = DenseNet(reduction=0.5, classes=1000, weights_path='./densenet121_weights_tf.h5')
print(model.layers[-1].output_shape)
# Use the third-from-last layer as the feature output, i.e. the layer that is
# last once the final two layers are dropped. Indexing instead of calling
# model.layers.pop() leaves the loaded model untouched and still works when
# `layers` returns a copy of the list, where pop() would silently do nothing.
# NOTE(review): presumably the two skipped layers are the classifier Dense and
# its softmax - confirm against densenet121.py.
feature_layer = model.layers[-3]
print(feature_layer.output_shape)
output = feature_layer.output
new_model = Model(model.input, output)
# The model is only used for predict() below; compile settings are kept as-is.
sgd = SGD(lr=1e-2, decay=1e-6, momentum=0.9, nesterov=True)
new_model.compile(optimizer=sgd, loss='binary_crossentropy', metrics=['accuracy'])

In [None]:
# Run every batch through the truncated network and collect the feature
# vectors together with their labels.
batch_size = 1000
images_features = []
images_labels = []
images, labels, go_on = util.next_batch(batch_size)
counter = 0
import time
start = time.time()
# A batch is only consumed while its flag is 1; the batch that arrives with
# flag 0 marks the end of the pass and is not processed.
while go_on:
    images_features.append(new_model.predict(images))
    images_labels.append(labels)
    images, labels, go_on = util.next_batch(batch_size)
    counter += 1
    # Progress is derived from the real batch size and dataset size rather
    # than hard-coded numbers, and counts the batch that just finished.
    processed = min(counter * batch_size, util.image_len)
    print("==================================================")
    print(str(processed * 100.0 / util.image_len)+"%   "+str(time.time()-start))
    start = time.time()
# Stack the per-batch arrays into one row-per-image matrix each.
features = np.vstack(images_features)
features_labels = np.vstack(images_labels)

In [None]:
# Show the dimensions of the stacked label matrix.
np.shape(features_labels)

In [None]:
# Write the stacked feature matrix to disk in .npy format.
np.save(
    file='/home/rendaxuan/Documents/workspace/4032/features_test.npy',
    arr=features,
)

In [None]:
# Write the stacked label matrix to disk in .npy format.
np.save(
    file='/home/rendaxuan/Documents/workspace/4032/features_labels_test.npy',
    arr=features_labels,
)