In [None]:
import os

import cv2
import numpy as np
import skimage
import tensorflow as tf
from imgaug.imgaug import augmenters as iaa
from keras.models import Model, Sequential
from sklearn import decomposition
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier

from densenet121 import DenseNet

In [None]:
# Image augmentation pipeline; the augmenters are applied in the order listed.
# The original list contained iaa.Fliplr(0.5) twice. Two independent 50% flips
# still flip exactly half of the images overall, so the duplicate only added
# work and has been removed.
seq = iaa.Sequential([
    iaa.Crop(px=(0, 16)),                           # crop each side by 0 to 16 px (randomly chosen)
    iaa.Fliplr(0.5),                                # horizontally flip 50% of the images
    iaa.GaussianBlur(sigma=(0, 3.0)),               # blur with a sigma of 0 to 3.0
    iaa.CropAndPad(percent=(-0.25, 0.25)),          # crop (negative) or pad (positive) by up to 25%
    iaa.Add((-30, 30)),                             # add a value between -30 and 30 to the pixels
    iaa.Flipud(0.5),                                # vertically flip 50% of the images
    iaa.Superpixels(p_replace=0.5, n_segments=64),  # superpixel effect, 64 segments
    iaa.Dropout(p=(0, 0.2)),                        # drop between 0% and 20% of the pixels
    iaa.Affine(rotate=(-45, 45)),                   # rotate by -45 to 45 degrees
])


In [None]:
class image_util:
    """Loads a labelled subset of a photo directory and serves it in batches.

    Attributes:
        images: array holding every successfully decoded image, each resized
            to 299x299.
        labels: array with one multi-hot label row per entry of ``images``.
        batch_index: position in ``images`` at which the next batch starts.
    """

    def __init__(self, data_dir, biz_label_file_name, photo_biz_file_name,
                 max_images=100):
        """Read the label tables and load up to ``max_images`` photos.

        Args:
            data_dir: directory that contains the ``.jpg`` photos.
            biz_label_file_name: CSV mapping a business id to its labels.
            photo_biz_file_name: CSV mapping a photo id to a business id.
            max_images: number of photo paths to consider; ``None`` means all.

        Raises:
            KeyError: if a photo id or its business id is missing from the
                CSV tables.
        """
        self.batch_index = 0
        # os.listdir returns names in arbitrary order; sorting makes the
        # selected subset the same on every run.
        image_paths = sorted(
            os.path.join(data_dir, name)
            for name in os.listdir(data_dir)
            if name.endswith('.jpg') and not name.startswith("._")
        )
        one_hot = self.read_csv_one_hot(biz_label_file_name)
        photo_biz = self.photo_to_biz_id(photo_biz_file_name)

        images = []
        labels = []
        for path in image_paths[:max_images]:
            img = cv2.imread(path)
            # cv2.imread returns None for unreadable files. `img == None` on a
            # decoded array compares elementwise and cannot be used in `if`.
            if img is None:
                continue
            photo_id = os.path.basename(path).split(".")[0]
            labels.append(one_hot[photo_biz[photo_id]])
            images.append(cv2.resize(img, (299, 299), interpolation=cv2.INTER_AREA))
        self.labels = np.asarray(labels)
        self.images = np.asarray(images)
        print(self.labels.shape)

    def next_batch(self, batch_size):
        """Return the next ``batch_size`` samples and advance the cursor.

        Samples are taken consecutively starting at ``batch_index``; when the
        end of the data is reached the batch continues from the first sample.

        Args:
            batch_size: number of samples to return.

        Returns:
            Tuple ``(imgs, labels)`` whose rows correspond to each other.

        Raises:
            ValueError: if no images were loaded.
        """
        total = self.images.shape[0]
        if total == 0:
            raise ValueError("no images were loaded; cannot build a batch")
        # Modular positions handle the wrap-around (and batch_size > total)
        # without special-casing the end of the array.
        positions = (self.batch_index + np.arange(batch_size)) % total
        self.batch_index = (self.batch_index + batch_size) % total
        return self.images[positions], self.labels[positions]

    def read_csv_one_hot(self, file_name, num_classes=9):
        """Parse a ``business_id,labels`` CSV into multi-hot label vectors.

        The first line is treated as a header. The label column is a
        whitespace-separated list of integer class ids; a business with an
        empty label column gets an all-zero vector.

        Args:
            file_name: path of the CSV file.
            num_classes: length of each label vector.

        Returns:
            Dict mapping the business id (str) to a float array of length
            ``num_classes``.
        """
        biz_id_to_label = {}
        with open(file_name, "r") as f:
            next(f, None)  # skip the header row
            for line in f:
                fields = line.rstrip("\r\n").split(",")
                if len(fields) < 2:
                    continue  # blank or malformed line
                encoded = np.zeros(num_classes)
                for token in fields[1].split():
                    # Best effort, as before: unusable label tokens are
                    # ignored instead of aborting the whole file.
                    try:
                        class_id = int(token)
                    except ValueError:
                        continue
                    if 0 <= class_id < num_classes:
                        encoded[class_id] = 1
                biz_id_to_label[fields[0]] = encoded
        return biz_id_to_label

    def photo_to_biz_id(self, file_name):
        """Parse a ``photo_id,business_id`` CSV into a dict.

        The first line is treated as a header; lines without a comma are
        skipped.

        Args:
            file_name: path of the CSV file.

        Returns:
            Dict mapping the photo id (str) to its business id (str).
        """
        photo_to_biz = {}
        with open(file_name, "r") as f:
            next(f, None)  # skip the header row
            for line in f:
                fields = line.split(",")
                if len(fields) < 2:
                    continue  # blank or malformed line
                photo_to_biz[fields[0]] = fields[1].rstrip()
        return photo_to_biz
        


In [None]:
# Load the training photos together with their business-level labels.
utils = image_util(
    data_dir='./train_photos',
    biz_label_file_name='./train.csv',
    photo_biz_file_name='./train_photo_to_biz_ids.csv',
)

In [None]:
# Resize every loaded image to 224x224 and normalise each of the first three
# channels as (value - offset) * 0.017.
channel_offsets = (103.94, 116.78, 123.68)
images_input = []
for im in utils.images:
    im = cv2.resize(im, (224, 224)).astype(np.float32)
    for channel, offset in enumerate(channel_offsets):
        im[:, :, channel] = (im[:, :, channel] - offset) * 0.017
    images_input.append(im)
images_input = np.asarray(images_input)

# Augmentation is currently switched off: range(0) never enters the loop body.
# When enabled, each pass appends one augmented copy of the last image `im`.
for _ in range(0):
    images = np.expand_dims(im, 0)
    images_aug = seq.augment_images(images)
    images_input = np.concatenate((images_input, images_aug), axis=0)

In [None]:
# Show the dimensions of the preprocessed image array.
np.shape(images_input)

In [None]:
# Load DenseNet-121 with its pretrained weights and 1000-class head.
model = DenseNet(reduction=0.5, classes=1000, weights_path='./densenet121_weights_tf.h5')
print(model.layers[-1].output_shape)
# The network is used as a feature extractor without its last two layers
# (presumably the classifier layer and its activation; confirm in densenet121.py).
# `model.layers.pop()` only edits a Python list and leaves the graph behind
# `model.predict` unchanged, so build a model that ends at the third-from-last
# layer instead. Arguments are positional to work with both Keras 1 and 2.
model = Model(model.input, model.layers[-3].output)
print(model.layers[-1].output_shape)

In [None]:
# Network outputs for every preprocessed image; later cells reuse `pred`.
pred = model.predict(images_input)
pca = PCA(n_components=10)
low_dim = pca.fit_transform(pred)
print(low_dim.shape)
neigh = KNeighborsClassifier(n_neighbors=7)
# Smoke test with placeholder labels: one random class id per row of `pred`.
# The count was hard-coded to 101, which does not follow the number of rows in
# `pred`, and KNeighborsClassifier.fit rejects X and y with different sample
# counts. A seeded generator keeps the cell reproducible, and a 1-D label
# vector avoids the column-vector conversion warning.
labels = np.random.RandomState(0).randint(10, high=1000, size=pred.shape[0])
print(labels)
neigh.fit(low_dim, labels)
# `im` is the last image left over from the preprocessing loop.
neigh.predict(pca.transform(model.predict(np.expand_dims(im, 0))))

In [None]:
# Fit PCA + k-NN on the network outputs using the real multi-hot labels.
pca = PCA(n_components=10)
low_dim = pca.fit_transform(pred)
print(low_dim.shape)
neigh = KNeighborsClassifier(n_neighbors=7)
neigh.fit(low_dim, utils.labels)
# `pred` already holds model.predict(images_input), so slice it instead of
# running the network a second time on the same ten images.
# NOTE(review): these ten samples are part of the data the classifier was
# fitted on, so this is not a held-out evaluation.
knn_pred = neigh.predict(pca.transform(pred[:10]))

In [None]:
# Predict labels for the first ten images. `pred` already holds
# model.predict(images_input), so reuse it rather than re-running the network.
knn_pred = neigh.predict(pca.transform(pred[:10]))

In [None]:
# Ground-truth label rows for the same first ten images.
utils.labels[0:10]

In [None]:
# Norm of the difference between the predicted and true labels of the first ten images.
np.linalg.norm(np.subtract(knn_pred, utils.labels[:10]))

In [None]:
# Project the network outputs onto nine principal components, printing the
# shape before and after the projection.
X = pred
pca = PCA(n_components=9).fit(X)  # fit() returns the fitted estimator
print(X.shape)
X = pca.transform(X)
print(X.shape)