**Featurize Data**

*Summary of this notebook:*  
Obtain a low-dimensional feature vector for each image in an input dataset using a pretrained model — here a custom colorization CNN truncated at an intermediate layer, rather than an ImageNet-based network such as MobileNet. Load the dataset in a generator object, preprocess it the way the model expects, and run predict on every image to obtain a feature vector. Save the feature vectors and the filenames in separate pickle files.

*Definition of Done:*

In [None]:
# Mount Google Drive inside the Colab VM so the shared project folder is
# reachable under /content/gdrive.
from google.colab import drive
import os
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [None]:
# Work from the shared-drive code directory: the relative paths used later in
# this notebook ("Models/...", "Datasets/...", "Features/...") are resolved
# against the current working directory.
os.chdir("/content/gdrive/Shared drives/2020_FDLUSA_Earth Science_Knowledge Discovery Framework/Code")
# !ls -lht

In [None]:
# Imports from Colab 2
import math
import numpy as np
import pickle
import keras
import tensorflow
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Import pretrained model
# from tensorflow.keras.applications import MobileNet, ResNet50
# from tensorflow.keras.applications.resnet50 import preprocess_input

Using TensorFlow backend.


In [None]:
# Imports for Colab 6
import cv2 # Read raw image
import glob
from google.colab.patches import cv2_imshow
from matplotlib import pyplot as plt
from scipy import ndimage # For rotation task or
import imutils
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam

from tensorflow.python.keras.utils import data_utils
from tensorflow.keras.preprocessing.image import Iterator

# Imports for Colorizer
from os import path
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, UpSampling2D
from tensorflow.keras.layers import Activation, InputLayer, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from skimage.color import rgb2lab, lab2rgb, rgb2gray
from skimage.io import imsave
import random
import tensorflow as tf

In [None]:
# For loading pretrained models
from tensorflow.keras.models import load_model

In [None]:
# Display the name of the GPU device TensorFlow reports for this runtime.
tensorflow.test.gpu_device_name()

'/device:GPU:0'

In [None]:
## FOR UC-MERCED
# dataset = "MODIS_MCD43A4"
# dataPath = ("Datasets/"+ dataset+ "/Globe/Test/test_set/")

## FOR CLOUDSTREET-SMALLER
# dataset="nasa_impact/cloudstreet_smaller"
# dataPath = ("Datasets/"+ dataset+ "/")


# Colorizer needs array setup as well unfortunately
# dims=(256,256,3)
# image_globs=glob.glob(dataPath+'/*/np_arrays/*.npy')
# print(len(image_globs))
# X=np.empty((0,*dims))

# for f in image_globs:
#   image=np.expand_dims(img_to_array(load_img(f,target_size=dims))/255.,0)
#   X=np.vstack((X,image))

In [None]:
# name='merced_x.npy'
# # with open(name, 'wb') as f:
# #   np.save(f,X,allow_pickle=True)
# X=np.load(name)

Import Model



In [None]:
## Basic convolutional LAB colorization model (semi-supervised).
## The path is relative to the current working directory.
modelPath='Models/Colorization_CustomCNN_epoch30_ucmerced'

# Load the saved Keras model from disk.
model=load_model(modelPath)

# Print the layer listing; it is used to pick the layer to truncate at.
model.summary()


Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 256, 256, 32)      896       
_________________________________________________________________
average_pooling2d (AveragePo (None, 85, 85, 32)        0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 43, 43, 32)        9248      
_________________________________________________________________
average_pooling2d_1 (Average (None, 14, 14, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 14, 14, 64)        18496     
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 7, 7, 64)          36928     
_________________________________________________________________
conv2d_4 (Conv2D)            (None, 7, 7, 128)         7

In [None]:
# Index into model.layers (negative, so counted from the end) of the layer
# whose output is taken as the feature map when the sub-model is built.
final_desired_layer=-11

In [None]:
# Truncate the loaded network: same inputs, but the output is taken from the
# layer selected by `final_desired_layer`.
submodel = Model(
    inputs=model.inputs,
    outputs=model.layers[final_desired_layer].output,
)

# Feature-extraction pipeline: the truncated network followed by a Flatten
# layer, so every image is mapped to a single 1-D feature vector.
# (Disabled experiment, kept for reference -- stack a 1x1 conv before flattening:
#  submodel_pipeline.add(Conv2D(20,(1, 1),activation=None, padding='same',use_bias=False,kernel_initializer='ones')))
submodel_pipeline = Sequential([submodel, Flatten()])
submodel_pipeline.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
model (Model)                (None, 4, 4, 128)         1467296   
_________________________________________________________________
flatten (Flatten)            (None, 2048)              0         
Total params: 1,467,296
Trainable params: 1,467,296
Non-trainable params: 0
_________________________________________________________________


Get Data & Preprocess

In [None]:
class CustomDataGenerator(data_utils.Sequence):
    """Keras ``Sequence`` serving grayscale batches built from ``.npy`` image files.

    Every batch is produced by loading the arrays named in ``list_IDs`` with
    ``np.load``, converting them with ``rgb2gray``, dividing by 255 and adding
    a trailing channel axis, which yields an array of shape
    ``(n, dim[0], dim[1], 1)``.

    Parameters
    ----------
    list_IDs : sequence of str
        Paths handed to ``np.load``, one per sample.
    batch_size : int
        Maximum number of samples per batch. The last batch of an epoch is
        smaller when ``len(list_IDs)`` is not a multiple of ``batch_size``.
    dim : tuple of int
        Shape every loaded array is expected to have.
    shuffle : bool
        When true, sample order is re-drawn at construction and after each
        epoch. Pass ``False`` whenever the outputs must stay aligned with
        ``list_IDs`` (e.g. when extracting features per file).
    """

    def __init__(self, list_IDs, batch_size=16, dim=(400,400,3), shuffle=True):
        """Store the configuration and draw the initial sample order."""
        self.dim = dim
        self.batch_size = batch_size
        self.list_IDs = list_IDs
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch.

        Rounds up so that the trailing partial batch is served instead of
        being silently dropped; otherwise some files would never be processed.
        """
        return int(np.ceil(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Build and return batch number ``index``."""
        # Positions (into list_IDs) of the samples belonging to this batch;
        # the slice is simply shorter for the final partial batch.
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Resolve positions to file paths.
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        return self.__data_generation(list_IDs_temp)

    def on_epoch_end(self):
        """Reset the sample order, reshuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load the given files and return them as one grayscale batch.

        Returns an array of shape ``(len(list_IDs_temp), dim[0], dim[1], 1)``.
        Files whose array is smaller than ``dim`` are skipped and their row
        stays all-zero, so row ``i`` always corresponds to ``list_IDs_temp[i]``.
        """
        # Zero-initialised (not np.empty) so skipped rows hold a defined value
        # rather than whatever happened to be in memory.
        X = np.zeros((len(list_IDs_temp), *self.dim))
        expected_extent = np.sum(self.dim)
        for i, f in enumerate(list_IDs_temp):
            x_temp = np.load(f)
            # Undersized arrays cannot fill the slot; leave the row as zeros.
            if np.sum(x_temp.shape) < expected_extent:
                continue
            X[i,] = x_temp

        # rgb2gray collapses the trailing colour axis -> (n, h, w).
        gray_batch = rgb2gray(X)

        # Divide by 255 and restore a single channel axis -> (n, h, w, 1).
        X_batch = np.expand_dims(gray_batch / 255., 3)
        return X_batch


In [None]:
# inference = submodel_pipeline.predict(x=dataGenerator, batch_size=batch_size, use_multiprocessing=True, workers=2)

(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)


  return rgb @ coeffs


(256, 400, 400, 1)


  return rgb @ coeffs


(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)
(256, 400, 400, 1)


In [None]:
# Writing the features to the drive.
# NOTE(review): in the visible cells `modelName` is never assigned, the
# `dataset` assignments are commented out, and `inference` is only computed by
# a later cell -- all three must be defined before this cell can run.
# The context manager guarantees the file is flushed and closed.
with open(("Features/" + modelName + "_" + dataset + "_features.pkl"), mode='wb') as features_file:
    pickle.dump(inference, file=features_file)

In [None]:
# UC Merced image folders, relative to the current working directory.
# Only `combined_path` is read by the visible cells of this notebook.
train_path='Datasets/UCMerced_LandUse/Splits_2/train/'
test_path='Datasets/UCMerced_LandUse/Splits_2/val/'
combined_path='Datasets/UCMerced_LandUse/Images/'

# Keras image generator configured with random rotation, width/height shifts,
# zoom and horizontal flips.
# NOTE(review): this same generator feeds the feature-extraction call below,
# so the random augmentations are applied at inference time too -- confirm
# that this is intended, since it makes the extracted features non-deterministic.
dataGenerator = ImageDataGenerator(rotation_range=20,
                                   width_shift_range=0.1,
                                   height_shift_range=0.1,
                                   zoom_range=0.1,
                                   horizontal_flip=True)

def batch_grayrgb(batch_size, shuffle=False):
  """Yield ``(X_batch, Y_batch)`` pairs read from ``combined_path``.

  ``Y_batch`` is the image batch produced by ``dataGenerator`` and ``X_batch``
  is its grayscale version (via ``rgb2gray``) repeated over three channels, so
  both have shape ``(batch, h, w, 3)``.

  Parameters
  ----------
  batch_size : int
      Number of images requested per batch.
  shuffle : bool, optional
      Forwarded to ``flow_from_directory``. Defaults to ``False`` so batches
      follow the iterator's file order; that is required for the predicted
      features to line up with the iterator's ``filenames`` list. Pass ``True``
      to get the library's shuffled behaviour.

  Notes
  -----
  The Keras directory iterator keeps cycling over the data, so the consumer
  has to bound the iteration itself (e.g. with ``steps``).
  """
  batches = dataGenerator.flow_from_directory(combined_path,
                                              batch_size=batch_size,
                                              shuffle=shuffle)
  for batch in batches:
    images = batch[0]                          # batch_size,h,w,3 (batch[1] holds the labels)
    gray_batch = rgb2gray(images)              # batch_size,h,w
    gray_batch = np.expand_dims(gray_batch, 3) # batch_size,h,w,1
    X_batch = np.repeat(gray_batch, 3, axis=3) # batch_size,h,w,3
    Y_batch = images
    yield X_batch, Y_batch


batch_size = 105

# Number of images under `combined_path` ("Found 2100 images" in the cell output).
num_images = 2100

# `steps` must be an integer; round up so a trailing partial batch would still
# be processed if `batch_size` did not divide `num_images` evenly.
steps = math.ceil(num_images / batch_size)

# `Model.predict` replaces the deprecated `predict_generator` and accepts
# generators directly. The generator is consumed with the Keras defaults
# (single worker, no multiprocessing): with several workers a plain Python
# generator gives no ordering guarantee and may duplicate data, which would
# scramble the rows of `inference` relative to the order batches were produced.
inference = submodel_pipeline.predict(batch_grayrgb(batch_size),
                                      steps=steps,
                                      verbose=1)



Instructions for updating:
Please use Model.predict, which supports generators.
Found 2100 images belonging to 21 classes.


In [None]:
# Sanity check on the extracted features: rows are images, columns are the
# flattened feature dimensions.
print(inference.shape)

(2100, 2048)


In [None]:
# File list of a directory iterator over the same folder.
# NOTE(review): these names only line up row-by-row with `inference` if the
# generator used for prediction iterated the files in this same order (i.e.
# without shuffling) -- verify before relying on the pairing.
fnames = dataGenerator.flow_from_directory(combined_path).filenames

# Shared prefix of both output files.
output_prefix = "Features/" + 'customcolorizer' + "_" + 'merced_combined'

# Context managers make sure each pickle is flushed and its handle closed.
with open(output_prefix + "_features.pkl", mode='wb') as features_file:
    pickle.dump(inference, file=features_file)
with open(output_prefix + "_filenames.pkl", mode='wb') as filenames_file:
    pickle.dump(fnames, file=filenames_file)

Found 2100 images belonging to 21 classes.
