In [1]:
import os
from os import path
import sys
import math
import cv2 # Read raw image
import glob
import random
import numpy as np
import pickle
from matplotlib import pyplot as plt
from scipy import ndimage # For rotation task or
import imutils
from skimage.color import rgb2lab, lab2rgb, rgb2gray
from skimage.io import imsave
from skimage.transform import resize
from pprint import pprint
# from numpy.linalg import norm
from tqdm import tqdm, tqdm_notebook

import tensorflow
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.utils import data_utils
from tensorflow.keras.preprocessing.image import Iterator
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, UpSampling2D, AveragePooling2D, MaxPooling2D, Reshape, Conv2DTranspose, ZeroPadding2D, Add
from tensorflow.keras.layers import Activation, InputLayer, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.layers import PReLU


# Check to see if GPU is being used
print(tensorflow.test.gpu_device_name())
print("Num GPUs Available: ", tf.config.experimental.list_physical_devices('GPU'))
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

/device:GPU:0
Num GPUs Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Num GPUs Available:  2


In [15]:
# DATA_PATH = "/home/satyarth934/data/modis_data_products/*/array_3bands_normalized/448/*"
# DATA_PATH = "/home/satyarth934/data/modis_data_products/terra/array_3bands_adapted/448/mean_stdev_removed/*" # <- needs to be normalized
DATA_PATH = "/home/satyarth934/data/modis_data_products/terra/array_3bands_adapted/448/median_removed/*" # <- needs to be normalized
NORMALIZE = True

MODEL_NAME = "baseAE_median"
BASE_DIR = "/home/satyarth934/code/FDL_2020/"

OUTPUT_MODEL_PATH = BASE_DIR + "Models/" + MODEL_NAME
TENSORBOARD_LOG_DIR = BASE_DIR + "tb_logs/" + MODEL_NAME
ACTIVATION_IMG_PATH = BASE_DIR + "activation_viz/" + MODEL_NAME
PATH_LIST = BASE_DIR + "Features/" + MODEL_NAME + "_filenames.pkl"
FEATURES_OUTPUT = BASE_DIR + "Features/" + MODEL_NAME + "_features.pkl"

NUM_EPOCHS = 200

In [16]:
# NEW MODIS DATASET

img_paths = glob.glob(DATA_PATH)
print("len(img_paths):", len(img_paths))
random.seed(a=13521)
random.shuffle(img_paths)

train_test_split = 0.8
X_test_paths = img_paths[int(train_test_split*len(img_paths)):]

dims=(448,448,3)

# Loading Testing Data
X_test = np.empty((len(X_test_paths),*dims))
for i, p in enumerate(X_test_paths):
    X_test[i,:,:,:] = np.load(p)

print("X_test:", X_test.shape)

# To check what percentage of pixels are 'nan'
print(np.sum(np.isnan(X_test)) / np.prod(X_test.shape))

# Checking min max to see if normalization is needed or not
print("Before normalization")
print(np.nanmin(X_test), np.nanmax(X_test))

# Normalize Inputs
def normalize(mat):
    valid_cells = np.invert(np.isnan(mat))
    normalized = np.subtract(mat, np.nanmin(mat), where=valid_cells) / (np.nanmax(mat) - np.nanmin(mat))
    return normalized

X_test = normalize(X_test)

# Checking min max after normalization 
print("After normalization")
print(np.nanmin(X_test), np.nanmax(X_test))


len(img_paths): 365
X_test: (73, 448, 448, 3)
0.10003613763570497
Before normalization
-0.3387008713849291 0.9136310171941195
After normalization
0.0 1.0


In [17]:
# Load Model
model = load_model("../Models/" + MODEL_NAME)
# print(model.summary())

In [18]:
# Function to featurize the input
def extract_features(img_array, model):
#     img_array = image.img_to_array(img)
    expanded_img_array = np.expand_dims(img_array, axis=0)
#     preprocessed_img = preprocess_input(expanded_img_array)
    features = model.predict(expanded_img_array)
    flattened_features = features.flatten()
    normalized_features = flattened_features / np.linalg.norm(flattened_features)
    return normalized_features

# Dataset location

# Featurize all input images
feature_list = []
for i in tqdm_notebook(range(len(X_test))):
    feature_list.append(extract_features(X_test[i,:,:,:], model))

Please use `tqdm.notebook.tqdm` instead of `tqdm.tqdm_notebook`
  from ipykernel import kernelapp as app


HBox(children=(FloatProgress(value=0.0, max=73.0), HTML(value='')))




In [19]:
feature_list[0].shape

(602112,)

In [20]:
# Save the features and the filelist order for later use.
pickle.dump(feature_list, file=open((FEATURES_OUTPUT), mode = 'wb'))
pickle.dump(X_test_paths, file = open((PATH_LIST), mode = 'wb'))