<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports" data-toc-modified-id="Imports-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports</a></span></li><li><span><a href="#Data-Augmentation/Analysis" data-toc-modified-id="Data-Augmentation/Analysis-2"><span class="toc-item-num">2&nbsp;&nbsp;</span>Data Augmentation/Analysis</a></span></li><li><span><a href="#Dataloader-creation-and-test" data-toc-modified-id="Dataloader-creation-and-test-3"><span class="toc-item-num">3&nbsp;&nbsp;</span>Dataloader creation and test</a></span></li><li><span><a href="#Model-creation" data-toc-modified-id="Model-creation-4"><span class="toc-item-num">4&nbsp;&nbsp;</span>Model creation</a></span></li><li><span><a href="#Model-Training" data-toc-modified-id="Model-Training-5"><span class="toc-item-num">5&nbsp;&nbsp;</span>Model Training</a></span><ul class="toc-item"><li><span><a href="#Model-Testing" data-toc-modified-id="Model-Testing-5.1"><span class="toc-item-num">5.1&nbsp;&nbsp;</span>Model Testing</a></span></li></ul></li></ul></div>

**GET DATA**

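Summary of this notebook: collects the hurricane image paths, derives a storm category (TD, TS, C1–C5) for each image from the wind speed encoded in its file name, and builds a small class-balanced train/validation subset (all remaining images form the test set) for the dataloader, model creation, training and testing sections.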

Definition of Done: ...

# Imports


In [2]:
import os
from os import path
import sys
import math
import cv2 # Read raw image
import glob
import random
import numpy as np
import pickle
from matplotlib import pyplot as plt
from scipy import ndimage # For rotation task or
import imutils
from skimage.color import rgb2lab, lab2rgb, rgb2gray
from skimage.io import imsave
from skimage.transform import resize
from pprint import pprint
import tqdm

import tensorflow
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications import VGG16
from tensorflow.keras import Model
from tensorflow.keras.layers import Dense, Flatten, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.utils import data_utils
from tensorflow.keras.preprocessing.image import Iterator
from tensorflow.keras import Input
from tensorflow.keras.layers import Conv2D, Conv2DTranspose, UpSampling2D, AveragePooling2D, MaxPooling2D, Reshape, Conv2DTranspose, ZeroPadding2D, Add
from tensorflow.keras.layers import Activation, InputLayer, BatchNormalization
from tensorflow.keras.callbacks import TensorBoard
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img
from tensorflow.keras.layers import PReLU

sys.path.append("/home/satyarth934/code/FDL_2020/training_scripts")
import utils

# Check to see if GPU is being used: print the GPU device name TensorFlow
# reports, then the list of physical GPU devices and how many there are.
print(tf.test.gpu_device_name())
gpu_devices = tf.config.experimental.list_physical_devices('GPU')
print("Num GPUs Available: ", gpu_devices)
print("Num GPUs Available: ", len(gpu_devices))

/device:GPU:0
Num GPUs Available:  [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU'), PhysicalDevice(name='/physical_device:GPU:1', device_type='GPU')]
Num GPUs Available:  2


In [3]:
# ---- Input data ----
# Glob pattern: every entry one directory level below the hurricanes folder.
DATA_PATH = "/home/satyarth934/data/nasa_impact/hurricanes/*/*"

# ---- Run settings ----
NORMALIZE = True
MODEL_NAME = "baseAE_hurricane_try2"
NUM_EPOCHS = 200
BATCH_SIZE = 64
INTERPOLATE_DATA_GAP = False

# presumably (height, width, channels) of the model input — TODO confirm
dims = (448, 448, 3)

# ---- Output locations (each one is keyed by MODEL_NAME) ----
OUTPUT_MODEL_PATH = "/home/satyarth934/code/FDL_2020/Models/{}".format(MODEL_NAME)
TENSORBOARD_LOG_DIR = "/home/satyarth934/code/FDL_2020/tb_logs/{}".format(MODEL_NAME)
ACTIVATION_IMG_PATH = "/home/satyarth934/code/FDL_2020/activation_viz/{}".format(MODEL_NAME)
# The saved path list lives inside the activation-visualisation folder.
PATH_LIST_LOCATION = ACTIVATION_IMG_PATH + "/train_test_paths.npy"

# Dataloader creation and test
img_paths = glob.glob(DATA_PATH)
print("len(img_paths):", len(img_paths))

len(img_paths): 5345


In [4]:
# wind_speed in knots
def getCategory(wind_speed):
    """Return the storm category label for a wind speed given in knots.

    The bands are contiguous, so fractional speeds (e.g. 33.5) are classified
    instead of falling into the gap between two integer ranges. For integer
    input the result is the same as the inclusive ranges
    TD <= 33, TS 34-63, C1 64-82, C2 83-95, C3 96-112, C4 113-136, C5 >= 137.

    Args:
        wind_speed: wind speed in knots (int or float).

    Returns:
        One of 'TD', 'TS', 'C1', 'C2', 'C3', 'C4', 'C5', or None when the
        value does not compare against the thresholds (e.g. NaN).
    """
    # (exclusive upper bound in knots, label), in ascending order.
    upper_bounds = (
        (34, 'TD'),
        (64, 'TS'),
        (83, 'C1'),
        (96, 'C2'),
        (113, 'C3'),
        (137, 'C4'),
    )
    for bound, label in upper_bounds:
        if wind_speed < bound:
            return label
    if wind_speed >= 137:
        return 'C5'
    # Only reached when every comparison is False (NaN); no category applies.
    return None


def classname(str):
    """Map an image path to its storm category label.

    The wind speed is read from the file name: the text before the first "."
    is split on "_", any leading/trailing "k", "t" or "s" characters are
    stripped from the last piece, and the remainder is parsed as an integer
    (so a name ending in "_64kts.jpg" yields 64). That value is passed to
    getCategory.

    Args:
        str: "/"-separated file path. The name shadows the builtin but is
            kept so existing keyword callers keep working.

    Returns:
        Whatever getCategory returns for the parsed wind speed.

    Raises:
        ValueError: if the extracted token is not an integer literal.
    """
    base_name = str.rsplit("/", 1)[-1]
    stem = base_name.partition(".")[0]
    speed_token = stem.rpartition("_")[-1]
    knots = int(speed_token.strip("kts"))
    return getCategory(knots)

In [5]:
# Tally how many image paths fall into each storm category.
class_count = {}
for imgpath in tqdm.tqdm(img_paths):
    cid = classname(imgpath)
    # A missing key counts as zero, so first and later sightings share one path.
    class_count[cid] = class_count.get(cid, 0) + 1

print(class_count)

100%|██████████| 5345/5345 [00:00<00:00, 298238.06it/s]

{'TS': 2720, 'TD': 1611, 'C3': 186, 'C1': 378, 'C2': 264, 'C4': 153, 'C5': 33}





In [29]:
# Build a small class-balanced pool (at most num_samples_per_class images per
# category, taken in img_paths order), hold out 20% of the pool for
# validation, and treat every image outside the pool as test data.
num_samples_per_class = 70
SPLIT_SEED = 0  # fixed seed so the validation draw is the same on every run

balanced_pool = []
class_count = {}
for imgpath in tqdm.tqdm(img_paths):
    cid = classname(imgpath)
    # Check the cap before adding so that a cap of 0 admits no images.
    if class_count.get(cid, 0) >= num_samples_per_class:
        continue
    class_count[cid] = class_count.get(cid, 0) + 1
    balanced_pool.append(imgpath)

pprint(class_count)

# Private RNG: reproducible sampling that leaves the global `random` state alone.
# NOTE(review): the draw is only reproducible if glob returns img_paths in the
# same order on each run — confirm, or sort img_paths upstream.
split_rng = random.Random(SPLIT_SEED)
tiny_valid_subset = split_rng.sample(balanced_pool,
                                     int(0.2 * len(balanced_pool)))

# Membership filters (rather than list(set - set)) keep the original path
# order, so the resulting lists do not depend on string-hash ordering.
pool_members = set(balanced_pool)
valid_members = set(tiny_valid_subset)
tiny_train_subset = [p for p in balanced_pool if p not in valid_members]
test_subset = [p for p in img_paths if p not in pool_members]

print("len(tiny_train_subset):", len(tiny_train_subset))
print("len(tiny_valid_subset):", len(tiny_valid_subset))
print("len(test_subset):", len(test_subset))

100%|██████████| 5345/5345 [00:00<00:00, 483742.34it/s]

{'C1': 70, 'C2': 70, 'C3': 70, 'C4': 70, 'C5': 33, 'TD': 70, 'TS': 70}
len(tiny_train_subset): 363
len(tiny_valid_subset): 90
len(test_subset): 4892





In [42]:
# Integer index assigned to each storm category label for one-hot encoding.
# NOTE(review): indices are not ordered by intensity (TS -> 0, TD -> 6) —
# confirm this is intended.
cid_num_map = {
    "C1": 1,
    "C2": 2,
    "C3": 3,
    "C4": 4,
    "C5": 5,
    "TD": 6,
    "TS": 0,
}

# Sanity check shown as the cell output: one-hot vector for "C1", with one
# slot per entry in the map.
tf.keras.utils.to_categorical(cid_num_map["C1"], num_classes=len(cid_num_map))

array([0., 1., 0., 0., 0., 0., 0.], dtype=float32)