This notebook is intended to be run in Google Colab. It bundles all the classes and methods in one place for convenience.
To run it, upload the following files to a `data/` folder in the session storage (the Main cell loads them from `./data`):
- trainX.npy
- trainy.npy

In [None]:
! pip install tensorflow wandb numpy

### Setup

In [None]:
import wandb
from lib.utils.modelUtils import ModelUtils
from lib.utils.dataUtils import DataUtils
from lib.utils.sweep_configs import sweep_config
from lib.models.CNN import cnns
import random
import numpy as np
import keras.backend as K
import tensorflow as tf
from os import environ

# Set random seeds.
# The built-in hash() of a str is salted per interpreter process, so seeds
# derived from it differ on every kernel restart; `hash(...) % 2 ** 32 - 1`
# can also evaluate to -1, which np.random.seed rejects. A fixed integer
# keeps runs repeatable.
SEED = 42
environ['TF_CUDNN_DETERMINISTIC'] = '1'
random.seed(SEED)
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Query the visible GPUs once and reuse the result for both messages.
gpus = tf.config.list_physical_devices('GPU')
print("# GPUs Available: ", len(gpus))
if gpus:
    print('GPU Device Name', tf.test.gpu_device_name())

# Class names of the six target categories.
LABELS = ['buildings', 'forest', 'glacier', 'mountain', 'sea', 'street']

### CNN Configs

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, AveragePooling2D, Flatten, Dense, Dropout

# Every architecture below takes the same input shape and ends in a
# 6-unit softmax layer.
INPUT_SHAPE = (50, 50, 3)

# Registry of candidate CNN architectures, keyed by a short descriptive name.
cnns = {
    # One convolution + pooling stage followed by a single hidden dense layer.
    'simple-net': Sequential([
        Conv2D(filters=32, kernel_size=3, padding='same', activation='relu', input_shape=INPUT_SHAPE),
        MaxPooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(units=64, activation='relu'),
        Dense(6, activation='softmax'),
    ]),
    # Two stacked convolutions, pooling, two dense layers and one dropout.
    'conv-conv-mp-flat-ds-ds-do-ds': Sequential([
        Conv2D(filters=16, kernel_size=3, padding='same', activation='relu', input_shape=INPUT_SHAPE),
        Conv2D(filters=16, kernel_size=5, padding='same', activation='relu'),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(units=100, activation='relu'),
        Dense(units=50, activation='relu'),
        Dropout(0.6),
        Dense(6, activation='softmax'),
    ]),
    # Two stacked convolutions, pooling, and dropout after each hidden dense layer.
    'conv-conv-mp-f-ds-do-ds-do-ds': Sequential([
        Conv2D(filters=16, kernel_size=3, strides=1, padding='same', activation='relu', input_shape=INPUT_SHAPE),
        Conv2D(filters=32, kernel_size=3, strides=1, padding='same', activation='relu'),
        MaxPooling2D(pool_size=2),
        Flatten(),
        Dense(units=128, activation='relu'),
        Dropout(0.6),
        Dense(units=64, activation='relu'),
        Dropout(0.5),
        Dense(6, activation='softmax'),
    ]),
    # LeNet-5 style layout: two conv + average-pooling stages, then 120-84-6 dense layers.
    'le-net-5': Sequential([
        Conv2D(6, (5, 5), activation='relu', input_shape=INPUT_SHAPE),
        AveragePooling2D(pool_size=(2, 2)),
        Conv2D(16, (5, 5), activation='relu'),
        AveragePooling2D(pool_size=(2, 2)),
        Flatten(),
        Dense(120, activation='relu'),
        Dense(84, activation='relu'),
        Dense(6, activation='softmax'),
    ]),
}

### MLP Configs

In [None]:
...

### Sweep Configs

In [None]:
from typing import Dict, Any

# Search space handed to wandb.sweep(): random search that minimises the
# logged 'loss', varying optimizer and learning rate at a fixed epoch count.
sweep_config: Dict[str, Any] = dict(
    method='random',
    metric=dict(goal='minimize', name='loss'),
    parameters=dict(
        optimizer=dict(values=['adam', 'sgd']),
        epochs=dict(value=10),
        learning_rate=dict(values=[0.1, 0.01, 0.001, 0.0001]),
    ),
)

### Model Utils

In [None]:
import wandb
import tensorflow as tf
from wandb.keras import WandbCallback

class ModelUtils:
    '''
    Utility methods for model training, 
    validation, testing and selection.

    Wraps a single Keras model together with its training and validation
    data so that `train` can be handed to a W&B sweep agent as a
    zero-argument callable.
    '''
    def __init__(self, model, training_data, validation_data, batch_size):
        '''
        :param model: the (uncompiled) Keras model to train
        :param training_data: data passed as the first argument of model.fit
        :param validation_data: data passed as validation_data to model.fit
        :param batch_size: batch size forwarded to model.fit
        '''
        self.model = model
        self.training_data = training_data
        self.validation_data = validation_data
        self.batch_size = batch_size
        # Snapshot the untrained weights. `train` may be invoked several times
        # on the same instance (once per sweep run); restoring this snapshot
        # stops a run from starting with the previous run's trained weights.
        try:
            self._initial_weights = model.get_weights()
        except (AttributeError, ValueError):
            # The model exposes no weights yet (e.g. it is not built).
            self._initial_weights = None

    def build_optimizer(self, optimizer: str = 'adam', lr: float = 0.01, momentum: float = 0.0):
        '''
        Creates the optimizer named by `optimizer`.
        :param optimizer: 'adam' selects Adam; any other value selects SGD
        :param lr: learning rate
        :param momentum: momentum, only used by SGD
        :return: a tf.keras optimizer instance
        '''
        if optimizer == 'adam':
            return tf.keras.optimizers.Adam(learning_rate=lr)
        return tf.keras.optimizers.SGD(learning_rate=lr, momentum=momentum)
    
    def build_model(self, config):
        '''
        Builds necessary models components according to 
        config and compiles them into self.model.
        :param config: config passed from config file; must expose
                       `optimizer` and `learning_rate` attributes
        '''
        optimizer = self.build_optimizer(config.optimizer, config.learning_rate, 0.9)
        loss = tf.keras.losses.CategoricalCrossentropy(from_logits=False)
        # The lowercase string 'accuracy' lets Keras pick the accuracy variant
        # that matches the loss. The capitalised 'Accuracy' resolves to the
        # exact-match Accuracy metric, which compares raw softmax outputs with
        # one-hot labels element-wise and is therefore meaningless here.
        metrics = ['accuracy', 'AUC', 'Precision', 'Recall']
        self.model.compile(optimizer, loss=loss, metrics=metrics)
        # summary() prints by itself and returns None, so it is not wrapped in print().
        self.model.summary()

    def train(self):
        '''
        This method trains self.model on self.training_data inside a W&B run.
        Performs validation in an embedded manner, using self.validation_data.
        Takes no arguments so it can be passed directly to wandb.agent.
        @TODO Add cross-validation ?
        '''
        config_defaults = {
            'optimizer': 'adam',
            # Log the batch size that is actually forwarded to fit().
            'batch_size': self.batch_size,
            'learning_rate': 0.01,
            'epochs': 10,
        }
        wandb.init(project='intel-icc', entity='manelfideles', config=config_defaults)
        try:
            config = wandb.config
            # Start every run from the same untrained weights (see __init__).
            initial_weights = getattr(self, '_initial_weights', None)
            if initial_weights:
                self.model.set_weights(initial_weights)
            self.build_model(config)
            # NOTE(review): Keras documents that batch_size should not be given
            # when the data is a generator that already yields batches --
            # confirm whether it should be dropped for such inputs.
            self.model.fit(
                self.training_data,
                validation_data=self.validation_data,
                epochs=config.epochs, 
                batch_size=self.batch_size,
                callbacks=[WandbCallback()]
            )
        finally:
            # Close the run even when compilation or training fails.
            wandb.finish()

### Data Utils

In [None]:
import numpy as np
from PIL import Image as im
from keras.preprocessing.image import ImageDataGenerator
from keras.utils import to_categorical
from sklearn.model_selection import train_test_split

class DataUtils:
    '''
    Utility methods for importing and exporting data, as well
    as for simple pre-processing.
    '''
    def __init__(self, input_dir_path: str, output_dir_path: str, batch_size: int = 512):
        '''
        :param input_dir_path: directory the .npy files are read from
        :param output_dir_path: directory reserved for outputs
        :param batch_size: batch size used by the generators built in load_data
        '''
        self.input_dir_path = input_dir_path
        self.output_dir_path = output_dir_path
        self.batch_size = batch_size

    def load_npy(self, filename: str) -> np.ndarray:
        '''
        Loads `<input_dir_path>/<filename>.npy`.
        :param filename: file name without the .npy extension
        :return: the array stored in the file
        '''
        import os
        # SECURITY NOTE(review): allow_pickle=True lets np.load unpickle
        # arbitrary objects, which can execute code from an untrusted file.
        # Kept for compatibility; only load files from a trusted source.
        return np.load(
            os.path.join(self.input_dir_path, f'{filename}.npy'),
            allow_pickle=True
        )

    def tvt_split(self, x, y, test_size: float = 0.3):
        '''
        Splits x and y once into two parts with a fixed random_state.
        :param x: samples
        :param y: labels
        :param test_size: fraction of the data placed in the held-out part
        :return: x_train, x_test, y_train, y_test
        '''
        return train_test_split(
            x, y,
            test_size=test_size, 
            random_state=1
        )

    def make_image_generator(self, train: bool = True):
        '''
        Builds an ImageDataGenerator.
        :param train: when True (default) the generator rescales and augments;
                      when False it only rescales, so evaluation data is not
                      randomly transformed
        :return: the configured ImageDataGenerator
        '''
        if not train:
            return ImageDataGenerator(rescale = 1./255)
        return ImageDataGenerator(
            rescale = 1./255,
            rotation_range = 20,
            width_shift_range = 0.2,
            height_shift_range = 0.2,
            horizontal_flip = True,
            validation_split = 0.2
        )
    
    def load_data(self, train: bool):
        '''
        Loads 'trainX'/'trainy', one-hot encodes the labels, splits them and
        wraps both parts in batch iterators.
        :param train: when True, returns (train_data, val_data); when False,
                      nothing is loaded and None is returned
        '''
        if train:
            x, y = self.load_npy('trainX'), to_categorical(self.load_npy('trainy'))
            x_train, x_val, y_train, y_val = self.tvt_split(x, y)
            train_datagen = self.make_image_generator()
            train_datagen.fit(x_train)
            # Validation data reuses the training generator's standardize()
            # step (no random augmentation is configured on this generator).
            std_datagen = ImageDataGenerator(
                preprocessing_function=train_datagen.standardize
            )
            train_data = train_datagen.flow(
                x_train,
                y_train,
                batch_size=self.batch_size,
            )
            val_data = std_datagen.flow(x_val, y_val, batch_size=self.batch_size)
            return (train_data, val_data)
        else:
            return None

    def save_model(self, filename: str):
        '''Not implemented yet.'''
        raise NotImplementedError

    def display_image(self, array: np.ndarray) -> 'im.Image':
        '''Converts an array into a PIL image.'''
        return im.fromarray(array)
    
    def _normalize(self, array: np.ndarray) -> np.ndarray:
        '''Divides every value by 255.'''
        return (array / 255)
    
    def _standardize(self, array: np.ndarray) -> np.ndarray:
        '''
        Subtracts the mean and divides by the standard deviation, both
        computed over axes 1 and 2 (so `array` needs at least 3 dimensions).
        Presumably `array` is (batch, height, width, channels) -- TODO confirm.
        Slices with zero standard deviation map to 0 instead of NaN/inf.
        '''
        mean = np.mean(array, axis=(1, 2), keepdims=True)
        std = np.std(array, axis=(1, 2), keepdims=True)
        # A constant slice has std == 0; divide by 1 there to avoid 0/0.
        std[std == 0] = 1
        return ((array - mean) / std)

### Main

In [None]:
# Load the data; the same batch size feeds the generators and the trainer.
BATCH_SIZE = 512
data_utils = DataUtils(
    input_dir_path='./data',
    output_dir_path='./outputs',
    batch_size=BATCH_SIZE,
)
train_data, val_data = data_utils.load_data(train=True)

# Instantiate the model wrapper around the chosen architecture
K.clear_session()
network = cnns['simple-net']
model = ModelUtils(
    model=network,
    training_data=train_data,
    validation_data=val_data,
    batch_size=BATCH_SIZE,
)

# Register the sweep, then let the agent run five trials and log results
wandb.login()
sweep_id = wandb.sweep(sweep_config, entity='manelfideles', project="intel-icc")
trial_count = 5
wandb.agent(sweep_id, model.train, count=trial_count)
wandb.finish()