In [7]:
import os
import wandb
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental.preprocessing import Rescaling
from PIL import Image

## Preparing training and validation sets 

In [2]:
####################################################################
# Training set (no augmentation) and validation set
####################################################################

# Both subsets are read from the same directory with identical settings; only
# the `subset` argument differs, so the shared arguments are kept in one dict.
data_dir = 'nature_12K/inaturalist_12K/train'
split_kwargs = dict(
    directory=data_dir,
    labels='inferred',
    label_mode='categorical',
    color_mode='rgb',
    batch_size=32,
    image_size=(256, 256),
    shuffle=True,
    seed=17,                 # identical seed in both calls, together with validation_split
    validation_split=0.2,
)

train_data = tf.keras.preprocessing.image_dataset_from_directory(
    subset='training', **split_kwargs)

val_data = tf.keras.preprocessing.image_dataset_from_directory(
    subset='validation', **split_kwargs)

# Keep only the first 25 percent of the batches of each subset; the remaining
# batches are dropped. The batch counts are stored because later cells reuse them.
keep_fraction = 0.25
len_train = len(train_data)
len_val = len(val_data)
train_data = train_data.take(int(keep_fraction * len_train))
val_data = val_data.take(int(keep_fraction * len_val))

Found 9999 files belonging to 10 classes.
Using 8000 files for training.
Found 9999 files belonging to 10 classes.
Using 1999 files for validation.


In [3]:
################################################################
# Training set with augmentation
################################################################

# Random transformations applied to the training images only.
train_data_augmenter = ImageDataGenerator(
    rescale=None,
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    brightness_range=[0.2, 1.5],
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    data_format='channels_last',
    validation_split=0.2,
)

# The validation generator is configured with the split only: no augmentation.
val_data_augmenter = ImageDataGenerator(validation_split=0.2)

# Same shuffle/seed settings for both directory iterators.
flow_kwargs = dict(shuffle=True, seed=17)
train_aug_gen = train_data_augmenter.flow_from_directory(
    data_dir, subset='training', **flow_kwargs)
val_aug_gen = val_data_augmenter.flow_from_directory(
    data_dir, subset='validation', **flow_kwargs)

# Wrap the Keras iterators as tf.data datasets yielding (images, labels)
# batches with the declared element types and shapes.
generator_spec = dict(
    output_types=(tf.float32, tf.float32),
    output_shapes=([None, 256, 256, 3], [None, 10]),
)
train_aug_data = tf.data.Dataset.from_generator(lambda: train_aug_gen, **generator_spec)
val_aug_data = tf.data.Dataset.from_generator(lambda: val_aug_gen, **generator_spec)

# Limit both datasets to 25 percent of the batch counts (len_train / len_val)
# measured on the non-augmented datasets in the previous cell.
train_aug_data = train_aug_data.take(int(0.25 * len_train))
val_aug_data = val_aug_data.take(int(0.25 * len_val))

Found 8000 images belonging to 10 classes.
Found 1999 images belonging to 10 classes.


## Setting up wandb sweeps

In [4]:
###############################################
# Hyperparameter search space for the wandb sweep
###############################################

# Candidate values for every hyperparameter, in the order they appear in the
# sweep configuration. Entries with a single candidate are effectively fixed.
search_grid = [
    ('first_layer_filters', [32, 64]),
    ('filter_org', [0.5, 1, 2]),     # filter count halves, stays the same, or doubles in subsequent layers
    ('data_aug', [True]),
    ('batch_norm', [True]),
    ('dropout', [0.0, 0.2, 0.3]),
    ('kernel_size', [3]),
    ('dense_size', [32, 64, 128]),
    ('activation', ['relu']),
    ('num_epochs', [50]),
    ('optimizer', ['adam']),
    ('conv_layers', [5]),
]

# Each hyperparameter is wrapped as {'values': [...]} for the sweep definition.
parameters_dict = {hp_name: {'values': choices} for hp_name, choices in search_grid}

# Random search that maximizes the metric named 'val_acc'.
sweep_config = {
    'name': 'random-test-sweep',
    'method': 'random',
    'metric': {'name': 'val_acc', 'goal': 'maximize'},
    'parameters': parameters_dict,
}

In [5]:
#####################################
# Defining the train function
#####################################
def CNN_train(config=sweep_config):
    """Open a wandb run and build the CNN described by the run's hyperparameters.

    Architecture: input of shape (256, 256, 3) -> pixel values multiplied by
    1/255 -> `conv_layers` blocks of [Conv2D -> optional BatchNormalization ->
    activation -> 2x2 MaxPooling] -> Flatten -> Dense(`dense_size`) -> optional
    BatchNormalization -> activation -> optional Dropout -> Dense(10, softmax).

    Parameters
    ----------
    config : dict, optional
        Passed to `wandb.init` as the run's config. The hyperparameters that
        are actually read (`first_layer_filters`, `filter_org`, `data_aug`,
        `batch_norm`, `dropout`, `kernel_size`, `dense_size`, `activation`,
        `conv_layers`) are taken from `wandb.config` once the run is open.

    Returns
    -------
    tf.keras.Model
        The assembled, uncompiled model.

    Notes
    -----
    This function only constructs the model. It does not compile or fit it and
    logs no metrics, and `num_epochs` and `optimizer` are not read here.
    """
    with wandb.init(config=config):
        # Read the hyperparameters of the run opened by the `with` statement.
        # Calling wandb.init() a second time here would request another run
        # inside the context manager instead of reusing this one.
        config = wandb.config
        wandb.run.name = 'firstLayerFilters_{}_filterOrg_{}_dataAug_{}_batchNorm_{}_dropout_{}_kerSize_{}_denseSize_{}'.format(config.first_layer_filters, config.filter_org, config.data_aug, config.batch_norm, config.dropout, config.kernel_size, config.dense_size)

        ###########################################
        # Initializing the model architecture
        ###########################################
        inputs = tf.keras.Input(shape = (256, 256, 3))
        x = Rescaling(scale = 1.0/255)(inputs)

        # Layer k gets first_layer_filters * filter_org**k filters. The result is
        # truncated to an int and clamped to at least 1 so that a shrinking
        # filter_org can never ask Conv2D for zero filters.
        filter_sizes = [max(1, int(config.first_layer_filters*(config.filter_org**layer_num)))
                        for layer_num in range(config.conv_layers)]
        ker_size = config.kernel_size

        # Convolution blocks: conv -> (batch norm) -> activation -> max pooling
        for num_filters in filter_sizes:
            x = layers.Conv2D(filters = num_filters, kernel_size = (ker_size, ker_size))(x)
            if config.batch_norm:
                x = layers.BatchNormalization(axis = -1)(x)
            x = layers.Activation(config.activation)(x)
            x = layers.MaxPooling2D(pool_size = (2, 2))(x)

        # Dense layer: dense -> (batch norm) -> activation -> (dropout)
        x = layers.Flatten()(x)
        x = layers.Dense(config.dense_size)(x)
        if config.batch_norm:
            x = layers.BatchNormalization(axis = -1)(x)
        x = layers.Activation(config.activation)(x)
        if config.dropout > 0:
            x = layers.Dropout(rate = config.dropout)(x)

        # Output layer: one softmax unit per class (10 classes)
        outputs = layers.Dense(10, activation ='softmax')(x)
        model = tf.keras.Model(inputs = inputs, outputs = outputs)

        # Hand the model back to the caller instead of discarding it.
        return model

In [None]:
# Environment variables for wandb (notebook name) and TensorFlow (XLA flags),
# set before the sweep is created.
os.environ.update({
    'WANDB_NOTEBOOK_NAME': 'Part-A-wandb.ipynb',
    'TF_XLA_FLAGS': '--tf_xla_enable_xla_devices',
})

# Register the sweep under the project, then start an agent in this kernel that
# calls CNN_train for each configuration it receives. No `count` is passed to
# the agent, so no run limit is set here.
sweep_id = wandb.sweep(sweep_config, project='DL-Assignment2-PartA-10')
wandb.agent(sweep_id, function=CNN_train)

Create sweep with ID: a3m2nh0j
Sweep URL: https://wandb.ai/cs6910-team/DL-Assignment2-PartA-10/sweeps/a3m2nh0j


wandb: Agent Starting Run: 7108wrkh with config:
wandb: 	activation: relu
wandb: 	batch_norm: True
wandb: 	conv_layers: 5
wandb: 	data_aug: True
wandb: 	dense_size: 128
wandb: 	dropout: 0.3
wandb: 	filter_org: 2
wandb: 	first_layer_filters: 64
wandb: 	kernel_size: 3
wandb: 	num_epochs: 50
wandb: 	optimizer: adam
wandb: Currently logged in as: cs6910-team (use `wandb login --relogin` to force relogin)


VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

wandb: Agent Starting Run: 32z3fz6d with config:
wandb: 	activation: relu
wandb: 	batch_norm: True
wandb: 	conv_layers: 5
wandb: 	data_aug: True
wandb: 	dense_size: 64
wandb: 	dropout: 0.2
wandb: 	filter_org: 1
wandb: 	first_layer_filters: 64
wandb: 	kernel_size: 3
wandb: 	num_epochs: 50
wandb: 	optimizer: adam


VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

wandb: Agent Starting Run: ryia85dw with config:
wandb: 	activation: relu
wandb: 	batch_norm: True
wandb: 	conv_layers: 5
wandb: 	data_aug: True
wandb: 	dense_size: 128
wandb: 	dropout: 0.3
wandb: 	filter_org: 2
wandb: 	first_layer_filters: 64
wandb: 	kernel_size: 3
wandb: 	num_epochs: 50
wandb: 	optimizer: adam


VBox(children=(Label(value=' 0.02MB of 0.02MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

wandb: Agent Starting Run: no91rni2 with config:
wandb: 	activation: relu
wandb: 	batch_norm: True
wandb: 	conv_layers: 5
wandb: 	data_aug: True
wandb: 	dense_size: 64
wandb: 	dropout: 0
wandb: 	filter_org: 1
wandb: 	first_layer_filters: 64
wandb: 	kernel_size: 3
wandb: 	num_epochs: 50
wandb: 	optimizer: adam


VBox(children=(Label(value=' 0.04MB of 0.04MB uploaded (0.00MB deduped)\r'), FloatProgress(value=1.0, max=1.0)…

wandb: Agent Starting Run: pzewuz1j with config:
wandb: 	activation: relu
wandb: 	batch_norm: True
wandb: 	conv_layers: 5
wandb: 	data_aug: True
wandb: 	dense_size: 32
wandb: 	dropout: 0
wandb: 	filter_org: 2
wandb: 	first_layer_filters: 32
wandb: 	kernel_size: 3
wandb: 	num_epochs: 50
wandb: 	optimizer: adam
