In [None]:
#importing the dataset to be worked on = inaturalist dataset

%%capture
!curl -SL https://storage.googleapis.com/wandb_datasets/nature_12K.zip > nature_12K.zip
!unzip nature_12K.zip

train_dir='inaturalist_12K/train/'  #specifying the train and test data as per available in the dataset.
test_dir='inaturalist_12K/val/'

#specifying the classes as present in the iNaturalist dataset
 
categories=['Amphibia','Animalia','Arachnida','Aves','Fungi','Insecta','Mammalia','Mollusca','Plantae','Reptilia']

#so as we can see there are 10 classes in the inaturalist dataset.

In [None]:
!pip install wandb

In [None]:
#importing the libraries required
import numpy as np
import pandas as pd
import os
import keras
import tensorflow as tf
from tensorflow.keras import layers,models
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Dropout, Dense, Flatten, Activation , BatchNormalization

#importing ImageDataGenarator for data augmentation 
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.pyplot as plt
import cv2

In [None]:
# importing the wandb dependencies. uncomment when using wandb

import wandb
from wandb.keras import WandbCallback

In [3]:
#importing the pretrained models from keras

from tensorflow.keras.applications.resnet50 import ResNet50 
from keras.applications.vgg16 import VGG16
from keras.applications.xception import Xception 
from keras.applications.inception_v3 import InceptionV3 
from keras.applications.inception_resnet_v2 import InceptionResNetV2


In [5]:
# sweep. 
#run this cell when using wandb for sweep

sweep_config = {
  "name": "Bayesian Sweep",
  "method": "bayes",
  "metric":{
  "name": "val_accuracy",
  "goal": "maximize"
  },
  'early_terminate': {
        'type':'hyperband',
        'min_iter': [3],
        's': [2]
  },
  "parameters": {
                    

        "base_model": {
            "values": [ "XCPTN", "IV3", "RN50", "IRV2"]
        },
        "epochs": {
            "values": [ 5, 10, 3]
        }, 
        "dense_neurons": {
            "values": [ 128, 256]
        },
        "batch_size": {
            "values": [ 16, 32]
        },
        "optimizer": {
            "values": [ "adam","nadam","rmsprop"]
        } 
              
    }
}


#this configuration gives the best validation accuracy

config_defaults={
    'fc_size':128,
    'batchnorm':'yes',
    'augmentation':'yes',
    'droprate':0.4,
    'pre_train':'inceptionresnetv2'
}

In [7]:
# defining the function for pretraining

def PreTrain():

  #initializing the wandb sweeps for combinations
  #uncomment it when to enable sweeps

  wandb.init(config=config_defaults)
  configs=wandb.config

  #initializing the sweeps with defaults set
  configs=config_defaults

  batchnorm=configs['batchnorm']
  augmentation=configs['augmentation']
  fc_size=configs['fc_size']
  droprate=configs['droprate']
  pre_train = configs['pre_train']

  #wandb.run.name='model_'+pre_train+'_fc_2_size_'+str(fc_size)+'_droprate_'+str(droprate)+ str('_bn_' if batchnorm=='yes' else '') +str('augment' if augmentation=='yes' else '')

  #setting up a flexible pretrained model 
  #assigning the weights already available as imagenet
  #the input image size as height*width*channels

  #the include_top part when we speify it as false, we chose not to include the fully connected layers and have more freedom on how to work with the model with a given possibility to retrain the weights of the model.

  if pre_train == 'inceptionv3':
    img_height = 299
    img_width = 299
    base_model = InceptionV3(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

  if pre_train == 'inceptionresnetv2':
    img_height = 299
    img_width = 299
    base_model = InceptionResNetV2(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

  if pre_train == 'resnet50':
    img_height = 224
    img_width = 224
    base_model = ResNet50(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

  if pre_train == 'Xception':
    img_height = 299
    img_width = 299
    base_model = Xception(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))  


  for layers in base_model.layers:
    layers.trainable = False

  #setting up the flatten layer

  model = keras.Sequential([
      tf.keras.Input(shape=(img_height, img_width,3,)),
      base_model,
      Flatten(),
      Dense(fc_size,activation='relu'),
      
  ])

  #adding batch normalization
  if batchnorm == 'yes':
    model.add(BatchNormalization())
  
  #adding parameters to the model
  model.add(Dropout(droprate))
  model.add(Dense(fc_size, activation='relu'))
  model.add(Dropout(droprate))

  #adding the last layer with 10 classes and activation function as softmax
  
  model.add(Dense(10 ,activation='softmax'))
  

  #data generators for train(with and without validation) and validation
  augment= ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rescale=1.0 / 255,
    validation_split=0.1,
    dtype=tf.float32,
  )

  datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.1,
    dtype=tf.float32,
  )

  train_set = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    color_mode='rgb',
    class_mode='sparse',
    shuffle=True,
    subset='training',
    seed=123,
  )

  aug_set = augment.flow_from_directory(
      train_dir,
      target_size=(img_height, img_width),
      batch_size=32,
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      subset='training',
      seed=123,
  )

  val_set = datagen.flow_from_directory(
      train_dir,
      target_size=(img_height, img_width),
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      subset='validation',
      seed=123,
  )
 #compiling the whole model with parameters and associating the losses in the model as cross entropy
 
  model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
    metrics=['accuracy'],
  )

  #making the model flexible for augmentation
  # giving the callback for data plot as wandb
  if augmentation =='no' :
    hist=model.fit(train_set,epochs=2,validation_data=val_set,callbacks=[WandbCallback()]) 
    hist=model.fit(train_set,epochs=2,validation_data=val_set)
  
  else:
    hist=model.fit(aug_set,epochs=2,validation_data=val_set,callbacks=[WandbCallback()])
    hist=model.fit(aug_set,epochs=2,validation_data=val_set)

  #defining the validation accuracy
  val_acc=max(hist.history['val_accuracy'])
  params={'batch_norm':batchnorm,'augmentation':augmentation,'dropout':droprate,'pre_trained_model':pre_train,'val_acc':val_acc}
  #wandb.log(params)
  

In [None]:
!wandb login

[34m[1mwandb[0m: You can find your API key in your browser here: https://wandb.ai/authorize
[34m[1mwandb[0m: Paste an API key from your profile and hit enter, or press ctrl+c to quit: 
[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


In [None]:
#wandb sweep
#Ignore this and proceed
sweep_id=wandb.sweep(sweep_config,project="a-2", entity="dl22")
wandb.agent(sweep_id, PreTrain)   



Create sweep with ID: hixpo6a6
Sweep URL: https://wandb.ai/dl22/a-2/sweeps/hixpo6a6


[34m[1mwandb[0m: Agent Starting Run: z4gnaoz9 with config:
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	optimizer: adam
[34m[1mwandb[0m: Currently logged in as: [33mdl22[0m (use `wandb login --relogin` to force relogin)


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='48.517 MB of 48.517 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.71233
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.57467
dropout,0.4
epoch,1
loss,0.92082
pre_trained_model,inceptionresnetv2
val_acc,0.82983


[34m[1mwandb[0m: Agent Starting Run: ofw1r8ao with config:
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	optimizer: rmsprop


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='50.662 MB of 50.662 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.71544
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.57344
dropout,0.4
epoch,1
loss,0.91244
pre_trained_model,inceptionresnetv2
val_acc,0.83884


[34m[1mwandb[0m: Agent Starting Run: 3j0izug1 with config:
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	optimizer: nadam


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='50.832 MB of 50.832 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,█▁
val_loss,█▁

0,1
accuracy,0.71689
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.5926
dropout,0.4
epoch,1
loss,0.90485
pre_trained_model,inceptionresnetv2
val_acc,0.83483


[34m[1mwandb[0m: Agent Starting Run: yx6tllwp with config:
[34m[1mwandb[0m: 	base_model: IRV2
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 3
[34m[1mwandb[0m: 	optimizer: rmsprop


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='49.355 MB of 49.355 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,█▁
val_loss,█▁

0,1
accuracy,0.72178
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.57516
dropout,0.4
epoch,1
loss,0.90404
pre_trained_model,inceptionresnetv2
val_acc,0.83584


[34m[1mwandb[0m: Agent Starting Run: swwy4ltq with config:
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	optimizer: adam


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='194.179 MB of 194.179 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0,…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.72222
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.61638
dropout,0.4
epoch,1
loss,0.90374
pre_trained_model,inceptionresnetv2
val_acc,0.83584


[34m[1mwandb[0m: Agent Starting Run: l2joz5gs with config:
[34m[1mwandb[0m: 	base_model: RN50
[34m[1mwandb[0m: 	batch_size: 32
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	optimizer: adam


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='48.571 MB of 48.571 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, m…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,▁█
val_loss,█▁

0,1
accuracy,0.71978
augmentation,yes
batch_norm,yes
best_epoch,1
best_val_loss,0.61858
dropout,0.4
epoch,1
loss,0.91008
pre_trained_model,inceptionresnetv2
val_acc,0.82683


[34m[1mwandb[0m: Agent Starting Run: 57g8nzvc with config:
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_neurons: 128
[34m[1mwandb[0m: 	epochs: 10
[34m[1mwandb[0m: 	optimizer: adam


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2



VBox(children=(Label(value='0.130 MB of 0.130 MB uploaded (0.000 MB deduped)\r'), FloatProgress(value=1.0, max…

0,1
accuracy,▁█
dropout,▁
epoch,▁█
loss,█▁
val_acc,▁
val_accuracy,█▁
val_loss,▁█

0,1
accuracy,0.719
augmentation,yes
batch_norm,yes
best_epoch,0
best_val_loss,0.62907
dropout,0.4
epoch,1
loss,0.89876
pre_trained_model,inceptionresnetv2
val_acc,0.83383


[34m[1mwandb[0m: Agent Starting Run: hi16vp0g with config:
[34m[1mwandb[0m: 	base_model: IV3
[34m[1mwandb[0m: 	batch_size: 16
[34m[1mwandb[0m: 	dense_neurons: 256
[34m[1mwandb[0m: 	epochs: 5
[34m[1mwandb[0m: 	optimizer: adam


Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2

In [8]:
#Trial run with config defaults
PreTrain()

<IPython.core.display.Javascript object>

[34m[1mwandb[0m: Appending key for api.wandb.ai to your netrc file: /root/.netrc


Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_resnet_v2/inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5
Found 9000 images belonging to 10 classes.
Found 9000 images belonging to 10 classes.
Found 999 images belonging to 10 classes.
Epoch 1/2
Epoch 2/2
Epoch 1/2
Epoch 2/2


In [13]:
#not the correct one
#trial
#discard
#data generator for test set
wandb.init(config=config_defaults)
configs=config_defaults

batchnorm=configs['batchnorm']
droprate=configs['droprate']
augmentation=configs['augmentation']
fc_size=configs['fc_size']
pre_train = configs['pre_train']
 #wandb.run.name='model_'+pre_train+'_fc_2_size_'+str(fc_size)+'_droprate_'+str(droprate)+ str('_bn_' if batchnorm=='yes' else '') +str('augment' if augmentation=='yes' else '')

img_height = 299
img_width = 299
base_model = InceptionResNetV2(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

model = keras.Sequential([
      tf.keras.Input(shape=(img_height, img_width,3,)),
      base_model,
      Flatten(),
      Dense(fc_size,activation='relu'),
      
  ])


model.add(BatchNormalization())
model.add(Dropout(droprate))
model.add(Dense(fc_size, activation='relu'))
model.add(Dropout(droprate))
model.add(Dense(10 ,activation='softmax'))


test_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
  )

test_set = test_datagen.flow_from_directory(
      test_dir,
      target_size=(299,299),
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      seed=123,
  )

model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
    metrics=['accuracy'],
  )


#evaluating model on test set
model.evaluate(test_set)

Found 2000 images belonging to 10 classes.


[2.463688611984253, 0.08550000190734863]

In [None]:
# Defining model with best configs to evaluate on test set  
#initializing the sweeps with defaults set
wandb.init(config=config_defaults)
configs=config_defaults

batchnorm=configs['batchnorm']
droprate=configs['droprate']
augmentation=configs['augmentation']
fc_size=configs['fc_size']
pre_train = configs['pre_train']

  #wandb.run.name='model_'+pre_train+'_fc_2_size_'+str(fc_size)+'_droprate_'+str(droprate)+ str('_bn_' if batchnorm=='yes' else '') +str('augment' if augmentation=='yes' else '')

#setting up a flexible pretrained model 
#assigning the weights already available as imagenet
#the input image size as height*width*channels
  
#the include_top part when we speify it as false, we chose not to include the fully connected layers and have more freedom on how to work with the model with a given possibility to retrain the weights of the model.

if pre_train == 'inceptionv3':
  img_height = 299
  img_width = 299
  base_model = InceptionV3(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

if pre_train == 'inceptionresnetv2':
  img_height = 299
  img_width = 299
  base_model = InceptionResNetV2(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

if pre_train == 'resnet50':
  img_height = 224
  img_width = 224
  base_model = ResNet50(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))

if pre_train == 'Xception':
  img_height = 299
  img_width = 299
  base_model = Xception(include_top=False, weights='imagenet',input_shape=(img_height, img_width,3))  


for layers in base_model.layers:
  layers.trainable = False

#setting up the flatten layer
model = keras.Sequential([
      tf.keras.Input(shape=(img_height, img_width,3,)),
      base_model,
      Flatten(),
      Dense(fc_size,activation='relu'),
      
])

#adding batch normalization
if batchnorm == 'yes':
  model.add(BatchNormalization())

#adding parameters to the model
model.add(Dropout(droprate))
model.add(Dense(fc_size, activation='relu'))
model.add(Dropout(droprate))

#adding the last layer with 10 classes and activation function as softmax 
model.add(Dense(10 ,activation='softmax'))
  

#data generators for train(with and without validation) and validation
augment= ImageDataGenerator(
    rotation_range=30,
    width_shift_range=0.2,
    height_shift_range=0.2,
    zoom_range=0.2,
    horizontal_flip=True,
    rescale=1.0 / 255,
    validation_split=0.1,
    dtype=tf.float32,
)

datagen = ImageDataGenerator(
    rescale=1.0 / 255,
    validation_split=0.1,
    dtype=tf.float32,
)

train_set = datagen.flow_from_directory(
    train_dir,
    target_size=(img_height, img_width),
    batch_size=32,
    color_mode='rgb',
    class_mode='sparse',
    shuffle=True,
    subset='training',
    seed=123,
)

aug_set = augment.flow_from_directory(
      train_dir,
      target_size=(img_height, img_width),
      batch_size=32,
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      subset='training',
      seed=123,
)

val_set = datagen.flow_from_directory(
      train_dir,
      target_size=(img_height, img_width),
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      subset='validation',
      seed=123,
)

#compiling the whole model with parameters and associating the losses in the model as cross entropy
 
model.compile(
    optimizer=tf.keras.optimizers.Adam(),
    loss=[tf.keras.losses.SparseCategoricalCrossentropy()],
    metrics=['accuracy'],
)
  #making the model flexible for augmentation
  # giving the callback for data plot as wandb
if augmentation =='no' :
  hist=model.fit(train_set,epochs=2,validation_data=val_set,callbacks=[WandbCallback()]) 
  hist=model.fit(train_set,epochs=2,validation_data=val_set)
  
else:
  hist=model.fit(aug_set,epochs=2,validation_data=val_set,callbacks=[WandbCallback()])
  hist=model.fit(aug_set,epochs=2,validation_data=val_set)

In [None]:
test_datagen = ImageDataGenerator(
    rescale=1.0 / 255,
  )

test_set = test_datagen.flow_from_directory(
      test_dir,
      target_size=(299,299),
      color_mode='rgb',
      class_mode='sparse',
      shuffle=True,
      seed=123,
  )
model.evaluate(test_set)