In [None]:
!pip install wandb --upgrade

In [None]:
import wandb
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import glob
import datetime

from wandb.keras import WandbCallback
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from keras.utils import Sequence
from keras.layers import AveragePooling2D, Input, Conv2D
from sklearn.model_selection import StratifiedKFold
from skimage.io import imread,imshow
from skimage.transform import resize
from sklearn.utils import shuffle

# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# (Re)load the TensorBoard notebook extension.
%reload_ext tensorboard

# Log this session in to Weights & Biases before any run/sweep is created.
wandb.login()

In [None]:
class MergedGenerators(Sequence):
    """Sequence that concatenates the batches of several generators.

    For a given index, one batch is taken from every wrapped generator
    (wrapping around with ``index % len(generator)``) and the samples are
    concatenated into a single batch.

    Args:
        batch_size: Nominal size of a merged batch; used only by ``__len__``.
        generators: Indexable generators exposing ``class_mode``, ``__len__``
            and ``__getitem__``. Defaults to an empty list.
        sub_batch_size: Per-generator batch sizes, parallel to ``generators``.
            Defaults to an empty list.
    """

    def __init__(self, batch_size, generators=None, sub_batch_size=None):
        # ``None`` sentinels replace the former ``[]`` defaults: a list default
        # is created once and would be shared by every instance built without
        # explicit arguments.
        self.generators = [] if generators is None else generators
        self.sub_batch_size = [] if sub_batch_size is None else sub_batch_size
        self.batch_size = batch_size

    def __len__(self):
        """Return the number of merged batches.

        Computed as the estimated sample count (batches * sub-batch size,
        summed over generators) divided by ``batch_size``, truncated to int.
        """
        estimated_samples = sum(
            len(self.generators[idx]) * size
            for idx, size in enumerate(self.sub_batch_size))
        return int(estimated_samples / self.batch_size)

    def __getitem__(self, index):
        """Return the merged batch for ``index``.

        Returns ``np.array(X)`` when the first generator has
        ``class_mode is None``; otherwise ``(np.array(X), np.array(Y))``.
        """
        X_batch = []
        Y_batch = []
        for generator in self.generators:
            # Wrap around so shorter generators are reused.
            batch = generator[index % len(generator)]
            if generator.class_mode is None:
                # Unlabelled generator: the batch is the samples themselves.
                X_batch.extend(batch)
            else:
                samples, labels = batch
                X_batch.extend(samples)
                Y_batch.extend(labels)

        # The first generator decides whether labels are returned.
        if self.generators[0].class_mode is None:
            return np.array(X_batch)
        return np.array(X_batch), np.array(Y_batch)


def build_datagenerator(dir1=None, dir2=None, dir3=None, batch_size=32):
    """Build one merged training generator from three image directories.

    Each directory gets a share of ``batch_size`` proportional to the number
    of files found under it (counted recursively with ``os.walk``); the third
    directory receives whatever remains after the first two shares are
    truncated to integers, so the shares always sum to ``batch_size``.

    Args:
        dir1, dir2, dir3: Root directories passed to ``flow_from_directory``.
        batch_size: Total number of samples per merged batch.

    Returns:
        A ``MergedGenerators`` wrapping the three directory iterators.
    """
    directories = [dir1, dir2, dir3]
    file_counts = [
        sum(len(files) for _, _, files in os.walk(directory))
        for directory in directories
    ]
    total_files = sum(file_counts)

    first_share = int((file_counts[0] * batch_size) / total_files)
    second_share = int((file_counts[1] * batch_size) / total_files)
    # Remainder goes to the last directory so the shares add up exactly.
    third_share = batch_size - first_share - second_share
    sub_batch_sizes = [first_share, second_share, third_share]

    datagen = ImageDataGenerator(rescale=1. / 255)

    generators = [
        datagen.flow_from_directory(
            directory,
            target_size=(600, 600),
            color_mode='rgb',
            class_mode='binary',
            batch_size=share,
            shuffle=True,
            seed=171)
        for directory, share in zip(directories, sub_batch_sizes)
    ]

    # Fix: the third entry used to repeat the second generator's batch size,
    # which made MergedGenerators.__len__ count the wrong number of samples.
    return MergedGenerators(
        batch_size,
        generators=generators,
        sub_batch_size=sub_batch_sizes)




In [None]:
def residual_module(layer_in, n_filters):
    """Apply a two-convolution residual block to ``layer_in``.

    The main path is two 'same'-padded ReLU convolutions whose kernel size is
    read from ``wandb.config.kernel_size``. The skip path is the input itself,
    or a 1x1 ReLU convolution of it when the input's last dimension differs
    from ``n_filters``. Both paths are added and passed through a ReLU.
    """
    kernel = wandb.config.kernel_size

    # Skip connection: project with a 1x1 conv only when channel counts differ.
    needs_projection = layer_in.shape[-1] != n_filters
    shortcut = (
        Conv2D(n_filters, (1, 1), padding='same', activation='relu',
               kernel_initializer='he_normal')(layer_in)
        if needs_projection else layer_in
    )

    # Main path: two identical convolutions applied back to back.
    conv_options = {'padding': 'same', 'activation': 'relu'}
    main_path = Conv2D(n_filters, kernel, **conv_options)(layer_in)
    main_path = Conv2D(n_filters, kernel, **conv_options)(main_path)

    merged = add([main_path, shortcut])
    return Activation('relu')(merged)

In [None]:
from keras.models import Model
from keras.layers import *
from keras.utils import plot_model
from keras.activations import relu, sigmoid

def MyModel():
    """Assemble the residual CNN configured by ``wandb.config``.

    Reads ``conv_filters``, ``kernel_size`` and ``dropout`` from
    ``wandb.config``. The network takes 600x600x3 inputs, applies an average
    pool and a stem convolution, then three stages (average pool followed by
    two residual modules) with 1x, 2x and 3x the base filter count, and ends
    with global average pooling, dropout and a single sigmoid unit.

    Prints the model summary and returns the (uncompiled) model.
    """
    cfg = wandb.config
    base_filters = cfg.conv_filters
    kernel = cfg.kernel_size

    inputs = Input((600, 600, 3))

    # Stem: downsample once, then a plain convolution.
    x = AveragePooling2D(pool_size=(2, 2))(inputs)
    x = Conv2D(base_filters, (kernel, kernel), activation='relu')(x)

    # Three stages, each: downsample, then two residual modules.
    for width in (base_filters, base_filters * 2, base_filters * 3):
        x = AveragePooling2D(pool_size=(2, 2))(x)
        for _ in range(2):
            x = residual_module(x, width)

    # Classification head.
    x = GlobalAveragePooling2D()(x)
    x = Dropout(cfg.dropout)(x)
    outputs = Dense(units=1, activation='sigmoid')(x)

    model = Model(inputs=inputs, outputs=outputs)
    model.summary()
    return model

In [None]:
def train():
    """Run one W&B-tracked training job: build data, fit, evaluate, log.

    Hyperparameters come from ``wandb.config`` (the defaults below, overridden
    by any values a sweep supplies). The model is trained on the three
    training folds and validated/evaluated on the preliminary test set; the
    final error rate is logged to W&B as 'Test Error Rate'.
    """
    # Specify the hyperparameters to be tuned along with an initial value.
    # 'dropout' is included because MyModel() reads wandb.config.dropout and
    # would otherwise fail when train() is run outside of a sweep.
    config_defaults = {
        'batch_size': 8,
        'learning_rate': 1,
        'conv_filters': 16,
        'kernel_size': 3,
        'dropout': 0
    }

    # Initialize wandb with the default configuration
    wandb.init(config=config_defaults)

    # Specify the other hyperparameters to the configuration, if any
    wandb.config.epochs = 20

    # Prepare train dataset
    train_ds = build_datagenerator(dir1='/kaggle/input/leukemia-classification/C-NMC_Leukemia/training_data/fold_0/',
                                   dir2='/kaggle/input/leukemia-classification/C-NMC_Leukemia/training_data/fold_1/',
                                   dir3='/kaggle/input/leukemia-classification/C-NMC_Leukemia/training_data/fold_2/',
                                   batch_size=wandb.config.batch_size)

    # Prepare validation dataset
    generator = ImageDataGenerator(rescale=1. / 255)
    val_dir = '../input/leukemia-classification/C-NMC_Leukemia/validation_data/'
    val_df = pd.read_csv(val_dir + 'C-NMC_test_prelim_phase_data_labels.csv', dtype=str)
    # Swap the '0' and '1' labels, using '2' as a temporary placeholder.
    # NOTE(review): presumably this aligns the CSV labels with the class
    # indices assigned by flow_from_directory for training — confirm.
    val_df.loc[(val_df['labels'] == '0'), ['labels']] = '2'
    val_df.loc[(val_df['labels'] == '1'), ['labels']] = '0'
    val_df.loc[(val_df['labels'] == '2'), ['labels']] = '1'
    # The former `prefetch=` argument was dropped: it is not a parameter of
    # flow_from_dataframe (prefetching belongs to tf.data pipelines).
    val_ds = generator.flow_from_dataframe(val_df,
                                           directory=val_dir + 'C-NMC_test_prelim_phase_data',
                                           x_col="new_names",
                                           y_col="labels",
                                           class_mode="binary",
                                           target_size=(600, 600),
                                           validate_filenames=False,
                                           batch_size=wandb.config.batch_size)

    # Initialize model with hyperparameters
    keras.backend.clear_session()
    model = MyModel()

    # Compile the model (`learning_rate` replaces the deprecated `lr` alias)
    opt = tf.keras.optimizers.SGD(learning_rate=wandb.config.learning_rate)
    model.compile(opt, loss='binary_crossentropy', metrics=['acc'])
    model.summary()

    # Train the model
    _ = model.fit(train_ds,
                  epochs=wandb.config.epochs,
                  validation_data=val_ds,
                  callbacks=[WandbCallback()])  # WandbCallback to automatically track metrics

    # Evaluate
    loss, accuracy = model.evaluate(val_ds, callbacks=[WandbCallback()])
    error_rate = round((1 - accuracy) * 100, 2)
    print('Test Error Rate: ', error_rate)
    wandb.log({'Test Error Rate': error_rate})  # wandb.log to track custom metrics

In [None]:
# Sweep definition: Bayesian search minimising `val_loss`, with Hyperband
# early termination, over the discrete hyperparameter grids listed below.
sweep_config = {
    'method': 'bayes',
    'metric': {'name': 'val_loss', 'goal': 'minimize'},
    'early_terminate': {'type': 'hyperband', 'min_iter': 5},
    'parameters': {
        name: {'values': candidates}
        for name, candidates in (
            ('batch_size', [16, 32]),
            ('learning_rate', [0.1, 0.05, 0.01, 0.005, 0.001, 0.0005]),
            ('conv_filters', [16, 32, 48, 64]),
            ('kernel_size', [5, 7, 9, 11]),
            ('dropout', [0, 0.2, 0.5, 0.8]),
        )
    },
}

In [None]:
# Register the sweep described by `sweep_config` under the "LC-ResNet" project
# and keep the id that wandb.sweep() returns.
sweep_id = wandb.sweep(sweep_config, project="LC-ResNet")
# Bare expression so the notebook displays the sweep id as the cell output.
(sweep_id)

In [None]:
# NOTE(review): this hard-coded id overwrites the `sweep_id` returned by
# wandb.sweep() in the previous cell, so a Run All will use this id instead
# of the freshly created sweep. Presumably kept to resume an existing sweep —
# confirm, and run only one of the two cells.
sweep_id = '33i5anlw'