# Experiment Workbench

In [1]:
import os, sys, math, datetime
import pathlib
from pathlib import Path
import numpy as np
import random
from matplotlib import pyplot as plt
import PIL
import PIL.Image

import tensorflow as tf
import tensorflow_datasets as tfds
from tensorflow import keras
from tensorflow.keras.layers import Input, Dense, Flatten, Conv2D,DepthwiseConv2D, MaxPooling2D, AvgPool2D, GlobalAveragePooling2D, BatchNormalization, Concatenate
from tensorflow.keras.layers import ReLU
from tensorflow.keras.models import Model
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping
 
# Import the necessary MLTK APIs
from mltk.core import view_model, summarize_model, profile_model

#from dotenv import load_dotenv
import wandb
from wandb.keras import WandbCallback
#import deeplake


In [2]:
# DANGER ZONE: Disable warning messages
# Raises absl's logging threshold to ERROR, so absl messages below that level
# (warnings included) are no longer shown.

import absl.logging
absl.logging.set_verbosity(absl.logging.ERROR)

In [None]:
Waiting for W&B process to finish (success)

In [3]:

# Report the TensorFlow version in use.
print(f"Tensorflow version {tf.__version__}")
AUTOTUNE = tf.data.AUTOTUNE

# Confirm that TensorFlow can access GPU: an empty device name means none was found.
device_name = tf.test.gpu_device_name()
if device_name:
  print(f'Found GPU at: {device_name}')
else:
  raise SystemError('GPU device not found')

Tensorflow version 2.10.0
Found GPU at: /device:GPU:0


In [4]:
# Load the TensorBoard notebook extension. This line only loads the extension;
# it does not launch a TensorBoard session by itself.
%load_ext tensorboard

In [5]:
# W&B reports that it cannot detect the notebook name; setting this variable
# manually enables code saving.
#os.environ['WANDB_NOTEBOOK_NAME'] = 'Experiment Workbench'

# Image resolution used by the dataset loader (pixels).
IMG_HEIGHT = 96
IMG_WIDTH = 96
BATCH_SIZE = 32
EPOCHS = 5
#LOGGING_STEPS = 64
LR = 0.0001
DROPOUT = 0.2

PROJECT = "Tiny CNN"
SEED = 42
# Seed Python's `random`, NumPy and TensorFlow in one call. Seeding TensorFlow
# alone leaves the other two generators (both imported by this notebook) unseeded.
tf.keras.utils.set_random_seed(SEED)

# Prepare the Lemon Quality Dataset

In [6]:
# <parent of the working directory>/lemon_dataset/docs/data
data_dir = Path.cwd().parent / "lemon_dataset" / "docs" / "data"

In [7]:
# Earlier per-cell settings, kept commented out; the loader call further down
# passes the IMG_WIDTH / IMG_HEIGHT / BATCH_SIZE constants instead.
#batch_size = 32
#img_height = 92
#img_width = 92
# Module-level seed that get_lemon_quality_dataset passes as `seed=` to
# image_dataset_from_directory for all three splits.
shuffle_seed = 42

def get_lemon_quality_dataset(dataset_path, img_width, img_height, batch_size, normalize=True):
    """Load the train/val/test splits of the lemon quality dataset and print dataset info.

    Each split is read with ``tf.keras.utils.image_dataset_from_directory`` from a
    sub-folder of ``dataset_path``. Images are resized to ``(img_height, img_width)``
    and grouped into batches of ``batch_size``. The module-level ``shuffle_seed`` is
    passed as the ``seed`` for every split.

    Args:
        dataset_path (Path | str): the file location of the dataset. Subfolders
            "train", "val" and "test" are expected.
        img_width (int): width the images are resized to.
        img_height (int): height the images are resized to.
        batch_size (int): number of images per batch.
        normalize (boolean): Normalizes the image data to range [0, 1]. Default: True

    Returns:
        (train_ds, val_ds, test_ds, class_names): the three datasets plus the
        class names reported by the training split.

    Raises:
        FileNotFoundError: if ``dataset_path`` or one of the expected sub-folders
            is missing.
    """
    dataset_path = Path(dataset_path)

    # Validate the folder layout up front so a wrong path gives a clear error
    # instead of failing later on an undefined directory variable.
    split_dirs = {name: dataset_path.joinpath(name) for name in ("train", "val", "test")}
    missing = [str(folder) for folder in split_dirs.values() if not folder.is_dir()]
    if missing:
        raise FileNotFoundError(
            f"Please check the folder structure of {dataset_path}. "
            f"Missing folder(s): {', '.join(missing)}"
        )

    def _load_split(split_dir):
        # Single loader for all splits so their settings cannot drift apart.
        return tf.keras.utils.image_dataset_from_directory(
            split_dir,
            subset=None,
            seed=shuffle_seed,
            image_size=(img_height, img_width),
            batch_size=batch_size,
        )

    print("Preparing training dataset...")
    train_ds = _load_split(split_dirs["train"])

    # Read the class names before any .map() call: the mapped dataset is a new
    # object and does not carry this attribute.
    class_names = train_ds.class_names

    print("Preparing validation dataset...")
    val_ds = _load_split(split_dirs["val"])

    print("Preparing test dataset...")
    test_ds = _load_split(split_dirs["test"])

    # Normalize the data to the range [0, 1]
    if normalize:
        normalization_layer = tf.keras.layers.Rescaling(1./255)

        def _rescale(images, targets):
            # Only the images are rescaled; the labels pass through unchanged.
            return normalization_layer(images), targets

        train_ds = train_ds.map(_rescale)
        val_ds = val_ds.map(_rescale)
        test_ds = test_ds.map(_rescale)

    print(f"Class names: {class_names}")
    print(train_ds.element_spec)
    print(f"Normalize: {normalize}")
    return (train_ds, val_ds, test_ds, class_names)

In [8]:
# Dataset folder below the working directory; the last line displays whether it exists.
dataset_path = Path.cwd() / "datasets" / "lemon_dataset"
dataset_path.exists()

True

In [9]:
# Load all three splits with the resolution and batch size from the config cell.
train_ds, val_ds, test_ds, labels = get_lemon_quality_dataset(
    dataset_path,
    img_width=IMG_WIDTH,
    img_height=IMG_HEIGHT,
    batch_size=BATCH_SIZE,
)

Preparing training dataset...
Found 2021 files belonging to 3 classes.
Preparing validation dataset...
Found 252 files belonging to 3 classes.
Preparing test dataset...
Found 255 files belonging to 3 classes.
Class names: ['bad_quality', 'empty_background', 'good_quality']
(TensorSpec(shape=(None, 96, 96, 3), dtype=tf.float32, name=None), TensorSpec(shape=(None,), dtype=tf.int32, name=None))
Normalize: True


In [10]:
# Number of target classes, taken from the class-name list returned by the loader.
classes = len(labels)
print(f"The dataset contains {classes} classes.")

The dataset contains 3 classes.


In [11]:
# Fetch one batch for inspection. take(1) ends the iteration after the first
# batch, so the whole training set is no longer collected into a Python list
# just to read its first entry.
element = next(iter(train_ds.take(1).as_numpy_iterator()))

In [12]:
# The first entry of the (images, labels) pair is the image batch; show its shape.
batch_images = element[0]
batch_images.shape


(32, 96, 96, 3)

# Define the model

In [13]:
# MobileNet with width multiplier alpha=0.25 and randomly initialised weights
# (weights=None). The input shape is taken from the config constants so the
# model cannot drift from the resolution the dataset loader resizes to.
mobilenet = tf.keras.applications.mobilenet.MobileNet(
    input_shape=(IMG_HEIGHT, IMG_WIDTH, 3),
    alpha=0.25,
    depth_multiplier=1,
    dropout=DROPOUT,
    include_top=True,
    weights=None,  # 'imagenet'
    input_tensor=None,
    pooling=None,
    classes=classes,
    classifier_activation='softmax',
)

In [14]:
# Name of the model object as reported by Keras; used when building the log
# directory path.
MODELNAME = mobilenet.name
print(MODELNAME)

mobilenet_0.25_96


In [15]:
# Timestamped log directory: logs/<model name>/<YYYYmmdd-HHMMSS>
logdir = os.path.join("logs", MODELNAME, datetime.datetime.now().strftime("%Y%m%d-%H%M%S"))
# Root directory that train_model() passes to wandb.tensorboard.patch()
root_logdir = os.getcwd()

In [16]:
# optimize the data flow: let tf.data choose the prefetch buffer size for the
# training and validation pipelines
AUTOTUNE = tf.data.AUTOTUNE
train_ds, val_ds = (split.prefetch(AUTOTUNE) for split in (train_ds, val_ds))



def train_model():
    """Train the module-level ``mobilenet`` model and log the run to Weights & Biases.

    Reads the notebook globals ``PROJECT``, ``BATCH_SIZE``, ``DROPOUT``, ``LR``,
    ``EPOCHS``, ``classes``, ``root_logdir``, ``mobilenet``, ``train_ds`` and
    ``val_ds``.

    Returns:
        (history, model): the object returned by ``model.fit`` and the trained
        model, which is the same object as the global ``mobilenet``.

    Raises:
        Any exception raised during compilation or training is re-raised after
        the W&B run has been closed with a non-zero exit code.
    """
    # start with a clean TensorFlow session
    tf.keras.backend.clear_session()

    # solve issue from: https://github.com/wandb/wandb/issues/3536
    if len(wandb.patched["tensorboard"]) > 0:
        wandb.tensorboard.unpatch()

    # Configure Tensorboard root log directory to read the debugging information
    wandb.tensorboard.patch(root_logdir=root_logdir)

    # NOTE(review): no TensorBoard callback is attached to fit() below, so
    # sync_tensorboard presumably has nothing to sync unless one is added.
    wandb.init(
        # Set the project where this run will be logged
        project=PROJECT,
        sync_tensorboard=True,
    )

    try:
        # Track hyperparameters and run metadata
        config = wandb.config
        config.batch_size = BATCH_SIZE
        config.dropout = DROPOUT
        config.learn_rate = LR
        config.epochs = EPOCHS
        config.classes = classes

        # NOTE: this is the shared global instance, so a second call to
        # train_model() continues from the already-trained weights.
        model = mobilenet
        model.compile(
            # Apply the configured learning rate; the string 'adam' would
            # silently use the optimizer's default rate instead of LR.
            optimizer=tf.keras.optimizers.Adam(learning_rate=LR),
            loss='sparse_categorical_crossentropy',
            metrics=['accuracy'],
        )

        wandb_callback = WandbCallback()

        # NOTE(review): a patience of 50 only has an effect for runs longer
        # than 50 epochs.
        early_stopping = EarlyStopping(monitor="val_accuracy", patience=50)

        checkpoint = ModelCheckpoint("my_tiny_model", save_weights_only=True)

        callbacks = [
            wandb_callback,
            checkpoint,
            early_stopping,
        ]

        history = model.fit(
            train_ds,
            epochs=EPOCHS,
            validation_data=val_ds,
            callbacks=callbacks,
        )
    except BaseException:
        # Close the run even when training fails or is interrupted, and mark
        # it as failed, so no run is left open.
        wandb.finish(exit_code=1)
        raise
    else:
        wandb.finish()

    return history, model


In [17]:
# Close any W&B run that is still open (presumably left over from an earlier,
# interrupted execution) before starting a new training run.
wandb.finish()
history, model = train_model()

Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.
[34m[1mwandb[0m: Currently logged in as: [33msusbrock[0m. Use [1m`wandb login --relogin`[0m to force relogin




Epoch 1/5


INFO:tensorflow:Assets written to: i:\tinyml\tiny_cnn\wandb\run-20221128_140237-2uengde2\files\model-best\assets
[34m[1mwandb[0m: Adding directory to artifact (i:\tinyml\tiny_cnn\wandb\run-20221128_140237-2uengde2\files\model-best)... Done. 0.1s


Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [None]:
#history.history["loss"]
# Names of the metrics recorded in the training history.
history.history.keys()

dict_keys(['loss', 'accuracy', 'val_loss', 'val_accuracy'])

In [None]:
# Parameters recorded for the fit() call.
# NOTE(review): the saved output below reports 'epochs': 50, while EPOCHS is 5
# and the training log lists 5 epochs. The output looks stale; re-run this cell.
history.params

{'verbose': 1, 'epochs': 50, 'steps': 64}