# Import Modules

In [None]:
# NOTE(review): `dataset` and `config` look like project-local helper modules
# (dataset building utilities and path/name constants) — confirm.
import dataset as ds
import config as cfg

from sklearn.model_selection import train_test_split
import os
from tqdm.auto import tqdm
import matplotlib.pyplot as plt

import tensorflow as tf
# Seed TensorFlow's global random generator right after import.
tf.random.set_seed(42)

from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Conv2D, MaxPooling2D, Flatten, Dense, Rescaling, Dropout
from tensorflow.keras.metrics import Precision, Recall

from clearml import Task
import clearml
# NOTE(review): presumably starts ClearML's browser-based credential flow when
# the notebook has no stored credentials — confirm against the ClearML docs.
clearml.browser_login()


# ClearML
https://app.clear.ml/dashboard

# Import Dataset

In [None]:
# Metadata file shared by both splits
metadata_path = f'{cfg.BASE_DATASET}/metadata.json'

# Training split: image/label folders and the DataFrame built from them
train_images_path, train_labels_path = (
    f'{cfg.BASE_DATASET}/images/train',
    f'{cfg.BASE_DATASET}/labels/train',
)
train_df = ds.create_dataframe(train_images_path, train_labels_path, metadata_path)

# Validation split: same layout under the 'valid' folders
val_images_path, val_labels_path = (
    f'{cfg.BASE_DATASET}/images/valid',
    f'{cfg.BASE_DATASET}/labels/valid',
)
valid_df = ds.create_dataframe(val_images_path, val_labels_path, metadata_path)

# Test Parameters

In [None]:
# Names and locations for this experiment
project_name = cfg.PROJECT_NAME + '/tf_clf'
dataset_name = 'test500'
dataset_dir = cfg.CLF_DATASET_DIR + f'/{dataset_name}'
project_dir = f'{cfg.CLF_PROJECT_DIR}/{dataset_name}/'
class_names = cfg.CLF_CLASS_NAMES

# Training / pre-processing parameters
epochs = 20
zoom_factor = 1.5

# Fixed seed for the sub-sampling below. Without it every run draws a
# different subset, so the sub-dataset written to `dataset_dir` (and every
# result derived from it) changes between runs.
SPLIT_SEED = 42

# Draw a 500-row sample of the training DataFrame. An integer `test_size`
# is an absolute row count; `stratify` keeps the proportions of the 'ac'
# column in the sample. Only the "test" half of the split is kept.
_, test_train_df = train_test_split(
    train_df,
    test_size=500,
    stratify=train_df['ac'],
    random_state=SPLIT_SEED,
)

# Same procedure for a 100-row sample of the validation DataFrame.
_, test_val_df = train_test_split(
    valid_df,
    test_size=100,
    stratify=valid_df['ac'],
    random_state=SPLIT_SEED,
)

# Build the sub-dataset for the sampled rows under `dataset_dir`.
ds.create_sub_dataset(dataset_dir, test_train_df, test_val_df, class_names)


In [None]:
# Pre-processing to AID classification
# Runs the project's classification pre-processing over the sub-dataset in
# `dataset_dir`, passing `zoom_factor` (set in the test-parameters cell) through.
# NOTE(review): what the zoom factor does to each image is defined in the
# `dataset` module and is not visible here — confirm there.

ds.pre_process_dataset_for_classification(dataset_dir, zoom_factor)


In [None]:
# Label-correction step for the sub-dataset in `dataset_dir`, given the sampled
# train/validation DataFrames and the class names.
# NOTE(review): the exact correction applied lives in the `dataset` module — confirm there.
ds.correct_dataset_labels(dataset_dir, test_train_df, test_val_df, class_names)

In [None]:
# Settings for the single augmentation method used here: a flip.
flip_parameters = {
    'orientation': 'h',  # 'h' = horizontal, 'v' = vertical
    'p': 1.0,  # probability of applying the augmentation
}
flip_method = {
    'parameters': flip_parameters,
    'apply_to_percentage': 0.5,  # 50% of the training images
}

# Full augmentation recipe handed to the dataset helper.
augmentation_metadata = {'methods': {'flip': flip_method}}

ds.augment_dataset(dataset_dir, augmentation_metadata)

In [None]:
# Rearrange the sub-dataset for Keras loading. The loading cell below reads
# images/train, images/train-aug and images/valid under `dataset_dir`.
# NOTE(review): presumably this creates one sub-folder per class, which is what
# image_dataset_from_directory infers labels from — confirm in the `dataset` module.
ds.reorganize_dataset_for_keras(dataset_dir)

# Image Classifier Approach 2

In [None]:
# Image folders for the three splits
train_dir = os.path.join(dataset_dir, 'images', 'train')
train_aug_dir = os.path.join(dataset_dir, 'images', 'train-aug')
valid_dir = os.path.join(dataset_dir, 'images', 'valid')

# Loader settings
batch_size = 32
img_height = 256
img_width = 256


def _load_image_split(directory):
    """Load one split from `directory` as a batched, shuffled image dataset.

    All splits share the same settings (batch size, seed 42, target image
    size), so they are loaded through this one helper instead of three
    copy-pasted calls.

    Args:
        directory: Folder passed to image_dataset_from_directory.

    Returns:
        The dataset object returned by image_dataset_from_directory.
    """
    return tf.keras.utils.image_dataset_from_directory(
        directory,
        batch_size=batch_size,
        seed=42,
        image_size=(img_height, img_width),
        shuffle=True)


# load datasets using keras
train_data = _load_image_split(train_dir)
train_aug_data = _load_image_split(train_aug_dir)
valid_data = _load_image_split(valid_dir)

# From here on `class_names` is the list inferred from the training folder.
class_names = train_data.class_names

# Each split infers its own class list from its folder. If they disagree, the
# integer labels of the splits refer to different classes, so stop right away
# instead of training/validating on mismatched labels.
for split_name, split in (('train-aug', train_aug_data), ('valid', valid_data)):
    if split.class_names != class_names:
        raise ValueError(
            f"Class names of the '{split_name}' split {split.class_names} "
            f"do not match the 'train' split {class_names}")

print(class_names)



In [None]:
# test visualise
# Show up to nine images from the first augmented-training batch in a 3x3
# grid, each titled with its class name. `plt` comes from the import cell at
# the top of the notebook, so it is not re-imported here.
fig, axes = plt.subplots(3, 3, figsize=(10, 10))
for images, labels in train_aug_data.take(1):
    # zip() stops at the shortest input, so a batch holding fewer than nine
    # images fills only the panels it can instead of failing on images[i].
    for ax, image, label in zip(axes.flat, images, labels):
        ax.imshow(image.numpy().astype("uint8"))
        ax.set_title(class_names[int(label)])

# Hide ticks and frames on every panel, including any left empty.
for ax in axes.flat:
    ax.axis("off")

In [None]:
def one_hot_enc(image, label):
    """Return the image unchanged together with the one-hot form of `label`.

    The one-hot depth is the current length of the notebook-level
    `class_names` list.
    """
    depth = len(class_names)
    encoded_label = tf.one_hot(label, depth)
    return image, encoded_label

# One-hot encode the labels of every split (same mapping for all three).
train_data, train_aug_data, valid_data = (
    split.map(one_hot_enc)
    for split in (train_data, train_aug_data, valid_data)
)


In [None]:
# Sanity check: print the image and label shapes of the first training batch.
for image_batch, labels_batch in train_data.take(1):
    print(image_batch.shape)
    print(labels_batch.shape)

In [None]:
AUTOTUNE = tf.data.AUTOTUNE

# Training splits: cache, shuffle with a 1000-element buffer, then prefetch.
train_data, train_aug_data = (
    split.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
    for split in (train_data, train_aug_data)
)

# Validation split: cache and prefetch only (no extra shuffle step).
valid_data = valid_data.cache().prefetch(buffer_size=AUTOTUNE)


In [None]:
# model

def get_tf_model(num_classes=None, input_shape=None):
    """Build and compile the small CNN image classifier.

    The network is: Rescaling(1/255), three Conv2D + MaxPooling2D stages
    (16, 32 and 64 filters, 3x3 kernels, 'same' padding, ReLU), Dropout(0.2),
    Flatten, Dense(128, ReLU) and a softmax output layer.

    Args:
        num_classes: Number of output units. Defaults to len(class_names),
            read from the notebook globals at call time.
        input_shape: Shape of one input image. Defaults to
            (img_height, img_width, 3) from the notebook globals.

    Returns:
        A compiled Sequential model (Adam optimizer, categorical
        cross-entropy loss, accuracy / precision / recall metrics).
    """
    if num_classes is None:
        num_classes = len(class_names)
    if input_shape is None:
        input_shape = (img_height, img_width, 3)

    model = Sequential([
        Input(shape=input_shape),
        Rescaling(1./255),
        Conv2D(16, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Conv2D(32, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Conv2D(64, 3, padding='same', activation='relu'),
        MaxPooling2D(),
        Dropout(0.2),
        Flatten(),
        Dense(128, activation='relu'),
        Dense(num_classes, activation='softmax'),
    ])

    # categorical_crossentropy expects one-hot labels. The metric names are
    # set explicitly because the training cells look up 'precision' and
    # 'recall' (and their 'val_' variants) in the fit history.
    model.compile(optimizer='adam',
                  loss='categorical_crossentropy',
                  metrics=['accuracy', Precision(name='precision'), Recall(name='recall')])

    return model


In [None]:
# Number of training epochs used by the training cells below. This re-declares
# the value already set in the test-parameters cell; keep the two in sync.
epochs = 20

In [None]:
# train pure

# local logs directory
# NOTE(review): the augmented training cell writes its TensorBoard logs to this
# same directory, so the two runs share one log folder — confirm that is intended.
logs_dir = cfg.CLF_PROJECT_DIR
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logs_dir)

# connect to clearml
task = Task.init(project_name=project_name, task_name=f"{dataset_name}-pure-eps:{epochs}-zf:{zoom_factor}")
logger = task.get_logger()

# clearml hyperparameters
hyper_params = {'epochs': epochs, 'zoom_factor': zoom_factor, 'batch_size': batch_size, 'img_height': img_height, 'img_width': img_width, 'class_names': class_names}
task.connect(hyper_params)

# drop references left over from a previous run of this cell before rebuilding
model = None
train_hst = None

model = get_tf_model()

# train model
train_hst = model.fit(
    train_data,
    epochs=hyper_params['epochs'],
    validation_data=valid_data,
    callbacks=[tensorboard_callback])

# send metrics to clearML
# Training runs for hyper_params['epochs'], which is not guaranteed to equal
# the notebook-level `epochs` once the dict has been connected to the task.
# Iterate over the epochs actually recorded in the history so the loop can
# neither index past the end nor silently drop epochs.
history = train_hst.history
for epoch in range(len(history['loss'])):
    # Training metrics use the plain key, validation metrics the 'val_' key.
    for series, prefix in (('train', ''), ('validation', 'val_')):
        for metric in ('loss', 'accuracy', 'precision', 'recall'):
            logger.report_scalar(metric, series, iteration=epoch, value=history[prefix + metric][epoch])

# close task
task.close()

In [None]:
# train aug

# local logs directory
# NOTE(review): the pure training cell writes its TensorBoard logs to this same
# directory, so the two runs share one log folder — confirm that is intended.
logs_dir = cfg.CLF_PROJECT_DIR
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logs_dir)

# connect to clearml
task = Task.init(project_name=project_name, task_name=f"{dataset_name}-aug-eps:{epochs}-zf:{zoom_factor}")
logger = task.get_logger()

# clearml hyperparameters
hyper_params = {'epochs': epochs, 'zoom_factor': zoom_factor, 'batch_size': batch_size, 'img_height': img_height, 'img_width': img_width, 'class_names': class_names}
task.connect(hyper_params)

# drop references left over from a previous run of this cell before rebuilding
model_aug = None
train_aug_hst = None

# fresh model, so nothing carries over from the previous test
model_aug = get_tf_model()

# train model
train_aug_hst = model_aug.fit(
    train_aug_data,
    epochs=hyper_params['epochs'],
    validation_data=valid_data,
    callbacks=[tensorboard_callback])

# send metrics to clearML
# Training runs for hyper_params['epochs'], which is not guaranteed to equal
# the notebook-level `epochs` once the dict has been connected to the task.
# Iterate over the epochs actually recorded in the history so the loop can
# neither index past the end nor silently drop epochs.
aug_history = train_aug_hst.history
for epoch in range(len(aug_history['loss'])):
    # Training metrics use the plain key, validation metrics the 'val_' key.
    for series, prefix in (('train', ''), ('validation', 'val_')):
        for metric in ('loss', 'accuracy', 'precision', 'recall'):
            logger.report_scalar(metric, series, iteration=epoch, value=aug_history[prefix + metric][epoch])

# close task
task.close()

In [None]:
# Manual clean-up: close whatever ClearML task `task` currently refers to.
# Both training cells already call task.close() as their last statement, so
# this is presumably only needed after a training cell was interrupted — confirm.
task.close()
































# Image Classifier (redundant)

In [None]:
## check GPU available
#gpus = tf.config.list_physical_devices('GPU')
#print(gpus)
#
## limit vram usage
#for gpu in gpus: tf.config.experimental.set_memory_growth(gpu, True)

In [None]:
# directories of train, train-aug and validation
#train_dir = os.path.join(dataset_dir,'images','train')
#train_aug_dir = os.path.join(dataset_dir,'images','train-aug')
#valid_dir = os.path.join(dataset_dir,'images','valid')
#
## load datasets using keras
#train_data = tf.keras.utils.image_dataset_from_directory(train_dir,
#                                                         batch_size=32,
#                                                        seed=42, 
#                                                         shuffle=True)
#train_aug_data = tf.keras.utils.image_dataset_from_directory(train_aug_dir,
#                                                             batch_size=32,
#                                                             seed=42,
#                                                             shuffle=True)
#valid_data = tf.keras.utils.image_dataset_from_directory(valid_dir,
#                                                         batch_size=32,
#                                                         seed=42,
#                                                         shuffle=True)
#
## scale images between 0 and 1
#train_data = train_data.map(lambda x,y: (x/255, y))
#train_aug_data = train_aug_data.map(lambda x,y: (x/255, y))
#valid_data = valid_data.map(lambda x,y: (x/255, y))

In [None]:
# TEST - use numpy to turn dataset into iterator for ease of use with batches, call each batch with .next()
#train_iterator = train_data.as_numpy_iterator()
#train_batch = train_iterator.next()

In [None]:
# TEST - visualise images with class labels
#train_batch = train_iterator.next()
#fig, ax = plt.subplots(ncols=4, figsize=(20, 20))
#for idx, img in enumerate(train_batch[0][:4]):
#    ax[idx].imshow(img)
#    ax[idx].title.set_text(train_batch[1][idx])


In [None]:
# use numpy to turn dataset into iterator for ease of use with batches, call each batch with .next()
#train_data.as_numpy_iterator().next()
#train_aug_data.as_numpy_iterator().next()
#valid_data.as_numpy_iterator().next()
#
#print("done")

In [None]:
# create a CNN model
#def get_model():
#
#    tf.random.set_seed(42)
#    # model architecture - Also try ResNet50 or VGG16
#    model = Sequential()
#    model.add(Input(shape=(256,256,3)))  # Add an Input layer to specify the input shape
#    model.add(Conv2D(32, (3,3), activation='relu')) 
#    model.add(MaxPooling2D())
#    model.add(Conv2D(32, (3,3), activation='relu'))
#    model.add(MaxPooling2D())
#    model.add(Conv2D(16, (3,3), activation='relu'))
#    model.add(MaxPooling2D())
#    model.add(Flatten())
#    model.add(Dense(256, activation='relu'))
#    model.add(Dense(3, activation='softmax'))  # Assuming you have 3 classes
#
#    model.compile(optimizer='adam', 
#                loss='sparse_categorical_crossentropy', 
#                metrics=['accuracy'])
#    #model.summary()
#    
#    return model


In [None]:
# train pure

# local logs directory
#logs_dir=cfg.CLF_PROJECT_DIR
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logs_dir)
#
##connect to clearml
#task = Task.init(project_name=project_name, task_name=f"{dataset_name}-pure-eps:{epochs}-zf:{zoom_factor}")
#logger = task.get_logger()
#
## clearml hyperparameters
#hyper_params = {'epochs': epochs}
#task.connect(hyper_params)
#
#model = get_model()
#
## train model
#train_hst = model.fit(
#    train_data, 
#    epochs=hyper_params['epochs'], 
#    validation_data=valid_data, 
#    callbacks=[tensorboard_callback])
#
## send metrics to clearML
#for epoch in range(epochs):
#    # Log training metrics
#    logger.report_scalar('loss', 'train', iteration=epoch, value=train_hst.history['loss'][epoch])
#    logger.report_scalar('accuracy', 'train', iteration=epoch, value=train_hst.history['accuracy'][epoch])
#
#    # Log validation metrics
#    logger.report_scalar('loss', 'validation', iteration=epoch, value=train_hst.history['val_loss'][epoch])
#    logger.report_scalar('accuracy', 'validation', iteration=epoch, value=train_hst.history['val_accuracy'][epoch])
#
## close task
#task.close()



In [None]:
# train aug

# local logs directory
#logs_dir=cfg.CLF_PROJECT_DIR
#tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=logs_dir)
#
##connect to clearml
#task = Task.init(project_name=project_name, task_name=f"{dataset_name}-aug-eps:{epochs}-zf:{zoom_factor}")
#logger = task.get_logger()
#
#
## clearml hyperparameters
#hyper_params = {'epochs': epochs}
#task.connect(hyper_params)
#
## reset model from previous test
#model_aug = get_model()
#
## train model
#train_aug_hst = model.fit(
#    train_aug_data, 
#    epochs=hyper_params['epochs'], 
#    validation_data=valid_data, 
#    callbacks=[tensorboard_callback])
#
## send metrics to clearML
#for epoch in range(epochs):
#    # Log training metrics
#    logger.report_scalar('loss', 'train', iteration=epoch, value=train_aug_hst.history['loss'][epoch])
#    logger.report_scalar('accuracy', 'train', iteration=epoch, value=train_aug_hst.history['accuracy'][epoch])
#
#    # Log validation metrics
#    logger.report_scalar('loss', 'validation', iteration=epoch, value=train_aug_hst.history['val_loss'][epoch])
#    logger.report_scalar('accuracy', 'validation', iteration=epoch, value=train_aug_hst.history['val_accuracy'][epoch])
#    
## close task
#task.close()



# Evaluation