# Install W&B (Weights & Biases)

In [None]:
!pip install --upgrade -q wandb

In [None]:
from kaggle_secrets import UserSecretsClient
import wandb
from wandb.keras import WandbCallback

import tensorflow as tf
print(tf.__version__)
from tensorflow.keras import layers
from tensorflow.keras import models
import tensorflow_addons as tfa

import os
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline

from sklearn.model_selection import train_test_split

# Add the Kaggle secret and log in to W&B

In [None]:
# Fetch the W&B API key stored as the Kaggle secret named "wandb_api".
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_api")

# Log in with the fetched key. "wandb_api" is only the secret's name, not a
# Python variable, so passing `wandb_api` here raised NameError.
wandb.login(key=secret_value_0)
# Or using
# ! wandb login $secret_value_0

In [None]:
def seed_everything(seed: int = 42) -> None:
    """Seed NumPy and TensorFlow so repeated runs draw the same random numbers.

    A fixed integer is used instead of ``hash(<str>)``: Python salts string
    hashes per process (see ``PYTHONHASHSEED``), so a hash-derived seed changes
    on every kernel restart and defeats the purpose of seeding. The previous
    ``hash(...) % 2**32 - 1`` expression could also evaluate to ``-1``, which
    ``np.random.seed`` rejects.

    Args:
        seed: Value passed to both ``np.random.seed`` and
            ``tf.random.set_seed``. NumPy requires it to lie in
            ``[0, 2**32 - 1]``.
    """
    # Ask TensorFlow to prefer deterministic cuDNN kernels.
    os.environ['TF_CUDNN_DETERMINISTIC'] = '1'
    np.random.seed(seed)
    tf.random.set_seed(seed)


seed_everything()

In [None]:
# Directory prefix (note the trailing slash) that image file names are appended to.
TRAIN_PATH = '../input/resized-plant2021/img_sz_256/'
# Use the stable `tf.data.AUTOTUNE` name, matching the data-loader cell further
# down, instead of the older `tf.data.experimental` alias.
AUTOTUNE = tf.data.AUTOTUNE
# All hyperparameters live in one plain dict so the same object can later be
# handed to W&B as the run config.
CONFIG = {
    'num_labels': 6,
    'train_val_split': 0.2,
    'img_width': 224,
    'img_height': 224,
    'batch_size': 64,
    'epochs': 10,
    'learning_rate': 0.001,
    'architecture': "CNN",
    'infra': "Kaggle",
    'competition': 'plant-pathology',
    '_wandb_kernel': 'ayut',
}

In [None]:
# 4. Build input pipeline

# Map each competition label name to an integer class id.
label_to_id = dict(
    healthy=0,
    scab=1,
    frog_eye_leaf_spot=2,
    rust=3,
    complex=4,
    powdery_mildew=5,
)
# Reverse lookup: integer class id -> label name.
id_to_label = {class_id: name for name, class_id in label_to_id.items()}
id_to_label

In [None]:
# Helper functions
def make_path(row):
    """Return the image location for ``row``: ``TRAIN_PATH`` followed by ``row.image``."""
    return ''.join((TRAIN_PATH, row.image))

def parse_labels(row):
    """Translate ``row.labels`` from label names to class ids.

    ``row.labels`` is split on whitespace, every name is looked up in
    ``label_to_id``, and the ids are returned as one space-separated string
    (e.g. two names become ``"<id> <id>"``). A name missing from
    ``label_to_id`` raises ``KeyError``.
    """
    return ' '.join(str(label_to_id[name]) for name in row.labels.split())

# Load the competition's training table
df = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
# Row by row: prefix file names with TRAIN_PATH, then turn label names into id strings
df['image'] = df.apply(make_path, axis=1)
df['labels'] = df.apply(parse_labels, axis=1)

# Preview the first rows
df.head()

In [None]:
# 5. Training and validation split

# Fraction of rows held out for validation comes from the shared config.
holdout_fraction = CONFIG['train_val_split']
train_df, valid_df = train_test_split(df, test_size=holdout_fraction)
print(f'Number of train images: {len(train_df)} and validation images: {len(valid_df)}')

In [None]:
# 6. Helper functions for input pipeline

@tf.function
def decode_image(image):
    """Decode JPEG bytes into a 3-channel float32 image tensor.

    The uint8 pixels produced by the JPEG decoder are converted with
    ``tf.image.convert_image_dtype``, which is what normalises them for
    float32. No resizing happens in this function; ``load_image`` resizes the
    result afterwards.
    """
    pixels_uint8 = tf.image.decode_jpeg(image, channels=3)
    return tf.image.convert_image_dtype(pixels_uint8, dtype=tf.float32)

@tf.function
def load_image(df_dict):
    """Turn one dataset element into an ``(image, label)`` pair.

    Args:
        df_dict: Mapping with an ``'image'`` entry (path of the file to read)
            and a ``'labels'`` entry (string of integer class ids, as written
            by ``parse_labels``).

    Returns:
        A tuple of the decoded image resized to
        ``CONFIG['img_height']`` x ``CONFIG['img_width']`` and a vector of
        length ``CONFIG['num_labels']`` obtained by summing the one-hot
        encodings of the listed class ids.
    """
    target_size = (CONFIG['img_height'], CONFIG['img_width'])

    # Read the file, decode it, then bring it to the model's input size.
    raw_bytes = tf.io.read_file(df_dict['image'])
    image = tf.image.resize(decode_image(raw_bytes), target_size)

    # NOTE(review): per the TensorFlow docs an empty `sep` splits on runs of
    # whitespace, which is what separates the ids here -- confirm if changing.
    id_strings = tf.strings.split(df_dict['labels'], sep='')
    class_ids = tf.strings.to_number(id_strings, out_type=tf.int32)
    one_hot_rows = tf.one_hot(indices=class_ids, depth=CONFIG['num_labels'])
    label = tf.reduce_sum(one_hot_rows, axis=0)

    return image, label

In [None]:
# 7. Build data loaders

AUTOTUNE = tf.data.AUTOTUNE

# Training stream: shuffle rows (buffer of 1024), load each image/label pair
# in parallel, then batch and prefetch.
trainloader = tf.data.Dataset.from_tensor_slices(dict(train_df))
trainloader = trainloader.shuffle(1024)
trainloader = trainloader.map(load_image, num_parallel_calls=AUTOTUNE)
trainloader = trainloader.batch(CONFIG['batch_size'])
trainloader = trainloader.prefetch(AUTOTUNE)

# Validation stream: same steps, but without shuffling.
validloader = tf.data.Dataset.from_tensor_slices(dict(valid_df))
validloader = validloader.map(load_image, num_parallel_calls=AUTOTUNE)
validloader = validloader.batch(CONFIG['batch_size'])
validloader = validloader.prefetch(AUTOTUNE)

In [None]:
# Data loader sanity check

def show_batch(image_batch, label_batch):
    """Plot up to 25 images from a batch in a 5x5 grid, titled with their labels.

    Args:
        image_batch: Indexable batch of images; each item is passed to
            ``plt.imshow``.
        label_batch: Indexable batch of label tensors (each must support
            ``.numpy()``). Position ``i`` equal to ``1.`` marks class id ``i``
            as present; its name is looked up in ``id_to_label``.
    """
    plt.figure(figsize=(20, 20))
    # Cap at the batch length so a batch with fewer than 25 items (for example
    # a final partial batch) does not index past the end.
    n_shown = min(25, len(image_batch))
    for n in range(n_shown):
        plt.subplot(5, 5, n + 1)
        plt.imshow(image_batch[n])
        active_names = [
            id_to_label[i]
            for i, flag in enumerate(label_batch[n].numpy())
            if flag == 1.
        ]
        plt.title(' '.join(active_names))
        plt.axis('off')

# Pull the first batch out of the training pipeline and plot it.
batch_iterator = iter(trainloader)
image_batch, label_batch = next(batch_iterator)
show_batch(image_batch, label_batch)

In [None]:
# 8. Define model: EfficientNetB0 trained on ImageNet as backbone

def get_model():
    """Build the multi-label classifier: EfficientNetB0 backbone plus a sigmoid head.

    Returns:
        A Keras ``Model`` that takes images of shape
        ``(CONFIG['img_height'], CONFIG['img_width'], 3)`` and outputs one
        sigmoid score per entry of ``label_to_id``.
    """
    base_model = tf.keras.applications.EfficientNetB0(include_top=False, weights='imagenet')
    # Fine-tune the whole backbone. This was misspelled `trainabe`, which only
    # created an unrelated attribute and never touched the real flag.
    base_model.trainable = True

    inputs = layers.Input((CONFIG['img_height'], CONFIG['img_width'], 3))
    # NOTE(review): `training=True` is hard-wired into this call, so the
    # backbone presumably stays in training mode during validation/inference
    # too -- confirm this is intended.
    x = base_model(inputs, training=True)
    x = layers.GlobalAveragePooling2D()(x)
    x = layers.Dropout(0.5)(x)
    # Sigmoid (not softmax): an image can carry several labels at once.
    outputs = layers.Dense(len(label_to_id), activation='sigmoid')(x)

    return models.Model(inputs, outputs)

# Model sanity check: build the model once and print its layer summary.
# Clear Keras' global state first so this throwaway build starts from a clean session.
tf.keras.backend.clear_session()
model = get_model()
model.summary()

In [None]:
# Initialize model (fresh Keras session, fresh weights)
tf.keras.backend.clear_session()
model = get_model()

# Compile model
optimizer = tf.keras.optimizers.Adam(learning_rate=CONFIG['learning_rate'])
model.compile(
    optimizer,
    loss=tfa.losses.SigmoidFocalCrossEntropy(),
    metrics=[
        tf.keras.metrics.AUC(multi_label=True),
        # Multi-label F1: binarise every sigmoid output at 0.5. Without a
        # threshold the metric treats only the arg-max class as predicted,
        # which is wrong when an image can have several labels. The class
        # count is read from CONFIG instead of being hard-coded.
        tfa.metrics.F1Score(
            num_classes=CONFIG['num_labels'],
            average='micro',
            threshold=0.5,
        ),
    ],
)

In [None]:
# Record the backbone name alongside the other hyperparameters.
CONFIG.update(model_name='efficientnetb0')
print('Training configuration: ', CONFIG)

# Start the W&B run, passing the whole CONFIG dict as the run's config.
run = wandb.init(
    project='plant-pathology',
    group='EfficientNet',
    job_type='train',
    config=CONFIG,
)

In [None]:
# Rebinds CONFIG. Compared with the first definition in this file, this copy
# adds `model_name` and omits `competition` and `_wandb_kernel`.
CONFIG = {
    'num_labels': 6,
    'train_val_split': 0.2,
    'img_width': 224,
    'img_height': 224,
    'batch_size': 64,
    'epochs': 10,
    'learning_rate': 0.001,
    'architecture': "CNN",
    'infra': "Kaggle",
    'model_name': "efficientnetb0",
}

In [None]:
# Attach two extra fields, "type" and "kaggle_competition", to `wandb.config`.
wandb.config['type'] = 'baseline'
wandb.config['kaggle_competition'] = 'Plant Pathology 2021 - FGVC8'

In [None]:
# Stop when validation loss has not improved for 3 epochs, restoring the
# best weights seen.
earlystopper = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    mode='min',
    patience=3,
    restore_best_weights=True,
    verbose=0,
)

# Train, passing the W&B callback and early stopping to Keras.
training_callbacks = [WandbCallback(), earlystopper]
model.fit(
    trainloader,
    validation_data=validloader,
    epochs=CONFIG['epochs'],
    callbacks=training_callbacks,
)

# Close W&B run
run.finish()

# Reference

https://www.kaggle.com/code/ayuraj/experiment-tracking-with-weights-and-biases/