## Multi-Label Image Classification

#### Quick Introductory Example

In [None]:
!wget https://dl.dropbox.com/s/0htmeoie69q650p/miml_dataset.zip?dl=1 -O dataset.zip
!unzip -q dataset.zip 
!rm dataset.zip

In [None]:
from keras.models import Sequential

from keras_preprocessing.image import ImageDataGenerator
from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from keras.layers import Conv2D, MaxPooling2D
from keras import regularizers, optimizers
import pandas as pd
import numpy as np

In [None]:
# Load the label table; every column after the first one is treated as a label column.
df = pd.read_csv("./miml_dataset/miml_labels_1.csv")

columns = df.columns[1:].tolist()
columns

Making data generators (pixel rescaling only; no augmentation is applied yet)

In [None]:
# Both generator objects only multiply pixel values by 1/255.
datagen = ImageDataGenerator(rescale=1.0 / 255.0)
test_datagen = ImageDataGenerator(rescale=1.0 / 255.0)

# Arguments shared by all three flows.
common_flow_args = dict(
    directory="./miml_dataset/images",
    x_col="Filenames",
    seed=42,
    target_size=(100, 100),
)

# Extra arguments for the two labelled flows (training and validation).
labelled_flow_args = dict(
    y_col=columns,
    batch_size=32,
    shuffle=True,
    class_mode="raw",
)

# Rows 0-1799 feed training.
train_generator = datagen.flow_from_dataframe(
    dataframe=df[:1800], **common_flow_args, **labelled_flow_args
)

# Rows 1800-1899 feed validation.
valid_generator = test_datagen.flow_from_dataframe(
    dataframe=df[1800:1900], **common_flow_args, **labelled_flow_args
)

# Remaining rows: images only (no labels), one per batch, in file order.
test_generator = test_datagen.flow_from_dataframe(
    dataframe=df[1900:],
    batch_size=1,
    shuffle=False,
    class_mode=None,
    **common_flow_args,
)

In [None]:
# Inspect the label table that the generators above were built from.
df

In [None]:
# Convolutional classifier for 100x100 RGB images.
# Two convolutional stages (32 filters, then 64), each followed by max-pooling and
# dropout, feed a 512-unit dense layer and a sigmoid output layer.
model = Sequential()

# Stage 1: two 3x3 convolutions with 32 filters.
model.add(Conv2D(32, (3,3), padding = 'same', input_shape = (100,100,3)))
model.add(Activation('relu'))
model.add(Conv2D(32, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size = (2,2)))
model.add(Dropout(0.25))

# Stage 2: two 3x3 convolutions with 64 filters.
model.add(Conv2D(64, (3,3), padding = 'same'))
model.add(Activation('relu'))
model.add(Conv2D(64, (3,3)))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Dropout(0.25))

# Classification head.
model.add(Flatten())
model.add(Dense(512))
model.add(Activation('relu'))
model.add(Dropout(0.5))
# One independent sigmoid unit per label column, so the output width always
# matches the `y_col` list given to the generators.
model.add(Dense(len(columns), activation='sigmoid'))

# `learning_rate` replaces the deprecated `lr` keyword.
# NOTE(review): `decay` is kept as in the original; confirm the installed Keras
# version still accepts it.
model.compile(
    optimizers.RMSprop(learning_rate=0.0001, decay=1e-6),
    loss="binary_crossentropy",
    metrics=["accuracy"],
)

In [None]:
# Number of whole batches in each split (integer division, so a trailing partial
# batch is not counted).
STEP_SIZE_TRAIN, STEP_SIZE_VALID, STEP_SIZE_TEST = (
    gen.n // gen.batch_size
    for gen in (train_generator, valid_generator, test_generator)
)

# Train for 30 epochs, validating on the validation flow after each one.
model.fit(
    train_generator,
    validation_data=valid_generator,
    epochs=30,
    steps_per_epoch=STEP_SIZE_TRAIN,
    validation_steps=STEP_SIZE_VALID,
)

#### Plant Pathology 2021 - FGVC8

https://www.kaggle.com/nickuzmenkov/pp2021-tpu-tf-training/data <br>
https://www.kaggle.com/nickuzmenkov/pp2021-tpu-tf-inference

In [None]:
from kaggle_secrets import UserSecretsClient
# Read the secret stored under the name "wandb_login"; it is passed to
# wandb.login() as the API key in a later cell.
user_secrets = UserSecretsClient()
secret_value_0 = user_secrets.get_secret("wandb_login")

In [None]:
%%capture
!pip install wandb

In [None]:
%%capture

import wandb
from wandb.keras import WandbCallback 


# Authenticate with the key read from Kaggle secrets; %%capture keeps the
# cell's output out of the notebook.
wandb.login(key = secret_value_0)

In [None]:
import numpy as np
import pandas as pd 
import matplotlib.pyplot as plt 
import seaborn as sns
import tensorflow as tf
from tqdm.auto import tqdm

# Apply seaborn's 'darkgrid' style to subsequent plots.
sns.set_style('darkgrid')

In [None]:
# Load the competition's training table, then show its shape and first rows.
train = pd.read_csv('../input/plant-pathology-2021-fgvc8/train.csv')
print(train.shape)
train.head()

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer

# Split each whitespace-separated label string into a list once, and reuse the
# result for both fitting and transforming.
label_lists = train.labels.apply(lambda x: x.split())

# One indicator column per distinct label found in the training table.
mlb = MultiLabelBinarizer().fit(label_lists)
labels = pd.DataFrame(mlb.transform(label_lists), columns=mlb.classes_)

# Draw onto the axes created here instead of relying on the implicit current
# axes; the trailing `;` suppresses the Axes repr in the cell output.
fig, ax = plt.subplots(figsize=(20, 6))
labels.sum().plot.bar(ax=ax, title='Target Class Distribution');

In [None]:
# Row-wise sum of the indicator columns = number of labels attached to each image.
labels_per_image = labels.sum(axis=1)

# Bar chart of how often each label count occurs.
fig, ax = plt.subplots(figsize=(20, 6))
labels_per_image.value_counts().plot.bar(ax=ax, title='Distribution of Number of Labels per Image');

In [None]:
# Put the image filename column in front of the binarized label columns.
# Any 'image' column already present is dropped first, so re-running this cell
# does not stack duplicate 'image' columns onto `labels`.
labels = pd.concat(
    [train['image'], labels.drop(columns='image', errors='ignore')],
    axis=1,
)
labels.head()

In [None]:
# A single generator object serves both subsets: it multiplies pixel values by
# 1/255 and reserves a 0.1 fraction of the rows as the validation subset.
image_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale=1./255,
    validation_split=0.1,
)

# Settings common to the training and validation flows; only `subset` differs.
# Every column after the first ('image') is used as a target column.
flow_settings = dict(
    dataframe=labels,
    directory='../input/plant-pathology-2021-fgvc8/train_images',
    x_col='image',
    y_col=labels.columns.tolist()[1:],
    class_mode='raw',
    color_mode="rgb",
    target_size=(224, 224),
    batch_size=64,
)

train_generator = image_data_generator.flow_from_dataframe(subset='training', **flow_settings)

valid_generator = image_data_generator.flow_from_dataframe(subset='validation', **flow_settings)

In [None]:
# inputs = tf.keras.Input(shape = (224,224,3))

# x = tf.keras.applications.MobileNetV2(include_top = False)(inputs)
# x = tf.keras.layers.GlobalAveragePooling2D()(x)
# outputs = tf.keras.layers.Dense(6, activation = 'sigmoid')(x)

# model = tf.keras.Model(inputs, outputs)
# model.compile(loss = 'binary_crossentropy', optimizer = tf.keras.optimizers.Adam(lr = 1e-4))

# model.summary()

# tf.keras.utils.plot_model(model, show_shapes = True)

In [None]:
def Model(input_shape = (224,224,3), num_classes = 6):
    """Build the multi-label image classifier used for training.

    The network is a MobileNetV2 backbone with its classification head removed
    (`include_top = False`), followed by global average pooling and a dense
    layer with one sigmoid unit per class.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (224, 224, 3).
        num_classes: Number of sigmoid output units. Defaults to 6.

    Returns:
        An uncompiled `tf.keras.Model` mapping images to per-class scores.
    """
    inputs = tf.keras.Input(shape = input_shape)

    features = tf.keras.applications.MobileNetV2(include_top = False)(inputs)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
    # Sigmoid (not softmax): each class is scored independently of the others.
    outputs = tf.keras.layers.Dense(num_classes, activation = 'sigmoid')(pooled)

    return tf.keras.Model(inputs, outputs)
    

In [None]:
# tf.test.is_gpu_available() is deprecated; check the list of visible GPU
# devices instead. The comparison keeps the cell output a plain True/False.
len(tf.config.list_physical_devices('GPU')) > 0


In [None]:
# Run the training pipeline only when a GPU is visible.
# (tf.config.list_physical_devices replaces the deprecated tf.test.is_gpu_available.)
if tf.config.list_physical_devices('GPU'):
    # Hyperparameters and run metadata recorded with the Weights & Biases run.
    configs = {

        "learning_rate" : 0.0001, 
        "loss_function" : 'binary_crossentropy',
        "epochs" : 10, 
        "batch_size" : 64,
        "log_step" : 200,
        "val_log_step" : 50,
        "architecture" : "MobileNetV2",
        "dataset" : "Plant Pathology FGVC8"
    }

    run = wandb.init(project = "Plant Pathology FGVC8", config = configs)

    config = wandb.config

    model = Model()
    model.summary()

    optimizer = tf.keras.optimizers.Adam(config.learning_rate)
    model.compile(optimizer, config.loss_function, metrics = ['acc'])

    # Multiply the learning rate by 0.01 after 2 epochs without val_loss improvement.
    rlp = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', patience = 2, verbose = 1, factor = 0.01)

    # Stop after 3 epochs without val_loss improvement and restore the best weights.
    es = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 3, verbose = 1, restore_best_weights = True)

    # Keep the returned History object: the loss-curve plot below reads
    # `history.history`. The epoch count is taken from the logged config so the
    # W&B record matches the run that was actually executed.
    history = model.fit(
        train_generator,
        validation_data = valid_generator,
        epochs = config.epochs,
        callbacks = [WandbCallback(), rlp, es],
    )

    fig, ax = plt.subplots(figsize = (20, 6))
    pd.DataFrame(history.history)[['loss', 'val_loss']].plot(ax = ax, title = 'Model Loss Curve')

    submissions = pd.read_csv('../input/plant-pathology-2021-fgvc8/sample_submission.csv')

    # Unlabelled test flow: one image per batch, in file order.
    test_data_generator = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)
    test_generator = test_data_generator.flow_from_dataframe(
        submissions,
        directory = '../input/plant-pathology-2021-fgvc8/test_images',
        x_col = "image",
        y_col = None,
        target_size = (224,224),
        color_mode = "rgb",
        classes = None,
        class_mode = None,
        shuffle = False,
        batch_size = 1
    )


In [None]:
# rlp = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss', patience = 2, verbose = 1, factor = 0.01)

# es = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss', patience = 3, verbose = 1, restore_best_weights = True)

# history = model.fit(train_generator, validation_data = valid_generator, epochs = 10, callbacks = [rlp, es])

TF TPU Training

## Notebook in the Making