In [None]:
import os
import time
import shutil
import random
import cv2
import pandas as pd
import seaborn as sn
import tensorflow as tf
import tensorflow_hub as hub
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from sklearn.preprocessing import MultiLabelBinarizer
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.models import *
from tensorflow.keras.layers import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.utils import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.initializers import *
from kaggle_datasets import KaggleDatasets
import warnings
warnings.filterwarnings("ignore")

In [None]:
import os

# Install the local packages found under /kaggle/input. os.system returns the
# command's exit status, which is not inspected here, so a failed install only
# becomes visible at the import below.
for pip_command in (
    'pip install /kaggle/input/kerasapplications -q',
    'pip install /kaggle/input/efficientnet-keras-source-code/ -q --no-deps',
):
    os.system(pip_command)

import efficientnet.tfkeras as efn

In [None]:
# Environment report as (heading, zero-argument getter) pairs. The getters are
# called one at a time so each line is printed before the next value is read.
environment_report = (
    ('TF version:', lambda: tf.__version__),
    ('Hub version:', lambda: hub.__version__),
    ('Physical devices:', lambda: tf.config.list_physical_devices()),
)
for heading, fetch in environment_report:
    print(heading, fetch())

In [None]:
# Sentinel handed to tf.data calls (num_parallel_calls / prefetch) below.
AUTO = tf.data.experimental.AUTOTUNE

# Probe for a TPU; a ValueError from the resolver is treated as "no TPU".
try:
    tpu = tf.distribute.cluster_resolver.TPUClusterResolver()
    print('Running on TPU ', tpu.master())
except ValueError:
    tpu = None

if not tpu:
    # No TPU resolved: use whatever distribution strategy is currently active.
    strategy = tf.distribute.get_strategy()
else:
    # Connect to the TPU cluster and initialise it before building the strategy.
    tf.config.experimental_connect_to_cluster(tpu)
    tf.tpu.experimental.initialize_tpu_system(tpu)
    strategy = tf.distribute.experimental.TPUStrategy(tpu)

print("REPLICAS: ", strategy.num_replicas_in_sync)

In [None]:
# --- Training schedule ---
EPOCHS = 10
LEARNING_RATE = 1e-3
# Global batch size: 16 samples for every replica the strategy reports.
BATCH_SIZE = strategy.num_replicas_in_sync * 16

# --- Input geometry (images are resized to HEIGHT x WIDTH) ---
HEIGHT = WIDTH = 480
CHANNELS = 3

# --- Classification head ---
CLASSES = 6
top_dropout_rate = 0.2

# --- Random seed constant ---
SEED = 32

In [None]:
# Look up the GCS location of the 'fgvc8aug' dataset and show it.
GCS_DS_PATH = KaggleDatasets().get_gcs_path('fgvc8aug')
print(GCS_DS_PATH)

# Prefix of every training image; file names are appended to it verbatim.
TRAIN_PATH = GCS_DS_PATH + "/data_full_augmentation_images/data_full_augmentation/images/"

In [None]:
# File names used for the saved model and the per-epoch CSV training log.
final_model = 'FGVC8-B0-full-aug.h5'
hist_path = 'FGVC8-B0-full-aug.log'

# Relative paths to the dataset's image folder and its label table.
train_image = '../input/fgvc8aug/data_full_augmentation_images/data_full_augmentation/images'
train_df = pd.read_csv('../input/fgvc8aug/data.csv')

In [None]:
# Keep only the file name and the raw label string.
train_df = train_df[["image", "labels"]]

# Each label string is whitespace-separated; tokenise it once and reuse the
# result for both fitting and transforming the binarizer.
label_tokens = train_df.labels.apply(lambda raw: raw.split())
mlb = MultiLabelBinarizer().fit(label_tokens)
labels = pd.DataFrame(mlb.transform(label_tokens), columns=mlb.classes_)

# Put the image name in front of the one-column-per-class indicator matrix.
labels = pd.concat([train_df['image'], labels], axis=1)
labels.head()

In [None]:
def format_path(st: str) -> str:
    """Return the full training-image location for the file name ``st``.

    The result is ``TRAIN_PATH`` with ``st`` appended verbatim; no separator
    is inserted between the two.
    """
    full_path = TRAIN_PATH + st
    return full_path

# Full image location for every row of the label table.
train_paths = labels['image'].map(format_path).to_numpy()

# Indicator columns from 'complex' through 'scab' (inclusive), as float32.
train_labels = labels.loc[:, 'complex':'scab'].values.astype(np.float32)

# Hold out 15% of the samples for validation with a fixed random state.
train_paths, valid_paths, train_labels, valid_labels = train_test_split(
    train_paths,
    train_labels,
    test_size=0.15,
    random_state=2020,
)

In [None]:
def process_img(filepath, label):
    """Load one JPEG from ``filepath`` and pair it with ``label``.

    The file is read, decoded with ``CHANNELS`` channels, converted to
    ``tf.float32`` via ``tf.image.convert_image_dtype`` and resized to
    ``[HEIGHT, WIDTH]``. ``label`` is passed through untouched.

    Returns:
        An ``(image, label)`` tuple.
    """
    raw_bytes = tf.io.read_file(filepath)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=CHANNELS)
    as_float = tf.image.convert_image_dtype(decoded, tf.float32)
    resized = tf.image.resize(as_float, [HEIGHT, WIDTH])
    return resized, label

In [None]:
def _decoded_pairs(paths, targets):
    """Return a dataset of (image, label) pairs decoded by ``process_img``."""
    slices = tf.data.Dataset.from_tensor_slices((paths, targets))
    return slices.map(process_img, num_parallel_calls=AUTO)


# Training stream: repeated endlessly, shuffled through a 512-element buffer,
# then batched and prefetched.
train_dataset = _decoded_pairs(train_paths, train_labels)
train_dataset = train_dataset.repeat().shuffle(512)
train_dataset = train_dataset.batch(BATCH_SIZE).prefetch(AUTO)

# Validation stream: a single pass whose batches are cached after batching.
valid_dataset = _decoded_pairs(valid_paths, valid_labels)
valid_dataset = valid_dataset.batch(BATCH_SIZE).cache().prefetch(AUTO)

In [None]:
def get_model():
    """Build the EfficientNet-B0 based multi-label classifier.

    The network is an ``efn.EfficientNetB0`` backbone (no top, no pretrained
    weights) followed by global average pooling, dropout with rate
    ``top_dropout_rate`` and a ``CLASSES``-unit sigmoid layer. The input shape
    is ``(HEIGHT, WIDTH, CHANNELS)``.

    Returns:
        An uncompiled Keras ``Model`` mapping an image batch to per-class
        sigmoid scores.
    """
    base_model = efn.EfficientNetB0(include_top=False, weights=None)
    # Was misspelled ``trainabe``, which only created an unused attribute.
    base_model.trainable = True

    inputs = Input((HEIGHT, WIDTH, CHANNELS))
    # Do not hard-code ``training=True`` here: that pins BatchNorm and the
    # backbone's stochastic layers to training behaviour even during
    # validation/prediction. Leaving the flag unset lets Keras pass the right
    # mode from fit()/evaluate()/predict().
    x = base_model(inputs)
    x = GlobalAveragePooling2D()(x)
    x = Dropout(top_dropout_rate)(x)
    outputs = Dense(CLASSES, activation='sigmoid')(x)

    return Model(inputs, outputs)

In [None]:
with strategy.scope():
    model = get_model()
    # Warm start from previously saved weights.
    model.load_weights("../input/fgvc8b0fullaug/FGVC8-B0-full-aug.h5")
    # Use the shared constant instead of repeating the literal 0.001.
    optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(
        optimizer,
        loss=tf.keras.losses.BinaryCrossentropy(),
        # The head is a multi-unit sigmoid trained with binary cross-entropy
        # (multi-label). The string 'accuracy' would resolve to categorical
        # (argmax) accuracy for such an output, so request per-label binary
        # accuracy explicitly. The name is kept as 'accuracy' so the logged
        # keys ('accuracy' / 'val_accuracy') are unchanged.
        metrics=[tf.keras.metrics.BinaryAccuracy(name='accuracy')],
    )
    model.summary()

In [None]:
# Save the full model (not only weights) to `final_model` whenever
# 'val_accuracy' reaches a new maximum.
checkpoint = ModelCheckpoint(
    final_model,
    monitor='val_accuracy',
    mode='max',
    save_best_only=True,
    save_weights_only=False,
    # Replaces the misspelled `perior = 1` keyword, which is not a
    # ModelCheckpoint argument. 'epoch' checks for a save after every epoch.
    save_freq='epoch',
    verbose=1,
)

# Stop once 'val_accuracy' has not improved by at least `min_delta` for
# `patience` consecutive epochs, then restore the best weights seen.
early_stopping = EarlyStopping(
    monitor='val_accuracy',
    # Stated explicitly (was 'auto') so it matches the checkpoint callback.
    mode='max',
    min_delta=0.0001,
    patience=5,
    baseline=None,
    restore_best_weights=True,
    verbose=1,
)
def build_lrfn(lr_start=0.00001, lr_max=0.00005,
               lr_min=0.00001, lr_rampup_epochs=5,
               lr_sustain_epochs=0, lr_exp_decay=.8):
    """Create an epoch -> learning-rate function (ramp-up, hold, exponential decay).

    Args:
        lr_start: Learning rate at epoch 0.
        lr_max: Peak learning rate for one replica; it is multiplied by
            ``strategy.num_replicas_in_sync`` before use.
        lr_min: Value the decay phase approaches.
        lr_rampup_epochs: Number of epochs of linear ramp from ``lr_start``
            to the scaled peak.
        lr_sustain_epochs: Number of epochs the peak is held after the ramp.
        lr_exp_decay: Per-epoch multiplicative decay factor applied to the
            distance between the peak and ``lr_min``.

    Returns:
        A function taking the epoch index and returning the learning rate.
    """
    peak_lr = lr_max * strategy.num_replicas_in_sync
    decay_start = lr_rampup_epochs + lr_sustain_epochs

    def lrfn(epoch):
        # Phase 1: linear ramp-up.
        if epoch < lr_rampup_epochs:
            return (peak_lr - lr_start) / lr_rampup_epochs * epoch + lr_start
        # Phase 2: hold the peak.
        if epoch < decay_start:
            return peak_lr
        # Phase 3: exponential decay towards lr_min.
        epochs_decayed = epoch - lr_rampup_epochs - lr_sustain_epochs
        return (peak_lr - lr_min) * lr_exp_decay**epochs_decayed + lr_min

    return lrfn

In [None]:
# Number of whole training batches that fit in one pass over the training set.
STEPS_PER_EPOCH = len(train_labels) // BATCH_SIZE

# Epoch-indexed learning-rate schedule, wrapped as a Keras callback that
# reports the value it sets each epoch.
lrfn = build_lrfn()
lr_schedule = tf.keras.callbacks.LearningRateScheduler(schedule=lrfn, verbose=1)

In [None]:
# Callbacks in the order they are handed to fit(): LR schedule, early
# stopping, best-model checkpoint and a CSV log of per-epoch metrics.
training_callbacks = [
    lr_schedule,
    early_stopping,
    checkpoint,
    CSVLogger(hist_path),
]

# The training dataset repeats indefinitely, so steps_per_epoch defines where
# an epoch ends.
params = model.fit(
    train_dataset,
    epochs=EPOCHS,
    steps_per_epoch=STEPS_PER_EPOCH,
    validation_data=valid_dataset,
    callbacks=training_callbacks,
)