In [None]:
import numpy as np 
import pandas as pd 
import cv2
import matplotlib.pyplot as plt
import os
import tensorflow as tf
import tensorflow_addons as tfa
from tensorflow.keras.preprocessing import image
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras import regularizers, optimizers
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint


from collections import Counter

In [None]:
# Ground-truth table: the CSV is read with explicit column names, one image id
# and its label per row.
image_label = pd.read_csv(
    '../input/fptu-huynhld3-spr22/dev/devset_images_gt.csv',
    names=['id', 'label'],
)
# Turn ids into "<id>.jpg" file names and labels into strings (string labels are
# what the dataframe iterators with class_mode='categorical' are fed later on).
image_label['id'] = image_label['id'].map('{}.jpg'.format)
image_label['label'] = image_label['label'].astype(str)

# Deterministic shuffle, then an 80/20 train/validation split by position.
image_label = image_label.sample(frac=1, random_state=3).reset_index(drop=True)
split_at = int(image_label.shape[0]*0.8)
train_data = image_label.iloc[:split_at].copy(deep=True)
valid_data = image_label.iloc[split_at:].copy(deep=True)

In [None]:
# Training images are rescaled to [0, 1] and randomly augmented; validation/test
# images are only rescaled.
dataset_gen = image.ImageDataGenerator(
    rescale=1./255,
    rotation_range=30,
    width_shift_range=0.3,
    height_shift_range=0.1,
    zoom_range=[0.1, 1],
    horizontal_flip=True,
    brightness_range=[0.2, 1],
)
test_gen = image.ImageDataGenerator(rescale=1./255)

# Options shared by the training and validation iterators.
flow_kwargs = dict(
    directory='../input/fptu-huynhld3-spr22/dev/images',
    x_col='id',
    y_col='label',
    class_mode='categorical',
    target_size=(288, 288),
    batch_size=64,
    seed=2,
)
# NOTE: `train_data` / `valid_data` are rebound here from DataFrames to iterators,
# so this cell cannot be re-run without re-running the split cell first.
train_data = dataset_gen.flow_from_dataframe(train_data, shuffle=True, **flow_kwargs)
# Validation order is kept fixed (shuffle=False) so predictions line up with `.classes`.
valid_data = test_gen.flow_from_dataframe(valid_data, shuffle=False, **flow_kwargs)

# Class balance of each split (train first, then validation).
for split in (train_data, valid_data):
    counter = Counter(split.classes)
    print(counter.items())

In [None]:
!pip install --quiet vit-keras

from vit_keras import vit
vit_model = vit.vit_b32(
        image_size = 288,
        activation = 'softmax',
        pretrained = True,
        include_top = False,
        pretrained_top = False,
        classes = 2)
model = Sequential([
        vit_model,
        Flatten(),
        Dropout(0.25),
        BatchNormalization(),
        Dense(1024, activation = tfa.activations.gelu),
        Dropout(0.25),
        BatchNormalization(),
        Dense(64, activation = tfa.activations.gelu),
        Dropout(0.25),
        BatchNormalization(),
        Dense(2, 'softmax')],
        name = 'vision_transformer')

model.summary()

In [None]:
# Training configuration.
learning_rate = 5e-5
EPOCHS = 200  # upper bound; EarlyStopping normally ends training much earlier

optimizer = tfa.optimizers.RectifiedAdam(learning_rate = learning_rate)
auc = tf.keras.metrics.AUC()
checkpoint_path = './dlp301_model_vit.h5'

model.compile(optimizer = optimizer,
              loss = tf.keras.losses.CategoricalCrossentropy(label_smoothing = 0.2),
              metrics = ['accuracy', auc])
keras_callbacks   = [
      # restore_best_weights=True: without it the in-memory model keeps the weights of
      # the last epoch (up to `patience` epochs past the best val_loss), and that is the
      # model that later cells evaluate, save and use for the submission.
      EarlyStopping(monitor='val_loss', patience=10, mode='min', min_delta=0.0001,
                    restore_best_weights=True),
      # Also persist the best-val_loss model to disk.
      ModelCheckpoint(checkpoint_path, monitor='val_loss', save_best_only=True, mode='min')
]

STEP_SIZE_TRAIN=train_data.n//train_data.batch_size
# len() of the iterator is the number of batches including the trailing partial one;
# `n // batch_size` silently dropped those samples from every validation pass
# (and evaluates to 0 when the validation set is smaller than one batch).
STEP_SIZE_VALID=len(valid_data)
model.fit(x = train_data,
          steps_per_epoch = STEP_SIZE_TRAIN,
          validation_data = valid_data,
          validation_steps = STEP_SIZE_VALID,
          callbacks = keras_callbacks,
          epochs = EPOCHS)

In [None]:
# Score the in-memory model on the validation iterator. The model was compiled with
# metrics ['accuracy', auc], so the returned list is the loss followed by those metrics.
result = model.evaluate(x = valid_data)
result

In [None]:
# Import from tensorflow.keras like the rest of the notebook; mixing the standalone
# `keras` package with tf.keras objects can fail depending on the installed versions.
from tensorflow.keras.models import load_model

# Persist the current in-memory model (architecture + weights) as HDF5.
model.save('./dlp301_model_vit_ver3.h5')

In [None]:
from sklearn.metrics import roc_curve
from sklearn.metrics import roc_auc_score
from matplotlib import pyplot

# Column 1 of the softmax output is used as the score for class index 1.
preds = model.predict(valid_data)
yhat = preds[:, 1]
y_true = valid_data.classes

# ROC curve and AUC on the validation split.
fpr, tpr, thresholds = roc_curve(y_true, yhat)
print(roc_auc_score(y_true, yhat))

fig, ax = pyplot.subplots()
# Diagonal = chance-level classifier, for reference.
ax.plot([0, 1], [0, 1], linestyle='--', label='No Skill')
ax.plot(fpr, tpr, marker='.', label='Model')
ax.set_xlabel('False Positive Rate')
ax.set_ylabel('True Positive Rate')
ax.legend()
pyplot.show()

In [None]:
from numpy import sqrt, argmax

# Geometric mean of tpr and (1 - fpr) at every ROC threshold; the threshold
# with the largest value is reported.
specificity = 1 - fpr
gmeans = sqrt(tpr * specificity)
ix = argmax(gmeans)
best_threshold, best_gmean = thresholds[ix], gmeans[ix]
print('Best Threshold=%f, G-Mean=%.3f' % (best_threshold, best_gmean))

In [None]:
# One row per ROC point; 'tf' = tpr - (1 - fpr), which is zero where the two
# quantities are equal.
i = np.arange(len(tpr))
roc = pd.DataFrame(
    {
        'fpr': fpr,
        'tpr': tpr,
        '1-fpr': 1 - fpr,
        'tf': tpr - (1 - fpr),
        'thresholds': thresholds,
    },
    index=i,
)
# Show the single row whose 'tf' is closest to zero.
closest_to_zero = roc.tf.abs().argsort()[:1]
roc.iloc[closest_to_zero]

In [None]:
# Unlabelled test images (class_mode=None), kept in directory order (shuffle=False)
# so that predictions can be matched back to `test_generator.filenames`.
test_generator = test_gen.flow_from_directory(directory='../input/fptu-huynhld3-spr22/', classes=['test'], class_mode=None, shuffle=False, target_size=(288, 288), batch_size=64)
preds = model.predict(test_generator)

# "test/<id>.<ext>" -> "<id>"
file_id = [filepath.split('/')[-1].split('.')[0] for filepath in test_generator.filenames]

# Keep only images whose class-1 probability reaches 0.5.
# The loop variable is deliberately not called `id`: at notebook top level it would
# leak into the global namespace and shadow the builtin for every later cell.
file_id_flood = []
prob_flood = []
for image_id, pred in zip(file_id, preds):
    if pred[1]>=0.5:
        file_id_flood.append(image_id)
        prob_flood.append(pred[1])

# Most confident predictions first.
submission = pd.DataFrame()
submission['id'] = file_id_flood
submission['prediction'] = prob_flood
submission = submission.sort_values(by=['prediction'], ascending=False)
submission

In [None]:
# Write only the ranked ids. A separate name is used so `submission` stays a
# DataFrame: rebinding it to the 'id' Series made a second run of this cell fail
# with KeyError: 'id'. The CSV written is the same as before.
submission_ids = submission['id'].copy(deep=True)
submission_ids.to_csv('./submission.csv', index=False)

In [None]:
# Import from tensorflow.keras like the rest of the notebook; mixing the standalone
# `keras` package with tf.keras objects can fail depending on the installed versions.
from tensorflow.keras.models import load_model

# NOTE(review): `optimizer` is not passed to load_model below; it only rebinds the
# global name used by the training cell.
optimizer = tfa.optimizers.RectifiedAdam(learning_rate = 5e-5)
# The saved model references a metric named 'auc'; supply an instance for deserialisation.
auc = tf.keras.metrics.AUC()
model_2 = load_model('../input/model-weights/dlp301_model_vit_ver2.h5', custom_objects={'auc':auc})

# This cell feeds 224x224 inputs (presumably what the ver2 model was trained on —
# confirm); note that it replaces the 288x288 `test_generator` created earlier.
test_gen = image.ImageDataGenerator(rescale=1./255)
test_generator = test_gen.flow_from_directory(directory='../input/fptu-huynhld3-spr22/', classes=['test'], class_mode=None, shuffle=False, target_size=(224, 224))
preds = model_2.predict(test_generator)

In [None]:
# File names live on the directory iterator (`test_generator`), not on the
# ImageDataGenerator (`test_gen`), which has no `filenames` attribute.
# "test/<id>.<ext>" -> "<id>"
file_id = [filepath.split('/')[-1].split('.')[0] for filepath in test_generator.filenames]

# Keep only images whose class-1 probability reaches 0.6.
# The loop variable is deliberately not called `id`, which would shadow the builtin
# in the notebook's global namespace.
file_id_flood = []
prob_flood = []
for image_id, pred in zip(file_id, preds):
    if pred[1]>=0.6:
        file_id_flood.append(image_id)
        prob_flood.append(pred[1])

# Most confident predictions first.
submission = pd.DataFrame()
submission['id'] = file_id_flood
submission['prediction'] = prob_flood
submission = submission.sort_values(by=['prediction'], ascending=False)
submission

In [None]:
from vit_keras import visualize

# First image of the next test batch (class_mode=None, so a batch is just images).
batch = next(test_generator)
# The generator rescales pixels to [0, 1]; convert back to 0-255 uint8 because the
# attention overlay returned by vit_keras is cast to uint8 (a [0, 1] image would come
# out black). NOTE(review): confirm against the installed vit-keras version.
# The name `image` is avoided on purpose: it is the keras preprocessing module
# imported at the top of the notebook and is needed when earlier cells are re-run.
sample_image = (batch[0] * 255).astype('uint8')

# attention_map walks the transformer blocks of the model it is given, so it needs
# the ViT backbone itself, not the Sequential wrapper that contains it.
attention_map = visualize.attention_map(model = vit_model, image = sample_image)

# Plot results
fig, (ax1, ax2) = plt.subplots(ncols = 2)
ax1.axis('off')
ax2.axis('off')
ax1.set_title('Original')
ax2.set_title('Attention Map')
_ = ax1.imshow(sample_image)
_ = ax2.imshow(attention_map)

In [None]:
# Bare expression: shows the repr of the `model` object as the cell output.
model