In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

In [None]:
import cv2
import tensorflow as tf
from sklearn.model_selection import train_test_split

from keras import backend as K
from keras.applications.vgg16 import VGG16

from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras.models import Sequential

**Load data and augmentation**

In [None]:
# Read the label CSVs and load every labelled image that can be read from disk.

img_folder = '../input/dbm301-dataset/devset_images/devset_images'
label_file = '../input/dbm301-dataset/devset_images_gt_kaggle.csv'

new_image = '../input/dbm301-dataset2/fptu-huynhld3-sum21/dev/devset_images/devset_images'
new_label = '../input/dbm301-dataset2/fptu-huynhld3-sum21/dev/devset_images_gt.csv'

# Target (width, height) handed to cv2.resize; the model input is built as (224, 224, 3).
IMG_SIZE = (224, 224)


def load_labeled_images(image_dir, rows, size=IMG_SIZE):
    """Load and resize the images referenced by `rows`.

    Parameters
    ----------
    image_dir : str
        Folder that contains the `<id>.jpg` files.
    rows : iterable
        Rows of the label table; `row[0]` is used as the image id and
        `row[1]` as its label.
    size : tuple of int
        Size passed to `cv2.resize`.

    Returns
    -------
    (numpy.ndarray, numpy.ndarray)
        The stacked resized images and the labels of the images that could be
        read, in the same order. Rows whose image cannot be read are skipped
        (cv2.imread returns None for them) and counted in a printed notice.
    """
    # NOTE(review): cv2.imread yields BGR channel order, while ImageNet weights are
    # usually trained on RGB. If this is changed, change the submission loader too.
    images, labels = [], []
    skipped = 0
    for row in rows:
        image = cv2.imread(image_dir + '/' + str(row[0]) + '.jpg')
        if image is None:
            skipped += 1
            continue
        images.append(cv2.resize(image, size))
        labels.append(row[1])
    if skipped:
        print('Skipped', skipped, 'unreadable image(s) in', image_dir)
    return np.array(images), np.array(labels)


data = pd.read_csv(label_file)
data_array = data.values
data2 = pd.read_csv(new_label)
data2_array = data2.values

X_train, Y_train = load_labeled_images(img_folder, data_array)
X_train2, Y_train2 = load_labeled_images(new_image, data2_array)

In [None]:
# Split each dataset into a training part and a held-out part.
# A fixed seed keeps the split, and every score computed from it, identical
# across kernel restarts.
SEED = 42
x_train, x_test, y_train, y_test = train_test_split(X_train, Y_train, random_state=SEED)
x_train2, x_test2, y_train2, y_test2 = train_test_split(X_train2, Y_train2, random_state=SEED)

# Pixels are deliberately NOT divided by 255 here: the images are fed to an
# EfficientNet model, which per the Keras EfficientNet documentation takes raw
# 0-255 pixel values.

# Shapes of the full (unsplit) arrays, as a sanity check on the loading step.
print('X:',X_train.shape)
print('Y:',Y_train.shape)
print('X2:',X_train2.shape)
print('Y2:',Y_train2.shape)

In [None]:
# Create f1 measure def
def f1(y_true, y_pred):
    """Batch-wise F1 score built from Keras backend ops.

    Precision and recall are each computed for the current batch only and then
    combined as 2 * P * R / (P + R). `K.epsilon()` is added to every
    denominator so that an empty batch side does not divide by zero.

    Args:
        y_true: ground-truth tensor (presumably 0/1 labels; values are clipped
            to [0, 1] and rounded before counting).
        y_pred: prediction tensor, clipped to [0, 1] and rounded before counting.

    Returns:
        A scalar tensor holding the batch F1 value.
    """
    # Sum of the rounded, clipped element-wise product of labels and predictions.
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    # Number of entries that round to 1 in the labels / in the predictions.
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    precision_value = hits / (flagged_positives + K.epsilon())
    recall_value = hits / (actual_positives + K.epsilon())
    return 2*((precision_value*recall_value)/(precision_value+recall_value+K.epsilon()))

In [None]:
# Input augmentation: random perturbations packaged as a small Sequential model
# so they can be called like a single layer when the network is assembled.
augmentation_layers = [
    preprocessing.RandomRotation(factor=0.15),
    preprocessing.RandomTranslation(height_factor=0.1, width_factor=0.1),
    preprocessing.RandomFlip(),
    preprocessing.RandomContrast(factor=0.1),
]
img_augmentation = Sequential(augmentation_layers, name="img_augmentation")

**Transfer learning model construction**

In [None]:
# Layers come from tensorflow.keras (not the standalone `keras` package) so that
# they belong to the same Keras implementation as EfficientNetB0 and tf.keras.Model.
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, Input
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.applications import EfficientNetB0

# Input, preprocessing and augmentation
input_ = Input(shape=(224,224,3))
x = preprocess_input(input_)
x = img_augmentation(x)

# Pretrained convolutional base, attached directly to the augmented input
base_model = EfficientNetB0(weights='imagenet', input_tensor=x, include_top=False)

# Freeze every layer of the base so only the new head is trained at first
for layer in base_model.layers:
    layer.trainable = False

# New classification head: one sigmoid unit for the binary label
x = Flatten()(base_model.output)
x = Dense(1024, activation="relu")(x)
x = Dropout(0.5)(x)
predictions = Dense(1, activation="sigmoid")(x)
model = tf.keras.Model(base_model.input, predictions)

# Compile model (`learning_rate` replaces the deprecated `lr` alias)
model.compile(loss='binary_crossentropy',
              optimizer=tf.keras.optimizers.RMSprop(learning_rate=0.0001, decay=1e-6),
              metrics=['accuracy', f1])

model.summary()

In [None]:
# Callbacks for the frozen-base training stage. All three watch validation accuracy.

# Keep only the weights of the best epoch seen so far, checked once per epoch.
# `save_weights_only` is a real boolean and `save_freq='epoch'` replaces the
# deprecated `period=1`.
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='./dbm301-enf0.h5',
                                                  monitor='val_accuracy',
                                                  save_weights_only=True,
                                                  save_freq='epoch',
                                                  verbose=1,
                                                  save_best_only=True)
# Stop when validation accuracy has not improved for 10 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                  min_delta=0.0,
                                                  patience=10,
                                                  verbose=1)
# Multiply the learning rate by 0.2 after 6 epochs without an improvement of at
# least 0.001, never going below 1e-5. `min_delta` replaces the deprecated `epsilon`.
reduce_learning_rate = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                                            factor=0.2,
                                                            patience=6,
                                                            verbose=1,
                                                            min_delta=0.001,
                                                            cooldown=0,
                                                            min_lr=0.00001)

**Training**

In [None]:
# Train the new classification head while the pretrained base stays frozen.
#
# `steps_per_epoch` is intentionally not passed: x_train / y_train are NumPy
# arrays, and Keras documents that argument as unsupported for array inputs.
# Leaving it out makes each epoch exactly one pass over the training split
# in batches of 32.

history = model.fit(
          x_train, y_train,
          epochs=10,
          batch_size=32,
          validation_data = (x_test, y_test),
          callbacks=[checkpointer,early_stopping,reduce_learning_rate])

**Unfreeze model and continue training**

In [None]:
# Unfreeze model

# Fine-tuning starts from the weights checkpointed during the frozen stage.
model.load_weights('./dbm301-enf0.h5')

def unfreeze_model(model):
    """Mark the last 400 layers trainable (except BatchNorm) and recompile.

    BatchNormalization layers keep whatever `trainable` value they already
    have; every other layer in `model.layers[-400:]` is set to trainable.
    The model is then recompiled in place with Adam (learning rate 1e-4),
    binary cross-entropy loss and the accuracy / f1 metrics.
    NOTE(review): if the model has fewer than 400 layers the slice covers all
    of them - confirm that this is intended.
    """
    candidates = model.layers[-400:]
    to_unfreeze = [lyr for lyr in candidates
                   if not isinstance(lyr, layers.BatchNormalization)]
    for lyr in to_unfreeze:
        lyr.trainable = True

    # Recompiling is what makes the changed `trainable` flags take effect.
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=1e-4),
        loss='binary_crossentropy',
        metrics=["accuracy", f1],
    )

unfreeze_model(model)

In [None]:
# Callbacks for the fine-tuning stage. Same policy as the frozen stage, but the
# best weights go to a separate checkpoint file.

# `save_weights_only` is a real boolean and `save_freq='epoch'` replaces the
# deprecated `period=1`.
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath='./dbm301-enf0-2.h5',
                                                  monitor='val_accuracy',
                                                  save_weights_only=True,
                                                  save_freq='epoch',
                                                  verbose=1,
                                                  save_best_only=True)
# Stop when validation accuracy has not improved for 10 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_accuracy',
                                                  min_delta=0.0,
                                                  patience=10,
                                                  verbose=1)
# Multiply the learning rate by 0.2 after 6 epochs without an improvement of at
# least 0.001, never going below 1e-5. `min_delta` replaces the deprecated `epsilon`.
reduce_learning_rate = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_accuracy',
                                                            factor=0.2,
                                                            patience=6,
                                                            verbose=1,
                                                            min_delta=0.001,
                                                            cooldown=0,
                                                            min_lr=0.00001)

In [None]:
# Fine-tune the unfrozen model on the second dataset.
#
# `steps_per_epoch` is intentionally not passed: x_train2 / y_train2 are NumPy
# arrays, and Keras documents that argument as unsupported for array inputs.
# Leaving it out makes each epoch exactly one pass over the training split
# in batches of 32.
history2 = model.fit(
          x_train2, y_train2,
          epochs=50,
          batch_size=32,
          verbose=1,
          validation_data = (x_test2, y_test2),
          callbacks=[checkpointer,early_stopping,reduce_learning_rate])

In [None]:
# Score the model on the held-out split of the second dataset. These are the
# same arrays that were passed as validation_data during fine-tuning.
# NOTE(review): this uses the weights from the last trained epoch, not the
# best checkpoint written by ModelCheckpoint - confirm that this is intended.
score = model.evaluate(x_test2, y_test2)
# evaluate() returns the loss first, then the compiled metrics; accuracy is first.
test_accuracy = score[1]
print('Test accuracy:', test_accuracy)

**Submission**

In [None]:
import csv

# Load csv data
folder_path = "../input/dbm301-dataset2/fptu-huynhld3-sum21/test/testset_images/testset_images"
label_file  = "../input/dbm301-dataset2/fptu-huynhld3-sum21/test/testset_images.csv"

# An id is written to the submission only when its predicted value exceeds this.
SUBMIT_THRESHOLD = 0.85

data = pd.read_csv(label_file)
data_array = data.values

# Read every test image once and remember the ids that were readable, so the
# predictions can be matched to ids without reading the files a second time.
# (`X_train` keeps its original name although it now holds the test images.)
X_train = []
submit_ids = []
for row in data_array:
    image = cv2.imread(folder_path + '/' + str(row[0]) + '.jpg')
    if image is None:
        # Unreadable / missing file: it gets no prediction and no submission row.
        continue
    X_train.append(cv2.resize(image, (224, 224)))
    submit_ids.append(row[0])
X_train = np.array(X_train)

# prediction (one row per readable image, in the same order as submit_ids)
predict_submit = model.predict(X_train)

# Write submission: a single 'Id' column listing the ids predicted above the threshold.
# newline='' is what the csv module asks for so it controls line endings itself.
with open('submission_EFN0_best-acc_0-81.csv', mode='w', newline='') as submission_file:
    writer = csv.writer(submission_file, delimiter=',', quotechar='"', quoting=csv.QUOTE_MINIMAL)
    writer.writerow(['Id'])
    for image_id, probability in zip(submit_ids, predict_submit):
        if probability > SUBMIT_THRESHOLD:
            writer.writerow([image_id])