In [None]:
import sys
# Add the attached Kaggle dataset directory to the import path; presumably this
# is where the `efficientnet` package imported in the next cell lives.
sys.path.append('/kaggle/input/efficientnet-keras-dataset/efficientnet_kaggle')

In [None]:
from sklearn.preprocessing import MultiLabelBinarizer
from tqdm.notebook import tqdm
import efficientnet.tfkeras as efn
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow as tf
import pandas as pd
import numpy as np
import random
import os

In [None]:
# Set the global Keras dtype policy to 'mixed_float16' before any layer is built.
tf.keras.mixed_precision.set_global_policy('mixed_float16')



class CFG:
    """Notebook-wide configuration shared by the data pipeline, model and inference cells."""

    # ---- fixed settings: keep these ----
    strategy = tf.distribute.get_strategy()
    batch_size = strategy.num_replicas_in_sync * 16  # 16 samples per replica

    img_size = 600  # images are handled as img_size x img_size x 3

    # Class names; their order matches the columns of the prediction matrix.
    classes = np.array(
        ['complex', 'frog_eye_leaf_spot', 'powdery_mildew', 'rust', 'scab'])
    root = '../input/plant-pathology-2021-fgvc8/test_images'

    # ---- tunable settings: tweak these ----
    seed = 42  # random seed we use for each operation
    # tta_steps = 1  # number of TTA folds, run without TTA if 0 (currently disabled)

In [None]:
def decode_image(image_data):
    """Decode JPEG bytes into a float32 image tensor scaled by 1/255.

    The decoded tensor is reshaped to (CFG.img_size, CFG.img_size, 3), so the
    encoded image must already have exactly that size.
    """
    target_shape = [CFG.img_size, CFG.img_size, 3]
    pixels = tf.reshape(tf.image.decode_jpeg(image_data, channels=3), target_shape)
    # Converted to float32 (original author's note: the TPU only supports float32).
    return tf.cast(pixels, tf.float32) / 255.


def data_augment(image, label):
    """Apply random flips, a random 90-degree rotation and colour jitter.

    Args:
        image: image tensor; `get_dataset` maps this function over batches.
        label: passed through unchanged.

    Returns:
        Tuple ``(augmented_image, label)``.
    """
    image = tf.image.random_flip_left_right(image, seed=CFG.seed)
    image = tf.image.random_flip_up_down(image, seed=CFG.seed)

    # Number of quarter turns, drawn uniformly from {0, 1, 2, 3}.
    # `tf.image.rot90` takes an int32 scalar for `k`, so sample as int32.
    k = tf.random.uniform([], minval=0, maxval=4, dtype=tf.int32, seed=CFG.seed)
    image = tf.image.rot90(image, k=k)

    # Colour jitter: hue, saturation, contrast, then brightness.
    image = tf.image.random_hue(image, .1, seed=CFG.seed)
    image = tf.image.random_saturation(image, .8, 1.2, seed=CFG.seed)
    image = tf.image.random_contrast(image, .8, 1.2, seed=CFG.seed)
    image = tf.image.random_brightness(image, .1, seed=CFG.seed)

    return image, label

# Feature spec used to parse serialized tf.train.Example records: the encoded
# image and its file name are both stored as scalar byte strings.
feature_map = {
    'image': tf.io.FixedLenFeature([], tf.string),
    'image_name': tf.io.FixedLenFeature([], tf.string)}

# Parse one serialized record read from a .tfrec file.
def read_tfrecord(example):
    """Parse a serialized example into ``(image, image_name)``.

    The image is decoded by `decode_image`; the second element is the raw
    `image_name` string tensor, returned in the position of a label.
    """
    parsed = tf.io.parse_single_example(example, feature_map)
    return decode_image(parsed['image']), parsed['image_name']


def get_dataset(filenames, ordered=True, shuffled=False, repeated=False,
                augmented=False, cached=False, distributed=False):
    """Build a batched, prefetched `tf.data` pipeline over TFRecord files.

    Args:
        filenames: TFRecord path(s) passed to `tf.data.TFRecordDataset`.
        ordered: when False, allow non-deterministic element order.
        shuffled: shuffle parsed samples with a 2048-element buffer.
        repeated: repeat the dataset indefinitely.
        augmented: map `data_augment` over the batches.
        cached: cache the batches.
        distributed: distribute the dataset through `CFG.strategy`.

    Returns:
        A dataset of ``(image, image_name)`` batches of `CFG.batch_size`
        elements (a distributed dataset when `distributed` is True).
    """
    autotune = tf.data.experimental.AUTOTUNE  # let tf.data tune parallelism
    records = tf.data.TFRecordDataset(filenames, num_parallel_reads=autotune)

    if not ordered:
        options = tf.data.Options()
        options.experimental_deterministic = False
        records = records.with_options(options)

    pipeline = records.map(read_tfrecord, num_parallel_calls=autotune)

    if shuffled:  # shuffle individual samples
        pipeline = pipeline.shuffle(2048, seed=CFG.seed)
    if repeated:  # repeat the images indefinitely
        pipeline = pipeline.repeat()

    pipeline = pipeline.batch(CFG.batch_size)

    if augmented:  # augmentation is applied per batch
        pipeline = pipeline.map(data_augment, num_parallel_calls=autotune)
    # NOTE(review): the cache sits after augmentation (and after repeat), so
    # cached batches would replay the same augmented data - confirm intended.
    if cached:
        pipeline = pipeline.cache()

    pipeline = pipeline.prefetch(autotune)

    if distributed:  # spread the dataset across the strategy's replicas
        pipeline = CFG.strategy.experimental_distribute_dataset(pipeline)
    return pipeline


def get_model():
    """Build the EfficientNetB7 multi-label classifier (weights not loaded).

    The backbone has no top, uses global average pooling and is created with
    `weights=None`. The head is a Dense layer with one unit per entry of
    `CFG.classes`, followed by a sigmoid activation computed in float32.
    """
    backbone = efn.EfficientNetB7(
        include_top=False,
        input_shape=(CFG.img_size, CFG.img_size, 3),
        weights=None,
        pooling='avg')

    head = tf.keras.layers.Dense(
        len(CFG.classes),
        kernel_initializer=tf.keras.initializers.RandomUniform(seed=CFG.seed),
        bias_initializer=tf.keras.initializers.Zeros(),
        name='dense_top')

    # The output activation is pinned to float32 regardless of the global policy.
    probabilities = tf.keras.layers.Activation('sigmoid', dtype='float32')

    return tf.keras.models.Sequential(
        [backbone, head, probabilities], name='EfficientNetB7')

In [None]:
def _serialize_image(path):
    """Read a JPEG file, resize it to CFG.img_size x CFG.img_size and return
    the re-encoded JPEG as bytes."""
    raw_bytes = tf.io.read_file(path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)

    target_size = [CFG.img_size, CFG.img_size]
    # The resized image is cast back to uint8 before JPEG encoding.
    resized = tf.cast(tf.image.resize(decoded, target_size), tf.uint8)

    encoded = tf.image.encode_jpeg(resized)
    return encoded.numpy()


def _serialize_sample(image, name):
    """Pack encoded image bytes and a file name (bytes) into a serialized
    `tf.train.Example` with the keys 'image' and 'image_name'."""
    def _bytes_feature(value):
        # Wrap a single bytes value as a one-element bytes_list feature.
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

    features = tf.train.Features(feature={
        'image': _bytes_feature(image),
        'image_name': _bytes_feature(name)})
    return tf.train.Example(features=features).SerializeToString()

# Write the test set to a TFRecord file.
def serialize_test():
    """Serialize every file in `CFG.root` into 'test.tfrec' (working directory).

    Each file is re-encoded by `_serialize_image` and stored together with its
    encoded file name. Records are written one at a time, in the order returned
    by `os.listdir(CFG.root)`, so only a single encoded image is held in memory
    instead of the whole test set.
    """
    file_names = os.listdir(CFG.root)

    with tf.io.TFRecordWriter('test.tfrec') as writer:
        for file_name in tqdm(file_names, total=len(file_names)):
            image = _serialize_image(os.path.join(CFG.root, file_name))
            writer.write(_serialize_sample(image, file_name.encode()))

In [None]:
serialize_test()

In [None]:
size = len(os.listdir(CFG.root))  # number of test images

filenames = tf.io.gfile.glob('*.tfrec')

# Test-time augmentation is disabled: a single ordered, un-augmented pass over
# the test records.
dataset = get_dataset(filenames)

# Accumulator for the averaged predictions, shape (size, number of classes).
predicts = np.zeros((size, len(CFG.classes)))
paths = tf.io.gfile.glob('../input/modelfgvc8efnb7/model_4.h5')

# Fail loudly when no checkpoint is found; otherwise `predicts` would silently
# stay all-zero.
if not paths:
    raise FileNotFoundError(
        "no model weights matched '../input/modelfgvc8efnb7/model_4.h5'")

for path in tqdm(paths, total=len(paths)):

    with CFG.strategy.scope():
        model = get_model()
        model.load_weights(path)

    # Must run inside the loop so that every checkpoint contributes its share
    # to the average (each term is already divided by the number of models).
    predicts += model.predict(dataset) / len(paths)

In [None]:
# Score above which a class counts as predicted.
THRESHOLD = 0.4

# Boolean mask, one row per image and one column per class.
predicts = predicts > THRESHOLD

# Join the names of the predicted classes of each image into one
# space-separated string, e.g. 'complex frog_eye_leaf_spot scab'.
labels = [' '.join(CFG.classes[row]) for row in predicts]

# An image with no predicted class (or any label containing 'healthy') is
# labelled 'healthy'.
labels = ['healthy' if ('healthy' in x or x == '') else x for x in labels]

# NOTE(review): rows of `predicts` follow the record order of the TFRecord
# file, while the names below come from a fresh os.listdir call - this assumes
# both listings return the same order; confirm, or use the stored image names.
df = pd.DataFrame({
    'image': os.listdir(CFG.root),
    'labels': labels})

df.to_csv('submission.csv', index=False)
display(df.head())