In [None]:
!pip install -q efficientnet

import os
import numpy as np 
import pandas as pd
import random
import math

import warnings
from shutil import copyfile
import tensorflow as tf
from tensorflow import keras
import tensorflow.keras.applications as tfka
from tensorflow.keras.layers.experimental import preprocessing
from tensorflow.keras import layers
from tensorflow.keras.preprocessing import image
import tensorflow_hub as hub
import tensorflow_datasets as tfds
from sklearn.metrics import f1_score
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import efficientnet.tfkeras as efn
from tqdm import tqdm
import gc
import cv2
from tensorflow.keras import backend as K

In [None]:
# ---------------------------------------------------------------------------
# Notebook-wide configuration
# ---------------------------------------------------------------------------

# Lets tf.data pick parallelism / prefetch buffer sizes on its own.
AUTO = tf.data.experimental.AUTOTUNE

# Random seed; both spellings are referenced by later cells.
SEED = 9527
seed = SEED

# Run settings.
EPOCHS = 10
BATCH_SIZE = 32
IMAGE_SIZE = [256, 256]   # target size for tf.image.resize and the model input
LR = 0.0005               # learning rate
VERBOSE = 2               # verbosity

# Output layout.
label_dim = 1             # units of the final Dense layer
N_CLASSES = 2

# Folder holding the dish photographs.
img_path = '../input/food-ingredients-and-recipe-dataset-with-images/Food Images/Food Images/'

# Allergen group -> keywords looked up (case-insensitively, as substrings)
# in the ingredient text of each recipe.
common_allergens = {
    'cows milk': {
        'Cheese',
        'Butter',
        'Margarine',
        'Yogurt',
        'Cream',
        'Ice cream',
    },
    'eggs': {'egg'},
    'tree nuts': {
        'Brazil nut',
        'Almond',
        'Cashew',
        'Macadamia nut',
        'Pistachio',
        'Pine nut',
        'Walnut',
    },
    'peanuts': {'peanut'},
    'shellfish': {
        'Shrimp',
        'Prawn',
        'Crayfish',
        'Lobster',
        'Squid',
        'Scallops',
    },
    'wheat': {
        'flour',
        'wheat',
        'pasta',
        'noodle',
        'bread',
        'crust',
    },
    'soy': {'soy', 'tofu', 'soya'},
    'fish': {'fish', 'seafood'},
}

In [None]:
def seed_everything(seed):
    """Seed Python's ``random``, NumPy and TensorFlow with one value.

    The value is also written (as a string) to the ``PYTHONHASHSEED``
    environment variable.

    Args:
        seed: Seed handed unchanged to each generator.
    """
    os.environ['PYTHONHASHSEED'] = str(seed)
    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

def image_mapping_check(dataset):
    """Drop the rows whose image file cannot be loaded.

    For every row the file ``img_path + Image_Name + '.jpg'`` is opened with
    ``cv2.imread``; rows for which no image comes back are removed.

    Args:
        dataset: DataFrame with an ``Image_Name`` column holding the file
            name without the ``.jpg`` extension.

    Returns:
        A new DataFrame with only the rows whose image could be read,
        re-indexed from 0. ``dataset`` itself is left untouched.
    """
    unreadable = []
    # Visit every row: the former `counter < shape[0] - 1` loop stopped one
    # row early, so the last image was never validated.
    for label, img_name in dataset['Image_Name'].items():
        # A missing name (e.g. NaN) cannot form a path; count it as unreadable
        # instead of failing on the string concatenation below.
        if not isinstance(img_name, str):
            unreadable.append(label)
            continue
        # cv2.imread reports a missing or undecodable file by returning None,
        # so test that directly rather than provoking an exception.
        if cv2.imread(img_path + img_name + '.jpg') is None:
            unreadable.append(label)
    return dataset.drop(unreadable, axis = 0).reset_index(drop = True)

def allergens_mapping(row, types):
    """Flag whether an ingredient text mentions any keyword of an allergen group.

    Matching is a case-insensitive substring search, so a keyword also
    matches when it appears inside a longer word.

    Args:
        row: Ingredient text to scan.
        types: Key of ``common_allergens`` selecting the keyword set.

    Returns:
        1 if at least one keyword occurs in ``row``, otherwise 0. Input that
        is not a string (e.g. NaN from an empty cell) yields 0 instead of
        raising on ``.lower()``.
    """
    if not isinstance(row, str):
        return 0
    # Lower-case the text once rather than once per keyword.
    text = row.lower()
    return int(any(keyword.lower() in text for keyword in common_allergens[types]))

def combination(row):
    """Collect the eight allergen flags of one row into a single tuple.

    Args:
        row: Mapping-like object (e.g. a DataFrame row) indexable by the
            allergen column names.

    Returns:
        Tuple of the values for cows_milk, eggs, tree nuts, peanuts,
        shellfish, wheat, soy and fish, in that order.
    """
    flag_columns = (
        'cows_milk', 'eggs', 'tree nuts', 'peanuts',
        'shellfish', 'wheat', 'soy', 'fish',
    )
    return tuple(row[column] for column in flag_columns)


def load_dataset():
    """Load the recipe CSV, derive allergen labels and split train/validation.

    Steps:
      1. Read the CSV and drop rows without a readable image
         (``image_mapping_check``).
      2. Add ``image_path`` plus one 0/1 column per allergen group, computed
         from ``Cleaned_Ingredients`` with ``allergens_mapping``.
      3. Add ``total``, the tuple of all eight flags (``combination``).
      4. Split 75% / 25% with shuffling, seeded by the module-level ``seed``.

    Returns:
        ``(df, train_df, val_df)``: the full labelled frame, and the train /
        validation frames holding ``image_path``, the eight allergen columns
        and ``total``, each re-indexed from 0.
    """
    path = "../input/food-ingredients-and-recipe-dataset-with-images/Food Ingredients and Recipe Dataset with Image Name Mapping.csv"
    # Label column -> key in common_allergens. Note the first pair differs
    # ('cows_milk' column vs. 'cows milk' key).
    allergen_columns = {
        'cows_milk': 'cows milk',
        'eggs': 'eggs',
        'tree nuts': 'tree nuts',
        'peanuts': 'peanuts',
        'shellfish': 'shellfish',
        'wheat': 'wheat',
        'soy': 'soy',
        'fish': 'fish',
    }

    df = pd.read_csv(path)
    df = image_mapping_check(df)
    df['image_path'] = img_path + df['Image_Name'] + '.jpg'
    for column, allergen in allergen_columns.items():
        df[column] = df['Cleaned_Ingredients'].apply(allergens_mapping, types = allergen)
    df['total'] = df.apply(combination, axis = 1)

    # Pick the label columns by name. The former `iloc[:, 7:16]` selects the
    # same nine columns only while the CSV has exactly six columns.
    label_columns = list(allergen_columns) + ['total']
    x_train, x_val, y_train, y_val = train_test_split(
        df[['image_path']], df[label_columns],
        shuffle = True, random_state = seed, test_size = 0.25)
    train_df = pd.concat([x_train, y_train], axis = 1).reset_index(drop = True)
    val_df = pd.concat([x_val, y_val], axis = 1).reset_index(drop = True)
    return df, train_df, val_df

# Turn raw JPEG bytes into a resized, rescaled image tensor
def decode_image(image_data):
    """Decode JPEG bytes, resize to ``IMAGE_SIZE`` and divide by 255.

    Args:
        image_data: Encoded JPEG content, as produced by ``tf.io.read_file``.

    Returns:
        float32 tensor with 3 channels, resized to ``IMAGE_SIZE``, with the
        pixel values divided by 255.
    """
    decoded = tf.image.decode_jpeg(image_data, channels = 3)
    resized = tf.image.resize(decoded, IMAGE_SIZE)
    return tf.cast(resized, tf.float32) / 255.0

# Load one image file from disk and decode it
def read_image(image):
    """Read the file at ``image`` and run it through ``decode_image``.

    Args:
        image: Path of the image file.

    Returns:
        The tensor returned by ``decode_image`` for that file's content.
    """
    raw_bytes = tf.io.read_file(image)
    return decode_image(raw_bytes)

# Build the batched tf.data pipeline that feeds images to a model
def get_dataset(image):
    """Create a ``tf.data.Dataset`` of decoded images from file paths.

    Each path is mapped through ``read_image`` (parallelism chosen by
    ``AUTO``), then the images are grouped into batches of ``BATCH_SIZE``
    and prefetched. Order is not shuffled.

    Args:
        image: Sequence of image file paths.

    Returns:
        The batched, prefetched dataset of image tensors.
    """
    return (
        tf.data.Dataset.from_tensor_slices(image)
        .map(read_image, num_parallel_calls = AUTO)
        .batch(BATCH_SIZE)
        .prefetch(AUTO)
    )

In [None]:
# Arcmarginproduct class keras layer
class ArcMarginProduct(tf.keras.layers.Layer):
    '''
    Implements large margin arc distance.

    The layer is called on a pair ``(X, y)``. It computes the cosine between
    the L2-normalised rows of ``X`` and the L2-normalised columns of its
    weight matrix ``W``, adds the angular margin ``m`` to the entry of the
    labelled class and multiplies the result by the scale ``s``.

    Reference:
        https://arxiv.org/pdf/1801.07698.pdf
        https://github.com/lyakaap/Landmark2019-1st-and-3rd-Place-Solution/
            blob/master/src/modeling/metric_learning.py
    '''
    def __init__(self, n_classes, s=30, m=0.50, easy_margin=False,
                 ls_eps=0.0, **kwargs):
        '''
        Args:
            n_classes: Number of classes (columns of ``W``, one-hot depth).
            s: Scale applied to the final output.
            m: Angular margin, in radians.
            easy_margin: If True, the margin is applied only where the
                cosine is positive; otherwise the ``th`` / ``mm`` fallback
                is used.
            ls_eps: Label-smoothing amount applied to the one-hot labels
                when greater than 0.
            **kwargs: Forwarded to ``tf.keras.layers.Layer``.
        '''
        super(ArcMarginProduct, self).__init__(**kwargs)

        self.n_classes = n_classes
        self.s = s
        self.m = m
        self.ls_eps = ls_eps
        self.easy_margin = easy_margin
        # Constants for cos(theta + m) = cos(theta)cos(m) - sin(theta)sin(m).
        self.cos_m = tf.math.cos(m)
        self.sin_m = tf.math.sin(m)
        # Threshold and penalty used when easy_margin is False.
        self.th = tf.math.cos(math.pi - m)
        self.mm = tf.math.sin(math.pi - m) * m

    def get_config(self):
        '''Return the base layer config extended with the constructor arguments.'''
        config = super().get_config().copy()
        config.update({
            'n_classes': self.n_classes,
            's': self.s,
            'm': self.m,
            'ls_eps': self.ls_eps,
            'easy_margin': self.easy_margin,
        })
        return config

    def build(self, input_shape):
        '''Create ``W`` with shape ``(feature_dim, n_classes)``.

        ``input_shape`` is the pair of shapes of ``(X, y)``; only the last
        dimension of the first one is used.
        '''
        super(ArcMarginProduct, self).build(input_shape[0])

        self.W = self.add_weight(
            name='W',
            shape=(int(input_shape[0][-1]), self.n_classes),
            initializer='glorot_uniform',
            dtype='float32',
            trainable=True,
            regularizer=None)

    def call(self, inputs):
        '''Return the scaled, margin-adjusted cosine logits.

        Args:
            inputs: Pair ``(X, y)`` of embeddings and class labels. ``y`` is
                cast to int32 and one-hot encoded with depth ``n_classes``.
                Assumes ``X`` is (batch, feature_dim) and ``y`` is (batch,)
                - TODO confirm against the caller.

        Returns:
            Tensor of the same shape as the cosine matrix, multiplied by ``s``.
        '''
        X, y = inputs
        y = tf.cast(y, dtype=tf.int32)
        cosine = tf.matmul(
            tf.math.l2_normalize(X, axis=1),
            tf.math.l2_normalize(self.W, axis=0)
        )
        # Rounding can push cosine**2 marginally above 1; clip the radicand
        # so sqrt never receives a negative value and returns NaN.
        sine = tf.math.sqrt(
            tf.clip_by_value(1.0 - tf.math.pow(cosine, 2), 0.0, 1.0)
        )
        phi = cosine * self.cos_m - sine * self.sin_m
        if self.easy_margin:
            phi = tf.where(cosine > 0, phi, cosine)
        else:
            phi = tf.where(cosine > self.th, phi, cosine - self.mm)
        one_hot = tf.cast(
            tf.one_hot(y, depth=self.n_classes),
            dtype=cosine.dtype
        )
        if self.ls_eps > 0:
            one_hot = (1 - self.ls_eps) * one_hot + self.ls_eps / self.n_classes

        # Margin-adjusted value for the labelled class, plain cosine elsewhere.
        output = (one_hot * phi) + ((1.0 - one_hot) * cosine)
        output *= self.s
        return output

In [None]:
# Function for a custom learning rate scheduler (linear decrease, then a floor)
def get_lr_callback():
    """Build the per-epoch learning-rate scheduler callback.

    Schedule, for the epoch index passed to the schedule function:
      * ``epoch < EPOCHS / 2``: ``0.00001 * (BATCH_SIZE - epoch)``, i.e. the
        rate shrinks by 0.00001 with every epoch;
      * otherwise: the constant floor ``0.0000001``.

    Returns:
        A ``tf.keras.callbacks.LearningRateScheduler`` created with
        ``verbose = True``.
    """
    lr_min = 0.0000001   # rate used for the second half of training
    lr_unit = 0.00001    # per-epoch step of the first-half schedule

    def lrfn(epoch):
        if epoch < EPOCHS/2:
            return lr_unit * (BATCH_SIZE - epoch)
        return lr_min

    return tf.keras.callbacks.LearningRateScheduler(lrfn, verbose = True)

In [None]:
def get_model(mode):
    """Build a binary classifier on top of the backbone selected by ``mode``.

    Architecture: ``Input(IMAGE_SIZE + 3 channels)`` -> backbone created with
    ``weights = 'imagenet'`` and ``include_top = False`` ->
    ``GlobalAveragePooling2D`` -> ``Dense(label_dim, sigmoid)``.

    Args:
        mode: Backbone key: 'eff0'..'eff7', 'ICPV2', 'ICPV3', 'XCP', 'RN50'.
            Any other value (including 'RN101') selects ResNet101.

    Returns:
        The assembled, uncompiled ``tf.keras`` model.
    """
    backbone_by_mode = {
        'eff0': efn.EfficientNetB0,
        'eff1': efn.EfficientNetB1,
        'eff2': efn.EfficientNetB2,
        'eff3': efn.EfficientNetB3,
        'eff4': efn.EfficientNetB4,
        'eff5': efn.EfficientNetB5,
        'eff6': efn.EfficientNetB6,
        'eff7': efn.EfficientNetB7,
        'ICPV2': tfka.InceptionResNetV2,
        'ICPV3': tfka.InceptionV3,
        'XCP': tfka.Xception,
        'RN50': tfka.ResNet50,
    }
    # 'RN101' and every unlisted mode fall through to ResNet101.
    make_backbone = backbone_by_mode.get(mode, tfka.ResNet101)

    inp = tf.keras.layers.Input(shape = (*IMAGE_SIZE, 3), name = 'inp')
    features = make_backbone(weights = 'imagenet', include_top = False)(inp)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(features)
    output = tf.keras.layers.Dense(label_dim, activation='sigmoid')(pooled)
    return tf.keras.models.Model(inputs = [inp], outputs = [output])

def model_prediction(image_paths,target, mode):
    """Predict with the stored weights of one (backbone, allergen) model.

    Builds the ``mode`` architecture with ``get_model``, loads the weight
    file ``Model_{mode}_{target}_{SEED}.h5`` from the weights input folder
    (the path is printed), and predicts on the images at ``image_paths``.

    Args:
        image_paths: Sequence of image file paths to score.
        target: Allergen name used in the weight file name.
        mode: Backbone key understood by ``get_model``.

    Returns:
        The array returned by ``model.predict`` for the image dataset.
    """
    model = get_model(mode)
    weight_path = '../input/asthma-allenger-prediction-model-weights/'f'Model_{mode}_{target}_{SEED}.h5'
    print(weight_path)
    try:
        model.load_weights(weight_path)
        img = get_dataset(image_paths)
        pred = model.predict(img)
    finally:
        # This function is called once per (target, backbone) pair; without
        # releasing each model the Keras global state keeps growing and the
        # run can exhaust memory. Free it whether or not prediction succeeded.
        del model
        K.clear_session()
        gc.collect()
    return pred

In [None]:
def get_report(targets, models, thresholds):
    """Score every (allergen, backbone) model on the validation split.

    For each pair the validation images are predicted once, the predictions
    are binarised at every threshold (``prediction > threshold`` -> 1), and
    the threshold giving the highest accuracy and the one giving the highest
    F1 score are kept (first one wins on ties). A short summary is printed
    per pair.

    Args:
        targets: Label column names of the validation frame to evaluate.
        models: Backbone keys passed to ``model_prediction``.
        thresholds: Indexable sequence of decision thresholds to try.

    Returns:
        DataFrame with one row per pair and the columns ``Allergens``,
        ``Model``, ``Accuracy``, ``Accuracy_threshold``, ``F1_score`` and
        ``F1_score_threshold``.
    """
    report_columns = [
        'Allergens', 'Model', 'Accuracy', 'Accuracy_threshold',
        'F1_score', 'F1_score_threshold',
    ]

    seed_everything(seed)
    df, train_df, val_df = load_dataset()

    rows = []
    for target in targets:
        for mode in models:
            pred = model_prediction(val_df.image_path.values,target = target, mode = mode)
            y_true = val_df[target].values

            # One probability per image; binarise the whole vector at once.
            scores = np.asarray(pred).reshape(-1)
            binarised = [np.where(scores > threshold, 1, 0) for threshold in thresholds]

            acc = [accuracy_score(y_true, y_pred) for y_pred in binarised]
            # NOTE(review): pos_label = 0 computes F1 for the label-0 class
            # (no allergen keyword found) - confirm this is intended.
            f1s = [f1_score(y_true, y_pred, pos_label = 0) for y_pred in binarised]

            best_acc = max(acc)
            best_acc_threshold = thresholds[acc.index(best_acc)]
            best_f1s = max(f1s)
            best_f1s_threshold = thresholds[f1s.index(best_f1s)]

            print('\n')
            print(f'Allenger: {target}, Model: {mode}')
            print(f'Our best accuracy is {best_acc} with threshold {best_acc_threshold}')
            print(f'Our best f1 score is {best_f1s} with threshold {best_f1s_threshold}')

            rows.append({
                'Allergens': target,
                'Model': mode,
                'Accuracy': best_acc,
                'Accuracy_threshold': best_acc_threshold,
                'F1_score': best_f1s,
                'F1_score_threshold': best_f1s_threshold,
            })

    return pd.DataFrame(rows, columns = report_columns)

In [None]:
# Backbones to evaluate: EfficientNet B0-B7, then the remaining architectures.
models = [f'eff{index}' for index in range(8)] + ['ICPV2', 'ICPV3', 'XCP', 'RN50', 'RN101']
# Allergen label columns to evaluate.
targets = [
    "cows_milk",
    "eggs",
    "tree nuts",
    "peanuts",
    "shellfish",
    "wheat",
    "soy",
    "fish",
]
# Candidate decision thresholds: 0, 0.01, 0.02, ... (stops before 0.8).
thresholds = list(np.arange(0, 0.8, 0.01))

In [None]:
# Evaluate every (allergen, backbone) pair, then save the summary table.
result = get_report(targets, models, thresholds)
result.to_csv('Single_model_results.csv', index = False)