In [None]:
import os
import random

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import lightgbm as lgb
import tensorflow as tf
from tensorflow.keras import Sequential,Model
from tensorflow.keras.layers import Dense,Conv2D,Flatten,Dropout, Input, Concatenate, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

In [None]:
# ---- Notebook-wide configuration ----

# Image geometry fed to the network: square side length and colour channels.
img_size, channels = 299, 3

# Mini-batch size. Original note: 32, 64 -> ResourceExhaustedError.
Batch_size = 16

# Passed to tf.data as num_parallel_calls / prefetch size in create_dataset().
AUTOTUNE = tf.data.experimental.AUTOTUNE

# ---- Dataset image folders ----
train_dir = "/kaggle/input/petfinder-pawpularity-score/train/"
test_dir = "/kaggle/input/petfinder-pawpularity-score/test/"

def seed_everything(seed=123):
    """Seed every random number generator the notebook relies on.

    Seeds Python's ``random`` module, NumPy's legacy global RNG and
    TensorFlow's global RNG with the same value, and exports two
    environment variables.

    Args:
        seed: Integer seed shared by all generators. Defaults to 123, the
            value that used to be hard-coded, so ``seed_everything()`` behaves
            exactly as before.
    """
    random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

    # NOTE(review): both variables are normally read at process start-up
    # (PYTHONHASHSEED by the interpreter, TF_CPP_MIN_LOG_LEVEL when TensorFlow
    # is imported), so setting them here most likely only affects child
    # processes started later - confirm if hash/log behaviour matters.
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
    os.environ['PYTHONHASHSEED'] = str(seed)

# Seed the RNGs once, before any dataset or model is built further down.
seed_everything()

In [None]:
# Read the train / test metadata tables into df and df_test respectively.
df = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/train.csv")
df_test = pd.read_csv("/kaggle/input/petfinder-pawpularity-score/test.csv")

# Keep the bare test ids for the submission file before the column is
# overwritten with file paths below.
Id = df_test["Id"].copy()

# Turn every Id into the full path of its JPEG. The folders come from the
# train_dir / test_dir constants instead of repeating the literals, and the
# concatenation is vectorised rather than applied row by row.
df["Id"] = train_dir + df["Id"] + ".jpg"
df_test["Id"] = test_dir + df_test["Id"] + ".jpg"

In [None]:
# Normalize Pawpularity from [0,100] range to [0,1].
# The guard keeps the cell idempotent: once the column has been scaled its
# maximum is at most 1, so re-running the cell does not divide a second time.
if df["Pawpularity"].max() > 1:
    df["Pawpularity"] = df["Pawpularity"] / 100

In [None]:
# Print the number of training rows, then display the first rows of the table.
print(len(df))
df.head()

In [None]:
# Random image augmentation
def image_preprocess(is_labelled):
    """Return the augmentation function to map over a dataset.

    Args:
        is_labelled: When truthy the returned function takes ``(image, label)``
            and returns ``(augmented_image, label)``; otherwise it takes and
            returns the image alone.

    Returns:
        A callable applying resize, random flips, saturation, contrast, crop
        and brightness jitter, in that order.
    """
    def augment(image):
        # The op order is kept fixed: each random op draws from the RNG in turn.
        out = tf.image.resize(image, [img_size, img_size], method='nearest')
        out = tf.image.random_flip_up_down(tf.image.random_flip_left_right(out))
        out = tf.image.random_saturation(out, 0.95, 1.05)
        out = tf.image.random_contrast(out, 0.95, 1.05)
        # NOTE(review): the crop size equals the size the image was just
        # resized to, so this crop looks like a no-op - confirm intent.
        out = tf.image.random_crop(out, size=(img_size, img_size, channels))
        # NOTE(review): image_read() only casts to float32 without rescaling,
        # so a max delta of 0.4 is presumably tiny relative to the pixel
        # range - confirm this is the intended strength.
        return tf.image.random_brightness(out, 0.4)

    def can_be_augmented(img, label):
        # The label passes through untouched.
        return augment(img), label

    if is_labelled:
        return can_be_augmented
    return augment



In [None]:
# Reading and resizing images
def image_read(is_labelled):
    """Return the function that loads one JPEG from disk for the pipeline.

    Args:
        is_labelled: When truthy the returned function takes ``(path, label)``
            and returns ``(image, label)``; otherwise it takes a path and
            returns the image alone.

    Returns:
        A callable that reads the file, decodes it as a JPEG with ``channels``
        channels, casts it to float32 and resizes it to
        ``img_size`` x ``img_size``.
    """
    def decode(path):
        raw_bytes = tf.io.read_file(path)
        # Cast only - no division by 255 happens here.
        pixels = tf.cast(tf.image.decode_jpeg(raw_bytes, channels=channels), tf.float32)
        return tf.image.resize(pixels, (img_size, img_size))

    def can_be_decoded(path, label):
        # The label passes through untouched.
        return decode(path), label

    if is_labelled:
        return can_be_decoded
    return decode

In [None]:
# Creating the dataset
def create_dataset(df, batch_size, is_labelled = False, augment = False, shuffle = False):
    """Build a batched, prefetched tf.data pipeline of decoded images.

    Args:
        df: DataFrame with an ``Id`` column of image file paths and, when
            ``is_labelled`` is true, a ``Pawpularity`` column of targets.
        batch_size: Number of elements per batch.
        is_labelled: Yield ``(image, label)`` pairs instead of bare images.
        augment: Map the random augmentations from ``image_preprocess`` over
            the decoded images.
        shuffle: Reshuffle the elements on every iteration.

    Returns:
        A ``tf.data.Dataset`` of image batches (with labels when requested).
    """
    paths = df["Id"].values
    if is_labelled:
        dataset = tf.data.Dataset.from_tensor_slices((paths, df["Pawpularity"].values))
    else:
        dataset = tf.data.Dataset.from_tensor_slices(paths)

    if shuffle:
        # Shuffle while each element is still just a path (and label): the
        # buffer can then span the whole table for a uniform shuffle, instead
        # of holding 1024 decoded float32 images in memory.
        # max(..., 1) keeps the buffer size valid for an empty frame.
        dataset = dataset.shuffle(max(len(df), 1), reshuffle_each_iteration=True)

    dataset = dataset.map(image_read(is_labelled), num_parallel_calls=AUTOTUNE)
    if augment:
        dataset = dataset.map(image_preprocess(is_labelled), num_parallel_calls=AUTOTUNE)

    dataset = dataset.batch(batch_size)
    return dataset.prefetch(AUTOTUNE)

In [None]:
# Unlabelled, un-augmented, unshuffled pipeline over the test images; it is what model.predict() consumes later.
test = create_dataset(df_test, Batch_size, is_labelled = False, augment = False, shuffle=False)

In [None]:
# Alternative weights file (disabled):
#ef_model_path = '../input/efficientnet-keras-noisystudent-weights-b0b7/noisystudent/noisy.student.notop-b3.h5'
ef_model_path = '../input/keras-applications-models/EfficientNetB7.h5'
# Load the saved Keras model that create_model() embeds as its backbone.
# NOTE(review): the file name suggests EfficientNetB7; whether the saved model
# still includes its classification top is not visible from here - confirm.
efnet = tf.keras.models.load_model(ef_model_path)
# Freeze the loaded model so that its weights are not trained.
efnet.trainable=False

In [None]:
# from tensorflow.keras.layers import Input, Dense, Flatten, Activation
# from tensorflow.keras.utils import get_custom_objects
# #import tensorflow.keras.backend as K

# class Mish(Activation):
#     def __init__(self, activation, **kwargs):
#         super(Mish, self).__init__(activation, **kwargs)
#         self.__name__ = 'Mish'

# def mish(x):
#     return x * tf.math.tanh(tf.math.softplus(x))

# get_custom_objects().update({'Mish': Mish(mish)})

In [None]:
def create_model():
    """Assemble the regression network around the shared ``efnet`` backbone.

    Returns:
        An uncompiled ``Sequential`` model taking
        ``(img_size, img_size, channels)`` images and producing a single
        sigmoid output.
    """
    # Layers in front of the backbone.
    stem = [
        Input(shape=(img_size, img_size, channels)),
        BatchNormalization(),  # added
    ]
    # Layers stacked on top of the backbone output.
    head = [
        BatchNormalization(),
        Dropout(0.5),
        Flatten(),
        BatchNormalization(),
        Dense(units=16, activation='relu'),
        Dense(units=1, activation='sigmoid'),  # sigmoid -> BinaryCrossEntropy
    ]
    return Sequential(stem + [efnet] + head)

In [None]:
# Build a throwaway model only to print its layer summary.
create_model().summary()

In [None]:
# Learning-rate schedule handed to the optimizer: starts at 0.1 and is
# multiplied by 0.96 after every 100 optimizer steps (staircase => stepwise).
lr_schedule = ExponentialDecay(
    initial_learning_rate=1e-1,
    decay_rate=0.96,
    decay_steps=100,
    staircase=True,
)

# Stops a fit after 3 epochs without improvement of the monitored quantity
# (val_loss unless configured otherwise) and restores the best weights seen.
early_stopping = EarlyStopping(restore_best_weights=True, patience=3)

class rmseDiffEarlyStop(tf.keras.callbacks.Callback):
    """Stop training when the train/validation RMSE gap signals overfitting.

    Training is stopped at the end of an epoch when the absolute difference
    between the validation and training RMSE exceeds ``max_gap`` while the
    training RMSE is already below ``max_train_rmse``.

    The thresholds are on the same scale as the targets. Pawpularity is
    divided by 100 before training and the model ends in a sigmoid, so the
    RMSE can never exceed 1. The previous hard-coded 5 and 15 therefore could
    never trigger; the defaults are those values on the normalized scale.

    Args:
        max_gap: Largest tolerated ``|val_rmse - train_rmse|``.
        max_train_rmse: The gap is only checked once the training RMSE is
            below this value.
    """

    def __init__(self, max_gap=0.05, max_train_rmse=0.15):
        super().__init__()
        self.max_gap = max_gap
        self.max_train_rmse = max_train_rmse

    def on_epoch_end(self, epoch, logs=None):
        """Check the RMSE gap from this epoch's ``logs``; request a stop if too large."""
        logs = logs or {}  # avoids a shared mutable default argument
        val_rmse = logs.get('val_root_mean_squared_error')
        trn_rmse = logs.get('root_mean_squared_error')
        # Either metric may be missing (e.g. no validation data); skip the check.
        if val_rmse is None or trn_rmse is None:
            return
        if abs(val_rmse - trn_rmse) > self.max_gap and trn_rmse < self.max_train_rmse:
            print("Gap is too big. Stop!")
            self.model.stop_training = True


# Callback instance passed to every model.fit() call in the training loop.
callback = rmseDiffEarlyStop()
# NOTE(review): `models` is never appended to in the visible notebook.
models = []
# Collects one prediction DataFrame per trained model.
finals = []

In [None]:
# Train one model per hold-out block and predict the test set with each.
# Block i validates on rows [1000*i, 1000*i + 912) and trains on every other row.
# NOTE(review): with these hard-coded bounds, rows 912-999 of each 1000-row
# block are never used for validation - confirm this split is intended.
for i in range(10):
    val_start, val_stop = 1000 * i, 1000 * i + 912
    val_now = df.iloc[val_start:val_stop]
    trn_now = pd.concat([df.iloc[:val_start], df.iloc[val_stop:]], axis=0)

    train = create_dataset(trn_now, Batch_size, is_labelled=True, augment=True, shuffle=True)
    validation = create_dataset(val_now, Batch_size, is_labelled=True, augment=False, shuffle=False)

    model = create_model()
    model.compile(
        loss=tf.keras.losses.BinaryCrossentropy(label_smoothing=0.01),
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

    print("\n", i+1,"model")
    predictor = model.fit(
        train,
        epochs=15,
        validation_data=validation,
        callbacks=[early_stopping, callback],
    )

    pred = model.predict(test)

    # The targets were divided by 100 before training, so the sigmoid output
    # is on the [0, 1] scale. Convert back to the original 0-100 Pawpularity
    # scale here, and flatten the (n, 1) prediction array into a plain column.
    final = pd.DataFrame({'Id': Id, 'Pawpularity': pred.ravel() * 100})
    finals.append(final)
    print(final[:6])

In [None]:
# Number of per-model prediction tables collected so far.
len(finals)

In [None]:
# Average the per-model test predictions into a single ensemble prediction.
n = len(finals)
if n == 0:
    # Fail loudly instead of producing a NaN-filled submission.
    raise ValueError("No model predictions collected in `finals`.")

# Stack only the numeric prediction column. Converting the whole DataFrames
# with np.array() would mix the string ids in and give an object-dtype result.
pred_final = np.mean(
    [fold_pred['Pawpularity'].to_numpy(dtype=float) for fold_pred in finals],
    axis=0,
)
pred_final

In [None]:
# Pair the original test ids with the ensemble prediction and write the
# submission file (without the index column).
submission = pd.DataFrame({'Id': Id, 'Pawpularity': pred_final})
submission.to_csv('submission.csv', index=False)