## 1. Importing Packages

In [None]:
# import data processing and visualisation libraries
import numpy as np
import pandas as pd
import keras
import matplotlib.pyplot as plt
%matplotlib inline

# import tensorflow and keras
import tensorflow as tf
#from tensorflow import keras
import os

print("Packages imported...")

In [None]:
# Turn on device-placement logging first so that every later op is reported.
tf.debugging.set_log_device_placement(True)

# Ask TensorFlow which GPUs it can actually see; entering tf.device('/GPU:0')
# does not validate the device, so it cannot be used as an availability check.
gpus = tf.config.list_physical_devices('GPU')
print("Num GPUs Available: ", len(gpus))
if gpus:
    print('Yes, there is GPU')
else:
    print('No GPU found, TensorFlow will run on CPU')

In [None]:
# FIX THE SEED FOR REPRODUCIBILITY
# Let's set all random seeds
import random
import time, datetime

def get_current_time() -> str:
    """Return the current local time formatted as ``YYYY_MM_DD_HH_MM``.

    The stamp has minute resolution and contains only digits and underscores.
    """
    now = datetime.datetime.now()
    return now.strftime("%Y_%m_%d_%H_%M")
def seed_everything(seed=0):
    """Seed every random number generator this notebook relies on.

    Seeds Python's ``random`` module, NumPy's global generator and TensorFlow's
    global generator with ``seed``, and writes ``PYTHONHASHSEED`` and
    ``TF_DETERMINISTIC_OPS`` into the process environment.

    Args:
        seed: Value handed to all three generators (default 0).
    """
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only reaches child processes - confirm.
    os.environ.update({'PYTHONHASHSEED': str(seed), 'TF_DETERMINISTIC_OPS': '1'})

    for apply_seed in (random.seed, np.random.seed, tf.random.set_seed):
        apply_seed(seed)

# One seed shared by the whole notebook: it is reused as the random state of the
# hold-out split and the K-fold split, and as the seed of the data generators.
seed = 21
seed_everything(seed)
#warnings.filterwarnings('ignore')

## 2. Exploring data

In [None]:
# Walk the competition folder and report how many files each directory holds.
folder_path = '../input/petfinder-pawpularity-score/'
for dir_path, _sub_dirs, file_names in os.walk(folder_path):
    n_files = len(file_names)
    print(dir_path, '--> number of files : ', n_files)
# NOTE: the test data is meant as an "in the wild" check, but it is of little
# use here: it is taken from the same dataset, by the same human hand.

In [None]:
# Load the training labels and attach the image path of every sample.
# os.path.join is used instead of string concatenation: folder_path already
# ends with a separator, so concatenating '/train/' produced a doubled '/'.
train_file = os.path.join(folder_path, 'train.csv')
data_df = pd.read_csv(train_file)
data_df['path'] = data_df['Id'].map(
    lambda image_id: os.path.join(folder_path, 'train', f'{image_id}.jpg')
)
data_df.head()

In [None]:
# Distribution of the target
target_col = 'Pawpularity'
fig, ax = plt.subplots(figsize=(20, 10))
ax.hist(data_df[target_col], bins=100)
ax.set_title('Targets Histogram ')
# Axis labels so the figure can be read on its own.
ax.set_xlabel(target_col)
ax.set_ylabel('Number of images')
plt.show()
# Honestly the visual was not strictly needed (the counts were already known),
# but it does no harm.

In [None]:
#SHOWING SOME RANDOM IMAGES
import random
import matplotlib.image as mpimg

# Distinct target values; a handful are drawn at random and one image is shown
# for each drawn value.
signs = data_df[target_col].unique().tolist()
print(f'total number of unique traget : {len(signs)}')
no_of_samples = 5
# random.choices samples with replacement, so a value can be drawn more than once.
random_signs = random.choices(signs, k=no_of_samples)
for sign in random_signs:
    # Boolean mask + column label in a single .loc call (no chained indexing).
    candidate_paths = data_df.loc[data_df[target_col] == sign, 'path'].tolist()
    filepath = random.choice(candidate_paths)
    img = mpimg.imread(filepath)
    plt.figure()
    plt.title(sign)
    plt.imshow(img)

## 3. Data Preprocessing


In [None]:
from sklearn.model_selection import train_test_split
# Hold-out split of data_df into a training part and a test part.
# Parameters: column holding the image path, column holding the regression
# target, and the fraction of rows reserved for the test part.
x_col = 'path'
y_col = 'Pawpularity'
test_size = 0.2
# AUTHOR NOTE: NO NEED TO DO SPLITTING, JUST EVALUATE ON THE TEST SAMPLE PROVIDED. LAST YEAR SHOULD BE MAPPED FROM 0 to 100.

# Split stratified on the raw target column, reproducible through `seed`.
# NOTE(review): the target is used as-is; nothing in this cell encodes labels.
train_df, test_df = train_test_split(data_df, test_size= test_size, random_state=seed, stratify=data_df[[y_col]])
print(f'train size : {len(train_df)}')
print(f'test size : {len(test_df)}')

In [None]:
# CREATING DATA GENERATORS
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# Input resolution per EfficientNet variant: B0 uses 224, B2 uses 260.
# NOTE(review): 300 is presumably chosen for the EfficientNetB3 backbone used in the modeling section - confirm.

# NOTE(review): Keras documents `target_size` as (height, width); the generators
# are given (img_width, img_height), which only matches because the size is square.
img_width, img_height = 300, 300
batch_size = 32
# Size of the model's final Dense layer: one regression output per image.
no_of_classes = 1

# TRAINING GENERATOR (WITH AUGMENTATION)
# Every image goes through EfficientNet's own preprocessing function and is
# randomly rotated, shifted, sheared, zoomed and flipped on the fly.
train_datagen = ImageDataGenerator(
    horizontal_flip=True,
    vertical_flip=True,
    rotation_range=40,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    fill_mode='nearest',
    preprocessing_function=tf.keras.applications.efficientnet.preprocess_input,
)
# Batches of (image, raw target value) read from the paths stored in train_df.
train_generator = train_datagen.flow_from_dataframe(
    dataframe=train_df,
    x_col=x_col,
    y_col=y_col,
    class_mode='raw',
    target_size=(img_width, img_height),
    batch_size=batch_size,
    shuffle=True,
    seed=seed,
)

# VALIDATION GENERATOR
# Only EfficientNet preprocessing is applied: no augmentation at evaluation time.
validation_datagen = ImageDataGenerator(preprocessing_function = tf.keras.applications.efficientnet.preprocess_input)
validation_generator = validation_datagen.flow_from_dataframe(
    dataframe=test_df, x_col=x_col, y_col=y_col,
    target_size=(img_width, img_height),
    class_mode='raw',
    batch_size=batch_size,
    seed=seed,
    # Keep the rows in dataframe order so that predictions line up with
    # test_df; shuffling brings nothing when the data is only evaluated.
    shuffle=False,
)

## 4. Modeling

In [None]:
from keras.models import Sequential
from keras.layers import GlobalAveragePooling2D, Flatten, Dense, Dropout, BatchNormalization
#from keras.optimizers import Adam
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau
from tensorflow.keras.applications import EfficientNetB3

# I FEEL LIKE THE DROPOUT IS A BIT LARGE
def create_model():
    """Build the (uncompiled) Pawpularity regression network.

    An ImageNet-pretrained EfficientNetB3 backbone without its classification
    top is followed by global average pooling, a 256-unit ReLU layer, dropout
    and a final Dense layer with ``no_of_classes`` units and no activation.

    Reads the module-level ``img_width``, ``img_height`` and ``no_of_classes``.

    Returns:
        A new ``Sequential`` model; each call instantiates a fresh backbone
        and fresh head layers.
    """
    backbone = EfficientNetB3(
        input_shape=(img_width, img_height, 3),
        include_top=False,
        weights='imagenet',
        drop_connect_rate=0.6,
    )

    model = Sequential()
    model.add(backbone)
    # Global pooling already yields one flat feature vector per image, so no
    # separate Flatten layer is needed in front of the dense head.
    model.add(GlobalAveragePooling2D())
    # NOTE(review): only the bias is regularised here; confirm that
    # kernel_regularizer was not the intended argument.
    model.add(Dense(256, activation='relu', bias_regularizer=tf.keras.regularizers.L1L2(l1=0.01, l2=0.001)))
    model.add(Dropout(0.5))
    model.add(Dense(no_of_classes))

    return model

In [None]:
# #import keras
# from tensorflow.keras.models import Sequential
# from tensorflow.keras.layers import Conv2D, MaxPooling2D, GlobalAveragePooling2D, Activation, Dense, Flatten, BatchNormalization, Dropout, Input

# # SIMPLE MODEL 
# model = Sequential()

# # model.add(Conv2D(32, (5, 5), input_shape=(img_width, img_height, 3)))

# # model.add(BatchNormalization())
# # model.add(Activation('relu'))
# # model.add(MaxPooling2D((2, 2)))
# # model.add(Dropout(0.4))

# # model.add(Conv2D(64, (3, 3)))
# # model.add(BatchNormalization())
# # model.add(Activation('relu'))
# # model.add(MaxPooling2D((2, 2)))
# # model.add(Dropout(0.4))

# # model.add(Conv2D(64, (3, 3)))
# # model.add(BatchNormalization())
# # model.add(Activation('relu'))
# # model.add(MaxPooling2D((2, 2)))
# # model.add(Dropout(0.4))

# # model.add(Flatten())

# # model.add(Dense(512, activation='relu'))

# # model.add(Dense(no_of_classes))

# # EFFICENT NET SOLUTION
# # Importing EfficientNets pretrained model
# # MODEL ONE 
# img_mod = "/kaggle/input/keras-applications-models/EfficientNetB0.h5"
# efnet_model = tf.keras.models.load_model(img_mod)
# efnet_model.trainable = False

# model = Sequential()

# model.add(Input(shape=(img_width, img_height, 3)))
# model.add(efnet_model)
# # OUTPUT OF EFNET is 1280 
# model.add(BatchNormalization())
# model.add(Dropout(0.2))          
# model.add(Dense(128, activation='relu'))
# model.add(Dense(no_of_classes))
# ##############################

# model.summary()

In [None]:
# Build a throw-away model only to print its layer summary; the models that are
# actually trained are created per fold inside the K-fold loop.
create_model().summary()

In [None]:
# from tensorflow.keras.utils import plot_model
# plot_model(model, show_shapes=True)

In [None]:
# COMPILING THE MODEL (EARLY STOPPING ITSELF IS CONFIGURED IN THE K-FOLD CELL)

def compile_model(model):
    """Compile ``model`` for regression and hand it back.

    Uses the Adam optimizer with mean-squared-error loss and tracks the root
    mean squared error (reported under the name ``rmse``) and ``mae``.

    Args:
        model: Keras model to compile; ``compile`` is called on this object.

    Returns:
        The same ``model`` object that was passed in.
    """
    tracked_metrics = [tf.keras.metrics.RootMeanSquaredError(name="rmse"), "mae"]
    model.compile(loss='mse', optimizer='adam', metrics=tracked_metrics)
    return model

In [None]:
# TF1-style session with device-placement logging, registered below as the
# session of the compat.v1 Keras backend.
# NOTE(review): placement logging is also enabled earlier in the notebook via
# tf.debugging.set_log_device_placement(True), so this cell may be redundant
# under TF2 - confirm before removing.
sess = tf.compat.v1.Session(config=tf.compat.v1.ConfigProto(log_device_placement=True))

from tensorflow.compat.v1.keras import backend as K
K.set_session(sess)

In [None]:
## K-FOLDING

from sklearn.model_selection import StratifiedKFold
# Stratification label: the continuous target is cut into Q quantile bins
# (labelled 0..Q-1) so that StratifiedKFold can balance the folds on it.
Q = 20
data_df['stratify_label'] = pd.qcut(data_df['Pawpularity'], q = Q, labels = range(Q))
########################################################################################
# Number of folds
# Maximum number of epochs per fold (early stopping may end a fold sooner)
k_folds = 5
epochs = 50
kfold = StratifiedKFold(n_splits = k_folds, shuffle = True, random_state = seed)
# The splits are consumed by the training loop that follows in this cell.
kfold_splits = kfold.split(data_df.index, data_df['stratify_label'])

# One Keras History object per fold is collected here.
history_objs = []
# Timestamp shared by the checkpoint file names of all folds in this run.
current_time = get_current_time()
for fold, (train_index, val_index) in enumerate(kfold_splits):
    # Build and compile a brand-new model so no weights carry over between folds.
    model = create_model()
    model = compile_model(model)

    # StratifiedKFold yields positional indices, so rows are selected with
    # .iloc (.loc only matched them while data_df kept its default index).
    train_df = data_df.iloc[train_index].reset_index()
    test_df = data_df.iloc[val_index].reset_index()

    # GENERATE THE DATA
    train_generator = train_datagen.flow_from_dataframe(
        dataframe=train_df, x_col=x_col, y_col=y_col,
        target_size=(img_width, img_height), class_mode='raw', batch_size=batch_size,
        # Training batches are reshuffled every epoch; feeding them in a fixed
        # order hurts SGD. The seed keeps the shuffling reproducible.
        shuffle=True,
        seed=seed,
    )
    validation_generator = validation_datagen.flow_from_dataframe(
        dataframe=test_df, x_col=x_col, y_col=y_col,
        target_size=(img_width, img_height), class_mode='raw', batch_size=batch_size,
        # Validation rows stay in dataframe order.
        shuffle=False,
    )

    # CALLBACKS: stop once val_mae has not improved for 10 epochs (restoring
    # the best weights) and save the best weights of this fold to disk.
    early_stop = EarlyStopping(patience=10, monitor='val_mae', mode='min', restore_best_weights=True)
    ckpt = ModelCheckpoint(f'feature_model_{fold}_{current_time}.h5',
                           verbose=1,
                           monitor='val_mae',
                           mode='min',
                           save_weights_only=True,
                           save_best_only=True)
    with tf.device('/GPU:0'):
        history = model.fit(train_generator,
                            epochs=epochs,
                            verbose=1,
                            validation_data=validation_generator,
                            callbacks=[early_stop, ckpt]
                           )
    history_objs.append(history)


In [None]:
# # TRAINNING

# epochs = 50
# history = model.fit(train_generator,
#                     epochs=epochs,
#                     verbose=1,
#                     validation_data=validation_generator,
#                     callbacks = [early_stop]
#                    )

In [None]:
print("The model metrics are")
for fold_no, fold_history in enumerate(history_objs):
    print(fold_no)
    # One column per entry of the fold's history dict, one row per epoch.
    display(pd.DataFrame(fold_history.history))
    print('==========================================================')

In [None]:
# Plot the RMSE and loss curves of every fold.
for idx, history in enumerate(history_objs):
    print(idx)
    train_rmse = history.history['rmse']
    val_rmse = history.history['val_rmse']
    train_loss = history.history['loss']
    val_loss = history.history['val_loss']

    # Dedicated name for the x axis: assigning to `epochs` here would overwrite
    # the epoch count that the training cell passes to model.fit.
    epoch_axis = range(len(train_rmse))

    fig = plt.figure(figsize=(14, 7))
    plt.plot(epoch_axis, train_rmse, 'r', label="Training RMSE")
    plt.plot(epoch_axis, val_rmse, 'b', label="Validation RMSE")
    plt.xlabel('Epoch')
    # The curves are RMSE values, not accuracies.
    plt.ylabel('RMSE')
    plt.title('Training and validation RMSE')
    plt.legend(loc='lower right')
    plt.show()

    fig = plt.figure(figsize=(14, 7))
    plt.plot(epoch_axis, train_loss, 'r', label="Training Loss")
    plt.plot(epoch_axis, val_loss, 'b', label="Validation Loss")
    plt.legend(loc='upper right')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training and validation loss')
    plt.show()
    print('=====================================================================================================')

## 6. Predictions

In [None]:
# #Preparing data in dataframe for easier data handling
# test_file = '../input/petfinder-pawpularity-score/test.csv'
# test_df = pd.read_csv(test_file)
# test_df['path'] = test_df['Id'].map(lambda x: str(folder_path+'/test/'+x)+'.jpg')
# #train_df = train_df.drop(columns=['Id'])
# #train_df = train_df.sample(frac=1).reset_index(drop=True) #shuffle dataframe
# test_df.head()

In [None]:
# test_datagen = ImageDataGenerator(rescale=1./255)
# train_generator = test_datagen.flow_from_dataframe(
#     dataframe=test_df, x_col=x_col,
#     target_size=(img_width, img_height),class_mode=None,
#     #validate_filenames=False,
#     shuffle=False) 

In [None]:
#predictions = model.predict(train_generator)

In [None]:
#predictions

In [None]:
# result_df = pd.DataFrame()
# result_df['Id'] = test_df['Id']
# result_df['Pawpularity'] = predictions
# result_df.to_csv('submission.csv', index=False)