In [None]:
import sklearn
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
import matplotlib.pyplot as plt
from tensorflow.keras.applications.resnet50 import ResNet50
from sklearn.model_selection import KFold
from tensorflow.keras.layers.experimental.preprocessing import RandomRotation, RandomContrast, RandomFlip, RandomTranslation
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau
from tensorflow.keras.optimizers import *
from keras.models import Model
from tensorflow.keras.applications import VGG16
import tensorflow_addons as tfa
from tensorflow.keras.applications import EfficientNetB7
from keras.models import Sequential
from keras import layers
import os
from sklearn.model_selection import train_test_split
from keras.models import Model
from tensorflow.keras.applications.xception import Xception
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
import seaborn as sns
%matplotlib inline

# **Image Augmentation**

In [None]:
def data_augment(image):
    """Randomly perturb one image with flips, rotations and colour jitter.

    Five independent uniform draws between 0 and 1 gate the optional steps;
    the two flips rely on TensorFlow's own random helpers.

    Args:
        image: Image accepted by the `tf.image` ops used below (presumably a
            square height x width x 3 image -- confirm against the caller,
            since transpose / 90-degree rotations swap height and width).

    Returns:
        The augmented image produced by the last `tf.image` op applied.
    """
    # Gating draws, taken up front and always in this order.
    transpose_draw = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    rotation_draw = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    saturation_draw = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    contrast_draw = tf.random.uniform([], 0, 1.0, dtype=tf.float32)
    brightness_draw = tf.random.uniform([], 0, 1.0, dtype=tf.float32)

    # Random mirror along each axis.
    image = tf.image.random_flip_left_right(image)
    image = tf.image.random_flip_up_down(image)

    # Transpose when the draw exceeds 0.75.
    if transpose_draw > 0.75:
        image = tf.image.transpose(image)

    # Quarter-turn rotations: each of 0/1/2/3 turns gets one quarter of the draw range.
    if rotation_draw > 0.75:
        image = tf.image.rot90(image, k=3)  # 270 degrees
    elif rotation_draw > 0.5:
        image = tf.image.rot90(image, k=2)  # 180 degrees
    elif rotation_draw > 0.25:
        image = tf.image.rot90(image, k=1)  # 90 degrees

    # Colour jitter: each transform is applied when its draw is at least 0.4.
    if saturation_draw >= 0.4:
        image = tf.image.random_saturation(image, lower=0.7, upper=1.3)
    if contrast_draw >= 0.4:
        image = tf.image.random_contrast(image, lower=0.8, upper=1.2)
    if brightness_draw >= 0.4:
        image = tf.image.random_brightness(image, max_delta=0.1)

    return image

# **Image preprocessing**

# **Loading Train and Test data using Image Data generator**

In [None]:
# function to load train data
def load_train_data(img_height, img_width, batch_size=24,
                    data_dir="/kaggle/input/petfinder-pawpularity-score",
                    val_size=0.15, random_state=69):
    """Build the training and validation image generators.

    Image files under ``<data_dir>/train`` are joined with ``train.csv`` on
    ``Id`` and split into a training and a hold-out validation frame.

    Args:
        img_height: Target image height passed to the generators.
        img_width: Target image width passed to the generators.
        batch_size: Batch size of both generators.
        data_dir: Directory holding ``train/`` and ``train.csv``.
        val_size: Fraction of rows held out for validation.
        random_state: Seed for the split and for training-batch shuffling.

    Returns:
        Tuple ``(train_it, validate_it, X_train, X_val)``: the two generators
        followed by the dataframes they were built from.
    """
    image_dir = os.path.join(data_dir, "train")

    train_df = pd.DataFrame({'filename': os.listdir(image_dir)})
    # regex=False: strip the literal '.jpg' suffix regardless of the pandas
    # version's default (older versions treat '.' as a regex wildcard).
    train_df['Id'] = train_df['filename'].str.replace('.jpg', '', regex=False)
    train_df_tabular = pd.read_csv(os.path.join(data_dir, 'train.csv'))
    train_df = train_df.merge(train_df_tabular, on="Id")

    X_train, X_val = train_test_split(train_df, test_size=val_size,
                                      random_state=random_state)

    # Training images are randomly augmented; validation images only get the
    # same deterministic normalisation, so validation metrics and the
    # predictions used for stacking are not perturbed by random transforms.
    train_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        samplewise_center=True,
        samplewise_std_normalization=True,
        preprocessing_function=data_augment,
    )
    val_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        samplewise_center=True,
        samplewise_std_normalization=True,
    )

    train_it = train_datagen.flow_from_dataframe(
        X_train, image_dir,
        x_col='filename', y_col='Pawpularity',
        target_size=(img_height, img_width),
        class_mode='raw',
        batch_size=batch_size,
        shuffle=True,  # reshuffle training batches every epoch
        seed=random_state,
    )

    validate_it = val_datagen.flow_from_dataframe(
        X_val, image_dir,
        x_col='filename', y_col='Pawpularity',
        target_size=(img_height, img_width),
        class_mode='raw',
        batch_size=batch_size,
        shuffle=False,  # keep predictions aligned with the row order of X_val
    )

    return train_it, validate_it, X_train, X_val

# function to load test data
def load_test_data(img_height, img_width, batch_size=8,
                   data_dir="/kaggle/input/petfinder-pawpularity-score"):
    """Build the inference generator for the competition test images.

    Image files under ``<data_dir>/test`` are joined with ``test.csv`` on
    ``Id``.

    Args:
        img_height: Target image height passed to the generator.
        img_width: Target image width passed to the generator.
        batch_size: Batch size of the generator.
        data_dir: Directory holding ``test/`` and ``test.csv``.

    Returns:
        Tuple ``(test_it, test_df)``: the unshuffled generator and the
        dataframe it was built from.
    """
    image_dir = os.path.join(data_dir, "test")

    # Inference must be deterministic and use the same normalisation the
    # models were trained with: no random augmentation, and the sample-wise
    # centring / scaling that load_train_data applies.
    datagen = tf.keras.preprocessing.image.ImageDataGenerator(
        rescale=1./255,
        samplewise_center=True,
        samplewise_std_normalization=True,
    )

    test_df = pd.DataFrame({'filename': os.listdir(image_dir)})
    # regex=False: strip the literal '.jpg' suffix regardless of the pandas
    # version's default (older versions treat '.' as a regex wildcard).
    test_df['Id'] = test_df['filename'].str.replace('.jpg', '', regex=False)
    test_df_tabular = pd.read_csv(os.path.join(data_dir, 'test.csv'))
    test_df = test_df.merge(test_df_tabular, on="Id")

    test_it = datagen.flow_from_dataframe(
        test_df, image_dir,
        x_col='filename', y_col=None,
        target_size=(img_height, img_width),
        class_mode=None,
        batch_size=batch_size,
        shuffle=False,  # keep predictions aligned with the row order of test_df
    )

    return test_it, test_df

In [None]:
# Side length (pixels) of the square images fed to every model.
IMAGE_SIZE = 224

# Generators plus the dataframes backing them; the dataframes are reused
# later for the tabular models.
train_gen, valid_gen, full_data_train, full_data_val = load_train_data(
    img_height=IMAGE_SIZE, img_width=IMAGE_SIZE
)
test_gen, full_data_test = load_test_data(
    img_height=IMAGE_SIZE, img_width=IMAGE_SIZE
)

In [None]:
# In-model augmentation pipeline: rotation, translation, flip and contrast
# layers applied in that order.
image_processing = Sequential()
for augmentation_layer in (
    layers.RandomRotation(factor=(-0.1, 0.4)),
    layers.RandomTranslation(height_factor=0.15, width_factor=0.15),
    layers.RandomFlip(),
    layers.RandomContrast(factor=0.2),
):
    image_processing.add(augmentation_layer)

# **Callbacks**

In [None]:
learning_rate = 1e-4
EPOCHS = 5

optimizer = tf.keras.optimizers.SGD(
    learning_rate=learning_rate
)

# Whole batches per epoch; a trailing partial batch is dropped by the floor division.
STEP_SIZE_TRAIN = train_gen.n // train_gen.batch_size
STEP_SIZE_VALID = valid_gen.n // valid_gen.batch_size

# The models are compiled with loss='mse' and a RootMeanSquaredError metric,
# so Keras logs `loss` / `val_loss` (and the RMSE metric) but never a key
# called `mse`. Monitoring 'mse' therefore left every callback inactive.
# The callbacks watch the validation loss instead, and because it is an error
# that should go down, the mode is 'min' rather than 'max'.
reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor = 'val_loss',
                                                 factor = 0.2,
                                                 patience = 2,
                                                 verbose = 1,
                                                 min_delta = 1e-4,
                                                 min_lr = 1e-6,
                                                 mode = 'min')

earlystopping = tf.keras.callbacks.EarlyStopping(monitor = 'val_loss',
                                                 min_delta = 1e-4,
                                                 patience = 5,
                                                 mode = 'min',
                                                 restore_best_weights = True,
                                                 verbose = 1)

# Keeps only the weights of the epoch with the lowest validation loss.
checkpointer = tf.keras.callbacks.ModelCheckpoint(filepath = './model.hdf5',
                                                  monitor = 'val_loss',
                                                  verbose = 1,
                                                  save_best_only = True,
                                                  save_weights_only = True,
                                                  mode = 'min')

callbacks = [earlystopping, reduce_lr, checkpointer]


# **ResNet50**

The **ResNet** architecture (of which ResNet50 is the 50-layer variant) won the ImageNet challenge in 2015. Its fundamental breakthrough was that it allowed extremely deep neural networks, with 150+ layers, to be trained successfully.

Before ResNet (for example, in the AlexNet era), training very deep neural networks was difficult because of the vanishing-gradient problem.

*Below is the architecture of ResNet50 model*

https://arxiv.org/abs/1512.03385

In [None]:
image_size = 224
def get_resnet_model():
    """Assemble a ResNet50-based regressor with a single linear output.

    Inputs of shape ``(image_size, image_size, 3)`` pass through random
    rotation and random contrast layers, an untrained (``weights=None``)
    ResNet50 without its classification head, global average pooling and a
    one-unit dense layer.

    Returns:
        An uncompiled ``tf.keras.Model``.
    """
    inputs = tf.keras.Input((image_size, image_size, 3))

    backbone = ResNet50(include_top=False, weights=None, pooling=None)

    # In-model augmentation: rotate first, then jitter the contrast.
    contrast_layer = RandomContrast(factor=0.1)
    rotation_layer = RandomRotation(factor=0.15)
    augmented = contrast_layer(rotation_layer(inputs))

    feature_maps = backbone(augmented)
    pooled = tf.keras.layers.GlobalAveragePooling2D()(feature_maps)
    prediction = tf.keras.layers.Dense(1)(pooled)

    return tf.keras.Model(inputs=[inputs], outputs=[prediction])

In [None]:
# Build the ResNet50 regressor and render its layer graph with tensor shapes.
resnet_model = get_resnet_model()
tf.keras.utils.plot_model(resnet_model, show_shapes=True)

In [None]:


# Each model gets its own optimizer instance. Sharing one optimizer object
# across models carries its state (step count, and any learning-rate
# reduction made by ReduceLROnPlateau) from one training run into the next.
resnet_optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate)

resnet_model.compile(optimizer = resnet_optimizer,
              loss = 'mse',
              metrics = [tf.keras.metrics.RootMeanSquaredError()])

resnet_history = resnet_model.fit(x = train_gen,
          steps_per_epoch = STEP_SIZE_TRAIN,
          validation_data = valid_gen,
          validation_steps = STEP_SIZE_VALID,
          epochs = EPOCHS,
          callbacks = callbacks
         )

In [None]:
# Predict on the validation generator and flatten the (n, 1) output into a
# flat list of scalars.
resnet_pred = list(resnet_model.predict(valid_gen).ravel())

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(7, 7))

# Number the x-axis by the epochs actually run (early stopping may end
# training before EPOCHS), starting at 1 so ticks line up with the points.
resnet_epochs = np.arange(1, len(resnet_history.history['loss']) + 1)

ax1.plot(resnet_epochs, resnet_history.history['loss'], color='r', label="Train loss")
ax1.plot(resnet_epochs, resnet_history.history['val_loss'], color='b', label="Validation loss")
ax1.set_xticks(resnet_epochs)
ax1.set_xlabel("Epoch")
ax1.set_ylabel("MSE loss")
ax1.set_title("ResNet50 training history")
ax1.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metric; index 1 is the
# RMSE metric.
resnet_eval = resnet_model.evaluate(valid_gen)
resnet_rmse = resnet_eval[1]
resnet_rmse

# **Efficient Net**

Efficient Net is a convolutional neural network architecture and scaling method that uniformly scales all dimensions of depth/width/resolution using a compound coefficient. The compound scaling method is justified by the intuition that if the input image is bigger, then the network needs more layers to increase the receptive field and more channels to capture more fine-grained patterns on the bigger image as described in the paper **"EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks"**

*Shown below is the architecture of EffNet B3 (with 3 blocks)*

In [None]:
image_size = 224
def get_effnet_model(weights=None, freeze_base=None):
    """Assemble an EfficientNetB7-based regressor with a single linear output.

    The model chains the shared ``image_processing`` augmentation block, an
    EfficientNetB7 backbone without its classification head, global average
    pooling, batch normalisation, dropout and a one-unit linear dense layer.

    Args:
        weights: Passed to ``EfficientNetB7`` (``None`` for random
            initialisation, or pretrained weights / a weights file path).
        freeze_base: Whether to freeze the backbone. ``None`` (default)
            freezes it only when pretrained ``weights`` are supplied; a
            randomly initialised backbone must stay trainable, otherwise the
            head is fitted on fixed random features.

    Returns:
        An uncompiled Keras ``Sequential`` model.
    """
    if freeze_base is None:
        freeze_base = weights is not None

    eff_model = Sequential()

    eff_model.add(layers.Input(shape=(image_size, image_size, 3)))
    eff_model.add(image_processing)
    base_model = EfficientNetB7(include_top=False, weights=weights)
    base_model.trainable = not freeze_base
    eff_model.add(base_model)

    eff_model.add(layers.GlobalAveragePooling2D())
    eff_model.add(layers.BatchNormalization())
    eff_model.add(layers.Dropout(0.4))
    eff_model.add(layers.Dense(1, activation="linear"))

    return eff_model

In [None]:
# Build the EfficientNetB7 regressor and render its layer graph with tensor shapes.
eff_model = get_effnet_model()
tf.keras.utils.plot_model(eff_model, show_shapes=True)

In [None]:

# Each model gets its own optimizer instance. Sharing one optimizer object
# across models carries its state (step count, and any learning-rate
# reduction made by ReduceLROnPlateau) from one training run into the next.
eff_optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate)

eff_model.compile(optimizer = eff_optimizer,
              loss = 'mse',
              metrics = [tf.keras.metrics.RootMeanSquaredError()])

eff_history = eff_model.fit(x = train_gen,
          steps_per_epoch = STEP_SIZE_TRAIN,
          validation_data = valid_gen,
          validation_steps = STEP_SIZE_VALID,
          epochs = EPOCHS,
          callbacks = callbacks
         )

In [None]:
# Predict on the validation generator and flatten the (n, 1) output into a
# flat list of scalars.
eff_pred = list(eff_model.predict(valid_gen).ravel())

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(7, 7))

# Number the x-axis by the epochs actually run (early stopping may end
# training before EPOCHS), starting at 1 so ticks line up with the points.
eff_epochs = np.arange(1, len(eff_history.history['loss']) + 1)

ax1.plot(eff_epochs, eff_history.history['loss'], color='r', label="Train loss")
ax1.plot(eff_epochs, eff_history.history['val_loss'], color='b', label="Validation loss")
ax1.set_xticks(eff_epochs)
ax1.set_xlabel("Epoch")
ax1.set_ylabel("MSE loss")
ax1.set_title("EfficientNetB7 training history")
ax1.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metric; index 1 is the
# RMSE metric.
eff_eval = eff_model.evaluate(valid_gen)
eff_rmse = eff_eval[1]
eff_rmse

# **VGGNet model**

The 16-layer variant of VGGNet is referred to as VGG16: a convolutional neural network with 16 weight layers. VGGNet is a groundbreaking model for large-scale image recognition tasks. We will use pre-trained VGG model weights on the ImageNet dataset with some fine-tuning.

Below is the model architecture from the paper "Very Deep Convolutional Networks for Large-Scale Image Recognition" (https://arxiv.org/abs/1409.1556).

In [None]:
image_size = 224
def get_vgg_model():
    """Assemble a VGG16-based regressor with a single linear output.

    A VGG16 backbone (no classification head, ``weights=None`` so it is
    randomly initialised) is followed by global average pooling, dropout and
    a one-unit dense layer.

    Returns:
        An uncompiled Keras ``Model`` taking
        ``(image_size, image_size, 3)`` images.
    """
    vgg_base = VGG16(input_shape=(image_size, image_size, 3), include_top=False, weights=None)

    # Use the output of the last pooling block as the feature map.
    last_output = vgg_base.get_layer('block5_pool').output

    x = layers.GlobalAveragePooling2D()(last_output)
    x = layers.Dropout(0.4)(x)
    # Linear output: the generators feed the raw Pawpularity score as the
    # target, which a sigmoid (bounded to (0, 1)) could never reach.
    x = layers.Dense(1, activation='linear')(x)

    vgg_model = Model(vgg_base.input, x)
    return vgg_model

In [None]:
# Build the VGG16 regressor and render its layer graph with tensor shapes.
vgg_model = get_vgg_model()
tf.keras.utils.plot_model(vgg_model, show_shapes=True)

In [None]:

# Each model gets its own optimizer instance. Sharing one optimizer object
# across models carries its state (step count, and any learning-rate
# reduction made by ReduceLROnPlateau) from one training run into the next.
vgg_optimizer = tf.keras.optimizers.SGD(learning_rate = learning_rate)

vgg_model.compile(optimizer = vgg_optimizer,
              loss = 'mse',
              metrics = [tf.keras.metrics.RootMeanSquaredError()])

vgg_history = vgg_model.fit(x = train_gen,
          steps_per_epoch = STEP_SIZE_TRAIN,
          validation_data = valid_gen,
          validation_steps = STEP_SIZE_VALID,
          epochs = EPOCHS,
          callbacks = callbacks
         )

In [None]:
# Validation predictions of the VGG model (previously taken from eff_model by
# mistake), flattened from (n, 1) into a flat list of scalars.
vgg_pred = vgg_model.predict(valid_gen)
vgg_pred = [j for sub in vgg_pred for j in sub]

In [None]:
fig, ax1 = plt.subplots(1, 1, figsize=(7, 7))

# Number the x-axis by the epochs actually run (early stopping may end
# training before EPOCHS), starting at 1 so ticks line up with the points.
vgg_epochs = np.arange(1, len(vgg_history.history['loss']) + 1)

ax1.plot(vgg_epochs, vgg_history.history['loss'], color='r', label="Train loss")
ax1.plot(vgg_epochs, vgg_history.history['val_loss'], color='b', label="Validation loss")
ax1.set_xticks(vgg_epochs)
ax1.set_xlabel("Epoch")
ax1.set_ylabel("MSE loss")
ax1.set_title("VGG16 training history")
ax1.legend(loc='best', shadow=True)

plt.tight_layout()
plt.show()

In [None]:
# evaluate() returns the loss followed by the compiled metric; index 1 is the
# RMSE metric.
vgg_eval = vgg_model.evaluate(valid_gen)
vgg_rmse = vgg_eval[1]
vgg_rmse

# **Train tabular models (SVR, linear regression, random forest) on the 85% training split, predict the 15% validation split**

In [None]:
from sklearn import linear_model
from sklearn.model_selection import cross_val_score, train_test_split
from sklearn.metrics import *
from sklearn.svm import SVR
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Support-vector regressor on the tabular metadata: every column except the
# image filename, the Id and the target.
svr = SVR(kernel="rbf", C=100, gamma=0.1, epsilon=0.1)
svr.fit(full_data_train.drop(['filename','Pawpularity', 'Id'], axis = 1), full_data_train['Pawpularity'])
svr_pred = svr.predict(full_data_val.drop(['filename','Id','Pawpularity'], axis = 1))
# Validation RMSE. np.sqrt(MSE) gives the same value as `squared=False`,
# a keyword that recent scikit-learn releases deprecate and remove.
np.sqrt(mean_squared_error(y_true=full_data_val['Pawpularity'], y_pred=svr_pred))

In [None]:
# Ordinary least-squares baseline on the tabular metadata: every column
# except the image filename, the Id and the target.
reg = linear_model.LinearRegression()
reg.fit(full_data_train.drop(['filename','Pawpularity', 'Id'], axis = 1), full_data_train['Pawpularity'])
lr_pred = reg.predict(full_data_val.drop(['filename','Id','Pawpularity'], axis = 1))
# Validation RMSE. np.sqrt(MSE) gives the same value as `squared=False`,
# a keyword that recent scikit-learn releases deprecate and remove.
np.sqrt(mean_squared_error(y_true=full_data_val['Pawpularity'], y_pred=lr_pred))

In [None]:
# Shallow random forest on the tabular metadata: every column except the
# image filename, the Id and the target. Its predictions feed the stacker.
rf = RandomForestRegressor(max_depth=2, random_state=0)
rf.fit(full_data_train.drop(['filename','Pawpularity', 'Id'], axis = 1), full_data_train['Pawpularity'])
rf_pred = rf.predict(full_data_val.drop(['filename','Id','Pawpularity'], axis = 1))
# Validation RMSE. np.sqrt(MSE) gives the same value as `squared=False`,
# a keyword that recent scikit-learn releases deprecate and remove.
np.sqrt(mean_squared_error(y_true=full_data_val['Pawpularity'], y_pred=rf_pred))

# **Put the preds and actual values in a df**

In [None]:
# Empty frame for the stacking inputs: neural-network prediction, random-forest
# prediction and the true target.
stacked_reg = pd.DataFrame(columns = ['nn', 'rf','actual'])

In [None]:
# Validation-split training table for the meta-learner: one row per
# validation sample, in the order the unshuffled generator produced them.
stacked_reg = pd.DataFrame({
    'nn': eff_pred,
    'rf': rf_pred,
    'actual': full_data_val['Pawpularity'].to_list(),
})

# **Fit a Stacked Regressor with the preds and actual values**

In [None]:
import lightgbm as ltb

In [None]:
# Meta-learner: a LightGBM regressor fitted on the base-model predictions
# (every column except 'actual') against the true target.
stacked = ltb.LGBMRegressor()
meta_features = stacked_reg.drop(columns=['actual'])
meta_target = stacked_reg['actual']
stacked.fit(meta_features, meta_target)

# **Real Test Data**

# **Predict NN for test data**

In [None]:
# EfficientNet predictions for the test generator, flattened from (n, 1)
# into a flat list of scalars.
test_nn_preds = list(eff_model.predict(test_gen).ravel())

# **Predict RF for test data**

In [None]:
# Peek at the first rows of the merged test frame (filename, Id and tabular columns).
full_data_test.head()

In [None]:
# Random-forest predictions from the tabular test columns (everything except
# the Id and the image filename).
test_tabular_features = full_data_test.drop(columns=['Id', 'filename'])
test_rf_preds = rf.predict(test_tabular_features)

# **Create Stacked Input format**

In [None]:
# Meta-learner input for the test set. Column names and order must match the
# frame the stacker was fitted on ('nn', 'rf'). The second column holds
# random-forest predictions; it was previously mislabelled 'lr', a
# feature-name mismatch that the regressor may reject or silently ignore
# depending on the library version.
stacked_test = pd.DataFrame({
    'nn': test_nn_preds,
    'rf': test_rf_preds,
})

# **Get Final Prediction**

In [None]:
# Final predictions: the stacked regressor applied to the test-set base-model predictions.
final_preds = stacked.predict(stacked_test)

In [None]:
# Submission file: one row per test image Id with its predicted score.
submission = pd.DataFrame({
    'Id': full_data_test['Id'],
    'Pawpularity': final_preds,
})
submission.to_csv('submission.csv', index = False)