In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns

import os
import random 

In [None]:
# Image folders of the competition dataset (the trailing slash is part of each value).
train_dir, test_dir = (
    "/kaggle/input/petfinder-pawpularity-score/train/",
    "/kaggle/input/petfinder-pawpularity-score/test/",
)

In [None]:
# Load the tabular metadata that accompanies the train and test images.
train = pd.read_csv(filepath_or_buffer='../input/petfinder-pawpularity-score/train.csv')
test = pd.read_csv(filepath_or_buffer='../input/petfinder-pawpularity-score/test.csv')

In [None]:
# Show (rows, columns) of the train and test tables side by side.
(train.shape, test.shape)

In [None]:
# Preview the first five rows of the training table.
train.head(n=5)

In [None]:
# Correlation heatmap of the tabular data. Only numeric columns are used:
# the string "Id" column cannot be correlated, and pandas >= 2.0 raises on it
# instead of silently dropping it.
sns.heatmap(train.select_dtypes(include="number").corr())

In [None]:
import tensorflow as tf
import tensorflow.keras as keras
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.applications.resnet_v2 import preprocess_input, decode_predictions
from tensorflow.keras.applications import EfficientNetB6

In [None]:
from tensorflow.keras.callbacks import EarlyStopping,ReduceLROnPlateau
from tensorflow.keras.optimizers.schedules import ExponentialDecay

In [None]:
# from tensorflow.keras.preprocessing.image import img_to_array
# convert the image pixels to a numpy array
# image = img_to_array(resizedImage)

In [None]:
# Report whether TensorFlow sees a GPU. Exactly one of the two messages is
# printed; previously "Found GPU at:" was printed even when no GPU was found.
device_name = tf.test.gpu_device_name()
if "GPU" in device_name:
    print('Found GPU at: {}'.format(device_name))
else:
    print("GPU device not found")

In [None]:
def seed_env(seed=43):
    """Seed NumPy, Python's ``random`` and TensorFlow, and set related env vars.

    :param seed: integer seed applied to every generator. Defaults to 43,
        the value that was previously hard-coded.
    """
    np.random.seed(seed)
    random.seed(seed)
    tf.random.set_seed(seed)
    os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
    # NOTE(review): PYTHONHASHSEED is normally read at interpreter start-up, so
    # setting it here presumably only affects child processes - confirm.
    os.environ['PYTHONHASHSEED'] = str(seed)

seed_env()

In [None]:

def _to_image_path(image_id, image_dir):
    """Return the JPEG path of ``image_id`` inside ``image_dir``.

    Values that already end in ".jpg" are returned unchanged, so re-running
    this cell does not prepend the directory and extension a second time.
    """
    if image_id.endswith(".jpg"):
        return image_id
    return image_dir + image_id + ".jpg"


# Replace the bare image ids with full file paths (safe to re-run).
train["Id"] = train["Id"].apply(lambda x: _to_image_path(x, train_dir))
test["Id"] = test["Id"].apply(lambda x: _to_image_path(x, test_dir))

In [None]:
# Image generators.
# Training: scale pixel values by 1/255 and apply random rotation (up to 45
# degrees), shear and horizontal flips. Shift and zoom augmentation
# (width_shift_range / height_shift_range / zoom_range = 0.2) stay disabled.
train_gen = ImageDataGenerator(
    rescale=1. / 255,
    rotation_range=45,
    shear_range=0.2,
    horizontal_flip=True,
)
# Test: only the 1/255 rescaling, no augmentation.
test_gen = ImageDataGenerator(rescale=1. / 255)

In [None]:
# Randomly pick 70% of the rows for training (fixed random_state for repeatability).
df_train = train.sample(random_state=0, frac=0.7)

In [None]:
# Training batches: augmented images from the 70% sample, reshuffled each epoch.
train_data = train_gen.flow_from_dataframe(dataframe = df_train, 
directory = train_dir, x_col = 'Id', 
y_col = 'Pawpularity' , seed = 42,
batch_size = 64, shuffle = True, 
class_mode="raw",target_size = (224,224))

# Validation batches: the rows of `train` that were not sampled into df_train.
# They go through the rescale-only generator (no random augmentation) in a
# fixed order, so validation metrics - which drive EarlyStopping - are
# measured on undistorted images and are comparable between epochs.
vald_data = test_gen.flow_from_dataframe(dataframe = train.drop(df_train.index), 
directory = train_dir, x_col = 'Id', 
y_col = 'Pawpularity' ,
batch_size = 64, shuffle = False, 
class_mode="raw",target_size = (224,224))

# Test batches: no labels, fixed order so predictions line up with the rows of `test`.
test_data = test_gen.flow_from_dataframe(dataframe = test, 
directory = test_dir, x_col = 'Id', 
y_col = None,
batch_size = 64, shuffle = False, 
class_mode=None,target_size = (224,224))

In [None]:
# tf.debugging.set_log_device_placement(True)
# gpus = tf.config.list_logical_devices('GPU')
# strategy = tf.distribute.MirroredStrategy(gpus)

In [None]:
# Building the backbone with EfficientNetB6(include_top=False) is disabled
# because there is no internet access; a saved model file is loaded instead.
# base_conv = EfficientNetB6(include_top=False)
# base_conv.trainable = False

# Path of the saved EfficientNetB0 model file.
eff_model = "/kaggle/input/keras-applications-models/EfficientNetB0.h5"
conv_base = tf.keras.models.load_model(eff_model)
# Freeze the backbone so its weights are not updated during training.
conv_base.trainable = False
# NOTE(review): include_top is normally a constructor argument of the
# keras.applications builders; assigning it on an already-loaded model
# presumably does not remove a classification head - confirm what the saved
# file contains.
# NOTE(review): keras.applications EfficientNet models are documented to
# expect 0-255 inputs, while the generators rescale by 1/255 - confirm.
conv_base.include_top = False

In [None]:
def get_base_mode():
    """Build the regressor that stacks a dense head on the global ``conv_base``.

    Pipeline: 224x224x3 input -> conv_base -> batch normalisation ->
    dropout (0.1) -> flatten -> Dense(512) -> Dense(256) -> Dense(1),
    every dense layer using ReLU.

    :return: an uncompiled ``keras.Model``.
    """
    L = tf.keras.layers

    image = keras.Input(shape=(224, 224, 3))

    feats = conv_base(image)
    feats = L.BatchNormalization()(feats)
    feats = L.Dropout(0.1)(feats)

    feats = L.Flatten()(feats)
    feats = L.Dense(512, activation="relu")(feats)
    feats = L.Dense(256, activation="relu")(feats)
    # NOTE(review): the single output unit also uses ReLU, so predictions
    # cannot be negative - confirm this is intended for the regression target.
    score = L.Dense(1, activation="relu")(feats)

    return keras.Model(image, score)

In [None]:
# tf.debugging.set_log_device_placement(True)
# Build and compile the backbone-based model inside an explicit GPU:0 device scope.
with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
    model = get_base_mode()

    # Stop after 5 epochs without improvement and restore the best weights.
    early_stopping = EarlyStopping(restore_best_weights=True, patience=5)

    # Learning rate starts at 1e-3 and is multiplied by 0.96 every 100 steps.
    lr_schedule = ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_rate=0.96,
        decay_steps=100,
        staircase=True,
    )
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="mse",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

In [None]:
# Print the layer-by-layer summary of the backbone-based model.
model.summary()

In [None]:
# Train the backbone-based model for at most 10 epochs; the returned History
# object is kept for plotting.
predictor = model.fit(
    x=train_data,
    validation_data=vald_data,
    epochs=10,
    callbacks=[early_stopping],
    # use_multiprocessing=True, workers=-1,
)

In [None]:
def plot_history(hist):
    """Draw training vs. validation curves for the loss and the RMSE metric.

    Two stacked panels sharing the x axis are produced: MSE loss on top and
    root mean squared error below, with one point per epoch.

    :param hist: History object returned by ``model.fit()``; its ``history``
        dict must hold 'loss', 'val_loss', 'root_mean_squared_error' and
        'val_root_mean_squared_error'.
    """
    records = hist.history
    # Look up every series first so a missing key fails before any figure exists.
    train_loss = records['loss']
    valid_loss = records['val_loss']
    train_rmse = records['root_mean_squared_error']
    valid_rmse = records['val_root_mean_squared_error']

    # Epoch numbers start at 1 on the x axis.
    epochs = range(1, len(train_loss) + 1)

    fig, axes = plt.subplots(nrows=2, ncols=1, sharex=True)

    panels = (
        ('MSE Loss', train_loss, valid_loss),
        ('Root Mean Squared Error', train_rmse, valid_rmse),
    )
    for ax, (title, train_curve, valid_curve) in zip(axes, panels):
        ax.plot(epochs, train_curve, 'bo', label='Training')
        ax.plot(epochs, valid_curve, 'ro', label='Validation')
        ax.set_title(title)
        ax.legend()

    # Only the bottom panel carries the shared x-axis label.
    axes[1].set_xlabel('Epochs')

    plt.tight_layout()
    plt.show()

In [None]:
# Plot the loss and RMSE curves recorded while fitting `model`.
plot_history(predictor)

In [None]:
def get_base_mode_2():
    """Build a small CNN regressor that does not use a pretrained backbone.

    Pipeline: 224x224x3 input -> three 3x3 ReLU convolutions (32, 64, 128
    filters), each followed by 2D pooling (max, max, average) -> batch
    normalisation -> dropout (0.1) -> flatten -> Dense(512) -> Dense(256) ->
    Dense(1), every dense layer using ReLU.

    :return: an uncompiled ``keras.Model``.
    """
    L = tf.keras.layers

    image = keras.Input(shape=(224, 224, 3))

    # Convolutional feature extractor.
    feats = L.Conv2D(32, kernel_size=(3, 3), activation="relu", name="Conv_1")(image)
    feats = L.MaxPool2D(pool_size=(2, 2))(feats)

    feats = L.Conv2D(64, kernel_size=(3, 3), activation="relu", name="Conv_3")(feats)
    feats = L.MaxPool2D()(feats)

    feats = L.Conv2D(128, kernel_size=(3, 3), activation="relu", name="Conv_5")(feats)
    feats = L.AveragePooling2D()(feats)

    feats = L.BatchNormalization()(feats)
    feats = L.Dropout(0.1)(feats)

    # Dense regression head.
    feats = L.Flatten()(feats)
    feats = L.Dense(512, activation="relu")(feats)
    feats = L.Dense(256, activation="relu")(feats)
    score = L.Dense(1, activation="relu")(feats)

    return keras.Model(image, score)

In [None]:
# tf.debugging.set_log_device_placement(True)
# Build and compile the from-scratch CNN inside an explicit GPU:0 device scope.
with tf.device('/job:localhost/replica:0/task:0/device:GPU:0'):
    model_2 = get_base_mode_2()

    # Fresh callback instance for this model: stop after 5 epochs without
    # improvement and restore the best weights.
    early_stopping = EarlyStopping(restore_best_weights=True, patience=5)

    # Learning rate starts at 1e-3 and is multiplied by 0.96 every 100 steps.
    lr_schedule = ExponentialDecay(
        initial_learning_rate=1e-3,
        decay_rate=0.96,
        decay_steps=100,
        staircase=True,
    )
    model_2.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        loss="mse",
        metrics=[tf.keras.metrics.RootMeanSquaredError()],
    )

In [None]:
# Train the from-scratch CNN for at most 10 epochs; the returned History
# object is kept for plotting.
predictor_2 = model_2.fit(
    x=train_data,
    validation_data=vald_data,
    epochs=10,
    callbacks=[early_stopping],
    # use_multiprocessing=True, workers=-1,
)

In [None]:
# Plot the loss and RMSE curves recorded while fitting `model_2`.
plot_history(predictor_2)

In [None]:
# Predictions of the backbone-based model for the test images.
pred = model.predict(x=test_data)
pred

In [None]:
# Predictions of the from-scratch CNN for the test images.
pred2 = model_2.predict(x=test_data)
pred2


In [None]:
# Ensemble: element-wise mean of the two models' predictions, taking the
# first (only) output column so each entry is a scalar.
final = list(((pred + pred2) / 2)[:, 0])

# Fill the sample submission with the averaged scores.
# NOTE(review): assumes sample_submission.csv lists images in the same order
# as test.csv - confirm.
final2 = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
final2['Pawpularity'] = final

In [None]:
# Write the submission file without the DataFrame index column.
final2.to_csv('submission.csv',index=False)

In [None]:
# TODO: continue from here.
# Combine these image-model predictions with the output of a model trained on the tabular data for the final estimate.