In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here are several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from keras.preprocessing import image
from sklearn.preprocessing import StandardScaler
import cv2
import os
import tensorflow as tf
tf.random.set_seed(42)
# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# Print one sample file (the first listed) from every directory under /kaggle/input
# that contains files, to show the dataset layout without flooding the output.
for dirname, _, filenames in os.walk('/kaggle/input'):
    if filenames:
        print(os.path.join(dirname, filenames[0]))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Ask TensorFlow to allocate GPU memory on demand instead of reserving it all up front.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Memory growth has to be configured the same way on every GPU
        for device in gpus:
            tf.config.experimental.set_memory_growth(device, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Memory growth can only be set before the GPUs have been initialized
        print(err)

In [None]:
# Directories holding the competition images; each image is stored as "<Id>.jpg".
path = "/kaggle/input/petfinder-pawpularity-score/train/"
path_test = "/kaggle/input/petfinder-pawpularity-score/test/"

In [None]:
# Load the training table (one row per image, keyed by "Id") and the sample submission.
data = pd.read_csv('/kaggle/input/petfinder-pawpularity-score/train.csv')
ss = pd.read_csv('/kaggle/input/petfinder-pawpularity-score/sample_submission.csv')
# Quick sanity check of the number of rows/columns that were read.
print(data.shape)
print(ss.shape)

In [None]:
# Collect per-image measurements for every training image, in the row order of `data`:
#   pixel   - width * height
#   ratio   - width / height
#   st_size - file size on disk in bytes
#   mean    - mean pixel value over the whole image
#   form    - shape class: "sqr" (width == height), "falt" (width < height), "front" (width > height)
d = ["pixel", 'ratio', "st_size", "mean", "form"]
d = {i:[] for i in d}
for img_id in data["Id"]:
    img_path = os.path.join(path, img_id + ".jpg")
    d["st_size"].append(os.stat(img_path).st_size)

    img = image.load_img(img_path)
    # Use the public PIL attribute, which reports (width, height). Any failure here
    # must surface: silently continuing would record the previous image's size.
    _size_1, _size_2 = img.size
    d["pixel"].append(_size_1*_size_2)
    d['ratio'].append(_size_1/_size_2)
    d["mean"].append(np.mean(img))

    # NOTE: the label "falt" is kept as spelled because a later cell drops the
    # dummy column by exactly that name.
    if _size_1==_size_2:
        fm =  "sqr"
    elif _size_1<_size_2:
        fm =  "falt"
    else:
        fm =  "front"
    d["form"].append(fm)

In [None]:
# Turn the collected measurements into a frame and add ratio-style features.
# Every derived column is computed from the raw columns only.
img_info = pd.DataFrame.from_dict(d)
img_info = img_info.assign(
    size_h=img_info['st_size'] / img_info['ratio'],
    size_mean=img_info['st_size'] / img_info['mean'],
    ratio_max=img_info['ratio'] / img_info['mean'],
    ratio_p=img_info['pixel'] / img_info['mean'],
)

In [None]:
# Append the image-derived columns to the training table (column-wise concat, aligned on the row index).
# NOTE: re-running this cell without reloading `data` appends the same columns again.
data = pd.concat([data, img_info], axis=1)

In [None]:
# Standardise the continuous image features. The fitted `scaler` is kept because
# the same transformation is applied to the test features later on.
scaled_cols = ['pixel', 'ratio', 'st_size', 'mean', 'size_h', 'size_mean', 'ratio_max', 'ratio_p']
scaler = StandardScaler()
data[scaled_cols] = scaler.fit_transform(data[scaled_cols])

In [None]:
# Two aggregate counts over the binary photo-metadata flags.
minus = ["Eyes", "Action", "Info", "Blur"]
plus = ["Face", "Near", "Accessory", "Group", "Collage", "Human", "Occlusion"]

# 'minus' = 'Subject Focus' plus every flag listed in `minus`.
data['minus'] = data['Subject Focus'] + data[minus].sum(axis=1, skipna=False)
# 'plus' starts from 'Face' and then adds every flag in `plus`, which itself contains
# 'Face', so 'Face' is counted twice. The test-time features are built the same way,
# so the two must be changed together if this is ever revisited.
data['plus'] = data['Face'] + data[plus].sum(axis=1, skipna=False)

In [None]:
# One-hot encode the image shape class, then drop the raw 'form' column together
# with the 'falt' indicator, leaving the indicators of the other shape classes.
fr = pd.get_dummies(data['form'])
data = pd.concat([data, fr], axis=1).drop(columns=["form", "falt"])

In [None]:
# Cast the standardised image features to float32.
data[['pixel', 'ratio', 'st_size', 'mean', 'size_h', 'size_mean', 'ratio_max', 'ratio_p']] = data[['pixel', 'ratio', 'st_size', 'mean', 'size_h', 'size_mean', 'ratio_max', 'ratio_p']].astype('float32')

In [None]:
from sklearn.model_selection import train_test_split 
# Hold out 20% of the rows for validation; the fixed random_state keeps the split reproducible.
train,val = train_test_split(data, test_size=0.2,random_state=42) 

In [None]:
# Row/column counts of the training and validation splits.
print(train.shape, val.shape)

In [None]:
### Hyperparams 

# NOTE(review): BATCH_SIZE is not passed to DataGenerator in the cells visible here;
# the generators fall back to their own batch_size default.
BATCH_SIZE = 16
# Spatial size of the model's image input; a 3-channel axis is appended where the input layer is declared.
IMG_SIZE = ( 224 ,  224) 

In [None]:
# Ordered list of the tabular columns fed to the model's "features" input.
# DataGenerator reads this module-level name, and its length (24) has to match
# the input width declared by ProcessTabBlock.
col = ['front', 'sqr', 'Subject Focus', 'Eyes', 'Face', 'Near', 'Action',
       'Accessory', 'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur',
       'pixel', 'ratio', 'st_size', 'mean', 'size_h',
       'size_mean', 'ratio_max', 'ratio_p', 'minus', 'plus']

In [None]:
from tensorflow.keras.utils  import Sequence
class DataGenerator(Sequence):
    """Keras ``Sequence`` yielding ``({"image": ..., "features": ...}, y)`` batches.

    Images are read with OpenCV from ``path`` as ``<Id>.jpg`` and resized to
    ``(w, h)``. The tabular features are the columns named by the module-level
    ``col`` list. Targets are looked up in ``labels`` by image id.

    Args:
        metadata: DataFrame with an ``Id`` column and every column listed in ``col``.
            The feature values are copied when the generator is created, so later
            changes to ``metadata`` are not picked up.
        labels: Mapping from image id to target value.
        path: Directory that contains the image files.
        batch_size: Number of samples per batch.
        w: Width each image is resized to.
        h: Height each image is resized to.
        n_channels: Number of channels in the image buffer.
        shuffle: When true, the sample order is reshuffled on construction and
            every time ``on_epoch_end`` is called.
    """

    def __init__(self, metadata, labels, path, batch_size=16, w=224, h=224, n_channels=3, shuffle=True):
        """Store the configuration, cache the feature rows and build the first index order."""
        self.dim = (w,h)
        self.batch_size = batch_size
        self.labels = labels
        self.data_dir = path
        self.metadata = metadata
        self.list_IDs = metadata['Id'].to_list()
        self.n_channels = n_channels
        self.shuffle = shuffle
        # Cache one float row per id so that building a batch does not have to
        # filter the whole frame once per sample.
        feature_matrix = metadata[col].to_numpy(dtype=np.float64)
        self._n_features = feature_matrix.shape[1]
        self._features = dict(zip(self.list_IDs, feature_matrix))
        self.on_epoch_end()

    def __len__(self):
        """Number of full batches per epoch (a trailing partial batch is not served)."""
        return int(np.floor(len(self.list_IDs) / self.batch_size))

    def __getitem__(self, index):
        """Build and return batch number ``index`` as ``(inputs_dict, targets)``."""
        # Positions (into list_IDs) of the samples that belong to this batch
        indexes = self.indexes[index*self.batch_size:(index+1)*self.batch_size]

        # Translate positions into image ids
        list_IDs_temp = [self.list_IDs[k] for k in indexes]

        X, y = self.__data_generation(list_IDs_temp)

        return X, y

    def on_epoch_end(self):
        """Reset the sample order, shuffling it when ``shuffle`` is enabled."""
        self.indexes = np.arange(len(self.list_IDs))
        if self.shuffle:
            np.random.shuffle(self.indexes)

    def __data_generation(self, list_IDs_temp):
        """Load images, features and targets for the given ids.

        Returns:
            ``({"image": X1, "features": X2}, y)`` where ``X1`` has shape
            ``(n, h, w, n_channels)``, ``X2`` has shape ``(n, n_features)`` and
            ``y`` has shape ``(n,)`` with integer dtype; ``n`` is ``len(list_IDs_temp)``.

        Raises:
            FileNotFoundError: If an image file is missing or cannot be decoded.
        """
        n_samples = len(list_IDs_temp)
        width, height = self.dim
        # cv2.resize takes (width, height) but returns an array shaped (height, width, channels)
        X1 = np.empty((n_samples, height, width, self.n_channels))
        X2 = np.empty((n_samples, self._n_features))
        y = np.empty((n_samples), dtype=int)

        for i, ID in enumerate(list_IDs_temp):
            img_path = os.path.join(self.data_dir, ID + ".jpg")
            im = cv2.imread(img_path)
            if im is None:
                # cv2.imread signals failure by returning None instead of raising
                raise FileNotFoundError("Could not read image: " + img_path)
            X1[i,] = cv2.resize(im, self.dim).astype(np.float32)
            X2[i,] = self._features[ID]

            # Target value for this sample
            y[i] = self.labels[ID]

        return {"image": X1, "features": X2}, y

In [None]:
# Lookup table from image Id to its Pawpularity target, built from the (Id, Pawpularity) pairs.
labels = dict(data[['Id', "Pawpularity"]].to_numpy())

In [None]:
# Batch generators for the two splits. Both rely on DataGenerator's defaults
# (including shuffle=True) and read their images from the training directory.
train_datagen = DataGenerator(train, labels, path)
val_datagen = DataGenerator(val, labels, path)

In [None]:
from tensorflow.keras.layers import Input, Conv2D, BatchNormalization, Dense
from tensorflow.keras.layers import AvgPool2D, GlobalAveragePooling2D, MaxPool2D
from tensorflow.keras.models import Model
from tensorflow.keras.layers import ReLU, concatenate
import tensorflow.keras.backend as K
def densenet(input_shape, filters = 64):
    """Build a small convolutional feature extractor.

    The network is three identical stages, each a 3x3 same-padded ReLU
    convolution followed by 2x2 max pooling, so the spatial size is halved
    three times.

    Args:
        input_shape: Shape of one input image, without the batch axis.
        filters: Number of filters used by every convolution.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    model = tf.keras.Sequential()
    model.add(tf.keras.Input(shape=input_shape))
    for _ in range(3):
        model.add(Conv2D(filters=filters, padding="same", kernel_size=(3,3), activation='relu'))
        model.add(MaxPool2D(2,2))
    return model
# Shared convolutional feature extractor for IMG_SIZE images with 3 colour channels.
base_image_model = densenet(IMG_SIZE + (3,))

In [None]:
class ProcessImageBlock(tf.keras.Model):
    """Image branch of the model.

    Pipeline: random flip/rotation -> DenseNet-style input preprocessing ->
    the shared ``base_image_model`` conv stack -> flatten -> Dense(512) -> Dense(128).
    """

    def __init__(self):

        super(ProcessImageBlock, self).__init__()

        self.input_l = tf.keras.layers.InputLayer( input_shape = IMG_SIZE + (3,)  ) 
        self.base_model = base_image_model
        self.preprocess_input = tf.keras.applications.densenet.preprocess_input 
        
        self.data_augmentation = tf.keras.Sequential([
                                tf.keras.layers.experimental.preprocessing.RandomFlip('horizontal'),
                                tf.keras.layers.experimental.preprocessing.RandomRotation(0.2),
                                ])
        # Not used by call(); kept so the attribute remains available.
        self.rescale = tf.keras.layers.experimental.preprocessing.Rescaling(1./127.5, offset= -1)
        # Flattens the conv feature map to (batch_size, height * width * filters)
        self.flat = tf.keras.layers.Flatten()

        # One ReLU layer instance shared as the activation of both dense layers
        self.activation = tf.keras.layers.ReLU()
        self.dense = tf.keras.layers.Dense(512, activation= self.activation )
        self.final = tf.keras.layers.Dense(128, activation= self.activation )

        
    def call(self, input_tensor, training=None):
        """Map a batch of images to a (batch_size, 128) embedding.

        Args:
            input_tensor: Batch of images shaped (batch_size,) + IMG_SIZE + (3,).
            training: Keras training flag. It is forwarded to the augmentation
                and to the base model so that neither is forced into training
                behaviour during evaluation or prediction.
        """
        x = self.input_l(input_tensor)
        x = self.data_augmentation(x, training=training)
        x = self.preprocess_input(x)
        x = self.base_model(x, training=training)
        x = self.flat(x)

        x = self.dense(x)
        x = self.final(x)
 
        return  x

class ProcessTabBlock(tf.keras.Model):
    """Tabular branch: two ReLU dense layers (64, then 128 units) over a 24-wide feature vector."""

    def __init__(self):
        super(ProcessTabBlock, self).__init__()

        self.input_l = tf.keras.layers.InputLayer(input_shape=(24,))
        self.layer_1 = tf.keras.layers.Dense(64, activation='relu')
        self.layer_2 = tf.keras.layers.Dense(128, activation='relu')

    def call(self, input_tensor):
        """Map a (batch_size, 24) feature batch to a (batch_size, 128) embedding."""
        hidden = self.layer_1(self.input_l(input_tensor))
        return self.layer_2(hidden)

In [None]:
class MyCustomModel(tf.keras.Model):
    """Two-branch regressor that combines an image embedding with a tabular embedding.

    The two 128-wide branch outputs are concatenated, passed through dropout and
    two dense layers, and reduced to a single output clipped to [0, 100] by the
    final ReLU (``max_value=100``).
    """

    def __init__(self):
        super(MyCustomModel, self).__init__()

        # Branch sub-models
        self.process_image_data = ProcessImageBlock()
        self.process_tabular_data = ProcessTabBlock()

        # Activations and regularisation used by the head
        self.activation_1 = tf.keras.layers.LeakyReLU(alpha=0.3)
        self.activation_2 = tf.keras.layers.ReLU()
        self.activation_final = tf.keras.layers.ReLU(max_value=100)
        self.dropout = tf.keras.layers.Dropout(0.2)

        # Regression head
        self.dense_1 = tf.keras.layers.Dense(64, activation=self.activation_1)
        self.dense_2 = tf.keras.layers.Dense(16, activation=self.activation_2)
        self.final = tf.keras.layers.Dense(1, activation=self.activation_final)

    def call(self, inputs):
        """Run the model on a dict with the keys ``"image"`` and ``"features"``."""
        image_embedding = self.process_image_data(inputs["image"])
        tabular_embedding = self.process_tabular_data(inputs["features"])

        # (batch_size, 256): the two 128-wide embeddings side by side
        merged = tf.keras.layers.concatenate([image_embedding, tabular_embedding])
        hidden = self.dense_1(self.dropout(merged))
        hidden = self.dense_2(hidden)

        return self.final(hidden)

In [None]:
def create_model():
    """Create a MyCustomModel compiled with RMSprop, MSE loss and an MSE metric.

    Returns:
        The compiled (untrained) model.
    """
    mse_metric = tf.keras.metrics.MeanSquaredError(name="mean_squared_error", dtype=None)

    model = MyCustomModel()
    model.compile(optimizer='rmsprop', loss="mse", metrics=[mse_metric])
    return model

In [None]:
# Fresh compiled model instance; this is the one that gets trained.
model = create_model()

In [None]:
# Upper bound on the number of epochs; early stopping normally ends training sooner.
epochs = 100

checkpoint_path = "cp.ckpt"
checkpoint_dir = os.path.dirname(checkpoint_path)


# Save the model's weights, but only when the validation MSE improves. The file
# then always holds the best weights seen so far, so reloading it does not pick
# up the weights of a later, worse epoch.
cp_callback = tf.keras.callbacks.ModelCheckpoint(filepath=checkpoint_path,
                                                 monitor='val_mean_squared_error',
                                                 mode='min',
                                                 save_best_only=True,
                                                 save_weights_only=True,
                                                 verbose=1)

# Stop after 5 epochs without improvement of the validation MSE and roll the
# in-memory model back to its best weights.
es_callback = tf.keras.callbacks.EarlyStopping(
                                monitor='val_mean_squared_error',
                                patience=5,
                                verbose=1,
                                restore_best_weights=True)

tf.keras.backend.set_floatx('float32')

with tf.device(tf.DeviceSpec(device_type="GPU", device_index=0)):
    # Model.fit accepts keras.utils.Sequence inputs directly; fit_generator is deprecated.
    history = model.fit(
                    train_datagen,
                    validation_data = val_datagen,
                    epochs=epochs,
                    verbose=1,
                    callbacks = [cp_callback , es_callback ] ,
                    )

In [None]:
# Same file name as the checkpoint written during training; the weights are reloaded from it.
saved_checkpoint_path = "cp.ckpt"

In [None]:
# Build a second, freshly compiled model and load the checkpointed weights into it;
# this is the model used for the test-set predictions.
new_model = create_model()
new_model.load_weights(saved_checkpoint_path)

In [None]:
# Load the test table and collect, for every test image and in row order, the same
# per-image measurements that were computed for the training images:
#   pixel   - width * height
#   ratio   - width / height
#   st_size - file size on disk in bytes
#   mean    - mean pixel value over the whole image
#   form    - shape class: "sqr" (width == height), "falt" (width < height), "front" (width > height)
test = pd.read_csv('/kaggle/input/petfinder-pawpularity-score/test.csv')
test_d = ["pixel", 'ratio', "st_size", "mean", "form"]
test_d = {i:[] for i in test_d}
for img_id in test["Id"]:
    img_path = os.path.join(path_test, img_id + ".jpg")
    test_d["st_size"].append(os.stat(img_path).st_size)

    img = image.load_img(img_path)
    # Use the public PIL attribute, which reports (width, height). Any failure here
    # must surface: silently continuing would record the previous image's size.
    _size_1, _size_2 = img.size
    test_d["pixel"].append(_size_1*_size_2)
    test_d['ratio'].append(_size_1/_size_2)
    test_d["mean"].append(np.mean(img))

    if _size_1==_size_2:
        fm =  "sqr"
    elif _size_1<_size_2:
        fm =  "falt"
    else:
        fm =  "front"
    test_d["form"].append(fm)

In [None]:
# Turn the collected test measurements into a frame and add the ratio-style
# features; every derived column is computed from the raw columns only.
img_info = pd.DataFrame.from_dict(test_d)
img_info = img_info.assign(
    size_h=img_info['st_size'] / img_info['ratio'],
    size_mean=img_info['st_size'] / img_info['mean'],
    ratio_max=img_info['ratio'] / img_info['mean'],
    ratio_p=img_info['pixel'] / img_info['mean'],
)

In [None]:
# Build the test-time feature table with the same steps used for the training table.
# NOTE: re-running this cell without reloading `test` appends the same columns again.
test = pd.concat([test, img_info], axis=1)
# Reuse the scaler fitted on the training features (transform only, no re-fit).
test[['pixel', 'ratio', 'st_size', 'mean', 'size_h', 'size_mean', 'ratio_max', 'ratio_p']] = scaler.transform(test[['pixel', 'ratio', 'st_size', 'mean', 'size_h', 'size_mean', 'ratio_max', 'ratio_p']])
minus = ["Eyes", "Action", "Info", "Blur"]
plus = ["Face", "Near", "Accessory", "Group", "Collage", "Human", "Occlusion"]
test['minus'] = test['Subject Focus']
test['plus'] = test['Face']


# 'Face' seeds 'plus' and is also listed in `plus`, so it is counted twice; this
# mirrors how the training features are built and has to stay in sync with them.
for i in plus:
    test['plus'] = test['plus'] + test[i]
    
for i in minus:
    test['minus'] = test['minus'] + test[i]
fr = pd.get_dummies(test['form'])
test = pd.concat([test, fr], axis=1)
test = test.drop(["form"], axis=1)
# 'falt' is the dropped baseline category; it may be absent when no test image
# has that shape, hence errors='ignore'.
test = test.drop(columns=['falt'], errors='ignore')
# A shape class that does not occur in the test images yields no dummy column,
# so add all-zero indicators for whichever of the expected ones is missing.
for shape_col in ('front', 'sqr'):
    if shape_col not in test.columns:
        test[shape_col] = 0

In [None]:
# DataGenerator looks up a target for every Id, but the test set has none, so the
# 'pixel' column is used as a stand-in value. This rebinds `labels`, replacing the
# training lookup table.
labels = dict(test[['Id', "pixel"]].to_numpy())

In [None]:
st = len(test)//2 if len(test)<= 32 else 32

In [None]:
test_datagen = DataGenerator(test, labels, path_test, batch_size=8)

In [None]:
final = new_model.predict_generator(test_datagen, steps = st)

In [None]:
# Attach the predictions to the test table and write the two-column submission file.
test["Pawpularity"] = final

submission = test.loc[:, ["Id", "Pawpularity"]]
submission.to_csv("submission.csv", index = False)
submission