In [None]:
import tensorflow as tf
from tensorflow.keras import datasets, layers, models, utils
import seaborn as sns
import pandas as pd
import numpy as np
import os
from tensorflow import keras
from PIL import Image
import math

### Load and transform data

In [None]:
# Hyper-parameters shared by the cells below.
batch_size = 16          # rows per generated batch
validation_split = 0.2   # NOTE(review): not referenced by any cell visible in this notebook
random_seed = 42
img_size = (64,64)       # passed to PIL's resize() when images are cached

# Apply the seed so that shuffling (NumPy/pandas global RNG) and weight
# initialisation (TensorFlow) are repeatable under Restart & Run All.
np.random.seed(random_seed)
tf.random.set_seed(random_seed)

In [None]:
# Images: Directories
# Both image folders live under the same competition input root.
input_root = "../input/petfinder-pawpularity-score"
training_dir = input_root + "/train"
test_dir = input_root + "/test"

In [None]:
# Labels: Dataset
# Read the labelled CSV once and split it by row position. The split point is
# shared by both slices so that no row falls between training and validation
# (slicing with [0:9000] and [9001:] would silently drop row 9000).
train_csv = pd.read_csv("../input/petfinder-pawpularity-score/train.csv")
n_training_rows = 9000
training_df = train_csv[:n_training_rows]
validation_df = train_csv[n_training_rows:]
test_df = pd.read_csv("../input/petfinder-pawpularity-score/test.csv")

In [None]:
# Preview the first rows only instead of rendering the whole frame.
training_df.head()

In [None]:
# Label column, and every remaining non-identifier column as a tabular feature.
target = "Pawpularity"
non_feature_columns = ["Id", target]
features = training_df.drop(columns=non_feature_columns).columns.values

In [None]:
# Display the array of tabular feature column names computed above.
features

In [None]:
def get_image(image_id, image_folder, img_size, resized_folder):
    """Return the image `<image_id>.jpg` resized to `img_size`, using a disk cache.

    A resized copy is stored as `<resized_folder>/<image_id>.jpg`. On later
    calls that copy is returned, provided its dimensions still match
    `img_size`; otherwise it is regenerated from the original in `image_folder`.

    Parameters
    ----------
    image_id : str
        File name of the image without the `.jpg` extension.
    image_folder : str
        Folder holding the original images.
    img_size : tuple
        Size handed to PIL's `resize` (PIL sizes are `(width, height)`).
    resized_folder : str
        Existing folder used as the cache for resized images.

    Returns
    -------
    PIL.Image.Image
        A fully loaded RGB image, so every image converts to an array with
        three channels and can be stacked into one batch.
    """
    file_name = '{}.jpg'.format(image_id)
    resized_path = os.path.join(resized_folder, file_name)

    if os.path.isfile(resized_path):
        # `with` closes the file handle; convert() returns a loaded copy that
        # stays valid after the handle is closed.
        with Image.open(resized_path) as cached:
            if cached.size == tuple(img_size):
                return cached.convert("RGB")
        # Cached copy has a stale size: fall through and rebuild it.

    img_path = os.path.join(image_folder, file_name)
    with Image.open(img_path) as original:
        # Normalise the mode first so grayscale/RGBA sources also yield 3 channels.
        img = original.convert("RGB").resize(img_size)
    img.save(resized_path)

    return img

class DataGenerator(utils.Sequence):
    """Keras `Sequence` yielding batches of (image, tabular) inputs.

    Each batch holds the resized images of `batch_size` consecutive rows of
    `data` together with the `features` columns of the same rows and, when a
    target is supplied, the matching target values.

    Parameters
    ----------
    data_type : str
        Name of the split. It selects the cache sub-folder under `/resized`
        and, together with `target`, decides whether rows are shuffled
        (only "training" and "validation" are).
    data : pandas.DataFrame
        Rows to serve; must contain an "Id" column and the `features` columns.
    target : pandas.Series or None
        Target values sharing the index of `data`, or None for unlabeled data.
    img_folder : str
        Folder holding the original `<Id>.jpg` images.
    img_size : tuple
        Size the images are resized to (forwarded to `get_image`).
    batch_size : int
        Number of rows per batch.
    features : array-like of str
        Names of the tabular feature columns.
    """

    def __init__(
        self,
        data_type,
        data,
        target,
        img_folder,
        img_size,
        batch_size,
        features
    ):
        super().__init__()

        self.data = data
        self.target = target
        self.batch_size = batch_size
        self.img_folder = img_folder
        self.img_size = img_size
        self.data_type = data_type
        self.features = features
        self.resized_folder = os.path.join("/resized", data_type)

        # exist_ok avoids a separate isdir() check before creating the folder.
        os.makedirs(self.resized_folder, exist_ok=True)
        # Trim the columns (and shuffle labelled splits) before the first epoch.
        self.on_epoch_end()

    def __len__(self):
        """Number of batches per epoch; the last batch may be smaller."""
        return math.ceil(len(self.data) / self.batch_size)

    def __getitem__(self, idx):
        """Return batch number `idx`.

        Returns `([images, meta], target)` when a target was supplied, and the
        1-tuple `([images, meta],)` otherwise. The 1-tuple keeps Keras from
        unpacking the two-input list as an `(inputs, targets)` pair.
        """
        start_idx = idx * self.batch_size
        end_idx = start_idx + self.batch_size
        # Positional slice: after shuffling, the index labels are out of order.
        batch = self.data.iloc[start_idx:end_idx]

        images = np.array([
            np.array(get_image(image_id, self.img_folder, self.img_size, self.resized_folder))
            for image_id in batch["Id"]
        ])
        images = tf.cast(images, tf.float32)
        meta = batch[self.features].to_numpy(dtype=np.float32)

        # Only a missing target means "unlabeled"; an all-zero target is still
        # a valid set of labels.
        if self.target is None:
            return ([images, meta],)

        target = self.target.iloc[start_idx:end_idx].to_numpy(dtype=np.float32)
        return [images, meta], target

    def on_epoch_end(self):
        """Keep only the needed columns and reshuffle labelled splits.

        Rows are shuffled only for the "training" and "validation" splits with
        a target; the same permutation is applied to `data` and `target` so
        they stay aligned. Other splits (e.g. "test") keep their original row
        order so predictions can be matched back to the input rows.
        """
        columns = np.append(self.features, "Id")
        self.data = self.data[columns]

        is_labelled_split = self.data_type in ["training", "validation"]
        if self.target is None or not is_labelled_split:
            return

        order = np.random.permutation(len(self.data))
        self.data = self.data.iloc[order]
        self.target = self.target.iloc[order]

In [None]:
# One batch generator per split. Validation images come from the training
# folder because the validation rows are taken from train.csv.
training_ds = DataGenerator(
    data_type="training",
    data=training_df,
    target=training_df[target],
    img_folder=training_dir,
    img_size=img_size,
    batch_size=batch_size,
    features=features,
)
validation_ds = DataGenerator(
    data_type="validation",
    data=validation_df,
    target=validation_df[target],
    img_folder=training_dir,
    img_size=img_size,
    batch_size=batch_size,
    features=features,
)
test_ds = DataGenerator(
    data_type="test",
    data=test_df,
    target=None,
    img_folder=test_dir,
    img_size=img_size,
    batch_size=batch_size,
    features=features,
)

In [None]:
# Number of tabular feature columns.
len(features)

### Create model

In [None]:
# Input tensors are sized from the notebook configuration instead of literals,
# so changing `features` or `img_size` cannot desynchronise model and data.
inputs = keras.Input(shape=(len(features),))
# PIL sizes are (width, height) while image arrays are (height, width, channels).
img_inputs = keras.Input(shape=(img_size[1], img_size[0], 3))

# Conv Models
# Random rotation/flip augment the images before the convolutional stack.
augmented = layers.RandomRotation(0.2)(img_inputs)
augmented = layers.RandomFlip('horizontal')(augmented)
img1 = layers.Conv2D(4, (4, 4), activation='relu')(augmented)
img2 = layers.MaxPooling2D((4, 4))(img1)
img3 = layers.Conv2D(8, (4, 4), activation='relu')(img2)
img6 = layers.Flatten()(img3)
img7 = layers.Dense(256, activation='relu', kernel_regularizer=keras.regularizers.l2(0.01))(img6)
img8 = layers.Dropout(0.3)(img7)
img9 = layers.Dense(64)(img8)
img10 = layers.Dense(8)(img9)
img = keras.Model(inputs=img_inputs, outputs=img10)

# Tab model
# The layer is applied to an Input tensor, so no input_shape argument is needed.
tab1 = layers.Dense(8)(inputs)
tab = keras.Model(inputs=inputs, outputs=tab1)

# Concatenation
# Join the 8-unit tabular and 8-unit image embeddings, then regress one value.
x1 = layers.Concatenate(axis=1)([tab.output, img.output])
x2 = layers.Dense(32)(x1)
x3 = layers.Dense(16)(x2)
outputs = layers.Dense(1)(x3)

# Input order (image first, tabular second) matches the [images, meta]
# batches produced by DataGenerator.
model = keras.Model(inputs=[img.input, tab.input], outputs=outputs, name="model")

In [None]:
# Print the layer-by-layer summary of the combined image + tabular model.
model.summary()

### Compile and train the model

In [None]:
# Adam optimiser with a mean-squared-error loss; MSE is also tracked as a metric.
model.compile(
    optimizer="adam",
    loss=tf.keras.losses.MSE,
    metrics=["mse"],
)

In [None]:
# Train for 20 epochs, evaluating on the validation generator after each one.
# `history.history` keeps the per-epoch loss/metric values for plotting.
history = model.fit(training_ds, epochs=20, validation_data=validation_ds)

In [None]:
import matplotlib.pyplot as plt

# Training vs. validation loss per epoch, drawn on an explicit Axes.
fig, ax = plt.subplots()
for curve in ('loss', 'val_loss'):
    ax.plot(history.history[curve], label=curve)
ax.set_xlabel('Epoch')
ax.set_ylabel('loss')
ax.legend(loc='lower right')

#test_loss, test_acc = model.evaluate(test_images,  test_labels, verbose=2)

## Explainability

In [None]:
import shap
import numpy as np

In [None]:
# Background sample for SHAP: the model inputs of the first validation batch.
# A labelled batch is (inputs, target); only the inputs belong to the
# explainer, converted to NumPy arrays (the image part is a TensorFlow tensor).
background_inputs, _ = validation_ds[0]
background = [np.asarray(part) for part in background_inputs]

In [None]:
# Build a SHAP DeepExplainer for the trained model from the background sample.
e = shap.DeepExplainer(model, background)

In [None]:
# Explain the first validation batch. Only the model inputs are passed on;
# the batch's target is dropped and the inputs are converted to NumPy arrays.
explained_inputs, _ = validation_ds[0]
e.shap_values([np.asarray(part) for part in explained_inputs])

In [None]:
# `x_test` is not defined in this notebook; explain four samples (positions
# 1-4) of the first validation batch instead, passing the model inputs only.
sample_inputs, _ = validation_ds[0]
shap_values = e.shap_values([np.asarray(part)[1:5] for part in sample_inputs])

In [None]:


# select a set of background examples to take an expectation over
# (the first training batch; only the model inputs, as NumPy arrays)
background_inputs, _ = training_ds[0]
background = [np.asarray(part) for part in background_inputs]

# explain predictions of the model on four validation images
sample_inputs, _ = validation_ds[0]
samples = [np.asarray(part)[1:5] for part in sample_inputs]

e = shap.DeepExplainer(model, background)
shap_values = e.shap_values(samples)

# plot the feature attributions of the image input
# NOTE(review): the layout of `shap_values` for a multi-input model depends on
# the installed shap version (nested list per output vs. a trailing output
# axis) - confirm that the selection below picks the image attributions.
first_entry = shap_values[0]
image_shap = np.asarray(first_entry[0] if isinstance(first_entry, list) else first_entry)
if image_shap.ndim == 5:
    # drop the trailing single-output axis
    image_shap = image_shap[..., 0]
# images hold 0-255 pixel values; scale to 0-1 for display
shap.image_plot(image_shap, samples[0] / 255.0)

In [None]:
# The generator may hold the test rows in a different order than
# sample_submission.csv, so predictions are matched to rows by Id rather than
# by position. The Id order is captured before predict() consumes the batches.
test_ids = test_ds.data["Id"].to_numpy()
prediction = model.predict(test_ds)
prediction_by_id = pd.Series(np.ravel(prediction), index=test_ids)

submission = pd.read_csv("../input/petfinder-pawpularity-score/sample_submission.csv")
submission["Pawpularity"] = submission["Id"].map(prediction_by_id)
submission.to_csv('submission.csv', index=False)

In [None]:
# Ids of the 20 training rows with the highest Pawpularity score.
by_score_desc = training_df.sort_values(by="Pawpularity", ascending=False)
top20 = by_score_desc["Id"].head(20)

In [None]:
# Show the highest-scoring training images one by one. Each file is opened in
# a `with` block so its handle is released once the image has been drawn.
for image_id in top20.values:
    with Image.open(os.path.join(training_dir, image_id + ".jpg")) as pet_image:
        plt.imshow(pet_image)
    plt.show()

In [None]:
# Ids of the 20 training rows with the lowest Pawpularity score.
low20 = training_df.sort_values("Pawpularity")["Id"][0:20]

# Show them one by one. Each file is opened in a `with` block so its handle is
# released once the image has been drawn.
for image_id in low20.values:
    with Image.open(os.path.join(training_dir, image_id + ".jpg")) as pet_image:
        plt.imshow(pet_image)
    plt.show()