In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline
import warnings
warnings.filterwarnings('ignore')
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import tensorflow as tf
from sklearn.model_selection import train_test_split

import math
from tensorflow.keras.callbacks import LearningRateScheduler, EarlyStopping, Callback

In [2]:
# Read the train / test tables from the working directory.
train_df = pd.read_csv('./train.csv')
test_df = pd.read_csv('./test.csv')

# Metadata-only frames: the identifier (and, for train, the target) removed.
# DataFrame.drop returns a new frame, so train_df / test_df are left untouched.
train_meta = train_df.drop(columns=["Id", "Pawpularity"])
test_meta = test_df.drop(columns=["Id"])

In [3]:
# Preview the first five rows of the training table.
train_df.head(n=5)

Unnamed: 0,Id,Subject Focus,Eyes,Face,Near,Action,Accessory,Group,Collage,Human,Occlusion,Info,Blur,Pawpularity
0,0007de18844b0dbbb5e1f607da0606e0,0,1,1,1,0,0,1,0,0,0,0,0,63
1,0009c66b9439883ba2750fb825e1d7db,0,1,1,0,0,0,0,0,0,0,0,0,42
2,0013fd999caf9a3efe1352ca1b0d937e,0,1,1,1,0,0,0,0,1,1,0,0,28
3,0018df346ac9c1d8413cfcc888ca8246,0,1,1,1,0,0,0,0,0,0,0,0,15
4,001dc955e10590d3ca4673f034feeef2,0,0,0,1,0,0,1,0,0,0,0,0,72


In [4]:
# Attach the on-disk JPEG location of every sample as a ``file_path`` column.
# ``assign`` returns a new frame (train_df / test_df keep their original
# columns), and the vectorised string concatenation replaces a Python-level
# lambda call per row.
train_image = train_df.assign(file_path="./train/" + train_df["Id"] + ".jpg")
test_image = test_df.assign(file_path="./test/" + test_df["Id"] + ".jpg")

In [5]:
def preprocess(image_url, image_size=(128, 128), central_fraction=1.0):
    """Load a JPEG file and turn it into a fixed-size float image tensor.

    Args:
        image_url: Path of the JPEG file to read.
        image_size: ``(height, width)`` the image is resized to. Defaults to
            ``(128, 128)``, the spatial size of the image input declared in
            ``get_model``.
        central_fraction: Fraction in ``(0, 1]`` of the image kept around its
            centre before resizing. The default ``1.0`` keeps the whole image,
            which matches the previous hard-coded behaviour.

    Returns:
        A ``float32`` tensor of shape ``(height, width, 3)`` whose pixel
        values were divided by 255.
    """
    image_string = tf.io.read_file(image_url)
    # channels=3 forces an RGB decode regardless of how the file is stored.
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.cast(image, tf.float32) / 255.0
    image = tf.image.central_crop(image, central_fraction)
    image = tf.image.resize(image, image_size)
    return image

# Eagerly decode every training image into a list of tensors.
# NOTE(review): the cross-validation cell builds its own tf.data pipeline from
# the file paths, so this in-memory copy appears unused by training - confirm
# it is still needed before paying the load time / memory.
x_train_image = [preprocess(path) for path in train_image['file_path']]


In [6]:
# Eagerly decode every test image and pack the results into one NumPy array.
test1_image = np.array([preprocess(path) for path in test_image['file_path']])

In [7]:
def get_model():
    """Build the two-input regression network (image CNN + metadata MLP).

    The image branch takes a ``(128, 128, 3)`` tensor through a small
    convolutional stack with two additive skip connections and ends in a
    10-unit dense layer. The metadata branch takes a 12-element vector through
    three ReLU dense layers (12 -> 24 -> 12 units). Both branch outputs are
    concatenated and mapped to a single linear output unit.

    Returns:
        An uncompiled ``tf.keras.Model`` with inputs ``[image, metadata]`` and
        one scalar output per sample.
    """

    def _residual_block(tensor):
        # Two same-padded 3x3 convolutions whose result is added to the block input.
        branch = layers.Conv2D(64, 3, activation="relu", padding="same")(tensor)
        branch = layers.Conv2D(64, 3, activation="relu", padding="same")(branch)
        return layers.add([branch, tensor])

    # ---- image branch -------------------------------------------------
    image_inputs = tf.keras.Input((128, 128, 3))
    features = layers.Conv2D(32, 3, activation="relu")(image_inputs)
    features = layers.Conv2D(64, 3, activation="relu")(features)
    features = layers.MaxPooling2D(3)(features)

    for _ in range(2):
        features = _residual_block(features)

    features = layers.Conv2D(64, 3, activation="relu")(features)
    features = layers.GlobalAveragePooling2D()(features)
    features = layers.Dense(256, activation="relu")(features)
    features = layers.Dropout(0.5)(features)
    x_image = layers.Dense(10)(features)

    # ---- metadata branch ----------------------------------------------
    meta_inputs = tf.keras.Input(shape=(12,))
    x_meta = meta_inputs
    for units in (12, 24, 12):
        x_meta = layers.Dense(units, activation='relu')(x_meta)

    # ---- fusion head --------------------------------------------------
    merged = layers.Concatenate(axis=1)([x_image, x_meta])
    output = layers.Dense(1)(merged)

    return tf.keras.Model(inputs=[image_inputs, meta_inputs], outputs=output)

In [8]:
# Build one instance of the network. The cross-validation cell rebinds
# ``model`` to a fresh get_model() result for every fold.
model =  get_model()

In [9]:
# Regression target column of the training table.
y_train = train_df['Pawpularity']

# Pack the list of decoded training images into a single NumPy array.
x_train_image = np.array(x_train_image)

In [10]:
def preprocess1(image_url, tabular):
    """Map one ``(path, row)`` pair to a ``((image, features), target)`` sample.

    Image loading is delegated to ``preprocess`` so the decode / scale / resize
    steps are defined in one place instead of being copied here.

    Args:
        image_url: Path of the JPEG file to read.
        tabular: 1-D row whose first element is the target and whose remaining
            elements are the metadata features (the training cell builds it
            from ``["Pawpularity"] + tabular_columns``).

    Returns:
        ``((image, tabular[1:]), tabular[0])`` - the two model inputs and the
        regression target.
    """
    image = preprocess(image_url)
    return (image, tabular[1:]), tabular[0]

In [11]:
tf.keras.backend.clear_session()

# --- Training configuration (values unchanged from the previous inline literals)
N_SPLITS = 3
SEED = 42             # fixes the fold assignment so reruns use the same splits
EPOCHS = 5
BATCH_SIZE = 100
SHUFFLE_BUFFER = 512
INITIAL_LR = 1e-3

models = []
historys = []
tabular_columns = ['Subject Focus', 'Eyes', 'Face', 'Near', 'Action', 'Accessory',
                   'Group', 'Collage', 'Human', 'Occlusion', 'Info', 'Blur']

# Loop-invariant inputs, extracted once as plain arrays. KFold.split yields
# *positional* indices; indexing arrays applies them positionally, whereas
# ``.loc`` would interpret them as index labels and only happens to agree
# while the frames carry a default RangeIndex.
all_paths = train_image["file_path"].to_numpy()
all_tabular = train_df[["Pawpularity"] + tabular_columns].values


def step_decay(epoch):
    """Return the learning rate for ``epoch``: INITIAL_LR halved every 10 epochs."""
    drop = 0.5
    epochs_drop = 10.0
    return INITIAL_LR * math.pow(drop, math.floor(epoch / epochs_drop))


kf = KFold(n_splits=N_SPLITS, shuffle=True, random_state=SEED)

for train_index, val_index in kf.split(train_meta):

    x_path = all_paths[train_index]
    x_val = all_paths[val_index]

    # Column 0 is the target, the remaining columns are the metadata features
    # (preprocess1 splits them apart).
    tabular_train = all_tabular[train_index]
    tabular_val = all_tabular[val_index]

    lrate = LearningRateScheduler(step_decay)

    # NOTE(review): patience (5) is not smaller than EPOCHS (5), so this
    # callback looks unable to stop a run early with the current settings;
    # likewise step_decay stays at INITIAL_LR for the first 10 epochs.
    # Confirm the intended values.
    earstop = EarlyStopping(monitor='val_loss', min_delta=0, patience=5)

    model = get_model()
    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        metrics=[tf.keras.metrics.RootMeanSquaredError(name="rmse"), "mae", "mape"],
        optimizer=tf.keras.optimizers.Adam(INITIAL_LR),
    )

    # cache() must come BEFORE shuffle(): a cache placed after shuffle/batch
    # stores the first epoch's batches and replays that same order on every
    # later epoch, so the data would effectively be shuffled only once.
    train_ds = (
        tf.data.Dataset.from_tensor_slices((x_path, tabular_train))
        .map(preprocess1)
        .cache()
        .shuffle(SHUFFLE_BUFFER)
        .batch(BATCH_SIZE)
        .prefetch(2)
    )
    # Validation data is not shuffled, so caching the finished batches is fine.
    val_ds = (
        tf.data.Dataset.from_tensor_slices((x_val, tabular_val))
        .map(preprocess1)
        .batch(BATCH_SIZE)
        .cache()
        .prefetch(2)
    )

    history = model.fit(
        train_ds,
        epochs=EPOCHS,
        validation_data=val_ds,
        verbose=1,
        callbacks=[lrate, earstop],
    )

    historys.append(history)
    models.append(model)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
def preprocess_test_data(image_url, tabular):
    """Map one test ``(path, features)`` pair to a ``((image, features), 0)`` sample.

    Image loading is delegated to ``preprocess`` so the decode / scale / resize
    steps stay identical to the ones defined there instead of being copied.

    Args:
        image_url: Path of the JPEG file to read.
        tabular: 1-D row of metadata features (no target column).

    Returns:
        ``((image, tabular), 0)`` - the two model inputs plus a constant ``0``
        in the label position, since no target is available here.
    """
    image = preprocess(image_url)
    return (image, tabular), 0

# Test pipeline: (file path, metadata row) -> preprocessed sample, batched by 100.
# The finished batches are cached and two batches are prefetched.
test_ds = (
    tf.data.Dataset
    .from_tensor_slices((test_image["file_path"], test_df[tabular_columns].values))
    .map(preprocess_test_data)
    .batch(100)
    .cache()
    .prefetch(2)
)

In [13]:
# One prediction array per fold model, all computed on the same test pipeline.
preds = [fold_model.predict(test_ds) for fold_model in models]

# Stack along a new leading "model" axis and average it away (simple ensemble mean).
preds_array = np.array(preds)
preds_mean = preds_array.mean(axis=0)

In [14]:
# The network ends in a single-unit Dense layer, so the averaged predictions
# are expected to arrive as an (n_samples, 1) array. Flatten them explicitly
# so the column receives a plain 1-D vector instead of relying on pandas
# accepting a 2-D array for a single column.
pawpularity = np.asarray(preds_mean).reshape(-1)
assert len(pawpularity) == len(test_df), "expected exactly one prediction per test row"

sub = pd.DataFrame({'Id': test_df['Id'], 'Pawpularity': pawpularity})
sub.to_csv('submission.csv', index=False)
sub

Unnamed: 0,Id,Pawpularity
0,4128bae22183829d2b5fea10effdb0c3,37.763508
1,43a2262d7738e3d420d453815151079e,36.853008
2,4e429cead1848a298432a0acad014c9d,37.857426
3,80bc3ccafcc51b66303c2c263aa38486,37.143612
4,8f49844c382931444e68dffbe20228f4,37.400448
5,b03f7041962238a7c9d6537e22f9b017,38.311924
6,c978013571258ed6d4637f6e8cc9d6a3,37.318115
7,e0de453c1bffc20c22b072b34b54e50f,36.98267
