# Hello
This notebook is for the PetFinder.my Pawpularity competition.

I'll use TensorFlow to train the model.

First, load the training data for processing (the test data is loaded later, in the evaluation section).

In [None]:
import os
import pandas as pd

# Target size (in pixels) that every image is resized to before it reaches the model.
proc_img_width, proc_img_height = 224, 224

# Directory holding the training images; files are looked up as '<Id>.jpg'.
train_img_path = '../input/petfinder-pawpularity-score/train'

# Training table; the 'Id' and 'Pawpularity' columns are used further down.
train_data = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')

# Process Image
Resize all images to the predefined size (proc_img_width x proc_img_height).

Also build image batches for the train and test data.

In [None]:
from tqdm import tqdm, trange
import numpy as np
import tensorflow as tf

# Pull the image ids and their target scores out of the training table.
# Images are not decoded here; only ids and labels are kept in memory.
train_ids, train_label = train_data['Id'][:], train_data['Pawpularity'][:]

# Sanity check: both columns must have the same length.
print(train_ids.shape, train_label.shape)

# Prepare Test Data

Split the training ids and labels into a training set and a validation set (90% / 10%).

In [None]:
# Number of labelled samples available.
total_cnt = train_ids.shape[0]
print(total_cnt)

# Fraction of the samples (taken from the front, in file order) used for training;
# the rest is held out for validation.
split_rate = 0.9
split_at = int(total_cnt * split_rate)

# Labels as a column vector, shape (n, 1), to line up with the model's single output.
train_label_re = np.array(train_label).reshape(-1, 1)

train_id, val_id = np.array(train_ids[:split_at]), np.array(train_ids[split_at:])
train_pawp, val_pawp = train_label_re[:split_at], train_label_re[split_at:]

print(train_id.shape, train_pawp.shape)
print(val_id.shape, val_pawp.shape)

# Train with NN

Train a simple neural network on the data.

First, load a pretrained image classification model (EfficientNetB0).

In [None]:
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import *
from tensorflow.keras.metrics import *
from tensorflow.keras import *
from tensorflow.keras.callbacks import LearningRateScheduler
from tensorflow.keras.applications.efficientnet import EfficientNetB0

# Restore the saved EfficientNetB0 from the attached dataset, then freeze it so
# that its weights are left untouched while the new layers are trained.
pretrained_model_path = '../input/keras-applications-models/EfficientNetB0.h5'
pre_train_model = load_model(pretrained_model_path)
pre_train_model.trainable = False

Then define the model network.

In [None]:
# Image input.
# NOTE(review): Keras image shapes are (height, width, channels); the width constant
# sits in the height slot here, mirroring the argument order used when the images
# are resized. Harmless while both constants are 224 - confirm before changing either.
input1 = Input(shape = (proc_img_width, proc_img_height, 3))

# Stages applied to the input, in call order: random horizontal flip (augmentation),
# the frozen pretrained backbone, then a small trainable head.
feature_stages = [
    layers.experimental.preprocessing.RandomFlip(mode = 'horizontal'),
    pre_train_model,
    BatchNormalization(),
    Dropout(0.2),
    Dense(32, activation = "relu"),
]
model_feat1 = input1
for stage in feature_stages:
    model_feat1 = stage(model_feat1)

# Single linear unit: the regression output.
model_feat = Dense(1)(model_feat1)

model = Model(inputs = input1, outputs = model_feat)

# Learning rate starts at 1e-3 and is multiplied by 0.96 after every 100 optimizer
# steps (staircase = True makes the decay happen in discrete jumps).
lr_schedule = schedules.ExponentialDecay(
    initial_learning_rate = 1e-3,
    decay_steps = 100,
    decay_rate = 0.96,
    staircase = True)

# Optimise mean squared error and report RMSE alongside it.
model.compile(
    optimizer = Adam(learning_rate = lr_schedule),
    loss = losses.MeanSquaredError(),
    metrics = [metrics.RootMeanSquaredError()],
)

model.summary()

Start training.

In [None]:
from tensorflow.keras.utils import *
from sklearn.utils import shuffle

class DataGenerator(Sequence):
    """Keras ``Sequence`` that yields ``(image_batch, pawp_batch)`` pairs.

    Images are loaded lazily, one batch at a time: each id is resolved to
    ``<train_img_path>/<id>.jpg`` (``train_img_path`` is a notebook-level
    variable), decoded as a 3-channel JPEG and resized with padding using the
    notebook-level ``proc_img_width`` / ``proc_img_height``.

    Args:
        id_data: array of image ids (strings without the ``.jpg`` suffix).
        pawp_data: NumPy array of targets aligned row-by-row with ``id_data``.
        batch_size: maximum number of samples per batch.
        shuffle: when true, ids and targets are shuffled together once at
            construction and again on every ``on_epoch_end`` call.
        drop_remainder: when true, a trailing batch that is shorter than
            ``batch_size`` is skipped. The default (False) serves every
            sample, so the last batch may be shorter than the others.
    """

    def __init__(self, id_data, pawp_data, batch_size = 128, shuffle = True,
                 drop_remainder = False):
        'Initialization'
        # Let the Keras base class set up its own state where it defines any.
        super().__init__()
        self.id_data = id_data
        self.pawp_data = pawp_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.drop_remainder = drop_remainder
        # Apply the initial shuffle (no-op when shuffle is false).
        self.on_epoch_end()

    def __len__(self):
        'Denotes the number of batches per epoch'
        n_samples = self.pawp_data.shape[0]
        if self.drop_remainder:
            return n_samples // self.batch_size
        # Ceiling division so the trailing partial batch is not silently lost.
        return (n_samples + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        'Generate one batch of data'
        start = index * self.batch_size
        stop = start + self.batch_size
        id_batch = self.id_data[start:stop]
        pawp_batch = self.pawp_data[start:stop]

        # Iterate over the ids actually present in the slice so that a short
        # final batch does not raise IndexError.
        img_batch = [self._load_image(image_id) for image_id in id_batch]

        return np.array(img_batch).astype(np.float32), pawp_batch

    def _load_image(self, image_id):
        'Read, decode and resize a single training image.'
        path = os.path.join(train_img_path, image_id + '.jpg')
        image = tf.image.decode_jpeg(tf.io.read_file(path), channels = 3)
        image = tf.image.resize_with_pad(image, proc_img_width, proc_img_height)
        # Truncate to whole pixel values, matching how the test images are prepared.
        return tf.cast(image, dtype = tf.int32)

    def on_epoch_end(self):
        'Reshuffle ids and targets in unison when shuffling is enabled.'
        if self.shuffle:
            self.id_data, self.pawp_data = shuffle(self.id_data, self.pawp_data)

In [None]:
from tensorflow.keras.callbacks import *

# Stop training once the validation loss has gone 5 epochs without improving,
# and roll the model back to the weights of its best epoch.
early_stop = EarlyStopping(
    monitor = 'val_loss',
    patience = 5,
    restore_best_weights = True,
)

In [None]:
# Batch generators for the two splits; neither one reshuffles its own data.
train_gen = DataGenerator(id_data = train_id, pawp_data = train_pawp, shuffle = False)
val_gen = DataGenerator(id_data = val_id, pawp_data = val_pawp, shuffle = False)

# NOTE(review): workers = -1 is not a documented worker count for Model.fit -
# confirm that it actually enables parallel loading in the TensorFlow version used.
# NOTE(review): with epochs = 1 the early-stopping patience can never be reached.
history = model.fit(
    train_gen,
    epochs = 1,
    validation_data = val_gen,
    use_multiprocessing = True,
    workers = -1,
    callbacks = [early_stop],
)

# Show Train Result

Plot the training results.

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by model.fit.
curves = history.history
rmse, val_rmse = curves['root_mean_squared_error'], curves['val_root_mean_squared_error']
loss, val_loss = curves['loss'], curves['val_loss']

# x axis: 1-based epoch numbers.
epochs = range(1, len(rmse) + 1)

# One figure per metric: training values as dots, validation values as a line.
figure_specs = (
    (rmse, val_rmse, 'Training rmse', 'Validation rmse', 'Training and validation rmse'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
)
for position, (train_curve, val_curve, train_tag, val_tag, heading) in enumerate(figure_specs):
    if position:
        # Every figure after the first needs a fresh canvas.
        plt.figure()
    plt.plot(epochs, train_curve, 'bo', label=train_tag)
    plt.plot(epochs, val_curve, 'b', label=val_tag)
    plt.title(heading)
    plt.legend()

plt.show()

# Clear Buffer

Delete intermediate variables to reduce memory usage.

In [None]:
# Drop the names that are not needed for prediction. Note that train_img_path is
# read by DataGenerator at batch time, so training cannot be re-run after this cell
# without re-running the cells above.

# raw training table and image directory
del train_data, train_img_path

# id / label columns
del train_ids, train_label

# train / validation split arrays
del train_label_re, train_id, val_id, train_pawp, val_pawp

# generators and fit history
del train_gen, val_gen, history

# Evaluation & Submit

Run predictions with the trained model and save the results as a CSV file.

In [None]:
# Test images live next to a CSV that lists their ids.
test_img_path = '../input/petfinder-pawpularity-score/test'
test_data = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
test_ids = test_data['Id'][:]


def _read_test_image(image_id):
    """Load '<test_img_path>/<image_id>.jpg' as a resized, padded int32 RGB tensor."""
    raw_bytes = tf.io.read_file(os.path.join(test_img_path, image_id + '.jpg'))
    decoded = tf.image.decode_jpeg(raw_bytes, channels = 3)
    padded = tf.image.resize_with_pad(decoded, proc_img_width, proc_img_height)
    return tf.cast(padded, dtype = tf.int32)


# Unlike the training images, the whole test set is decoded into memory up front.
test_imgs = np.array([_read_test_image(test_ids[i]) for i in trange(len(test_ids))])

print(test_imgs.shape)

In [None]:
# Model input: the stacked test images.
commit_x1 = np.array(test_imgs)

# One prediction per image, flattened from the model's output to a 1-D vector.
predictions = model.predict(commit_x1)
predictions = predictions.reshape(commit_x1.shape[0],)

# Two-column submission table: image id and predicted score.
submission_df = pd.DataFrame({'Id': test_ids, 'Pawpularity': predictions})
submission_df.to_csv('submission.csv', index = False)

# show result
print(submission_df.head(10))

print('Finished')