In [None]:
import os
import math

import numpy as np
import pandas as pd

import albumentations
import tensorflow as tf

In [None]:
# Read the training CSV into a DataFrame, then preview its first rows

df_train = pd.read_csv(
    filepath_or_buffer="/kaggle/input/petfinder-pawpularity-score/train.csv"
)
df_train.head(n=5)

In [None]:
# Small changes in dataframe for convenience

#df_train.rename(columns = lambda x: x.lower(), inplace=True)
# Turn each Id into an image file name by appending ".jpg".
# Ids that already end in ".jpg" are left untouched, so re-running this cell
# on the same frame does not produce "<id>.jpg.jpg".
train_ids = df_train['Id'].astype(str)
df_train['Id'] = train_ids.where(train_ids.str.endswith(".jpg"), train_ids + ".jpg")

In [None]:
# EDA and data preprocessing

In [None]:
# Data generators for loading

TRAIN_DIR = '/kaggle/input/petfinder-pawpularity-score/train'
#TEST_DIR = '/kaggle/input/petfinder-pawpularity-score/test'

WIDTH = 64
HEIGHT = 64
BATCH_SIZE = 16
VALIDATION_SPLIT = 0.2

# Training pipeline: scale pixel values by 1/255 and apply random augmentation
# (flip, zoom, shifts, rotation).
img_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255, horizontal_flip = True,
    fill_mode = "nearest", zoom_range = 0.2,
    width_shift_range = 0.2, height_shift_range = 0.2,
    rotation_range = 30, validation_split = VALIDATION_SPLIT)

# Validation pipeline: same rescaling and the same split fraction, but NO
# augmentation, so the validation loss is measured on unmodified images.
# Keras takes the split by row position in the dataframe, so two data
# generators configured with the same fraction select disjoint
# training / validation rows.
valid_datagen = tf.keras.preprocessing.image.ImageDataGenerator(
    rescale = 1./255, validation_split = VALIDATION_SPLIT)

# y_col carries every column after "Id"; the custom generators split that
# table into model features (all but the last column) and target (last column).
# NOTE(review): Keras documents target_size as (height, width); the order used
# here only matters if WIDTH and HEIGHT ever differ.
train_img_generator = img_datagen.flow_from_dataframe(
    dataframe = df_train, directory = TRAIN_DIR,
    x_col = "Id", y_col = df_train.columns[1:],
    class_mode = "raw", target_size = (WIDTH, HEIGHT),
    batch_size = BATCH_SIZE, subset = "training")

# shuffle=False keeps the validation batches in a fixed order, so every
# validation pass scores the same images and epochs are comparable.
test_img_generator = valid_datagen.flow_from_dataframe(
    dataframe = df_train, directory = TRAIN_DIR,
    x_col = "Id", y_col = df_train.columns[1:],
    class_mode = "raw", target_size = (WIDTH, HEIGHT),
    batch_size = BATCH_SIZE, subset = "validation",
    shuffle = False)

def _feature_target_batches(generator, steps_per_pass=None):
    """Endlessly yield ``([images, features], targets)`` batches.

    Each batch drawn from ``generator`` is an ``(images, table)`` pair in which
    ``table`` has one row per image: every column except the last is a model
    feature and the last column is the regression target.

    Args:
        generator: iterator with ``n``, ``batch_size``, ``reset()`` and
            ``__next__`` (e.g. the object returned by ``flow_from_dataframe``).
        steps_per_pass: number of batches to draw before calling
            ``generator.reset()``. Defaults to the number of *full* batches
            (``n // batch_size``, at least 1), so the trailing short batch of
            a pass is skipped and every yielded batch has the same size.

    Yields:
        ``[images, features], targets`` as NumPy arrays.
    """
    if steps_per_pass is None:
        steps_per_pass = max(1, generator.n // generator.batch_size)

    while True:
        for _ in range(steps_per_pass):
            images, table = next(generator)
            table = np.asarray(table)
            # Slice whole arrays instead of looping over a fixed BATCH_SIZE, so a
            # batch shorter than the nominal size cannot raise IndexError.
            yield [np.asarray(images), table[:, :-1]], table[:, -1]
        # One pass over the data is complete: restart the underlying iterator.
        generator.reset()


def train_custom_generator(generator=None):
    """Batch stream for training; reads ``train_img_generator`` by default.

    Args:
        generator: optional replacement iterator (mainly useful for testing).

    Yields:
        ``[images, features], targets`` batches, see ``_feature_target_batches``.
    """
    source = train_img_generator if generator is None else generator
    yield from _feature_target_batches(source)


def test_custom_generator(generator=None):
    """Batch stream for validation; reads ``test_img_generator`` by default.

    Args:
        generator: optional replacement iterator (mainly useful for testing).

    Yields:
        ``[images, features], targets`` batches, see ``_feature_target_batches``.
    """
    source = test_img_generator if generator is None else generator
    yield from _feature_target_batches(source)

In [None]:
# Model creation
# NOTE: everything between the triple quotes below is a single string literal,
# so none of it is executed; the cell merely evaluates to that string.
# The string holds a two-input CNN draft (image input + 12-value input), its
# step-count computation and its compile call.
# NOTE(review): inside the draft, `flat1` is fed from `conv3`, so `maxp3` is
# never consumed.
"""
input1 = tf.keras.layers.Input(shape = (WIDTH, HEIGHT, 3, ), name = 'input1')
input2 = tf.keras.layers.Input(shape = (12,), name = 'input2')

conv1 = tf.keras.layers.Conv2D(256, (3, 3), activation='relu', input_shape=(WIDTH, HEIGHT, 3))(input1)
maxp1 = tf.keras.layers.MaxPooling2D((2, 2))(conv1)

conv2 = tf.keras.layers.Conv2D(128, (3, 3), activation='relu')(maxp1)
maxp2 = tf.keras.layers.MaxPooling2D((2, 2))(conv2)

conv3 = tf.keras.layers.Conv2D(64, (3, 3), activation='relu')(maxp2)
maxp3 = tf.keras.layers.MaxPooling2D((2, 2))(conv3)

flat1 = tf.keras.layers.Flatten()(conv3)
dense1 = tf.keras.layers.Dense(12, activation='relu')(flat1)
merge1 = tf.keras.layers.Concatenate(axis = 1, name = 'inputs_merge_1')([dense1, input2])

dense2 = tf.keras.layers.Dense(64, activation='relu')(merge1)
dense3 = tf.keras.layers.Dense(32, activation='relu')(dense2)

pred = tf.keras.layers.Dense(1, activation='linear')(dense3)

# Model init and check out

model = tf.keras.models.Model(inputs = [input1, input2], outputs = pred)
model.summary()

nb_train_steps = math.floor(7930 / BATCH_SIZE)
nb_valid_steps = math.floor(1982 / BATCH_SIZE)

nb_epochs = 50

print(nb_train_steps * BATCH_SIZE)
print(nb_valid_steps * BATCH_SIZE)

model.compile(optimizer='adam',
              loss=tf.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error"),
              metrics=['mse'])
"""

In [None]:
# Two-branch regression network: a dense branch over the 12 tabular values and
# a convolutional branch over the image; both are concatenated into one
# linear output unit.
csv_input = tf.keras.layers.Input(shape = (12, ), name = 'CSV_Input')
img_input = tf.keras.layers.Input(shape = (WIDTH, HEIGHT, 3, ), name = 'IMG_Input')

# --- Tabular branch: four widening dense layers followed by dropout ---
csv_hidden1 = tf.keras.layers.Dense(50, activation='relu', name='CSV_Hidden1')(csv_input)
csv_hidden2 = tf.keras.layers.Dense(80, activation='relu', name='CSV_Hidden2')(csv_hidden1)
csv_hidden3 = tf.keras.layers.Dense(100, activation='relu', name='CSV_Hidden3')(csv_hidden2)
csv_hidden4 = tf.keras.layers.Dense(300, activation='relu', name='CSV_Hidden4')(csv_hidden3)
csv_dropout = tf.keras.layers.Dropout(0.15, name ='CSV_Dropout')(csv_hidden4)

# --- Image branch: every convolution is preceded by batch normalisation ---
# The three 4x4 max-pooling layers each shrink the spatial size by a factor of 4.
img_batch1 = tf.keras.layers.BatchNormalization(name='IMG_Normal1')(img_input)
img_conv1 = tf.keras.layers.Conv2D(1000, 4, padding = 'same', activation = 'relu', name='IMG_Conv1' ,use_bias=False)(img_batch1)
img_pooling1 = tf.keras.layers.MaxPooling2D(4, name= 'IMG_Max1')(img_conv1)
img_batch2 = tf.keras.layers.BatchNormalization(name='IMG_Normal2')(img_pooling1)
img_conv2 = tf.keras.layers.Conv2D(1000, 4, padding = 'same', activation = 'relu', name='IMG_Conv2' ,use_bias=False)(img_batch2)
img_batch3 = tf.keras.layers.BatchNormalization(name='IMG_Normal3')(img_conv2)
img_conv3 = tf.keras.layers.Conv2D(1000, 4, padding = 'same', activation = 'relu', name='IMG_Conv3' ,use_bias=False)(img_batch3)
img_pooling2 = tf.keras.layers.MaxPooling2D(4, name= 'IMG_Max2')(img_conv3)
img_batch4 = tf.keras.layers.BatchNormalization(name='IMG_Normal4')(img_pooling2)
# Feed the normalised tensor into the convolution, like every other conv layer
# in this branch. Previously IMG_Conv4 read `img_pooling2` directly, which left
# IMG_Normal4 disconnected from the model.
img_conv4 = tf.keras.layers.Conv2D(1000, 4, padding = 'same', activation = 'relu', name='IMG_Conv4' ,use_bias=False)(img_batch4)
img_batch5 = tf.keras.layers.BatchNormalization(name='IMG_Normal5')(img_conv4)
img_conv5 = tf.keras.layers.Conv2D(1000, 4, padding = 'same', activation = 'relu', name='IMG_Conv5' ,use_bias=False)(img_batch5)
img_pooling3 = tf.keras.layers.MaxPooling2D(4, name= 'IMG_Max3')(img_conv5)

# Dense head of the image branch
flatten = tf.keras.layers.Flatten(name='IMG_Flatten')(img_pooling3)
img_batch6 = tf.keras.layers.BatchNormalization(name='IMG_Normal6')(flatten)
img_hidden1 = tf.keras.layers.Dense(2000, activation='relu', name='IMG_hidden1' ,use_bias=False)(img_batch6)
img_dropout1 = tf.keras.layers.Dropout(0.15, name='IMG_Dropout1')(img_hidden1)
img_batch7 = tf.keras.layers.BatchNormalization(name='IMG_Normal7')(img_dropout1)
img_hidden2 = tf.keras.layers.Dense(1000, activation='relu', name='IMG_hidden2' ,use_bias=False)(img_batch7)
img_dropout2 = tf.keras.layers.Dropout(0.15, name='IMG_Dropout2')(img_hidden2)

#csv_output = tf.keras.layers.Dense(1, name = 'CSV_Output')(csv_dropout)
#img_output = tf.keras.layers.Dense(1,name = 'IMG_Output')(img_dropout2)

# --- Merge both branches and regress a single value (no activation) ---
merge1 = tf.keras.layers.Concatenate(axis = 1, name = 'inputs_merge_1')([csv_dropout, img_dropout2])
output = tf.keras.layers.Dense(1, name = 'output')(merge1)

In [None]:
# Assemble the model. The input order [image, tabular] must match the order in
# which the custom generators yield their two input arrays.
model = tf.keras.models.Model(inputs=[img_input, csv_input], outputs = output, name='model_reg')

opt = tf.keras.optimizers.Ftrl(learning_rate = 0.003, learning_rate_power = -0.5)
#model.compile(loss=['mse','mse'], loss_weights=[0.3, 0.7], optimizer = opt, metrics = ['mape'])
model.compile(loss=tf.keras.losses.MeanSquaredError(reduction="auto", name="mean_squared_error"),
              metrics=['mse', 'mape'], optimizer = opt)

# Only the best model seen so far is written to model_reg.h5
# (ModelCheckpoint monitors val_loss unless told otherwise).
check_1 = tf.keras.callbacks.ModelCheckpoint('model_reg.h5', save_best_only=True, verbose=2)

# Number of full batches per pass, derived from the iterators' own sample
# counts instead of hard-coded dataset sizes.
nb_train_steps = max(1, train_img_generator.n // BATCH_SIZE)
nb_valid_steps = max(1, test_img_generator.n // BATCH_SIZE)

nb_epochs = 20

m = model.fit(
    train_custom_generator(),
    steps_per_epoch = nb_train_steps,
    epochs = nb_epochs,
    validation_data = test_custom_generator(),
    validation_steps = nb_valid_steps,
    callbacks = [check_1])
    # workers = 3 // generator isn't safe for multiprocessing

In [None]:
# Read the test table and turn every Id into its image file name ("<Id>.jpg")
df_test = pd.read_csv(
    "/kaggle/input/petfinder-pawpularity-score/test.csv"
).assign(Id=lambda frame: frame['Id'].astype(str) + ".jpg")

#df_test.rename(columns = lambda x: x.lower(), inplace=True)

In [None]:
TEST_DIR = '/kaggle/input/petfinder-pawpularity-score/test'

WIDTH = 64
HEIGHT = 64
TEST_BATCH_SIZE = 1

# Inference pipeline: rescaling only, no augmentation.
img_datagen = tf.keras.preprocessing.image.ImageDataGenerator(rescale = 1./255)

# shuffle=False is essential: flow_from_dataframe shuffles by default, and the
# predictions are later paired with df_test['Id'] by position. With shuffling
# on, each prediction would be attached to a random Id.
img_generator = img_datagen.flow_from_dataframe(
                dataframe = df_test, directory = TEST_DIR,
                x_col = "Id", y_col = df_test.columns[1:],
                class_mode = "raw", target_size = (WIDTH, HEIGHT),
                batch_size = TEST_BATCH_SIZE, shuffle = False)
  
def custom_generator(df, batch_size, generator=None):
    """Yield exactly one pass of prediction inputs, then stop.

    Args:
        df: the table being predicted; only ``len(df)`` is used, to work out
            how many batches make up one pass.
        batch_size: batch size the underlying iterator was built with.
        generator: iterator yielding ``(images, features)`` pairs and offering
            ``reset()``. Defaults to the module-level ``img_generator``.

    Yields:
        A 1-tuple ``([images, features],)``. The inputs are wrapped in a tuple
        because Keras treats generator output as ``(inputs,)``,
        ``(inputs, targets)`` or ``(inputs, targets, sample_weights)``; a bare
        two-element list can be unpacked as inputs plus targets.
    """
    source = img_generator if generator is None else generator

    # Round up so that a final short batch is still served. Comparing a
    # counter with the float ``len(df) / batch_size`` never matches when the
    # sizes do not divide evenly, which made the generator endless.
    n_batches = math.ceil(len(df) / batch_size)

    for _ in range(n_batches):
        images, features = next(source)
        yield ([np.asarray(images), np.asarray(features)],)

    # Leave the iterator at its start for the next consumer.
    source.reset()

In [None]:
# Restore the checkpoint written during training and score one pass of test batches
best_model = tf.keras.models.load_model('model_reg.h5')
test_batches = custom_generator(df = df_test, batch_size = 1)
result = best_model.predict(test_batches)

# Wrap the raw predictions in a single-column frame and display it
final_result = pd.DataFrame(result, columns=['Pawpularity'])
final_result

In [None]:
submission = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')
# Restore the bare Ids by removing only a trailing, literal ".jpg".
# The pattern is an explicit, anchored regex: with an unescaped "." the dot
# matches any character on pandas versions where str.replace defaults to
# regex=True, and nothing tied the match to the end of the string.
df_test['Id'] = df_test['Id'].str.replace(r"\.jpg$", "", regex=True)

In [None]:
import os

# Make sure ./working exists; exist_ok avoids the separate existence check
# (and the race between checking and creating).
os.makedirs('./working', exist_ok=True)

# Pre-create an empty submission.csv. Mode "x" refuses to overwrite an existing
# file; that case (or any other OS-level failure) is reported, not raised.
try:
    with open("submission.csv", "x"):
        pass
except OSError as error:
    print('Caught this error: ' + repr(error))

In [None]:
# Copy every prediction onto the submission row(s) carrying the same Id.
# An Id -> score lookup replaces the per-row scan, and a single .loc assignment
# replaces the chained `submission[col].loc[row] = value` form, which pandas may
# apply to a temporary copy instead of `submission`.
paw_by_id = dict(zip(df_test['Id'], final_result['Pawpularity']))

# Predictions are floats; make sure the target column can hold them.
submission['Pawpularity'] = submission['Pawpularity'].astype(float)

# Rows without a prediction keep the value loaded from the sample file.
# Predicted Ids that are absent from the submission are ignored (the previous
# loop raised IndexError for them).
has_prediction = submission['Id'].isin(list(paw_by_id))
submission.loc[has_prediction, 'Pawpularity'] = (
    submission.loc[has_prediction, 'Id'].map(paw_by_id).astype(float)
)

submission.to_csv('submission.csv',index=False)