In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import cv2
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm import tqdm 
from glob import glob
from sklearn.model_selection import *
import warnings
warnings.simplefilter("ignore")


from tensorflow.keras.applications import *
from tensorflow.keras.layers import *
import tensorflow.keras.backend as K
from tensorflow.keras.utils import Sequence
from tensorflow.keras.models import *
from tensorflow.keras.callbacks import *
from tensorflow.keras.optimizers import *

**Read Images**

In [None]:
# Image folders for the competition's train and test splits.
train_images = '../input/petfinder-pawpularity-score/train'
test_images = '../input/petfinder-pawpularity-score/test'

# Tabular data: training table and the submission template.
train_df = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
sample_sub = pd.read_csv('../input/petfinder-pawpularity-score/sample_submission.csv')

**Some Insights**

In [None]:
# Peek at the first five rows of the training table.
train_df.head(n=5)

In [None]:
# Distribution of the regression target.
pawpularity_values = train_df['Pawpularity']
sns.displot(pawpularity_values)

In [None]:
# Plot a grid of pets, each titled with its Pawpularity score.
r=c=3
fig,axes=plt.subplots(r,c,figsize=(12,12))

# Select rows by position so the preview starts at the very first row and
# does not depend on train_df having a default 0..n-1 index.
preview=train_df.iloc[:r*c]
for ax,id_,lbl in zip(axes.ravel(),preview['Id'],preview['Pawpularity']):
    path=os.path.join(train_images,id_)+'.jpg'
    img=cv2.imread(path)
    # cv2.imread returns None instead of raising when the file cannot be read.
    if img is None:
        raise FileNotFoundError('Could not read image: '+path)
    # OpenCV loads images as BGR; matplotlib expects RGB.
    img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)

    ax.imshow(img)
    ax.set_title('Pawpularity: '+str(lbl))
    ax.set_xticks([])
    ax.set_yticks([])

plt.show()

In [None]:
#data Split
# A fixed random_state makes the train/validation split identical across
# kernel restarts, so validation losses from different runs are comparable.
train_df_,val_df=train_test_split(train_df,test_size=0.15,random_state=42)
print('Training Shape: ',train_df_.shape)
print('Validation Shape: ',val_df.shape)

# Image ids and regression targets for the training split.
train_ids=train_df_['Id'].values
train_targets=train_df_['Pawpularity'].values

# Image ids and regression targets for the validation split.
val_ids=val_df['Id'].values
val_targets=val_df['Pawpularity'].values

**Define Hyperparameters**

In [None]:
# Hyperparameters shared by the generator, the model and the training loop.
HEIGHT = 224          # image height in pixels
WIDTH = HEIGHT        # images are square
BATCH_SIZE = 32       # samples per batch
EPOCHS = 15           # training epochs
lr_ = 0.001           # optimizer learning rate

**Custom Generator**
* Input: image ids, metadata, and targets
* Output: batches of `([images, metadata], targets)`

> Next update: augmentation will be added.

In [None]:
#Kustom Generator
class KustomGenerator(Sequence):
    """Keras Sequence yielding ([images, meta_data], target) batches.

    Parameters
    ----------
    image_data : sequence of three indexables
        image_data[0] holds image ids (file names without the '.jpg'
        extension), image_data[1] the per-image metadata rows and
        image_data[2] the targets. All three are indexed by the same position.
    batch_size : int
        Number of samples per batch. The final batch of an epoch may be smaller.
    dims : tuple
        Shape of one image in array order (rows, cols, channels). Every image
        is resized to this shape.
    is_train : bool
        When True the sample order is reshuffled at the end of every epoch.
    image_dir : str or None
        Directory containing the '<id>.jpg' files. None falls back to the
        module-level train_images path.
    """
    def __init__(self,image_data,batch_size=BATCH_SIZE,dims=(WIDTH,HEIGHT,3),is_train=True,image_dir=None):
        super().__init__()
        self.imageids=image_data[0]
        self.meta_data=image_data[1]
        self.target=image_data[2]
        self.batch_size=batch_size
        self.dims=dims
        self.is_train=is_train
        self.image_dir=image_dir
        self.on_epoch_end()
    
    def __len__(self):
        """Number of batches per epoch, counting a trailing partial batch."""
        # Ceiling division: flooring would silently drop the last
        # len(imageids) % batch_size samples, including validation samples.
        return (len(self.imageids)+self.batch_size-1)//self.batch_size
    
    def on_epoch_end(self):
        """Reset the index order and reshuffle it when training."""
        self.indexes=np.arange(len(self.imageids))
        if self.is_train:
            np.random.shuffle(self.indexes)
    
    def __getitem__(self,index):
        """Return batch number `index` as ([images, meta_data], target)."""
        indexes=self.indexes[index*self.batch_size:(index+1)*self.batch_size]
        X_ids=[self.imageids[k] for k in indexes]
        X_meta=[self.meta_data[k] for k in indexes]
        y_=[self.target[k] for k in indexes]
        
        X=self.__data_generation(X_ids)
        return [X,np.array(X_meta)],np.array(y_)
    
    def __data_generation(self,ids_):
        """Load, convert to RGB, resize and scale to [0, 1] the given images."""
        image_dir=train_images if self.image_dir is None else self.image_dir
        rows,cols=self.dims[0],self.dims[1]
        # Size the buffer by the number of ids actually requested so a short
        # final batch has as many image rows as metadata/target rows.
        tmp_images=np.zeros((len(ids_),*self.dims),dtype='float32')
        for ix,id_ in enumerate(ids_):
            path=os.path.join(image_dir,id_+'.jpg')
            img=cv2.imread(path)
            # cv2.imread returns None instead of raising when the read fails.
            if img is None:
                raise FileNotFoundError('Could not read image: '+path)
            img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
            # cv2.resize takes dsize as (width, height), i.e. (cols, rows).
            img=cv2.resize(img,(cols,rows))
            tmp_images[ix]=img.astype('float')/255.
        return tmp_images

In [None]:
# Training split: ids, metadata columns 2..12 (11 columns) and the target.
# NOTE(review): the slice starts at column 2, so column 1 is not used as a
# feature - confirm this is intended.
train_ids = train_df_['Id'].to_numpy()
train_meta = train_df_.iloc[:, 2:13].to_numpy()
train_target = train_df_['Pawpularity'].to_numpy()

# Validation split: same three arrays, built the same way.
val_ids = val_df['Id'].to_numpy()
val_meta = val_df.iloc[:, 2:13].to_numpy()
val_target = val_df['Pawpularity'].to_numpy()

In [None]:
# Batch generators for training (shuffled each epoch) and validation (fixed order).
# Augmentation will be added in next update.
train_gen = KustomGenerator(image_data=[train_ids, train_meta, train_target], is_train=True)
val_gen = KustomGenerator(image_data=[val_ids, val_meta, val_target], is_train=False)

**Simple Model with 2 inputs and 1 output**

In [None]:
def simple_model(lr=lr_):
    """Build and compile a two-input, single-output regression model.

    The image branch is an EfficientNetB5 without its classification head and
    without pretrained weights, followed by global average pooling and a dense
    layer. The metadata branch is two dense layers. Both branches are
    concatenated and reduced to a single output unit.

    Parameters
    ----------
    lr : float
        Learning rate passed to the RMSprop optimizer.

    Returns
    -------
    A compiled Keras Model taking [image_batch, meta_batch] and trained with
    mean squared error.
    """
    #Inputs
    inp_0=Input((HEIGHT,WIDTH,3))
    # Shape is given as a tuple: 11 metadata values per sample.
    inp_1=Input((11,))
    
    #Base Model
    base_feat=EfficientNetB5(weights=None,
                             include_top=False,input_tensor=inp_0)
    for layer in base_feat.layers:
        layer.trainable=True
        
    gap=GlobalAvgPool2D()(base_feat.output)
    x=Dense(1024,activation='relu')(gap)
    
    #Meta Data
    fc=Dense(256,activation='relu')(inp_1)
    fc=Dense(512,activation='relu')(fc)
    
    x=Concatenate()([x,fc])
    x=Dense(1024,activation='relu')(x)
    # NOTE(review): a ReLU output unit has zero gradient whenever its
    # pre-activation is negative - consider a linear output for regression.
    out=Dense(1,activation='relu')(x)
    
    model_=Model([inp_0,inp_1],out)
    # Use the lr argument; the global lr_ is only the default value.
    model_.compile(loss='mse',optimizer=RMSprop(lr))
    return model_

In [None]:
# Instantiate the network with the configured learning rate.
model = simple_model(lr=lr_)

In [None]:
# ModelCheckpoint's first positional argument is the output file path, not the
# metric name. Pass both explicitly so the best model (lowest validation loss)
# is written to a clearly named file.
mc=ModelCheckpoint(filepath='best_model.h5',monitor='val_loss',
                   save_best_only=True,mode='min',save_freq="epoch")

In [None]:
# Train on the generator batches, validating after each epoch and
# checkpointing through `mc`.
history = model.fit(
    train_gen,
    steps_per_epoch=len(train_gen),
    epochs=EPOCHS,
    validation_data=val_gen,
    validation_steps=len(val_gen),
    callbacks=[mc],
)

In [None]:
# Training vs. validation loss per epoch.
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs=range(1,len(loss)+1)
# Colour is given only through `color=`; combining it with a 'b' format
# string specifies the colour twice.
plt.plot(epochs,loss,color='red',label='Training MSE')
plt.plot(epochs,val_loss,color='blue',label='Validation MSE')
plt.title('Training and Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('MSE')
plt.legend()
# No plt.figure() here: it would open an empty second figure before show().
plt.show()

In [None]:
def test_data(data):
    """Load every test image into memory and pair it with its metadata.

    Parameters
    ----------
    data : sequence of two indexables
        data[0] holds the test image ids (file names without '.jpg') and
        data[1] the matching metadata rows.

    Returns
    -------
    list
        [images, meta_data], where images is a float32 array of shape
        (len(ids), HEIGHT, WIDTH, 3) with RGB values scaled to [0, 1] and
        meta_data is data[1] converted to a numpy array.

    Raises
    ------
    FileNotFoundError
        If an image file cannot be read.
    """
    image_ids=data[0]
    meta_data=data[1]
    
    tmp_images=np.zeros((len(image_ids),HEIGHT,WIDTH,3),dtype='float32')
    for ix,id_ in enumerate(image_ids):
        path=os.path.join(test_images,id_+'.jpg')
        img=cv2.imread(path)
        # cv2.imread returns None instead of raising when the read fails.
        if img is None:
            raise FileNotFoundError('Could not read image: '+path)
        img=cv2.cvtColor(img,cv2.COLOR_BGR2RGB)
        # cv2.resize takes dsize as (width, height); the resulting array is
        # (HEIGHT, WIDTH, 3), matching the buffer above.
        img=cv2.resize(img,(WIDTH,HEIGHT))
        tmp_images[ix]=img.astype('float')/255.
    
    return [tmp_images,np.array(meta_data)]

**Prediction For test data**

In [None]:
# Test table: ids plus every column from position 2 onward as metadata.
test_df = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')
image_ids = test_df['Id'].to_numpy()
meta_data = test_df.iloc[:, 2:].to_numpy()

In [None]:
# Build the [images, metadata] inputs, then run inference on them.
test_inputs = test_data([image_ids, meta_data])
pred = model.predict(test_inputs)

In [None]:
# model.predict returns a 2-D array with one column; flatten it so the
# submission column receives a plain 1-D vector.
pred_flat=np.asarray(pred).ravel()
# One prediction per submission row, otherwise the file would be misaligned.
assert len(pred_flat)==len(sample_sub), 'prediction count does not match sample_submission rows'
sample_sub['Pawpularity']=pred_flat
sample_sub.to_csv('submission.csv',index=False)