**VGG-16 network for calculating popularity score**

The model predicts the popularity score of an image. I used a pre-trained computer vision network (VGG-16) as the basis for my model, and I trained the model on Google AI cloud.

Steps are mentioned below:

1. Created directories and sub-directories for storing training and validation images. 
2. Divided the dataset into training and validation sets and stored each in its respective folder. 
3. Resized the images to speed up the training. 
4. Used the convolutional base of VGG-16, added dense layers on top of it, and trained for 10 epochs. 
5. Results are calculated.

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

import os, shutil
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras import utils
import matplotlib.pyplot as plt
from tensorflow.keras import optimizers
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import matplotlib.image as mpimg
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

In [None]:
# Create the working directory that will hold the train/validation/test image splits.
original_dataset_dir = '/kaggle/input/petfinder-pawpularity-score/'
base_dir = '/kaggle/petfinder-pawpularity-score-small/'
# exist_ok=True lets this cell be re-run without raising FileExistsError.
os.makedirs(base_dir, exist_ok=True)


In [None]:
# Create the train, validation and test sub-directories under base_dir.
train_dir = os.path.join(base_dir, 'train')
validation_dir = os.path.join(base_dir, 'validation')
test_dir = os.path.join(base_dir, 'test')
for split_dir in (train_dir, validation_dir, test_dir):
    # exist_ok=True keeps the cell re-runnable once the folders already exist.
    os.makedirs(split_dir, exist_ok=True)

In [None]:
# Load train.csv from the dataset folder; get_score_by_id reads its
# 'Id' and 'Pawpularity' columns.
csv_path = os.path.join(original_dataset_dir, 'train.csv')
df_score = pd.read_csv(csv_path)

In [None]:
# Preview the first ten rows of the score table.
df_score.head(n=10)

In [None]:
# Print every directory found under the competition input folder.
for folder, _subdirs, _files in os.walk('/kaggle/input/petfinder-pawpularity-score/'):
    print(folder)

In [None]:
def get_score_by_id(filename):
    """Return the Pawpularity score stored in ``df_score`` for an image file.

    The image id is the part of the file name before the first dot; any
    leading directory components are ignored, so both ``'abc.jpg'`` and
    ``'/some/dir/abc.jpg'`` look up the id ``'abc'``.

    Args:
        filename: Image file name or path.

    Returns:
        The value of the 'Pawpularity' column for the first row of
        ``df_score`` whose 'Id' equals the image id.

    Raises:
        KeyError: If no row of ``df_score`` has that id.
    """
    image_id = os.path.basename(filename).split('.', 1)[0]
    scores = df_score.loc[df_score['Id'] == image_id, 'Pawpularity'].values
    if len(scores) == 0:
        # Fail with the offending id instead of an opaque IndexError from [0].
        raise KeyError(
            'No Pawpularity score found for image id {!r}'.format(image_id)
        )
    return scores[0]

In [None]:
# Copy the competition training images into the train / validation folders
# and record each copied image's Pawpularity score.
TRAIN_COUNT = 9500   # files with index [0, 9500) go to train_dir
SPLIT_END = 10000    # files with index [9500, 10000) go to validation_dir; the rest are ignored
train_scores = []
validation_scores = []
for dirname, _, filenames in os.walk('/kaggle/input/petfinder-pawpularity-score/train'):
    # os.walk does not guarantee any file order; sort so the split is reproducible.
    for i, fname in enumerate(sorted(filenames)):
        if i >= SPLIT_END:
            break
        src = os.path.join(dirname, fname)
        if i < TRAIN_COUNT:
            dst = os.path.join(train_dir, fname)
            shutil.copyfile(src, dst)
            train_scores.append(get_score_by_id(fname))
        else:
            dst = os.path.join(validation_dir, fname)
            shutil.copyfile(src, dst)
            validation_scores.append(get_score_by_id(fname))

# Build the arrays once: np.append inside the loop copies the whole array on
# every call. dtype=float matches what appending to an empty np.array([]) gave.
train_label = np.array(train_scores, dtype=float)
validation_label = np.array(validation_scores, dtype=float)

In [None]:
# Build the label array for the image files stored under a directory.
def create_label_array_from_images(curr_dir):
    """Collect the score of every file found under ``curr_dir``.

    Walks ``curr_dir`` with ``os.walk``, looks each file name up with
    ``get_score_by_id``, prints how many labels were collected and returns
    them as a NumPy array in traversal order.
    """
    labels = [
        get_score_by_id(str(name))
        for _root, _subdirs, names in os.walk(curr_dir)
        for name in names
    ]
    print ("Size of label array: ", len(labels))
    return np.array(labels)

In [None]:
# Report how many files each split directory contains.
train_files = os.listdir(train_dir)
print("No of train images in train_dir ", len(train_files))
validation_files = os.listdir(validation_dir)
print("No of train images in validation_dir ", len(validation_files))

In [None]:
# Report how many labels were collected for each split.
print("No of train labels in train_label ", len(train_label))
print("No of train labels in validation_label ", len(validation_label))


In [None]:
# Try the score lookup on a sample image file name.
sample_name = '0007de18844b0dbbb5e1f607da0606e0.jpg'
score = get_score_by_id(sample_name)
print(score)

In [None]:
# Instantiate the VGG16 convolutional base (ImageNet weights, no classifier
# top) for 150x150 three-channel inputs.
from tensorflow.keras.applications import VGG16

conv_base = VGG16(
    weights='imagenet',
    include_top=False,
    input_shape=(150, 150, 3),
)

In [None]:
import glob
import cv2

# Load every training image, resize it to 150x150 and scale pixels to [0, 1].
# The file list is sorted so the image order is reproducible.
files = sorted(glob.glob(train_dir + '/*'))
train_data_np = []
for myFile in files:
    image = cv2.imread(myFile)
    if image is None:
        # cv2.imread returns None for unreadable files; fail with the file
        # name instead of an obscure error inside cv2.resize.
        raise OSError('cv2.imread could not read image: {}'.format(myFile))
    resized = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)
    train_data_np.append(resized / 255)
train_data_np = np.array(train_data_np)

# Rebuild the labels from the exact file order used above. The labels were
# first collected in os.walk order over the source folder, which is not
# guaranteed to match the order in which glob lists the copied files.
train_label = np.array(
    [get_score_by_id(os.path.basename(path)) for path in files], dtype=float
)
print('train_data_np shape:', train_data_np.shape)

In [None]:
# Load every validation image, resize it to 150x150 and scale pixels to [0, 1].
# The file list is sorted so the image order is reproducible.
files = sorted(glob.glob(validation_dir + '/*'))
validation_data_np = []
for myFile in files:
    image = cv2.imread(myFile)
    if image is None:
        # cv2.imread returns None for unreadable files; fail with the file
        # name instead of an obscure error inside cv2.resize.
        raise OSError('cv2.imread could not read image: {}'.format(myFile))
    resized = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)
    validation_data_np.append(resized / 255)

validation_data_np = np.array(validation_data_np)

# Rebuild the labels from the exact file order used above. The labels were
# first collected in os.walk order over the source folder, which is not
# guaranteed to match the order in which glob lists the copied files.
validation_label = np.array(
    [get_score_by_id(os.path.basename(path)) for path in files], dtype=float
)
print('validation_data_np shape:', validation_data_np.shape)

In [None]:
# Run the validation images through the VGG16 convolutional base.
result_val = conv_base.predict(x=validation_data_np)

In [None]:
# Run the training images through the VGG16 convolutional base.
result_train = conv_base.predict(x=train_data_np)

In [None]:
# Display the shape of the convolutional-base output for the validation images.
result_val.shape

In [None]:
# Display the shape of the convolutional-base output for the training images.
result_train.shape

In [None]:
import matplotlib.pyplot as plt
#plt.imshow(result.reshape((4,4), 150))
#result[0]

In [None]:
# Flatten each sample's convolutional output into one 4 * 4 * 512 feature vector.
# The sample count is taken from the array itself instead of being hard-coded,
# so the cell still works when the number of images changes; np.reshape still
# raises if the per-sample feature size is not 4 * 4 * 512.
train_features = np.reshape(result_train, (result_train.shape[0], 4 * 4 * 512))
validation_features = np.reshape(result_val, (result_val.shape[0], 4 * 4 * 512))

In [None]:
# Regression head trained on the pre-computed VGG16 features.
model = keras.Sequential([
    # Declare the input size on an explicit Input. It used to be passed as
    # input_dim to the second layer, where it does not define the model input.
    # The features are already reshaped to (samples, 4 * 4 * 512), so no
    # Flatten layer is needed.
    keras.Input(shape=(4 * 4 * 512,)),
    layers.Dense(256, activation='relu'),
    # Single linear unit: the model outputs one unbounded score per image.
    layers.Dense(1, activation='linear'),
])
    

In [None]:
# Use Adam with the Keras default step size of 0.001. The earlier value of 0.1
# gave a first-epoch training loss of about 75421 in the recorded run, and the
# validation loss never improved over the ten epochs.
opt = keras.optimizers.Adam(learning_rate=0.001)
# The 'mse' metric is kept because the plotting cell reads
# history.history['mse'] and history.history['val_mse'].
model.compile(
    optimizer=opt,
    loss='mean_squared_error',
    metrics=['mse'],
)

In [None]:
# Train the dense head on the extracted features, evaluating on the
# validation features after every epoch.
history = model.fit(
    x=train_features,
    y=train_label,
    epochs=10,
    batch_size=64,
    validation_data=(validation_features, validation_label),
)

In [None]:
import matplotlib.pyplot as plt

# Pull the per-epoch curves recorded by model.fit.
loss = history.history['loss']
mse = history.history['mse']
val_loss = history.history['val_loss']
val_mse = history.history['val_mse']
epochs = range(1, len(loss) + 1)

# Figure 1: training vs. validation loss.
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

# Figure 2: training vs. validation mse. The title previously repeated
# "loss" from the first figure.
plt.figure()
plt.plot(epochs, mse, 'bo', label='Training mse')
plt.plot(epochs, val_mse, 'b', label='Validation mse')
plt.title('Training and validation mse')
plt.legend()
plt.show()

Epoch 1/10
149/149 [==============================] - 2s 12ms/step - loss: 75421.1406 - mse: 75421.1406 - val_loss: 422.3257 - val_mse: 422.3257 
Epoch 2/10
149/149 [==============================] - 2s 12ms/step - loss: 391.7776 - mse: 391.7776 - val_loss: 439.9749 - val_mse: 439.9749
Epoch 3/10
149/149 [==============================] - 2s 12ms/step - loss: 377.6066 - mse: 377.6066 - val_loss: 418.5122 - val_mse: 418.5122
Epoch 4/10
149/149 [==============================] - 2s 13ms/step - loss: 335.4939 - mse: 335.4939 - val_loss: 435.2131 - val_mse: 435.2131
Epoch 5/10
149/149 [==============================] - 2s 12ms/step - loss: 321.1635 - mse: 321.1635 - val_loss: 427.8064 - val_mse: 427.8064
Epoch 6/10
149/149 [==============================] - 2s 11ms/step - loss: 320.0218 - mse: 320.0218 - val_loss: 470.0755 - val_mse: 470.0755
Epoch 7/10
149/149 [==============================] - 2s 11ms/step - loss: 302.0693 - mse: 302.0693 - val_loss: 434.5990 - val_mse: 434.5990
Epoch 8/10
149/149 [==============================] - 2s 11ms/step - loss: 283.7918 - mse: 283.7918 - val_loss: 461.1010 - val_mse: 461.1010
Epoch 9/10
149/149 [==============================] - 2s 12ms/step - loss: 272.7795 - mse: 272.7795 - val_loss: 479.4508 - val_mse: 479.4508
Epoch 10/10
149/149 [==============================] - 2s 11ms/step - loss: 258.2112 - mse: 258.2112 - val_loss: 525.3227 - val_mse: 525.3227

In [None]:
# Predict a score for every image in the competition test folder.
# The path is built from original_dataset_dir so it matches the
# '/kaggle/input/...' location used by the rest of the notebook; the earlier
# '/home/kaggle/input/...' pattern pointed at a different location.
files = sorted(glob.glob(os.path.join(original_dataset_dir, 'test', '*')))
score_rows = []
for myFile in files:
    image = cv2.imread(myFile)
    if image is None:
        # cv2.imread returns None for unreadable files; fail with the file name.
        raise OSError('cv2.imread could not read image: {}'.format(myFile))
    # Same preprocessing as for training: resize to 150x150, scale to [0, 1].
    image_dim_modify = cv2.resize(image, (150, 150), interpolation=cv2.INTER_AREA)
    resized = image_dim_modify / 255
    resized_expand_dim = np.expand_dims(resized, axis=0)
    # Extract VGG16 features, flatten them, then apply the dense head.
    interim_arry = conv_base.predict(resized_expand_dim)
    test_features = np.reshape(interim_arry, (1, 4 * 4 * 512))
    score = model.predict(test_features)
    score_rows.append({'FileName': os.path.basename(myFile), 'Score': score[0][0]})

# Build the frame once: DataFrame.append was removed in pandas 2.0 and copied
# the whole frame on every call.
score_df = pd.DataFrame(score_rows, columns=['FileName', 'Score'])

In [None]:
#Save the result
# index=False keeps the row index out of the file, so no 'Unnamed: 0' column
# appears when result.csv is read back.
score_df.to_csv('result.csv', index=False)

In [None]:
# #Save the result
# a = pd.read_csv('/kaggle/input/submission/result.csv')
# a = a.drop(columns = ['Unnamed: 0'])
# a.to_csv('result.csv')