# PetFinder.my using GLCM

This notebook is an entry for the PetFinder.my Pawpularity competition.

I'll extract GLCM (gray-level co-occurrence matrix) texture features from the images and train a model on them with TensorFlow Keras.

First, load the training and test data for processing.

In [None]:
import os
import pandas as pd

# Image folders for the two splits
train_img_path = '../input/petfinder-pawpularity-score/train'
test_img_path = '../input/petfinder-pawpularity-score/test'

# Tabular data shipped next to the image folders
train_data_csv = pd.read_csv('../input/petfinder-pawpularity-score/train.csv')
test_data_csv = pd.read_csv('../input/petfinder-pawpularity-score/test.csv')

# Quick sanity check: (rows, columns) of the train table, then the test table
for csv_frame in (train_data_csv, test_data_csv):
    print(csv_frame.shape)

# Process Image

Convert each image to grayscale and resize it to 256x256.

Then extract 20 GLCM features (5 properties × 4 angles) from each image.

In [None]:
from tqdm import tqdm, trange
import numpy as np
import cv2
from skimage.feature import greycomatrix, greycoprops

# GLCM texture properties computed for every image
properties = ['contrast', 'dissimilarity', 'energy', 'homogeneity', 'correlation']

def calc_glcm_all_agls(img,
                       props,
                       dists = [5],
                       agls = [0, np.pi / 4, np.pi / 2, 3 * np.pi / 4],
                       lvl = 256,
                       sym = True,
                       norm = True):
    """Compute GLCM texture features of ``img`` for every requested angle.

    A grey-level co-occurrence matrix is built with ``greycomatrix`` and each
    property named in ``props`` is evaluated on it with ``greycoprops``.

    Args:
        img: image array handed straight to ``greycomatrix``
            (callers in this notebook pass a 256x256 grayscale image).
        props: iterable of property names understood by ``greycoprops``.
        dists: pixel-pair distances forwarded to ``greycomatrix``.
        agls: pixel-pair angles in radians (default 0, 45, 90 and 135 degrees).
        lvl: number of grey levels forwarded to ``greycomatrix``.
        sym: forwarded as ``symmetric``.
        norm: forwarded as ``normed``.

    Returns:
        A flat list, grouped by property in the order of ``props``, holding
        the values found in row 0 of each ``greycoprops`` result. With the
        default arguments that is one value per angle; rows other than the
        first (presumably additional distances - see the skimage docs) are
        not included.
    """
    cooccurrence = greycomatrix(img,
                                distances = dists,
                                angles = agls,
                                levels = lvl,
                                symmetric = sym,
                                normed = norm)

    feature = []
    for prop_name in props:
        # keep only the first row of the property table
        feature.extend(greycoprops(cooccurrence, prop_name)[0])

    return feature

# Build one feature row per training image: the GLCM values returned by
# calc_glcm_all_agls followed by the values of CSV columns 1..12
# (presumably the per-photo metadata columns - confirm against train.csv).
train_features = []
train_pawpul = train_data_csv['Pawpularity'].astype(np.float32)
train_data_array = np.array(train_data_csv)
for i, img_id in enumerate(tqdm(train_data_csv['Id'])):
    path = os.path.join(train_img_path, img_id + '.jpg')
    img_org = cv2.imread(path)
    if img_org is None:
        # cv2.imread does not raise on a missing/undecodable file, it returns
        # None; fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError('Could not read image: ' + path)
    img_gray = cv2.cvtColor(img_org, cv2.COLOR_BGR2GRAY)
    img_resize = cv2.resize(img_gray, (256, 256))

    # Assemble the complete row before storing it, so train_features never
    # holds a half-built entry.
    row = calc_glcm_all_agls(img_resize, props = properties)
    row.extend(np.float32(train_data_array[i][col]) for col in range(1, 13))
    train_features.append(row)

# Sanity check: sample count, target count, first feature row, first target
print(len(train_features))
print(len(train_pawpul))
print(train_features[0])
print(train_pawpul[0])

In [None]:
# prepare test features
# Each row is built the same way as for the training images: the GLCM values
# returned by calc_glcm_all_agls followed by the values of CSV columns 1..12.
test_features = []
test_data_array = np.array(test_data_csv)
for i, img_id in enumerate(tqdm(test_data_csv['Id'])):
    path = os.path.join(test_img_path, img_id + '.jpg')
    img_org = cv2.imread(path)
    if img_org is None:
        # cv2.imread does not raise on a missing/undecodable file, it returns
        # None; fail here with the offending path instead of inside cvtColor.
        raise FileNotFoundError('Could not read image: ' + path)
    img_gray = cv2.cvtColor(img_org, cv2.COLOR_BGR2GRAY)
    img_resize = cv2.resize(img_gray, (256, 256))

    # Assemble the complete row before storing it.
    row = calc_glcm_all_agls(img_resize, props = properties)
    row.extend(np.float32(test_data_array[i][col]) for col in range(1, 13))
    test_features.append(row)

# Sanity check: sample count and first feature row
print(len(test_features))
print(test_features[0])

# Prepare Train Data

Split the training data into a training set (80%) and a validation set (20%).

In [None]:
total_cnt = len(train_features)
print(total_cnt)
split_rate = 0.8

# Position of the first validation sample; everything before it is used for
# training. The split is positional - no shuffling happens here.
split_idx = int(total_cnt * split_rate)

train_feat, val_feat = (
    np.array(part)
    for part in (train_features[:split_idx], train_features[split_idx:])
)
train_pawp, val_pawp = (
    np.array(part)
    for part in (train_pawpul[:split_idx], train_pawpul[split_idx:])
)

for first, second in ((train_feat, val_feat), (train_pawp, val_pawp)):
    print(first.shape, second.shape)

# Train with NN

Train a simple neural network on the data.

First, build the network.

In [None]:
import tensorflow as tf
from tensorflow.keras.layers import *
from tensorflow.keras.models import *
from tensorflow.keras.optimizers import *
from tensorflow.keras.losses import *
from tensorflow.keras.metrics import *
from tensorflow.keras import *

# tensorflow
print(tf.__version__)

# network: two hidden ReLU layers (32 and 16 units) and a single linear
# output unit for the regression target.
# The input shape must be a tuple: `(n)` without the trailing comma is just
# the int n. The tensor is also named `feat_input` so the Python builtin
# `input` is not shadowed for the rest of the notebook.
feat_input = Input(shape = (train_feat.shape[1],))
model_feat = Dense(32, activation = "relu")(feat_input)
model_feat = Dense(16, activation = "relu")(model_feat)
output = Dense(1)(model_feat)
model = Model(inputs = feat_input, outputs = output)

# learning rate: start at 1e-3 and multiply by 0.96 every 100 optimizer steps
# (staircase = True makes the decay happen in discrete jumps)
lr_schedule = schedules.ExponentialDecay(
    initial_learning_rate = 1e-3,
    decay_steps = 100,
    decay_rate = 0.96,
    staircase = True)

# compile network: optimize MSE, report RMSE
model.compile(optimizer = Adam(learning_rate = lr_schedule),
             loss = losses.MeanSquaredError(),
             metrics = [metrics.RootMeanSquaredError()])

# show network
model.summary()

Start training.

In [None]:
from tensorflow.keras.utils import *
from sklearn.utils import shuffle

class DataGenerator(Sequence):
    """Keras ``Sequence`` serving (features, targets) batches from in-memory arrays.

    Args:
        feat_data: feature array, indexed by sample along the first axis.
        pawp_data: target array aligned with ``feat_data``; its ``shape[0]``
            defines the number of samples.
        batch_size: number of samples per batch (the last batch may be smaller).
        shuffle: when truthy, both arrays are shuffled together at
            construction time and again at the end of every epoch.
    """

    def __init__(self, feat_data, pawp_data, batch_size = 64, shuffle = True):
        """Store the data and apply the initial shuffle if requested."""
        # Let the Keras base class set up whatever state it needs.
        super().__init__()
        self.feat_data = feat_data
        self.pawp_data = pawp_data
        self.batch_size = batch_size
        self.shuffle = shuffle
        self.on_epoch_end()

    def __len__(self):
        """Return the number of batches per epoch.

        Ceiling division keeps the trailing partial batch, so no sample is
        silently dropped and a dataset smaller than ``batch_size`` still
        yields one batch.
        """
        sample_cnt = self.pawp_data.shape[0]
        return (sample_cnt + self.batch_size - 1) // self.batch_size

    def __getitem__(self, index):
        """Return batch number ``index`` as a ``(features, targets)`` pair."""
        start = index * self.batch_size
        stop = start + self.batch_size
        # Slicing past the end is safe; the final batch is simply shorter.
        feat_batch = self.feat_data[start:stop]
        pawp_batch = self.pawp_data[start:stop]

        return feat_batch, pawp_batch

    def on_epoch_end(self):
        """Reshuffle features and targets together when shuffling is enabled."""
        if self.shuffle:
            # `shuffle` here is the module-level sklearn.utils.shuffle, which
            # applies the same permutation to both arrays.
            self.feat_data, self.pawp_data = shuffle(self.feat_data, self.pawp_data)

In [None]:
from tensorflow.keras.callbacks import *

# Early-stopping callback: monitors the validation loss with a patience of
# 5 epochs and restores the best weights seen when training stops.
early_stop_options = dict(
    monitor = 'val_loss',
    patience = 5,
    restore_best_weights = True,
)
early_stop = EarlyStopping(**early_stop_options)

In [None]:
# The generators are built with shuffle = False, so DataGenerator keeps the
# rows in their original order.
train_gen = DataGenerator(train_feat, train_pawp, shuffle = False)
val_gen = DataGenerator(val_feat, val_pawp, shuffle = False)

# Train for at most 100 epochs; early_stop ends the run sooner when the
# validation loss stops improving.
# NOTE: the former `use_multiprocessing = True, workers = -1` arguments were
# removed. -1 is not a valid worker count, and the data already lives in
# memory, so background loader processes bring no benefit.
history = model.fit(train_gen, epochs = 100, validation_data = val_gen,
                    callbacks = [early_stop])

# Show Train result

Plot the training results as graphs.

In [None]:
import matplotlib.pyplot as plt

# Per-epoch curves recorded by model.fit
hist = history.history
rmse, val_rmse = hist['root_mean_squared_error'], hist['val_root_mean_squared_error']
loss, val_loss = hist['loss'], hist['val_loss']

# Epochs are numbered from 1 on the x axis
epochs = range(1, len(rmse) + 1)

# One figure per metric: training values as dots, validation values as a line
curve_specs = [
    (rmse, val_rmse, 'Training rmse', 'Validation rmse', 'Training and validation rmse'),
    (loss, val_loss, 'Training loss', 'Validation loss', 'Training and validation loss'),
]
for fig_idx, (train_curve, val_curve, train_label, val_label, fig_title) in enumerate(curve_specs):
    if fig_idx > 0:
        # every metric after the first gets a fresh figure
        plt.figure()
    plt.plot(epochs, train_curve, 'bo', label=train_label)
    plt.plot(epochs, val_curve, 'b', label=val_label)
    plt.title(fig_title)
    plt.legend()

plt.show()

# Evaluation & Submit

Predict on the test set with the trained model and save the results as a CSV file.

In [None]:
Id = np.array(test_data_csv['Id'])

# test_features is a nested Python list; convert it to an ndarray the same
# way train_feat was built instead of relying on implicit list handling
# inside model.predict.
test_feat = np.array(test_features)

# The model ends in Dense(1), so predict() yields one column per sample;
# flatten it to a 1-D vector before using it as a DataFrame column.
predictions = model.predict(test_feat).ravel()

# Build the submission table in one step and write it without the index
submission_df = pd.DataFrame({'Id': Id, 'Pawpularity': predictions})
submission_df.to_csv('submission.csv',index = False)

# show result
print(submission_df.head(10))

print('Finished')