In [None]:
import numpy as np
import pandas as pd 
import os
import math

import matplotlib.pyplot as plt
import tqdm

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

from nltk.corpus import wordnet as wn
import nltk
nltk.download("wordnet")

# global variables

In [None]:
# Input CSV, folder holding the training images, and the output file that
# receives the predicted label of each image.
TRAIN_PATH = "../input/petfinder-pawpularity-score/train.csv"
TRAIN_IMAGE_FOLDER = "../input/petfinder-pawpularity-score/train"
RESULT_PATH = "result.csv"

# Image paths are built as f"{TRAIN_IMAGE_FOLDER}/{Id}.{FILE_EXTENSION}".
FILE_EXTENSION = "jpg"
# Number of images handed to the model per predict_on_batch call.
BATCH_SIZE = 500
# Size passed to load_img / smart_resize when preparing an image.
TARGET_SIZE = (224, 224)

# Label strings; they double as the keys of the dict returned by
# predictImageType (RESULT holds the final label, the others hold scores).
DOG_TYPE = "dog"
CAT_TYPE = "cat"
OTHER_TYPE = "other"
RESULT = "result"

# ResNet50 classifier initialised with ImageNet weights; instantiated once
# here and reused for every batch.
MODEL = ResNet50(weights='imagenet')

def get_all_hyponyms(label):
    """Gather the lemma names of every synset below ``label`` in WordNet.

    Args:
        label: WordNet synset identifier such as "dog.n.01".

    Returns:
        set[str]: lower-cased lemma names of all synsets reached by following
        hyponym links transitively from the given synset.
    """
    root = wn.synset(label)
    names = set()
    # closure() walks the hyponym relation repeatedly, yielding each descendant.
    for descendant in root.closure(lambda node: node.hyponyms()):
        for lemma in descendant.lemma_names():
            names.add(lemma.lower())
    return names

# Lower-cased lemma names of all WordNet hyponyms of "dog" and "cat".
# predictImageType checks predicted class names for membership in these sets.
DOG_HYPONYMS = get_all_hyponyms("dog.n.01")
CAT_HYPONYMS = get_all_hyponyms("cat.n.01")

# common functions

In [None]:
def image2vector(imagePath):
    """Load an image file and turn it into a single-image model input.

    Args:
        imagePath: path of the image file to read.

    Returns:
        The image as an array with a leading batch axis of length 1, resized
        to TARGET_SIZE and run through ResNet50's ``preprocess_input``.
    """
    loaded = image.load_img(imagePath, target_size=TARGET_SIZE)
    # load_img was already asked for TARGET_SIZE; smart_resize is applied to
    # the resulting array with the same size.
    pixels = image.smart_resize(image.img_to_array(loaded), TARGET_SIZE)
    # Prepend the batch axis, then apply the model-specific preprocessing.
    return preprocess_input(np.expand_dims(pixels, axis=0))

def predictImageType(predictions):
    """Decide whether a set of decoded predictions describes a dog or a cat.

    Args:
        predictions: iterable of decoded prediction entries, where index 1 of
            each entry is the class name and index 2 is its score.

    Returns:
        dict with four keys:
            RESULT     -- DOG_TYPE if the summed dog score is larger,
                          CAT_TYPE if the summed cat score is larger,
                          OTHER_TYPE when the two sums are equal.
            DOG_TYPE   -- summed score of entries whose name is a dog hyponym.
            CAT_TYPE   -- summed score of entries whose name is a cat hyponym.
            OTHER_TYPE -- summed score of entries matching neither set.
    """
    scores = np.array([entry[2] for entry in predictions])

    is_dog = np.array([entry[1].lower() in DOG_HYPONYMS for entry in predictions])
    is_cat = np.array([entry[1].lower() in CAT_HYPONYMS for entry in predictions])
    # "neither" == not (dog or cat)
    is_neither = np.logical_not(np.logical_or(is_dog, is_cat))

    dog_score = np.sum(is_dog * scores)
    cat_score = np.sum(is_cat * scores)
    neither_score = np.sum(is_neither * scores)

    # Only the dog/cat comparison drives the label; the "neither" score is
    # reported but does not influence it.
    if dog_score > cat_score:
        verdict = DOG_TYPE
    elif dog_score < cat_score:
        verdict = CAT_TYPE
    else:
        verdict = OTHER_TYPE

    return {
        RESULT: verdict,
        DOG_TYPE: dog_score,
        CAT_TYPE: cat_score,
        OTHER_TYPE: neither_score,
    }

# load data

In [None]:
# Training metadata table; its Id column supplies the image file names.
train = pd.read_csv(TRAIN_PATH)

# predict image label

In [None]:
# Classify every training image as dog / cat / other, BATCH_SIZE images at a
# time. After the loop LABELS[i] is the label predicted for IDS[i].
IDS = list(train.Id)
NUM_BATCHES = math.ceil(len(IDS) / BATCH_SIZE)

LABELS = []

for batch in tqdm.tqdm(range(NUM_BATCHES)):
    # Half-open slice [start, stop) holds at most BATCH_SIZE ids. The previous
    # end index of start + BATCH_SIZE + 1 re-processed the first image of the
    # next batch, which made LABELS longer than IDS and shifted the pairing.
    start = BATCH_SIZE * batch
    stop = start + BATCH_SIZE

    images_batch = [
        image2vector(f'{TRAIN_IMAGE_FOLDER}/{filename}.{FILE_EXTENSION}')
        for filename in IDS[start:stop]
    ]

    # Each image2vector result carries a leading batch axis of length 1, so
    # joining along axis 0 yields one (n, ...) batch. Unlike squeeze(), this
    # keeps the batch axis when the batch contains a single image.
    preds = MODEL.predict_on_batch(np.concatenate(images_batch, axis=0))
    # Turn raw class scores into (id, name, score) entries per image.
    decoded = decode_predictions(preds)
    # Reduce each image's entries to a single dog / cat / other label.
    batch_labels = [predictImageType(dec)[RESULT] for dec in decoded]
    LABELS += batch_labels

# make image label dataframe file 

In [None]:
# Persist one "<Id>,<label>" line per image (no header row).
with open(RESULT_PATH, "w") as fo:
    for img_id, label in zip(IDS, LABELS):
        fo.write(f"{img_id},{label}\n")

# The file has no header, so column names are supplied explicitly; with the
# default header handling read_csv would use the first image's record as the
# column names and drop it from the data.
labels = pd.read_csv(RESULT_PATH, header=None, names=["Id", "label"])
labels.head()