In [None]:
import math
import os

import tqdm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from tensorflow.keras.applications.resnet50 import ResNet50
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions

#---

from nltk.corpus import wordnet as wn
import nltk
nltk.download("wordnet")

In [None]:
# Resolve where the training images, the training table and the output live,
# depending on the environment the notebook is running in.
if 'COLAB_GPU' in os.environ:
    # Colab: everything is kept under /content/drive/MyDrive.
    TRAIN_IMAGES_DIR = "/content/drive/MyDrive/datasets/pawpularity/train"
    TRAIN_DS = "/content/drive/MyDrive/datasets/pawpularity/train.csv"
    OUTPUT_DIR = "/content/drive/MyDrive/datasets/pawpularity"
elif 'KAGGLE_CONTAINER_NAME' in os.environ:
    # Kaggle: read from the competition input directory, write to the
    # current working directory.
    TRAIN_IMAGES_DIR = "../input/petfinder-pawpularity-score/train"
    TRAIN_DS = "../input/petfinder-pawpularity-score/train.csv"
    OUTPUT_DIR = "."
else:
    # Any other environment: without this branch the three names would stay
    # undefined and a later cell would fail with a NameError. The dataset is
    # expected in the same `train/` + `train.csv` layout used above; the
    # PAWPULARITY_DATA_DIR environment variable overrides its location.
    _LOCAL_DATA_DIR = os.environ.get("PAWPULARITY_DATA_DIR", ".")
    TRAIN_IMAGES_DIR = os.path.join(_LOCAL_DATA_DIR, "train")
    TRAIN_DS = os.path.join(_LOCAL_DATA_DIR, "train.csv")
    OUTPUT_DIR = _LOCAL_DATA_DIR

In [None]:
# Instantiate ResNet50 with its 'imagenet' weights; later cells call
# `model.predict` / `model.predict_on_batch` on it.
model = ResNet50(weights='imagenet')

In [None]:
# Load the training table from TRAIN_DS; later cells read image ids from its
# `Id` column.
df = pd.read_csv(TRAIN_DS)

In [None]:
def get_all_hyponyms(label):
  """Return the lower-cased lemma names of all transitive hyponyms of a synset.

  Args:
    label: WordNet synset identifier understood by `wn.synset`,
      e.g. "dog.n.01".

  Returns:
    A set of lower-cased lemma-name strings gathered from every synset reached
    by repeatedly following `hyponyms()` from the given synset.
  """
  root = wn.synset(label)
  names = set()
  for descendant in root.closure(lambda node: node.hyponyms()):
    for lemma_name in descendant.lemma_names():
      names.add(lemma_name.lower())
  return names

def cat_or_dog(predictions, dog_labels=None, cat_labels=None):
  """Decide whether a set of class predictions points to a dog, a cat or neither.

  Args:
    predictions: iterable of 3-element entries whose second item is the class
      name and whose third item is its probability (the cells below pass one
      image's entry from `decode_predictions`).
    dog_labels: optional collection of lower-case class names counted as dogs.
      Defaults to the notebook-level `dogs` set.
    cat_labels: optional collection of lower-case class names counted as cats.
      Defaults to the notebook-level `cats` set.

  Returns:
    dict with keys:
      'result'  - "dog" or "cat", whichever has the larger summed probability;
                  "neither" when the two sums are equal (including both zero).
      'dog'     - summed probability of the entries whose name is a dog label.
      'cat'     - summed probability of the entries whose name is a cat label.
      'neither' - summed probability of the remaining entries.
  """
  # Fall back to the notebook globals only when no explicit sets are given, so
  # the function can also be used (and tested) without that hidden state.
  if dog_labels is None:
    dog_labels = dogs
  if cat_labels is None:
    cat_labels = cats

  # Materialise once: the entries are traversed several times below, which
  # would silently yield nothing on the second pass of a generator.
  predictions = list(predictions)
  probs = np.array([e[2] for e in predictions])
  names = [e[1].lower() for e in predictions]

  dog_arr = np.array([name in dog_labels for name in names], dtype=bool)
  dog = np.sum(dog_arr * probs)

  cat_arr = np.array([name in cat_labels for name in names], dtype=bool)
  cat = np.sum(cat_arr * probs)

  neither_arr = np.logical_and(np.logical_not(dog_arr), np.logical_not(cat_arr))
  neither = np.sum(neither_arr * probs)

  res = "neither"
  if dog > cat:
    res = "dog"
  elif dog < cat:
    res = "cat"

  return {'result':res, 'dog':dog, 'cat':cat, 'neither':neither}

In [None]:
# Label sets that cat_or_dog looks predicted class names up in: the hyponym
# lemma names of the WordNet "dog.n.01" and "cat.n.01" synsets.
dogs, cats = (
  get_all_hyponyms(synset_name) for synset_name in ("dog.n.01", "cat.n.01")
)

In [None]:
# Spot-check on five randomly sampled training images: show each image
# together with the model's top-5 predictions and the derived cat/dog verdict.
img_filenames = df.sample(5).Id
for filename in img_filenames.tolist():
  img_path = f'{TRAIN_IMAGES_DIR}/{filename}.jpg'
  img = image.load_img(img_path, target_size=(224, 224))
  img = image.img_to_array(img)
  img = image.smart_resize(img, (224, 224))

  # preprocess_input may write its result over the array it is given, and
  # np.expand_dims returns a view of `img`. Preprocess a copy so that the
  # picture displayed below is the actual image, not its preprocessed version.
  x = preprocess_input(np.expand_dims(img, axis=0).copy())
  preds = model.predict(x)

  decoded = decode_predictions(preds, top=5)[0]
  print('Predicted:', decoded)
  result = cat_or_dog(decoded)
  print(result)

  plt.imshow(image.array_to_img(img))
  plt.title(f"{filename}: {result['result']}")
  plt.show()

In [None]:
# Label every training image as "dog", "cat" or "neither" with the model, in
# batches, then write one "<id>,<label>" line per image to labels.csv.
labels = []
batch_size = 500
ids = list(df.Id)
num_batches = math.ceil(len(ids) / batch_size)
for batch in tqdm.tqdm(range(num_batches)):
  # Half-open slice [start, start + batch_size): consecutive batches must not
  # overlap, otherwise `labels` grows longer than `ids` and every label after
  # the first batch is written next to the wrong image id.
  start = batch_size * batch
  images_batch = []

  for filename in ids[start:start + batch_size]:
    img_path = f'{TRAIN_IMAGES_DIR}/{filename}.jpg'
    img = image.load_img(img_path, target_size=(224, 224))
    img = image.img_to_array(img)
    # NOTE(review): load_img already resized to 224x224, so this call is
    # presumably a no-op; kept so the model input stays unchanged - confirm
    # before removing.
    img = image.smart_resize(img, (224, 224))
    images_batch.append(img)

  # np.stack adds the batch axis itself, so a batch holding a single image
  # keeps its leading dimension (squeezing a stacked batch would drop it).
  x = preprocess_input(np.stack(images_batch))
  preds = model.predict_on_batch(x)
  decoded = decode_predictions(preds, top=5)
  labels.extend(cat_or_dog(dec)['result'] for dec in decoded)

# Guard the id/label pairing: zip() below would silently truncate otherwise.
assert len(labels) == len(ids), f"expected {len(ids)} labels, got {len(labels)}"

with open(f"{OUTPUT_DIR}/labels.csv", "w") as fo:
  for img_id, label in zip(ids, labels):
    fo.write(f"{img_id},{label}\n")

In [None]:
# Run the `nvidia-smi` shell command (IPython `!` escape) and show its output.
!nvidia-smi