In [None]:
# imports
import csv
import glob
import pandas as pd
from pathlib import Path

import matplotlib.pyplot as plt
import matplotlib.image as mpimg

from shutil import copy, copytree
from sklearn.model_selection import train_test_split

import tensorflow as tf
import tensorflow_datasets as tfds

In [None]:
# Resolve the competition input folders and the local output folder.
data_dir = Path.cwd().joinpath('../input/petfinder-pawpularity-score').resolve()
train_dir = data_dir / 'train'
test_dir = data_dir / 'test'
out_dir = Path.cwd().joinpath('output').resolve()

# Every regular *.jpg file in each image folder, in glob order.
test_images = list(filter(Path.is_file, test_dir.glob('*.jpg')))
train_images = list(filter(Path.is_file, train_dir.glob('*.jpg')))

# Tabular metadata shipped next to the images.
train_labels = pd.read_csv(data_dir / 'train.csv')
test_labels = pd.read_csv(data_dir / 'test.csv')

# Template showing the expected submission layout.
submission = pd.read_csv(data_dir / 'sample_submission.csv')

In [None]:
# View it: one randomly drawn training image, titled with its Pawpularity score
sample_row = train_labels.sample(1)
sample_id = sample_row.Id.item()
sample_score = sample_row.Pawpularity.item()

fig, ax = plt.subplots()
ax.set_xticks([])
ax.set_yticks([])
ax.set_title(sample_score)
fig.tight_layout()
ax.imshow(mpimg.imread(train_dir / f'{sample_id}.jpg'))

In [None]:
# Split the labelled rows into train / validate subsets.
# A fixed seed keeps the split stable across re-runs; with a fresh random
# split each time, files copied by an earlier run would stay on disk and the
# same image could end up under both train/ and validate/.
train, validate = train_test_split(train_labels, test_size=0.2, random_state=42)

datasets = {'train': train, 'validate': validate}

# Bucket the scores to multiples of ten and lay the images out as
#   out_dir/<split>/<bucket>/<image>.jpg
for key, df in datasets.items():
    buckets = df[['Id', 'Pawpularity']].round(-1)

    # Create each bucket folder once per split instead of once per image.
    for label in buckets['Pawpularity'].unique():
        (out_dir / key / f'{label}').mkdir(parents=True, exist_ok=True)

    for img_id, label in zip(buckets['Id'], buckets['Pawpularity']):
        copy(train_dir / f'{img_id}.jpg', out_dir / key / f'{label}')

# copytree raises if the destination already exists, so skip it on re-runs.
test_out_dir = out_dir / 'test'
if not test_out_dir.exists():
    copytree(test_dir, test_out_dir)

In [None]:
# Build tf.data datasets from the folder tree under out_dir
builder = tfds.ImageFolder(out_dir)
print(builder.info)  # dataset metadata as reported by the builder

ds_train, ds_validate = (
    builder.as_dataset(split=split_name, shuffle_files=True)
    for split_name in ('train', 'validate')
)
#tfds.show_examples(ds_validate, builder.info)

The rest of this notebook is based on https://colab.research.google.com/drive/1bOzVaDQo8h6Ngstb7AcfzC35OihpHspt?usp=sharing#scrollTo=53OTCh3jnbwV

In [None]:
IMG_SIZE = 160 # All images will be resized to 160x160

def format_example(pair):
  """Turn one dataset record into an (image, label) tuple ready for the model.

  Args:
    pair: mapping with 'image' and 'label' entries.

  Returns:
    (image, label): the image cast to float32, rescaled with x / 127.5 - 1
    and resized to IMG_SIZE x IMG_SIZE; the label is passed through as-is.
  """
  # assumes pixel values are in 0..255 so the result lands in [-1, 1] — TODO confirm
  pixels = tf.cast(pair['image'], tf.float32)
  pixels = (pixels / 127.5) - 1
  resized = tf.image.resize(pixels, (IMG_SIZE, IMG_SIZE))
  return resized, pair['label']

# Apply the same preprocessing to both splits.
train, validation = (
    split.map(format_example) for split in (ds_train, ds_validate)
)

In [None]:
BATCH_SIZE = 32
SHUFFLE_BUFFER_SIZE = 1000

# Training data is shuffled before batching; validation data is only batched.
shuffled_train = train.shuffle(SHUFFLE_BUFFER_SIZE)
train_batches = shuffled_train.batch(BATCH_SIZE)
validation_batches = validation.batch(BATCH_SIZE)

In [None]:
# Pull a single batch so later cells can probe layer output shapes with it.
first_batch_only = train_batches.take(1)
for image_batch, label_batch in first_batch_only:
    pass

image_batch.shape

In [None]:
IMG_SHAPE = (IMG_SIZE, IMG_SIZE, 3)

# MobileNet V2 with ImageNet weights and without its classification top;
# it serves as the feature extractor underneath the new head.
base_model = tf.keras.applications.MobileNetV2(
    input_shape=IMG_SHAPE,
    weights='imagenet',
    include_top=False,
)

In [None]:
# Run the sample batch through the base model and show the feature shape.
feature_batch = base_model(image_batch)
print(feature_batch.shape)

In [None]:
# Mark the base model as non-trainable before the full model is compiled.
base_model.trainable = False

In [None]:
# Let's take a look at the base model architecture
# (layer-by-layer listing printed by Keras).
base_model.summary()

In [None]:
# Global average pooling layer placed on top of the base model's features;
# apply it to the sample feature batch and show the resulting shape.
global_average_layer = tf.keras.layers.GlobalAveragePooling2D()
feature_batch_average = global_average_layer(feature_batch)
print(feature_batch_average.shape)

In [None]:
# One logit per label folder discovered by the dataset builder. Deriving the
# width from the builder (instead of hard-coding 11) keeps the head in sync
# with the labels the datasets actually emit, e.g. if a score bucket has no
# images or the bucketing scheme changes.
NUM_CLASSES = builder.info.features['label'].num_classes

prediction_layer = tf.keras.layers.Dense(NUM_CLASSES)
prediction_batch = prediction_layer(feature_batch_average)
print(prediction_batch.shape)

In [None]:
# Stack: feature extractor -> global pooling -> classification head.
model_layers = [base_model, global_average_layer, prediction_layer]
model = tf.keras.Sequential(model_layers)

In [None]:
base_learning_rate = 0.0001

# The head emits raw logits, hence from_logits=True on the loss.
optimizer = tf.keras.optimizers.RMSprop(learning_rate=base_learning_rate)
loss_fn = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
model.compile(optimizer=optimizer, loss=loss_fn, metrics=['accuracy'])

In [None]:
# Print the layer listing of the assembled model.
model.summary()

In [None]:
# Number of variables the optimizer will update.
# NOTE(review): presumably 2 (Dense kernel + bias) while the base is frozen — confirm.
len(model.trainable_variables)

In [None]:
initial_epochs   = 100
validation_steps = 20

# Baseline loss/accuracy before any training, over validation_steps batches.
baseline_metrics = model.evaluate(validation_batches, steps=validation_steps)
loss0, accuracy0 = baseline_metrics

In [None]:
# Report the pre-training baseline to two decimals.
print(f"initial loss: {loss0:.2f}")
print(f"initial accuracy: {accuracy0:.2f}")

In [None]:
# Train for initial_epochs epochs, validating on the validation batches
# after each epoch; the returned history feeds the plots below.
history = model.fit(
    train_batches,
    validation_data=validation_batches,
    epochs=initial_epochs,
)

In [None]:
# Per-epoch metrics recorded by model.fit
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

fig, (acc_ax, loss_ax) = plt.subplots(2, 1, figsize=(8, 8))

# Top panel: accuracy
acc_ax.plot(acc, label='Training Accuracy')
acc_ax.plot(val_acc, label='Validation Accuracy')
acc_ax.legend(loc='lower right')
acc_ax.set_ylabel('Accuracy')
acc_ax.set_ylim(min(acc_ax.get_ylim()), 1)
acc_ax.set_title('Training and Validation Accuracy')

# Bottom panel: loss
loss_ax.plot(loss, label='Training Loss')
loss_ax.plot(val_loss, label='Validation Loss')
loss_ax.legend(loc='upper right')
loss_ax.set_ylabel('Cross Entropy')
# A fixed [0, 1] range hides the curves here: an untrained 11-way classifier
# starts near ln(11) ~= 2.4. Grow the upper limit to the largest finite loss,
# never going below the previous limit of 1.0.
finite_losses = [v for v in (*loss, *val_loss) if v == v and abs(v) != float('inf')]
loss_ax.set_ylim(0, max(1.0, max(finite_losses, default=1.0)))
loss_ax.set_title('Training and Validation Loss')
loss_ax.set_xlabel('epoch')
plt.show()

In [None]:
# save model: make sure the target folder exists, then write the model there
model_dir = out_dir / 'model'
model_dir.mkdir(exist_ok=True, parents=True)
model.save(model_dir)

In [None]:
# load model
# Replace the in-memory model with the copy read back from model_dir.
model = tf.keras.models.load_model(model_dir)

In [None]:
# Score for each class index, in the order the dataset builder assigned them.
# ImageFolder sorts label folder names as strings ('0', '10', '100', '20', ...),
# so class index i is NOT i * 10; a numerically ordered list would, for
# example, report 20 whenever the model predicts the '100' bucket.
catagories = [int(name) for name in builder.info.features['label'].names]

# `with` closes the file even if a prediction fails part-way through;
# newline='' is what the csv module expects from the file it writes to.
with open('/kaggle/working/submission.csv', 'w', newline='') as f:
    writer = csv.writer(f)
    writer.writerow(['Id','Pawpularity'])

    for sample in test_images:
        Id = sample.stem

        # prep image: same cast / rescale / resize as format_example,
        # plus a leading batch axis of size 1
        img = mpimg.imread(sample)
        img = tf.cast(img, tf.float32)
        img = (img/127.5) - 1
        img = tf.image.resize(img, (IMG_SIZE, IMG_SIZE))
        img = tf.expand_dims(img, axis=0)

        # prediction: index of the largest logit -> bucket score
        guesses = model.predict(img)
        guess = guesses.argmax()
        prediction = catagories[guess]

        # write prediction
        print(f'{Id},{prediction}')
        writer.writerow([Id, prediction])