In [None]:
!pip install -U tensorflow-addons

In [None]:
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow_addons as tfa
import matplotlib.pyplot as plt
import cv2
import os
import scipy.io
import shutil

### Hyperparameters

In [None]:
# Side length, in pixels, of the square that training images are resized to.
image_size = 224
# Side length, in pixels, of the square patches cut out by the Patches layer.
patch_size = 32

### Prepare dataset

In [None]:
# Download the Caltech-101 zip and let Keras extract it. The cache root is the
# current directory; the unpack cells that follow expect the extracted archives
# under ./datasets/caltech-101/.
path_to_download_file = keras.utils.get_file(
    origin="https://data.caltech.edu/tindfiles/serve/e41f5188-0b32-41fa-801b-d1e840915e80/",
    fname='caltech_101_zipped',
    cache_dir='./',
    extract=True,
    archive_format='zip'
)

In [None]:
# Unpack the image tarball found under datasets/caltech-101/ into the current directory.
shutil.unpack_archive(
    filename='datasets/caltech-101/101_ObjectCategories.tar.gz',
    extract_dir='./'
)

In [None]:
# Unpack the annotations tarball found under datasets/caltech-101/ into the current directory.
shutil.unpack_archive(
    filename='datasets/caltech-101/Annotations.tar',
    extract_dir='./'
)

In [None]:
# Directory (relative to the working directory) holding the 'airplanes' images.
path_images = '101_ObjectCategories/airplanes/'
# Directory holding the annotation files that are later read with scipy.io.loadmat.
# NOTE(review): assumes one annotation file per image in path_images — confirm.
path_annot = 'Annotations/Airplanes_Side_2/'

In [None]:
def _list_files(directory):
  """Return the names of the entries directly inside `directory` that are files."""
  return [
      entry
      for entry in os.listdir(directory)
      if os.path.isfile(os.path.join(directory, entry))
  ]


# Bare file names (not full paths) of the images and of the annotations.
image_paths = _list_files(path_images)
annot_paths = _list_files(path_annot)

In [None]:
# The loading loop pairs image_paths[i] with annot_paths[i], so both listings
# need the same deterministic (lexicographic) order.
image_paths = sorted(image_paths)
annot_paths = sorted(annot_paths)

In [None]:
# Preview the first ten names from each list as a quick check of the index-based pairing.
image_paths[:10], annot_paths[:10]

In [None]:
# Samples before this index (the first 80%) form the training split; the rest are test.
split_at = int(len(annot_paths) * 0.8)

images, targets = [], []
for i, annot_file in enumerate(annot_paths):
  # The i-th annotation is paired with the i-th image by position.
  annot = scipy.io.loadmat(os.path.join(path_annot, annot_file))['box_coord'][0]
  # box_coord is read as: [0] top-left y, [1] bottom-right y, [2] top-left x, [3] bottom-right x.
  top_left_y, bottom_right_y = annot[0], annot[1]
  top_left_x, bottom_right_x = annot[2], annot[3]

  image = keras.utils.load_img(os.path.join(path_images, image_paths[i]))
  # Original dimensions, captured before any resize so the box can be normalised.
  w, h = image.size[:2]

  # Only training samples are resized; test images keep their original size.
  # NOTE(review): x_test may therefore hold arrays of differing shapes — confirm
  # that downstream evaluation handles this.
  if i < split_at:
    image = image.resize((image_size, image_size))
  images.append(keras.utils.img_to_array(image))

  # Express the box as fractions of the original width/height:
  # (left / w, top / h, right / w, bottom / h).
  corners = (top_left_x, top_left_y, bottom_right_x, bottom_right_y)
  extents = (w, h, w, h)
  targets.append(tuple(float(value) / extent for value, extent in zip(corners, extents)))

# Positional 80/20 split of the collected samples.
x_train = np.asarray(images[:split_at])
y_train = np.asarray(targets[:split_at])
x_test = np.asarray(images[split_at:])
y_test = np.asarray(targets[split_at:])

### MLP layer

In [None]:
def mlp(x, hidden_units, dropout_rate):
  """Stack Dense(GELU) + Dropout pairs on top of `x`.

  Args:
    x: Input to the first Dense layer.
    hidden_units: Iterable of layer widths; one Dense/Dropout pair is added per entry, in order.
    dropout_rate: Rate passed to every Dropout layer.

  Returns:
    The output of the last Dropout layer, or `x` itself when `hidden_units` is empty.
  """
  features = x
  for width in hidden_units:
    hidden = layers.Dense(width, activation=tf.nn.gelu)(features)
    features = layers.Dropout(dropout_rate)(hidden)
  return features

### Patch creation layer

In [None]:
class Patches(layers.Layer):
  """Splits a batch of images into flattened, non-overlapping square patches.

  Args:
    patch_size: Side length, in pixels, of each square patch.
    **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`), forwarded
      to `layers.Layer`.
  """

  def __init__(self, patch_size, **kwargs):
    super().__init__(**kwargs)
    self.patch_size = patch_size

  def get_config(self):
    """Return the layer config, including `patch_size`, so the layer can be
    serialized and re-created when a model containing it is saved and loaded."""
    config = super().get_config()
    config.update({'patch_size': self.patch_size})
    return config

  def call(self, images):
    """Extract patches from `images` and flatten each one.

    Args:
      images: 4-D image batch accepted by `tf.image.extract_patches`.

    Returns:
      A tensor reshaped to (batch, number of patches, values per patch).
    """
    # Dynamic batch size, so the reshape below works when the static batch dimension is unknown.
    batch_size = tf.shape(images)[0]
    # Stride equal to the patch size means patches do not overlap; 'VALID'
    # padding keeps only patches that fit entirely inside the image.
    patches = tf.image.extract_patches(
        images=images,
        sizes=[1, self.patch_size, self.patch_size, 1],
        strides=[1, self.patch_size, self.patch_size, 1],
        rates=[1, 1, 1, 1],
        padding='VALID'
    )
    # Collapse the 2-D grid of patches into a single sequence axis.
    return tf.reshape(patches, [batch_size, -1, patches.shape[-1]])

#### Display patches

In [None]:
# Show the first training image.
plt.figure(figsize=(4, 4))
plt.imshow(x_train[0].astype('uint8'))
plt.axis('off')

# Run that single image (as a batch of one) through the Patches layer and report the shapes.
patches = Patches(patch_size)(tf.convert_to_tensor([x_train[0]]))
print(f'Image size: {image_size}x{image_size}')
print(f'Patch_size: {patch_size}x{patch_size}')
print(f'{patches.shape[1]} patches per image')
print(f'{patches.shape[-1]} elements per patch')
print(f'Patches shape: {patches.shape}')

# Lay the patches out on an n x n grid; n is the integer square root of the patch count.
n = int(np.sqrt(patches.shape[1]))
plt.figure(figsize=(4, 4))
for i, patch in enumerate(patches[0]):
  ax = plt.subplot(n, n, i + 1)
  # Each patch is a flat vector; restore it to (height, width, 3 channels) for display.
  patch_img = tf.reshape(patch, (patch_size, patch_size, 3))
  # Draw through the returned axes explicitly instead of relying on pyplot's current-axes state.
  ax.imshow(patch_img.numpy().astype('uint8'))
  ax.axis('off')