In [66]:
import numpy as np
import matplotlib.pyplot as plt

%matplotlib inline

In [67]:
import tensorflow as tf
from tensorflow import keras

## 8.2 TensorFlow and Keras

In [68]:
from tensorflow.keras.preprocessing.image import load_img

In [69]:
# Sample training image taken from the "t-shirt" class folder.
file_path = "../../clothing-dataset-small/train/t-shirt/5f0a3fa0-6a3d-4b68-b213-72766a643de7.jpg"

# A neural network expects images of a fixed size (usually 299x299, 224x224 or
# 150x150), so the image is resized while it is loaded. load_img uses PIL.
img = load_img(path=file_path, target_size=(299, 299))

# Internally an image is an array of shape (height, width, 3): one channel each
# for R, G and B. Converting the PIL image to a NumPy array exposes that layout;
# every innermost triple, e.g. [179 171 99], is the R, G, B value of one pixel.
x = np.array(img)

# Shape first, then the (abbreviated) pixel values.
print(x.shape, x, sep="\n")


(299, 299, 3)
[[[179 171  99]
  [179 171  99]
  [181 173 101]
  ...
  [251 253 248]
  [251 253 248]
  [251 254 247]]

 [[188 179 112]
  [187 178 111]
  [186 177 108]
  ...
  [251 252 247]
  [251 252 247]
  [251 252 246]]

 [[199 189 127]
  [200 190 128]
  [200 191 126]
  ...
  [250 251 245]
  [250 251 245]
  [250 251 245]]

 ...

 [[165 151  76]
  [173 159  84]
  [171 157  82]
  ...
  [183 135  25]
  [181 133  22]
  [183 135  24]]

 [[165 151  76]
  [173 159  84]
  [171 157  82]
  ...
  [182 134  23]
  [180 132  21]
  [182 134  23]]

 [[165 151  76]
  [173 159  84]
  [171 157  82]
  ...
  [181 133  22]
  [179 131  20]
  [182 134  23]]]


## 8.3 Pre-trained convolutional neural networks
- Imagenet dataset: https://www.image-net.org/
- Pre-trained models: https://keras.io/api/applications/
- Using SaturnCloud for running models in cloud - with GPU

In [70]:
from tensorflow.keras.applications.xception import Xception
from tensorflow.keras.applications.xception import preprocess_input
from tensorflow.keras.applications.xception import decode_predictions

In [71]:
# Pre-trained Xception classifier (ImageNet weights) for 299x299 RGB input.
model = Xception(
    input_shape=(299, 299, 3),
    weights="imagenet",
)

In [72]:
# The model works on batches of images, so wrap the single image in a batch of
# size one: (299, 299, 3) -> (1, 299, 299, 3).
X = np.stack([x])
X.shape

(1, 299, 299, 3)

In [73]:
# Always apply the model's own `preprocess_input` before predicting: it rescales
# the raw 0-255 pixel values into the range the network was trained on (the
# values shown below lie within [-1, 1]).
#
# The batch is rebuilt from the raw image `x` rather than from the previous `X`,
# so re-running this cell never preprocesses already-preprocessed data.
X = preprocess_input(np.array([x]))
X[0]

array([[[ 0.4039216 ,  0.3411765 , -0.2235294 ],
        [ 0.4039216 ,  0.3411765 , -0.2235294 ],
        [ 0.41960788,  0.35686278, -0.20784312],
        ...,
        [ 0.96862745,  0.9843137 ,  0.94509804],
        [ 0.96862745,  0.9843137 ,  0.94509804],
        [ 0.96862745,  0.99215686,  0.9372549 ]],

       [[ 0.47450984,  0.4039216 , -0.12156862],
        [ 0.4666667 ,  0.39607847, -0.12941176],
        [ 0.45882356,  0.38823533, -0.15294117],
        ...,
        [ 0.96862745,  0.9764706 ,  0.9372549 ],
        [ 0.96862745,  0.9764706 ,  0.9372549 ],
        [ 0.96862745,  0.9764706 ,  0.92941177]],

       [[ 0.56078434,  0.48235297, -0.00392157],
        [ 0.5686275 ,  0.4901961 ,  0.00392163],
        [ 0.5686275 ,  0.49803925, -0.01176471],
        ...,
        [ 0.9607843 ,  0.96862745,  0.92156863],
        [ 0.9607843 ,  0.96862745,  0.92156863],
        [ 0.9607843 ,  0.96862745,  0.92156863]],

       ...,

       [[ 0.2941177 ,  0.18431377, -0.40392154],
        [ 0

In [74]:
# Run the pre-trained classifier on the preprocessed batch.
pred = model.predict(X)
# One row per input image, one column per class (see the shape below).
pred.shape



(1, 1000)

In [75]:
# Translate the raw class scores into (class id, human-readable label, score)
# tuples, keeping the five highest-scoring classes for each image.
decode_predictions(pred, top=5)

[[('n03595614', 'jersey', 0.68196297),
  ('n02916936', 'bulletproof_vest', 0.03814007),
  ('n04370456', 'sweatshirt', 0.034324836),
  ('n03710637', 'maillot', 0.011354229),
  ('n04525038', 'velvet', 0.0018453625)]]

## 8.4. Convolutional neural networks
- Types of layers
- Convolutional layers and filters
- Dense layers
- Mostly used for images
  
There are more layers, read: https://cs231n.github.io


1. Convolutional (role - extract vector representation of the image)
   Consists of filters - small images, e.g. 5x5, that contain simple shapes such as lines.
   Every time a filter is applied, we slide it across the image and calculate the similarity between the filter and each region of the image. The result is a feature map, where a higher value in a "cell" means higher similarity with the filter.
   Input - image, output - feature maps (one for each filter).
   The output of the 1st convolutional layer is a set of feature maps.
   We then use these as the input "image" for the 2nd convolutional layer.
   There can be more convolutional layers, and each stage in the chain (layer, filters, feature maps) learns more complex filters.

   Filters are "learned" during training.
   e.g. 3 layers
   1st layer learns simple filters (e.g. simple shapes)
   2nd layer learns more complex shapes (by combining filters from previous layers)
   3rd layer learns even more complex shapes
   Finally, there's a vector representation that's ready to be used in the dense layers.
2. Dense layers (role - make final prediction)
   Dense - because each elem of input connects to each elem of output.
   So it's essentially matrix multiplication
   E.g. if we want to predict whether an image is a t-shirt, a dress or jeans, we could use logistic regression:
   we multiply the image vector values $x_i$ by the weights $w_j$ of each specific class.
   Possible to combine multiple dense layers

* Pooling layer 
  Takes a feature map produced by one of the convolutional layers and makes it smaller, which forces the network to have fewer parameters.
  E.g. if the feature map is 200x200, after pooling it could be 100x100.



## 8.5. Transfer learning
- Reading data with ImageDataGenerator
- Train `Xception` on smaller images (150x150)
- Better run this on GPU (using Saturn Cloud)


- Transforming an image into a vector (i.e. the learned filters) is quite generic, so there is no need to change that part
- The dense layers, however, are specific to the ImageNet dataset
- So we keep the convolutional layers but train new dense layers (the idea behind transfer learning)


In [76]:
from tensorflow.keras.preprocessing.image import ImageDataGenerator

In [77]:
# Generator that applies Xception's `preprocess_input` to every image it loads.
train_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

In [78]:
# Stream the training images from disk in batches of 32, resized to 150x150.
# Each sub-folder of the directory is treated as one class.
train_ds = train_gen.flow_from_directory(
    directory='../../clothing-dataset-small/train',
    batch_size=32,
    target_size=(150, 150),
)

Found 3068 images belonging to 10 classes.


In [79]:
# List the training directory: it contains one sub-folder per clothing category.
!ls -l ../../clothing-dataset-small/train

total 264
drwxrwxr-x 2 tom tom 20480 nov  9 20:21 dress
drwxrwxr-x 2 tom tom 12288 nov  9 20:21 hat
drwxrwxr-x 2 tom tom 36864 nov  9 20:21 longsleeve
drwxrwxr-x 2 tom tom 20480 nov  9 20:21 outwear
drwxrwxr-x 2 tom tom 36864 nov  9 20:21 pants
drwxrwxr-x 2 tom tom 20480 nov  9 20:21 shirt
drwxrwxr-x 2 tom tom 20480 nov  9 20:21 shoes
drwxrwxr-x 2 tom tom 20480 nov  9 20:21 shorts
drwxrwxr-x 2 tom tom 12288 nov  9 20:21 skirt
drwxrwxr-x 2 tom tom 69632 nov  9 20:21 t-shirt


In [80]:
# Class names are inferred from the folder names,
# e.g. everything inside the `t-shirt` folder is labelled with the 't-shirt' class.
# `class_indices` maps each class name to its integer index.
train_ds.class_indices

{'dress': 0,
 'hat': 1,
 'longsleeve': 2,
 'outwear': 3,
 'pants': 4,
 'shirt': 5,
 'shoes': 6,
 'shorts': 7,
 'skirt': 8,
 't-shirt': 9}

In [81]:
# Pull one batch from the training iterator: X holds the images, y their labels.
# NOTE: this overwrites the `X` created earlier for the single 299x299 image.
X, y = next(train_ds)

In [82]:
# Labels are one-hot encoded for multiclass classification: each row has a single 1
# in the column of its class index (e.g. the last column, index 9, is 't-shirt').
y[:5]

array([[0., 0., 1., 0., 0., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 1., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 1., 0., 0., 0., 0., 0.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [83]:
# Validation images go through the same Xception preprocessing as the training images.
val_gen = ImageDataGenerator(preprocessing_function=preprocess_input)

# shuffle=False: validation batches are produced in a fixed order.
val_ds = val_gen.flow_from_directory(
    directory='../../clothing-dataset-small/validation',
    batch_size=32,
    target_size=(150, 150),
    shuffle=False,
)

Found 341 images belonging to 10 classes.


In [84]:
# The base model supplies the convolutional layers; a custom model is trained on
# top of it.
# Keras pictures a deep-learning model from bottom to top: the convolutional
# layers are the bottom, the dense layers that make the prediction are the top.
# include_top=False therefore leaves the dense layers out.
base_model = Xception(
    include_top=False,
    weights="imagenet",
    input_shape=(150, 150, 3),
)

# Freeze the base: training the new model must not change the convolutional layers.
base_model.trainable = False

In [95]:
## Creating a new top

inputs = keras.Input(shape=(150, 150, 3))

# training=False runs the frozen base in inference mode. This matters for its
# BatchNormalization layers: their statistics must not be updated while the new
# top is trained (nor later, if the base is unfrozen for fine-tuning).
base = base_model(inputs, training=False)

# Average every feature map down to a single value, so that each image is
# represented by one flat vector.
vectors = keras.layers.GlobalAveragePooling2D()(base)

# 10 output units because there are 10 classes. No activation is applied, so the
# outputs are raw scores (logits).
outputs = keras.layers.Dense(10)(vectors)

model = keras.Model(inputs, outputs)


In [96]:
# Sanity check of the new model on the training batch `X` fetched earlier.
# The dense layer has not been trained yet, so only the output shape is meaningful here.
preds = model.predict(X)
# One row per image in the batch, one column per class (see the shape below).
preds.shape



(32, 10)

In [98]:
# Loss: categorical cross-entropy for the one-hot encoded labels.
# from_logits=True because the final Dense layer has no activation and
# therefore outputs raw scores.
loss = keras.losses.CategoricalCrossentropy(from_logits=True)

# Optimizer: adjusts the weights by checking whether a change in a weight leads
# to a better outcome (evaluated through the loss). There are different
# optimizers; Adam is used here.
learning_rate = 0.01
optimizer = keras.optimizers.Adam(learning_rate=learning_rate)

In [100]:
# Attach the loss and optimizer to the model and track accuracy while training.
model.compile(
    loss=loss,
    optimizer=optimizer,
    metrics=["accuracy"],
)