##**Notebook for sun attribute classification in tensorflow 2**

###A Lot of this code is taken from Tensorflow 2's [tutorial](https://www.tensorflow.org/beta/tutorials/load_data/images) to load data

+ First Objective is to Load the images and attributes to Create a dataset


In [0]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [0]:
!pip install -q tensorflow-gpu==2.0.0-beta1
import os
import scipy.io
import pandas as pd
import numpy as np
import tensorflow as tf
#import tensorflow_datasets as tfds
from PIL import Image
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt

The path to all images is stored in the images.mat file

In [0]:
images = scipy.io.loadmat('gdrive/My Drive/Sun_Att/SUNAttributeDB/images.mat')
im_list = [list(images['images'][i][0])[0] for i in range(len(images['images']))]
im_path  = ['gdrive/My Drive/Sun_Att/images/' + str(i) for i in im_list]

In [0]:
im_path[0:3]

['gdrive/My Drive/Sun_Att/images/a/abbey/sun_aakbdcgfpksytcwj.jpg',
 'gdrive/My Drive/Sun_Att/images/a/abbey/sun_aaoktempcmudsvna.jpg',
 'gdrive/My Drive/Sun_Att/images/a/abbey/sun_abegcweqnetpdlrh.jpg']

Path to attributes is stored in attributeLabels_continuous.mat

The labels are loaded and multiplied by 100 then converted into integers so that the np.where command can work. It was not working for small float values for some reason

In [0]:
attributes = scipy.io.loadmat('gdrive/My Drive/Sun_Att/SUNAttributeDB/attributeLabels_continuous.mat')

In [0]:
labels = attributes['labels_cv']
labels = labels*100
labels = labels.astype(int)
labels[0]

array([  0,   0,   0,   0,   0,  66,   0,   0,   0,   0,   0,   0,   0,
         0,   0,   0,   0,   0,   0,  33,   0,   0,   0,   0,   0,  33,
         0,   0,   0,   0,   0,   0,   0,   0,   0, 100,   0,   0,   0,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0,  33,   0,   0,
         0,  33,   0,   0,   0,   0,   0,   0,   0,  33,   0,   0,  33,
         0,   0,   0,   0,   0,   0,   0,   0,   0,  33,  33,   0,  66,
         0,   0,   0,   0,   0,   0,   0,   0,   0,   0, 100,  33,   0,
         0,   0,  33,   0,  66,   0, 100,   0,   0,   0,   0])

In [0]:
labels = np.where(labels==33, 1, labels)
labels = np.where(labels==66, 1, labels)
labels = np.where(labels==100, 1, labels)

The attributes here are changed where it is 1 if it is present and 0 if not, since we're only interesed to know whether the attribute is present or not

In [0]:
labels[0]

array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0,
       0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0])

In [0]:
len(labels[0])

102

In [0]:
len(attributes['labels_cv'])

14340

In [0]:
x_train, x_val, y_train, y_val = train_test_split(im_path, labels, test_size=0.15)

In [0]:
x_train[0], y_train[0]

('gdrive/My Drive/Sun_Att/images/c/circus_tent/outdoor/sun_apjttwzdvavbhosc.jpg',
 array([0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 1, 0,
        0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0,
        1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 1]))

In [0]:
path_ds = tf.data.Dataset.from_tensor_slices(x_train)
def preprocess_image(image):
    image = tf.image.decode_image(image, channels=3, expand_animations=False)
    image = tf.image.resize(image, [400, 400])
    image /= 255.0  # normalize to [0,1] range

    return image

def load_and_preprocess_image(path):
    image = tf.io.read_file(path)
    return preprocess_image(image)

In [0]:
image_ds = path_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(y_train, tf.int64))

In [0]:
for i,j in enumerate(image_ds.take(2)):
    print(i, j.shape)

0 (400, 400, 3)
1 (400, 400, 3)


In [0]:
for i,j in enumerate(label_ds.take(2)):
    print(i, j.shape)

0 (102,)
1 (102,)


In [0]:
image_label_ds = tf.data.Dataset.zip((image_ds, label_ds))

In [0]:
image_label_ds

<ZipDataset shapes: ((400, 400, None), (102,)), types: (tf.float32, tf.int64)>

In [0]:
BATCH_SIZE = 32
ds = image_label_ds.apply(
  tf.data.experimental.shuffle_and_repeat(buffer_size=400))
ds = ds.batch(BATCH_SIZE)
ds = ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
ds

<PrefetchDataset shapes: ((None, 400, 400, None), (None, 102)), types: (tf.float32, tf.int64)>

In [0]:
def change_range(image,label):
    return 2*image-1, label

train_ds = ds.map(change_range)

In [0]:
val_path_ds = tf.data.Dataset.from_tensor_slices(x_val)
val_image_ds = val_path_ds.map(load_and_preprocess_image, num_parallel_calls=tf.data.experimental.AUTOTUNE)
val_label_ds = tf.data.Dataset.from_tensor_slices(tf.cast(y_val, tf.int64))
val_image_label_ds = tf.data.Dataset.zip((val_image_ds, val_label_ds))
BATCH_SIZE = 32

# Setting a shuffle buffer size as large as the dataset ensures that the data is
# completely shuffled.
val_ds = val_image_label_ds.apply(
  tf.data.experimental.shuffle_and_repeat(buffer_size=400))
val_ds = val_ds.batch(BATCH_SIZE)
# `prefetch` lets the dataset fetch batches in the background while the model is training.
val_ds = val_ds.prefetch(buffer_size=tf.data.experimental.AUTOTUNE)
val_ds

<PrefetchDataset shapes: ((None, 400, 400, None), (None, 102)), types: (tf.float32, tf.int64)>

In [0]:
val_ds = val_ds.map(change_range)

In [0]:
mobile_net = tf.keras.applications.MobileNetV2(input_shape=(400, 400, 3), include_top=False)
mobile_net.trainable=False



In [0]:
tf.data.experimental.cardinality(train_ds)

<tf.Tensor: id=6575, shape=(), dtype=int64, numpy=-1>

In [0]:
image_batch, label_batch = next(iter(train_ds))

In [0]:
feature_map_batch = mobile_net(image_batch)
print(feature_map_batch.shape)

(32, 13, 13, 1280)


In [0]:
val_image_batch, val_label_batch = next(iter(val_ds))
val_feature_map_batch = mobile_net(val_image_batch)
print(val_feature_map_batch.shape)

(32, 13, 13, 1280)


In [0]:
model = tf.keras.Sequential([
    mobile_net,
    tf.keras.layers.GlobalAveragePooling2D(),
    tf.keras.layers.Dense(len(labels[0]), activation = 'sigmoid')])

In [0]:
tmp_output = model(image_batch).numpy()

In [0]:
tmp_output.shape

(32, 102)

In [0]:
model.compile(optimizer=tf.keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])

In [0]:
len(model.trainable_variables)

2

In [0]:
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
mobilenetv2_1.00_224 (Model) (None, 13, 13, 1280)      2257984   
_________________________________________________________________
global_average_pooling2d (Gl (None, 1280)              0         
_________________________________________________________________
dense (Dense)                (None, 102)               130662    
Total params: 2,388,646
Trainable params: 130,662
Non-trainable params: 2,257,984
_________________________________________________________________


In [0]:
train_steps_per_epoch=len(x_train)/BATCH_SIZE
test_steps_per_epoch=len(x_val)/BATCH_SIZE
train_steps_per_epoch, test_steps_per_epoch

(381.0, 68.0)

In [0]:
len(labels)

14340

In [0]:
steps_per_epoch=tf.math.ceil(len(labels)/BATCH_SIZE).numpy()
steps_per_epoch

449.0

In [0]:
model.fit(train_ds, epochs=4, steps_per_epoch=train_steps_per_epoch, validation_data=(val_ds), validation_steps=test_steps_per_epoch)

Epoch 1/2


W0812 19:35:34.010533 140098165557120 deprecation.py:323] From /usr/local/lib/python3.6/dist-packages/tensorflow/python/ops/math_grad.py:1250: add_dispatch_support.<locals>.wrapper (from tensorflow.python.ops.array_ops) is deprecated and will be removed in a future version.
Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Epoch 2/2


<tensorflow.python.keras.callbacks.History at 0x7f6a631f3dd8>

In [0]:
test_im = tf.expand_dims(val_image_batch[1], axis=0)
test_im.shape

TensorShape([1, 400, 400, 3])

In [0]:
test_out = model.predict(test_im)

In [0]:
test_out[0]

array([0.0098764 , 0.2206653 , 0.20066684, 0.12631199, 0.60593295,
       0.20537478, 0.29818672, 0.08312905, 0.86660796, 0.10559937,
       0.03176907, 0.16858926, 0.01236215, 0.02379438, 0.0535675 ,
       0.09715232, 0.08070478, 0.05702436, 0.27940136, 0.14336541,
       0.03645343, 0.77432394, 0.8406855 , 0.68013173, 0.82911515,
       0.09287405, 0.336307  , 0.7799603 , 0.03123611, 0.00486681,
       0.03241161, 0.04931036, 0.06600982, 0.12933946, 0.01173598,
       0.06865934, 0.584544  , 0.18383107, 0.52363133, 0.08419418,
       0.9039204 , 0.9631185 , 0.9657166 , 0.694248  , 0.8847792 ,
       0.877459  , 0.08762953, 0.17868146, 0.30578122, 0.15257296,
       0.06810191, 0.13756359, 0.07794237, 0.14198199, 0.08350685,
       0.01605085, 0.08714584, 0.04317525, 0.11219284, 0.16913259,
       0.2745856 , 0.03872469, 0.7713981 , 0.04051003, 0.12312123,
       0.02897906, 0.01556373, 0.06747529, 0.04892761, 0.02661797,
       0.05062994, 0.2979083 , 0.02558517, 0.0117012 , 0.97681

In [0]:
test_out = np.where(test_out <= 0.5, 0, 1)

In [0]:
test_out[0]

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0])

In [0]:
val_label_batch[1].numpy()

array([0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0,
       1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
       1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0])

In [0]:
sum(test_out[0] * val_label_batch[1].numpy()) 

18

In [0]:
sum(val_label_batch[1].numpy())

19