In [1]:
%tensorflow_version 2.x

TensorFlow 2.x selected.


In [0]:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.models import Model, save_model
from tensorflow.keras.layers import Dense, Conv2D, Conv2DTranspose, concatenate, multiply, Flatten, Reshape
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.optimizers.schedules import ExponentialDecay
from tensorflow.keras.metrics import TopKCategoricalAccuracy
from pydrive.auth import GoogleAuth
from pydrive.drive import GoogleDrive
from google.colab import auth
from oauth2client.client import GoogleCredentials
from google.colab import files
import pandas as pd
import numpy as np
import tensorflow_addons as tfa
import tarfile
import tensorflow as tf

In [0]:
auth.authenticate_user()
gauth = GoogleAuth()
gauth.credentials = GoogleCredentials.get_application_default()
drive = GoogleDrive(gauth)

In [0]:
downloaded = drive.CreateFile({'id':"1Kt28zC5d3MRf8i9ZYLL1qaF_0PrpnIfX"})
downloaded.GetContentFile('img.tar.gz')
tarfile.open('img.tar.gz').extractall()
!rm -f img.tar.gz

downloaded = drive.CreateFile({'id':"1bUNyC5dlnVJXBd0LhXU08le9XYxiB4F_"})
downloaded.GetContentFile('train_data.csv')

downloaded = drive.CreateFile({'id':"1hqKXEI3n9_rjfJLLWFy_hwzWXw5Uyyu_"})
downloaded.GetContentFile('val_data.csv')

downloaded = drive.CreateFile({'id':"1rCEFjQ9v3Vwt1fTo5WNscU3wKz7kNeJR"})
downloaded.GetContentFile('test_data.csv')

In [0]:
train_data = pd.read_csv("train_data.csv")
train_data = train_data.sample(frac=1).reset_index(drop=True)

val_data = pd.read_csv("val_data.csv")
val_data = val_data.sample(frac=1).reset_index(drop=True)

test_data = pd.read_csv("test_data.csv")
test_data = test_data.sample(frac=1).reset_index(drop=True)

In [0]:
def fix_labels(num):
    num = int(num)
    if num > 45:
        num -= 1
    if num > 38:
        num -= 1
    if num > 31:
        num -= 1
    num -= 1
    return str(num)

train_data["category_label"] = train_data["category_label"].apply(lambda x:fix_labels(x))
val_data["category_label"] = val_data["category_label"].apply(lambda x:fix_labels(x))
test_data["category_label"] = test_data["category_label"].apply(lambda x:fix_labels(x))

In [7]:
num_classes = 45
vgg_model = VGG16(weights="imagenet")
vgg_model = Model(vgg_model.input, vgg_model.get_layer("block4_conv3").output)

#landmark branch
x = Conv2D(64, (3, 3), activation="relu", padding="same")(vgg_model.output)
x = Conv2DTranspose(64, (3, 3), strides=(2, 2), activation="relu", padding="same")(x)
x = Conv2DTranspose(32, (3, 3), strides=(2, 2), activation="relu", padding="same")(x)
x = Conv2DTranspose(16, (3, 3), strides=(2, 2), activation="relu", padding="same")(x)
x = Conv2D(1, (3, 3), strides=(8, 8), activation="relu", padding="same")(x)
x = Conv2D(8, (3, 3), activation="sigmoid", padding="same")(x)
landmark_output = concatenate([vgg_model.output, x])

#attention branch
x = Conv2D(32, (3, 3), activation="relu")(landmark_output)
x = Conv2D(64, (3, 3), activation="relu", strides=(2, 2), padding="same")(x)
x = Conv2D(128, (3, 3), activation="relu", strides=(2, 2), padding="same")(x)
x = Conv2DTranspose(64, (3, 3), activation="relu", strides=(2, 2), padding="same")(x)
attention_output = Conv2DTranspose(512, (3, 3), activation="sigmoid", strides=(2, 2), padding="same")(x)

#final branch
x = multiply([vgg_model.output, attention_output])
x = Conv2D(512, (3, 3), activation="relu", padding="same")(x)
x = Conv2D(512, (3, 3), activation="relu", padding="same")(x)
x = Conv2D(512, (3, 3), activation="relu", padding="same")(x)
x = Flatten()(x)
x = Dense(1000, activation="relu", kernel_regularizer=l2(l=0.001))(x)
final_out = Dense(num_classes, activation="softmax", kernel_regularizer=l2(l=0.001))(x)
model = Model(vgg_model.input, final_out)

lr_schedule = ExponentialDecay(0.0001, 1000, 0.9, staircase=True)
model.compile(loss="categorical_crossentropy", optimizer=Adam(learning_rate=lr_schedule),
              metrics=["accuracy", TopKCategoricalAccuracy(k=3)])
model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
block1_conv1 (Conv2D)           (None, 224, 224, 64) 1792        input_1[0][0]                    
__________________________________________________________________________________________________
block1_conv2 (Conv2D)           (None, 224, 224, 64) 36928       block1_conv1[0][0]               
__________________________________________________________________________________________________
block1_pool (MaxPooling2D)      (None, 112, 112, 64) 

In [0]:
AUTOTUNE = tf.data.experimental.AUTOTUNE

all_train_labels = tf.one_hot(train_data["category_label"].values.astype(np.int32), num_classes)
all_train_paths = train_data["image_name"].values
train_filepath_dataset = tf.data.Dataset.from_tensor_slices((all_train_paths, all_train_labels))

all_val_labels = tf.one_hot(val_data["category_label"].values.astype(np.int32), num_classes)
all_val_paths = val_data["image_name"].values
val_filepath_dataset = tf.data.Dataset.from_tensor_slices((all_val_paths, all_val_labels))

all_test_labels = tf.one_hot(test_data["category_label"].values.astype(np.int32), num_classes)
all_test_paths = test_data["image_name"].values
test_filepath_dataset = tf.data.Dataset.from_tensor_slices((all_test_paths, all_test_labels))

In [0]:
im_size = [224,224]
batch_size = 8
def get_image_and_label(filename, label):
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, im_size)
    return (image, label)

def get_image_and_label_augment(filename, label):
    image_string = tf.io.read_file(filename)
    image = tf.image.decode_jpeg(image_string, channels=3)
    image = tf.image.convert_image_dtype(image, tf.float32)
    image = tf.image.resize(image, im_size)

    image = tfa.image.rotate(image, 30)
    image = tf.image.central_crop(image, .80)
    image = tf.image.resize(image, im_size)
    image = tfa.image.translate(image, translations=[20, 20])
    image = tf.image.random_flip_left_right(image)
    return (image, label)

def prepare_for_training(ds, batch_size=None, shuffle_buffer_size=1000, repeat=True):
    ds = ds.shuffle(buffer_size=shuffle_buffer_size)
    if repeat:
        ds = ds.repeat()
    if batch_size != None:
        ds = ds.batch(batch_size)
    ds = ds.prefetch(buffer_size=AUTOTUNE)
    return ds

In [0]:
train_dataset = train_filepath_dataset.map(get_image_and_label, num_parallel_calls=AUTOTUNE)
train_dataset = prepare_for_training(train_dataset, batch_size=batch_size)

val_dataset = val_filepath_dataset.map(get_image_and_label, num_parallel_calls=AUTOTUNE)
val_dataset = prepare_for_training(val_dataset, batch_size=batch_size)

test_dataset = test_filepath_dataset.map(get_image_and_label, num_parallel_calls=AUTOTUNE)
test_dataset = prepare_for_training(test_dataset, batch_size=batch_size)

In [11]:
model.fit(train_dataset,
          validation_data=val_dataset,
          steps_per_epoch=2000,
          validation_steps=100,
          epochs=10)

Train for 2000 steps, validate for 100 steps
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<tensorflow.python.keras.callbacks.History at 0x7f23693606a0>

In [0]:
from google.colab import files
tf.keras.models.save_model(model, "attention.h5", include_optimizer=False, save_format="h5")
tf.keras.models.save_model(model, "attention_opt.h5", include_optimizer=True, save_format="h5")

files.download('attention.h5')
files.download('attention_opt.h5')