In [1]:
# Make the parent directory importable. The project-local modules imported in
# a later cell (utils, loss, generator) are presumably located there.
import sys
sys.path.append("..")

In [2]:
!nvidia-smi

Mon Apr 18 00:25:03 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.94       Driver Version: 470.94       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:05:00.0 Off |                  N/A |
| 35%   61C    P2    65W / 250W |   7709MiB / 11177MiB |     10%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:06:00.0 Off |                  N/A |
| 70%   84C    P2   198W / 250W |  10281MiB / 11178MiB |     92%      Default |
|       

In [3]:
import os
# Restrict this process to the GPU with index 3. This cell runs before the
# cell that imports TensorFlow, so the variable is already set at import time.
os.environ["CUDA_VISIBLE_DEVICES"]="3"

In [4]:
# Import Libraries
from transformers import TFAutoModel
from utils import rotate_preserve_size
from loss import angular_loss_mae
import glob
import os
import numpy as np
import cv2
import random

from tensorflow.keras.models import Model
from tensorflow.keras import layers as L
import tensorflow as tf
import os
import pandas as pd
from tensorflow.keras.applications import Xception, EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from loguru import logger
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adadelta
from generator import RotGenerator, ValidationTestGenerator

2022-04-18 00:25:14.696835: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1


In [5]:
from transformers import ViTFeatureExtractor
# Module-level preprocessing object for the ViT branch. The generator classes
# in this notebook call it to turn rotated images into "pixel_values".
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

class ViTRotGenerator(Sequence):
    """Training batch generator that rotates each image by a random angle.

    Every ``*.jpg`` found directly inside ``image_dir`` is rotated with
    ``rotate_preserve_size`` by a random whole number of degrees in
    ``[0, 360)``; that angle is the regression target for the image.

    Args:
        image_dir: Directory searched (non-recursively) for ``*.jpg`` files.
        batch_size: Maximum number of images per batch.
        dim: Side length passed to ``rotate_preserve_size`` as ``(dim, dim)``.
    """

    def __init__(self, image_dir, batch_size, dim):
        # glob yields files in arbitrary order; sorting makes the starting
        # order reproducible (it is reshuffled after every epoch anyway).
        self.files = sorted(glob.glob(os.path.join(image_dir, "*.jpg")))
        self.batch_size = batch_size
        self.dim = dim

    def __len__(self):
        """Return the number of batches per epoch (the last one may be short)."""
        return (len(self.files) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Images that cannot be loaded or rotated are skipped with a warning, so
        a batch may contain fewer than ``batch_size`` samples.

        Returns:
            ``([X_vit, X_conv], y)``: ``X_vit`` is the ``pixel_values`` array
            produced by the module-level ``feature_extractor``, ``X_conv`` is
            the stack of rotated images as returned by
            ``rotate_preserve_size``, and ``y`` holds the rotation angles.

        Raises:
            ValueError: If no image of the batch could be processed.
        """
        start = idx * self.batch_size
        batch_files = self.files[start:start + self.batch_size]

        images = []
        angles = []
        for path in batch_files:
            angle = float(np.random.randint(0, 360))
            try:
                img = np.array(rotate_preserve_size(path, angle, (self.dim, self.dim)))
            except Exception as exc:
                # Best effort: one unreadable file must not abort training, but
                # it should be visible (and Ctrl-C must still propagate).
                logger.warning("Skipping {}: {}", path, exc)
                continue
            images.append(img)
            angles.append(angle)

        if not images:
            raise ValueError(
                "Batch {} contains no usable images ({} file(s) attempted)".format(
                    idx, len(batch_files)
                )
            )

        # Ask for NumPy output directly; "pt" would require PyTorch to be
        # installed only to convert the result back to NumPy.
        X_vit = np.asarray(
            feature_extractor(images=images, return_tensors="np")["pixel_values"]
        )
        X_conv = np.stack(images, axis=0)
        y = np.array(angles)

        return [X_vit, X_conv], y

    def on_epoch_end(self):
        """Reshuffle the file list in place so batches differ between epochs."""
        random.shuffle(self.files)

In [6]:
class ViTValidationTestGenerator(Sequence):
    """Batch generator for a fixed validation/test split with preset angles.

    The label CSV must provide the columns ``mode``, ``image`` and ``angle``.
    Only rows whose ``mode`` equals the ``mode`` argument are used; each image
    is rotated by the angle stored in its row, which is also the target.

    Args:
        image_dir: Directory that the ``image`` column is relative to.
        df_label_path: Path of the label CSV.
        batch_size: Maximum number of images per batch.
        dim: Side length passed to ``rotate_preserve_size`` as ``(dim, dim)``.
        mode: Value of the ``mode`` column selecting the split.
        channels_first: Stored on the instance; not used by this class.
        is_vit: Stored on the instance; not used by this class.
    """

    def __init__(self, image_dir, df_label_path, batch_size, dim, mode, channels_first=False, is_vit=False):
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.dim = dim
        self.mode = mode
        self.channels_first = channels_first
        self.is_vit = is_vit

        df_label = pd.read_csv(df_label_path)
        self.df = df_label[df_label["mode"] == self.mode].reset_index(drop=True)

    def __len__(self):
        """Return the number of batches (the last one may be short)."""
        return (len(self.df) + self.batch_size - 1) // self.batch_size

    def __getitem__(self, idx):
        """Build batch number ``idx``.

        Rows whose image cannot be loaded or rotated are skipped with a
        warning, so a batch may contain fewer than ``batch_size`` samples.

        Returns:
            ``([X_vit, X_conv], y)``: ``X_vit`` is the ``pixel_values`` array
            produced by the module-level ``feature_extractor``, ``X_conv`` is
            the stack of rotated images as returned by
            ``rotate_preserve_size``, and ``y`` holds the angles from the CSV.

        Raises:
            ValueError: If no row of the batch could be processed.
        """
        start = idx * self.batch_size
        df_batch = self.df.iloc[start:start + self.batch_size]

        images = []
        angles = []
        # to_numpy() keeps the angle as a NumPy scalar, exactly as positional
        # Series indexing would return it.
        for name, angle in zip(df_batch["image"].to_numpy(), df_batch["angle"].to_numpy()):
            try:
                path = os.path.join(self.image_dir, name)
                img = np.array(rotate_preserve_size(path, angle, (self.dim, self.dim)))
            except Exception as exc:
                # Best effort: a bad row must not abort evaluation, but it
                # should be visible (and Ctrl-C must still propagate).
                logger.warning("Skipping {}: {}", name, exc)
                continue
            images.append(img)
            angles.append(angle)

        if not images:
            raise ValueError(
                "Batch {} contains no usable images ({} row(s) attempted)".format(
                    idx, len(df_batch)
                )
            )

        # Ask for NumPy output directly; "pt" would require PyTorch to be
        # installed only to convert the result back to NumPy.
        X_vit = np.asarray(
            feature_extractor(images=images, return_tensors="np")["pixel_values"]
        )
        X_conv = np.stack(images, axis=0)
        y = np.array(angles)

        return [X_vit, X_conv], y

In [7]:
# get ViT base model
# Loads the pretrained TensorFlow ViT backbone from the same checkpoint name
# that the feature extractor uses.
# NOTE(review): unlike conv_base, none of the visible cells freezes vit_base's
# weights — confirm that fine-tuning the ViT is intended.
vit_base = TFAutoModel.from_pretrained("google/vit-base-patch16-224")

2022-04-18 00:25:26.092286: I tensorflow/compiler/jit/xla_cpu_device.cc:41] Not creating XLA devices, tf_xla_enable_xla_devices not set
2022-04-18 00:25:26.093245: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcuda.so.1
2022-04-18 00:25:26.129915: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1720] Found device 0 with properties: 
pciBusID: 0000:0a:00.0 name: NVIDIA GeForce GTX 1080 Ti computeCapability: 6.1
coreClock: 1.582GHz coreCount: 28 deviceMemorySize: 10.92GiB deviceMemoryBandwidth: 451.17GiB/s
2022-04-18 00:25:26.129954: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcudart.so.10.1
2022-04-18 00:25:26.214365: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublas.so.10
2022-04-18 00:25:26.214504: I tensorflow/stream_executor/platform/default/dso_loader.cc:49] Successfully opened dynamic library libcublasLt.

In [8]:
# Side length of the square images; used for both model inputs and passed to
# the data generators as `dim`.
IMAGE_SIZE=224
# Patch side length; PatchAttention derives the patch grid as
# IMAGE_SIZE // PATCH_SIZE.
PATCH_SIZE = 16
# Passed to PatchAttention, which uses it as the attention key_dim.
PROJECTION_DIM = 768

In [9]:
# get CONV base model
# EfficientNetB0 with ImageNet weights and without its classification head
# (include_top=False), built for IMAGE_SIZE x IMAGE_SIZE RGB inputs.
conv_base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
# Mark every backbone layer as non-trainable so only the layers added on top
# of it are updated during training.
for layer in conv_base.layers:
    layer.trainable = False

In [14]:
class PatchAttention(L.Layer):
    """Derive a per-pixel weight map from a sequence of token embeddings.

    A single-head self-attention is run over the tokens. The attention each
    key position receives is averaged over all query positions, the first
    position is dropped, and the remaining weights are arranged on a square
    patch grid and repeated ``patch_size`` times along both spatial axes.

    Args:
        projection_dim: ``key_dim`` of the internal ``MultiHeadAttention``.
    """

    def __init__(self, projection_dim):
        super().__init__()
        self.mha = L.MultiHeadAttention(
            num_heads=1,
            key_dim=projection_dim,
            dropout=0.1,
        )

    def call(self, encoded_patches, image_size, patch_size):
        """Compute the pixel weight map.

        Args:
            encoded_patches: Token embeddings; axis 0 is read as the batch
                axis and axis 1 as the sequence axis.
            image_size: Image side length in pixels.
            patch_size: Patch side length in pixels.

        Returns:
            Tensor with a trailing singleton axis whose two spatial axes each
            have ``(image_size // patch_size) * patch_size`` entries.
        """
        dynamic_shape = tf.shape(encoded_patches)
        n_batch = dynamic_shape[0]
        n_tokens = dynamic_shape[1]

        # Only the attention scores are needed; the attended output is dropped.
        _, scores = self.mha(
            encoded_patches, encoded_patches, return_attention_scores=True
        )
        # With a single head the head axis can be folded away by a reshape.
        scores = tf.reshape(scores, shape=(n_batch, n_tokens, n_tokens))
        # Average over the query axis: one weight per key position.
        received = tf.math.reduce_mean(scores, axis=1)

        # Removing CLS token
        without_cls = received[:, 1:]
        grid_side = image_size // patch_size
        patch_map = tf.reshape(without_cls, shape=(n_batch, grid_side, grid_side))

        # move to image space
        widened = tf.repeat(patch_map, repeats=[patch_size], axis=-1)
        upsampled = tf.repeat(widened, repeats=[patch_size], axis=1)

        return tf.expand_dims(upsampled, axis=-1)

In [93]:
class PatchAttentionV2(L.Layer):
    """Per-pixel weight map built from attention towards the first token.

    A single-head self-attention is run over the tokens. For every query
    position except the first, the score assigned to key position 0 is taken,
    passed through a sigmoid, arranged on a square patch grid and repeated
    ``patch_size`` times along both spatial axes.

    Args:
        projection_dim: ``key_dim`` of the internal ``MultiHeadAttention``.
    """

    def __init__(self, projection_dim):
        super().__init__()
        self.mha = L.MultiHeadAttention(num_heads=1, key_dim=projection_dim)

    def call(self, encoded_patches, image_size, patch_size):
        """Compute the pixel weight map.

        Args:
            encoded_patches: Token embeddings; axis 0 is read as the batch axis.
            image_size: Image side length in pixels.
            patch_size: Patch side length in pixels.

        Returns:
            Tensor with a trailing singleton axis whose two spatial axes each
            have ``(image_size // patch_size) * patch_size`` entries.
        """
        n_batch = tf.shape(encoded_patches)[0]
        grid_side = image_size // patch_size

        # Only the attention scores are needed; the attended output is dropped.
        _, scores = self.mha(
            encoded_patches, encoded_patches, return_attention_scores=True
        )
        # Drop the (single) head axis, then keep, for queries 1.., the score
        # given to key 0 (presumably the CLS token — confirm against the
        # backbone's token layout).
        per_head = tf.squeeze(scores, axis=1)
        to_first_token = per_head[:, 1:, 0]
        # NOTE(review): MultiHeadAttention scores are already softmax outputs,
        # so this sigmoid keeps the weights in a narrow band above 0.5 —
        # confirm that this is intended.
        gated = tf.nn.sigmoid(to_first_token)

        patch_map = tf.reshape(gated, shape=(n_batch, grid_side, grid_side))

        # move to image space
        widened = tf.repeat(patch_map, repeats=[patch_size], axis=-1)
        upsampled = tf.repeat(widened, repeats=[patch_size], axis=1)

        return tf.expand_dims(upsampled, axis=-1)

In [15]:
# Define model
# Two inputs carry the same image: a channels-first tensor for the ViT branch
# and a channels-last tensor for the convolutional branch.
vit_input = L.Input(shape=(3, IMAGE_SIZE, IMAGE_SIZE))
conv_input = L.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))

# The first ViT output is turned into a per-pixel weight map ...
vit_out = vit_base(vit_input)
pixel_weights = PatchAttention(PROJECTION_DIM)(vit_out[0], IMAGE_SIZE, PATCH_SIZE)

# ... which rescales the image before it enters the frozen CNN backbone.
x = L.Multiply()([pixel_weights, conv_input])
x = conv_base(x)
x = L.Flatten()(x)

# Regression head: three Dense + BatchNormalization stages of shrinking width.
for units in (512, 256, 64):
    x = L.Dense(units, activation="relu")(x)
    x = L.BatchNormalization()(x)
x = L.Flatten()(x)
y = L.Dense(1, activation="linear")(x)

model = Model(inputs=[vit_input, conv_input], outputs=y)
model.summary()

Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Please report this to the TensorFlow team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output.
Cause: module 'gast' has no attribute 'Index'
Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            [(None, 3, 224, 224) 0                                            
__________________________________________________________________________________________________
tf_vi_t_model (TFViTModel)      TFBaseModelOutputWit 86389248    input_4[0][0]                    
____________________________________________________________________________

In [21]:
# Compile with the project's angular loss and Adadelta.
model.compile(loss=angular_loss_mae, optimizer=Adadelta(learning_rate=0.1))

# One batch size shared by the training and validation generators.
BATCH_SIZE = 16

train_gen = ViTRotGenerator("/data/chandanp/train2017/", BATCH_SIZE, IMAGE_SIZE)
val_gen = ViTValidationTestGenerator(image_dir="/data/subhadip/data/",
                                     df_label_path="/data/subhadip/data/validation-test.csv",
                                     batch_size=BATCH_SIZE, dim=IMAGE_SIZE, mode="valid")

# All three callbacks watch val_loss. Checkpointing on the training loss
# (as before) would keep overwriting the "best" weights while the model
# overfits, so the saved file would not match the epoch early stopping
# considers best.
cp = ModelCheckpoint("/data/subhadip/weights/model-vit-en-ang-loss.h5", save_weights_only=True,
                     save_best_only=True, monitor="val_loss")
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-5)
es = EarlyStopping(monitor="val_loss", patience=5)

# The epoch count is only an upper bound; EarlyStopping ends the run.
model.fit(train_gen, validation_data=val_gen, epochs=10000, callbacks=[cp, es, reduce_lr])

Epoch 1/10000
Epoch 2/10000


KeyboardInterrupt: 

In [18]:
# Quick sanity check: steps=2 evaluates only two batches from val_gen, not the
# whole validation split, so the reported loss is a rough estimate.
model.evaluate(val_gen, steps=2)



82.15802764892578