In [11]:
import sys
sys.path.append("..")

In [12]:
!nvidia-smi

Mon Apr 18 15:20:49 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 470.94       Driver Version: 470.94       CUDA Version: 11.4     |
|-------------------------------+----------------------+----------------------+
| GPU  Name        Persistence-M| Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ...  Off  | 00000000:05:00.0 Off |                  N/A |
| 50%   83C    P2   158W / 250W |   6746MiB / 11177MiB |     82%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
|   1  NVIDIA GeForce ...  Off  | 00000000:06:00.0 Off |                  N/A |
| 74%   87C    P2   216W / 250W |  10281MiB / 11178MiB |     61%      Default |
|       

In [13]:
import os
os.environ["CUDA_VISIBLE_DEVICES"]="-1"

In [14]:
# Import Libraries
from transformers import TFAutoModel
from utils import rotate_preserve_size
from loss import angular_loss_mae
import glob
import os
import numpy as np
import cv2
import random

from tensorflow.keras.models import Model
from tensorflow.keras import layers as L
import tensorflow as tf
import os
import pandas as pd
from tensorflow.keras.applications import Xception, EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, CSVLogger
from loguru import logger
from tensorflow.keras.utils import Sequence
from tensorflow.keras.optimizers import Adadelta
from generator import RotGenerator, ValidationTestGenerator

In [15]:
from transformers import ViTFeatureExtractor
feature_extractor = ViTFeatureExtractor.from_pretrained('google/vit-base-patch16-224')

class ViTRotGenerator(Sequence):
    def __init__(self, image_dir, batch_size, dim):
        self.files = glob.glob(os.path.join(image_dir, "*.jpg"))
        self.batch_size = batch_size
        self.dim = dim
        
    def __len__(self):
        if len(self.files) % self.batch_size == 0:
            return len(self.files) // self.batch_size
        return len(self.files) // self.batch_size + 1
    
    def __getitem__(self, idx):
        batch_slice = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        batch_files = self.files[batch_slice]

        X_conv = []
        X_vit = []
        y = []
        
        for i, f in enumerate(batch_files):
            try:
                angle = float(np.random.choice(range(0, 360)))
                img = rotate_preserve_size(f, angle, (self.dim, self.dim))
                img = np.array(img)
                X_vit.append(img)

                img = np.expand_dims(img, axis=0)
                X_conv.append(img)
                y.append(angle)

            except:
                pass
        
        X_vit = feature_extractor(images=X_vit, return_tensors="pt")["pixel_values"]
        X_vit = np.array(X_vit)
        X_conv = np.concatenate(X_conv, axis=0)
        y = np.array(y)

        return [X_vit, X_conv], y
    
    def on_epoch_end(self):
        random.shuffle(self.files)

In [16]:
class ViTValidationTestGenerator(Sequence):
    def __init__(self, image_dir, df_label_path, batch_size, dim, mode, channels_first=False, is_vit=False):
        self.image_dir = image_dir
        self.batch_size = batch_size
        self.dim = dim
        self.mode = mode
        self.channels_first = channels_first
        self.is_vit = is_vit
        
        df_label = pd.read_csv(df_label_path)
        self.df = df_label[df_label["mode"] == self.mode].reset_index(drop=True)
        
    def __len__(self):
        total = self.df.shape[0]
        if total % self.batch_size == 0:
            return total // self.batch_size
        return total // self.batch_size + 1
    
    def __getitem__(self, idx):
        batch_slice = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        df_batch = self.df[batch_slice].reset_index(drop=True).copy()
        

        X_conv = []
        X_vit = []
        y = []
        
        for i in range(len(df_batch)):
            try:
                angle = df_batch.angle[i]
                path = os.path.join(self.image_dir, df_batch.image[i])
                img = rotate_preserve_size(path, angle, (self.dim, self.dim))

                img = np.array(img)
                X_vit.append(img)

                img = np.expand_dims(img, axis=0)
                X_conv.append(img)
                y.append(angle)

            except:
                pass
        
        X_vit = feature_extractor(images=X_vit, return_tensors="pt")["pixel_values"]
        X_vit = np.array(X_vit)
        X_conv = np.concatenate(X_conv, axis=0)
        y = np.array(y)

        return [X_vit, X_conv], y

In [17]:
# get ViT base model
vit_base = TFAutoModel.from_pretrained("google/vit-base-patch16-224")

Some layers from the model checkpoint at google/vit-base-patch16-224 were not used when initializing TFViTModel: ['classifier']
- This IS expected if you are initializing TFViTModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing TFViTModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some layers of TFViTModel were not initialized from the model checkpoint at google/vit-base-patch16-224 and are newly initialized: ['vit/pooler/dense/kernel:0', 'vit/pooler/dense/bias:0']
You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.


In [18]:
IMAGE_SIZE=224
PATCH_SIZE = 16
PROJECTION_DIM = 768

In [19]:
# get CONV base model
conv_base = EfficientNetB0(weights="imagenet", include_top=False, input_shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
for layer in conv_base.layers:
    layer.trainable = False

In [20]:
# Define model
vit_input = L.Input(shape=(3,IMAGE_SIZE, IMAGE_SIZE))
vit_out = vit_base(vit_input)[1]
vit_out = L.Dense(512, activation="relu")(vit_out)

conv_input = L.Input(shape=(IMAGE_SIZE, IMAGE_SIZE, 3))
conv_out = conv_base(conv_input)
conv_out = L.Flatten()(conv_out)
conv_out = L.Dense(512, activation="relu")(conv_out)

x = L.Concatenate()([vit_out, conv_out])
x = L.Dense(512, activation="relu")(x)
x = L.Dense(256, activation="relu")(x)
x = L.Dense(64, activation="relu")(x)
y = L.Dense(1, activation="linear")(x)

model = Model([vit_input, conv_input], y)
model.summary()

Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
input_5 (InputLayer)            [(None, 3, 224, 224) 0                                            
__________________________________________________________________________________________________
efficientnetb0 (Functional)     (None, 7, 7, 1280)   4049571     input_6[0][0]                    
__________________________________________________________________________________________________
tf_vi_t_model_1 (TFViTModel)    TFBaseModelOutputWit 86389248    input_5[0][0]                    
____________________________________________________________________________________________

In [21]:
model.compile(loss=angular_loss_mae, optimizer=Adadelta(learning_rate=0.1))

train_gen = ViTRotGenerator("/data/chandanp/train2017/", 16, IMAGE_SIZE)
val_gen = ViTValidationTestGenerator(image_dir="/data/subhadip/data/validation-test/", 
                                     df_label_path="/data/subhadip/data/validation-test.csv",
                                     batch_size=16, dim=IMAGE_SIZE, mode="valid")
cp = ModelCheckpoint("/data/subhadip/weights/model-multi-ang-loss.h5", save_weights_only=False, 
                     save_best_only=True, monitor="loss")
reduce_lr = ReduceLROnPlateau(monitor="val_loss", factor=0.5, patience=3, min_lr=1e-5)
csv_logger = CSVLogger("/data/subhadip/weights/model-multi-ang-loss.csv")
es = EarlyStopping(monitor="val_loss", patience=5)
model.fit(train_gen, validation_data=val_gen, epochs=10000, callbacks=[cp, es, reduce_lr, csv_logger], 
          steps_per_epoch=1)

2022-04-18 15:27:54.478760: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:116] None of the MLIR optimization passes are enabled (registered 2)
2022-04-18 15:27:54.500863: I tensorflow/core/platform/profile_utils/cpu_utils.cc:112] CPU Frequency: 3597630000 Hz


Epoch 1/10000

[ WARN:0@2115.784] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('/data/subhadip/data/validation-test/000000029255.jpg'): can't open/read file: check file path/integrity
[ WARN:0@2115.784] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('/data/subhadip/data/validation-test/000000097816.jpg'): can't open/read file: check file path/integrity
[ WARN:0@2115.785] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('/data/subhadip/data/validation-test/000000097433.jpg'): can't open/read file: check file path/integrity
[ WARN:0@2115.785] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('/data/subhadip/data/validation-test/000000523570.jpg'): can't open/read file: check file path/integrity
[ WARN:0@2115.785] global /io/opencv/modules/imgcodecs/src/loadsave.cpp (239) findDecoder imread_('/data/subhadip/data/validation-test/000000353998.jpg'): can't open/read file: check file 

IndexError: list index out of range

In [158]:
def custom_accuracy(y_true, y_pred):

    y_true = tf.reshape(y_true, (-1, ))
    y_pred = tf.reshape(y_pred, (-1, ))

    N = tf.math.divide(y_pred, 360)
    N = tf.floor(N)

    y_pred = K.switch(K.greater_equal(y_pred, 360), y_pred - (N * 360), y_pred)
    y_pred = K.switch(K.less(y_pred, 0), y_pred + (K.abs(N) * 360), y_pred)
    
    pred = []
    for y in y_pred:
        if y <= 11.25:
            pred.append(0.0)
        elif y <= 33.75:
            pred.append(22.5)
        elif y <= 56.25:
            pred.append(45)
        elif y <= 78.75:
            pred.append(67.5)
        elif y <= 101.25:
            pred.append(90)
        elif y <= 123.75:
            pred.append(112.5)
        elif y <= 146.25:
            pred.append(135)
        elif y <= 168.75:
            pred.append(157.5)
        elif y <= 191.25:
            pred.append(180)
        elif y <= 213.75:
            pred.append(202.5)
        elif y <= 236.25:
            pred.append(225)
        elif y <= 258.75:
            pred.append(247.5)
        elif y <= 281.25:
            pred.append(270)
        elif y <= 303.5:
            pred.append(292.5)
        elif y <= 326:
            pred.append(315)
        elif y <= 348.5:
            pred.append(337.5)
        elif y <= 360:
            pred.append(0.0)

    y_pred = np.array(pred)
    print(y_pred)
    acc = tf.equal(y_true, y_pred)
    acc = tf.cast(acc, dtype=tf.float32)
    acc = tf.reduce_mean(acc)
    return acc

In [163]:
custom_accuracy(np.array([[337.5], [45.0]]), np.array([[330.0], [41.0]]))

[337.5  45. ]


<tf.Tensor: shape=(), dtype=float32, numpy=1.0>

In [110]:
K.switch(K.equal(a, 2), 10, a)

10

In [173]:
print([i * 22.5 for i in range(0, 17)])

[0.0, 22.5, 45.0, 67.5, 90.0, 112.5, 135.0, 157.5, 180.0, 202.5, 225.0, 247.5, 270.0, 292.5, 315.0, 337.5, 360.0]


In [169]:
[i * 22.5 for i in range(, 17)]

360.0

In [174]:
from sklearn.metrics import accuracy_score

In [179]:
accuracy_score(np.array([1.2, 1.3, 1.4]).astype("str"), np.array([1.20, 1.3, 1.4]).astype("str"))

1.0

In [186]:
np.array([1, 1.3, 1.4]).astype("str")

array(['1.0', '1.3', '1.4'], dtype='<U32')

In [185]:
np.array([1.00, 1.356, 1.4]).astype("str")

array(['1.0', '1.356', '1.4'], dtype='<U32')

In [220]:
def custom_accuracy(y_true, y_pred):
    y_true = y_true.reshape(-1, )
    y_pred = y_pred.reshape(-1, )
    
    y_pred_class = []
    for y in y_pred:
        if y <= 11.25:
            y_pred_class.append(0.0)
        elif y <= 33.75:
            y_pred_class.append(22.5)
        elif y <= 56.25:
            y_pred_class.append(45.0)
        elif y <= 78.75:
            y_pred_class.append(67.5)
        elif y <= 101.25:
            y_pred_class.append(90.0)
        elif y <= 123.75:
            y_pred_class.append(112.5)
        elif y <= 146.25:
            y_pred_class.append(135.0)
        elif y <= 168.75:
            y_pred_class.append(157.5)
        elif y <= 191.25:
            y_pred_class.append(180.0)
        elif y <= 213.75:
            y_pred_class.append(202.5)
        elif y <= 236.25:
            y_pred_class.append(225.0)
        elif y <= 258.75:
            y_pred_class.append(247.5)
        elif y <= 281.25:
            y_pred_class.append(270.0)
        elif y <= 303.5:
            y_pred_class.append(292.5)
        elif y <= 326:
            y_pred_class.append(315.0)
        elif y <= 348.5:
            y_pred_class.append(337.5)
        elif y <= 360:
            y_pred_class.append(0.0)

    y_pred_class = np.array(y_pred_class)
    y_true = y_true.astype("str")
    y_pred_class = y_pred_class.astype("str")

    acc = accuracy_score(y_true, y_pred_class)

    return acc


@tf.function(input_signature=[tf.TensorSpec(None, tf.float32), tf.TensorSpec(None, tf.float32)])
def tf_custom_accuracy(y_true, y_pred):    
    acc = tf.numpy_function(custom_accuracy, [y_true, y_pred], tf.double)
    return acc

In [228]:
a = np.array([[45.0], [0.0], [90.0]])
b = np.array([[48.0], [127.0], [-2]])

In [229]:
np.mod(b, 360)

array([[ 48.],
       [127.],
       [358.]])

In [233]:
np.mod(-20, 360)

340

In [219]:
tf_custom_accuracy(a, b)

InvalidArgumentError:  The second input must be a scalar, but it has shape [3]
	 [[{{node cond/y_pred/_6}}]] [Op:__inference_tf_custom_accuracy_58330]

Function call stack:
tf_custom_accuracy


In [196]:
import glob

In [198]:
files = []
for d_path in glob.glob("/data/subhadip/MIT-indoor/Images/*"):
    files += glob.glob(os.path.join(d_path, "*.jpg"))

In [199]:
len(files)

15613