In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from transformers import AutoModel , AutoImageProcessor , AutoTokenizer 
from datasets import load_dataset , Dataset
from PIL import Image
import torch
from torch import nn

torch.cuda.empty_cache()

# import os
# os.environ['TF_GPU_ALLOCATOR']='cuda_malloc_async'

2024-05-15 00:07:59.352127: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
  from .autonotebook import tqdm as notebook_tqdm


In [2]:
def training_img_address(input):
    IMAGE_DIR = "./train2014_3d"
    input["image_id"] = f"{IMAGE_DIR}/{input['image_id']}"
    return input

In [3]:
SAMPLES = 100

df_acc = pd.read_pickle("./vqa_v2_acc.pkl")
dataset = Dataset.from_pandas(df_acc)
dataset = dataset.remove_columns(['__index_level_0__'])
dataset = dataset.select(range(0,SAMPLES))
dataset = dataset.map(training_img_address)

dataset

Map: 100%|██████████| 100/100 [00:00<00:00, 12205.87 examples/s]


Dataset({
    features: ['question', 'question_type', 'question_id', 'image_id', 'answer_type', 'label'],
    num_rows: 100
})

In [4]:
Image.open(dataset[0]["image_id"]).size

(640, 480)

In [5]:
X_img = np.array([np.array(Image.open(i).resize((640 , 480))) for i in dataset["image_id"]])
X_text = np.array(dataset["question"])
X_img.shape , X_text.shape

((100, 480, 640, 3), (100,))

Creating Labels

In [6]:
import itertools

labels = [item['ids'] for item in dataset['label']]
flattened_labels = list(itertools.chain(*labels))
unique_labels = list(set(flattened_labels))

label2id = {label: idx for idx, label in enumerate(unique_labels)}
id2label = {idx: label for label, idx in label2id.items()} 

In [7]:
label2id

{'clothes': 0,
 'pillow case': 1,
 'white & lavender': 2,
 'flight markup': 3,
 'web page': 4,
 'family room': 5,
 'frowning': 6,
 'waiting': 7,
 'air trail': 8,
 'bear': 9,
 '0': 10,
 'plane trail': 11,
 'conference room': 12,
 "it's color of screen": 13,
 '100 inches': 14,
 'x box': 15,
 'brushing teeth': 16,
 'screen projection': 17,
 "can't tell": 18,
 'cover': 19,
 'word document': 20,
 'lavender and white': 21,
 'vent': 22,
 'beside tree': 23,
 'outside': 24,
 'standing': 25,
 'trees': 26,
 'living room': 27,
 'computer': 28,
 'office': 29,
 'there is 1 person in this image': 30,
 'on': 31,
 'very': 32,
 'backwards': 33,
 'skateboard': 34,
 '50 inch': 35,
 'flying disc': 36,
 'outlook': 37,
 'long neck': 38,
 'black, white, gray': 39,
 'mesh': 40,
 'motorcycle': 41,
 'tree trunk': 42,
 'snow ski': 43,
 'walking': 44,
 'blue and white': 45,
 'jet stream': 46,
 'netting': 47,
 'fallen tree': 48,
 'poorly': 49,
 'laptop': 50,
 'yes': 51,
 'red & white': 52,
 '2': 53,
 'smile': 54,
 

In [8]:
def replace_ids(inputs):
    '''Converting everything to one-hot-encoding'''
    h = [0 for i in id2label]
    for i in range(len(inputs["label"]["ids"])):
        t = inputs["label"]["ids"][i]
        w = inputs["label"]["weights"][i]
        if w > 0.5: w = 1
        else: w = 0.3
        # print(t , w)
        # print(label2id.get(t , 0))
        h[label2id.get(t , 0)] = w
    inputs["label"] = h
    return inputs


flat_dataset = dataset.map(replace_ids)
# flat_dataset = dataset.flatten()
NUM_CLASSES = np.array(flat_dataset["label"]).shape[1]
print("Number of Classes : " , NUM_CLASSES)

Map: 100%|██████████| 100/100 [00:00<00:00, 7617.14 examples/s]

Number of Classes :  179





Testing BERT and ViT

In [9]:
DEVICE = "cuda"

vit_processor = AutoImageProcessor.from_pretrained("google/vit-base-patch16-224")
# model = AutoModel.from_pretrained("google/vit-base-patch16-224").to(DEVICE)
# vit_model = AutoModel.from_pretrained("google/vit-base-patch16-224")



In [10]:
from transformers import BertTokenizer , BertModel

bert_processor = BertTokenizer.from_pretrained('bert-base-uncased')
# bert_model = BertModel.from_pretrained('bert-base-uncased')



In [None]:
ip = vit_processor(X_img , return_tensors="pt")

In [None]:
# outp = vit_model(**ip)
outp = vit_model(ip["pixel_values"])
outp.pooler_output.shape

Testing Concatentation

In [None]:
x = outp.pooler_output


batch_sentences = dataset["question"]
encoded_input = bert_processor(batch_sentences, padding=True, truncation=True, return_tensors="pt")
input_ids = encoded_input["input_ids"]
attention_mask = encoded_input["attention_mask"]
y = bert_model(input_ids , attention_mask=attention_mask).pooler_output

x.shape , y.shape

torch.cat([x,y] , axis=1).shape

In [None]:
class CustomVITModel(nn.Module):
    def __init__(self):
        super(CustomVITModel, self).__init__()
        self.vit = AutoModel.from_pretrained("google/vit-base-patch16-224")
        self.bert = BertModel.from_pretrained('bert-base-uncased')
        ### New layers:
        self.linear1 = nn.Linear(768 * 2, 256)
        self.linear2 = nn.Linear(256, NUM_CLASSES) ## 3 is the number of classes in this example
        self.sigmoid = nn.Softmax()

    def forward(self, pixel_values , input_ids , attention_mask):
        vit_outp = self.vit(pixel_values)
        vit_last_hidden_state, vit_pooled_output = vit_outp.last_hidden_state , vit_outp.pooler_output
        
        bert_outp = self.bert(input_ids , attention_mask=attention_mask)
        bert_last_hidden_state, bert_pooled_output = bert_outp.last_hidden_state , bert_outp.pooler_output
        
        # last_hidden_state = last_hidden_state.cuda()
        # pooled_output = pooled_output.cuda()

        # sequence_output has the following shape: (batch_size, sequence_length, 768)
        
        combined_input = torch.cat([vit_pooled_output,bert_pooled_output] , axis=1)
        
        # linear1_output = self.linear1(last_hidden_state[:,0,:].view(-1,768)) ## extract the 1st token's embeddings
        linear1_output = self.linear1(combined_input)

        linear2_output = self.linear2(linear1_output)
        
        pred = linear2_output
        # pred = self.sigmoid(linear2_output)

        return pred

# tokenizer = AutoTokenizer.from_pretrained("dbmdz/bert-base-italian-xxl-cased") 
model_custom = CustomVITModel() # You can pass the parameters if required to have more flexible model
# model_custom.to("cuda") ## can be gpu
criterion = nn.CrossEntropyLoss() ## If required define your own criterion
# criterion = nn.BCELoss() ## If required define your own criterion
optimizer = torch.optim.Adam(filter(lambda p: p.requires_grad, model_custom.parameters()))

In [None]:
EPOCHS = 2
batch_size = 16
Y = torch.tensor(flat_dataset["label"])

for epoch in range(EPOCHS):
    for i in range(0 , len(X_img) , batch_size):

        data_img = X_img[i:i + batch_size]
        data_img = torch.from_numpy(data_img)
        data_text = X_text[i:i + batch_size]
        # targets = np.array([[0],[1]])
        targets = Y[i : i + batch_size]
        targets=targets.to(torch.float)
        # data = data.cuda()
        # targets = targets.cuda()
        
        optimizer.zero_grad()   
        # encoding = tokenizer.batch_encode_plus(data, return_tensors='pt', padding=True, truncation=True,max_length=50, add_special_tokens = True)
        pixel_values = vit_processor(data_img , return_tensors="pt")["pixel_values"]
        bertop = bert_processor(data_text.tolist(), padding=True, truncation=True, return_tensors="pt")
        input_ids , attention_mask = bertop["input_ids"] , bertop["attention_mask"]
        outputs = model_custom(pixel_values , input_ids , attention_mask)

        loss = criterion(outputs, targets)
        print(loss)
        loss.backward()
        optimizer.step()

In [None]:
ip1 = processor(X[0] , return_tensors="pt")
logits = model_custom(**ip1)
pred_probab = nn.Sigmoid()(logits)
pred_probab

### Testing PyTorch Training

In [None]:
class ClassificationHead(nn.Module):
    def __init__(self, input_size, num_classes):
        super(ClassificationHead, self).__init__()
        # self.fc1 = nn.Linear(input_size, 128)
        # self.fc2 = nn.Linear(128, 64)
        # self.fc3 = nn.Linear(64, num_classes)
        # self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(input_size, 256),
            nn.ReLU(),
            nn.Linear(256, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes),
        )
        
    
    def forward(self, x):
        # x = self.fc1(x)
        # x = self.relu(x)
        # x = self.fc2(x)
        # x = self.relu(x)
        # x = self.fc3(x)
        # # x = self.sigmoid(x)
        # return x
        
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

# Define your number of classes
num_classes = NUM_CLASSES  # Number of classes in your classification task

classification_head = ClassificationHead(vit_model.config.hidden_size, num_classes)


loss_fn = nn.CrossEntropyLoss() ## If required define your own criterion
# optimizer = torch.optim.Adam(vit_model.parameters(), lr=0.0001)
optimizer = torch.optim.Adam(classification_head.parameters(), lr=0.0001)

# Create the classification head

EPOCHS = 2
batch_size = 2
Y = torch.tensor(flat_dataset["label"])

print("Training:")
# Training Loop
running_loss = 0.0
batch = 0
for epoch in range(EPOCHS):
    classification_head.train()
    for i in range(0 , len(X_img) , batch_size):
        batch += 1
        inputs = X_img[i:i+batch_size]
        labels = Y[i:i+batch_size]
        # labels=labels.to(torch.float)
        
        ip = vit_processor(inputs , return_tensors="pt")

        # Forward pass
        outputs = vit_model(**ip).last_hidden_state[:, 0, :]
        logits = classification_head(outputs)
        
        # Calculate loss
        loss = loss_fn(logits, labels)
        running_loss += loss.item()
        
        # print(f"EPOCH : {epoch} | BATCH : {i} | LOSS : {running_loss}")
        # print(loss)

        # Backpropagation
        loss.backward()
        optimizer.step()
        optimizer.zero_grad()
        
        loss, current = loss.item(), batch * batch_size + len(inputs)
        print(f"loss: {loss:>7f} ")

# print("Inference:")
# # Inference
# with torch.no_grad():
#     inputs = X
#     ip = processor(inputs , return_tensors="pt")
#     outputs = model(**ip).last_hidden_state[:, 0, :]
#     logits = classification_head(outputs)
#     ans = nn.functional.sigmoid(logits)
#     print(ans)

In [None]:
print("Inference:")
# Inference
with torch.no_grad():
    inputs = X_img
    ip = vit_processor(inputs , return_tensors="pt")
    outputs = vit_model(**ip).last_hidden_state[:, 0, :]
    logits = classification_head(outputs)
    logits = nn.Softmax(dim=1)(logits)
    print(logits)

In [None]:
Training:
tensor(4.2284, grad_fn=<DivBackward1>)
tensor(3.3829, grad_fn=<DivBackward1>)
tensor(2.9335, grad_fn=<DivBackward1>)
tensor(5.4293, grad_fn=<DivBackward1>)
tensor(2.7480, grad_fn=<DivBackward1>)
tensor(4.1977, grad_fn=<DivBackward1>)
tensor(3.3491, grad_fn=<DivBackward1>)
tensor(2.8715, grad_fn=<DivBackward1>)
tensor(5.2210, grad_fn=<DivBackward1>)
tensor(2.5404, grad_fn=<DivBackward1>)

### Cross Entropy Test

In [None]:
# Example of target with class indices
loss = nn.CrossEntropyLoss()
# Example of target with class probabilities
input = torch.randn(3, 5, requires_grad=True)
print(input)
target = torch.randn(3, 5).softmax(dim=1)
for i in range(10):
    output = loss(input, target)
    output.backward()

print(input)

### TensorFlow

In [None]:
tokenizer = AutoTokenizer.from_pretrained("google-bert/bert-base-cased")
tokenized_data = tokenizer(dataset["question"], return_tensors="np", padding=True)
# Tokenizer returns a BatchEncoding, but we convert that to a dict for Keras
tokenized_data = dict(tokenized_data)

labels = Y.detach().cpu().numpy()

In [None]:
tokenized_data

In [None]:
x = tf.keras.Input(shape=(None,))
tokenizer(x, return_tensors="np", padding=True)

In [11]:
from transformers import TFAutoModelForSequenceClassification , TFAutoModel , TFAutoModelForImageClassification , TFBertModel
from tensorflow.keras.optimizers import Adam

# Load and compile our model
# tf_model = tf.keras.models.Sequential(
#     [
#         TFAutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-cased"),
#         tf.keras.layers.Dense(1 , input_shape=(768,) , activation='sigmoid')
#     ]
# )

# tf_model = TFAutoModelForSequenceClassification.from_pretrained("google-bert/bert-base-cased")

MAX_LEN = 40

y_test = np.array(flat_dataset["label"])
X_img_tf = vit_processor(X_img , return_tensors="np")["pixel_values"]
X_text_tf = bert_processor(X_text.tolist() , padding='max_length', max_length=MAX_LEN, truncation=True, return_tensors="np")

vit_tf_model = TFAutoModel.from_pretrained("google/vit-base-patch16-224")
bert_tf_model = TFBertModel.from_pretrained('bert-base-uncased')


2024-05-15 00:08:40.127916: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-15 00:08:40.152273: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysfs-bus-pci#L344-L355
2024-05-15 00:08:40.152402: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:995] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero. See more at https://github.com/torvalds/linux/blob/v6.0/Documentation/ABI/testing/sysf

In [16]:
# bert_tf_model(**X_text_tf)['pooler_output'].shape
bert_tf_model(X_text_tf["input_ids"] , attention_mask=X_text_tf['attention_mask']).pooler_output.shape

TensorShape([100, 768])

In [19]:
def build_model():
    vit_inputs = tf.keras.layers.Input(shape=(3, 224, 224))
    x_vit = vit_tf_model(vit_inputs).pooler_output
    
    bert_input1 = tf.keras.layers.Input(shape=(MAX_LEN,) , dtype=tf.int32)
    bert_input2 = tf.keras.layers.Input(shape=(MAX_LEN,) , dtype=tf.int32)
    x_bert = bert_tf_model(input_ids=bert_input1 , attention_mask=bert_input2).pooler_output
    
    # x = np.concatenate([x_vit , x_bert])
    x = tf.keras.layers.Concatenate(axis=1)([x_vit, x_bert])
    # x = x_bert
    
    x = tf.keras.layers.Dense(128 , input_shape=(768*2,) , activation='relu')(x)
    x = tf.keras.layers.Dense(64 , activation='relu')(x)
    x = tf.keras.layers.Dense(NUM_CLASSES , activation='softmax')(x)
    
    model = tf.keras.models.Model(inputs=[vit_inputs , bert_input1 , bert_input2] , outputs=x)
    
    return model


modelx = build_model()
modelx.compile(optimizer=Adam(3e-5) , loss="categorical_crossentropy", metrics=['accuracy'])
modelx.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_7 (InputLayer)        [(None, 3, 224, 224)]        0         []                            
                                                                                                  
 input_8 (InputLayer)        [(None, 40)]                 0         []                            
                                                                                                  
 input_9 (InputLayer)        [(None, 40)]                 0         []                            
                                                                                                  
 tf_vi_t_model (TFViTModel)  TFBaseModelOutputWithPooli   8638924   ['input_7[0][0]']             
                             ng(last_hidden_state=(None   8                                 

In [20]:
x_input_ids = X_text_tf["input_ids"]
x_attention_masks = X_text_tf["attention_mask"]

modelx.fit([X_img_tf , x_input_ids , x_attention_masks] , y_test , epochs=20)


Epoch 1/20


2024-05-15 00:13:41.190226: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:432] Loaded cuDNN version 8902
2024-05-15 00:13:51.462281: W tensorflow/tsl/framework/bfc_allocator.cc:485] Allocator (GPU_0_bfc) ran out of memory trying to allocate 73.88MiB (rounded to 77463552)requested by op model_1/tf_vi_t_model/vit/encoder/layer_._11/intermediate/Gelu/truediv
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2024-05-15 00:13:51.462332: I tensorflow/tsl/framework/bfc_allocator.cc:1039] BFCAllocator dump for GPU_0_bfc
2024-05-15 00:13:51.462339: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (256): 	Total Chunks: 205, Chunks in use: 203. 51.2KiB allocated for chunks. 50.8KiB in use in bin. 2.2KiB client-requested in use in bin.
2024-05-15 00:13:51.462343: I tensorflow/tsl/framework/bfc_allocator.cc:1046] Bin (512): 	To

ResourceExhaustedError: Graph execution error:

Detected at node 'model_1/tf_vi_t_model/vit/encoder/layer_._11/intermediate/Gelu/truediv' defined at (most recent call last):
    File "/home/user/miniconda3/envs/python38/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/user/miniconda3/envs/python38/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel_launcher.py", line 18, in <module>
      app.launch_new_instance()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/traitlets/config/application.py", line 1075, in launch_instance
      app.start()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 739, in start
      self.io_loop.start()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 205, in start
      self.asyncio_loop.run_forever()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 545, in dispatch_queue
      await self.process_one()
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 534, in process_one
      await dispatch(*args)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 437, in dispatch_shell
      await result
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 359, in execute_request
      await super().execute_request(stream, ident, parent)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 778, in execute_request
      reply_content = await reply_content
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 446, in do_execute
      res = shell.run_cell(
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 549, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_2761421/474652171.py", line 4, in <module>
      modelx.fit([X_img_tf , x_input_ids , x_attention_masks] , y_test , epochs=20)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 1742, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 1338, in train_function
      return step_function(self, iterator)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 1322, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 1303, in run_step
      outputs = model.train_step(data)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 1080, in train_step
      y_pred = self(x, training=True)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/training.py", line 569, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/modeling_tf_utils.py", line 764, in run_call_with_unpacked_inputs
      ignore_mismatched_sizes=ignore_mismatched_sizes,
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 766, in call
      outputs = self.vit(
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/modeling_tf_utils.py", line 764, in run_call_with_unpacked_inputs
      ignore_mismatched_sizes=ignore_mismatched_sizes,
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 612, in call
      encoder_outputs = self.encoder(
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 522, in call
      for i, layer_module in enumerate(self.layer):
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 526, in call
      layer_outputs = layer_module(
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 473, in call
      intermediate_output = self.intermediate(hidden_states=layer_output)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/engine/base_layer.py", line 1150, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/transformers/models/vit/modeling_tf_vit.py", line 398, in call
      hidden_states = self.intermediate_act_fn(hidden_states)
    File "/home/user/miniconda3/envs/python38/lib/python3.8/site-packages/keras/src/activations.py", line 363, in gelu
      return tf.nn.gelu(x, approximate)
Node: 'model_1/tf_vi_t_model/vit/encoder/layer_._11/intermediate/Gelu/truediv'
failed to allocate memory
	 [[{{node model_1/tf_vi_t_model/vit/encoder/layer_._11/intermediate/Gelu/truediv}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_86798]

In [None]:
modelx.predict(x_test)