In [1]:
import os
import io
#import imageio
#import medmnist
import ipywidgets
import numpy as np
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import cv2
import numpy as np
from IPython.display import clear_output, display, HTML
from base64 import b64encode
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from matplotlib import pyplot as plt
from matplotlib import animation
from IPython.display import HTML


# Setting seed for reproducibility
SEED = 42
os.environ["TF_CUDNN_DETERMINISTIC"] = "1"
keras.utils.set_random_seed(SEED)



2023-04-30 13:59:42.882050: I tensorflow/core/util/port.cc:110] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2023-04-30 13:59:42.912133: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 AVX_VNNI FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# DATA
DATASET_NAME = "organmnist3d"
BATCH_SIZE = 8
AUTO = tf.data.AUTOTUNE
FRAMES_PER_CLIP = 16
INPUT_SHAPE = (FRAMES_PER_CLIP,256,256,1) 
NUM_CLASSES = 11

# OPTIMIZER
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5

# TRAINING
EPOCHS = 2

# TUBELET EMBEDDING
PATCH_SIZE = (8,8,8)
NUM_PATCHES = (INPUT_SHAPE[0] // PATCH_SIZE[0]) ** 2

# ViViT ARCHITECTURE
LAYER_NORM_EPS = 1e-6
PROJECTION_DIM = 128
NUM_HEADS = 8
NUM_LAYERS = 8


In [3]:
# displays video when given path
def display_video_path(video_path):
    video_data = open(video_path, "rb").read()
    video_encoded = b64encode(video_data).decode("ascii")
    video_tag = f'<video width="256" height="256" controls alt="test" src="data:video/mp4;base64,{video_encoded}">'
    display(HTML(video_tag))
    
def resize_and_pad(frame, target_size=256):
    # Calculate the aspect ratio of the frame
    height, width = frame.shape[:2]
    aspect_ratio = float(width) / float(height)

    # Calculate new dimensions based on the aspect ratio
    if height > width:
        new_height = target_size
        new_width = int(target_size * aspect_ratio)
    else:
        new_width = target_size
        new_height = int(target_size / aspect_ratio)

    # Resize the frame
    resized_frame = cv2.resize(frame, (new_width, new_height))

    # Calculate padding
    pad_height = target_size - new_height
    pad_width = target_size - new_width

    # Pad the resized frame with black pixels
    padded_frame = cv2.copyMakeBorder(resized_frame,
                                      top=pad_height//2,
                                      bottom=pad_height-(pad_height//2),
                                      left=pad_width//2,
                                      right=pad_width-(pad_width//2),
                                      borderType=cv2.BORDER_CONSTANT,
                                      value=[0, 0, 0])

    return padded_frame
    
def preprocess_frame(frame):
    # Convert to grayscale
    gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    # Resize or apply other preprocessing steps if necessary
    return gray_frame

def get_video_frames(video_path):
    frames = []
    video_capture = cv2.VideoCapture(video_path)

    while video_capture.isOpened():
        ret, frame = video_capture.read()
        if ret:
            gray_frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            resized_frame = resize_and_pad(gray_frame, 256)
            frames.append(resized_frame)
        else:
            break

    video_capture.release()
    return frames

def split_video(video):
    """
    Splits a video into as many FRAMES_PER_CLIP-frame videos as possible.

    Parameters:
    video (numpy.ndarray): The video to split. Should be a 3D NumPy array of shape (num_frames, height, width).

    Returns:
    list: A list of 3D NumPy arrays, each representing a FRAMES_PER_CLIP-frame section of the original video.
    """
    num_frames, height, width = video.shape
    num_sections = num_frames // FRAMES_PER_CLIP
    remainder_frames = num_frames % FRAMES_PER_CLIP

    sections = []
    for i in range(num_sections):
        start_frame = i * FRAMES_PER_CLIP
        end_frame = start_frame + FRAMES_PER_CLIP
        section = video[start_frame:end_frame, :, :]
        sections.append(section)

    if remainder_frames > 0:
        last_section = video[-FRAMES_PER_CLIP:, :, :]
        sections.append(last_section)

    return sections

def get_frames(video_path):
  video_frames = get_video_frames(video_path)
  video_frames = np.array(video_frames)
  video_frames = video_frames.astype(np.float32)
  video_frames = np.array(split_video(video_frames))
  video_frames =  video_frames[..., np.newaxis]
  return video_frames

#Displays video when given numpy array
def display_video(frames):
   fig = plt.figure()
   im = plt.imshow(frames[0,:,:,:])

   plt.close() # this is required to not display the generated image

   def init():
       im.set_data(frames[0,:,:,:])

   def animate(i):
       im.set_data(frames[i,:,:,:])
       return im
 
   anim = animation.FuncAnimation(fig, animate, init_func=init, frames=frames.shape[0],
                                interval=50)
   HTML(anim.to_html5_video())


In [4]:
from os import listdir
from os.path import isfile, join
import os

def get_all_videos_from_directory(directory_name: str, max_videos_to_load=1000000):
  print('starting....')
  file_names = sorted([f for f in os.listdir(directory_name) if os.path.isfile(os.path.join(directory_name, f))])
  num_videos_to_load: int = min(max_videos_to_load, len(file_names))
  all_videos = []

  file_names = [name for name in file_names if name.endswith('.mp4')]
  for i in range(num_videos_to_load):
    file_name = file_names[i]
    full_path = f'{directory_name}/{file_name}'

    videos_from_get_frames = get_frames(full_path)
    print(len(videos_from_get_frames), len(videos_from_get_frames[0]))
    print(f'Loaded {len(videos_from_get_frames)} clips from video {i+1} of {num_videos_to_load} at {full_path}')
    for x in videos_from_get_frames:
      if len(x) == FRAMES_PER_CLIP:
        all_videos.append(x)
      else:
        print(f'Discarded video with {len(x)} frames from consideration')
      print(len(x))
    
    


  
  print(file_names)
  return all_videos

DECEPTIVE_VIDEOS_PATH = 'Real-life_Deception_Detection_2016/Clips/Deceptive'
TRUTHFUL_VIDEOS_PATH = 'Real-life_Deception_Detection_2016/Clips/Truthful'

deceptive_videos = get_all_videos_from_directory(directory_name=DECEPTIVE_VIDEOS_PATH, max_videos_to_load=3)
truthful_videos = get_all_videos_from_directory(directory_name=TRUTHFUL_VIDEOS_PATH, max_videos_to_load=3)
  
for x in deceptive_videos:
   print(x.shape)
   print(len(x))
train_x = np.array(deceptive_videos)

starting....
32 16
Loaded 32 clips from video 1 of 3 at Real-life_Deception_Detection_2016/Clips/Deceptive/trial_lie_001.mp4
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
118 16
Loaded 118 clips from video 2 of 3 at Real-life_Deception_Detection_2016/Clips/Deceptive/trial_lie_002.mp4
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
16
14 16
Loaded 14 clips from video 3 of 3 at Real-life_Deception_Detection_2016/Clips/Deceptive/trial_lie_003.mp4
16
16
16
16
16
16
16
16
16
16
16
16
16
16
['trial_lie_001.mp4', 'trial_lie_002.mp4', 'trial_lie_003.mp4', 'trial_lie_004.mp4', 'trial_lie_005.mp4', 'trial_lie_006.mp4', 'trial_lie_007.mp4', 'trial_li

In [5]:
train_x.shape

(164, 16, 256, 256, 1)

In [6]:
train_y = np.zeros(train_x.shape[0])

In [7]:
train_y.shape

(164,)

In [8]:
deceptive_videos = np.stack(deceptive_videos, axis=0)

In [9]:
video_path = "Real-life_Deception_Detection_2016/Clips/Deceptive/trial_lie_031.mp4"
display_video_path(video_path)

In [10]:
@tf.function
def preprocess(frames: tf.Tensor, label: tf.Tensor):
    """Preprocess the frames tensors and parse the labels."""
    # Preprocess images
    frames = tf.image.convert_image_dtype(
        frames[
            ..., tf.newaxis
        ],  # The new axis is to help for further processing with Conv3D layers
        tf.float32,
    )
    # Parse label
    label = tf.cast(label, tf.float32)
    return frames, label


def prepare_dataloader(
    videos: np.ndarray,
    labels: np.ndarray,
    loader_type: str = "train",
    batch_size: int = BATCH_SIZE,
):
    """Utility function to prepare the dataloader."""
    dataset = tf.data.Dataset.from_tensor_slices((videos, labels))

    if loader_type == "train":
        dataset = dataset.shuffle(BATCH_SIZE * 2)

    dataloader = (
        dataset.map(preprocess, num_parallel_calls=tf.data.AUTOTUNE)
        .batch(batch_size)
        .prefetch(tf.data.AUTOTUNE)
    )
    return dataloader


trainloader = prepare_dataloader(deceptive_videos[:20], train_y[:20], "train")
#validloader = prepare_dataloader(valid_videos, valid_labels, "valid")
#testloader = prepare_dataloader(test_videos, test_labels, "test")

2023-04-30 13:59:54.329478: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-30 13:59:54.329555: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-30 13:59:54.346719: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:01:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-30 13:59:54.346798: I tensorflow/compiler/xla/stream_executor/cuda/cuda_gpu_executor.cc:982] could not open file to read NUMA node: /sys/bus/pci/devices/0000:08:00.0/numa_node
Your kernel may have been built without NUMA support.
2023-04-30 13:59:54.346813: I tensorflow/compile

In [11]:
class TubeletEmbedding(layers.Layer):
    def __init__(self, embed_dim, patch_size, **kwargs):
        super().__init__(**kwargs)
        self.projection = layers.Conv3D(
            filters=embed_dim,
            kernel_size=patch_size,
            strides=patch_size,
            padding="VALID",
        )
        self.flatten = layers.Reshape(target_shape=(-1, embed_dim))

    def call(self, videos):
        projected_patches = self.projection(videos)
        flattened_patches = self.flatten(projected_patches)
        return flattened_patches

In [12]:
class PositionalEncoder(layers.Layer):
    def __init__(self, embed_dim, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim

    def build(self, input_shape):
        _, num_tokens, _ = input_shape
        self.position_embedding = layers.Embedding(
            input_dim=num_tokens, output_dim=self.embed_dim
        )
        self.positions = tf.range(start=0, limit=num_tokens, delta=1)

    def call(self, encoded_tokens):
        # Encode the positions and add it to the encoded tokens
        encoded_positions = self.position_embedding(self.positions)
        encoded_tokens = encoded_tokens + encoded_positions
        return encoded_tokens

In [13]:
def create_vivit_classifier(
    tubelet_embedder,
    positional_encoder,
    input_shape=INPUT_SHAPE,
    transformer_layers=NUM_LAYERS,
    num_heads=NUM_HEADS,
    embed_dim=PROJECTION_DIM,
    layer_norm_eps=LAYER_NORM_EPS,
    num_classes=NUM_CLASSES,
):
    # Get the input layer
    inputs = layers.Input(shape=input_shape)
    # Create patches.
    patches = tubelet_embedder(inputs)
    # Encode patches.
    encoded_patches = positional_encoder(patches)

    # Create multiple layers of the Transformer block.
    for _ in range(transformer_layers):
        # Layer normalization and MHSA
        x1 = layers.LayerNormalization(epsilon=1e-6)(encoded_patches)
        attention_output = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim // num_heads, dropout=0.1
        )(x1, x1)

        # Skip connection
        x2 = layers.Add()([attention_output, encoded_patches])

        # Layer Normalization and MLP
        x3 = layers.LayerNormalization(epsilon=1e-6)(x2)
        x3 = keras.Sequential(
            [
                layers.Dense(units=embed_dim * 4, activation=tf.nn.gelu),
                layers.Dense(units=embed_dim, activation=tf.nn.gelu),
            ]
        )(x3)

        # Skip connection
        encoded_patches = layers.Add()([x3, x2])

    # Layer normalization and Global average pooling.
    representation = layers.LayerNormalization(epsilon=layer_norm_eps)(encoded_patches)
    representation = layers.GlobalAvgPool1D()(representation)

    # Classify outputs.
    outputs = layers.Dense(units=num_classes, activation="softmax")(representation)

    # Create the Keras model.
    model = keras.Model(inputs=inputs, outputs=outputs)
    return model

In [14]:
train_x.shape

(164, 16, 256, 256, 1)

In [15]:
train_x_1 = train_x[0]

In [16]:
def run_experiment():
    # Initialize model
    model = create_vivit_classifier(
        tubelet_embedder=TubeletEmbedding(
            embed_dim=PROJECTION_DIM, patch_size=PATCH_SIZE
        ),
        positional_encoder=PositionalEncoder(embed_dim=PROJECTION_DIM),
    )

    # Compile the model with the optimizer, loss function
    # and the metrics.
    optimizer = keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    model.compile(
        optimizer=optimizer,
        loss="sparse_categorical_crossentropy",
        metrics=[
            keras.metrics.SparseCategoricalAccuracy(name="accuracy"),
            keras.metrics.SparseTopKCategoricalAccuracy(5, name="top-5-accuracy"),
        ],
    )

    # Train the model.
    model.fit(trainloader, epochs=EPOCHS)

 #   _, accuracy, top_5_accuracy = model.evaluate(testloader)
   # print(f"Test accuracy: {round(accuracy * 100, 2)}%")
   # print(f"Test top 5 accuracy: {round(top_5_accuracy * 100, 2)}%")

    return model


model = run_experiment()

Epoch 1/2


2023-04-30 13:59:56.482137: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [20,16,256,256,1]
	 [[{{node Placeholder/_0}}]]
2023-04-30 13:59:56.482348: I tensorflow/core/common_runtime/executor.cc:1197] [/device:CPU:0] (DEBUG INFO) Executor start aborting (this does not indicate an error and you can ignore this message): INVALID_ARGUMENT: You must feed a value for placeholder tensor 'Placeholder/_0' with dtype float and shape [20,16,256,256,1]
	 [[{{node Placeholder/_0}}]]
2023-04-30 14:00:02.984551: I tensorflow/compiler/xla/stream_executor/cuda/cuda_dnn.cc:424] Loaded cuDNN version 8900
2023-04-30 14:00:03.561402: I tensorflow/tsl/platform/default/subprocess.cc:304] Start cannot spawn child process: No such file or directory
2023-04-30 14:00:03.589417: I ten

ResourceExhaustedError: Graph execution error:

Detected at node 'model/multi_head_attention_3/softmax/Softmax' defined at (most recent call last):
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/runpy.py", line 197, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/traitlets/config/application.py", line 1043, in launch_instance
      app.start()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/kernelapp.py", line 725, in start
      self.io_loop.start()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/tornado/platform/asyncio.py", line 215, in start
      self.asyncio_loop.run_forever()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/asyncio/base_events.py", line 601, in run_forever
      self._run_once()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/asyncio/base_events.py", line 1905, in _run_once
      handle._run()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/asyncio/events.py", line 80, in _run
      self._context.run(self._callback, *self._args)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 513, in dispatch_queue
      await self.process_one()
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 502, in process_one
      await dispatch(*args)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 409, in dispatch_shell
      await result
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/kernelbase.py", line 729, in execute_request
      reply_content = await reply_content
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/ipkernel.py", line 422, in do_execute
      res = shell.run_cell(
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/ipykernel/zmqshell.py", line 540, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3009, in run_cell
      result = self._run_cell(
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3064, in _run_cell
      result = runner(coro)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3269, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3448, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/IPython/core/interactiveshell.py", line 3508, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_22414/1925655257.py", line 32, in <module>
      model = run_experiment()
    File "/tmp/ipykernel_22414/1925655257.py", line 23, in run_experiment
      model.fit(trainloader, epochs=EPOCHS)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 1685, in fit
      tmp_logs = self.train_function(iterator)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 1284, in train_function
      return step_function(self, iterator)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 1268, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 1249, in run_step
      outputs = model.train_step(data)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 1050, in train_step
      y_pred = self(x, training=True)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/training.py", line 558, in __call__
      return super().__call__(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/functional.py", line 512, in call
      return self._run_internal_graph(inputs, training=training, mask=mask)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/functional.py", line 669, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/layers/attention/multi_head_attention.py", line 598, in call
      attention_output, attention_scores = self._compute_attention(
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/layers/attention/multi_head_attention.py", line 529, in _compute_attention
      attention_scores = self._masked_softmax(
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/layers/attention/multi_head_attention.py", line 494, in _masked_softmax
      return self._softmax(attention_scores, attention_mask)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 65, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/engine/base_layer.py", line 1145, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/utils/traceback_utils.py", line 96, in error_handler
      return fn(*args, **kwargs)
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/layers/activation/softmax.py", line 103, in call
      return backend.softmax(inputs, axis=self.axis[0])
    File "/home/yolo/miniconda3/envs/test4/lib/python3.9/site-packages/keras/backend.py", line 5443, in softmax
      return tf.nn.softmax(x, axis=axis)
Node: 'model/multi_head_attention_3/softmax/Softmax'
OOM when allocating tensor with shape[8,8,2048,2048] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator gpu_async_0
	 [[{{node model/multi_head_attention_3/softmax/Softmax}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_20637]

In [None]:
import tensorflow as tf
print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
