In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
import cv2 as cv

import tensorflow as tf
from tensorflow import keras, Tensor
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, Dense, Dropout, Flatten, AveragePooling2D, Input, ReLU, BatchNormalization, Add, MaxPool2D, GlobalMaxPool2D, TimeDistributed, GRU
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.compat.v1.keras.layers import CuDNNGRU


import math
import datetime

import glob
from keras_video import VideoFrameGenerator, SlidingFrameGenerator, SplitFrameGenerator
import keras_video.utils

In [2]:
IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_CHANNELS = 512, 512, 3  
IMAGE_SIZE = (IMAGE_HEIGHT, IMAGE_WIDTH)
INPUT_SHAPE = IMAGE_SIZE + (IMAGE_CHANNELS,)

BATCH_SIZE = 2
NBFRAME = 3

In [3]:
# use sub directories names as classes
classes = [i.split(os.path.sep)[2] for i in glob.glob('./RLDD_fold_1_organized/*')]
classes.sort()
print(classes)

['alert', 'drowsy', 'low_vagilant']


In [4]:
# pattern to get videos and classes
glob_pattern='/media/data/determined/workdir/mosa274994/RLDD_fold_1_organized/{classname}/*.mp4'
glob_pattern

'/media/data/determined/workdir/mosa274994/RLDD_fold_1_organized/{classname}/*.mp4'

In [5]:
# data_aug = ImageDataGenerator(
#     # validation_split=0.3,
#     # zoom_range=0.1,
#     # horizontal_flip=True,
#     # vertical_flip=True,
#     # rotation_range=90)
#     rescale=1./255)

In [14]:
train = SplitFrameGenerator(
    glob_pattern=glob_pattern,
    nb_frames=NBFRAME,
    split_val=.4, 
    shuffle=True,
    classes=classes,
    batch_size=BATCH_SIZE,
    target_shape=IMAGE_SIZE,
    nb_channel=IMAGE_CHANNELS,
    # transformation=data_aug,
    use_frame_cache=True)

Total data: 3 classes for 18 files for train


In [12]:
valid = train.get_validation_generator()

Total data: 3 classes for 0 files for validation


In [9]:
len(train)

0

In [10]:
len(valid)

0

In [9]:
keras_video.utils.show_sample(train, random=True)

ValueError: Upper bound must be positive.

In [28]:
def build_convnet(shape=(112, 112, 3)):
    momentum = .9
    model = keras.Sequential()
    model.add(Conv2D(64, (3,3), input_shape=shape,
        padding='same', activation='relu'))
    model.add(Conv2D(64, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    model.add(MaxPool2D())
    
    model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(128, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    model.add(MaxPool2D())
    
    model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(256, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    model.add(MaxPool2D())
    
    model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    model.add(Conv2D(512, (3,3), padding='same', activation='relu'))
    model.add(BatchNormalization(momentum=momentum))
    
    # flatten...
    model.add(GlobalMaxPool2D())
    return model

def action_model(shape=(5, 112, 112, 3), nbout=3):
    # Create our convnet with (112, 112, 3) input shape
    convnet = build_convnet(shape[1:])
    
    # then create our final model
    model = keras.Sequential()
    # add the convnet with (5, 112, 112, 3) shape
    model.add(TimeDistributed(convnet, input_shape=shape))
    # here, you can also use GRU or LSTM
    model.add(CuDNNGRU(64))
    # and finally, we make a decision network
    model.add(Dense(1024, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(512, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(128, activation='relu'))
    model.add(Dropout(.5))
    model.add(Dense(64, activation='relu'))
    model.add(Dense(nbout, activation='softmax'))
    return model

In [29]:
INSHAPE = (NBFRAME,) + IMAGE_SIZE + (IMAGE_CHANNELS,) # (5, 112, 112, 3)
INSHAPE

(500, 512, 512, 3)

In [30]:
model = action_model(INSHAPE, len(classes))
optimizer = keras.optimizers.Adam(0.001)
model.compile(
    optimizer,
    'categorical_crossentropy',
    metrics=['acc', 'mse']
)
model.summary()

Model: "sequential_3"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 time_distributed_1 (TimeDis  (None, 500, 512)         4689216   
 tributed)                                                       
                                                                 
 cu_dnngru_1 (CuDNNGRU)      (None, 64)                110976    
                                                                 
 dense_5 (Dense)             (None, 1024)              66560     
                                                                 
 dropout_3 (Dropout)         (None, 1024)              0         
                                                                 
 dense_6 (Dense)             (None, 512)               524800    
                                                                 
 dropout_4 (Dropout)         (None, 512)               0         
                                                      

In [31]:
EPOCHS=50
# create a "chkp" directory before to run that
# because ModelCheckpoint will write models inside
callbacks = [
    keras.callbacks.ReduceLROnPlateau(verbose=1),
    tf.keras.callbacks.EarlyStopping(monitor='loss', patience=4),
    keras.callbacks.ModelCheckpoint(
        './chkp/weights.{epoch:02d}.hdf5',
        verbose=1),
]

steps_per_epoch_train = np.ceil(9 / BATCH_SIZE)
steps_per_epoch_valid = np.ceil(3 / BATCH_SIZE)

print('steps per epoch (train): {}'.format(steps_per_epoch_train))
print('steps per epoch (valid): {}'.format(steps_per_epoch_valid))

model.fit(train,
        # steps_per_epoch=3,
        epochs=EPOCHS,
        validation_data=valid,
        # validation_steps=3,
        callbacks=callbacks,
        verbose=1)

steps per epoch (train): 5.0
steps per epoch (valid): 2.0
Epoch 1/50


2023-02-20 10:33:20.395224: W tensorflow/core/common_runtime/bfc_allocator.cc:462] Allocator (GPU_0_bfc) ran out of memory trying to allocate 62.50GiB (rounded to 67108864000)requested by op sequential_3/time_distributed_1/conv2d_8/Conv2D
If the cause is memory fragmentation maybe the environment variable 'TF_GPU_ALLOCATOR=cuda_malloc_async' will improve the situation. 
Current allocation summary follows.
Current allocation summary follows.
2023-02-20 10:33:20.395291: I tensorflow/core/common_runtime/bfc_allocator.cc:1010] BFCAllocator dump for GPU_0_bfc
2023-02-20 10:33:20.395309: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (256): 	Total Chunks: 116, Chunks in use: 115. 29.0KiB allocated for chunks. 28.8KiB in use in bin. 8.9KiB client-requested in use in bin.
2023-02-20 10:33:20.395322: I tensorflow/core/common_runtime/bfc_allocator.cc:1017] Bin (512): 	Total Chunks: 40, Chunks in use: 40. 21.5KiB allocated for chunks. 21.5KiB in use in bin. 21.5KiB client-requested i

ResourceExhaustedError: Graph execution error:

Detected at node 'sequential_3/time_distributed_1/conv2d_8/Conv2D' defined at (most recent call last):
    File "/opt/conda/lib/python3.8/runpy.py", line 194, in _run_module_as_main
      return _run_code(code, main_globals, None,
    File "/opt/conda/lib/python3.8/runpy.py", line 87, in _run_code
      exec(code, run_globals)
    File "/opt/conda/lib/python3.8/site-packages/ipykernel_launcher.py", line 17, in <module>
      app.launch_new_instance()
    File "/opt/conda/lib/python3.8/site-packages/traitlets/config/application.py", line 846, in launch_instance
      app.start()
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/kernelapp.py", line 712, in start
      self.io_loop.start()
    File "/opt/conda/lib/python3.8/site-packages/tornado/platform/asyncio.py", line 199, in start
      self.asyncio_loop.run_forever()
    File "/opt/conda/lib/python3.8/asyncio/base_events.py", line 570, in run_forever
      self._run_once()
    File "/opt/conda/lib/python3.8/asyncio/base_events.py", line 1859, in _run_once
      handle._run()
    File "/opt/conda/lib/python3.8/asyncio/events.py", line 81, in _run
      self._context.run(self._callback, *self._args)
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 504, in dispatch_queue
      await self.process_one()
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 493, in process_one
      await dispatch(*args)
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 400, in dispatch_shell
      await result
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/kernelbase.py", line 724, in execute_request
      reply_content = await reply_content
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/ipkernel.py", line 390, in do_execute
      res = shell.run_cell(code, store_history=store_history, silent=silent)
    File "/opt/conda/lib/python3.8/site-packages/ipykernel/zmqshell.py", line 528, in run_cell
      return super().run_cell(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2863, in run_cell
      result = self._run_cell(
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 2909, in _run_cell
      return runner(coro)
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/async_helpers.py", line 129, in _pseudo_sync_runner
      coro.send(None)
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3106, in run_cell_async
      has_raised = await self.run_ast_nodes(code_ast.body, cell_name,
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3309, in run_ast_nodes
      if await self.run_code(code, result, async_=asy):
    File "/opt/conda/lib/python3.8/site-packages/IPython/core/interactiveshell.py", line 3369, in run_code
      exec(code_obj, self.user_global_ns, self.user_ns)
    File "/tmp/ipykernel_3876/1290539931.py", line 18, in <cell line: 18>
      model.fit(train,
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1384, in fit
      tmp_logs = self.train_function(iterator)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1021, in train_function
      return step_function(self, iterator)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1010, in step_function
      outputs = model.distribute_strategy.run(run_step, args=(data,))
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 1000, in run_step
      outputs = model.train_step(data)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/training.py", line 859, in train_step
      y_pred = self(x, training=True)
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1101, in __call__
      self._set_mask_metadata(inputs, outputs, input_masks, not eager)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/base_layer.py", line 2573, in _set_mask_metadata
      output_masks = self.compute_mask(inputs, previous_mask)
    File "/opt/conda/lib/python3.8/site-packages/keras/layers/wrappers.py", line 343, in compute_mask
      output_mask = self.layer.compute_mask(inner_inputs, inner_mask)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/sequential.py", line 407, in compute_mask
      outputs = self.call(inputs, mask=mask)  # pylint: disable=unexpected-keyword-arg
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/sequential.py", line 374, in call
      return super(Sequential, self).call(inputs, training=training, mask=mask)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/functional.py", line 451, in call
      return self._run_internal_graph(
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/functional.py", line 589, in _run_internal_graph
      outputs = node.layer(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 64, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/engine/base_layer.py", line 1096, in __call__
      outputs = call_fn(inputs, *args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/utils/traceback_utils.py", line 92, in error_handler
      return fn(*args, **kwargs)
    File "/opt/conda/lib/python3.8/site-packages/keras/layers/convolutional.py", line 248, in call
      outputs = self.convolution_op(inputs, self.kernel)
    File "/opt/conda/lib/python3.8/site-packages/keras/layers/convolutional.py", line 233, in convolution_op
      return tf.nn.convolution(
Node: 'sequential_3/time_distributed_1/conv2d_8/Conv2D'
OOM when allocating tensor with shape[1000,64,512,512] and type float on /job:localhost/replica:0/task:0/device:GPU:0 by allocator GPU_0_bfc
	 [[{{node sequential_3/time_distributed_1/conv2d_8/Conv2D}}]]
Hint: If you want to see a list of allocated tensors when OOM happens, add report_tensor_allocations_upon_oom to RunOptions for current allocation info. This isn't available when running in Eager mode.
 [Op:__inference_train_function_6879]