In [1]:
import os
import cv2
import numpy as np
import random

import tensorflow as tf
import keras
from keras import layers
import einops

In [2]:
# Cap how much GPU memory TensorFlow may allocate.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Expose the first physical GPU as a single virtual device limited to 6144 MB (6 GB).
        first_gpu_limit = tf.config.experimental.VirtualDeviceConfiguration(memory_limit=6144)
        tf.config.experimental.set_virtual_device_configuration(gpus[0], [first_gpu_limit])
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as e:
        # Virtual devices must be configured before the GPUs have been initialized.
        print(e)

1 Physical GPUs, 1 Logical GPUs


In [3]:
# Root folder holding one sub-folder per class.
base_url = './crime_data/Anomaly-Videos-Part-1/'
# os.listdir returns entries in arbitrary order; sorting keeps the class order
# (and therefore the integer labels derived from it) identical on every run and machine.
# Hidden entries (names starting with '.') are skipped.
classes = sorted(i for i in os.listdir(base_url) if not i.startswith('.'))
print(classes)

['Abuse', 'Arrest', 'Arson', 'Assault', 'Burglary', 'Explosion', 'Fighting', 'Normal_Videos_event', 'RoadAccidents', 'Robbery', 'Shooting', 'Shoplifting', 'Stealing', 'Vandalism']


In [4]:
# Map every class name to its integer label, following the order of `classes`.
# Built with enumerate so no counter named `id` shadows the builtin in the notebook namespace.
class_ids = {class_name: label for label, class_name in enumerate(classes)}

print(class_ids)

{'Abuse': 0, 'Arrest': 1, 'Arson': 2, 'Assault': 3, 'Burglary': 4, 'Explosion': 5, 'Fighting': 6, 'Normal_Videos_event': 7, 'RoadAccidents': 8, 'Robbery': 9, 'Shooting': 10, 'Shoplifting': 11, 'Stealing': 12, 'Vandalism': 13}


In [5]:
# Read the training split: one entry per line, of the form '<class>/<video file>'.
# The context manager closes the file handle; blank lines are dropped and surrounding
# whitespace is trimmed so every entry carries a usable class prefix.
with open('./crime_data/UCF_Crimes-Train-Test-Split/Action_Regnition_splits/train_001.txt','r') as split_file:
    train_001 = [line.strip() for line in split_file if line.strip()]
len(train_001)

532

In [6]:
# Count how many entries of the training split belong to each class
# (the class is the part of the entry before the first '/').
counts = {}
for entry in train_001:
    label = entry.split('/')[0]
    counts[label] = counts.get(label, 0) + 1

In [7]:
counts

{'Abuse': 38,
 'Arrest': 38,
 'Arson': 38,
 'Assault': 38,
 'Burglary': 38,
 'Explosion': 38,
 'Fighting': 38,
 'Normal_Videos_event': 38,
 'RoadAccidents': 38,
 'Robbery': 38,
 'Shooting': 38,
 'Shoplifting': 38,
 'Stealing': 38,
 'Vandalism': 38}

In [8]:
# Split the entries per class: the first four entries seen for a class go to
# validation, every later entry of that class goes to training.
val_urls, train_urls = [], []
val_counts, train_counts = {}, {}
for entry in train_001:
    label = entry.split('/')[0]
    if val_counts.get(label, 0) < 4:
        val_counts[label] = val_counts.get(label, 0) + 1
        val_urls.append(entry)
        continue
    train_counts[label] = train_counts.get(label, 0) + 1
    train_urls.append(entry)

In [9]:
val_counts

{'Abuse': 4,
 'Arrest': 4,
 'Arson': 4,
 'Assault': 4,
 'Burglary': 4,
 'Explosion': 4,
 'Fighting': 4,
 'Normal_Videos_event': 4,
 'RoadAccidents': 4,
 'Robbery': 4,
 'Shooting': 4,
 'Shoplifting': 4,
 'Stealing': 4,
 'Vandalism': 4}

In [10]:
train_counts

{'Abuse': 34,
 'Arrest': 34,
 'Arson': 34,
 'Assault': 34,
 'Burglary': 34,
 'Explosion': 34,
 'Fighting': 34,
 'Normal_Videos_event': 34,
 'RoadAccidents': 34,
 'Robbery': 34,
 'Shooting': 34,
 'Shoplifting': 34,
 'Stealing': 34,
 'Vandalism': 34}

In [11]:
# Shuffle both lists in place: training entries first, then validation entries.
for url_list in (train_urls, val_urls):
    random.shuffle(url_list)

In [29]:
# Read the test split: one entry per line, of the form '<class>/<video file>'.
# Lines are split on the newline alone and then trimmed, so an entry is recognised
# whether or not it carries trailing whitespace; blank lines are dropped.
# The context manager closes the file handle.
with open('./crime_data/UCF_Crimes-Train-Test-Split/Action_Regnition_splits/test_001.txt','r') as split_file:
    test_urls = [line.strip() for line in split_file if line.strip()]
len(test_urls)

168

In [13]:
# Sampling / batching configuration shared by the cells below.
nframes, batch_size = 10, 8    # frames sampled per video, videos per batch
HEIGHT, WIDTH = 240, 320       # frame size (pixels) of every sampled frame
def format_frames(frame, output_size = (HEIGHT, WIDTH)):
  """
    Convert a frame to float32 and crop or pad it to a fixed size.

    Args:
      frame: Image to format (here: a frame returned by cv2.VideoCapture.read).
      output_size: (height, width) of the returned frame. Defaults to (HEIGHT, WIDTH).

    Return:
      The frame as a tf.float32 tensor, cropped and/or padded to output_size.
  """
  frame = tf.image.convert_image_dtype(frame, tf.float32)
  frame = tf.image.resize_with_crop_or_pad(frame, *output_size)
  return frame

def frames_from_video_file(video_path, n_frames, output_size = (HEIGHT,WIDTH), frame_step = 15):
  """
    Sample n_frames frames, frame_step frames apart, from a video file.

    The first sampled frame is chosen at random among the positions that leave
    room for the whole clip; videos shorter than the clip are read from frame 0
    and the missing frames are filled with zeros.

    Args:
      video_path: File path to the video.
      n_frames: Number of frames to be created per video file.
      output_size: (height, width) each frame is cropped/padded to.
      frame_step: Number of frames read between two sampled frames.

    Return:
      An NumPy array of frames in the shape of (n_frames, height, width, channels),
      with the channel axis reversed from OpenCV's BGR order to RGB.

    Raises:
      ValueError: If no frame at all can be read from video_path.
  """
  src = cv2.VideoCapture(str(video_path))
  try:
    # OpenCV reports the frame count as a float; random.randint needs integer bounds.
    video_length = int(src.get(cv2.CAP_PROP_FRAME_COUNT))

    # Span (in frames) covered by the clip: first frame + (n_frames - 1) steps.
    need_length = 1 + (n_frames - 1) * frame_step

    if need_length > video_length:
      start = 0
    else:
      # randint includes both endpoints, so max_start is already the last
      # position from which the full clip still fits inside the video.
      max_start = video_length - need_length
      start = random.randint(0, max_start)

    src.set(cv2.CAP_PROP_POS_FRAMES, start)
    # ret is a boolean indicating whether read was successful, frame is the image itself
    ret, frame = src.read()
    if not ret:
      raise ValueError(f'Could not read any frame from video: {video_path}')
    result = [format_frames(frame, output_size)]

    for _ in range(n_frames - 1):
      # Skip ahead frame_step frames; only the last one read is kept.
      for _ in range(frame_step):
        ret, frame = src.read()
      if ret:
        result.append(format_frames(frame, output_size))
      else:
        # Ran past the end of the video: pad the clip with an all-zero frame.
        result.append(np.zeros_like(result[0]))
  finally:
    # Release the capture even when reading fails.
    src.release()

  # Reverse the channel axis (BGR -> RGB).
  result = np.array(result)[..., [2, 1, 0]]

  return result

class FrameGenerator:
  """
    Callable generator yielding (video frames, integer class id) pairs.

    Args:
      paths: Split entries of the form '<class>/<video file>', relative to base_url.
      n_frames: Number of frames sampled from each video.
      testing: When true, videos are yielded in the order given instead of shuffled.
  """
  def __init__(self,paths, n_frames,testing = False):
    self.paths = paths
    self.n_frames = n_frames
    self.testing = testing

  def get_files_and_class_names(self):
    """Return a list of (full video path, class name) tuples, one per entry in self.paths."""
    return [(f'{base_url}{entry}'.strip(), entry.split('/')[0])
            for entry in self.paths]

  def __call__(self):
    """Yield (frames, label) for every video; the order is reshuffled on each call unless testing."""
    samples = self.get_files_and_class_names()

    if not self.testing:
      random.shuffle(samples)

    for video_path, class_name in samples:
      yield frames_from_video_file(video_path, self.n_frames), class_ids[class_name]

In [14]:
class Conv2Plus1D(keras.layers.Layer):
  """
    (2+1)D convolution: a convolution over the spatial dimensions followed by a
    convolution over the temporal dimension, both with `filters` output channels.
  """
  def __init__(self, filters, kernel_size, padding):
    super().__init__()
    # kernel_size is indexed as (time, height, width).
    spatial_kernel = (1, kernel_size[1], kernel_size[2])
    temporal_kernel = (kernel_size[0], 1, 1)

    spatial_conv = layers.Conv3D(filters=filters,
                                 kernel_size=spatial_kernel,
                                 padding=padding)
    temporal_conv = layers.Conv3D(filters=filters,
                                  kernel_size=temporal_kernel,
                                  padding=padding)
    self.seq = keras.Sequential([spatial_conv, temporal_conv])

  def call(self, x):
    """Apply the spatial convolution, then the temporal convolution, to x."""
    return self.seq(x)

In [15]:
class ResidualMain(keras.layers.Layer):
  """
    Main branch of a residual block:
    Conv2Plus1D -> LayerNormalization -> ReLU -> Conv2Plus1D -> LayerNormalization.
  """
  def __init__(self, filters, kernel_size):
    super().__init__()
    # Layers are listed in the order they are applied.
    stages = [
        Conv2Plus1D(filters=filters, kernel_size=kernel_size, padding='same'),
        layers.LayerNormalization(),
        layers.ReLU(),
        Conv2Plus1D(filters=filters, kernel_size=kernel_size, padding='same'),
        layers.LayerNormalization(),
    ]
    self.seq = keras.Sequential(stages)

  def call(self, x):
    """Run x through the main branch."""
    return self.seq(x)

In [16]:
class Project(keras.layers.Layer):
  """
    Projection used on the skip connection: a Dense layer with `units` outputs
    followed by LayerNormalization, so the tensor's last dimension becomes `units`.
  """
  def __init__(self, units):
    super().__init__()
    dense = layers.Dense(units)
    norm = layers.LayerNormalization()
    self.seq = keras.Sequential([dense, norm])

  def call(self, x):
    """Project x to the configured number of units."""
    return self.seq(x)

In [17]:
def add_residual_block(input, filters, kernel_size):
  """
    Append a residual block to the model and return its output tensor.

    The main branch is a ResidualMain layer. The skip branch is the input itself,
    or a Project of it when the last dimension of the main-branch output differs
    from the input's. Both branches are summed.
  """
  main_branch = ResidualMain(filters, kernel_size)(input)
  out_channels = main_branch.shape[-1]

  # Only project the skip connection when the channel counts disagree.
  if out_channels != input.shape[-1]:
    skip_branch = Project(out_channels)(input)
  else:
    skip_branch = input

  return layers.add([skip_branch, main_branch])

In [18]:
class ResizeVideo(keras.layers.Layer):
  """Resize every frame of a batch of videos to (height, width)."""
  def __init__(self, height, width):
    super().__init__()
    self.height = height
    self.width = width
    self.resizing_layer = layers.Resizing(self.height, self.width)

  def call(self, video):
    """
      Resize the frames of a video tensor.

      Args:
        video: Tensor representation of the video, laid out as
          (batch, time, height, width, channels).

      Return:
        The video with every frame resized to the layer's height and width.
    """
    # Remember the time dimension so the frames can be regrouped afterwards.
    n_time_steps = einops.parse_shape(video, 'b t h w c')['t']
    # Fold time into the batch axis so the image resizing layer sees plain images.
    frames = einops.rearrange(video, 'b t h w c -> (b t) h w c')
    resized = self.resizing_layer(frames)
    # Unfold the batch axis back into (batch, time).
    return einops.rearrange(resized, '(b t) h w c -> b t h w c', t = n_time_steps)

In [19]:
# Each generated element is (frames, label): a float32 video tensor with 3 channels
# and a scalar int16 class id.
output_signature = (
    tf.TensorSpec(shape = (None, None, None, 3), dtype = tf.float32),
    tf.TensorSpec(shape = (), dtype = tf.int16),
)

train_generator = FrameGenerator(paths=train_urls, n_frames=nframes)
val_generator = FrameGenerator(paths=val_urls, n_frames=nframes)

# Wrap the generators as tf.data datasets and group the videos into batches.
train_ds = tf.data.Dataset.from_generator(
    train_generator, output_signature = output_signature).batch(batch_size)
val_ds = tf.data.Dataset.from_generator(
    val_generator, output_signature = output_signature).batch(batch_size)

In [20]:
# Model input: (batch, frames, height, width, channels); the batch axis is left out of Input.
input_shape = (None, nframes, HEIGHT, WIDTH, 3)
input = layers.Input(shape=(input_shape[1:]))

# Stem: (2+1)D convolution, batch norm, ReLU, then halve the spatial size.
x = Conv2Plus1D(filters=16, kernel_size=(3, 7, 7), padding='same')(input)
x = layers.BatchNormalization()(x)
x = layers.ReLU()(x)
x = ResizeVideo(HEIGHT // 2, WIDTH // 2)(x)

# Blocks 1-3: a residual block followed by a spatial downsizing to 1/4, 1/8, 1/16.
for block_filters, shrink in ((16, 4), (32, 8), (64, 16)):
  x = add_residual_block(x, block_filters, (3, 3, 3))
  x = ResizeVideo(HEIGHT // shrink, WIDTH // shrink)(x)

# Block 4: last residual block, no further resizing.
x = add_residual_block(x, 128, (3, 3, 3))

# Classification head: pool, flatten, and emit 14 logits.
x = layers.GlobalAveragePooling3D()(x)
x = layers.Flatten()(x)
x = layers.Dense(14)(x)

model = keras.Model(input, x)

In [21]:
# The model outputs raw logits and the labels are integer class ids,
# hence sparse categorical cross-entropy with from_logits=True.
loss_fn = keras.losses.SparseCategoricalCrossentropy(from_logits=True)
optimizer = keras.optimizers.Adam(learning_rate = 0.0001)
model.compile(loss = loss_fn, optimizer = optimizer, metrics = ['accuracy'])

In [22]:
from keras.callbacks import ModelCheckpoint
# Keep only the weights of the epoch with the highest validation accuracy.
checkpoint_path = './model/checkpoints/checkpoint.model01_weights.h5'
model_checkpoint_callback = ModelCheckpoint(
    filepath=checkpoint_path,
    monitor="val_accuracy",
    mode="max",
    save_best_only=True,
    save_weights_only=True,
)

In [24]:
# Train for 50 epochs, validating on val_ds; the checkpoint callback is invoked during training.
history = model.fit(
    train_ds,
    validation_data = val_ds,
    epochs = 50,
    callbacks = [model_checkpoint_callback],
)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [32]:
# Restore the weights stored at checkpoint_path, the file the ModelCheckpoint
# callback writes to (configured with save_best_only on val_accuracy).
model.load_weights(checkpoint_path)

In [33]:
# Build the test dataset (testing=True keeps the videos in file order) and evaluate the model on it.
test_generator = FrameGenerator(test_urls, nframes, testing=True)
test_ds = tf.data.Dataset.from_generator(
    test_generator,
    output_signature=output_signature,
).batch(batch_size)
model.evaluate(test_ds)



[2.6950976848602295, 0.125]