In [1]:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
from natsort import natsorted

In [2]:
IMG_X_SIZE = 320
IMG_Y_SIZE = 240
EPOCHS = 25
NUM_FEATURES = 1024
MAX_SEQ_LENGTH = 512

In [3]:
train_data_df = pd.read_csv('sampled_50.csv')
train_data_df.head(10)

Unnamed: 0.1,Unnamed: 0,path,category
0,0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [4]:
filter = ['arrest', 'attack', 'blast', 'deliberate damage', 'firing', 'road accident', 'theft']
train_filtered_df = train_data_df[train_data_df['category'].isin(filter)]
train_filtered_df.head(10)

Unnamed: 0.1,Unnamed: 0,path,category
0,0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [5]:
train_filtered_df.reset_index(drop=True, inplace=True)
train_filtered_df.drop(columns='Unnamed: 0', inplace=True)
train_filtered_df.head(10)

Unnamed: 0,path,category
0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [6]:
from tensorflow.keras.preprocessing.image import load_img
from natsort import natsorted

def load_video(path):
    frames = []
    samples = natsorted(os.listdir(path))
    # print(samples)
    for sample in samples:
        imgpath = os.fsencode(path +'\\'+ os.fsdecode(sample))
        img = load_img(imgpath)
        frames.append(np.asarray(img))
    return (np.asarray(frames))

In [7]:
def build_feature_extractor():
    feature_extractor = tf.keras.applications.DenseNet121(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_Y_SIZE, IMG_X_SIZE, 3),
    )
    preprocess_input = tf.keras.applications.inception_v3.preprocess_input

    inputs = tf.keras.Input((IMG_Y_SIZE, IMG_X_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return tf.keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/densenet/densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5


In [8]:
label_processor = tf.keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_filtered_df["category"])
)
print(label_processor.get_vocabulary())

['arrest', 'attack', 'blast', 'deliberate damage', 'firing', 'road accident', 'theft']


In [21]:
def prepare_all_videos(df):
    num_samples = len(df)
    video_paths = df["path"].values.tolist()
    labels = df["category"].values
    labels = label_processor(labels[..., None]).numpy()

    #frame features to be learned
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        print("loading video from path:", path)
        frames = load_video(path)
        
                # Pad shorter videos.
        if len(frames) != 0 and len(frames) < MAX_SEQ_LENGTH:
            diff = MAX_SEQ_LENGTH - len(frames)
            padding = np.zeros((diff, IMG_Y_SIZE, IMG_X_SIZE, 3), dtype="float32")
            frames = np.concatenate((frames, padding))
        
        frames = frames[None, ...]

        # Initialize placeholders to store features of the current video.
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                if np.mean(batch[j, :]) > 0.0:
                    temp_frame_features[i, j, :] = feature_extractor.predict(
                        batch[None, j, :],
                        verbose=0
                    )

                else:
                    temp_frame_features[i, j, :] = 0.0

        frame_features[idx,] = temp_frame_features.squeeze()

    return frame_features, labels

In [46]:
# try smaller subset
train_filtered_df = train_filtered_df[:14]
train_data, train_labels = prepare_all_videos(train_filtered_df)

loading video from path: C:\Development\Code\JupyterProjects\MLBuzz22\Crime Dataset\train\arrest\video_4
loading video from path: C:\Development\Code\JupyterProjects\MLBuzz22\Crime Dataset\train\attack\video_98
loading video from path: C:\Development\Code\JupyterProjects\MLBuzz22\Crime Dataset\train\blast\video_12
loading video from path: C:\Development\Code\JupyterProjects\MLBuzz22\Crime Dataset\train\deliberate damage\video_3
loading video from path: C:\Development\Code\JupyterProjects\MLBuzz22\Crime Dataset\train\firing\video_22


KeyboardInterrupt: 

In [23]:
print(f"Frame features in train set: {train_data.shape}")
np.save('densenet_sample_2_features_train0', train_data)

Frame features in train set: (14, 512, 1024)


In [24]:
#override labels
labels = train_filtered_df['category'].values
labels = label_processor(labels[..., None]).numpy()
train_labels = labels
print(labels, len(train_labels))

[[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]
 [0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [6]] 14


In [None]:
#randomize extraction
size = len(train_data)
shuffle_idx = np.arange(size)
np.random.shuffle(shuffle_idx)
print(shuffle_idx)

train_data_shuffled = train_data[shuffle_idx]
train_labels_shuffled = train_labels[shuffle_idx]
print(train_data_shuffled)
print(train_labels_shuffled)

In [27]:
from tensorflow.keras import layers
class PositionalEmbedding(layers.Layer):
    def __init__(self, sequence_length, output_dim, **kwargs):
        super().__init__(**kwargs)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim
        )
        self.sequence_length = sequence_length
        self.output_dim = output_dim

    def call(self, inputs):
        # The inputs are of shape: `(batch_size, frames, num_features)`
        length = tf.shape(inputs)[1]
        positions = tf.range(start=0, limit=length, delta=1)
        embedded_positions = self.position_embeddings(positions)
        return inputs + embedded_positions

    def compute_mask(self, inputs, mask=None):
        mask = tf.reduce_any(tf.cast(inputs, "bool"), axis=-1)
        return mask

In [40]:
class TransformerEncoder(layers.Layer):
    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim, dropout=0.3
        )
        self.dense_proj = tf.keras.Sequential(
            [layers.Dense(dense_dim, activation=tf.nn.gelu), layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask=None):
        if mask is not None:
            mask = mask[:, tf.newaxis, :]

        attention_output = self.attention(inputs, inputs, attention_mask=mask)
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

In [36]:
def get_compiled_model():
    sequence_length = MAX_SEQ_LENGTH
    embed_dim = NUM_FEATURES
    dense_dim = 4
    num_heads = 1
    classes = len(label_processor.get_vocabulary())

    inputs = tf.keras.Input(shape=(None, None))
    x = PositionalEmbedding(
        sequence_length, embed_dim, name="frame_position_embedding"
    )(inputs)
    x = TransformerEncoder(embed_dim, dense_dim, num_heads, name="transformer_layer")(x)
    x = layers.GlobalMaxPooling1D()(x)
    x = layers.Dropout(0.5)(x)
    outputs = layers.Dense(classes, activation="softmax")(x)
    model = tf.keras.Model(inputs, outputs)

    model.compile(
        optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
    )
    return model

In [42]:
def run_experiment(start, end, loadmodel=False):
    # filepath = "/tmp/video_classifier"
    # checkpoint = keras.callbacks.ModelCheckpoint(
    #     filepath, save_weights_only=True, save_best_only=True, verbose=1
    # )
    
    if loadmodel:
        #load model
        seq_model = tf.keras.models.load_model('xformer_video_identifier')
        print('Loaded model xformer_video_identifier')
    else:
        seq_model = get_compiled_model()
        print('Generated model graph')
    
    #partition to handle GPU memory
    print("Training from ", start, "to ", end)
    train_data_part = train_data[start:end]
    train_labels_part = train_labels[start:end]

    model = get_compiled_model()
    history = model.fit(
        train_data_part,
        train_labels_part,
        validation_split=0.15,
        epochs=EPOCHS,
        # callbacks=[checkpoint],
    )

    # model.load_weights(filepath)
    # _, accuracy = model.evaluate(test_data, test_labels)
    # print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    model.save('xformer_video_identifier')
    print("Saved model xformer_video_identifier")
    
    return model

In [38]:
tf.keras.backend.clear_session()

In [43]:
trained_model = run_experiment(0, 14)

Generated model graph
Training from  0 to  14
Epoch 1/25
Epoch 2/25
Epoch 3/25
Epoch 4/25
Epoch 5/25
Epoch 6/25
Epoch 7/25
Epoch 8/25
Epoch 9/25
Epoch 10/25
Epoch 11/25
Epoch 12/25
Epoch 13/25
Epoch 14/25
Epoch 15/25
Epoch 16/25
Epoch 17/25
Epoch 18/25
Epoch 19/25
Epoch 20/25
Epoch 21/25
Epoch 22/25
Epoch 23/25
Epoch 24/25
Epoch 25/25




INFO:tensorflow:Assets written to: xformer_video_identifier\assets


INFO:tensorflow:Assets written to: xformer_video_identifier\assets


Saved model xformer_video_identifier


  layer_config = serialize_layer_fn(layer)
  return generic_utils.serialize_keras_object(obj)


In [44]:
def test_predict(start, end, model):
    class_vocab = label_processor.get_vocabulary()
    results = model.predict(train_data[start:end])
    # print(results)
    for result in results:
        print("---------------------------------")
        for i in np.argsort(result)[::-1]:
            print(f" {class_vocab[i]}: {result[i] * 100:5.2f}%")

In [45]:
test_predict(0, 7, trained_model)

---------------------------------
 arrest: 96.62%
 blast:  1.13%
 attack:  0.96%
 firing:  0.78%
 road accident:  0.29%
 deliberate damage:  0.21%
 theft:  0.00%
---------------------------------
 attack: 58.93%
 arrest: 20.87%
 blast: 10.09%
 firing:  7.61%
 deliberate damage:  1.89%
 road accident:  0.61%
 theft:  0.00%
---------------------------------
 blast: 46.34%
 arrest: 43.93%
 attack:  6.13%
 road accident:  1.82%
 firing:  1.08%
 deliberate damage:  0.70%
 theft:  0.00%
---------------------------------
 deliberate damage: 72.87%
 attack:  9.13%
 arrest:  8.39%
 firing:  5.76%
 blast:  3.44%
 road accident:  0.40%
 theft:  0.00%
---------------------------------
 firing: 64.24%
 attack: 20.08%
 arrest:  9.99%
 blast:  2.54%
 deliberate damage:  2.11%
 road accident:  1.05%
 theft:  0.00%
---------------------------------
 road accident: 87.24%
 arrest: 10.49%
 firing:  1.04%
 blast:  0.86%
 deliberate damage:  0.19%
 attack:  0.18%
 theft:  0.00%
----------------------------