In [1]:
import tensorflow as tf
import pandas as pd
import os
import numpy as np
from natsort import natsorted

In [2]:
IMG_X_SIZE = 320
IMG_Y_SIZE = 240
BATCH = 64
EPOCHS = 25

MAX_SEQ_LENGTH = 512
NUM_FEATURES = 2048

In [3]:
# train_data_df = pd.read_csv('train.csv')
train_data_df = pd.read_csv('sampled_50.csv')
train_data_df.head(10)

Unnamed: 0.1,Unnamed: 0,path,category
0,0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [4]:
filter = ['arrest', 'attack', 'blast', 'deliberate damage', 'firing', 'road accident', 'theft']
train_filtered_df = train_data_df[train_data_df['category'].isin(filter)]
train_filtered_df.head(10)

Unnamed: 0.1,Unnamed: 0,path,category
0,0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [5]:
train_filtered_df.reset_index(drop=True, inplace=True)
train_filtered_df.drop(columns='Unnamed: 0', inplace=True)
train_filtered_df.head(10)

Unnamed: 0,path,category
0,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
1,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
2,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast
3,C:\Development\Code\JupyterProjects\MLBuzz22\C...,deliberate damage
4,C:\Development\Code\JupyterProjects\MLBuzz22\C...,firing
5,C:\Development\Code\JupyterProjects\MLBuzz22\C...,road accident
6,C:\Development\Code\JupyterProjects\MLBuzz22\C...,theft
7,C:\Development\Code\JupyterProjects\MLBuzz22\C...,arrest
8,C:\Development\Code\JupyterProjects\MLBuzz22\C...,attack
9,C:\Development\Code\JupyterProjects\MLBuzz22\C...,blast


In [6]:
train_filtered_df.to_csv('train_filtered.csv')

In [7]:
from tensorflow.keras.preprocessing.image import load_img
from natsort import natsorted

def load_video(path):
    frames = []
    samples = natsorted(os.listdir(path))
    # print(samples)
    for sample in samples:
        imgpath = os.fsencode(path +'\\'+ os.fsdecode(sample))
        img = load_img(imgpath)
        frames.append(np.asarray(img))
    return (np.asarray(frames))

In [8]:
test_path = train_filtered_df['path'][0]
vf = load_video(test_path)
vf.shape

(88, 240, 320, 3)

In [9]:
def build_feature_extractor():
    feature_extractor = tf.keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=(IMG_Y_SIZE, IMG_X_SIZE, 3),
    )
    preprocess_input = tf.keras.applications.inception_v3.preprocess_input

    inputs = tf.keras.Input((IMG_Y_SIZE, IMG_X_SIZE, 3))
    preprocessed = preprocess_input(inputs)

    outputs = feature_extractor(preprocessed)
    return tf.keras.Model(inputs, outputs, name="feature_extractor")


feature_extractor = build_feature_extractor()

In [10]:
label_processor = tf.keras.layers.StringLookup(
    num_oov_indices=0, vocabulary=np.unique(train_filtered_df["category"])
)
print(label_processor.get_vocabulary())

['arrest', 'attack', 'blast', 'deliberate damage', 'firing', 'road accident', 'theft']


In [11]:
def prepare_all_videos(df):
    num_samples = len(df)
    video_paths = df["path"].values.tolist()
    labels = df["category"].values
    labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        print("loading video from path:", path)
        frames = load_video(path)
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :],
                    verbose=0
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks), labels


In [None]:
# train_data, train_labels = prepare_all_videos(train_filtered_df)
# test_data, test_labels = prepare_all_videos(test_df, "test")

In [15]:
# print(f"Frame features in train set: {train_data[0].shape}")
# print(f"Frame masks in train set: {train_data[1].shape}")
# np.save('sample_50_features_train0.txt', train_data[0])
# np.save('sample_50_features_train1.txt', train_data[1])

Frame features in train set: (350, 512, 2048)
Frame masks in train set: (350, 512)


In [12]:
temp1 = np.load('sample_50_features_train0.txt.npy')
temp2 = np.load('sample_50_features_train1.txt.npy')
train_data = (temp1, temp2)
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")

Frame features in train set: (350, 512, 2048)
Frame masks in train set: (350, 512)


In [None]:
#override labels
labels = train_filtered_df['category'].values
labels = label_processor(labels[..., None]).numpy()
train_labels = labels
print(labels, len(train_labels))

In [None]:
#randomize extraction
size = len(train_data[0])
shuffle_idx = np.arange(size)
np.random.shuffle(shuffle_idx)
print(shuffle_idx)

train_data_shuffled = ((train_data[0][shuffle_idx], train_data[1][shuffle_idx]))
train_labels_shuffled = train_labels[shuffle_idx]
print(train_data_shuffled)
print(train_labels_shuffled)

In [15]:
def get_sequence_model():
    class_vocab = label_processor.get_vocabulary()

    frame_features_input = tf.keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_input = tf.keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    x = tf.keras.layers.GRU(16, return_sequences=True)(
        frame_features_input, mask=mask_input
    )
    x = tf.keras.layers.GRU(8)(x)
    x = tf.keras.layers.Dropout(0.4)(x)
    x = tf.keras.layers.Dense(8, activation="relu")(x)
    output = tf.keras.layers.Dense(len(class_vocab), activation="softmax")(x)

    rnn_model = tf.keras.Model([frame_features_input, mask_input], output)

    rnn_model.compile(
        loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
    )
    return rnn_model

In [16]:
tf.keras.backend.clear_session()

In [17]:
def run_experiment(start, end, loadmodel=False):
    # filepath = "C:\Development\Code\JupyterProjects\MLBuzz22\video_classifier"
    # checkpoint = tf.keras.callbacks.ModelCheckpoint(
    #     filepath, save_weights_only=True, save_best_only=True, verbose=1
    # )
    
    if loadmodel:
        #load model
        seq_model = tf.keras.models.load_model('cnn_rnn_video_identifier')
        print('Loaded model cnn_rnn_video_identifier')
    else:
        seq_model = get_sequence_model()
        print('Generated model graph')
        
    
    #partition to handle GPU memory
    print("Training from ", start, "to ", end)
    train_data_part = (train_data[0][start:end], train_data[1][start:end])
    train_labels_part = train_labels[start:end]
    
#     train_data_part2 = (train_data[0][101:200], train_data[1][101:200])
#     train_labels_part2 = train_labels[101:200]
    
#     train_data_part3 = (train_data[0][201:300], train_data[1][201:300])
#     train_labels_part3 = train_labels[201:300]
    
#     train_data_part4 = (train_data[0][301:349], train_data[1][301:349])
#     train_labels_part4 = train_labels[301:349]

    
    history = seq_model.fit(
        [train_data_part[0], train_data_part[1]],
        train_labels_part,
        validation_split=0.3,
        epochs=EPOCHS,
        # callbacks=[checkpoint],
    )
    
#     history[1] = seq_model.fit(
#         [train_data_part2[0], train_data_part2[1]],
#         train_labels_part2,
#         validation_split=0.3,
#         epochs=EPOCHS,
#         # callbacks=[checkpoint],
#     )

#     history[2] = seq_model.fit(
#         [train_data_part3[0], train_data_part3[1]],
#         train_labels_part3,
#         validation_split=0.3,
#         epochs=EPOCHS,
#         # callbacks=[checkpoint],
#     )
        
#     history[3] = seq_model.fit(
#         [train_data_part4[0], train_data_part4[1]],
#         train_labels_part4,
#         validation_split=0.3,
#         epochs=EPOCHS,
#         # callbacks=[checkpoint],
#     )
    # seq_model.load_weights(filepath)
    # _, accuracy = seq_model.evaluate([test_data[0], test_data[1]], test_labels)
    # print(f"Test accuracy: {round(accuracy * 100, 2)}%")
    
    seq_model.save('cnn_rnn_video_identifier')
    print("Saved model cnn_rnn_video_identifier")

    return history, seq_model

In [18]:
history = []
# temp, sequence_model = run_experiment(0, 100)
# history.append(temp)

In [None]:

temp, sequence_model = run_experiment(0, 100, loadmodel = True)
history.append(temp)
temp, sequence_model = run_experiment(101, 200, loadmodel = True)
history.append(temp)
temp, sequence_model = run_experiment(201, 300, loadmodel = True)
history.append(temp)
temp, sequence_model = run_experiment(300, 349, loadmodel = True)
history.append(temp)

In [None]:
%matplotlib inline
import matplotlib,pyplot as plt

print(history.history.keys())

acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc)+1)

plt.plot(epochs, acc, 'bo', label='Training accuracy')
plt.plot(epochs, val_acc, 'b', label='Validation accuracy')
plt.title('Training and validation accuracy')
plt.legend()

plt.figure()
plt.plot(epochs, loss, 'bo', label='Training loss')
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.legend()

plt.show()

In [23]:
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")

Frame features in train set: (350, 512, 2048)
Frame masks in train set: (350, 512)


In [None]:
seq_model = tf.keras.models.load_model('cnn_rnn_video_identifier')

In [63]:
class_vocab = label_processor.get_vocabulary()
results = seq_model.predict([train_data[0][210:215], train_data[1][210:215]])
# print(results)
for result in results:
    print("---------------------------------")
    for i in np.argsort(result)[::-1]:
        print(f" {class_vocab[i]}: {result[i] * 100:5.2f}%")

---------------------------------
 arrest: 97.87%
 theft:  1.34%
 attack:  0.43%
 blast:  0.32%
 road accident:  0.02%
 firing:  0.01%
 deliberate damage:  0.01%
---------------------------------
 attack: 96.05%
 theft:  3.10%
 firing:  0.49%
 arrest:  0.30%
 blast:  0.05%
 road accident:  0.01%
 deliberate damage:  0.00%
---------------------------------
 deliberate damage: 36.23%
 firing: 24.16%
 blast: 18.39%
 attack:  8.74%
 road accident:  6.48%
 arrest:  3.44%
 theft:  2.55%
---------------------------------
 deliberate damage: 94.89%
 road accident:  3.01%
 blast:  1.57%
 firing:  0.44%
 arrest:  0.07%
 attack:  0.01%
 theft:  0.01%
---------------------------------
 firing: 97.62%
 deliberate damage:  1.02%
 road accident:  0.99%
 theft:  0.31%
 attack:  0.04%
 blast:  0.01%
 arrest:  0.01%


In [28]:
test_data_df = pd.read_csv('test.csv', index_col=False)
test_data_df.drop(columns='Unnames: 0', inpace=True)
test_data_df.head()

TypeError: drop() got an unexpected keyword argument 'inpace'

In [None]:
sample_test_data_df = test_data_df[:25]

In [None]:
def prepare_all_test_videos(df):
    num_samples = len(df)
    video_paths = df["path"].values.tolist()
    # labels = df["category"].values
    # labels = label_processor(labels[..., None]).numpy()

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` will contain a bunch of booleans denoting if a timestep is
    # masked with padding or not.
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    # For each video.
    for idx, path in enumerate(video_paths):
        # Gather all its frames and add a batch dimension.
        print("loading video from path:", path)
        frames = load_video(os.path.join(path))
        frames = frames[None, ...]

        # Initialize placeholders to store the masks and features of the current video.
        temp_frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
        temp_frame_features = np.zeros(
            shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
        )

        # Extract features from the frames of the current video.
        for i, batch in enumerate(frames):
            video_length = batch.shape[0]
            length = min(MAX_SEQ_LENGTH, video_length)
            for j in range(length):
                temp_frame_features[i, j, :] = feature_extractor.predict(
                    batch[None, j, :]
                )
            temp_frame_mask[i, :length] = 1  # 1 = not masked, 0 = masked

        frame_features[idx,] = temp_frame_features.squeeze()
        frame_masks[idx,] = temp_frame_mask.squeeze()

    return (frame_features, frame_masks)


In [None]:
test_data = prepare_all_test_videos(sample_test_data_df)

In [None]:
print(f"Frame features in train set: {test_data[0].shape}")
print(f"Frame masks in train set: {test_data[1].shape}")

In [None]:
class_vocab = label_processor.get_vocabulary()

for in in range(len(test_data[0]):
    print("result[", i, "]")
    result = sequence_model.predict([test_data[0], test_data[1]])[i]
    for i in np.argsort(result)[::-1]:
        print(f" {class_vocab[i]}: {result[i] * 100:5.2f}%")