In [1]:
!pip install -q git+https://github.com/tensorflow/docs

In [2]:
import glob
import os
import shutil

from tensorflow_docs.vis import embed
from tensorflow import keras
from imutils import paths

import matplotlib.pyplot as plt
import tensorflow as tf
import pandas as pd
import numpy as np
import imageio
import cv2


In [3]:
# Side length (pixels) of the square frames: default resize target in
# `load_video` and spatial input shape of the feature extractor.
IMG_SIZE = 224
# NOTE(review): not referenced by any cell visible in this notebook — confirm it is still needed.
BATCH_SIZE = 64
# Number of epochs passed to `fit` in `run_experiment`.
EPOCHS = 10

# Number of timesteps (frames) kept per video in the feature and mask arrays.
MAX_SEQ_LENGTH = 20
# Length of the feature vector stored for every frame.
NUM_FEATURES = 2048

In [4]:
def crop_center_square(frame):
    """Return the largest centred square region of `frame`.

    The first two axes of `frame` are treated as (rows, columns); any further
    axes are kept untouched. The result is a slice of the input.
    """
    height, width = frame.shape[0:2]
    side = min(height, width)

    # Offsets that centre a `side` x `side` window inside the frame.
    top = (height // 2) - (side // 2)
    left = (width // 2) - (side // 2)

    row_window = slice(top, top + side)
    col_window = slice(left, left + side)
    return frame[row_window, col_window]


def load_video(path, max_frames=0, resize=(IMG_SIZE, IMG_SIZE)):
    """Read a video file into an array of cropped, resized frames.

    Args:
        path: location of the video, handed to `cv2.VideoCapture`.
        max_frames: stop once this many frames were collected. With the
            default of 0 the check never matches, so the whole video is read.
        resize: target size passed to `cv2.resize`.

    Returns:
        `np.array` built from the collected frames, each centre-cropped to a
        square, resized, and with its channel axis reordered as [2, 1, 0]
        (OpenCV's BGR order to RGB). Empty when no frame could be read.
    """
    capture = cv2.VideoCapture(path)
    collected = []
    try:
        ok, image = capture.read()
        while ok:
            image = cv2.resize(crop_center_square(image), resize)
            collected.append(image[:, :, [2, 1, 0]])

            if len(collected) == max_frames:
                break
            ok, image = capture.read()
    finally:
        # Always free the capture handle, even if a frame operation raised.
        capture.release()
    return np.array(collected)

In [5]:
def build_feature_extractor():
    """Build the Keras model that maps raw frames to InceptionV3 features.

    The model takes (IMG_SIZE, IMG_SIZE, 3) inputs, applies the InceptionV3
    `preprocess_input` function and runs the ImageNet-weighted InceptionV3
    backbone without its classification head, using average pooling.
    """
    frame_shape = (IMG_SIZE, IMG_SIZE, 3)

    backbone = keras.applications.InceptionV3(
        weights="imagenet",
        include_top=False,
        pooling="avg",
        input_shape=frame_shape,
    )

    raw_frames = keras.Input(frame_shape)
    scaled_frames = keras.applications.inception_v3.preprocess_input(raw_frames)
    pooled_features = backbone(scaled_frames)

    return keras.Model(raw_frames, pooled_features, name="feature_extractor")


# Single shared extractor instance used by the feature-preparation helpers.
feature_extractor = build_feature_extractor()

In [24]:
# Load the train/test tables and drop the first CSV column
# (presumably the index written by `to_csv` — confirm against the files).
train_df = pd.read_csv('train_df.csv').iloc[:, 1:]
test_df = pd.read_csv('test_df.csv').iloc[:, 1:]

In [25]:
# Root folder holding the source clips ("Crash-1500/" and "Normal/") and the
# "train/" and "test/" folders the copies are written to.
path = "/Users/nalthan/Desktop/vertiasopencvprojecy/"

# Full paths of the .mp4 files located directly inside `path`, built from the
# directory that is actually listed.
# NOTE(review): the name suggests the "Normal/" folder was meant — confirm intent.
normal_file_names = [
    os.path.join(path, filename)
    for filename in os.listdir(path)
    if filename.endswith('.mp4')
]


def copy_split_videos(df, split_name, root_dir=path):
    """Copy every clip listed in `df` into `<root_dir>/<split_name>/`.

    Args:
        df: table whose first column is the clip identifier and whose second
            column is the label (presumably "vidname" and "frame_50" — confirm).
            A label equal to 1 selects the "Crash-1500" source folder, any
            other value selects "Normal".
        split_name: name of the destination folder below `root_dir`.
        root_dir: folder containing the source and destination folders.

    Raises:
        FileNotFoundError: if a listed clip does not exist in its source folder.
    """
    target_dir = os.path.join(root_dir, split_name)
    # The destination may not exist yet on a fresh checkout.
    os.makedirs(target_dir, exist_ok=True)

    # `itertuples` keeps each column's own dtype; `iterrows` would coerce the
    # row to one dtype and could turn an integer id into e.g. "1465.0".
    for clip_id, label, *_ in df.itertuples(index=False, name=None):
        clip_id = str(clip_id)
        if clip_id == ".DS_Store":
            # macOS folder metadata that leaked into the table; not a video.
            continue
        file_name = clip_id.zfill(6) + ".mp4"
        source_dir = "Crash-1500" if label == 1 else "Normal"
        shutil.copy(
            os.path.join(root_dir, source_dir, file_name),
            os.path.join(target_dir, file_name),
        )


copy_split_videos(train_df, "train")
copy_split_videos(test_df, "test")

In [26]:

# Lookup layer whose vocabulary is the sorted set of distinct training labels,
# rendered as strings; no out-of-vocabulary slot is reserved.
label_processor = keras.layers.StringLookup(
    num_oov_indices=0,
    vocabulary=tf.convert_to_tensor(
        np.unique(train_df["frame_50"].astype(str).tolist())
    ),
)

print(label_processor.get_vocabulary())


['0', '1']


  return bool(asarray(a1 == a2).all())


In [None]:
def prepare_all_videos(df, root_dir):
    """Turn every video listed in `df` into padded feature and mask arrays.

    Args:
        df: table with a "vidname" column (clip identifier, zero-padded to six
            characters to form the file name) and a "frame_50" column (label).
        root_dir: folder containing the `<vidname>.mp4` files.

    Returns:
        `((frame_features, frame_masks), labels)` where `frame_features` is
        float32 of shape (len(df), MAX_SEQ_LENGTH, NUM_FEATURES), `frame_masks`
        is bool of shape (len(df), MAX_SEQ_LENGTH) and is True on timesteps
        that hold a real frame, and `labels` is `df["frame_50"].values`.
    """
    num_samples = len(df)
    video_names = df["vidname"].values.tolist()
    labels = df["frame_50"].values

    # `frame_masks` and `frame_features` are what we will feed to our sequence model.
    # `frame_masks` contains booleans denoting whether a timestep holds a frame
    # (True) or is padding (False).
    frame_masks = np.zeros(shape=(num_samples, MAX_SEQ_LENGTH), dtype="bool")
    frame_features = np.zeros(
        shape=(num_samples, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32"
    )

    for idx, name in enumerate(video_names):
        print(idx, name)  # progress indicator
        video_file = os.path.join(root_dir, str(name).zfill(6) + '.mp4')

        # Only the first MAX_SEQ_LENGTH frames are ever used, so stop decoding
        # there instead of loading the whole video into memory.
        frames = load_video(video_file, max_frames=MAX_SEQ_LENGTH)[:MAX_SEQ_LENGTH]
        length = len(frames)
        if length == 0:
            # Nothing could be read: the row stays all-zero and fully masked.
            print(f"Warning: no frames read from {video_file}")
            continue

        # One batched forward pass per video instead of one `predict` call per frame.
        frame_features[idx, :length] = feature_extractor.predict(frames, verbose=0)
        frame_masks[idx, :length] = True  # True = not masked, False = masked

    return (frame_features, frame_masks), labels


# Run feature extraction for both splits; each call returns
# ((frame_features, frame_masks), labels).
train_data, train_labels = prepare_all_videos(
    train_df,
    "/Users/nalthan/Desktop/vertiasopencvprojecy/train",
)
test_data, test_labels = prepare_all_videos(
    test_df,
    "/Users/nalthan/Desktop/vertiasopencvprojecy/test",
)

# Report the shapes of the training arrays.
print(f"Frame features in train set: {train_data[0].shape}")
print(f"Frame masks in train set: {train_data[1].shape}")


In [38]:
def get_sequence_model():
    """Build and compile the GRU classifier fed with per-frame features.

    The model takes two inputs, the (MAX_SEQ_LENGTH, NUM_FEATURES) feature
    sequence and the boolean (MAX_SEQ_LENGTH,) mask, and outputs one softmax
    probability per entry of the `label_processor` vocabulary.
    """
    num_classes = len(label_processor.get_vocabulary())

    features_in = keras.Input((MAX_SEQ_LENGTH, NUM_FEATURES))
    mask_in = keras.Input((MAX_SEQ_LENGTH,), dtype="bool")

    # Refer to the following tutorial to understand the significance of using `mask`:
    # https://keras.io/api/layers/recurrent_layers/gru/
    first_gru = keras.layers.GRU(16, return_sequences=True)
    hidden = first_gru(features_in, mask=mask_in)
    hidden = keras.layers.GRU(8)(hidden)
    hidden = keras.layers.Dropout(0.4)(hidden)
    hidden = keras.layers.Dense(8, activation="relu")(hidden)
    class_probs = keras.layers.Dense(num_classes, activation="softmax")(hidden)

    model = keras.Model([features_in, mask_in], class_probs)
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    return model


# Utility for running experiments.
def run_experiment():
    """Train the sequence model, restore its best weights and score the test set.

    Trains on `train_data` / `train_labels` with 30% of the samples held out
    for validation, checkpointing weights whenever the monitored validation
    metric improves. The checkpoint is then reloaded, the model is evaluated
    on `test_data` / `test_labels`, and the test accuracy is printed.

    Returns:
        `(history, seq_model)`: the object returned by `fit` and the trained model.
    """
    weights_path = "/tmp/video_classifier"
    best_weights_saver = keras.callbacks.ModelCheckpoint(
        weights_path,
        save_weights_only=True,
        save_best_only=True,
        verbose=1,
    )

    seq_model = get_sequence_model()
    train_features, train_masks = train_data[0], train_data[1]
    history = seq_model.fit(
        [train_features, train_masks],
        train_labels,
        validation_split=0.3,
        epochs=EPOCHS,
        callbacks=[best_weights_saver],
    )

    # Evaluate with the best checkpoint rather than the last-epoch weights.
    seq_model.load_weights(weights_path)
    test_features, test_masks = test_data[0], test_data[1]
    _, accuracy = seq_model.evaluate([test_features, test_masks], test_labels)
    print(f"Test accuracy: {round(accuracy * 100, 2)}%")

    return history, seq_model


_, sequence_model = run_experiment()

Epoch 1/10
Epoch 1: val_loss improved from inf to 0.47573, saving model to /tmp/video_classifier
Epoch 2/10
Epoch 2: val_loss improved from 0.47573 to 0.44095, saving model to /tmp/video_classifier
Epoch 3/10
Epoch 3: val_loss improved from 0.44095 to 0.43681, saving model to /tmp/video_classifier
Epoch 4/10
Epoch 4: val_loss improved from 0.43681 to 0.42590, saving model to /tmp/video_classifier
Epoch 5/10
Epoch 5: val_loss improved from 0.42590 to 0.39159, saving model to /tmp/video_classifier
Epoch 6/10
Epoch 6: val_loss did not improve from 0.39159
Epoch 7/10
Epoch 7: val_loss did not improve from 0.39159
Epoch 8/10
Epoch 8: val_loss did not improve from 0.39159
Epoch 9/10
Epoch 9: val_loss did not improve from 0.39159
Epoch 10/10
Epoch 10: val_loss did not improve from 0.39159
Test accuracy: 92.5%


In [84]:
def prepare_single_video(frames):
    """Convert the frames of one video into padded feature and mask arrays.

    Args:
        frames: frames of a single video, stacked along the first axis. Only
            the first MAX_SEQ_LENGTH frames are used.

    Returns:
        `(frame_features, frame_mask)`: a float32 array of shape
        (1, MAX_SEQ_LENGTH, NUM_FEATURES) and a bool array of shape
        (1, MAX_SEQ_LENGTH) that is True on timesteps holding a real frame.
        Both stay all-zero when `frames` is empty.
    """
    frame_mask = np.zeros(shape=(1, MAX_SEQ_LENGTH,), dtype="bool")
    frame_features = np.zeros(shape=(1, MAX_SEQ_LENGTH, NUM_FEATURES), dtype="float32")

    clip = np.asarray(frames)[:MAX_SEQ_LENGTH]
    length = len(clip)
    if length > 0:
        # One batched forward pass instead of one `predict` call per frame.
        frame_features[0, :length] = feature_extractor.predict(clip, verbose=0)
        frame_mask[0, :length] = True  # True = not masked, False = masked

    return frame_features, frame_mask


def sequence_prediction(path, root_dir="test"):
    """Predict class percentages for one video file.

    Args:
        path: file name of the video, relative to `root_dir`.
        root_dir: folder containing the video. The default "test" is resolved
            against the current working directory.

    Returns:
        List with one entry per output class of `sequence_model`: the
        predicted probability as a percentage rounded to two decimals.
    """
    # Only the first MAX_SEQ_LENGTH frames are used downstream, so stop
    # decoding there instead of loading the whole video.
    frames = load_video(os.path.join(root_dir, path), max_frames=MAX_SEQ_LENGTH)
    frame_features, frame_mask = prepare_single_video(frames)
    probabilities = sequence_model.predict([frame_features, frame_mask], verbose=0)[0]

    return [round(probability * 100, 2) for probability in probabilities]



# Draw one clip id at random from the test table and zero-pad it to six characters.
test_video = str(np.random.choice(test_df["vidname"].values.tolist()))
test_video = test_video.zfill(6)
print(f"Test video path: {test_video}")
#test_frames = sequence_prediction(str(test_video)+".mp4")




Test video path: 001465


In [None]:
# Score every test clip and collect, in parallel lists, its zero-padded name,
# its true label and the two predicted class percentages.
vidname, real, predict0, predict1 = [], [], [], []

for index, row in test_df.iterrows():
    clip_name = str(row["vidname"]).zfill(6)
    vidname.append(clip_name)
    real.append(row["frame_50"])

    percentages = sequence_prediction(clip_name + ".mp4")
    predict0.append(percentages[0])
    predict1.append(percentages[1])

In [95]:
# Assemble the per-clip results and write them to disk. The mapping is passed
# inline so that no variable named `dict` shadows the builtin in later cells.
results = pd.DataFrame(
    {'vidname': vidname, 'real': real, 'predict0': predict0, 'predict1': predict1}
)
results.to_csv('inception_results.csv')