'''
Preprocess frames for LSTM

Timesteps = 30

Each Class 720 frames

720/30 = 24 

NUMBER OF SAMPLES, TIMESTEPS, WIDTH, HIEGHT, CHANNEL
Final Shape: (24, 30, 224, 224, 3)

'''

In [1]:
import os
import cv2
import time
import numpy as np
import numpy as np
import os
import cv2
import keras
import sklearn
import pandas
from time import time
from sklearn.model_selection import train_test_split
from keras.models import Sequential
from keras.utils import to_categorical
from keras.models import load_model
from keras.layers import *
from keras import layers
from keras import Model
from keras.callbacks import TensorBoard
from keras import optimizers
import matplotlib.pyplot as plt
from keras.applications import *
from sklearn.metrics import classification_report
import time
from keras.models import load_model
from sklearn.metrics import confusion_matrix
from keras.applications.resnet50 import preprocess_input
from keras.applications import *

Using TensorFlow backend.


# Load Data

In [2]:
v_path = "frames/violent/"
nv_path = "frames/non_violent/"

In [3]:
violent_frames = []
non_violent_frames = []

for frame in os.listdir(v_path):
    frame = cv2.imread(os.path.join(v_path,frame))
    violent_frames.append(frame)


for frame in os.listdir(nv_path):
    frame = cv2.imread(os.path.join(nv_path,frame))
    non_violent_frames.append(frame)

# Feature Extraction

In [4]:
#Creates sequence 
# SHAPE : (N_SAMPLES,TIMESTEP,WIDTH,HEIGHT,CHANNEL)
def create_seq(violent_frames, non_violent_frames):
    
    print("+++ Creating Sequence... +++")
    
    violent_vid = []
    non_violent_vid = []

    i = 0
    while i < len(violent_frames):
        violent_vid.append(violent_frames[i:i+30])
        i = i+30


    i = 0
    while i < len(non_violent_frames):
        non_violent_vid.append(non_violent_frames[i:i+30])
        i = i+30
        
    violent_vid = np.asarray(violent_vid)
    non_violent_vid = np.asarray(non_violent_vid)
    
    return violent_vid, non_violent_vid


In [5]:
def result(x,y):
    
    print("+++ Generating result... +++")
    
    pred = model.predict(x)
#     print('First prediction:', pred)
    
    score = model.evaluate(x, y,verbose=1)
    print("-----------------------------")
    print('Test loss:', score[0])
    print('Test accuracy:', score[1])
    
    prediction = []
    for p in pred:
        if p>=.5:
            prediction.append(1)
        else:
            prediction.append(0)
    print("-----------------------------")
    print("Classification report")
    print("-----------------------------")
    print(classification_report(y, prediction))
    
    print("-----------------------------")
    print("Confusion Matrix")
    print("-----------------------------")
    conf_mat = confusion_matrix(y, prediction)
    print(conf_mat)
    

In [6]:
def preprocess_lstm(features):
    
    print("+++ Preprocessing data... +++")
    
    violent_features = features[0:240]
    non_violent_features = features[240:]

    print("Violent features: ", violent_features.shape)
    print("Non Violent features: ", non_violent_features.shape)


    violent_vid,non_violent_vid = create_seq(violent_features, non_violent_features)
    violent_y, non_violent_y = np.zeros(len(violent_vid)), np.ones(len(non_violent_vid))

    print("Violent Video Seq: ", violent_vid.shape,"Non_violent video Seq: ", non_violent_vid.shape)
    print("Violent Label: ", violent_y.shape, "Non_violent Label: ", non_violent_y.shape)


    test_x = np.vstack((violent_vid,non_violent_vid))
    test_y = np.append(violent_y, non_violent_y)

    print("Total data: ", test_x.shape)
    print("Total target: ", test_y.shape)

    test_x = np.reshape(test_x, (test_x.shape[0],test_x.shape[1],np.prod(test_x.shape[2:])))
    print("(LSTM) After Rehshape: ", test_x.shape)
    
    return test_x,test_y

In [7]:
def feature_extract(violent_frames, non_violent_frames, model):
    
    print("+++ Extracting feature... +++")
    
    violent_frames = np.asarray(violent_frames)
    non_violent_frames = np.asarray(non_violent_frames)

    print ("Before Feature extraction: ")
    print(violent_frames.shape,non_violent_frames.shape)
    all_data = np.vstack((violent_frames,non_violent_frames))
    print("Adding all data: ", all_data.shape)

    #creates feature descriptors
    desc = preprocess_input(all_data)
    if(model == 'resnet50'):
        loaded_model = resnet50.ResNet50(input_shape=(224,224,3), include_top=False)
    elif(model == 'vgg19'):
        loaded_model = VGG19(input_shape=(224,224,3), include_top=False)
    elif(model == 'vgg16'):
        loaded_model = VGG16(input_shape=(224,224,3), include_top=False)
    else:
        print("Please give model name - 'resnet50', 'vgg19', 'vgg16'")
        
    loaded_model = Model(loaded_model.input,loaded_model.output)
    features = loaded_model.predict(desc,batch_size=10,verbose=1)

    print ("After Feature extraction: ", features.shape)
    
    return features

# MODEL TESTING

### Resnet LSTM

In [8]:
features = feature_extract(violent_frames, non_violent_frames, 'resnet50')
test_x, test_y = preprocess_lstm(features)


model = Sequential()
model.add(CuDNNLSTM(50, input_shape=(test_x.shape[1],test_x.shape[2]), return_sequences=False, kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1,activation='sigmoid'))
model.load_weights('resnet_LSTM.h5')
model.summary()
optimizer = optimizers.adam(lr=0.001,decay=0.004)
model.compile(loss="binary_crossentropy",optimizer=optimizer,metrics=["accuracy"])


result(test_x,test_y)

+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)




After Feature extraction:  (480, 7, 7, 2048)
+++ Preprocessing data... +++
Violent features:  (240, 7, 7, 2048)
Non Violent features:  (240, 7, 7, 2048)
+++ Creating Sequence... +++
Violent Video Seq:  (8, 30, 7, 7, 2048) Non_violent video Seq:  (8, 30, 7, 7, 2048)
Violent Label:  (8,) Non_violent Label:  (8,)
Total data:  (16, 30, 7, 7, 2048)
Total target:  (16,)
(LSTM) After Rehshape:  (16, 30, 100352)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_1 (CuDNNLSTM)     (None, 50)                20080800  
_________________________________________________________________
dense_1 (Dense)              (None, 1)                 51        
Total params: 20,080,851
Trainable params: 20,080,851
Non-trainable params: 0
_________________________________________________________________
+++ Generating result... +++
-----------------------------
Test loss: 0.5757079124450684
Test accuracy: 0.9375
-------

### VGG19 LSTM

In [9]:
features = feature_extract(violent_frames, non_violent_frames, 'vgg19')
test_x, test_y = preprocess_lstm(features)

model = Sequential()
model.add(CuDNNLSTM(50, input_shape=(test_x.shape[1],test_x.shape[2]), return_sequences=False, kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1,activation='sigmoid'))
model.load_weights('vgg19_LSTM.h5')
model.summary()
optimizer = optimizers.adam(lr=0.001,decay=0.004)
model.compile(loss="binary_crossentropy",optimizer=optimizer,metrics=["accuracy"])

result(test_x,test_y)

+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)
After Feature extraction:  (480, 7, 7, 512)
+++ Preprocessing data... +++
Violent features:  (240, 7, 7, 512)
Non Violent features:  (240, 7, 7, 512)
+++ Creating Sequence... +++
Violent Video Seq:  (8, 30, 7, 7, 512) Non_violent video Seq:  (8, 30, 7, 7, 512)
Violent Label:  (8,) Non_violent Label:  (8,)
Total data:  (16, 30, 7, 7, 512)
Total target:  (16,)
(LSTM) After Rehshape:  (16, 30, 25088)
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
cu_dnnlstm_2 (CuDNNLSTM)     (None, 50)                5028000   
_________________________________________________________________
dense_2 (Dense)              (None, 1)                 51        
Total params: 5,028,051
Trainable params: 5,028,051
Non-trainable params: 0
____________________________________________________________

### VGG16 LSTM

In [90]:
features = feature_extract(violent_frames, non_violent_frames, 'vgg16')
test_x, test_y = preprocess_lstm(features)

model = Sequential()
model.add(CuDNNLSTM(50, input_shape=(test_x.shape[1],test_x.shape[2]), return_sequences=False, kernel_regularizer=regularizers.l2(0.01)))
model.add(Dense(1,activation='sigmoid'))
model.load_weights('vgg16_LSTM.h5')
model.summary()
optimizer = optimizers.adam(lr=0.001,decay=0.004)
model.compile(loss="binary_crossentropy",optimizer=optimizer,metrics=["accuracy"])

result(test_x,test_y)


+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)
After Feature extraction:  (480, 7, 7, 512)
+++ Preprocessing data... +++
Violent features:  (240, 7, 7, 512)
Non Violent features:  (240, 7, 7, 512)
+++ Creating Sequence... +++
Violent Video Seq:  (8, 30, 7, 7, 512) Non_violent video Seq:  (8, 30, 7, 7, 512)
Violent Label:  (8,) Non_violent Label:  (8,)
Total data:  (16, 30, 7, 7, 512)
Total target:  (16,)
(LSTM) After Rehshape:  (16, 30, 25088)


ValueError: Dimension 1 in both shapes must be equal, but are 200 and 400. Shapes are [25088,200] and [25088,400]. for 'Assign_6572' (op: 'Assign') with input shapes: [25088,200], [25088,400].

# STN

In [10]:
from utils import get_initial_weights
from layers import BilinearInterpolation

In [11]:
def build_model(x, model_weights):
    
    num_classes=1
    image = Input(shape=(x.shape[1], x.shape[2], x.shape[3]))

    locnet = Conv2D(2, (1, 1),kernel_regularizer=regularizers.l2(0.01))(image)
    locnet = Conv2D(2, (1, 1),kernel_regularizer=regularizers.l2(0.01))(locnet)
    locnet = Flatten()(locnet)
    locnet = Dense(5,kernel_regularizer=regularizers.l2(0.01))(locnet)
    locnet = Activation('relu')(locnet)

    weights = get_initial_weights(5)

    locnet = Dense(6, weights=weights,kernel_regularizer=regularizers.l2(0.01))(locnet)

    x = BilinearInterpolation((3,3))([image, locnet])

    x = Conv2D(2, (1, 1), padding='same',kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)

    x = Conv2D(2, (1, 1),kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)

    x = Flatten()(x)
    x = Dense(64,kernel_regularizer=regularizers.l2(0.01))(x)
    x = Activation('relu')(x)
    x = Dense(num_classes)(x)
    x = Activation('sigmoid')(x)
    model = Model(inputs=image, outputs=x)

    model.summary()
    adam = optimizers.Adam(lr=.001)
    model.load_weights(model_weights)
    model.compile(loss='binary_crossentropy',optimizer=adam,metrics=['accuracy'])
    
    return model
    

### Resnet

In [12]:
features = feature_extract(violent_frames, non_violent_frames, 'resnet50')
violent_y, non_violent_y = np.zeros(240), np.ones(240)

test_x = features
test_y = np.append(violent_y, non_violent_y)

model = build_model(test_x,'resnet_att.h5')

result(test_x,test_y)

+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)




After Feature extraction:  (480, 7, 7, 2048)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 7, 7, 2048)   0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 7, 7, 2)      4098        input_4[0][0]                    
__________________________________________________________________________________________________
conv2d_2 (Conv2D)               (None, 7, 7, 2)      6           conv2d_1[0][0]                   
__________________________________________________________________________________________________
flatten_1 (Flatten)             (None, 98)           0           conv2d_2[0][0]                   
________________________________________________________________

### VGG19

In [13]:
features = feature_extract(violent_frames, non_violent_frames, 'vgg19')
violent_y, non_violent_y = np.zeros(240), np.ones(240)

test_x = features
test_y = np.append(violent_y, non_violent_y)

model = build_model(test_x,'vgg19_att.h5')

result(test_x,test_y)

+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)
After Feature extraction:  (480, 7, 7, 512)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 7, 7, 512)    0                                            
__________________________________________________________________________________________________
conv2d_5 (Conv2D)               (None, 7, 7, 2)      1026        input_6[0][0]                    
__________________________________________________________________________________________________
conv2d_6 (Conv2D)               (None, 7, 7, 2)      6           conv2d_5[0][0]                   
__________________________________________________________________________________________________
flatten_3 (Flatten)            

### VGG16

In [95]:
features = feature_extract(violent_frames, non_violent_frames, 'vgg16')
violent_y, non_violent_y = np.zeros(240), np.ones(240)

test_x = features
test_y = np.append(violent_y, non_violent_y)

model = build_model(test_x,'vgg16_att.h5')

result(test_x,test_y)

+++ Extracting feature... +++
Before Feature extraction: 
(240, 224, 224, 3) (240, 224, 224, 3)
Adding all data:  (480, 224, 224, 3)
After Feature extraction:  (480, 7, 7, 512)
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_47 (InputLayer)           (None, 7, 7, 512)    0                                            
__________________________________________________________________________________________________
conv2d_49 (Conv2D)              (None, 7, 7, 2)      1026        input_47[0][0]                   
__________________________________________________________________________________________________
conv2d_50 (Conv2D)              (None, 7, 7, 2)      6           conv2d_49[0][0]                  
__________________________________________________________________________________________________
flatten_25 (Flatten)           

ValueError: Dimension 0 in both shapes must be equal, but are 1 and 2. Shapes are [1,1,512,2] and [2,2048,1,1]. for 'Assign_6977' (op: 'Assign') with input shapes: [1,1,512,2], [2,2048,1,1].