In [None]:
# importing libraries
from tensorflow.keras.applications.vgg16 import VGG16
from tensorflow.keras.applications.densenet import DenseNet201
from tensorflow.keras.applications import ResNet101
from tensorflow.keras.applications import Xception
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Dense, Input
from tensorflow.keras.layers import GlobalAveragePooling2D, GlobalMaxPooling2D
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import GRU
from tensorflow.keras.layers import Bidirectional
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.optimizers import Nadam

In [None]:
# Clip input: 70 frames per video; each frame is 70 x 150 x 3
# (height, width, channels in Keras' default channels_last layout).
video = Input(shape=(70, 70, 150, 3))

# Per-frame feature extractor: ImageNet-pretrained VGG16 without its
# fully connected classifier head.
cnn_base = VGG16(include_top=False, weights="imagenet", input_shape=(70, 150, 3))

In [None]:
# Mount Google Drive at /content/drive so the /content/drive/MyDrive/...
# paths used by later cells are available.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [None]:
# Collapse each VGG16 feature map to a single vector with global MAX pooling.
cnn_out = GlobalMaxPooling2D()(cnn_base.output)

# Per-frame encoder: one image in, one pooled feature vector out.
cnn = Model(inputs=cnn_base.input, outputs=cnn_out)

# Freeze the encoder so that only the layers added below are trained.
cnn.trainable = False

# Apply the frozen encoder independently to every frame of the clip.
encoded_frames = TimeDistributed(cnn)(video)

# Summarise the frame sequence. return_sequences is left at its default,
# so the LSTM emits only its final output, not one output per frame.
encoded_sequence = LSTM(units=256)(encoded_frames)

# Fully connected layer on top of the sequence summary.
hidden_layer = Dense(1024, activation="relu")(encoded_sequence)

# Two-way softmax: one probability per class.
outputs = Dense(units=2, activation="softmax")(hidden_layer)

In [None]:
# Assemble the end-to-end classifier: video clip in, class probabilities out.
model = Model([video], outputs)

# Nadam optimizer. `learning_rate` replaces the deprecated `lr` alias, and the
# legacy `schedule_decay` argument is omitted because it is not part of the
# documented tf.keras Nadam signature.
optimizer = Nadam(learning_rate=0.002,
                  beta_1=0.9,
                  beta_2=0.999,
                  epsilon=1e-08)

# One-hot labels + softmax output -> categorical cross-entropy.
model.compile(loss="categorical_crossentropy",
              optimizer=optimizer,
              metrics=["categorical_accuracy"])

model.summary()

Model: "model_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_5 (InputLayer)        [(None, 70, 70, 150, 3)]  0         
                                                                 
 time_distributed_3 (TimeDis  (None, 70, 512)          14714688  
 tributed)                                                       
                                                                 
 lstm_3 (LSTM)               (None, 256)               787456    
                                                                 
 dense_2 (Dense)             (None, 1024)              263168    
                                                                 
 dense_3 (Dense)             (None, 2)                 2050      
                                                                 
Total params: 15,767,362
Trainable params: 1,052,674
Non-trainable params: 14,714,688
_______________________________________

  super(Nadam, self).__init__(name, **kwargs)


In [None]:
import os
import cv2
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras.metrics as k

In [None]:
from tensorflow.keras.callbacks import ModelCheckpoint, LearningRateScheduler, TensorBoard, EarlyStopping

In [None]:
# Root folder that is scanned for one sub-folder of clips per class.
data_dir = "/content/drive/MyDrive/clips/"

# NOTE(review): frames_extraction passes (img_height, img_width) as the dsize
# argument of cv2.resize, which OpenCV documents as (width, height). The
# resized frames are therefore 150 wide and 70 tall, which matches the
# (70, 150, 3) per-frame input declared for the model.
img_height = 150
img_width = 70

# Number of frames read from each video.
seq_len = 70

# Label order: the index in this list is the position of the 1 in the one-hot label.
classes = ["Violence", "Non-Violence"]


In [None]:
def frames_extraction(video_path):
    """Read up to `seq_len` resized frames from the start of a video file.

    Args:
        video_path: Path of the video file to open with OpenCV.

    Returns:
        A list of frames as returned by cv2.resize. The list holds fewer than
        `seq_len` frames when a read fails first (for example, the video is
        shorter than `seq_len` frames or cannot be decoded).
    """
    frames_list = []
    vidObj = cv2.VideoCapture(video_path)
    try:
        while len(frames_list) < seq_len:
            success, image = vidObj.read()
            if not success:
                print("Defected frame")
                break
            # cv2.resize takes dsize as (width, height); see the note next to
            # the img_height / img_width definitions.
            image = cv2.resize(image, (img_height, img_width))
            frames_list.append(image)
    finally:
        # Always free the decoder / file handle, even if reading or resizing raises.
        vidObj.release()

    return frames_list

In [None]:
def create_data(input_dir):
    """Build the frame array and one-hot labels from a folder of class sub-folders.

    Each sub-folder of `input_dir` whose name appears in `classes` is scanned,
    and every file in it is passed to `frames_extraction`. Videos that do not
    yield exactly `seq_len` frames are dropped.

    Args:
        input_dir: Directory containing one sub-folder per class.

    Returns:
        (X, Y): X is np.asarray of the per-clip frame lists, Y is np.asarray of
        the matching one-hot labels (length len(classes) each).
    """
    X = []
    Y = []

    # Sorted so the sample order (and therefore the seeded train/test split)
    # does not depend on the filesystem's listing order.
    for c in sorted(os.listdir(input_dir)):
        class_dir = os.path.join(input_dir, c)

        # Skip stray files (e.g. a saved model) and folders that are not a known
        # class; os.listdir / classes.index would otherwise raise on them.
        if not os.path.isdir(class_dir) or c not in classes:
            print(f"Skipping {c}: not a class folder")
            continue

        print(c)
        for f in sorted(os.listdir(class_dir)):
            frames = frames_extraction(os.path.join(class_dir, f))
            if len(frames) != seq_len:
                continue  # incomplete clip; keep every sample the same length

            X.append(frames)
            y = [0] * len(classes)
            y[classes.index(c)] = 1
            Y.append(y)

    X = np.asarray(X)
    Y = np.asarray(Y)
    return X, Y

In [None]:
%%time
# Build the dataset by scanning data_dir: X receives the per-clip frame stacks,
# Y the matching one-hot labels. %%time reports how long this cell takes.
X, Y = create_data(data_dir)

In [None]:
# Sanity check on the array built by create_data (np.asarray over per-clip
# lists of seq_len resized frames).
print(X.shape)

In [None]:
# Hold out 20% of the clips for testing. The split is shuffled, seeded for
# reproducibility, and stratified on the labels.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.20,
    random_state=0,
    shuffle=True,
    stratify=Y,
)


In [None]:
# Shape of the full (pre-split) frame array.
X.shape

In [None]:
# Shape of the full label array: one row per kept clip, one column per entry in `classes`.
Y.shape

In [None]:
def build_convlstm_model(sequence_length, image_size, n_classes):
    """Build and compile an alternative ConvLSTM2D + LSTM video classifier.

    Defining this function does not replace the notebook's `model`; call it
    explicitly (e.g. with seq_len, the frame size and len(classes)) to train
    this architecture instead.

    Args:
        sequence_length: Number of frames per clip.
        image_size: Two-element sequence giving the two spatial dimensions of a frame.
        n_classes: Number of output classes.

    Returns:
        The compiled Sequential model.
    """
    # Local imports: these names are not imported anywhere else in the notebook.
    from tensorflow.keras.models import Sequential
    from tensorflow.keras.layers import (Activation, ConvLSTM2D, Dense, Dropout,
                                         LSTM, MaxPooling3D, Reshape)

    in_shape = (sequence_length, image_size[0], image_size[1], 3)
    model = Sequential()
    model.add(ConvLSTM2D(32, kernel_size=(7, 7), padding='valid', return_sequences=True, input_shape=in_shape))
    model.add(Activation('relu'))
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
    model.add(ConvLSTM2D(64, kernel_size=(5, 5), padding='valid', return_sequences=True))
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
    model.add(ConvLSTM2D(96, kernel_size=(3, 3), padding='valid', return_sequences=True))
    model.add(Activation('relu'))
    model.add(ConvLSTM2D(96, kernel_size=(3, 3), padding='valid', return_sequences=True))
    model.add(Activation('relu'))
    model.add(ConvLSTM2D(96, kernel_size=(3, 3), padding='valid', return_sequences=True))
    model.add(MaxPooling3D(pool_size=(1, 2, 2)))
    model.add(Dense(320))
    model.add(Activation('relu'))
    model.add(Dropout(0.5))

    # Flatten the spatial and channel axes of every time step so the sequence
    # can be fed to a plain LSTM.
    out_shape = model.output_shape
    model.add(Reshape((sequence_length, out_shape[2] * out_shape[3] * out_shape[4])))
    model.add(LSTM(64, return_sequences=False))
    model.add(Dropout(0.5))
    model.add(Dense(n_classes, activation='softmax'))
    model.compile(loss='categorical_crossentropy', optimizer='sgd', metrics=['accuracy'])

    # summary() prints the table itself; wrapping it in print() would add a stray "None".
    model.summary()
    return model

In [None]:
# Stop training once the validation loss has gone 7 consecutive epochs without
# improving ("val_loss" is also the callback's default monitor).
earlystop = EarlyStopping(monitor="val_loss", patience=7)
callbacks = [earlystop]

In [None]:
# Train on the training split one clip at a time. 20% of the training data is
# held out for validation, and the callbacks (early stopping) run after every epoch.
history = model.fit(
    X_train,
    y_train,
    batch_size=1,
    epochs=10,
    shuffle=True,
    validation_split=0.2,
    callbacks=callbacks,
)

In [None]:
# Persist the trained model (architecture + weights) to an HDF5 file on Drive.
# Note: this path is inside data_dir, which create_data scans with os.listdir,
# so the saved file shows up among the class-folder entries on later runs.
model.save('/content/drive/MyDrive/clips/model_v1.h5')

In [None]:
# Shell copy of the saved model (no `!` prefix, so this presumably relies on
# IPython's `cp` alias — TODO confirm).
# NOTE(review): the source is under 'MyDrive/clips' and the destination is
# 'My Drive/clips'; if both names point at the same Drive folder this copy is
# redundant — confirm.
cp '/content/drive/MyDrive/clips/model_v1.h5' '/content/drive/My Drive/clips'

In [None]:
# Import plot_model from tensorflow.keras, like every other Keras import in this
# notebook, instead of the private standalone-Keras path keras.utils.vis_utils.
from tensorflow.keras.utils import plot_model

# Render the architecture diagram (with tensor shapes and layer names) to a PNG.
plot_model(model, to_file='model_plot.png', show_shapes=True, show_layer_names=True)

In [None]:
from sklearn.metrics import classification_report

In [None]:
# Per-class probabilities from the softmax head, one row per test clip.
y_pred_probs = model.predict(X_test, batch_size=1)

# Predicted class index = position of the highest probability.
y_pred = np.argmax(y_pred_probs, axis=1)

# Convert the one-hot test labels to class indices. The ndim guard makes the
# cell safe to re-run: once y_test is 1-D, argmax over axis=1 would raise.
if y_test.ndim > 1:
    y_test = np.argmax(y_test, axis=1)

# Explicit labels/target_names so the report rows carry the class names in the
# same order as the one-hot encoding.
print(classification_report(y_test,
                            y_pred,
                            labels=list(range(len(classes))),
                            target_names=classes))