In [125]:
!pip  install sk-video



In [2]:
import keras.backend as K
from keras.models import Sequential, Model
from keras.utils.data_utils import get_file
from keras.layers import Input, Conv3D, MaxPooling3D, Dense, Flatten, ZeroPadding3D, Dropout, Subtract, BatchNormalization, Lambda
import skvideo.io
import numpy as np

Using TensorFlow backend.


In [0]:
# Download URL of the weights file that is fetched with get_file and loaded
# into model_base.
WEIGHTS_PATH = (
    'https://github.com/adamcasson/c3d/releases/download/v0.1/sports1M_weights_tf.h5'
)
# Download URL of a c3d_mean.npy file. It is not referenced by any other
# visible cell of this notebook.
C3D_MEAN_PATH = (
    'https://github.com/adamcasson/c3d/releases/download/v0.1/c3d_mean.npy'
)

In [0]:
# Shape of one input clip (without the batch axis). The position of the
# 3-channel axis follows the image data format configured in the Keras backend.
shape0 = (16, 112, 112, 3) if K.image_data_format() == 'channels_last' else (3, 16, 112, 112)

model_base = Sequential()


def _c3d_conv(filters, name, **extra):
    """Return a named Conv3D layer with kernel size 3, 'same' padding and ReLU."""
    return Conv3D(filters, 3, activation='relu', padding='same', name=name, **extra)


def _c3d_pool(name, pool=(2, 2, 2), padding='valid'):
    """Return a named MaxPooling3D layer whose strides equal its pool size."""
    return MaxPooling3D(pool_size=pool, strides=pool, padding=padding, name=name)


# The layers are created and added in exactly this order. The explicit names are
# the handles used by the later get_layer() calls.
c3d_layers = [
    _c3d_conv(64, 'conv1', input_shape=shape0),
    _c3d_pool('pool1', pool=(1, 2, 2), padding='same'),

    _c3d_conv(128, 'conv2'),
    _c3d_pool('pool2'),

    _c3d_conv(256, 'conv3a'),
    _c3d_conv(256, 'conv3b'),
    _c3d_pool('pool3'),

    _c3d_conv(512, 'conv4a'),
    _c3d_conv(512, 'conv4b'),
    _c3d_pool('pool4'),

    _c3d_conv(512, 'conv5a'),
    _c3d_conv(512, 'conv5b'),
    ZeroPadding3D(padding=(0, 1, 1), name='padd'),
    _c3d_pool('pool5'),

    Flatten(),

    # fc6 declares its own input_shape although it is not the first layer here;
    # it is later added as the FIRST layer of `discriminator`, which presumably
    # relies on this declaration -- keep it.
    Dense(4096, activation='relu', name='fc6', input_shape=(8192,)),
    Dropout(0.5),
    Dense(4096, activation='relu', name='fc7'),
    Dropout(0.5),
    Dense(487, activation='softmax', name='fc_8'),
]
for c3d_layer in c3d_layers:
    model_base.add(c3d_layer)

# Fetch the weights file (checked against the given md5 hash) and load it while
# the model still contains every layer added above.
weights_path = get_file(
    'sports1M_weights_tf.h5',
    WEIGHTS_PATH,
    cache_subdir='models',
    md5_hash='b7a93b2f9156ccbebe3ca24b41fc5402',
)
model_base.load_weights(weights_path)

# Remove the last added layer (the 487-way softmax 'fc_8'); the model now ends
# with the Dropout that follows 'fc7'.
model_base.pop()

# Generator: maps a 100-dimensional input to an 8192-dimensional output through
# two ReLU Dense layers, each followed by batch normalization.
generator = Sequential([
    Dense(8192, activation='relu', name='fc1', input_shape=(100,)),
    BatchNormalization(),
    Dense(8192, activation='relu', name='fc2'),
    BatchNormalization(),
])

# Discriminator: reuses the very same 'fc6' and 'fc7' layer objects that belong
# to model_base, interleaved with fresh Dropout layers.
discriminator = Sequential([
    model_base.get_layer('fc6'),
    Dropout(0.5),
    model_base.get_layer('fc7'),
    Dropout(0.5),
])

# Convolutional trunk: the conv/pool/padding layer objects of model_base, added
# in their original order, followed by a new Flatten layer.
conv = Sequential()
for trunk_layer_name in (
    'conv1', 'pool1',
    'conv2', 'pool2',
    'conv3a', 'conv3b', 'pool3',
    'conv4a', 'conv4b', 'pool4',
    'conv5a', 'conv5b', 'padd', 'pool5',
):
    conv.add(model_base.get_layer(trunk_layer_name))
conv.add(Flatten())

In [13]:
# Symbolic inputs: two clips of shape `shape0` (a start window and the window
# that follows it) and a 100-dimensional noise vector for the generator.
start_window = Input(shape=shape0, dtype='float32', name='start_window')
followup_window = Input(shape=shape0, dtype='float32', name='followup_window')
noise = Input(shape=(100,), dtype='float32', name='noise')

# Global model
# Generated branch: noise -> generator -> discriminator.
Psi_G = discriminator(generator(noise))
# Real branches: both windows go through the same model_base. model_base ends
# with the Dropout that follows 'fc7' (its softmax layer was popped), so `fc7`
# and `out2` have already passed through that Dropout.
fc7 = model_base(start_window)
out2 = model_base(followup_window)
# Element-wise difference start - followup, exposed as the output named 'out'.
out = Subtract(name='out')([fc7, out2])

# A further Dropout on the start-window features before the classification head.
drop2 = Dropout(0.5)(fc7)
# One Dense(3, sigmoid) head shared by the real and the generated branch.
fc8 = Dense(3, activation='sigmoid', name='fc8')
# The identity Lambda only gives the generated branch its own output name
# ('fake'), which is the key used for it when compiling.
fake = Lambda(lambda x: x,name='fake')(fc8(Dropout(0.5)(Psi_G)))

# Three inputs, three outputs; the outputs are addressed as 'fc8', 'out' and
# 'fake' in the compile cell.
model_global = Model([start_window,followup_window,noise],[fc8(drop2),out,fake])
model_global.summary()

# Generator model
# The two lines below are disabled, so nothing is frozen in this model.
#discriminator.trainable = False
#model_base.trainable = False
# Difference between the start-window features and the generated branch.
sub = Subtract(name='sub')([fc7, Psi_G])
# NOTE(review): model_generator is never compiled in the visible cells; the
# compile cell compiles the bare `generator` instead -- confirm which is intended.
model_generator = Model([start_window,noise],sub)
model_generator.summary()

# Discriminator model
# The two lines below are disabled, so nothing is frozen in this model.
#generator.trainable = False
#conv.trainable = False
# Each input goes through `conv` then `discriminator`; both are built from
# layer objects that also belong to model_base, so the weights are shared.
discriminator_s = discriminator(conv(start_window))
discriminator_f = discriminator(conv(followup_window))
discriminator_n = discriminator(generator(noise))
# NOTE(review): this is followup - start, whereas the 'out' output of
# model_global is start - followup -- confirm the sign difference is intended.
consist = Subtract(name='consist')([discriminator_f, discriminator_s])
# The identity Lambda only gives the generated branch its own output name.
hard_neg = Lambda(lambda x: x,name='hard_neg')(fc8(Dropout(0.5)(discriminator_n)))

# Outputs are addressed as 'fc8', 'consist' and 'hard_neg' in the compile cell;
# `fc8` is the shared Dense head created for model_global.
model_discriminator = Model([start_window,followup_window,noise],[fc8(Dropout(0.5)(discriminator_s)),consist,hard_neg])
model_discriminator.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
noise (InputLayer)              (None, 100)          0                                            
__________________________________________________________________________________________________
start_window (InputLayer)       (None, 16, 112, 112, 0                                            
__________________________________________________________________________________________________
sequential_18 (Sequential)      (None, 8192)         68009984    noise[0][0]                      
__________________________________________________________________________________________________
sequential_17 (Sequential)      (None, 4096)         77995776    start_window[0][0]               
                                                                 followup_window[0][0]            
__________

In [0]:
def loss_classification(y_true, y_pred):
    """Negative mean log of the products between predictions and targets.

    Computes ``K.dot(y_pred, transpose(y_true))``, takes the logarithm and
    returns the negated mean over the last axis.

    The products are clamped from below at ``K.epsilon()`` before the
    logarithm: a product that is exactly zero (for example an all-zero target
    row) would otherwise yield ``log(0) = -inf`` and an infinite loss.

    Args:
        y_true: target tensor passed in by Keras.
        y_pred: model output tensor passed in by Keras.

    Returns:
        Tensor holding the negated mean log-product along the last axis.
    """
    # NOTE(review): if both tensors are (batch, classes), this dot product
    # pairs every prediction with every target in the batch, not only with its
    # own target -- confirm that this is intended.
    products = K.dot(y_pred, K.transpose(y_true))
    safe_products = K.maximum(products, K.epsilon())
    return -K.mean(K.log(safe_products), axis=-1)
    
def loss_temporal_consistency(y_true,y_pred):
    """Mean over the last axis of the squared products of y_pred and y_true.

    The products are ``K.dot(y_pred, transpose(y_true))``.
    """
    products = K.dot(y_pred, K.transpose(y_true))
    squared = K.square(products)
    return K.mean(squared, axis=-1)

def loss_matching(y_true,y_pred):
    """Square of the last-axis mean of the products of y_pred and y_true.

    The products are ``K.dot(y_pred, transpose(y_true))``.
    """
    products = K.dot(y_pred, K.transpose(y_true))
    row_means = K.mean(products, axis=-1)
    return K.square(row_means)

In [0]:
# model_global and model_discriminator are compiled the same way: the 'fc8'
# head and the generated-sample head use loss_classification, the difference
# output uses loss_temporal_consistency, and every loss has weight 1. Only the
# names of the last two outputs differ between the two models.
for three_head_model, difference_output, generated_output in (
    (model_global, 'out', 'fake'),
    (model_discriminator, 'consist', 'hard_neg'),
):
    three_head_model.compile(
        optimizer='adam',
        loss={
            'fc8': loss_classification,
            difference_output: loss_temporal_consistency,
            generated_output: loss_classification,
        },
        loss_weights={'fc8': 1, difference_output: 1, generated_output: 1},
    )

# The bare generator network is compiled on its own with the matching loss.
generator.compile(optimizer='adam', loss=loss_matching)

In [0]:
import random 
import matplotlib.pyplot as plt 


path1 = '../content/Normal_Videos_050_x264.mp4'
path2 = '../content/Abuse001_x264.mp4'


def _read_video_112(video_path):
  """Read a whole video with skvideo, passing the output options
  ``-sws_flags bilinear`` and ``-s 112x112``."""
  # A fresh options dict is built for every call, as in the original cell.
  return skvideo.io.vread(
      video_path,
      outputdict={
          "-sws_flags": "bilinear",
          "-s": "112x112",
      },
  )


Normal_Video = _read_video_112(path1)
Abnormal_Video = _read_video_112(path2)

# One list per video, in the same order as `Videos`. Values at even positions
# are read as action start frames by the next step; -1 means "no action".
# NOTE(review): as written, the interval [230, 365] is attached to the NORMAL
# video and the abnormal video has none -- confirm the order is not swapped.
action_intervals = [[230, 365], [-1,-1]]

Videos = [Normal_Video, Abnormal_Video]


# Train on two videos only (extending this to more videos should be easy).
# A single training iteration builds one batch: 15 positive examples per
# annotated action start, followed later by the same number of negatives.

print("Constructing Postive Exapmles")
positive_indexes = []   # per video: first-frame indexes of its positive windows
actual_windows = []     # 16-frame windows ending at or just after an action start
next_windows = []       # the 16 frames that directly follow each window above

for action, intervals in enumerate(action_intervals):
  video = Videos[action]
  positive_indexes.append([])
  # Values at even positions are start frames. The first -1 ends the list; it
  # is checked BEFORE use, so a sentinel is never treated as a real start.
  for start in intervals[0::2]:
    if start == -1:
      break
    # The windows span frames [start-15, start+31); fail clearly instead of
    # silently slicing empty, wrapped-around or truncated windows.
    if start < 15 or start + 31 > len(video):
      raise ValueError(
          "action start %d of video %d needs frames %d..%d, but the video has %d frames"
          % (start, action, start - 15, start + 30, len(video)))
    positive_indexes[action].extend(range(start - 15, start))
    for i in range(15):
      actual_windows.append(video[start - 15 + i:start + i + 1])
      next_windows.append(video[start + i + 1:start + i + 17])

if not actual_windows:
  raise ValueError("action_intervals contains no action start, so there are no positive examples")

# Stack once at the end instead of re-copying the growing arrays per window.
actuals = np.stack(actual_windows)
nexts = np.stack(next_windows)
# Index of the last positive example (name kept from the original cell).
count = len(actual_windows) - 1

nbr_positive = actuals.shape[0]
print("Constructing Negativve Exapmles")

# Draw as many negative examples as there are positive ones. Each negative is
# a random 16-frame window plus the 16 frames that follow it.
# NOTE(review): a candidate is rejected only when its FIRST frame index is a
# positive start index; windows that merely overlap an action are accepted --
# confirm that this is intended.
negative_actuals = []
negative_nexts = []
nbr_neg = 0  # number of negative examples selected so far
while nbr_neg < nbr_positive:
  video_indx = random.randint(0, len(Videos) - 1)  # pick a video at random
  candidate_video = Videos[video_indx]
  last_start = candidate_video.shape[0] - 33
  start_frame = random.randint(0, last_start)
  # Redraw until the start frame is not one of the positive start indexes.
  while start_frame in positive_indexes[video_indx]:
    start_frame = random.randint(0, last_start)
  negative_actuals.append(candidate_video[start_frame:start_frame + 16])
  negative_nexts.append(candidate_video[start_frame + 16:start_frame + 32])
  nbr_neg += 1

# Append the negatives after the positives along the example axis.
actuals = np.vstack([actuals] + [window[np.newaxis] for window in negative_actuals])
nexts = np.vstack([nexts] + [window[np.newaxis] for window in negative_nexts])
  
inputs = [actuals, nexts]

# The positive examples occupy the first nbr_positive rows, so those rows get
# target 1 and the remaining (negative) rows stay 0.
total_examples = nbr_positive + nbr_neg

labels_1 = np.zeros((total_examples, 1))
labels_1[:nbr_positive, :] = 1

labels_2 = np.zeros((total_examples, 4096))
labels_2[:nbr_positive, :] = 1

labels = [labels_1, labels_2]


print("Start Training")
# NOTE(review): `model` is not defined in any visible cell, so this line fails
# on a fresh kernel. The visible models are model_global, model_generator and
# model_discriminator, and none of them takes exactly these two inputs with
# these two targets -- decide which model to train and adapt inputs/labels.
loss = model.train_on_batch(inputs, labels)

In [0]:
# Display the value returned by train_on_batch in the previous cell.
loss