In [1]:
from C3D_model import C3D

In [2]:
import torch
from torchvision import models
import torch.nn as nn
from torchvision import transforms, datasets
import os
import torch.nn.functional as F
import torch.optim as optim
import time

In [3]:
import numpy as np
import cv2

import time
from __future__ import print_function

import torch
from torch.autograd import Variable

from os.path import join
from glob import glob

import skimage.io as io
from skimage.transform import resize

from C3D_model import C3D

In [4]:
from scipy.io import loadmat

In [5]:
model = C3D()
model.load_state_dict(torch.load('c3d.pickle'))

In [6]:
feature_extractor = torch.nn.Sequential(*(list(model.children())[:-6]))

In [7]:
list(model.children())[-6:-4]

[Linear(in_features=8192, out_features=4096, bias=True),
 Linear(in_features=4096, out_features=4096, bias=True)]

In [8]:
list(model.children())[13]

Linear(in_features=8192, out_features=4096, bias=True)

In [9]:
feature_extractor = torch.nn.Sequential(*(list(model.children())[:-6]))
feature_extractor_head = torch.nn.Sequential(*(list(model.children())[-6:-4]))

In [10]:
C3D_CNN_LIST = list(model.children())[:-6]
C3D_ANN_LIST = list(model.children())[-6:-4]

In [11]:
C3D_ANN_LIST

[Linear(in_features=8192, out_features=4096, bias=True),
 Linear(in_features=4096, out_features=4096, bias=True)]

In [12]:
class C3D_CNN(nn.Module):
    """
    The C3D network as described in [1].
    """

    def __init__(self):
        super(C3D_CNN, self).__init__()

        self.conv1 = C3D_CNN_LIST[0]
        self.pool1 = C3D_CNN_LIST[1]

        self.conv2 = C3D_CNN_LIST[2]
        self.pool2 = C3D_CNN_LIST[3]

        self.conv3a = C3D_CNN_LIST[4]
        self.conv3b = C3D_CNN_LIST[5]
        self.pool3 = C3D_CNN_LIST[6]

        self.conv4a = C3D_CNN_LIST[7]
        self.conv4b = C3D_CNN_LIST[8]
        self.pool4 = C3D_CNN_LIST[9]

        self.conv5a = C3D_CNN_LIST[10]
        self.conv5b = C3D_CNN_LIST[11]
        self.pool5 = C3D_CNN_LIST[12]

        #self.fc6 = nn.Linear(8192, 4096)
        #self.fc7 = nn.Linear(4096, 4096)
        #self.fc8 = nn.Linear(4096, 487)

        #self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()

    def forward(self, x):

        #print('input', x.shape)
        h = self.relu(self.conv1(x))
        h = self.pool1(h)
        #print('layer 1', h.shape)

        h = self.relu(self.conv2(h))
        h = self.pool2(h)
        #print('layer 2', h.shape)

        h = self.relu(self.conv3a(h))
        h = self.relu(self.conv3b(h))
        h = self.pool3(h)
        #print('layer 3', h.shape)

        h = self.relu(self.conv4a(h))
        h = self.relu(self.conv4b(h))
        h = self.pool4(h)
        #print('layer 4', h.shape)

        h = self.relu(self.conv5a(h))
        h = self.relu(self.conv5b(h))
        h = self.pool5(h)
        #print('layer 5', h.shape)

        #h = h.view(-1, 8192)
        #h = self.relu(self.fc6(h))
        #h = self.dropout(h)
        #h = self.relu(self.fc7(h))
        #h = self.dropout(h)
        #print('layer 6', h.shape)

        #logits = self.fc8(h)
        #probs = self.softmax(logits)

        return h

In [46]:
class C3D_ANN(nn.Module):
    """
    The C3D network as described in [1].
    """

    def __init__(self):
        super(C3D_ANN, self).__init__()

        self.fc6 = C3D_ANN_LIST[0]
        self.fc7 = C3D_ANN_LIST[1]

        #self.dropout = nn.Dropout(p=0.5)

        self.relu = nn.ReLU()
        self.softmax = nn.Softmax()

    def forward(self, x):
        #h = h.view(-1, 8192)
        h = self.relu(self.fc6(x))
        #h = self.dropout(h)
        #h = self.relu(self.fc7(h))
        #h = self.dropout(h)

        #logits = self.fc8(h)
        #probs = self.softmax(logits)

        return h

In [47]:
def conv_dict(dict2):
    i = 0
    dict = {}
    for i in range(len(dict2)):
        if str(i) in dict2:
            if dict2[str(i)].shape == (0, 0):
                dict[str(i)] = dict2[str(i)]
            else:
                weights = dict2[str(i)][0]
                weights2 = []
                for weight in weights:
                    if weight.shape in [(1, x) for x in range(0, 5000)]:
                        weights2.append(weight[0])
                    else:
                        weights2.append(weight)
                dict[str(i)] = weights2
    return dict

In [48]:
def get_weight(weight_path, layer):
    # To load the weights from file
    dict2 = loadmat(weight_path)
    weights = conv_dict(dict2)
    # TO get the required weight
    weight = np.array(weights[layer])
    shape = weight[0].shape
    weight = np.reshape(weight[0], (shape[1], shape[0]))
    weight = torch.tensor(weight)
    return weight

In [49]:
class anomaly_ann(nn.Module):
    def __init__(self):
        super(anomaly_ann, self).__init__()
        weights = './weights_L1L2.mat'
        self.layer1 = nn.Linear(4096, 512)
        self.layer1.weight.data = get_weight(weights, '0')

        self.layer2 = nn.Linear(512, 32)
        self.layer2.weight.data = get_weight(weights, '2')

        self.layer3 = nn.Linear(32, 1)
        self.layer3.weight.data = get_weight(weights, '4')
        
        self.relu = nn.ReLU()
        self.sigmoid = nn.Sigmoid()
        
    def forward(self, x):
        #print("x", x.shape)
        out = self.layer1(x)
        out = self.relu(out)
        
        #out = (out-out.mean())/(out.max()-out.mean())
        #print("after vgg", out.shape)
        out = self.layer2(out)
        #out = (out-out.mean())/(out.max()-out.mean())
        #print("after transpose", out.shape)
        out = self.layer3(out)
        #print("after output layer", out.shape)
        return out
        #return self.sigmoid(out)

In [50]:
class anomaly_detector(nn.Module):
    def __init__(self):
        super(anomaly_detector, self).__init__()
        self.feature_extractor = C3D_CNN()
        self.feature_extractor_head = C3D_ANN()
        self.ann = anomaly_ann()
        
    def forward(self, x):
        #print("x", x.shape)
        out = self.feature_extractor(x)
        
        out = out.view(-1, 8192)
        out = self.feature_extractor_head(out)
        out = self.ann(out)
        return out

In [51]:
net = anomaly_detector()

In [52]:
net

anomaly_detector(
  (feature_extractor): C3D_CNN(
    (conv1): Conv3d(3, 64, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (pool1): MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (conv2): Conv3d(64, 128, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (pool2): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (conv3a): Conv3d(128, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (conv3b): Conv3d(256, 256, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (pool3): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=False)
    (conv4a): Conv3d(256, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (conv4b): Conv3d(512, 512, kernel_size=(3, 3, 3), stride=(1, 1, 1), padding=(1, 1, 1))
    (pool4): MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2), padding=0, dilation=1, ceil_mode=F

In [70]:
def get_blocc(arr):
    blocc = np.array([resize(frame, output_shape=(112, 200), preserve_range=True) for frame in arr])

    blocc = blocc[:, :, 44:44+112, :]
    blocc = blocc.transpose(3, 0, 1, 2)  # ch, fr, h, w
    blocc = np.expand_dims(blocc, axis=0)  # batch axis
    #blocc = (blocc-blocc.mean())/(blocc.max()-blocc.mean())
    blocc = np.float32(blocc)
    blocc = torch.from_numpy(blocc)
    return blocc

In [71]:
def predict(video, model):
    cap = cv2.VideoCapture(video) 
    # Check if camera opened successfully
    if (cap.isOpened()== False): 
      print("Error opening video stream or file")
    
    font                   = cv2.FONT_HERSHEY_SIMPLEX
    bottomLeftCornerOfText = (10,30)
    fontScale              = 0.4
    fontColor              = (255,0,0)
    lineType               = 2
    frames = 0
    
    # Read until video is completed
    start_time = time.time()
    i = 0
    arr = []
    score = 0
    message = ""
    while(cap.isOpened()):
      # Capture frame-by-frame
        ret, frame = cap.read()
        if(i<32):
            arr.append(frame)
            i+=1
        else:
            i = 0
            arr = []
            print(message)

        if (ret == True):
            if(len(arr) == 32):
                frames+=32
                X = get_blocc(arr)
                #print(X.mean(), sep='\r', end='\r')
                prediction = model(X)
                score = prediction.data.cpu().numpy()
                score = str(score)
                end_time = time.time()-start_time
                frame_rate = int(frames/end_time)
                message = "framerate = {}, score = {}, frame_id = {}".format(frame_rate, score, i)
            cv2.putText(frame, message, 
                        bottomLeftCornerOfText, 
                        font, 
                        fontScale,
                        fontColor,
                        lineType)
            cv2.imshow('Frame',frame)
            #print(message, sep='\r', end = '\r')

            # Press Q on keyboard to  exit
            if cv2.waitKey(25) & 0xFF == ord('q'):
                break
            # Break the loop
        else: 
            break

    # When everything done, release the video capture object
    cap.release()

    # Closes all the frames
    cv2.destroyAllWindows()

In [72]:
vid_file = '/home/nevin/nevin/datasets/anomaly detection/assault/Assault015_x264.mp4'

In [73]:
predict(vid_file, net)

  warn("The default mode, 'constant', will be changed to 'reflect' in "
  warn("Anti-aliasing will be enabled by default in skimage 0.15 to "


framerate = 7, score = [[3.108469 ]
 [3.1666684]], frame_id = 32
framerate = 7, score = [[5.2511907]
 [4.6372533]], frame_id = 32
framerate = 7, score = [[6.3259788]
 [4.647742 ]], frame_id = 32
framerate = 7, score = [[3.9562206]
 [3.7307994]], frame_id = 32
framerate = 7, score = [[3.8554177]
 [2.9614887]], frame_id = 32
framerate = 7, score = [[6.0859847]
 [5.5639625]], frame_id = 32
framerate = 7, score = [[6.204521 ]
 [2.7206492]], frame_id = 32
framerate = 7, score = [[5.104228 ]
 [4.0239744]], frame_id = 32
framerate = 7, score = [[3.9461088]
 [3.7143111]], frame_id = 32
framerate = 7, score = [[3.5962553]
 [4.4732766]], frame_id = 32
framerate = 7, score = [[3.642368 ]
 [4.7454677]], frame_id = 32


KeyboardInterrupt: 