In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install bar-chart-race

Collecting bar-chart-race
  Downloading bar_chart_race-0.1.0-py3-none-any.whl.metadata (4.2 kB)
Downloading bar_chart_race-0.1.0-py3-none-any.whl (156 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/156.8 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m156.8/156.8 kB[0m [31m11.8 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: bar-chart-race
Successfully installed bar-chart-race-0.1.0


In [3]:
def warn(*args, **kwargs):
    pass
import warnings
warnings.warn = warn

import matplotlib.pyplot as plt
import matplotlib.animation as animation
import bar_chart_race as bcr
import cv2
import numpy as np

In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class FusionModel(nn.Module):
    def __init__(self):
        super(FusionModel, self).__init__()
        self.relu=nn.ReLU(inplace=True)

        ## Hint: Please refer to above table for constructing layers
        # Construct block of RGB layers which takes RGB channel(3) as input
        ## RGB 데이터를 처리하는 레이어 블록 (입력 채널 수: 3)
        self.rgb_conv1a = nn.Conv3d(3, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.rgb_conv1b = nn.Conv3d(16, 16, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.rgb_pool1a = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.rgb_conv1c = nn.Conv3d(16, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.rgb_conv1d = nn.Conv3d(16, 16, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.rgb_pool1b = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.rgb_conv2a = nn.Conv3d(16, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.rgb_conv2b = nn.Conv3d(32, 32, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.rgb_pool2a = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.rgb_conv2c = nn.Conv3d(32, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.rgb_conv2d = nn.Conv3d(32, 32, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.rgb_pool2b = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        # Construct block of optical flow layers which takes the optical flow channel(2) as input
        ## 옵티컬 플로우 데이터를 처리하는 레이어 블록 (입력 채널 수: 2)
        self.opt_conv1a = nn.Conv3d(2, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.opt_conv1b = nn.Conv3d(16, 16, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.opt_pool1a = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.opt_conv1c = nn.Conv3d(16, 16, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.opt_conv1d = nn.Conv3d(16, 16, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.opt_pool1b = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.opt_conv2a = nn.Conv3d(16, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.opt_conv2b = nn.Conv3d(32, 32, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.opt_pool2a = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        self.opt_conv2c = nn.Conv3d(32, 32, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.opt_conv2d = nn.Conv3d(32, 32, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.opt_pool2b = nn.MaxPool3d(kernel_size=(1, 2, 2), stride=(1, 2, 2))

        ## Fusion and Pooling
        self.fusion_pool = nn.MaxPool3d(kernel_size=(8, 1, 1), stride=(8, 1, 1))

        # Construct merging Block
        self.merge_conv1a = nn.Conv3d(32, 64, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.merge_conv1b = nn.Conv3d(64, 64, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.merge_pool1a = nn.MaxPool3d(kernel_size=(2, 2, 2), stride=(2, 2, 2))

        self.merge_conv1c = nn.Conv3d(64, 64, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.merge_conv1d = nn.Conv3d(64, 64, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.merge_pool1b = nn.MaxPool3d(kernel_size=(2, 3, 3), stride=(2, 2, 2))

        self.merge_conv2a = nn.Conv3d(64, 128, kernel_size=(1, 3, 3), stride=1, padding=(0, 1, 1))
        self.merge_conv2b = nn.Conv3d(128, 128, kernel_size=(3, 1, 1), stride=1, padding=(1, 0, 0))
        self.merge_pool2a = nn.MaxPool3d(kernel_size=(2, 3, 3), stride=(2, 2, 2))

        # Fully Connected Layers
        self.fc1 = nn.Linear(128, 128)
        self.dropout = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 32)
        self.fc3 = nn.Linear(32, 2)

        # Initialize weights
        self.__init_weight()

    def forward(self, x):
        x = x.transpose(2,4)
        x = x.transpose(3,4)
        x = x.transpose(1,2)
        rgb = x[:,:3,:,:,:]
        opt = x[:,3:5,:,:,:]

        # Pass through the RGB data through the blocks of RGB layers
        # RGB 데이터 처리
        #print("start rgb shape : ",rgb.shape)
        rgb = self.relu(self.rgb_conv1a(rgb))
        #print("rgb shape : ",rgb.shape)
        rgb = self.relu(self.rgb_conv1b(rgb))
        #print("rgb shape : ",rgb.shape)
        rgb = self.rgb_pool1a(rgb)
        #print("rgb shape pooling: ",rgb.shape)

        rgb = self.relu(self.rgb_conv1c(rgb))
        #print("rgb shape : ",rgb.shape)
        rgb = self.relu(self.rgb_conv1d(rgb))
        #print("rgb shape : ",rgb.shape)
        rgb = self.rgb_pool1b(rgb)
        #print("rgb shape pooling: ",rgb.shape)

        rgb = self.relu(self.rgb_conv2a(rgb))
        rgb = self.relu(self.rgb_conv2b(rgb))
        rgb = self.rgb_pool2a(rgb)

        rgb = self.relu(self.rgb_conv2c(rgb))
        rgb = self.relu(self.rgb_conv2d(rgb))
        rgb = self.rgb_pool2b(rgb)

        #print("rgb shape : ",rgb.shape)
        # Pass through the optical flow data through the blocks of RGB layers
        # 옵티컬 플로우 데이터 처리
        opt = self.relu(self.opt_conv1a(opt))
        opt = self.relu(self.opt_conv1b(opt))
        opt = self.opt_pool1a(opt)

        opt = self.relu(self.opt_conv1c(opt))
        opt = self.relu(self.opt_conv1d(opt))
        opt = self.opt_pool1b(opt)

        opt = self.relu(self.opt_conv2a(opt))
        opt = self.relu(self.opt_conv2b(opt))
        opt = self.opt_pool2a(opt)

        opt = self.relu(self.opt_conv2c(opt))
        opt = self.relu(self.opt_conv2d(opt))
        opt = self.opt_pool2b(opt)

        #print("opt shape : ",opt.shape)
        # Fuse by performing elementwise multiplication of rgb and opt tensors.
        fused = rgb * opt

        # Perform maxpooling of fused
        fused = self.fusion_pool(fused)

        # Pass through the fused data into merging block
        merged = self.relu(self.merge_conv1a(fused))
        #print("merged shape 1.1 : ",merged.shape)
        merged = self.relu(self.merge_conv1b(merged))
        #print("merged shape 1.2 : ",merged.shape)
        merged = self.merge_pool1a(merged)
        #print("merged shape 1.3 : ",merged.shape)

        merged = self.relu(self.merge_conv1c(merged))
        #print("merged shape 2.1 : ",merged.shape)
        merged = self.relu(self.merge_conv1d(merged))
        #print("merged shape 2.2 : ",merged.shape)
        merged = self.merge_pool1b(merged)
        #print("merged shape 2.3 : ",merged.shape)

        merged = self.relu(self.merge_conv2a(merged))
        #print("merged shape 3.1 : ",merged.shape)
        merged = self.relu(self.merge_conv2b(merged))
        #print("merged shape 3.2 : ",merged.shape)
        merged = self.merge_pool2a(merged)
        #print("merged shape 3.3 : ",merged.shape)


        # Fully Connected Layers
        x = merged.view(merged.size(0), -1)
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        x = self.relu(x)
        x = self.fc3(x)

        return x

    def __init_weight(self):
        for m in self.modules():
            if isinstance(m, nn.Conv3d):
                # Perform weight initialization ("kaiming normal")
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')

In [5]:
device = torch.device('cuda')

model = FusionModel().to(device)
# use your pretrained model path
model.load_state_dict(torch.load('/content/drive/MyDrive/Colab Notebooks/IC_PBL/best_model_epoch_ver1_28.pth'))

<All keys matched successfully>

In [6]:
def uniform_sampling(video, target_frames=64):
    # get total frames of input video and calculate sampling interval
    len_frames = int(len(video))
    interval = int(np.ceil(len_frames/target_frames))
    # init empty list for sampled video and
    sampled_video = []
    for i in range(0,len_frames,interval):
        sampled_video.append(video[i])
    # calculate numer of padded frames and fix it
    num_pad = target_frames - len(sampled_video)
    padding = []
    if num_pad>0:
        for i in range(-num_pad,0):
            try:
                padding.append(video[i])
            except:
                padding.append(video[0])
        sampled_video += padding
    # get sampled video
    return np.array(sampled_video, dtype=np.float32)


def normalize(data):
    mean = data.mean()
    std = data.std()
    return (data - mean) / std

In [None]:
# test video path
file_path ="/content/drive/MyDrive/Colab Notebooks/IC_PBL/dataset/Assault018_x264.mp4" # use your path

cap = cv2.VideoCapture(file_path)
# Get number of frames
len_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
fps = int(cap.get(cv2.CAP_PROP_FPS))

frames = []
flows = []
preds = []
resize = (224, 224)

prev_frame = None
for iter in range(0, len_frames-1):
    _, frame = cap.read()
    frame = cv2.resize(frame, resize, interpolation=cv2.INTER_AREA)
    frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
    frame = np.reshape(frame, (224, 224, 3))
    frames.append(frame)

    img = cv2.cvtColor(frame, cv2.COLOR_RGB2GRAY)
    gray_img = np.reshape(img, (224, 224, 1))
    if prev_frame is None:
        prev_frame = gray_img

    flow = cv2.calcOpticalFlowFarneback(prev_frame, gray_img, None, 0.5, 3, 15, 3, 5, 1.2,
                                        cv2.OPTFLOW_FARNEBACK_GAUSSIAN)
    prev_frame = gray_img
    # subtract the mean in order to eliminate the movement of camera
    flow[..., 0] -= np.mean(flow[..., 0])
    flow[..., 1] -= np.mean(flow[..., 1])
    # normalize each component in optical flow
    flow[..., 0] = cv2.normalize(flow[..., 0], None, 0, 255, cv2.NORM_MINMAX)
    flow[..., 1] = cv2.normalize(flow[..., 1], None, 0, 255, cv2.NORM_MINMAX)

    flows.append(flow)
    result = np.zeros((len(flows), 224, 224, 5))
    result[..., :3] = frames
    result[..., 3:] = flows

    data = np.float32(result)
    # # sampling 64 frames uniformly from the entire video
    data = uniform_sampling(video=data, target_frames=64)
    # normalize rgb images and optical flows, respectively
    data[..., :3] = normalize(data[..., :3])
    data[..., 3:] = normalize(data[..., 3:])

    fr, w, h, ch = data.shape
    data = data.reshape((-1, fr, w, h, ch))
    # pred = model.predict(data)[0]
    pred = model(torch.Tensor(data).to(device))
    # fights.append(pred[0])
    # none.append(pred[1])
    pred = nn.functional.softmax(pred, dim=-1)
    preds.append(pred[0].detach().cpu().numpy())

cap.release()

In [8]:
import pandas as pd

print(np.array(preds).shape)

group_list = ["Violence", "Non-Violence"]
df = pd.DataFrame(preds, columns = ['Violence', 'Non-Violence'])

df

(149, 2)


Unnamed: 0,Violence,Non-Violence
0,0.160998,0.839001
1,0.152766,0.847234
2,0.154314,0.845686
3,0.153776,0.846224
4,0.151473,0.848527
...,...,...
144,0.978608,0.021392
145,0.991374,0.008626
146,0.992734,0.007266
147,0.989318,0.010682


In [15]:
bcr.bar_chart_race(df=df[:],
                   n_bars = 2,
                   figsize=(4, 4),
                   label_bars=False,
                   sort='desc',
                   #title='Violence detection',
                   fixed_order=['Violence', 'Non-Violence'],
                   orientation='h',
                   fixed_max=True,
                   period_length=int(1000/fps),
                  )