In [14]:
import numpy as np
import os
import sys
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision
import torch.nn.init as init
import torch.utils.data as data
import torch.utils.data.dataset as dataset
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.autograd import Variable
import torchvision.utils as v_utils
import matplotlib.pyplot as plt
import cv2
import math
from collections import OrderedDict
import copy
import time
from model.utils import DataLoader
from model.final_future_prediction_with_memory_spatial_sumonly_weight_ranking_top1 import *
from model.Reconstruction import *
from sklearn.metrics import roc_auc_score
from utils import *
import random
import glob

import argparse

In [15]:
# Define default arguments
default_args = {
    'gpus': '0',
    'batch_size': 2,
    'test_batch_size': 1,
    'h': 128,
    'w': 128,
    'c': 3,
    'method': 'pred',
    't_length': 5,
    'fdim': 512,
    'mdim': 512,
    'msize': 10,
    'alpha': 0.6,
    'th': 0.01,
    'num_workers': 2,
    'num_workers_test': 1,
    'dataset_type': 'ped2',
    'dataset_path': './dataset',
    'model_dir': './exp/ped2/pred/log/model.pth',
    'm_items_dir': './exp/ped2/pred/log/keys.pt'
}

In [16]:
# Create an ArgumentParser object
parser = argparse.ArgumentParser(description="MNAD")

# Add arguments to the parser
for arg, value in default_args.items():
    parser.add_argument(f'--{arg}', type=type(value), default=value)

# Parse the arguments
args = parser.parse_args([])

In [17]:
os.environ["CUDA_DEVICE_ORDER"]="PCI_BUS_ID"
if args.gpus is None:
    gpus = "0"
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus
else:
    gpus = ""
    for i in range(len(args.gpus)):
        gpus = gpus + args.gpus[i] + ","
    os.environ["CUDA_VISIBLE_DEVICES"]= gpus[:-1]

In [18]:
torch.backends.cudnn.enabled = True

In [19]:
test_folder = args.dataset_path+"/"+args.dataset_type+"/testing/frames"

In [20]:
# Loading dataset


test_dataset = DataLoader(test_folder, transforms.Compose([
             transforms.ToTensor(),            
             ]), resize_height=args.h, resize_width=args.w, time_step=args.t_length-1)

In [21]:
test_size = len(test_dataset)

test_size

352

In [22]:
test_batch = data.DataLoader(test_dataset, batch_size = args.test_batch_size, 
                             shuffle=False, num_workers=args.num_workers_test, drop_last=False)

In [23]:
loss_func_mse = nn.MSELoss(reduction='none')

In [24]:
# Loading the trained model
model = torch.load(args.model_dir)

model.cuda()
m_items = torch.load(args.m_items_dir)
labels = np.load('./data/frame_labels_'+args.dataset_type+'.npy')

videos = OrderedDict()
videos_list = sorted(glob.glob(os.path.join(test_folder, '*')))
for video in videos_list:
    video_name = video.split('/')[-1]
    videos[video_name] = {}
    videos[video_name]['path'] = video
    videos[video_name]['frame'] = glob.glob(os.path.join(video, '*.jpg'))
    videos[video_name]['frame'].sort()
    videos[video_name]['length'] = len(videos[video_name]['frame'])

labels_list = []
label_length = 0
psnr_list = {}
feature_distance_list = {}

print('Evaluation of', args.dataset_type)

Evaluation of ped2


In [25]:
# Setting for video anomaly detection
for video in sorted(videos_list):
    video_name = video.split('/')[-1]
    if args.method == 'pred':
        labels_list = np.append(labels_list, labels[0][4+label_length:videos[video_name]['length']+label_length])
    else:
        labels_list = np.append(labels_list, labels[0][label_length:videos[video_name]['length']+label_length])
    label_length += videos[video_name]['length']
    psnr_list[video_name] = []
    feature_distance_list[video_name] = []

label_length = 0
video_num = 0
label_length += videos[videos_list[video_num].split('/')[-1]]['length']
m_items_test = m_items.clone()

model.eval()

for k,(imgs) in enumerate(test_batch):
    
    if args.method == 'pred':
        if k == label_length-4*(video_num+1):
            video_num += 1
            label_length += videos[videos_list[video_num].split('/')[-1]]['length']
    else:
        if k == label_length:
            video_num += 1
            label_length += videos[videos_list[video_num].split('/')[-1]]['length']

    imgs = Variable(imgs).cuda()
    
    if args.method == 'pred':
        outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, _, _, _, compactness_loss = model.forward(imgs[:,0:3*4], m_items_test, False)
        mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0,3*4:]+1)/2)).item()
        mse_feas = compactness_loss.item()

        # Calculating the threshold for updating at the test time
        point_sc = point_score(outputs, imgs[:,3*4:])
    
    else:
        outputs, feas, updated_feas, m_items_test, softmax_score_query, softmax_score_memory, compactness_loss = model.forward(imgs, m_items_test, False)
        mse_imgs = torch.mean(loss_func_mse((outputs[0]+1)/2, (imgs[0]+1)/2)).item()
        mse_feas = compactness_loss.item()

        # Calculating the threshold for updating at the test time
        point_sc = point_score(outputs, imgs)

    if  point_sc < args.th:
        query = F.normalize(feas, dim=1)
        query = query.permute(0,2,3,1) # b X h X w X d
        m_items_test = model.memory.update(query, m_items_test, False)

    psnr_list[videos_list[video_num].split('/')[-1]].append(psnr(mse_imgs))
    feature_distance_list[videos_list[video_num].split('/')[-1]].append(mse_feas)


In [26]:

# Measuring the abnormality score and the AUC
anomaly_score_total_list = []
for video in sorted(videos_list):
    video_name = video.split('/')[-1]
    anomaly_score_total_list += score_sum(anomaly_score_list(psnr_list[video_name]), 
                                     anomaly_score_list_inv(feature_distance_list[video_name]), args.alpha)

anomaly_score_total_list = np.asarray(anomaly_score_total_list)

accuracy = AUC(anomaly_score_total_list, np.expand_dims(1-labels_list, 0))

print('The result of ', args.dataset_type)
print('AUC: ', accuracy*100, '%')


The result of  ped2
AUC:  95.0525335815933 %


In [29]:
anomaly_scores_per_video = {}  # Dictionary to store anomaly scores per video

for video in sorted(videos_list):
    video_name = video.split('/')[-1]
    anomaly_scores = score_sum(anomaly_score_list(psnr_list[video_name]), 
                               anomaly_score_list_inv(feature_distance_list[video_name]), args.alpha)
    
    # Store anomaly scores for each video
    anomaly_scores_per_video[video_name] = anomaly_scores
    
    # Append anomaly scores to the total list
    if 'anomaly_score_total_list' not in locals():
        anomaly_score_total_list = anomaly_scores
    else:
        anomaly_score_total_list = np.concatenate((anomaly_score_total_list, anomaly_scores))

    print(f"Video: {video_name}, Anomaly Scores: {anomaly_scores}")

accuracy = AUC(anomaly_score_total_list, np.expand_dims(1-labels_list, 0))

print('The result of ', args.dataset_type)
print('AUC: ', accuracy*100, '%')

# To print the anomaly score of each data frame folder separately
for video_name, scores in anomaly_scores_per_video.items():
    print(f"Anomaly Scores for {video_name}: {scores}")


Video: frames\01, Anomaly Scores: [0.8538311964051183, 0.8587613025528202, 0.858068239058255, 0.8449592955699353, 0.9560647329998385, 0.866674001671047, 0.8760369801919062, 0.9282919908505692, 0.846161150972803, 0.9144195569032647, 0.879834812545706, 0.8617419631921139, 0.8467941692515291, 0.9164330306857799, 0.912708777232526, 0.9016367031192484, 0.9931332516061302, 0.8968549018731532, 0.8940770046057642, 0.917801491209522, 0.7993232001871802, 0.9240636777800406, 0.8756784236301092, 0.6874653867880945, 0.8460273159763956, 0.8593692904256506, 0.8414729247159527, 0.6069877442596272, 0.5812291234016891, 0.8132546148276344, 0.7729247910897451, 0.5475055267411, 0.6558832669526875, 0.685007882500986, 0.7145112265228102, 0.869607510299923, 0.8330037653933451, 0.9104277369878909, 0.815770714303244, 0.7824649565649591, 0.827940758446398, 0.8228830633486628, 0.7289823767284691, 0.706845105573322, 0.6172305563708813, 0.74896196754705, 0.704912511559774, 0.6516222440321847, 0.655298725423509, 0.5

ValueError: Found input variables with inconsistent numbers of samples: [352, 704]