In [242]:
from glob import glob
import numpy
import pandas as pd
pd.set_option('display.max_rows', 5)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
import matplotlib.pyplot as plt
import seaborn as sns
sns.set()
%matplotlib inline
import os, sys, time, datetime, random, math

In [243]:
video_path = 'MNAD/dataset/ufc/'
bugs_path = 'MNAD/dataset/bugs/'

In [244]:
training_path = video_path + 'training/'
testing_path = video_path + 'testing/'

# use pandas to read folder of videos
training_videos = glob(training_path + '*.mp4')
training_videos = sorted(training_videos, key=lambda x: int(x.split('Videos')[-1].split('_')[0]))
training_videos = pd.DataFrame(training_videos, columns=['video'])

testing_videos = glob(testing_path + '*//' + '*.mp4')
testing_videos = sorted(testing_videos, key=lambda x: int(x[-7:-4]))
testing_videos = pd.DataFrame(testing_videos, columns=['video'])
testing_videos['file_name'] = testing_videos['video'].apply(lambda x: x.split('/')[-1])


# parse list (by lines) of video frame labels from MNAD/dataset/ufc/Temporal_Anomaly_Annotation_for_Testing_Videos.txt
testing_labels = pd.read_csv(video_path + 'Temporal_Anomaly_Annotation_for_Testing_Videos.txt', sep=' ', header=None)
testing_videos

Unnamed: 0,video,file_name
0,MNAD/dataset/ufc/testing/Fighting/Fighting044_...,Fighting044_x264.mp4
1,MNAD/dataset/ufc/testing/Fighting/Fighting024_...,Fighting024_x264.mp4
...,...,...
948,MNAD/dataset/ufc/testing/Stealing/Stealing063_...,Stealing063_x264.mp4
949,MNAD/dataset/ufc/testing/Stealing/Stealing043_...,Stealing043_x264.mp4


In [245]:
testing_labels.columns = ['file_name', 'catagory', 'start1', 'end1', 'start2', 'end2']
#replace all -1 with None
testing_labels = testing_labels.replace(-1, None)
testing_labels.head(5)

Unnamed: 0,file_name,catagory,start1,end1,start2,end2
0,Abuse028_x264.mp4,Abuse,165,240,,
1,Abuse030_x264.mp4,Abuse,1275,1360,,
2,Arrest001_x264.mp4,Arrest,1185,1485,,
3,Arrest007_x264.mp4,Arrest,1530,2160,,
4,Arrest024_x264.mp4,Arrest,1005,3105,,


In [246]:
# join testing_labels to testing_videos by file_name
testing_videos = testing_videos.merge(testing_labels, on='file_name', how='left')
# drop files that are not in testing_labels
testing_videos = testing_videos.dropna(subset=['catagory'])
# convert start1 and end1 to frame index range
testing_videos['anomaly_frames'] = testing_videos.apply(lambda x: list(range(int(x['start1']), int(x['end1']))), axis=1)
del testing_videos['start1']
del testing_videos['end1']
# convert start1 and end1 to frame index range if values are not None
testing_videos['anomaly_frames2'] = testing_videos.apply(lambda x: list(range(int(x['start2']), int(x['end2']))) if not x['start2'] == None else None, axis=1)
del testing_videos['start2']
del testing_videos['end2']
#combine all intefers from anomaly_frames and anomaly_frames2
testing_videos['anomaly_frames'] = testing_videos.apply(lambda x: x['anomaly_frames'] + x['anomaly_frames2'] if not x['anomaly_frames2'] == None else x['anomaly_frames'], axis=1)
del testing_videos['anomaly_frames2']
#sort index
testing_videos = testing_videos.sort_values(by=['file_name'])
#set integer index
testing_videos = testing_videos.reset_index(drop=True)
#replace filename with integer index, and then use zfill to pad with zeros
testing_videos['file_name'] = testing_videos.index
testing_videos['file_name'] = testing_videos['file_name'].apply(lambda x: str(x).zfill(3))

testing_videos

Unnamed: 0,video,file_name,catagory,anomaly_frames
0,MNAD/dataset/ufc/testing/Abuse/Abuse028_x264.mp4,000,Abuse,"[165, 166, 167, 168, 169, 170, 171, 172, 173, ..."
1,MNAD/dataset/ufc/testing/Abuse/Abuse030_x264.mp4,001,Abuse,"[1275, 1276, 1277, 1278, 1279, 1280, 1281, 128..."
...,...,...,...,...
138,MNAD/dataset/ufc/testing/Vandalism/Vandalism02...,138,Vandalism,"[1830, 1831, 1832, 1833, 1834, 1835, 1836, 183..."
139,MNAD/dataset/ufc/testing/Vandalism/Vandalism03...,139,Vandalism,"[540, 541, 542, 543, 544, 545, 546, 547, 548, ..."


In [247]:
# index training
training_videos['file_name'] = training_videos.index
training_videos['file_name'] = training_videos['file_name'].apply(lambda x: str(x).zfill(3))

training_videos

Unnamed: 0,video,file_name
0,MNAD/dataset/ufc/training/Normal_Videos001_x26...,000
1,MNAD/dataset/ufc/training/Normal_Videos002_x26...,001
...,...,...
798,MNAD/dataset/ufc/training/Normal_Videos949_x26...,798
799,MNAD/dataset/ufc/training/Normal_Videos950_x26...,799


In [248]:
# calculate output folder and name for each video based on file_name
testing_videos['output_folder'] = testing_videos['file_name'].apply(lambda x: bugs_path + 'symlink_loc/frames/' + x + '/')
training_videos['output_folder'] = training_videos['file_name'].apply(lambda x: bugs_path + 'training/frames/' + x + '/')

# create output folders
testing_videos['output_folder'].apply(lambda x: os.makedirs(x, exist_ok=True))
training_videos['output_folder'].apply(lambda x: os.makedirs(x, exist_ok=True))
testing_videos

Unnamed: 0,video,file_name,catagory,anomaly_frames,output_folder
0,MNAD/dataset/ufc/testing/Abuse/Abuse028_x264.mp4,000,Abuse,"[165, 166, 167, 168, 169, 170, 171, 172, 173, ...",MNAD/dataset/bugs/symlink_loc/frames/000/
1,MNAD/dataset/ufc/testing/Abuse/Abuse030_x264.mp4,001,Abuse,"[1275, 1276, 1277, 1278, 1279, 1280, 1281, 128...",MNAD/dataset/bugs/symlink_loc/frames/001/
...,...,...,...,...,...
138,MNAD/dataset/ufc/testing/Vandalism/Vandalism02...,138,Vandalism,"[1830, 1831, 1832, 1833, 1834, 1835, 1836, 183...",MNAD/dataset/bugs/symlink_loc/frames/138/
139,MNAD/dataset/ufc/testing/Vandalism/Vandalism03...,139,Vandalism,"[540, 541, 542, 543, 544, 545, 546, 547, 548, ...",MNAD/dataset/bugs/symlink_loc/frames/139/


In [249]:
testing_videos.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 140 entries, 0 to 139
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype 
---  ------          --------------  ----- 
 0   video           140 non-null    object
 1   file_name       140 non-null    object
 2   catagory        140 non-null    object
 3   anomaly_frames  140 non-null    object
 4   output_folder   140 non-null    object
dtypes: object(5)
memory usage: 5.6+ KB
