# 01. Transforming video files (.avi) into 160×160, 30-frame NumPy arrays (.npy)
* **Purpose of this Code** : Making train set & test set to train and evaluate Model
* **`Please!`** **Before running this code, separate your videos into a `Fight` folder and a `NonFight` folder.**
* **`Output=.pickle`** : Very large files (maybe about 30–50 GB each).
    * Please check your free disk space before running this code!

# Imports

In [2]:
import os
import pickle # save list as .pickle
import numpy as np
from tqdm import tqdm
import cv2 # read video file
from skimage.transform import resize # resizing images

# 01-A. Transform video files to Numpy array

In [3]:
def Save2Npy(file_dir, save_dir, num_frames=30, frame_size=(160, 160)):
    """Convert every video in ``file_dir`` into a fixed-size NumPy clip saved in ``save_dir``.

    For each entry of ``file_dir`` the first ``num_frames`` frames are read with
    OpenCV, resized to ``frame_size`` and stored in a float64 array of shape
    ``(num_frames, height, width, 3)``. Videos with fewer than ``num_frames``
    readable frames are zero-padded at the end. Each array is written to
    ``<save_dir>/<file name without its extension>.npy``.

    Entries from which no frame at all can be read (non-video files,
    sub-folders, corrupt videos) are skipped and reported at the end instead of
    being saved as an all-zero clip.

    No colour conversion is performed: frames keep the channel order delivered
    by ``cv2.VideoCapture.read``.

    :: file_dir :: Folder that holds the original video files.
    :: save_dir :: Folder that receives the .npy files (created if missing).
    :: num_frames :: Number of leading frames kept per video (default 30).
    :: frame_size :: ``(height, width)`` of the resized frames (default 160x160).
    """
    os.makedirs(save_dir, exist_ok=True)

    height, width = frame_size
    skipped = []  # entries that produced no frame at all

    for file in tqdm(os.listdir(file_dir)):
        frames = np.zeros((num_frames, height, width, 3), dtype=np.float64)
        count = 0

        vid = cv2.VideoCapture(os.path.join(file_dir, file))
        try:
            while count < num_frames:
                grabbed, frame = vid.read()
                if not grabbed:  # end of video, or the file could not be decoded
                    break

                frm = resize(frame, (height, width, 3))

                # Safety net: skimage's resize normally already returns floats in
                # [0, 1] for uint8 input, so this only triggers for unscaled data.
                if np.max(frm) > 1:
                    frm = frm / 255.0

                frames[count] = frm
                count += 1
        finally:
            # Always free the decoder, even if resizing raised.
            vid.release()

        if count == 0:
            skipped.append(file)
            continue

        # splitext() strips only the final extension, so "clip.v2.avi" becomes
        # "clip.v2" and cannot collide with "clip.avi" or "clip.v3.avi".
        video_name = os.path.splitext(file)[0]
        np.save(os.path.join(save_dir, video_name + '.npy'), frames)

    if skipped:
        print("Skipped {} unreadable entr{} in {}: {}".format(
            len(skipped), "y" if len(skipped) == 1 else "ies", file_dir, skipped))
        
    

In [4]:
# Input folder with the raw Fight videos and output folder for their .npy clips.
file_dir='D:\\graduation project\\datasets\\RWF-2000 Dataset\\4-4data\\Fight' # Folder that contains the videos
save_dir='D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\numpy\\v'

In [5]:
# Convert every video in file_dir to a .npy clip in save_dir (Fight set).
Save2Npy(file_dir=file_dir, save_dir=save_dir)

  0%|          | 0/1000 [00:00<?, ?it/s]

  0%|          | 5/1000 [00:09<39:10,  2.36s/it]

In [None]:
# Input folder with the raw NonFight videos and output folder for their .npy clips.
# Note: this rebinds the same file_dir / save_dir names used for the Fight set.
file_dir='D:\\graduation project\\datasets\\RWF-2000 Dataset\\4-4data\\NonFight' # Folder that contains the videos
save_dir='D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\numpy\\nv'

In [None]:
# Convert every video in file_dir to a .npy clip in save_dir (NonFight set).
Save2Npy(file_dir=file_dir, save_dir=save_dir)

  0%|          | 0/1000 [00:00<?, ?it/s]

100%|██████████| 1000/1000 [26:26<00:00,  1.59s/it] 


# 01-B. Make list of Numpy arrays

## 1. Fight Videos

In [2]:
Fight_dir='D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\numpy\\v' # Folder that contains the Fight (violence) .npy clips
# os.listdir() returns names in arbitrary order and may include stray files
# (e.g. OS thumbnails); keep only .npy files and sort them so the list order
# is the same on every run.
file_list_npy = sorted(name for name in os.listdir(Fight_dir) if name.lower().endswith('.npy'))

data_Fight=[]
for file in file_list_npy:
    file_path=os.path.join(Fight_dir, file)  # full path of one clip
    x=np.load(file_path)  # array holding the frames of one video
    data_Fight.append(x)

print(len(data_Fight))


602


## 2. NonFight Videos

In [3]:
NonFight_dir='D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\numpy\\nv' # Folder that contains the NonFight .npy clips
# os.listdir() returns names in arbitrary order and may include stray files
# (e.g. OS thumbnails); keep only .npy files and sort them so the list order
# is the same on every run.
file_list_npy = sorted(name for name in os.listdir(NonFight_dir) if name.lower().endswith('.npy'))

data_NonFight=[]
for file in file_list_npy:
    file_path=os.path.join(NonFight_dir, file)  # full path of one clip
    x=np.load(file_path)  # array holding the frames of one video
    data_NonFight.append(x)

print(len(data_NonFight))

601


# 01-C. Save list as .pickle

In [4]:
# Write the whole list of Fight clips into one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\v\\v.pickle","wb") as fw:
    pickle.dump(data_Fight, fw, protocol=pickle.HIGHEST_PROTOCOL)  # serialize (convert the data to a byte stream)

In [28]:
# Write the whole list of NonFight clips into one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\nv\\nv.pickle","wb") as fw:
    pickle.dump(data_NonFight, fw, protocol=pickle.HIGHEST_PROTOCOL)

# 01-D. Make & Save Label(Real Values) as .pickle
> Memo
```
prediction=preds.argmax(axis=0):[0 0]
Results =  [[0.09166703 0.9092251 ]]
Maximun Probability =  0.9092251
Difference of prob  0.8184502124786377
Alert : violence - 90.92%
```
> labels1=[] : list of each video file's label (Fight/NonFight). 
> * Violence(Fight) : [0,1]
> * Non-Violence(NonFight) : [1,0]

## 1. Make Label list

In [4]:
# One-hot label for a Fight (violence) video: [0, 1].
label_Fight_per_video=np.array([0,1])
# One label per Fight video. Each entry is its own copy: `[arr] * n` would store
# n references to the same array, so editing one label would change all of them.
label_Fight=[label_Fight_per_video.copy() for _ in range(len(data_Fight))]

In [5]:
# One-hot label for a NonFight (non-violence) video: [1, 0].
label_NonFight_per_video=np.array([1,0])
# One label per NonFight video. Each entry is its own copy: `[arr] * n` would store
# n references to the same array, so editing one label would change all of them.
label_NonFight=[label_NonFight_per_video.copy() for _ in range(len(data_NonFight))]

In [6]:
# Sanity check: number of Fight labels and NonFight labels.
len(label_Fight), len(label_NonFight)

(602, 601)

In [7]:
# Spot-check one Fight label.
label_Fight[55]

array([0, 1])

## 2. Save Label list as .pickle

In [30]:
# Save the Fight labels next to the Fight data pickle (default pickle protocol).
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\v\\label_v.pickle","wb") as fw:
    pickle.dump(label_Fight, fw)

In [29]:
# Save the NonFight labels next to the NonFight data pickle (default pickle protocol).
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\nv\\label_nv.pickle","wb") as fw:
    pickle.dump(label_NonFight, fw)

## Notebook 2

In [8]:
import numpy as np
import pickle
from random import shuffle

In [6]:
# Load the list of Fight clips (one NumPy array per video).
# NOTE: pickle.load() rebuilds the whole list in RAM; the recorded output of
# this cell shows it failing with MemoryError. Only unpickle files you trust.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\v\\v.pickle","rb") as fr:
    data_Fight=pickle.load(fr)
# print(len(data_Fight))

SystemError: deallocated bytearray object has exported buffers

MemoryError: 

In [3]:
# Load the list of Fight labels (one label array per video).
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\v\\label_v.pickle","rb") as fr:
    label_Fight=pickle.load(fr)
print(len(label_Fight))

1000


In [12]:
# Number of Fight labels currently in memory.
len(label_Fight)

602

In [3]:
# Load the list of NonFight clips (one NumPy array per video).
# NOTE: pickle.load() rebuilds the whole list in RAM; the recorded output of
# this cell shows it failing with MemoryError. Only unpickle files you trust.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\nv\\nv.pickle","rb") as fr:
    data_NonFight=pickle.load(fr)
print(len(data_NonFight))

SystemError: deallocated bytearray object has exported buffers

MemoryError: 

In [6]:
# Load the list of NonFight labels (one label array per video).
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\nv\\label_nv.pickle","rb") as fr:
    label_NonFight=pickle.load(fr)
print(len(label_NonFight))

1000


## 1. Merge data : Fight + NonFight

In [9]:
# Concatenate the clips: all Fight videos first, then all NonFight videos.
data_total=data_Fight+data_NonFight
print(len(data_total))

1203


In [10]:
# Concatenate the labels in the same Fight-then-NonFight order as the data,
# so data_total[i] and label_total[i] refer to the same video.
label_total=label_Fight+label_NonFight
print(len(label_total))

1203


## 2. Shuffle merged dataset

In [11]:
# The `shuffle` imported in this notebook is random.shuffle from the standard
# library; its generator is independent of NumPy's, so np.random.seed() alone
# does not make the shuffle reproducible. Seed both generators.
import random
random.seed(42)
np.random.seed(42)

In [12]:
# Shuffle data and labels together so every clip keeps its own label.
# NOTE(review): `shuffle` is random.shuffle (stdlib); the result is only
# reproducible if the stdlib `random` generator has been seeded.
c=list(zip(data_total, label_total)) # pair each clip with its label
shuffle(c) # shuffle the pairs in place
data_total, label_total=zip(*c)  # split back into two sequences (both are tuples from here on)

## 3. save shuffled dataset as .pickle
* **`pickle.dump(protocol=pickle.HIGHEST_PROTOCOL)`** : Passing the highest protocol can resolve out-of-memory problems while saving the pickle.

In [21]:
# Save the shuffled clips (data only) as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\total\\total2000.pickle","wb") as fw:
    pickle.dump(data_total, fw, protocol=pickle.HIGHEST_PROTOCOL)

In [22]:
# Save the shuffled labels (same order as the shuffled clips) as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\total\\total2000_label.pickle","wb") as fw:
    pickle.dump(label_total, fw)

# 02-C. Split training set / test set

## 1. Load shuffled dataset(.pickle)
* **`The reason why I repeated saving and loading .pickle is`** :
    * Just, because of RAM & memory issues.

In [23]:
# td: path of the shuffled data pickle; tl: path of the matching label pickle.
td="D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\total\\total2000.pickle"
tl="D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\total\\total2000_label.pickle"

In [11]:
# Load the shuffled clips. Requires the cell that defines `td` to have been run
# first (the recorded output shows a NameError when it was not).
with open(td,"rb") as fr:
    data_total=pickle.load(fr)

NameError: name 'td' is not defined

In [None]:
# Load the shuffled labels. Requires the cell that defines `tl` to have been run first.
with open(tl,"rb") as fr:
    label_total=pickle.load(fr)

## 2. Split dataset as training set / test set (8:2 ratio)


### 1) The number of training set, test set

In [13]:
# 80 % of the samples go to the training set. The test set is whatever remains,
# so the two counts always add up to len(data_total); int(n*0.8) + int(n*0.2)
# can fall one short of n because both terms are truncated.
training_set=int(len(data_total)*0.8)
test_set=len(data_total)-training_set

In [14]:
# Cut data and labels at the same index so every clip stays aligned with its label:
# the first `training_set` samples form the training set, the rest the test set.
data_training, data_test = data_total[:training_set], data_total[training_set:]
label_training, label_test = label_total[:training_set], label_total[training_set:]

In [15]:
# Sanity check: data and label counts must match within each split.
len(data_training), len(label_training), len(data_test), len(label_test)

(962, 962, 241, 241)

### 2) Check the shape of elements

In [16]:
# Shape of one clip and of one label.
data_training[900].shape, label_training[0].shape

((30, 160, 160, 3), (2,))

In [17]:
# Channel 0 of the first frame of training sample 12 (the recorded output shows values within [0, 1]).
data_training[12][0, :, :, 0]

array([[0.7430077 , 0.69625578, 0.63656031, ..., 0.95644944, 0.96701296,
        0.95813254],
       [0.73568934, 0.6875909 , 0.62088489, ..., 0.96891968, 0.97043729,
        0.96289282],
       [0.71244198, 0.66309651, 0.54427322, ..., 0.96460135, 0.96557748,
        0.94797657],
       ...,
       [0.58068958, 0.63040458, 0.69043349, ..., 0.30207519, 0.3594045 ,
        0.30902719],
       [0.62196339, 0.64201595, 0.68509924, ..., 0.27631474, 0.28947046,
        0.29899182],
       [0.6795203 , 0.65403556, 0.66961267, ..., 0.26545851, 0.28742633,
        0.28680905]])

## 3. Save training set & test set as .pickle file
* **`The reason why I repeated saving and loading .pickle is`** :
    * Just, because of RAM & memory issues.

In [22]:
# Training set, data: save the training clips as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\train\\train_data.pickle","wb") as fw:
    pickle.dump(data_training, fw, protocol=pickle.HIGHEST_PROTOCOL)

In [3]:
# Reload the training clips from the pickle written above (e.g. after a kernel restart).
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\train\\train_data.pickle","rb") as fr:
    data_training=pickle.load(fr)
print(len(data_training))

962


In [23]:
# Training set, labels: save them as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\train\\train_label.pickle","wb") as fw:
    pickle.dump(label_training, fw)

In [24]:
# Test set, data: save the test clips as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\test\\test_data.pickle","wb") as fw:
    pickle.dump(data_test, fw, protocol=pickle.HIGHEST_PROTOCOL)

In [25]:
# Test set, labels: save them as one pickle file.
with open("D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\test\\test_label.pickle","wb") as fw:
    pickle.dump(label_test, fw)

## 4. Transform training set & test set into NumPy arrays, and save them (.npy)

### 1) Training set

In [4]:
# Stack the training clips into one array and cast to float16.
data_training_ar=np.array(data_training, dtype=np.float16) #> (n_train, 30, 160, 160, 3); (962, 30, 160, 160, 3) in the recorded run

In [5]:
# Save the stacked training clips as a single .npy file.
np.save('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\train\\data_train_TRANSNumpy.npy', data_training_ar)

In [33]:
# Stack the training labels into one array.
label_training_ar=np.array(label_training) #> (n_train, 2); n_train is 962 in the recorded run

In [36]:
# Save the stacked training labels as a single .npy file.
np.save('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\train\\label_train_TRANSNumpy.npy', label_training_ar)

In [8]:
# Sanity check: the first dimension of both arrays should be the same.
data_training_ar.shape, label_training_ar.shape

(962, 30, 160, 160, 3)

### 2) Test set

In [27]:
# Stack the test clips into one array and cast to float16.
data_test_ar=np.array(data_test, dtype=np.float16) #> (n_test, 30, 160, 160, 3); (241, 30, 160, 160, 3) in the recorded run

In [28]:
# Save the stacked test clips as a single .npy file.
np.save('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\test\\data_test_TRANSNumpy.npy', data_test_ar)

In [29]:
# Stack the test labels into one array.
label_test_ar=np.array(label_test) #> (n_test, 2); (241, 2) in the recorded run

In [30]:
# Save the stacked test labels as a single .npy file.
# NOTE(review): the file name is 'label_data_test_...' while the training labels use
# 'label_train_...'; left unchanged because other code may load it by this name.
np.save('D:\\graduation project\\datasets\\A-Dataset-for-Automatic-Violence-Detection-in-Videos\\violence-detection-dataset\\4-4 data\\pickle\\split\\test\\label_data_test_TRANSNumpy.npy', label_test_ar)

In [31]:
# Sanity check: the first dimension of both arrays should be the same.
data_test_ar.shape, label_test_ar.shape

((241, 30, 160, 160, 3), (241, 2))