In [1]:
import cv2     # for capturing videos
import os 
import shutil
import math   # for mathematical operations
import pandas as pd
import pickle
import numpy as np   
from sklearn.metrics import confusion_matrix
from sklearn.metrics import accuracy_score
from glob import glob
from tqdm import tqdm
import tensorflow as tf 
from tensorflow.keras.preprocessing import image   # for preprocessing the images
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import LSTM
from tensorflow.keras.layers import Dropout
from ax.service.ax_client import AxClient
from ax.utils.notebook.plotting import render, init_notebook_plotting
from tensorflow.keras.callbacks import ModelCheckpoint
from sklearn.model_selection import train_test_split
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.layers import Flatten, Input
from tensorflow.keras.models import Model
from tensorflow.keras.applications.resnet50 import preprocess_input
from tensorflow.keras.callbacks import EarlyStopping
from sklearn.model_selection import StratifiedKFold

In [2]:
# Print every compute device (CPU/GPU) TensorFlow can see in this kernel,
# so the hardware used for the run is recorded in the cell output below.
from tensorflow.python.client import device_lib
print(device_lib.list_local_devices())

[name: "/device:CPU:0"
device_type: "CPU"
memory_limit: 268435456
locality {
}
incarnation: 16379497450660707184
, name: "/device:GPU:0"
device_type: "GPU"
memory_limit: 7347613216
locality {
  bus_id: 1
  links {
  }
}
incarnation: 6892062830417803198
physical_device_desc: "device: 0, name: GeForce RTX 2080 SUPER, pci bus id: 0000:0a:00.0, compute capability: 7.5"
, name: "/device:GPU:1"
device_type: "GPU"
memory_limit: 7596800736
locality {
  bus_id: 1
  links {
  }
}
incarnation: 11252568984326227376
physical_device_desc: "device: 1, name: GeForce RTX 2080 SUPER, pci bus id: 0000:0b:00.0, compute capability: 7.5"
]


In [3]:
# Sanity check: report whether TensorFlow exposes a GPU device name.
with tf.device('/device:GPU:0'):
    gpu_name = tf.test.gpu_device_name()
    # An empty device name is falsy and means no GPU was found.
    print("GPU" if gpu_name else "no GPU")

GPU


In [21]:
def count_video_frame_num(video_name):
    """Return the frame count OpenCV reports for a video file.

    Parameters
    ----------
    video_name : str
        Path of the video to inspect.

    Returns
    -------
    int
        Value of ``CAP_PROP_FRAME_COUNT`` truncated to an integer.
    """
    cap = cv2.VideoCapture(video_name)
    try:
        return int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
    finally:
        # Always release the capture so the file/decoder handle is not leaked
        # when this is called once per video in a loop.
        cap.release()

In [22]:
def exclude_suspense_frame(length, j, k, sus_frame):
    """Return the frame range that follows suspense scene ``k`` of video ``j``.

    The range starts one frame after scene ``k`` ends. It stops one frame
    before the next scene starts, or at ``length[j]`` when ``k`` is the last
    scene listed for the video.

    Parameters
    ----------
    length : sequence of int
        Per-video end value used for the last gap (indexed by ``j``).
    j : int
        Video index.
    k : int
        Scene index within ``sus_frame[j]``.
    sus_frame : list
        ``sus_frame[j][k]`` is a ``[start, end]`` pair for a suspense scene.

    Returns
    -------
    tuple
        ``(non_sus_start_frame, non_sus_end_frame)``.
    """
    scenes = sus_frame[j]
    gap_start = scenes[k][1] + 1
    is_last_scene = (k == len(scenes) - 1)
    gap_end = length[j] if is_last_scene else scenes[k + 1][0] - 1
    return gap_start, gap_end

In [23]:
def read_txt_file(filename):
    """Load a list of video paths and build the base label tables.

    The file holds one video path per line in the form ``folder/name.ext``;
    the video name is the second ``/``-separated component. Blank lines
    (including the one produced by a trailing newline) are ignored.

    Parameters
    ----------
    filename : str
        Path of the text file listing the videos.

    Returns
    -------
    pd_sus : pandas.DataFrame
        Columns ``video_path``, ``video_name``, ``label`` with every label
        set to ``"suspense"``.
    pd_non_sus : pandas.DataFrame
        Same rows with every label set to ``"non-suspense"``.
    video_rnn : pandas.DataFrame
        Copy of ``pd_sus`` sorted by ``video_name`` with a fresh index.

    Raises
    ------
    IndexError
        If a non-blank line contains no ``/`` separator.
    """
    # Context manager guarantees the handle is closed even on error.
    with open(filename, "r") as f:
        raw_lines = f.read().split('\n')

    # Drop empty entries: a trailing newline would otherwise yield '' and
    # crash on split('/')[1] below.
    video_path = [line.strip() for line in raw_lines if line.strip()]

    pd_sus = pd.DataFrame()
    pd_sus['video_path'] = video_path
    pd_sus['video_name'] = [p.split('/')[1] for p in video_path]

    # Copy before labelling so the two tables differ only in 'label'.
    pd_non_sus = pd_sus.copy()
    pd_sus['label'] = ["suspense"] * len(video_path)
    pd_non_sus['label'] = ["non-suspense"] * len(video_path)

    video_rnn = pd_sus.sort_values(by="video_name").reset_index(drop=True)

    return pd_sus, pd_non_sus, video_rnn

In [24]:
def cal_start_end_frame(txtfilefolder, videofolder, pd_sus, pd_non_sus):
    """Build one table of suspense and non-suspense frame ranges per video.

    For every video in ``pd_sus`` the matching annotation file
    (``<video stem>.txt`` inside ``txtfilefolder``) is read; each non-blank
    line holds the integer start and end frame of one suspense scene. The
    non-suspense ranges are the gaps before, between and after those scenes.

    Parameters
    ----------
    txtfilefolder : str
        Folder prefix (including trailing separator) of the annotation files.
    videofolder : str
        Folder prefix (including trailing separator) of the video files.
    pd_sus, pd_non_sus : pandas.DataFrame
        Tables with a ``video_name`` column, one row per video. Both get a
        ``scene_per_video`` column added in place.

    Returns
    -------
    pandas.DataFrame
        Suspense rows followed by non-suspense rows, one row per scene, with
        ``start_frame`` and ``end_frame`` columns.
    """
    sus_frame = []
    total_frame_per_video = []
    for i in range(pd_sus.video_name.shape[0]):
        video_name = pd_sus.video_name[i]
        # Swap only the real extension; a substring replace would also
        # rewrite "mkv"/"mp4"/"webm" occurring inside the name itself.
        label_file = os.path.splitext(video_name)[0] + ".txt"
        with open(txtfilefolder + label_file, "rt") as f:
            # Skip blank lines so every scene has both a start and an end.
            sus_frame.append([[int(token) for token in line.split()]
                              for line in f if line.strip()])
        total_frame_per_video.append(count_video_frame_num(videofolder + video_name))

    sus_start_frame = []
    sus_end_frame = []
    non_sus_start_frame = []
    non_sus_end_frame = []
    sus_scene_per_video = []
    non_sus_scene_per_video = []

    for j in range(len(sus_frame)):
        non_sus_scene_count = 0
        sus_scene_per_video.append(len(sus_frame[j]))
        for k in range(len(sus_frame[j])):
            if k == 0 and sus_frame[j][k][0] != 0:
                # Leading gap before the first suspense scene.
                non_sus_start_frame.append(0)
                non_sus_end_frame.append(sus_frame[j][k][0] - 1)
                non_sus_scene_count += 1
            gap_start, gap_end = exclude_suspense_frame(total_frame_per_video, j, k, sus_frame)
            sus_start_frame.append(sus_frame[j][k][0])
            sus_end_frame.append(sus_frame[j][k][1])
            non_sus_start_frame.append(gap_start)
            non_sus_end_frame.append(gap_end)
            non_sus_scene_count += 1
        if non_sus_scene_count == 0:
            # No suspense annotated: the whole video is one non-suspense scene.
            # Reuse the frame count obtained above instead of reopening the video.
            non_sus_start_frame.append(0)
            non_sus_end_frame.append(total_frame_per_video[j])
            non_sus_scene_per_video.append(1)
        else:
            non_sus_scene_per_video.append(non_sus_scene_count)

    pd_sus['scene_per_video'] = sus_scene_per_video
    pd_non_sus['scene_per_video'] = non_sus_scene_per_video
    # Repeat each video row once per scene so the per-scene lists line up.
    pd_sus = pd_sus.loc[pd_sus.index.repeat(pd_sus.scene_per_video)].reset_index(drop=True)
    pd_non_sus = pd_non_sus.loc[pd_non_sus.index.repeat(pd_non_sus.scene_per_video)].reset_index(drop=True)
    pd_sus['start_frame'] = sus_start_frame
    pd_sus['end_frame'] = sus_end_frame
    pd_non_sus['start_frame'] = non_sus_start_frame
    pd_non_sus['end_frame'] = non_sus_end_frame
    data = pd.concat([pd_sus, pd_non_sus], ignore_index=True)

    return data

In [25]:
def extract_frames(path, data):
    """Write roughly one JPEG per second for every scene listed in ``data``.

    The output folder is wiped and recreated. For each row, frames from
    ``start_frame`` to ``end_frame`` (inclusive) are read, and every frame
    whose index is a multiple of the floored frame rate is saved as
    ``<path>/<label>_<video stem>_<frame index>.jpg``.

    Parameters
    ----------
    path : str
        Output folder; any existing folder of that name is deleted first.
    data : pandas.DataFrame
        One row per scene with columns ``video_path``, ``video_name``,
        ``label``, ``start_frame`` and ``end_frame``.

    Raises
    ------
    OSError
        If the output folder cannot be created. Without it every image
        write would fail silently.
    """
    if os.path.exists(path):
        shutil.rmtree(path)
    # Create the folder once up front instead of once per scene.
    os.makedirs(path, exist_ok=True)

    # storing the frames from the videos
    for i in tqdm(range(data.shape[0])):
        cam = cv2.VideoCapture(data.video_path[i])
        try:
            frame_rate = cam.get(cv2.CAP_PROP_FPS)
            # Guard against a reported rate below 1 fps (or 0 when unknown),
            # which would otherwise make the modulo below divide by zero.
            sample_step = max(1, math.floor(frame_rate))

            # Strip whatever extension the video has, not just mkv/mp4/webm.
            stem = os.path.splitext(data.video_name[i])[0]
            prefix = path + '/' + data.label[i] + '_' + stem + '_'

            currentframe = data.start_frame[i]
            cam.set(cv2.CAP_PROP_POS_FRAMES, currentframe)
            while currentframe <= data.end_frame[i]:
                ret, frame = cam.read()
                if ret != True:
                    break

                if math.floor(currentframe) % sample_step == 0:
                    cv2.imwrite(prefix + str(currentframe) + '.jpg', frame)

                currentframe += 1
        finally:
            # Release the decoder even if reading or writing raised.
            cam.release()
        cv2.destroyAllWindows()

In [26]:
def calculate_index(df_data):
    """Extract the frame number embedded in each image file name.

    Image names end in ``_<frame>.jpg``, so the frame number is the text
    between the last underscore and the first following dot. Reading it from
    the right makes the result independent of how many underscores the video
    name itself contains.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Table whose first column holds the image file names.

    Returns
    -------
    list of int
        Frame number for every row, in row order.

    Raises
    ------
    ValueError
        If the trailing token of a name is not an integer.
    """
    index = []
    for image_name in df_data.iloc[:, 0]:
        last_token = image_name.rsplit('_', 1)[-1]
        index.append(int(last_token.split('.')[0]))
    return index

In [27]:
def cal_each_video_frame(df_data):
    """Count frames and class imbalance for each consecutive run of a video.

    Rows are scanned in order; a new run starts whenever the value in the
    third column (video) changes. Column two holds the class, where ``1``
    counts as suspense and anything else as non-suspense.

    Parameters
    ----------
    df_data : pandas.DataFrame
        Table with class in column 1 and video name in column 2, already
        ordered so that rows of the same video are adjacent.

    Returns
    -------
    each_video_frame : list of int
        Number of rows in each run.
    each_video_extra_frame : list of int
        Non-suspense count minus suspense count for each run.
        Both lists are empty when ``df_data`` has no rows.
    """
    each_video_frame = []
    each_video_extra_frame = []
    # An empty table has no first video to seed the scan with.
    if df_data.shape[0] == 0:
        return each_video_frame, each_video_extra_frame

    classes = df_data.iloc[:, 1].tolist()
    videos = df_data.iloc[:, 2].tolist()

    current_video = videos[0]
    frame_total = suspense = non_suspense = 0
    for frame_class, video in zip(classes, videos):
        if video != current_video:
            # Close the finished run before counting the current row.
            each_video_frame.append(frame_total)
            each_video_extra_frame.append(non_suspense - suspense)
            current_video = video
            frame_total = suspense = non_suspense = 0
        frame_total += 1
        if frame_class == 1:
            suspense += 1
        else:
            non_suspense += 1

    # Flush the last run.
    each_video_frame.append(frame_total)
    each_video_extra_frame.append(non_suspense - suspense)
    return each_video_frame, each_video_extra_frame

In [28]:
def save_frame_to_csv(path, csv_name):
    """Index the extracted JPEGs, write them to a CSV and return per-video counts.

    Every ``*.jpg`` in ``path`` is expected to be named
    ``<label>_<video>_<frame>.jpg``. The label is the text before the first
    underscore (``non-suspense`` maps to class 0, anything else to class 1),
    the frame number follows the last underscore, and the video name is
    whatever lies in between, so video names may themselves contain
    underscores.

    Parameters
    ----------
    path : str
        Folder holding the extracted frames; may contain several components.
    csv_name : str
        Destination CSV; written with columns ``image`` and ``class``,
        ordered by video and then frame number.

    Returns
    -------
    tuple
        ``(each_video_frame, each_video_extra_frame)`` as produced by
        ``cal_each_video_frame`` on the sorted table.
    """
    # getting the names of all the images
    images = glob(path + "/*.jpg")
    list_image = []
    list_class = []
    list_video_name = []
    for image_path in tqdm(images):
        # basename works for nested folders, unlike taking the second
        # '/'-separated component of the full path.
        image_name = os.path.basename(image_path)
        label, _, remainder = image_name.partition('_')
        list_image.append(image_name)
        list_class.append(0 if label == 'non-suspense' else 1)
        # Drop the trailing "_<frame>.jpg" to recover the video name.
        list_video_name.append(remainder.rsplit('_', 1)[0])

    # storing the images and their class in a dataframe
    df_data = pd.DataFrame()
    df_data['image'] = list_image
    df_data['class'] = list_class
    df_data['video'] = list_video_name
    df_data['index'] = calculate_index(df_data)

    df_data = df_data.sort_values(by = ['video', 'index'], ascending = True)

    each_video_frame, each_video_extra_frame = cal_each_video_frame(df_data)

    # The helper columns are only needed for ordering and counting.
    df_data = df_data.reset_index(drop = True)
    df_data = df_data.drop(['index'], axis = 1)
    df_data = df_data.drop(['video'], axis = 1)
    df_data.to_csv(csv_name,header=True, index=False)

    return each_video_frame, each_video_extra_frame

In [29]:
def img_to_array(path, df_csv):
    """Turn every listed image into a pooled ResNet50 feature vector.

    Parameters
    ----------
    path : str
        Prefix concatenated directly with each image name (so it must end
        with a path separator).
    df_csv : pandas.DataFrame
        Table with an ``image`` column of file names and a ``class`` column.

    Returns
    -------
    X : numpy.ndarray
        One squeezed feature vector per row of ``df_csv``.
    y : pandas.Series
        The ``class`` column of ``df_csv``.
    """
    with tf.device('/device:GPU:0'):
        if tf.test.gpu_device_name():
            print("Using GPU")

        # ImageNet-pretrained backbone without the classifier head, used
        # purely as a frozen feature extractor.
        extractor = tf.keras.applications.ResNet50(weights='imagenet', pooling='avg', include_top = False)
        for frozen_layer in extractor.layers:
            frozen_layer.trainable = False

        def _embed(image_name):
            # Load, resize, add the batch axis and apply ResNet50 preprocessing.
            img = image.load_img(path + image_name, target_size=(224,224,3))
            batch = np.expand_dims(image.img_to_array(img), axis=0)
            batch = preprocess_input(batch)
            return extractor.predict(batch).squeeze()

        feature_rows = [_embed(df_csv['image'][row]) for row in tqdm(range(df_csv.shape[0]))]

        X = np.array(feature_rows)
        y = df_csv['class']

    return X, y

In [30]:
def calculate_for_loop_no(each_video_frame, each_video_extra_frame):
    """Compute, per video, the floored ratio of extra frames to half the remainder.

    With ``half = (frames - extra) / 2`` the result is
    ``floor(extra / half)``, or ``0`` wherever ``half`` is zero.

    Parameters
    ----------
    each_video_frame : sequence
        Frame count per video (values are converted with ``int``).
    each_video_extra_frame : sequence
        Extra-frame count per video (values are converted with ``int``).

    Returns
    -------
    numpy.ndarray
        Float array with one floored ratio per video.
    """
    frames = np.array([int(v) for v in each_video_frame], dtype=float)
    extras = np.array([int(v) for v in each_video_extra_frame])

    half_remainder = (frames - extras) / 2
    # Pre-filled with zeros so masked-out (zero-denominator) slots stay 0.
    ratio = np.zeros_like(frames)
    np.divide(extras, half_remainder, out=ratio, where=half_remainder != 0)
    return np.floor(ratio)

In [31]:
def transform_to_rnn_shape(each_video_frame, video_rnn, X, y, option, n, window_size=20):
    """Slice per-frame features into fixed-length windows labelled by the next frame.

    For each video, every frame position ``k >= window_size`` yields one
    sample: the ``window_size`` feature rows immediately before ``k`` and
    the label of frame ``k``. Windows never span two videos. When
    ``option == "train"``, each window whose label is ``1`` is additionally
    emitted ``n[i]`` times with small Gaussian noise added (oversampling).

    Parameters
    ----------
    each_video_frame : sequence
        Number of feature rows belonging to each video, in row order of ``X``.
    video_rnn : object with ``shape``
        Only ``video_rnn.shape[0]`` (the number of videos) is used.
    X : numpy.ndarray
        2-D feature matrix, one row per frame.
    y : indexable
        Label per frame, indexed by absolute row position.
    option : str
        ``"train"`` enables the noisy oversampling; any other value disables it.
    n : sequence of numbers
        Per-video number of noisy copies for each positive window.
    window_size : int, optional
        Number of frames per window (default 20).

    Returns
    -------
    X_rnn, y_rnn : numpy.ndarray
        Stacked windows and their labels.
    """
    each_video_frame = [int(i) for i in each_video_frame]

    X_rnn = []
    y_rnn = []
    frame_count = 0
    for i in range(video_rnn.shape[0]):
        rnn_end_frame = each_video_frame[i]
        # Plain int: an 8-bit cast would wrap for counts above 127 and
        # silently switch the oversampling off.
        loop_count = int(n[i])
        print("video ", i , " last frame of video ", rnn_end_frame)
        for k in range(window_size, rnn_end_frame):
            label = y[k + frame_count]
            window = X[frame_count + k - window_size:frame_count + k, :]
            if label == 1 and option == "train":
                for _ in range(loop_count):
                    noise = np.random.normal(0, .0001, window.shape)
                    X_rnn.append(np.float32(window + noise))
                    y_rnn.append(label)
            X_rnn.append(window)
            y_rnn.append(label)
        frame_count += rnn_end_frame
        print("total frames processed: ", frame_count)

    X_rnn, y_rnn = np.array(X_rnn), np.array(y_rnn)

    return X_rnn, y_rnn

# Test data

In [32]:
# Build the per-scene table for the test split: read the video list, then
# derive suspense / non-suspense frame ranges from the annotation files.
pd_sus, pd_non_sus, video_rnn = read_txt_file("testlist01.txt")
test = cal_start_end_frame("testfiles/", "testvideos/", pd_sus, pd_non_sus)

# Dump the sampled frames as JPEGs; any existing 'testdata' folder is deleted first.
extract_frames('testdata', test)

# Index the JPEGs into a CSV and get per-video frame counts / class imbalance.
each_video_frame, each_video_extra_frame = save_frame_to_csv('testdata', 'test_new.csv')
n = calculate_for_loop_no(each_video_frame, each_video_extra_frame)

# NOTE: `test` is rebound here from the scene table to the image/class table.
test = pd.read_csv('test_new.csv')
X, y = img_to_array('testdata/', test)
# option="test" disables the noisy oversampling, so `n` has no effect on the result here.
X_testing, y_testing = transform_to_rnn_shape(each_video_frame, video_rnn, X, y, "test", n)

100%|██████████| 173/173 [27:31<00:00,  9.55s/it]
100%|██████████| 34404/34404 [00:00<00:00, 649222.94it/s]


Using GPU


100%|██████████| 34404/34404 [24:44<00:00, 23.17it/s]


video  0  last frame of video  1500
total frames processed:  1500
video  1  last frame of video  1475
total frames processed:  2975
video  2  last frame of video  2775
total frames processed:  5750
video  3  last frame of video  2539
total frames processed:  8289
video  4  last frame of video  5369
total frames processed:  13658
video  5  last frame of video  6847
total frames processed:  20505
video  6  last frame of video  7663
total frames processed:  28168
video  7  last frame of video  1396
total frames processed:  29564
video  8  last frame of video  4840
total frames processed:  34404


In [33]:
# Persist the windowed test features and labels for later cells / notebooks.
# Create the target folder first so a fresh checkout does not fail with
# FileNotFoundError.
os.makedirs('pickle', exist_ok=True)

with open('pickle/X_testing.pickle', 'wb') as f:
    pickle.dump(X_testing, f)

with open('pickle/y_testing.pickle', 'wb') as f:
    pickle.dump(y_testing, f)