In [2]:
import tensorflow
from keras.optimizers import Adam, SGD
from keras.callbacks import ModelCheckpoint, LearningRateScheduler, TerminateOnNaN, CSVLogger
from keras import backend as K
from keras.models import load_model
from math import ceil
import numpy as np
from matplotlib import pyplot as plt
import time
from models.keras_ssd300 import ssd_300
from models.keras_ssd512 import ssd_512
from keras_loss_function.keras_ssd_loss import SSDLoss
from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
from keras_layers.keras_layer_DecodeDetections import DecodeDetections
from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
from keras_layers.keras_layer_L2Normalization import L2Normalization

from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
from ssd_encoder_decoder.ssd_output_decoder import decode_detections, decode_detections_fast

# from data_generator.object_detection_2d_data_generator import DataGenerator
# from data_generator.object_detection_2d_geometric_ops import Resize
# from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
# from data_generator.data_augmentation_chain_original_ssd import SSDDataAugmentation
# from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
from bs4 import BeautifulSoup
import lxml
import et_xmlfile
from et_xmlfile import xmlfile
import requests
import argparse
import os
import math
import cv2
%matplotlib inline

Using TensorFlow backend.


# EXTRACT FRAMES FROM VIDEO WITH OBJECT DETECTION

In [4]:
img_height = 300 # Height of the model input images
img_width = 300 # Width of the model input images
img_channels = 3 # Number of color channels of the model input images
mean_color = [123, 117, 104] # The per-channel mean of the images in the dataset. Do not change this value if you're using any of the pre-trained weights.
swap_channels = [2, 1, 0] # The color channel order in the original SSD is BGR, so we'll have the model reverse the color channel order of the input images.
n_classes = 20 # Number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO
scales_pascal = [0.1, 0.2, 0.37, 0.54, 0.71, 0.88, 1.05] # The anchor box scaling factors used in the original SSD300 for the Pascal VOC datasets
scales_coco = [0.07, 0.15, 0.33, 0.51, 0.69, 0.87, 1.05] # The anchor box scaling factors used in the original SSD300 for the MS COCO datasets
scales = scales_pascal
aspect_ratios = [[1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
                 [1.0, 2.0, 0.5],
                 [1.0, 2.0, 0.5]] # The anchor box aspect ratios used in the original SSD300; the order matters
two_boxes_for_ar1 = True
steps = [8, 16, 32, 64, 100, 300] # The space between two adjacent anchor box center points for each predictor layer.
offsets = [0.5, 0.5, 0.5, 0.5, 0.5, 0.5] # The offsets of the first anchor box center points from the top and left borders of the image as a fraction of the step size for each predictor layer.
clip_boxes = False # Whether or not to clip the anchor boxes to lie entirely within the image boundaries
variances = [0.1, 0.1, 0.2, 0.2] # The variances by which the encoded target coordinates are divided as in the original implementation
normalize_coords = True

In [5]:
# 1: Build the Keras model.

K.clear_session() # Clear previous models from memory.

model = ssd_300(image_size=(img_height, img_width, img_channels),
                n_classes=n_classes,
                mode='training',
                l2_regularization=0.0005,
                scales=scales,
                aspect_ratios_per_layer=aspect_ratios,
                two_boxes_for_ar1=two_boxes_for_ar1,
                steps=steps,
                offsets=offsets,
                clip_boxes=clip_boxes,
                variances=variances,
                normalize_coords=normalize_coords,
                subtract_mean=mean_color,
                swap_channels=swap_channels)

# 2: Load some weights into the model.

# TODO: Set the path to the weights you want to load.
weights_path ='utils/checkpoints/object/VGG_VOC0712Plus_SSD_300x300_ft_iter_160000.h5'


model.load_weights(weights_path, by_name=True)

# 3: Instantiate an optimizer and the SSD loss function and compile the model.
#    If you want to follow the original Caffe implementation, use the preset SGD
#    optimizer, otherwise I'd recommend the commented-out Adam optimizer.
sgd = SGD(lr=0.001, momentum=0.9, decay=0.0, nesterov=False)

ssd_loss = SSDLoss(neg_pos_ratio=3, alpha=1.0)

model.compile(optimizer=sgd, loss=ssd_loss.compute_loss,metrics=['accuracy'])

#model.compile(optimizer=sgd, loss=[focal_loss(alpha=0.25, gamma=2)],metrics=['accuracy'])


Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Use tf.cast instead.
Instructions for updating:
Use tf.cast instead.


# AUTOMATICALLY EXTRACT RANDOM GLOBAL + LOCAL  FRAME WHENEVER OBJECT IS DETECTED 

In [15]:
classes = ['background',
           'aeroplane', 'bicycle', 'bird', 'boat',
           'bottle', 'bus', 'car', 'cat',
           'chair', 'cow', 'diningtable', 'dog',
           'horse', 'motorbike', 'person', 'pottedplant',
           'sheep', 'sofa', 'train', 'tvmonitor']

np.set_printoptions(precision=2, suppress=True, linewidth=90)
data_path = 'edited_videos'
import random
ctg = os.listdir(data_path)
#ctg = ['happy','sad','submissive']
file_dir = os.path.join(os.getcwd(),data_path)
for x in ctg:
    count_file = 1
    listing = os.listdir(os.path.join(data_path,x))
    for file in listing:
        count_clip = 1
        count_image = 0
        vid_dir = os.path.join(file_dir,x,file)
        video = cv2.VideoCapture(vid_dir)
        print(video.isOpened())
        framerate = video.get(5)
        totalframe = int(video.get(7))    
        if(totalframe > 500):
            randlist = []
            rand = random.sample(range(1,totalframe-500),1)[0]        
            for i in range(rand, rand+500):
                randlist.append(i)
        else:
            randlist = range(0, (totalframe-(totalframe%25)+1))
        print(count_file)
        if(count_file <= 20):
            gen = 'train'
        elif(count_file >= 21 and count_file < 26):
            gen = 'val'
        elif(count_file >=26 and count_file <31):
            gen = 'test'
        else:
            gen = 'deposit'
        new_path = os.path.join(os.getcwd(),'data',gen,x,"video_" + str("{:02d}".format(count_file)))
        object_path = os.path.join(os.getcwd(),'object_data',gen,x,"video_" + str("{:02d}".format(count_file))) 
        OF_path = os.path.join(os.getcwd(),'OF_data',gen,x,"video_" + str("{:02d}".format(count_file)))
        ret, frame1 = video.read()
        prvs = cv2.cvtColor(frame1,cv2.COLOR_BGR2GRAY)
        hsv = np.zeros_like(frame1)
        hsv[...,1] = 25
        while (video.isOpened()):
            start = time.time()
            success,image = video.read()
            if (success != True):
                break
            frameId = video.get(1)
            if(frameId in randlist):
                program_starts = time.time()
                clip_path = os.path.join(new_path+'_clip_'+str("{:02d}".format(count_clip)))
                filename = os.path.join(clip_path,"image_"+str("{:05d}".format(count_image))+'.jpg')  

                objclip_path = os.path.join(object_path+'_clip_'+str("{:02d}".format(count_clip)))
                obj_filename = os.path.join(objclip_path,"image_"+str("{:05d}".format(count_image))+'.jpg')

                OFclip_path = os.path.join(OF_path+'_clip_'+str("{:02d}".format(count_clip)))
                OFfilename = os.path.join(OFclip_path,"image_"+str("{:05d}".format(count_image))+'.jpg')  
            
                if not os.path.exists(clip_path):
                    os.makedirs(clip_path)   
                if not os.path.exists(objclip_path):
                    os.makedirs(objclip_path)  
                if not os.path.exists(OFclip_path):
                    os.makedirs(OFclip_path)   
                img_resized = cv2.resize(image,(img_width,img_height))
                img_reshape=img_resized.reshape(1,img_width,img_height,3)
                y_pred = model.predict(img_reshape)
                y_pred_decoded = decode_detections(y_pred,
                                                   confidence_thresh=0.2,
                                                   iou_threshold=0.4,
                                                   top_k=200,
                                                   normalize_coords=normalize_coords,
                                                   img_height=img_height,
                                                   img_width=img_width)
                countdog = 1
                next = cv2.cvtColor(image,cv2.COLOR_BGR2GRAY)
                flow = cv2.calcOpticalFlowFarneback(prvs,next, None, 0.5, 3, 15, 3, 5, 1.2, 0)
                mag, ang = cv2.cartToPolar(flow[...,0], flow[...,1])
                hsv[...,0] = ang*180/np.pi/2
                hsv[...,2] = cv2.normalize(mag,None,0,255,cv2.NORM_MINMAX)
                bgr = cv2.cvtColor(hsv,cv2.COLOR_HSV2BGR)
                for i in range(len(y_pred_decoded[0])):
                    pred = y_pred_decoded[0][i][0]
                    conf = y_pred_decoded[0][i][1]
                    if(pred == 12 and conf > 0.3 and countdog == 1): #CLASS = DOG
                        #class   conf xmin   ymin   xmax   ymax
                        #print(pred,conf)
                        xmin = max(0,y_pred_decoded[0][i][2] * 1280 / 300) #xmin
                        ymin = max(0,y_pred_decoded[0][i][3]  * 720 / 300) #ymin
                        xmax = max(0,y_pred_decoded[0][i][4] * 1280 / 300) #xmax
                        ymax = max(0,y_pred_decoded[0][i][5]  * 720 / 300) #ymax
                        newimg = image[int(ymin):int(ymax), int(xmin):int(xmax), :]
                        bgr = cv2.resize(bgr,(640,480))
                        image = cv2.resize(image,(640,480))
                        cv2.imwrite(str(filename),image)
                        cv2.imwrite(str(obj_filename),newimg)     
                        cv2.imwrite(str(OFfilename),bgr) 
                        countdog += 1
                        count_image +=1
                prvs = next
                if(count_image % 50 == 0 and count_image > 0):
                    count_clip+=1  
                stop = time.time()
                duration = stop-start
                print("file:",filename, 'duration:', duration, end = '\r')
        video.release()
        count_file += 1

True
1
True: G:\Project_Theo\Project\data\train\angry\video_01_clip_04\image_00197.jpg duration: 0.27106332778930664
2
True: G:\Project_Theo\Project\data\train\angry\video_02_clip_03\image_00129.jpg duration: 0.26005721092224124
3
True: G:\Project_Theo\Project\data\train\angry\video_03_clip_10\image_00417.jpg duration: 0.27106046676635743
4
True: G:\Project_Theo\Project\data\train\angry\video_04_clip_10\image_00473.jpg duration: 0.27606248855590825
5
True: G:\Project_Theo\Project\data\train\angry\video_05_clip_09\image_00434.jpg duration: 0.30706882476806646
6
True: G:\Project_Theo\Project\data\train\angry\video_06_clip_05\image_00237.jpg duration: 0.30306863784790046
7
True: G:\Project_Theo\Project\data\train\angry\video_07_clip_05\image_00238.jpg duration: 0.26906061172485355
8
True: G:\Project_Theo\Project\data\train\angry\video_08_clip_07\image_00346.jpg duration: 0.27706313133239746
9
True: G:\Project_Theo\Project\data\train\angry\video_09_clip_09\image_00258.jpg duration: 0.27806

True: G:\Project_Theo\Project\data\val\submissive\video_24_clip_04\image_00184.jpg duration: 0.28126788139343263
25
True: G:\Project_Theo\Project\data\val\submissive\video_25_clip_08\image_00355.jpg duration: 0.28126835823059084
26
True: G:\Project_Theo\Project\data\test\submissive\video_26_clip_01\image_00043.jpg duration: 0.26564240455627443
27
True: G:\Project_Theo\Project\data\test\submissive\video_27_clip_09\image_00265.jpg duration: 0.26563954353332523
28
True: G:\Project_Theo\Project\data\test\submissive\video_28_clip_09\image_00423.jpg duration: 0.29689598083496094
29
True: G:\Project_Theo\Project\data\test\submissive\video_29_clip_01\image_00000.jpg duration: 0.25001358985900885
30
True: G:\Project_Theo\Project\data\test\submissive\video_30_clip_01\image_00036.jpg duration: 0.29689455032348633
31
True: G:\Project_Theo\Project\data\deposit\submissive\video_31_clip_08\image_00373.jpg duration: 0.29689359664916993
32
True: G:\Project_Theo\Project\data\deposit\submissive\video_32_

In [None]:
import shutil
np.set_printoptions(precision=2, suppress=True, linewidth=90)
data_path = 'data'
obj_datapath = 'object_data'
of_datapath = 'OF_data'
import random
ctg = os.listdir(data_path)
#ctg = ['happy','sad','submissive']
for x in ctg:
    count_file = 1
    listing = os.listdir(os.path.join(data_path,x))
    for file in listing:
        filename = os.listdir(os.path.join(data_path,x,file))
        for clip in filename:           
            globdir = os.path.join(data_path,x,file,clip)
            locdir = os.path.join(obj_datapath,x,file,clip)
            ofdir = os.path.join(of_datapath,x,file,clip)
            vidlist = os.listdir(globdir)
            loclist = os.listdir(locdir)
            oflist = os.listdir(ofdir)
            if(len(vidlist) < 50 and len(loclist) < 50 and len(oflist) < 50):
                shutil.rmtree(globdir)
                shutil.rmtree(locdir)                
                shutil.rmtree(ofdir) 
                print(globdir, "deleted")
                print(locdir, "deleted")
                print(ofdir, "deleted")