In [1]:
import cv2
import numpy as np
import h5py

import matplotlib.pyplot as plt

  from ._conv import register_converters as _register_converters


In [2]:
import sys 
import os

# Add the parent directory (resolved to an absolute path) to the module search
# path so that modules located one level above this notebook can be imported.
sys.path.append(os.path.abspath("../"))

In [3]:
def getBoundCenter(bound):
    """Return the integer center point of a box described by two corners.

    Args:
        bound: indexable sequence whose first two entries are one corner and
            whose next two entries are the other corner. The corners may be
            given in either order.

    Returns:
        A two-element list ``[x, y]``: for each axis, half of the absolute
        corner distance added to the smaller corner coordinate, truncated
        with ``int()``.
    """
    first_corner, second_corner = bound[0:2], bound[2:4]

    center = []
    for axis in (0, 1):
        extent = abs(first_corner[axis] - second_corner[axis])
        lowest = min(first_corner[axis], second_corner[axis])
        center.append(int((extent / 2) + lowest))

    return center

In [4]:
import lib

# Annotations: maps a frame number to the list of bounds labelled on that
# frame (the second value returned by openBounds is not used here).
file_name = '../Etiquetado/bounds2.txt'
boundsdict, _ = lib.Utils.openBounds(file_name)

cap = cv2.VideoCapture('../DCIM/saturated-noaudio.avi')

# Wrapper used below to fetch frames by number through readAt().
# NOTE(review): the meaning of the second argument (30) is defined by
# lib.RACapWrapper and is not visible here -- confirm.
racap = lib.RACapWrapper(cap, 30)

frame_height = 420  # keep only the first 420 rows, to crop out the date and time

window_size = 40  # number of columns in each sliding window
step_size = 4     # number of columns the window advances per step

height, width = None, None

plot = False  # set to True to draw every window and print its label

x_data = []  # one float32 window (pixel values divided by 255) per sample
y_data = []  # 1 when a bound center falls inside the window, otherwise 0

try:
    for frame_n in sorted(boundsdict.keys()):
        frame, _ = racap.readAt(frame_n)
        frame = frame[:frame_height, :]

        # The frame size is taken once, from the first cropped frame.
        if height is None:
            [height, width, _] = frame.shape

        centers = [getBoundCenter(bound) for bound in boundsdict[frame_n]]

        start_index, end_index = 0, window_size

        # NOTE(review): the strict "<" means a window whose right edge equals
        # the frame width is never produced -- confirm that this is intended.
        while end_index < width:
            x_data.append(frame[:, start_index:end_index].astype('float32') / 255)

            # Only the x coordinate of each center is tested; both limits are
            # exclusive, so a center lying exactly on start_index is not counted.
            center_in_frame = any(start_index < c[0] < end_index for c in centers)
            y_data.append(int(center_in_frame))

            if plot:
                fig = plt.figure(figsize=(8, 8))

                fig.add_subplot(1, 1, 1)
                plt.imshow(frame[:, start_index:end_index])

                print(int(center_in_frame))

            start_index += step_size
            end_index += step_size
finally:
    # Release the capture even when reading or slicing a frame fails.
    cap.release()

In [5]:
import numpy as np

def get_balance_data(y_data):
    """Print and return the class distribution of a sequence of 0/1 labels.

    Args:
        y_data: labels; converted with ``np.array`` before being compared.

    Returns:
        A tuple ``(zeros_args, ones_args, diff)`` where the first two items
        are the ``np.argwhere`` results for the entries equal to 0 and to 1,
        and ``diff`` is the absolute difference between their lengths.

    Prints the number of zeros, the number of ones and the signed difference
    (zeros minus ones).
    """
    labels = np.array(y_data)

    zeros_args, ones_args = (np.argwhere(labels == value) for value in (0, 1))

    n_zeros, n_ones = len(zeros_args), len(ones_args)
    surplus = n_zeros - n_ones

    for caption, amount in (("zeros:", n_zeros), ("ones:", n_ones), ("diff:", surplus)):
        print(caption, amount)

    return zeros_args, ones_args, abs(surplus)
    
def balance_data(x_data, y_data, method='balance'):
    """Return class-balanced copies of ``x_data`` and ``y_data``.

    The inputs are not modified; new lists are returned. Random choices use
    the global NumPy generator (``np.random``), so seed it beforehand for
    reproducible results.

    Args:
        x_data: sequence of samples. Samples that get duplicated must provide
            a ``.copy()`` method.
        y_data: sequence of labels aligned with ``x_data``; only the entries
            equal to 0 or 1 are counted.
        method: how the class difference is eliminated.
            ``'remove'``  - drop random samples of the larger class.
            ``'add'``     - duplicate random samples of the smaller class.
            ``'balance'`` - do both, half of the difference each (for an odd
            difference the extra sample is added, not removed).

    Returns:
        A tuple ``(x_data, y_data)`` of lists. Duplicated samples are appended
        at the end; the remaining samples keep their relative order.

    Raises:
        ValueError: if ``method`` is not one of the values above, or if
            samples must be duplicated but the smaller class is empty.
    """
    if method not in ('balance', 'remove', 'add'):
        raise ValueError(
            "method must be 'balance', 'remove' or 'add', got %r" % (method,))

    # Shallow copies; list() also accepts non-list sequences such as arrays.
    x_data = list(x_data)
    y_data = list(y_data)

    zeros_args, ones_args, diff = get_balance_data(y_data)

    # Already balanced: still return a pair so that callers can unpack it.
    if diff == 0:
        return x_data, y_data

    if method == 'balance':
        remove_count = diff // 2
        add_count = diff - remove_count  # covers an odd diff exactly
    elif method == 'remove':
        remove_count, add_count = diff, 0
    else:  # 'add'
        remove_count, add_count = 0, diff

    if len(zeros_args) > len(ones_args):
        majority_args, minority_args = zeros_args, ones_args
    else:
        majority_args, minority_args = ones_args, zeros_args

    if add_count > 0:
        if len(minority_args) == 0:
            raise ValueError(
                "cannot duplicate samples: the smaller class is empty")

        for _ in range(add_count):
            # randint's upper bound is exclusive, so every index can be drawn.
            arg = minority_args[np.random.randint(0, len(minority_args))][0]

            x_data.append(x_data[arg].copy())
            y_data.append(y_data[arg])

    if remove_count > 0:
        # The indices refer to the original positions; the samples appended
        # above sit after them and are therefore never removed.
        candidates = majority_args.ravel().copy()
        np.random.shuffle(candidates)
        dropped = set(candidates[:remove_count].tolist())

        # Single filtering pass instead of repeated list.pop() calls.
        x_data = [x for i, x in enumerate(x_data) if i not in dropped]
        y_data = [y for i, y in enumerate(y_data) if i not in dropped]

    return x_data, y_data

In [6]:
# Print the class counts of the extracted labels; the trailing ';' hides the
# returned tuple so that only the printed counts are shown.
get_balance_data(y_data);

zeros: 49976
ones: 4624
diff: 45352


In [7]:
# Balance the classes with method='remove': samples of the larger class are
# dropped at random and nothing is duplicated. New lists are returned, so
# x_data and y_data are left untouched.
balanced_x_data, balanced_y_data = balance_data(x_data, y_data, method='remove')

zeros: 49976
ones: 4624
diff: 45352


In [8]:
# Sanity check: print the class counts of the balanced labels (the trailing
# ';' hides the returned tuple).
get_balance_data(balanced_y_data);

zeros: 4624
ones: 4624
diff: 0


In [9]:
# Store the balanced samples and labels in 'data.h5' under the dataset names
# 'x_data' and 'y_data'. The file is opened in 'w' mode and the context
# manager closes it even if writing one of the datasets fails.
with h5py.File('data.h5', 'w') as data_fila:
    data_fila.create_dataset('x_data', data=balanced_x_data)
    data_fila.create_dataset('y_data', data=balanced_y_data)