In [1]:
root_path = "/tf/home/sergio/Tesis"

In [2]:
import sys
sys.path.append(root_path+"/TinyYOLOv3-Pedestrian-Detection")

from YOLOblocks import TinyYOLOv3,BasicBlock,PredictionLayer#,YOLOLossBasicBlock
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import tensorflow as tf
#from tensorflow.python.tools import freeze_graph
#from skimage.io import imread,imshow
#from skimage.transform import resize 
import time
#from tensorflow.compat.v1.image import decode_image
import imgaug.augmenters as iaa
from imgaug.augmentables.bbs import BoundingBox, BoundingBoxesOnImage
import os

In [3]:
from tensorflow.keras.mixed_precision import experimental as mixed_precision

In [4]:
policy = mixed_precision.Policy('mixed_float16')
mixed_precision.set_policy(policy)

INFO:tensorflow:Mixed precision compatibility check (mixed_float16): OK
Your GPU will likely run quickly with dtype policy mixed_float16 as it has compute capability of at least 7.0. Your GPU: GeForce RTX 2070 SUPER, compute capability 7.5


## Dataset preparation

In [5]:
image_feature_description = {
    'bboxes': tf.io.FixedLenFeature([], tf.string),
    'image_raw': tf.io.FixedLenFeature([], tf.string),
    'num_real_boxes':tf.io.FixedLenFeature([], tf.int64),
}

In [6]:
def get_iou_matrix_tf(box_arr1, box_arr2):
    
    box_arr1 = box_arr1 -tf.tile(box_arr1[:,:2],[1,2])
    #print(box_arr1)
    x11, y11, x12, y12 = tf.split(box_arr1, 4, axis=1)
    x21, y21, x22, y22 = tf.split(box_arr2, 4, axis=1)
    xA = tf.maximum(x11, tf.transpose(x21))
    yA = tf.maximum(y11, tf.transpose(y21))
    xB = tf.minimum(x12, tf.transpose(x22))
    yB = tf.minimum(y12, tf.transpose(y22))
    interArea = tf.maximum((xB - xA + 1e-9), 0) * tf.maximum((yB - yA + 1e-9), 0)
    boxAArea = (x12 - x11 + 1e-9) * (y12 - y11 + 1e-9)
    boxBArea = (x22 - x21 + 1e-9) * (y22 - y21 + 1e-9)
    iou = interArea / (boxAArea + tf.transpose(boxBArea) - interArea)
    return iou,tf.argmax(iou,axis=1)#[:,tf.newaxis]


def fill_yolo_output(boxes,grid_size,num_anchors,which_anchor_box,which_anchor_box_index):
    #print(boxes.shape)
    #noobj_mask = tf.ones((1,grid_size*grid_size*num_anchors))
    #print(noobj_mask.shape)
    
    x_min,y_min,x_max,y_max =tf.split(boxes,4,axis=1)

    #Transforma las coordenadas de (xmin,ymin,xmax,ymax) --> (xcenter,ycenter,width,height)
    width = x_max-x_min
    height = y_max-y_min
    x_global =x_min + tf.math.divide(x_max - x_min,2)
    y_global =y_min + tf.math.divide(y_max - y_min,2)
    
    
    x_min_anchor,y_min_anchor,x_max_anchor,y_max_anchor =tf.split(which_anchor_box,4,axis=1)
    
    width_anchor = x_max_anchor-x_min_anchor
    height_anchor = y_max_anchor-y_min_anchor
    x_global_anchor =x_min_anchor + tf.math.divide(x_max_anchor - x_min_anchor,2)
    y_global_anchor =y_min_anchor + tf.math.divide(y_max_anchor - y_min_anchor,2)   

    
    #print("el x original",x_global)
    #print("el y original",y_global)
    #print("el w original",width)
    #print("el h original",height)
    
    #porción de la imagen que hay en cada celda
    pixel_per_grid = tf.math.divide(1.,grid_size)
    #print(pixel_per_grid)
    
    #Obtenemos la coordenada de la celda donde están los boundingboxes
    offset_grid_x = x_global//pixel_per_grid
    offset_grid_y = y_global//pixel_per_grid
    
    #Obtenemos el el centro locacon referencia  al celda encontrada previamente
    x_local =tf.math.floormod(x_global,pixel_per_grid)
    y_local =tf.math.floormod(y_global,pixel_per_grid)
    #print(x_local,y_local)
    
    #Valores tx e ty del groudtruth
    tx = tf.math.log(x_local + 1e-07/(1-x_local))
    ty = tf.math.log(y_local+1e-07/(1-y_local))
    tw = tf.math.log(tf.math.divide(width+1e-07,width_anchor))
    th = tf.math.log(tf.math.divide(height+1e-07,height_anchor))
    tobj_mask = tf.ones_like(tx)
    tobj = tf.concat([tobj_mask,tobj_mask],axis=0)
    
    #tnoobj = tf.zeros_like(tx)    
    #tobj = tf.ones((grid_size*grid_size*num_anchors,1))
    #tnoobj = tf.zeros((grid_size*grid_size*num_anchors,1))
    #print("Lo que la red debe predecir",tx.numpy(),ty.numpy(),tw.numpy(),th.numpy())
    #x_global = (offset_grid_x * pixel_per_grid) + tf.math.sigmoid(tx)
    #y_global = (offset_grid_y * pixel_per_grid) + tf.math.sigmoid(ty)
    #w = width_anchor*tf.math.exp(tw)
    #h = height_anchor*tf.math.exp(th)
    #print("obtnemos el x_real",x_global)
    #print("obtenemos el y_real",y_global)
    #print("obtenemos el w real",w)
    #print("obtenemos el h real",h)
    
    #anchor_boxes_per_output = num_anchors//2

    #Residuo indica cual de los 3 anchor boxes de la coordenada es la que llevara el 1
    #Coord representa la coordenada del grid
    
    residuo = tf.math.floormod(which_anchor_box_index,num_anchors)[:,tf.newaxis]
    coord = tf.cast(num_anchors*(offset_grid_y*grid_size + offset_grid_x),dtype=tf.int64)
    
    coord_objectness = tf.cast(2*(offset_grid_y*grid_size + offset_grid_x),dtype=tf.int64)
    coord_objectness2 = coord_objectness+1
    coord_objectess_global = tf.concat([coord_objectness,coord_objectness2],axis=0)
    
    output_position = residuo+coord
    print("tipo de aoutput_positivon",output_position)
    
    print(output_position)
    
    dense_shape = grid_size*grid_size*num_anchors
    print(dense_shape)
    tx_vector = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=tx[:,0], dense_shape=[dense_shape]))
    ty_vector = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=ty[:,0], dense_shape=[dense_shape]))
    tw_vector = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=tw[:,0], dense_shape=[dense_shape]))
    th_vector = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=th[:,0], dense_shape=[dense_shape]))
    obj_mask = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=tobj_mask[:,0], dense_shape=[dense_shape]))
    objectness_vector = tf.sparse.reorder(tf.sparse.SparseTensor(indices=coord_objectess_global, values=tobj[:,0], dense_shape=[dense_shape]))
    #noobj_mask = tf.sparse.reorder(tf.sparse.SparseTensor(indices=output_position, values=tnoobj[:,0], dense_shape=[dense_shape]))
    #obj_mask =tx_vector=ty_vector=tw_vector=th_vector = tf.zeros((1,grid_size*grid_size*num_anchors))
    
    tx_vector_dense = tf.sparse.to_dense(tx_vector, default_value=0, validate_indices=False, name="Dense_tx")
    ty_vector_dense = tf.sparse.to_dense(ty_vector, default_value=0, validate_indices=False, name="Dense_ty")
    tw_vector_dense = tf.sparse.to_dense(tw_vector, default_value=0, validate_indices=False, name="Dense_tw")
    th_vector_dense = tf.sparse.to_dense(th_vector, default_value=0, validate_indices=False, name="Dense_th")
    obj_mask_dense =  tf.sparse.to_dense(obj_mask, default_value=0, validate_indices=False, name="Dense_obj")
    #noobj_mask_dense = 1-obj_mask_dense
    objectness_vector_dense =  tf.sparse.to_dense(objectness_vector, default_value=0, validate_indices=False)
    
    #noobj_mask_dense= tf.sparse.to_dense(noobj_mask, default_value=1, validate_indices=False, name="Dense_noobj")
    ##print(tx_vector.to_dense)
    #print(tf.sparse.to_dense(tx_vector, default_value=0, validate_indices=True, name=None)
    #tx_vector=tx_vector[[3,2],]
    #tx_vector[output_position[:,0]] = tx
    #print("coordenada de la salida:",output_position)
    
    #return ((tx_vector_dense,ty_vector_dense,obj_mask_dense),(tw_vector_dense,th_vector_dense,obj_mask_dense),(objectness),(objectness))
    
    return tx_vector_dense,ty_vector_dense,tw_vector_dense,th_vector_dense,obj_mask_dense,objectness_vector_dense

def build_targets(image,image_bboxes,num_real_boxes,anchor_boxes):
    
    images_bboxes_original = image_bboxes
    #Obtenemos los boduing boxes que son reales
    image_bboxes = image_bboxes[:num_real_boxes,:]
    #print("Bouding boxes de la imagen",image_bboxes)
    #Obteneos  la matriz de IoU , y el índice del anchor box que dió mejor resultado
    
    #Nprmalizamos con respecto al tamaño de la imagen y obtenemos la Iou con los anchor boxes
    image_bboxes = tf.math.divide(image_bboxes,416)
    iou_matrix,which_anchor_box_index = get_iou_matrix_tf(image_bboxes,anchor_boxes)
    
    print(which_anchor_box_index)

    anchor_boxes_per_output = len(anchor_boxes)//2
    #Indices de los bouding boxes que irian en cada salida, index_best_ yolo nos dice que bouding boxes de la imagen van a la salida YOLO1,
    #porque su mejor IoU fue con los len(anchor_boxes)//2 anchor boxes mas grandes
    index_best_yolo1 = tf.where(which_anchor_box_index>=anchor_boxes_per_output)[:,0]
    index_best_yolo2 = tf.where(which_anchor_box_index<anchor_boxes_per_output)[:,0]
    index_best_anchor_yolo1 = tf.gather(which_anchor_box_index,index_best_yolo1,axis=0)
    index_best_anchor_yolo2 = tf.gather(which_anchor_box_index,index_best_yolo2,axis=0)
    
    print(index_best_yolo1)
    print(index_best_anchor_yolo1)

    print(index_best_yolo2)
    print(index_best_anchor_yolo2)

    
    best_bboxes_yolo1 = tf.gather(image_bboxes,index_best_yolo1,axis =0)
    best_anchors_yolo1 = tf.gather(anchor_boxes,index_best_anchor_yolo1, axis =0) #LOs dos anchor boxes grandes corrsponden a YOLO1
    best_bboxes_yolo2 = tf.gather(image_bboxes,index_best_yolo2,axis =0)
    best_anchors_yolo2 = tf.gather(anchor_boxes,index_best_anchor_yolo2, axis =0) #Los dos anchor boxes pequeños corresponden a YOLO2
    
    
    if best_anchors_yolo1.shape[0] !=0:
        tx_vector_yolo1,ty_vector_yolo1,tw_vector_yolo1,th_vector_yolo1,obj_mask_yolo1,obj_vector_yolo1= fill_yolo_output(best_bboxes_yolo1,13,anchor_boxes_per_output,best_anchors_yolo1,index_best_anchor_yolo1)
    else:
        tx_vector_yolo1=ty_vector_yolo1=tw_vector_yolo1=th_vector_yolo1=obj_mask_yolo1= obj_vector_yolo1=tf.zeros((1,grid_size*grid_size*num_anchors))
        #noobj_mask_yolo1 = tf.ones((1,13*13*num_anchors))
    
    if best_anchors_yolo2.shape[0] != 0:
        tx_vector_yolo2,ty_vector_yolo2,tw_vector_yolo2,th_vector_yolo2,obj_mask_yolo2,obj_vector_yolo2 = fill_yolo_output(best_bboxes_yolo2,26,anchor_boxes_per_output,best_anchors_yolo2,index_best_anchor_yolo2)
    else:
        tx_vector_yolo2=ty_vector_yolo2=tw_vector_yolo2=th_vector_yolo2=obj_mask_yolo2 = obj_vector_yolo2=tf.zeros((1,grid_size*grid_size*num_anchors))
        #noobj_mask_yolo2 = tf.ones((1,26*26*num_anchors))
        
    tx_vector = tf.concat([tx_vector_yolo1,tx_vector_yolo2],axis=0)[:,tf.newaxis]
    ty_vector = tf.concat([ty_vector_yolo1,ty_vector_yolo2],axis=0)[:,tf.newaxis]
    tw_vector = tf.concat([tw_vector_yolo1,tw_vector_yolo2],axis=0)[:,tf.newaxis]
    th_vector = tf.concat([th_vector_yolo1,th_vector_yolo2],axis=0)[:,tf.newaxis]
    obj_mask = tf.concat([obj_mask_yolo1,obj_mask_yolo2],axis=0)[:,tf.newaxis]
    #noobj_mask = tf.concat([noobj_mask_yolo1,noobj_mask_yolo2],axis=0)[:,tf.newaxis]
    obj_vector = tf.concat([obj_vector_yolo1,obj_vector_yolo2],axis=0)[:,tf.newaxis]
    
    #output = tf.concat([tx_vector,ty_vector,tw_vector,th_vector,obj_mask,noobj_mask,obj_vector],axis=1)
    #images_bboxes_original
    #return image,output
    #Vamos a regresar obj mask que es 1 cuando hay objeto en grid y el anchor box especifico
    return tf.cast(image,tf.float32)/255,(tf.concat([tx_vector,ty_vector,obj_mask],axis=1),tf.concat([tw_vector,th_vector,obj_mask],axis=1),(obj_mask),(obj_mask))

def imgaug_data_augmentation(image,bboxes,num_real_boxes):
    im_shape = image.shape
    bbs = BoundingBoxesOnImage.from_xyxy_array(bboxes*416, shape=(416,416))
    
    policy = np.random.randint(5)
    
    #policy = 2
    if policy == 0:
        
        p = np.random.random()
        if p<=0.6:
            aug = iaa.TranslateX(px=(-60, 60),cval=128)
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs.remove_out_of_image().clip_out_of_image()
    
        p = np.random.random()
        if p<=0.8:
            aug = iaa.HistogramEqualization()
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs.remove_out_of_image().clip_out_of_image()
    
    elif policy==1:
        
        p=np.random.random()
        if p<=0.2:
            aug = iaa.TranslateY(px=(int(-0.18*416), int(0.18*416)),cval=128)
            for i in bbs.to_xyxy_array(np.int32)[:num_real_boxes,:]:
                bbox = image[i[1]:i[3],i[0]:i[2]]
                bbox_augmented = aug(image=bbox)
                image[i[1]:i[3],i[0]:i[2]] = bbox_augmented
        
        p=np.random.random()
        if p<=0.8:
            square_size = np.random.randint(48)
            aug = iaa.Cutout(nb_iterations=1, size=square_size/416, squared=True)
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs.remove_out_of_image().clip_out_of_image()
            
    elif policy==2:
        p=np.random.random()
        if p<=1:
            aug = iaa.ShearY(shear=(int(-0.06*416), int(0.06*416)), order=1, cval=128)
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs.remove_out_of_image().clip_out_of_image()
            
        p=np.random.random()
        if p<=0.6:
            aug = iaa.TranslateY(px=(int(-0.18*416), int(0.18*416)),cval=128)
            for i in bbs.to_xyxy_array(np.int32)[:num_real_boxes,:]:
                bbox = image[i[1]:i[3],i[0]:i[2]]
                bbox_augmented = aug(image=bbox)
                image[i[1]:i[3],i[0]:i[2]] = bbox_augmented
            
    elif policy==3:
        p=np.random.random()
        if p<=0.6:    
            aug = iaa.Rotate(rotate=(-30, 30), order=1, cval=128)
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs_aug.remove_out_of_image().clip_out_of_image()
        
        p=np.random.random()
        if p<=1:
            aug = iaa.MultiplySaturation((0.54, 1.54))
            image, bbs = aug(image=image, bounding_boxes=bbs)
            #bbs.remove_out_of_image().clip_out_of_image()
            
    bbs.remove_out_of_image()
    
    return image,np.clip(bbs.to_xyxy_array(np.float32),1,415),num_real_boxes
    
    
def preprocessing(example_proto):
    image_features = tf.io.parse_single_example(example_proto, image_feature_description)
    image = tf.image.decode_jpeg(image_features['image_raw'],channels = 3)
    image = tf.cast(tf.image.resize(image,size=(416,416)), tf.uint8)
    bboxes =  tf.io.parse_tensor(image_features['bboxes'], out_type=tf.float32)
    
    num_real_boxes = image_features['num_real_boxes']
    return image,bboxes,num_real_boxes

def preprocessing_validation_set(example_proto):
    image_features = tf.io.parse_single_example(example_proto, image_feature_description)
    image = tf.image.decode_jpeg(image_features['image_raw'],channels = 3)
    image = tf.cast(tf.image.resize(image,size=(416,416)), tf.uint8)
    bboxes =  tf.io.parse_tensor(image_features['bboxes'], out_type=tf.float32)
    bboxes = tf.clip_by_value(bboxes*416,1,415)
    
    num_real_boxes = image_features['num_real_boxes']
    return image,bboxes,tf.cast(num_real_boxes,tf.int64)
    
@tf.function(input_signature=[tf.TensorSpec((416,416,3), tf.uint8),tf.TensorSpec((None,4), tf.float32),tf.TensorSpec((), tf.int64)]) 
def tf_numpy_albumentations_real(image,bboxes,num_real_boxes):
    
    boxes_shape = bboxes.shape
    im_shape = image.shape

    image,bboxes,num_real_boxes = tf.numpy_function(imgaug_data_augmentation,[image,bboxes,num_real_boxes],Tout =[tf.uint8,tf.float32,tf.int64])
 
    image.set_shape(im_shape)
    bboxes.set_shape(boxes_shape)
    print("Imagen data type",image.dtype)
    print("Bboxes data type",bboxes.dtype)
    print("num_real_boxes",num_real_boxes.dtype)

    return image,bboxes,num_real_boxes

### Tranfer Learning without Data Augmentation

Comentamos la operacion de map en la función de tf_numpy_albumentations y ademas usamos la función preprocessing_validation, esto nos permitirá leer los datos sin data augmentatio, además usaremos la la opcion de mode="transfer" para hacer No entrenables todas las capas menos las de detección

In [7]:
#USANDO TF.IMAGE MODULE
#anchors =tf.constant(np.array([[0,0,0.026,0.062],[0,0,0.067,0.183],[0,0,0.128,0.323],[0,0,0.343,0.650]]),dtype=tf.float32)
anchors =tf.constant(np.array([[0,0,0.02078,0.049],[0,0,0.0426,0.128],[0,0,0.08523,0.19356],[0,0,0.1506,0.4163],[0,0,0.27835,0.58651],[0,0,0.5632,0.78614]]),dtype=tf.float32)

os.chdir(root_path+"/pedestrian_dataset_train_tfr")
filenames = os. listdir()
raw_image_dataset = tf.data.TFRecordDataset(filenames)

os.chdir(root_path+"/pedestrian_dataset_val_tfr_fixed")
filenames = os. listdir()
raw_image_dataset_val =tf.data.TFRecordDataset(filenames)

os.chdir(root_path+"/pedestrian_dataset_train_tfr")

train_dataset = raw_image_dataset.map(preprocessing_validation_set,num_parallel_calls=8)
#train_dataset = train_dataset.map(tf_numpy_albumentations_real,num_parallel_calls=8)
train_dataset = train_dataset.map(lambda x,y,z:build_targets(x,y,z,anchors),num_parallel_calls=8)
train_dataset = train_dataset.batch(16)

val_dataset = raw_image_dataset_val.map(preprocessing_validation_set,num_parallel_calls=8)
val_dataset = val_dataset.map(lambda x,y,z:build_targets(x,y,z,anchors),num_parallel_calls=8)
val_dataset = val_dataset.batch(16)

Tensor("ArgMax:0", dtype=int64)
Tensor("strided_slice_2:0", shape=(None,), dtype=int64)
Tensor("GatherV2:0", dtype=int64)
Tensor("strided_slice_3:0", shape=(None,), dtype=int64)
Tensor("GatherV2_1:0", dtype=int64)
tipo de aoutput_positivon Tensor("add_18:0", dtype=int64)
Tensor("add_18:0", dtype=int64)
507
tipo de aoutput_positivon Tensor("add_30:0", dtype=int64)
Tensor("add_30:0", dtype=int64)
2028
Tensor("ArgMax:0", dtype=int64)
Tensor("strided_slice_2:0", shape=(None,), dtype=int64)
Tensor("GatherV2:0", dtype=int64)
Tensor("strided_slice_3:0", shape=(None,), dtype=int64)
Tensor("GatherV2_1:0", dtype=int64)
tipo de aoutput_positivon Tensor("add_18:0", dtype=int64)
Tensor("add_18:0", dtype=int64)
507
tipo de aoutput_positivon Tensor("add_30:0", dtype=int64)
Tensor("add_30:0", dtype=int64)
2028


### Loss Function

In [8]:
from tensorflow.keras.losses import Loss,BinaryCrossentropy,MeanSquaredError,MeanSquaredLogarithmicError

def loss_xy(y_true,y_pred):
    
    mse = MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
    
    tx_true,ty_true,obj_mask = tf.split(y_true, [1,1,1], axis=-1)
    tx_pred,ty_pred = tf.split(y_pred, [1,1], axis=-1)
    
    loss_x = tf.reduce_mean(tf.reduce_sum(obj_mask*(mse(tx_true,tx_pred)[:,:,tf.newaxis]),axis=1))
    loss_y = tf.reduce_mean(tf.reduce_sum(obj_mask*(mse(ty_true,ty_pred)[:,:,tf.newaxis]),axis=1))

    return loss_x+loss_y

def loss_wh(y_true,y_pred):
    
    mse = MeanSquaredError(reduction=tf.keras.losses.Reduction.NONE)
    
    tw_true,th_true,obj_mask = tf.split(y_true, [1,1,1], axis=-1)
    tw_pred,th_pred = tf.split(y_pred, [1,1], axis=-1)
    
    loss_w = tf.reduce_mean(tf.reduce_sum(obj_mask*(mse(tw_true,tw_pred)[:,:,tf.newaxis]),axis=1))
    loss_h = tf.reduce_mean(tf.reduce_sum(obj_mask*(mse(th_true,th_pred)[:,:,tf.newaxis]),axis=1))


    return loss_w+loss_h


def loss_objectness(y_true,y_pred):
    bce = BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    
    #tw_true,th_true,obj_mask = tf.split(y_true, [1,1,1], axis=-1)
    #tw_pred,th_pred = tf.split(y_pred, [1,1], axis=-1)   
    
    loss_obj =tf.reduce_mean(tf.reduce_sum( y_true*bce(y_true,y_pred)[:,tf.newaxis],axis=1))
    
    return loss_obj

def loss_no_objectness(y_true,y_pred):
    bce = BinaryCrossentropy(reduction=tf.keras.losses.Reduction.NONE)
    
    
    loss_noobj =tf.reduce_mean(tf.reduce_sum((1-y_true)*bce(y_true,y_pred)[:,tf.newaxis],axis=1))
    
    return loss_noobj


## Training with Model.fit()

In [9]:
#anchors =tf.constant(np.array([[0.026,0.062],[0.067,0.183],[0.128,0.323],[0.343,0.650]]),dtype=tf.float32)
anchors =tf.constant(np.array([[0,0,0.02078,0.049],[0,0,0.0426,0.128],[0,0,0.08523,0.19356],[0,0,0.1506,0.4163],[0,0,0.27835,0.58651],[0,0,0.5632,0.78614]]),dtype=tf.float32)

model = TinyYOLOv3(1,anchor_boxes=anchors,train=True,mode = "transfer")
model.build(batch_input_shape=(None,416,416,3))
model.load_weights(root_path+'/weights_saved/pesos_transfer_learning_5521_20_epoch_nadam_0dot00001_mse_3anchors_con_data_aug')



To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Modo entrenamiento


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7f9773503390>

In [10]:
for l in model.layers:
    print(l.name, l.trainable)#,l.weights[0].shape)
    pass

BasicBlock1 False
BasicBlock2 False
BasicBlock3 False
BasicBlock4 False
BasicBlock5 False
BasicBlock6 False
BasicBlock7 False
BasicBlock8 False
BasicBlock9 False
FinalBlock1 True
BasicBlock11 False
BasicBlock12 False
FinalBlock2 True
Concatenate True
Upsampling True
Prediction1 True
Prediction2 True
Concatenate_BBOX True


In [11]:
model.summary()

Model: "tiny_yol_ov3"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
BasicBlock1 (BasicBlock)     multiple                  496       
_________________________________________________________________
BasicBlock2 (BasicBlock)     multiple                  4736      
_________________________________________________________________
BasicBlock3 (BasicBlock)     multiple                  18688     
_________________________________________________________________
BasicBlock4 (BasicBlock)     multiple                  74240     
_________________________________________________________________
BasicBlock5 (BasicBlock)     multiple                  295936    
_________________________________________________________________
BasicBlock6 (BasicBlock)     multiple                  1181696   
_________________________________________________________________
BasicBlock7 (BasicBlock)     multiple                 

In [12]:
tf.random.set_seed(0)

from tensorflow.keras.metrics import TrueNegatives,TruePositives,FalseNegatives,FalsePositives,Precision,Recall

opt = tf.keras.optimizers.Nadam(learning_rate=1e-4)

losses = {"output_1": loss_xy,
          "output_2": loss_wh,
          "output_3":loss_objectness,
          "output_4":loss_no_objectness  
}

metrics = {"output_3":[Precision(0.5),Recall(0.5),TrueNegatives(0.5),TruePositives(0.5),FalseNegatives(0.5),FalsePositives(0.5)]}
model.compile(optimizer=opt, loss=losses,metrics=metrics,loss_weights=[5,5,2,1])
os.chdir(root_path+"/pedestrian_dataset_train_tfr")

Entrenamiento tipo transfer learning con lr = 0.0001 y por 20 epocas, la función de costo será con mse normal y sin Data Augmentation y ahora con 4 anchors totales, no con 6

In [13]:
history = model.fit(train_dataset, epochs=10,validation_data=val_dataset)

Epoch 1/10
Modo entrenamiento
Modo entrenamiento
   4607/Unknown - 196s 43ms/step - loss: 44.0756 - output_1_loss: 5.5150 - output_2_loss: 0.6571 - output_3_loss: 0.0377 - output_4_loss: 13.1389 - output_3_precision: 0.6143 - output_3_recall: 0.1324 - output_3_true_negatives: 186531488.0000 - output_3_true_positives: 35822.0000 - output_3_false_negatives: 234702.0000 - output_3_false_positives: 22496.0000Modo entrenamiento
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [10]:
os.chdir("/home/sergio/Documents/json_experiments")
import json
#json.dumps(str(a))
with open('history_transfer_5521_20_epoch_nadam_0dot00001_mse_2anchors.json', 'w') as fp:
    json.dump(str(history.history), fp)

In [14]:
model.save_weights(root_path+'/weights_saved/pesos_5521_20_30_epoch_nadam_0dot00001_mse_2anchors_tl')

## CONTINUACION

In [9]:
#anchors =tf.constant(np.array([[0.026,0.062],[0.067,0.183],[0.128,0.323],[0.343,0.650]]),dtype=tf.float32)
anchors =tf.constant(np.array([[0,0,0.02078,0.049],[0,0,0.0426,0.128],[0,0,0.08523,0.19356],[0,0,0.1506,0.4163],[0,0,0.27835,0.58651],[0,0,0.5632,0.78614]]),dtype=tf.float32)

model = TinyYOLOv3(1,anchor_boxes=anchors,train=True,mode = "transfer")
model.build(batch_input_shape=(None,416,416,3))
model.load_weights(root_path+'/weights_saved/pesos_5521_20_30_epoch_nadam_0dot00001_mse_2anchors_tl')



To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.

Modo entrenamiento


<tensorflow.python.training.tracking.util.CheckpointLoadStatus at 0x7fbcdf319400>

In [10]:
tf.random.set_seed(0)

from tensorflow.keras.metrics import TrueNegatives,TruePositives,FalseNegatives,FalsePositives,Precision,Recall

opt = tf.keras.optimizers.Nadam(learning_rate=1e-4)

losses = {"output_1": loss_xy,
          "output_2": loss_wh,
          "output_3":loss_objectness,
          "output_4":loss_no_objectness  
}

metrics = {"output_3":[Precision(0.5),Recall(0.5),TrueNegatives(0.5),TruePositives(0.5),FalseNegatives(0.5),FalsePositives(0.5)]}
model.compile(optimizer=opt, loss=losses,metrics=metrics,loss_weights=[2,2,3,1])
os.chdir(root_path+"/pedestrian_dataset_train_tfr")

In [11]:
history = model.fit(train_dataset, epochs=10,validation_data=val_dataset)

Epoch 1/10
Modo entrenamiento
Modo entrenamiento
   4607/Unknown - 187s 41ms/step - loss: 24.8305 - output_1_loss: 5.4214 - output_2_loss: 0.4453 - output_3_loss: 0.0372 - output_4_loss: 12.9852 - output_3_precision: 0.6113 - output_3_recall: 0.1414 - output_3_true_negatives: 186529696.0000 - output_3_true_positives: 38240.0000 - output_3_false_negatives: 232284.0000 - output_3_false_positives: 24314.0000Modo entrenamiento
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [12]:
model.save_weights(root_path+'/weights_saved/pesos_5521_30_40_epoch_nadam_0dot00001_mse_3anchors_tl')

In [13]:
tf.random.set_seed(0)

from tensorflow.keras.metrics import TrueNegatives,TruePositives,FalseNegatives,FalsePositives,Precision,Recall

opt = tf.keras.optimizers.Nadam(learning_rate=1e-5)

losses = {"output_1": loss_xy,
          "output_2": loss_wh,
          "output_3":loss_objectness,
          "output_4":loss_no_objectness  
}

metrics = {"output_3":[Precision(0.5),Recall(0.5),TrueNegatives(0.5),TruePositives(0.5),FalseNegatives(0.5),FalsePositives(0.5)]}
model.compile(optimizer=opt, loss=losses,metrics=metrics,loss_weights=[2,2,3,1])
os.chdir(root_path+"/pedestrian_dataset_train_tfr")

In [14]:
history = model.fit(train_dataset, epochs=10,validation_data=val_dataset)

Epoch 1/10
Modo entrenamiento
Modo entrenamiento
   4606/Unknown - 196s 43ms/step - loss: 24.7161 - output_1_loss: 5.3660 - output_2_loss: 0.4397 - output_3_loss: 0.0373 - output_4_loss: 12.9927 - output_3_precision_1: 0.6200 - output_3_recall_1: 0.1363 - output_3_true_negatives_1: 186526272.0000 - output_3_true_positives_1: 36878.0000 - output_3_false_negatives_1: 233640.0000 - output_3_false_positives_1: 22604.0000Modo entrenamiento
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


In [15]:
model.save_weights(root_path+'/weights_saved/pesos_5521_40_50_epoch_nadam_0dot000001_mse_3anchors_tl')