In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
pd.options.mode.chained_assignment = None
sns.set_theme()

## Desarrollo de una posible métrica de desempeño
## Para la ejecución del código ir a la sección Naive Approach

In [None]:
def logistic_function(x,tolerance = 1.):
    """Evaluate the logistic curve 1 / (1 + exp(-tolerance * x)).

    Args:
        x: scalar or NumPy array to transform.
        tolerance: multiplier applied to ``x`` inside the exponential.

    Returns:
        The logistic value(s), with the same shape as ``x``.
    """
    exponent = -tolerance * x
    return 1 / (1 + np.exp(exponent))
def diff_normalization(x,num_pixel_tol = 100):
    """Squash a coordinate difference with tanh.

    Args:
        x: scalar or NumPy array holding the difference(s) to normalize.
        num_pixel_tol: tolerance used to derive the tanh scale factor
            ``(200 / num_pixel_tol) / 100``; must be non-zero.

    Returns:
        ``tanh(scale * x)`` with the same shape as ``x``.
    """
    scale = (200/num_pixel_tol)/100
    scaled_difference = scale * x
    return np.tanh(scaled_difference)
def diff_assignation(x):
    """Return the complement ``1 - x`` of a value or NumPy array."""
    complement = 1 - x
    return complement

In [None]:
# Visual check of the normalization curve for a 10-pixel tolerance.
x = np.linspace(0, 10, num=10)
y = diff_normalization(x, num_pixel_tol=10)
plt.plot(x, y)
plt.show()

In [None]:
# Raw annotation table.
example_table = pd.read_csv('etiquetas.csv')

# Relabelled annotations; the 'new_class' column is exposed as 'class'.
new_label_table = pd.read_csv('renombramiento_etiquetas.csv').rename(
    columns={'new_class': 'class'}
)
new_label_table.head()

In [None]:
# Relabel every 'Descripciones' row as 'Etiqueta' (modifies new_label_table in place).
new_label_table.loc[new_label_table['class'] == 'Descripciones', 'class'] = 'Etiqueta'

In [None]:
# Box width/height per annotation, then 10% of the mean size on each axis.
example_table['xsize'] = example_table['xmax'].sub(example_table['xmin'])
example_table['ysize'] = example_table['ymax'].sub(example_table['ymin'])
mean_xsize = example_table['xsize'].astype(float).mean()
mean_ysize = example_table['ysize'].astype(float).mean()
print(mean_xsize * 0.1 ,mean_ysize * 0.1)

In [None]:
def calculate_iou(box1, box2):
    """
    Calculates the IoU (Intersection over Union) between two axis-aligned bounding boxes.

    Arguments:
    box1 -- sequence whose first four items are the [x1, y1, x2, y2] corners of the first box
    box2 -- sequence whose first four items are the [x1, y1, x2, y2] corners of the second box

    Items after the fourth (for example a class label) are ignored.

    Returns:
    iou -- float value representing the IoU between the two bounding boxes;
           0.0 when the union has no area (e.g. both boxes are degenerate),
           instead of raising ZeroDivisionError
    """
    # area of each bounding box
    area_box1 = (box1[2] - box1[0]) * (box1[3] - box1[1])
    area_box2 = (box2[2] - box2[0]) * (box2[3] - box2[1])

    # corners of the intersection rectangle
    x1 = max(box1[0], box2[0])
    y1 = max(box1[1], box2[1])
    x2 = min(box1[2], box2[2])
    y2 = min(box1[3], box2[3])

    # clamp at zero so disjoint boxes yield an empty intersection
    intersection_area = max(0, x2 - x1) * max(0, y2 - y1)

    union_area = area_box1 + area_box2 - intersection_area

    # Nothing to compare when the union is empty: report "no overlap".
    if union_area <= 0:
        return 0.0

    return intersection_area / union_area


def evaluate_object_detection(pred_boxes, true_boxes, iou_thresh):
    """
    Computes the mean per-class precision of a set of detections using IoU matching.

    A prediction counts as a true positive when its best IoU against ANY ground
    truth box reaches ``iou_thresh``; otherwise it is a false positive.
    NOTE(review): the class of the matched ground truth box is not compared with
    the predicted class, and one ground truth box may validate several
    predictions -- confirm this is the intended matching rule.

    Args:
    - pred_boxes (list of tuples): Predicted boxes as (x1, y1, x2, y2, class)
    - true_boxes (list of tuples): Ground truth boxes; only the first four
      items (x1, y1, x2, y2) are used
    - iou_thresh (float): Minimum IoU for a prediction to count as a true positive

    Returns:
    - ap (float): Precision averaged over the predicted classes; 0.0 when
      there are no predictions
    """
    tp = {}
    fp = {}
    for pred_box in pred_boxes:
        cls = pred_box[4]
        tp.setdefault(cls, 0)
        fp.setdefault(cls, 0)

        # default=0.0 keeps an empty ground truth from raising ValueError:
        # with nothing to match, every prediction has zero overlap.
        max_overlap = max(
            (calculate_iou(pred_box, true_box) for true_box in true_boxes),
            default=0.0,
        )
        if max_overlap >= iou_thresh:
            tp[cls] += 1
        else:
            fp[cls] += 1

    # No predictions means no class to average over.
    if not tp:
        return 0.0

    # Every class in `tp` has at least one prediction, so the denominator is > 0.
    # (The previous per-class recall was never used, and it raised
    # ZeroDivisionError for classes absent from the ground truth.)
    precisions = {cls: tp[cls] / (tp[cls] + fp[cls]) for cls in tp}

    return sum(precisions.values()) / len(precisions)


In [None]:
def measuring_model_precision(table:pd.DataFrame,num_pixel_tol:int = 100):
    """Score each paired (hand-labelled, predicted) box on position and label.

    Every row pairs one hand-labelled box with the box predicted for it. The
    score is the sum of two halves:

    * ``0.5 * IoU`` between the labelled box (xmin, ymin, xmax, ymax) and the
      predicted box (xmin_pred, ymin_pred, xmax_pred, ymax_pred);
    * ``0.5`` when ``class`` equals ``predicted_class``.

    The previous implementation could not run: it used the truth value of a
    column comparison, passed the coordinates as [xmin, xmax, ymin, ymax]
    (IoU helpers expect [x1, y1, x2, y2]), handed flat coordinate lists to a
    function that expects lists of labelled boxes, and used a pixel tolerance
    as an IoU threshold.
    NOTE(review): the 50/50 IoU/label weighting is inferred from the original
    ``/ 2`` terms -- confirm it is the intended metric.

    Args:
        table (pd.DataFrame): table with the test labels and the model output.
            A single row (pd.Series) is also accepted.
        num_pixel_tol (int): kept for backward compatibility; unused because
            IoU is a ratio and has no pixel scale.

    Returns:
        The same ``table`` object with a ``score`` column (or entry) in [0, 1].
    """
    def _values(column):
        # Works for DataFrame columns and for the scalars of a single row.
        return np.asarray(table[column], dtype=float)

    xmin, ymin, xmax, ymax = (_values(c) for c in ('xmin', 'ymin', 'xmax', 'ymax'))
    xmin_p, ymin_p, xmax_p, ymax_p = (
        _values(c) for c in ('xmin_pred', 'ymin_pred', 'xmax_pred', 'ymax_pred')
    )

    # Element-wise intersection, clamped at zero for disjoint boxes.
    inter_w = np.clip(np.minimum(xmax, xmax_p) - np.maximum(xmin, xmin_p), 0, None)
    inter_h = np.clip(np.minimum(ymax, ymax_p) - np.maximum(ymin, ymin_p), 0, None)
    intersection = inter_w * inter_h

    union = (xmax - xmin) * (ymax - ymin) + (xmax_p - xmin_p) * (ymax_p - ymin_p) - intersection
    # Avoid dividing by zero for degenerate boxes; their IoU is reported as 0.
    safe_union = np.where(union > 0, union, 1.0)
    iou = np.where(union > 0, intersection / safe_union, 0.0)

    same_label = np.asarray(table['class'] == table['predicted_class'], dtype=float)

    score = 0.5 * iou + 0.5 * same_label
    table['score'] = score if np.ndim(score) else float(score)
    return table

In [None]:
def contar_clases_por_imagen(table:pd.DataFrame):
    """Add per-image label statistics to an annotation table.

    Rows are grouped by ``filename``. Each row receives:

    * ``class_count``: number of distinct ``class`` values in its image;
    * ``label_count``: number of annotation rows in its image.

    The result lists the rows image by image (in order of first appearance),
    keeps the original index labels and leaves ``table`` untouched. Rows whose
    ``filename`` is missing are dropped.

    Args:
        table (pd.DataFrame): annotation table with ``filename`` and ``class`` columns.

    Returns:
        pd.DataFrame: a new table with the two extra columns. An empty input
        yields an empty table that still carries both columns.
    """
    counted_groups = [
        # assign() returns a new frame, so no slice of `table` is written to.
        group.assign(
            class_count=group['class'].nunique(dropna=False),
            label_count=len(group),
        )
        # One pass over the table; sort=False keeps first-appearance order.
        for _, group in table.groupby('filename', sort=False)
    ]
    if not counted_groups:
        # pd.concat([]) raises ValueError, so build the empty result explicitly.
        return table.assign(
            class_count=pd.Series(dtype='int64'),
            label_count=pd.Series(dtype='int64'),
        )
    return pd.concat(counted_groups)

In [None]:
def train_balance_table_compilation(table:pd.DataFrame, fraction :float = 0.6, random_state=None):
    """Split an annotation table into train/test sets by weighted image sampling.

    Each image gets a sampling weight (``class_ratio``):

    * base weight: distinct classes in the image / largest such count;
    * images whose labels are all of different classes get an extra
      ``label_count / max class_count``;
    * images whose distinct classes are fewer than 66% of their labels get
      weight 0 and therefore can never be drawn for training.

    ``fraction`` of the images is then drawn without replacement using those
    weights; all annotations of the drawn images form the train table and the
    remaining annotations form the test table.

    Args:
        table (pd.DataFrame): full annotation table (``filename`` and ``class`` columns).
        fraction (float): share of the images to draw for training.
        random_state: forwarded to ``DataFrame.sample``; pass an int to make
            the split reproducible. ``None`` keeps the previous random behaviour.

    Returns:
        tuple: ``(table_train_sample, table_test_sample)``, both re-indexed from 0.

    Raises:
        ValueError: raised by ``DataFrame.sample`` when fewer images have a
        non-zero weight than the number requested.
    """
    tabla_contada = contar_clases_por_imagen(table)
    max_class_count = tabla_contada['class_count'].max()
    tabla_contada['class_ratio'] = tabla_contada['class_count'] / max_class_count

    # Bonus for images where every label belongs to a different class.
    all_labels_distinct = tabla_contada.class_count == tabla_contada.label_count
    tabla_contada.loc[all_labels_distinct, 'class_ratio'] += tabla_contada['label_count'] / max_class_count

    # Images dominated by repeated classes are excluded from the draw.
    mostly_repeated = tabla_contada.class_count.lt(tabla_contada.label_count * 0.66)
    tabla_contada.loc[mostly_repeated, 'class_ratio'] = 0

    # One row per image is enough: the weight is identical for all its rows.
    tabla_contada_simplificada = tabla_contada.drop_duplicates(subset = 'filename').reset_index()
    train_sample = tabla_contada_simplificada.sample(
        weights = tabla_contada_simplificada['class_ratio'],
        frac = fraction,
        random_state = random_state,
    )

    in_train = table.filename.isin(train_sample.filename)
    table_train_sample = table.loc[in_train].reset_index(drop = True)
    table_test_sample = table.loc[~in_train].reset_index(drop = True)
    return table_train_sample,table_test_sample
    

In [None]:
# Weighted random split of the relabelled table (default: 60% of the images go to train).
train_set,test_set = train_balance_table_compilation(new_label_table)

In [None]:
# Class distribution of the training split (title typo "entremamiento" fixed).
train_hist_ax = sns.histplot(data = train_set, y = "class")
train_hist_ax.set(title = "set entrenamiento")
plt.tight_layout()

In [None]:
# Class distribution of the test split.
test_hist_ax = sns.histplot(data = test_set, y = "class")
test_hist_ax.set(title = "set testeo")
plt.tight_layout()

In [None]:
# Class distribution of the full relabelled dataset, for comparison with the splits.
original_hist_ax = sns.histplot(data = new_label_table, y = "class")
original_hist_ax.set(title = "set datos originales")
plt.tight_layout()

## Naive Approach

In [3]:
import numpy as np
import pandas as pd

import os


In [4]:
def assign_number_according_to_class(series : pd.Series):
    """Map each distinct value of ``series`` to an integer code.

    Codes start at 0 and follow the order in which values first appear.

    Returns:
        dict: ``{value: code}``.
    """
    return {label: code for code, label in enumerate(series.unique())}

def put_hash_column(table : pd.DataFrame):
    """Add a ``class_hash`` column holding the integer code of each ``class_name``.

    Codes come from ``assign_number_according_to_class`` (order of first
    appearance). The column is stored with an integer dtype; the previous
    row-by-row ``.loc`` assignment produced floats (0.0, 1.0, ...), which then
    leaked into exported ids. Only when some ``class_name`` is missing does the
    column stay float, with NaN on those rows, as before.

    Args:
        table (pd.DataFrame): table with a ``class_name`` column; modified in place.

    Returns:
        pd.DataFrame: the same ``table`` object.
    """
    hash_dict = assign_number_according_to_class(table['class_name'])
    codes = table['class_name'].map(hash_dict)
    # Keep missing class names unmapped, matching the previous behaviour.
    codes = codes.where(table['class_name'].notna())
    table['class_hash'] = codes if codes.isna().any() else codes.astype(int)
    return table

# Yolo txt annotation creation

In [None]:

# Export one YOLO-format .txt file per image:
#   <class id> <x centre> <y centre> <box width> <box height>
# with all four geometric values divided by the image width/height.
data_table = pd.read_csv('etiquetas.csv')
data_table = data_table.drop(columns = 'valid_path')
data_table = put_hash_column(data_table)

yolo_annotation_folder = os.path.join(os.getcwd(),'data','labels')
os.makedirs(yolo_annotation_folder, exist_ok=True)

annotations_grouped_by_image = data_table.groupby("filename")

for image_name, file_table in annotations_grouped_by_image:
    # splitext handles any extension length; slicing off 4 characters turned
    # e.g. 'photo.jpeg' into 'photo.' and produced 'photo..txt'.
    annotation_stem = os.path.splitext(image_name)[0]
    annotation_path = os.path.join(yolo_annotation_folder, f"{annotation_stem}.txt")
    with open(annotation_path, "w") as file:
        for _, row in file_table.iterrows():
            class_width = row["xmax"]-row["xmin"]
            class_height = row["ymax"]-row["ymin"]
            x_center = (row["xmin"] + (0.5 * class_width))/row["width"]
            y_center = (row["ymin"] + (0.5 * class_height))/row["height"]
            line = f'{int(row["class_hash"])} {x_center} {y_center} {class_width/row["width"]} {class_height/row["height"]}\n'
            file.write(line)
        

# TRANSFORMACION DE DATOS A COCO FORMAT

In [5]:
from dataclasses import dataclass, field
from typing import Dict, List 
import functools

@dataclass 
class JsonCOCOFormatter:
    """Accumulates licences, images, categories and annotations and writes them as one JSON file.

    The five container fields (``info``, ``licenses``, ``images``,
    ``annotations``, ``categories``) hold the document being built. Passing the
    same list objects to several instances lets them append to a shared
    dataset. The remaining fields describe the single licence / image /
    category / annotation that the ``make_*`` methods append.

    Fixes relative to the previous version:
    * the container fields ended with a trailing comma, which turned their
      default into a one-element tuple holding the ``field(...)`` object, so an
      instance built with defaults could not be appended to;
    * ``make_lincense`` appended the same licence once per call;
    * ``to_file`` wrote ``str(dict)`` with quotes swapped, which is not valid
      JSON for ``None``, apostrophes or NumPy scalars.

    ``info`` is still accepted by ``__init__`` (to keep the argument order) but
    is always overwritten in ``__post_init__``.
    """

    ##### document containers
    info: Dict = field(default_factory = dict)
    licenses: List[Dict] = field(default_factory = list)
    images: List[Dict] = field(default_factory = list)
    annotations: List[Dict] = field(default_factory = list)
    categories: List[Dict] = field(default_factory = list)

    ##### licence inputs
    url : str = field(default="None")
    id_lincense : int = field(default = None)
    lincense_name : str = field(default = "generic_lincense")

    ##### image inputs
    file_name : str = field(default = "")
    coco_url : str = field(default = "None")
    height : int = field(default = None)
    width : int = field(default = None)
    date_captured : str = field(default = "None")
    flickr_url : str = field(default = "None")
    id_photo : int = field(default = None)

    ##### category inputs
    category_name : str = field(default = "")
    subcategory_name : str = field(default = "")
    id_category : int = field(default = None)

    ##### annotation inputs
    segmentation : List[int] = field(default_factory = list)
    is_crowd : str = field(default = "FALSE")
    bbox : List[int] = field(default_factory = list)
    id_annotation : int = field(default = None)

    ##### output location (the default directory is resolved when the class is defined)
    SAVE_PATH : str = field(default = os.path.join(os.getcwd(),'annotations'))
    FILE_NAME : str = field(default = 'dataset.json')

    def __post_init__(self):
        # Fixed dataset header; replaces whatever was passed as `info`.
        self.info = {
                    "year": "2023",
                    "version": "1.0",
                    "description": "National Herbarium DataSet",
                    "contributor": "DataLab Universidad Nacional de Colombia",
                    "url": "",
                    "date_created": "2023/04/20"
                    }

    def make_lincense(self):
        """Append this instance's licence unless an identical entry already exists."""
        license_entry = {
                "url": self.url,
                "id": self.id_lincense,
                "name": self.lincense_name
            }
        if license_entry not in self.licenses:
            self.licenses.append(license_entry)

    def make_image(self):
        """Append this instance's image unless ``file_name`` is already a value of a stored image."""
        if any(self.file_name in image.values() for image in self.images):
            return
        self.images.append({
            "license": self.id_lincense,
            "file_name": self.file_name,
            "coco_url": self.coco_url,
            "height": self.height,
            "width": self.width,
            "date_captured": self.date_captured,
            "flickr_url": self.flickr_url,
            "id": self.id_photo,
            })

    def make_category(self):
        """Append this instance's category unless ``subcategory_name`` is already a value of a stored category."""
        if any(self.subcategory_name in category.values() for category in self.categories):
            return
        self.categories.append({
                "supercategory": self.category_name,
                "id": self.id_category,
                "name": self.subcategory_name
            })

    def make_annotation(self):
        """Append this instance's annotation (no de-duplication)."""
        self.annotations.append({
                "segmentation": self.segmentation,
                # NOTE(review): this is height * width of the instance (the image
                # size in the visible caller), not the area of `bbox` -- confirm
                # which one downstream consumers expect.
                "area": self.height * self.width,
                "iscrowd": self.is_crowd,
                "image_id": self.id_photo,
                "bbox": self.bbox,
                "category_id": self.id_category,
                "id": self.id_annotation
            })

    @staticmethod
    def _json_default(value):
        """Convert NumPy scalars/arrays (anything exposing ``tolist``) for ``json.dump``."""
        if hasattr(value, "tolist"):
            return value.tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def to_file(self):
        """Write info/licenses/images/annotations/categories as JSON to SAVE_PATH/FILE_NAME.

        The target directory is created when missing.
        """
        import json  # local import: this cell's import block does not provide it

        target_keys =['info','licenses','images','annotations','categories']
        target_dict = {key: getattr(self, key) for key in target_keys}
        os.makedirs(self.SAVE_PATH, exist_ok=True)
        with open(os.path.join(self.SAVE_PATH,self.FILE_NAME),'w') as file:
            json.dump(target_dict, file, default=self._json_default)
    
    

In [6]:
# Load the annotations, drop the unused 'valid_path' column and add the numeric class code.
data_table = put_hash_column(
    pd.read_csv('etiquetas.csv').drop(columns = 'valid_path')
)

def bbox_COCO_format(bbox:list[float])->list[float]:
    """Convert corner coordinates to centre/size form.

    Args:
        bbox: ``[x_min, y_min, x_max, y_max]``.

    Returns:
        ``[x_center, y_center, width, height]``; width and height are absolute values.
    """
    left, top, right, bottom = bbox[0], bbox[1], bbox[2], bbox[3]
    return [
        0.5 * (left + right),
        0.5 * (top + bottom),
        np.abs(right - left),
        np.abs(bottom - top),
    ]

def bbox_de_COCO_format(bbox:list[float])->list[float]:
    """Convert centre/size coordinates back to corner form.

    Args:
        bbox: ``[x_center, y_center, width, height]``.

    Returns:
        ``[x_min, y_min, x_max, y_max]``.
    """
    x_center, y_center, box_width, box_height = bbox[0], bbox[1], bbox[2], bbox[3]
    left = x_center - (0.5 * box_width)
    top = y_center - (0.5 * box_height)
    return [left, top, left + box_width, top + box_height]

# Build the COCO-style dataset: one image (and licence) per file, one annotation per row.
file_information_gather = data_table.groupby('filename')
id_picture = 0
id_licencia = 0
# Annotation ids must be unique across the whole dataset, so the counter is
# created once here. It used to be reset for every image, giving each image
# annotations numbered 1, 2, 3, ...
id_anotacion = 0
imagenes , licensias, anotaciones, categorias = [],[],[],[]
for picture_name, sub_table in file_information_gather:
    id_picture += 1
    id_licencia += 1
    picture_path = os.path.abspath(picture_name)
    # Pre-bind everything shared by all annotations of this image; the four
    # lists are shared by every formatter instance, so they accumulate.
    single_photo_data = functools.partial(
        JsonCOCOFormatter,
        images=imagenes,
        licenses=licensias,
        annotations=anotaciones,
        categories=categorias,
        file_name=picture_path,
        height=sub_table.height.unique()[0],
        width=sub_table.width.unique()[0],
        id_lincense = id_licencia,
        id_photo = id_picture,
    )
    for i,row in sub_table.iterrows():
        id_anotacion += 1
        # NOTE(review): bbox_COCO_format returns [x_center, y_center, w, h] in
        # pixels; the COCO specification stores [x_min, y_min, w, h] -- confirm
        # what the consumers of this file expect.
        bbox_paste = bbox_COCO_format([row["xmin"],row["ymin"],row['xmax'],row['ymax']])
        whole_data = single_photo_data(category_name = "etiquetas", subcategory_name = row['class_name'],segmentation = ["empty"],bbox = bbox_paste,id_annotation=id_anotacion,id_category=row['class_hash'])
        whole_data.make_lincense()
        whole_data.make_category()
        whole_data.make_annotation()
        whole_data.make_image()
        

In [None]:
# Any formatter instance can write the file: they all share the same accumulated lists.
# `whole_data` is the last instance created by the loop in the previous cell.
whole_data.to_file()#.to_json()

In [None]:
import tensorflow as tf
# Report the TensorFlow version and the GPUs it can see.
print(tf.__version__)
physical_devices = tf.config.list_physical_devices('GPU')
tf.print(physical_devices)


print("GPU is available." if tf.config.list_physical_devices('GPU') else "GPU is not available.")

# EJEMPLO DE CREACION NUEVA ARQUITECTURA

In [None]:
from make_cfg import SAMPLE_DICT,make_cfg_file

# One list per key of SAMPLE_DICT, in SAMPLE_DICT's key order; each list has one entry per block.
new_arch_values = [
    [64,192,128,512,1024],
    [7,3,3,3,3],
    ['same','same','same','same','same'],
    ['relu','relu','relu','relu','relu'],
    [2,2,2,2,2],
    [None,None,None,None,None],
    ['valid','valid','valid','valid','valid'],
]
new_dict_arch = dict(zip(SAMPLE_DICT.keys(), new_arch_values))

make_cfg_file(new_dict_arch,'config/yolo_simple.cfg')


# Ejecucion de codigo

In [3]:
# Train with the architecture written to config/yolo_simple.cfg (batch size 50, 200 epochs).
!python naive_model.py --batch_size 50 --epochs 200 --model_structure config/yolo_simple.cfg --save_model YoloSimpBatch50Epochs200 

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 500, 500, 3)]        0         []                            
                                                                                                  
 bl_1 (Rescaling)            (None, 500, 500, 3)          0         ['input_1[0][0]']             
                                                                                                  
 bl_conv_0 (Conv2D)          (None, 500, 500, 64)         9472      ['bl_1[0][0]']                
                                                                                                  
 bl_maxpol_0 (MaxPooling2D)  (None, 250, 250, 64)         0         ['bl_conv_0[0][0]']           
                                                                                              

# Ejecucion arquitectura default

In [1]:
# Train with the script's default architecture (no --model_structure) on the GPU.
!python naive_model.py --batch_size 50 --device 'GPU' --epochs 200 --save_model BaseEpoch200Batch50

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_1 (InputLayer)        [(None, 500, 500, 3)]        0         []                            
                                                                                                  
 bl_1 (Rescaling)            (None, 500, 500, 3)          0         ['input_1[0][0]']             
                                                                                                  
 bl_conv_0 (Conv2D)          (None, 500, 500, 8)          224       ['bl_1[0][0]']                
                                                                                                  
 bl_maxpol_0 (MaxPooling2D)  (None, 250, 250, 8)          0         ['bl_conv_0[0][0]']           
                                                                                              

# lectura de modelo entrenado

In [None]:
import tensorflow as tf
from models import CustomIoUMetric
# The saved model references the project's custom metric, so Keras needs it to deserialize.
custom_objects = dict(CustomIoUMetric=CustomIoUMetric)

scanned_model = tf.keras.models.load_model(
    'models/CNN_modeltest1',
    custom_objects=custom_objects,
)

# evaluacion del modelo

In [None]:
from utils import get_maximun_number_of_annotation_in_set, train_batch_consolidation, read_data, image_test_set, image_train_set
import pickle


IMG_SHAPE = (500, 500)
total_test_images = []
total_test_targets = []

images,annotations,_,_ = read_data()

# The context manager closes the file; it was previously left open.
# NOTE: pickle.load can execute arbitrary code -- only load files you created yourself.
with open('sets/test_image_set','rb') as test_bites:
    test_images = pickle.load(test_bites)

max_n_boxes_test = get_maximun_number_of_annotation_in_set(annotations,images)
images_for_test , test_targets = train_batch_consolidation(test_images,total_test_images,annotations,total_test_targets,max_n_boxes_test,IMG_SHAPE)
images_for_test = tf.cast(images_for_test, dtype=tf.float32)
# The whole test set is evaluated as a single batch.
evaluations = scanned_model.evaluate(images_for_test,test_targets,batch_size=len(images_for_test))

MELUB102930a_sp64414160451423553680_medium image
MELUA002114a_sp64023357247815722155_medium annotation

In [2]:
import yaml

def display_yaml_content(yaml_file):
    """Print every top-level ``key : value`` pair of a YAML file and return the parsed mapping.

    Args:
        yaml_file: path of the YAML file to read.

    Returns:
        dict: the parsed content (``{}`` for an empty file). Returning it lets
        callers write ``documents = display_yaml_content(path)`` instead of
        depending on a variable left over from another session.
    """
    with open(yaml_file) as file:
        documents = yaml.full_load(file)
    # An empty file parses to None, which has no .items().
    if documents is None:
        documents = {}
    for item,doc in documents.items():
        print(item,':',doc)
    return documents

path : None
train : D:\Maestria\tesis\herbario\data\train\images
val : D:\Maestria\tesis\herbario\data\train\images
names : {0: 'Mascara_Codigos', 1: 'Mascara_ColorChecker', 2: 'Mascara_Descripciones', 3: 'Mascara_Encabezados', 4: 'Mascara_Escalas', 5: 'Mascara_Sellos'}


In [6]:
# NOTE(review): `documents` is not assigned at top level in any cell shown here
# (it is only a local inside display_yaml_content) -- this relies on leftover kernel state.
type(list(documents['names'].keys())[0])

int

0 small database label
1 handwritten data
2 stamp
3 annotation label
4 scale
5 swing tag
6 full database label
7 database label
8 swatch
9 institutional label
10 number(ii)

In [1]:
from utils import dump_file

# NOTE(review): absolute, machine-specific path -- consider a configurable data directory.
base_path = '/Users/juanpablovargasacosta/herbario/data/imagenes/'

# Class names in id order; ids are assigned 0..10 by position.
class_names = [
    'small database label',
    'handwritten data',
    'stamp',
    'annotation label',
    'scale',
    'swing tag',
    'full database label',
    'database label',
    'swatch',
    'institutional label',
    'number(ii)',
]
nw = {
    'train': base_path,
    'val': base_path,
    'names': dict(enumerate(class_names)),
}

dump_file(nw,'conf_try.yaml')

In [5]:
def confg_yaml(train_path:str,validation_path:str,categories:dict,config_file_name:str = 'default')->None:
    '''
    Write a dataset configuration file for the ultralytics YOLO package.

    The file ``<config_file_name>.yaml`` is created (or overwritten) in the
    current working directory with three keys: ``train`` (training images
    path), ``val`` (validation images path) and ``names`` (the target classes).
    '''
    output_name = f'{config_file_name}.yaml'
    config_dict = dict(train=train_path, val=validation_path, names=categories)

    with open(output_name, 'w') as config_file:
        yaml.dump(config_dict, config_file, default_flow_style=False)


In [None]:
# from drive.MyDrive.tesis.COCO_formatter import bbox_de_COCO_format,bbox_COCO_format
from cv2 import imread, imshow, rectangle,putText,FONT_HERSHEY_SIMPLEX
from google.colab.patches import cv2_imshow

def draw_bbox(image_path:str,ids:list[int],coordinates:list[list[float]],cloud:bool=False):
    """Draw labelled bounding boxes on an image and display it.

    Args:
        image_path: path of the image to read.
        ids: one label per box; each is converted to text before drawing.
        coordinates: one ``[x_center, y_center, width, height]`` entry per box,
            as fractions of the image size. Values may be numbers or numeric
            strings (surrounding whitespace/newlines are accepted).
        cloud: use Colab's ``cv2_imshow`` when True, a native OpenCV window otherwise.

    Raises:
        FileNotFoundError: when the image cannot be read.
    """
    img = imread(image_path)
    # imread signals failure by returning None instead of raising.
    if img is None:
        raise FileNotFoundError(f"could not read image: {image_path}")
    img_height, img_width = img.shape[0], img.shape[1]

    for label_id, coordinate in zip(ids, coordinates):
        # float() accepts numbers and strings such as '0.5\n'.
        nc = bbox_de_COCO_format([float(value) for value in coordinate])
        top_left = (int(nc[0]*img_width), int(nc[1]*img_height))
        bottom_right = (int(nc[2]*img_width), int(nc[3]*img_height))
        rectangle(img, top_left, bottom_right, (255,0,0), 3)
        # putText requires a string; ids are typically ints.
        putText(img, str(label_id), (top_left[0]-20, top_left[1]+10), FONT_HERSHEY_SIMPLEX, 0.9, (255,0,0), 2)

    if cloud:
        cv2_imshow(img)
    else:
        from cv2 import waitKey, destroyAllWindows
        # imshow needs a window name as its first argument, and the window is
        # only rendered while waitKey runs; block until a key is pressed.
        imshow(str(image_path), img)
        waitKey(0)
        destroyAllWindows()

In [6]:
# NOTE(review): absolute, machine-specific path; `documents` is not assigned at top
# level in any cell shown here, so this call relies on leftover kernel state.
base_path = '/Users/juanpablovargasacosta/herbario/data/imagenes/'
# Writes 'default.yaml' (the default config_file_name) with the same path for train and val.
confg_yaml(base_path,base_path,documents['names'])

In [None]:
# Pair each metric name reported by the model with its evaluated value.
metric_pairs = list(zip(scanned_model.metrics_names, evaluations))
for name, value in metric_pairs:
    print(f'metric name : {name} - value : {value}')

# metricas usando sigmoide para bbox normalizadas y softmax para clases

metric name : loss - value : 0.14827848970890045
metric name : bbox_head0_loss - value : 0.002939825179055333
metric name : bbox_head1_loss - value : 0.007251664996147156
metric name : bbox_head2_loss - value : 0.03685268387198448
metric name : bbox_head3_loss - value : 0.017508987337350845
metric name : bbox_head4_loss - value : 0.029640620574355125
metric name : bbox_head5_loss - value : 0.05408470705151558
metric name : class_head0_loss - value : 0.0
metric name : class_head1_loss - value : 0.0
metric name : class_head2_loss - value : 0.0
metric name : class_head3_loss - value : 0.0
metric name : class_head4_loss - value : 0.0
metric name : class_head5_loss - value : 0.0
metric name : bbox_head0_custom_iou - value : 0.0
metric name : bbox_head0_auc - value : 0.5
metric name : bbox_head1_custom_iou - value : 0.0
metric name : bbox_head1_auc - value : 0.5
metric name : bbox_head2_custom_iou - value : 0.47345197200775146
metric name : bbox_head2_auc - value : 0.6789854764938354
metric name : bbox_head3_custom_iou - value : 0.0
metric name : bbox_head3_auc - value : 0.5
metric name : bbox_head4_custom_iou - value : 0.0
metric name : bbox_head4_auc - value : 0.5
metric name : bbox_head5_custom_iou - value : 0.4330933392047882
metric name : bbox_head5_auc - value : 0.7748516201972961
metric name : class_head0_custom_iou - value : 1.0
metric name : class_head0_auc - value : 0.0
metric name : class_head1_custom_iou - value : 0.23481781780719757
metric name : class_head1_auc - value : 0.5
metric name : class_head2_custom_iou - value : 1.0
metric name : class_head2_auc - value : 0.0
metric name : class_head3_custom_iou - value : 0.29149797558784485
metric name : class_head3_auc - value : 0.5
metric name : class_head4_custom_iou - value : 0.9959514141082764
metric name : class_head4_auc - value : 0.5
metric name : class_head5_custom_iou - value : 1.0
metric name : class_head5_auc - value : 0.0

# metricas usando linear para bbox y softmax para clases usando bbox normalizadas 

metric name : loss - value : 0.17858585715293884
metric name : bbox_head0_loss - value : 0.0026182420551776886
metric name : bbox_head1_loss - value : 0.007999297231435776
metric name : bbox_head2_loss - value : 0.07438085228204727
metric name : bbox_head3_loss - value : 0.014708954840898514
metric name : bbox_head4_loss - value : 0.022220784798264503
metric name : bbox_head5_loss - value : 0.05665772780776024
metric name : class_head0_loss - value : 0.0
metric name : class_head1_loss - value : 0.0
metric name : class_head2_loss - value : 0.0
metric name : class_head3_loss - value : 0.0
metric name : class_head4_loss - value : 0.0
metric name : class_head5_loss - value : 0.0
metric name : bbox_head0_custom_iou - value : 0.11331154406070709
metric name : bbox_head0_auc - value : 0.6937500238418579
metric name : bbox_head1_custom_iou - value : 0.09039562940597534
metric name : bbox_head1_auc - value : 0.9382540583610535
metric name : bbox_head2_custom_iou - value : 0.0823870450258255
metric name : bbox_head2_auc - value : 0.8888224959373474
metric name : bbox_head3_custom_iou - value : 0.07096018642187119
metric name : bbox_head3_auc - value : 0.8575149774551392
metric name : bbox_head4_custom_iou - value : 0.19312578439712524
metric name : bbox_head4_auc - value : 1.0
metric name : bbox_head5_custom_iou - value : 0.07338213920593262
metric name : bbox_head5_auc - value : 0.8548846244812012
metric name : class_head0_custom_iou - value : 1.0
metric name : class_head0_auc - value : 0.0
metric name : class_head1_custom_iou - value : 0.2955465614795685
metric name : class_head1_auc - value : 0.5
metric name : class_head2_custom_iou - value : 1.0
metric name : class_head2_auc - value : 0.0
metric name : class_head3_custom_iou - value : 0.26720649003982544
metric name : class_head3_auc - value : 0.5
metric name : class_head4_custom_iou - value : 1.0
metric name : class_head4_auc - value : 0.0
metric name : class_head5_custom_iou - value : 1.0
metric name : class_head5_auc - value : 0.0

# metricas usando linear para bbox y softmax para las clases

metric name : loss - value : 25366.60546875
metric name : bbox_head0_loss - value : 359.25372314453125
metric name : bbox_head1_loss - value : 1561.6563720703125
metric name : bbox_head2_loss - value : 10720.51953125
metric name : bbox_head3_loss - value : 3087.791748046875
metric name : bbox_head4_loss - value : 2807.509033203125
metric name : bbox_head5_loss - value : 6829.875
metric name : class_head0_loss - value : 0.0
metric name : class_head1_loss - value : 0.0
metric name : class_head2_loss - value : 0.0
metric name : class_head3_loss - value : 0.0
metric name : class_head4_loss - value : 0.0
metric name : class_head5_loss - value : 0.0
metric name : bbox_head0_custom_iou - value : 0.017898140475153923
metric name : bbox_head0_auc - value : 0.8218186497688293
metric name : bbox_head1_custom_iou - value : 0.014815075322985649
metric name : bbox_head1_auc - value : 0.8571521043777466
metric name : bbox_head2_custom_iou - value : 0.17787572741508484
metric name : bbox_head2_auc - value : 0.6871663331985474
metric name : bbox_head3_custom_iou - value : 0.05940670892596245
metric name : bbox_head3_auc - value : 0.6853057742118835
metric name : bbox_head4_custom_iou - value : 0.1887882947921753
metric name : bbox_head4_auc - value : 0.8044888377189636
metric name : bbox_head5_custom_iou - value : 0.17907769978046417
metric name : bbox_head5_auc - value : 0.6605395674705505
metric name : class_head0_custom_iou - value : 1.0
metric name : class_head0_auc - value : 0.0
metric name : class_head1_custom_iou - value : 0.2753036320209503
metric name : class_head1_auc - value : 0.5
metric name : class_head2_custom_iou - value : 1.0
metric name : class_head2_auc - value : 0.0
metric name : class_head3_custom_iou - value : 0.31578946113586426
metric name : class_head3_auc - value : 0.5
metric name : class_head4_custom_iou - value : 0.9919028282165527
metric name : class_head4_auc - value : 0.5
metric name : class_head5_custom_iou - value : 1.0
metric name : class_head5_auc - value : 0.0

# metricas con funcion de activacion relu para bbox y sigmoide para clases

metric name : loss - value : 34790.7421875
metric name : bbox_head0_loss - value : 305.9351806640625
metric name : bbox_head1_loss - value : 2154.948486328125
metric name : bbox_head2_loss - value : 14255.30078125
metric name : bbox_head3_loss - value : 4846.25048828125
metric name : bbox_head4_loss - value : 3412.474365234375
metric name : bbox_head5_loss - value : 9815.8359375
metric name : class_head0_loss - value : 0.0
metric name : class_head1_loss - value : 0.0
metric name : class_head2_loss - value : 0.0
metric name : class_head3_loss - value : 0.0
metric name : class_head4_loss - value : 0.0
metric name : class_head5_loss - value : 0.0
metric name : bbox_head0_custom_iou - value : 0.5003455281257629
metric name : bbox_head0_auc - value : 0.8652960062026978
metric name : bbox_head1_custom_iou - value : 0.05979517847299576
metric name : bbox_head1_auc - value : 0.760465145111084
metric name : bbox_head2_custom_iou - value : 0.2548713982105255
metric name : bbox_head2_auc - value : 0.7495487928390503
metric name : bbox_head3_custom_iou - value : 0.0863441526889801
metric name : bbox_head3_auc - value : 0.688020646572113
metric name : bbox_head4_custom_iou - value : 0.5988608598709106
metric name : bbox_head4_auc - value : 0.8660455942153931
metric name : bbox_head5_custom_iou - value : 0.3285883665084839
metric name : bbox_head5_auc - value : 0.7502604126930237
metric name : class_head0_custom_iou - value : 0.0017493302002549171
metric name : class_head0_auc - value : 0.0
metric name : class_head1_custom_iou - value : 0.0
metric name : class_head1_auc - value : 0.5
metric name : class_head2_custom_iou - value : 0.0
metric name : class_head2_auc - value : 0.0
metric name : class_head3_custom_iou - value : 0.0
metric name : class_head3_auc - value : 0.5
metric name : class_head4_custom_iou - value : 0.0
metric name : class_head4_auc - value : 0.5
metric name : class_head5_custom_iou - value : 0.0
metric name : class_head5_auc - value : 0.0

# metrica con funciones de activacion sigmoide para ambos bbox y clases

metric name : loss - value : 62710.8125
metric name : bbox_head0_loss - value : 579.985107421875
metric name : bbox_head1_loss - value : 2669.833984375
metric name : bbox_head2_loss - value : 27072.76171875
metric name : bbox_head3_loss - value : 5961.2978515625
metric name : bbox_head4_loss - value : 7870.39404296875
metric name : bbox_head5_loss - value : 18556.5390625
metric name : class_head0_loss - value : 0.0
metric name : class_head1_loss - value : 0.0
metric name : class_head2_loss - value : 0.0
metric name : class_head3_loss - value : 0.0
metric name : class_head4_loss - value : 0.0
metric name : class_head5_loss - value : 0.0
metric name : bbox_head0_custom_iou - value : 0.4661273956298828
metric name : bbox_head0_auc - value : 0.9547063112258911
metric name : bbox_head1_custom_iou - value : 0.1277552843093872
metric name : bbox_head1_auc - value : 0.9228104948997498
metric name : bbox_head2_custom_iou - value : 0.29493966698646545
metric name : bbox_head2_auc - value : 0.844988226890564
metric name : bbox_head3_custom_iou - value : 0.06415440887212753
metric name : bbox_head3_auc - value : 0.7879701852798462
metric name : bbox_head4_custom_iou - value : 0.3272666037082672
metric name : bbox_head4_auc - value : 0.9145087599754333
metric name : bbox_head5_custom_iou - value : 0.2419300675392151
metric name : bbox_head5_auc - value : 0.796000599861145
metric name : class_head0_custom_iou - value : 1.0
metric name : class_head0_auc - value : 0.0
metric name : class_head1_custom_iou - value : 0.0
metric name : class_head1_auc - value : 0.5
metric name : class_head2_custom_iou - value : 0.9959514141082764
metric name : class_head2_auc - value : 0.5
metric name : class_head3_custom_iou - value : 0.0
metric name : class_head3_auc - value : 0.5
metric name : class_head4_custom_iou - value : 0.0
metric name : class_head4_auc - value : 0.5
metric name : class_head5_custom_iou - value : 0.0
metric name : class_head5_auc - value : 0.0

# Revision de predicciones

In [None]:
# Print the id of every image record whose file name contains the sample picture.
matching_images = (image for image in images if 'COL000000671.png' in image['file_name'])
for image in matching_images:
    print(image['id'])

In [None]:
# Print every annotation attached to image id 21.
annotations_of_image = (annotation for annotation in annotations if annotation['image_id'] == 21)
for annotation in annotations_of_image:
    print(annotation)
    
        

In [None]:
from tensorflow.keras.preprocessing.image import load_img, img_to_array 
import os

# Build the path to the sample image under ./data/imagenes, relative to the
# current working directory, and load it.
path_image_to_predict_1 = os.path.join(os.getcwd(), 'data', 'imagenes', 'COL000000671.png')
predict_image_1 = load_img(path_image_to_predict_1)
# Resize to IMG_SHAPE (defined outside this cell) and convert to an array.
# NOTE(review): no pixel rescaling is applied here -- confirm this matches the
# preprocessing used when the model was trained.
imaga_re_size_predict_1 = tf.image.resize(predict_image_1, IMG_SHAPE)
image_array_predict_1 = img_to_array(imaga_re_size_predict_1)

# Add a leading axis so the model receives a batch containing one image.
image_array_predict_1 = tf.expand_dims(image_array_predict_1, axis=0)

In [None]:
# Run the model (scanned_model is defined outside this cell) on the single-image batch.
predictions = scanned_model.predict(image_array_predict_1)

In [None]:
# Display the raw prediction output for inspection.
predictions

# Arquitectura de consulta

In [None]:
import argparse
import os
import numpy as np
from tensorflow.keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, add, concatenate
# from tensorflow.keras.layers.merge import add, concatenate
from tensorflow.keras.models import Model
import struct
import cv2

# Summarise arrays with more than 1000 elements when they are printed.
np.set_printoptions(threshold=1000)

# Select the CUDA device through environment variables
# (device order by PCI bus id, only device "0" visible).
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "0",
})

# Command-line interface of the original script: a weights file and an image.
argparser = argparse.ArgumentParser(description='test yolov3 network with coco weights')
argparser.add_argument('-w', '--weights', help='path to weights file')
argparser.add_argument('-i', '--image', help='path to image file')

class WeightReader:
    """Sequential reader for a binary YOLOv3 weights file.

    The file starts with three 4-byte integers (major, minor, revision)
    followed by a 4- or 8-byte field that is skipped; everything after that is
    exposed as one flat float32 array that is consumed front to back.
    """

    def __init__(self, weight_file):
        """Read the header of ``weight_file`` and buffer the remaining floats.

        Arguments:
        weight_file -- path to the binary weights file
        """
        with open(weight_file, 'rb') as w_f:
            # the revision number is part of the header but is not used afterwards
            major, minor, _revision = struct.unpack('3i', w_f.read(12))

            # the width of the next header field depends on the file version
            if (major*10 + minor) >= 2 and major < 1000 and minor < 1000:
                w_f.read(8)
            else:
                w_f.read(4)

            binary = w_f.read()

        self.offset = 0
        self.all_weights = np.frombuffer(binary, dtype='float32')

    def read_bytes(self, size):
        """Return the next ``size`` float32 values and advance the cursor.

        Despite the name, ``size`` counts array elements, not bytes. Reading
        past the end of the buffer returns a shorter (possibly empty) array.
        """
        self.offset = self.offset + size
        return self.all_weights[self.offset-size:self.offset]

    def _read_kernel(self, kernel_shape):
        """Read one convolution kernel and return it with shape ``kernel_shape``-style axes.

        The values are reshaped to the reversed ``kernel_shape`` and then
        transposed with axes ``[2, 3, 1, 0]``.
        """
        kernel = self.read_bytes(np.prod(kernel_shape))
        kernel = kernel.reshape(list(reversed(kernel_shape)))
        return kernel.transpose([2,3,1,0])

    def load_weights(self, model):
        """Copy the buffered weights into the ``conv_<i>`` / ``bnorm_<i>`` layers of ``model``.

        Layer indices 0..105 are visited in order. Indices without a
        ``conv_<i>`` layer are reported and skipped. Indices 81, 93 and 105 are
        treated as having no batch-normalisation layer.

        Arguments:
        model -- object exposing ``get_layer(name)``; ``get_layer`` must raise
                 ``ValueError`` for unknown names

        Raises:
        ValueError -- if the buffered data does not fit a layer (for example a
                      truncated file). Previously this was swallowed and
                      misreported as "no convolution".
        """
        for i in range(106):
            # Only the lookup is allowed to fail: a missing layer is expected
            # for indices that are not convolutions.
            try:
                conv_layer = model.get_layer('conv_' + str(i))
            except ValueError:
                print("no convolution #" + str(i))
                continue

            print("loading weights of convolution #" + str(i))

            if i not in [81, 93, 105]:
                norm_layer = model.get_layer('bnorm_' + str(i))

                size = np.prod(norm_layer.get_weights()[0].shape)

                # order in the file: bias, scale, mean, variance
                beta  = self.read_bytes(size)
                gamma = self.read_bytes(size)
                mean  = self.read_bytes(size)
                var   = self.read_bytes(size)

                # order expected by the layer: scale, bias, mean, variance
                norm_layer.set_weights([gamma, beta, mean, var])

            conv_weights = conv_layer.get_weights()
            kernel_shape = conv_weights[0].shape

            if len(conv_weights) > 1:
                # layers with a bias store it before the kernel
                bias   = self.read_bytes(np.prod(conv_weights[1].shape))
                kernel = self._read_kernel(kernel_shape)
                conv_layer.set_weights([kernel, bias])
            else:
                conv_layer.set_weights([self._read_kernel(kernel_shape)])

    def reset(self):
        """Move the read cursor back to the first weight."""
        self.offset = 0

class BoundBox:
    """A detection box with corner coordinates, objectness and class scores.

    ``label`` and ``score`` start at -1, which marks them as "not computed
    yet"; they are filled in on first request and then reused, so later
    changes to ``classes`` do not affect an already computed value.
    """

    def __init__(self, xmin, ymin, xmax, ymax, objness = None, classes = None):
        self.xmin, self.ymin = xmin, ymin
        self.xmax, self.ymax = xmax, ymax

        self.objness, self.classes = objness, classes

        # sentinel values: resolved lazily by get_label / get_score
        self.label = self.score = -1

    def get_label(self):
        """Return the index of the highest entry in ``classes`` (cached)."""
        if self.label != -1:
            return self.label

        self.label = np.argmax(self.classes)
        return self.label

    def get_score(self):
        """Return the class score at the index given by ``get_label`` (cached)."""
        if self.score != -1:
            return self.score

        self.score = self.classes[self.get_label()]
        return self.score

def _conv_block(inp, convs, skip=True):
    """Chain the convolutions described by ``convs`` onto ``inp``.

    Arguments:
    inp   -- input tensor
    convs -- list of dicts with keys 'filter', 'kernel', 'stride', 'bnorm',
             'leaky' and 'layer_idx'; layers are named conv_/bnorm_/leaky_
             followed by 'layer_idx'
    skip  -- when True, the tensor entering the second-to-last convolution is
             added to the block output, so ``convs`` needs at least two entries

    Returns:
    the output tensor of the block
    """
    x = inp
    residual_position = len(convs) - 2

    for position, conv in enumerate(convs):
        # remember the tensor that feeds the last two convolutions
        if skip and position == residual_position:
            skip_connection = x

        downsample = conv['stride'] > 1
        layer_tag = str(conv['layer_idx'])

        # strided convolutions pad only on the top and left, then run unpadded
        if downsample:
            x = ZeroPadding2D(((1,0),(1,0)))(x)

        x = Conv2D(conv['filter'],
                   conv['kernel'],
                   strides=conv['stride'],
                   padding='valid' if downsample else 'same',
                   name='conv_' + layer_tag,
                   use_bias=not conv['bnorm'])(x)  # batch norm supplies the bias term

        if conv['bnorm']:
            x = BatchNormalization(epsilon=0.001, name='bnorm_' + layer_tag)(x)
        if conv['leaky']:
            x = LeakyReLU(alpha=0.1, name='leaky_' + layer_tag)(x)

    if skip:
        return add([skip_connection, x])
    return x

def _interval_overlap(interval_a, interval_b):
    """Return the length of the overlap between two 1-D intervals.

    Arguments:
    interval_a -- pair (start, end) of the first interval
    interval_b -- pair (start, end) of the second interval

    Returns:
    the overlap length, or 0 when the intervals do not overlap. The result is
    never negative, even if an interval is given with end < start.
    """
    x1, x2 = interval_a
    x3, x4 = interval_b

    # clamp at 0 so disjoint or inverted intervals never yield a negative length
    return max(0, min(x2, x4) - max(x1, x3))

def _sigmoid(x):
    """Return the element-wise logistic function 1 / (1 + exp(-x))."""
    return 1. / (1. + np.exp(-x))

def bbox_iou(box1, box2):
    """Return the intersection over union of two boxes.

    Arguments:
    box1 -- object with xmin, ymin, xmax and ymax attributes
    box2 -- object with xmin, ymin, xmax and ymax attributes

    Returns:
    the IoU as a float; 0.0 when the union has no area (for example two
    zero-area boxes), which previously caused a division by zero
    """
    intersect_w = _interval_overlap([box1.xmin, box1.xmax], [box2.xmin, box2.xmax])
    intersect_h = _interval_overlap([box1.ymin, box1.ymax], [box2.ymin, box2.ymax])

    intersect = intersect_w * intersect_h

    w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin
    w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin

    union = w1*h1 + w2*h2 - intersect

    # degenerate boxes have nothing to overlap with
    if union <= 0:
        return 0.0

    return float(intersect) / union

def make_yolov3_model():
    """
    Builds the YOLOv3 network as a Keras Model with three output heads.

    The input accepts 3-channel images of any height and width. The network is
    assembled from _conv_block calls whose 'layer_idx' values name the layers
    conv_<idx> / bnorm_<idx> / leaky_<idx>. Five stride-2 convolutions
    (layer_idx 1, 5, 12, 37 and 62) precede the first head; the other two heads
    are built after one and two 2x upsampling steps respectively, each
    concatenated with an earlier feature map (skip_61, skip_36).

    Every head ends in a 1x1 convolution with 255 filters and no batch
    normalisation or activation (layer_idx 81, 93 and 105).
    NOTE(review): 255 is consistent with 3 boxes x (5 + 80 classes) as used by
    the decoding code in this notebook -- confirm if the class count changes.

    Returns:
    model -- Model mapping the input image to [yolo_82, yolo_94, yolo_106]
    """
    input_image = Input(shape=(None, None, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36 (seven residual blocks, layer indices advance by 3 per block)
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
        
    # kept for the concatenation that feeds the third head
    skip_36 = x
        
    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61 (seven residual blocks)
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
        
    # kept for the concatenation that feeds the second head
    skip_61 = x
        
    # Layer 62 => 65
    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])

    # Layer 66 => 74 (three residual blocks)
    for i in range(3):
        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
        
    # Layer 75 => 79 (no residual connection from here on: skip=False)
    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)

    # Layer 80 => 82 (first output head)
    yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

    # Layer 83 => 86 (branch off x, upsample 2x and merge with skip_61)
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_61])

    # Layer 87 => 91
    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

    # Layer 92 => 94 (second output head)
    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

    # Layer 95 => 98 (branch off x, upsample 2x and merge with skip_36)
    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
    x = UpSampling2D(2)(x)
    x = concatenate([x, skip_36])

    # Layer 99 => 106 (third output head)
    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

    model = Model(input_image, [yolo_82, yolo_94, yolo_106])    
    return model

def preprocess_input(image, net_h, net_w):
    """
    Letterboxes an image into the network input size.

    The image is scaled to fit inside net_h x net_w while keeping its aspect
    ratio, its channel order is reversed, its values are divided by 255 and it
    is centred on a canvas filled with 0.5.

    Arguments:
    image -- array of shape (height, width, 3)
    net_h -- height of the network input in pixels
    net_w -- width of the network input in pixels

    Returns:
    new_image -- array of shape (1, net_h, net_w, 3)
    """
    image_h, image_w, _ = image.shape

    # determine the new size of the image
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_h = (image_h * net_w)/image_w
        new_w = net_w
    else:
        new_w = (image_w * net_h)/image_h
        new_h = net_h

    # Fix the integer size once and reuse it for both the resize and the paste
    # region. Deriving the slice bounds from the float size could give a region
    # one pixel larger than the resized image and fail on assignment.
    new_w = max(1, int(new_w))
    new_h = max(1, int(new_h))

    # resize the image to the new size
    resized = cv2.resize(image[:,:,::-1]/255., (new_w, new_h))

    # embed the image into the standard letter box
    top = int((net_h - new_h)//2)
    left = int((net_w - new_w)//2)

    new_image = np.ones((net_h, net_w, 3)) * 0.5
    new_image[top:top + new_h, left:left + new_w, :] = resized
    new_image = np.expand_dims(new_image, 0)

    return new_image

def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w):
    """
    Decodes one output grid of the network into candidate bounding boxes.

    Arguments:
    netout     -- array of shape (grid_h, grid_w, nb_box * (5 + nb_class));
                  it is not modified
    anchors    -- flat sequence [w0, h0, w1, h1, ...] of anchor sizes in
                  network-input pixels; its length sets the number of boxes
                  per grid cell
    obj_thresh -- boxes whose objectness is not above this value are dropped,
                  and class scores not above it are set to 0
    nms_thresh -- unused; kept so existing calls keep working
    net_h      -- height of the network input in pixels
    net_w      -- width of the network input in pixels

    Returns:
    boxes -- list of BoundBox in row, column, anchor order, with coordinates
             expressed as fractions of the network input size
    """
    grid_h, grid_w = netout.shape[:2]
    nb_box = len(anchors) // 2

    # work on a copy: reshape returns a view, so in-place edits would
    # otherwise change the caller's array
    netout = netout.reshape((grid_h, grid_w, nb_box, -1)).copy()

    boxes = []

    netout[..., :2]  = _sigmoid(netout[..., :2])
    netout[..., 4:]  = _sigmoid(netout[..., 4:])
    # class score = objectness * class probability, zeroed when not above the threshold
    netout[..., 5:]  = netout[..., 4][..., np.newaxis] * netout[..., 5:]
    netout[..., 5:] *= netout[..., 5:] > obj_thresh

    # np.argwhere walks the grid in row, column, box order
    for row, col, b in np.argwhere(netout[..., 4] > obj_thresh):
        # integer cell indices: a fractional row would shift the box centre
        row, col, b = int(row), int(col), int(b)

        # 4th element is objectness score
        objectness = netout[row, col, b, 4]

        # first 4 elements are x, y, w, and h
        x, y, w, h = netout[row, col, b, :4]

        x = (col + x) / grid_w # center position, unit: image width
        y = (row + y) / grid_h # center position, unit: image height
        w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width
        h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height

        # last elements are class probabilities
        classes = netout[row, col, b, 5:]

        boxes.append(BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes))

    return boxes

def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w):
    """
    Maps box coordinates from the letterboxed network input to image pixels.

    The boxes are updated in place: the letterbox padding is removed and the
    coordinates are scaled to the image size and truncated to int.

    Arguments:
    boxes   -- list of objects with xmin, ymin, xmax and ymax attributes given
               as fractions of the network input size
    image_h -- height of the original image in pixels
    image_w -- width of the original image in pixels
    net_h   -- height of the network input in pixels
    net_w   -- width of the network input in pixels
    """
    # size of the image area inside the letterbox
    if (float(net_w)/image_w) < (float(net_h)/image_h):
        new_w = net_w
        new_h = (image_h*net_w)/image_w
    else:
        # the height fills the network input here, so it must be net_h
        new_h = net_h
        new_w = (image_w*net_h)/image_h

    # the same offsets and scales apply to every box
    x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w
    y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h

    for box in boxes:
        box.xmin = int((box.xmin - x_offset) / x_scale * image_w)
        box.xmax = int((box.xmax - x_offset) / x_scale * image_w)
        box.ymin = int((box.ymin - y_offset) / y_scale * image_h)
        box.ymax = int((box.ymax - y_offset) / y_scale * image_h)
        
def do_nms(boxes, nms_thresh):
    """
    Applies per-class non-maximum suppression to the boxes, in place.

    For every class the boxes are visited from highest to lowest score. Each
    box that still has a non-zero score sets the class score of every
    lower-ranked box to 0 when their IoU is at least nms_thresh.

    Arguments:
    boxes      -- list of boxes with a mutable ``classes`` sequence
    nms_thresh -- IoU value from which a lower-ranked box is suppressed
    """
    if len(boxes) == 0:
        return

    for c in range(len(boxes[0].classes)):
        # indices of the boxes ordered by descending score for class c
        ranking = np.argsort([-box.classes[c] for box in boxes])

        for rank, keeper_index in enumerate(ranking):
            keeper = boxes[keeper_index]

            # already suppressed, or never scored for this class
            if keeper.classes[c] == 0:
                continue

            for candidate_index in ranking[rank + 1:]:
                candidate = boxes[candidate_index]

                if bbox_iou(keeper, candidate) >= nms_thresh:
                    candidate.classes[c] = 0
                    
def draw_boxes(image, boxes, labels, obj_thresh):
    """
    Draws every box that has at least one class score above obj_thresh.

    Each matching class is printed as "<label>: <score * 100>%". The text drawn
    next to the box is the concatenation of all matching labels followed by
    the value of box.get_score().

    Arguments:
    image      -- image array, drawn on in place through cv2
    boxes      -- list of boxes with xmin, ymin, xmax, ymax, classes and get_score()
    labels     -- class names, indexed like box.classes
    obj_thresh -- minimum class score for a label to be reported

    Returns:
    image -- the same image object that was passed in
    """
    green = (0,255,0)

    for box in boxes:
        label_str = ''
        label = -1

        for i, name in enumerate(labels):
            if not box.classes[i] > obj_thresh:
                continue

            label_str += name
            label = i
            print(name + ': ' + str(box.classes[i]*100) + '%')

        # no class passed the threshold: nothing to draw for this box
        if label < 0:
            continue

        cv2.rectangle(image, (box.xmin,box.ymin), (box.xmax,box.ymax), green, 3)
        cv2.putText(image,
                    label_str + ' ' + str(box.get_score()),
                    (box.xmin, box.ymin - 13),
                    cv2.FONT_HERSHEY_SIMPLEX,
                    1e-3 * image.shape[0],  # font scale proportional to the image height
                    green, 2)

    return image

def _main_(args):
    """
    Runs YOLOv3 on one image and writes a copy with the detections drawn.

    The output file is written next to the input, with '_detected' inserted
    before the file extension.

    Arguments:
    args -- object with ``weights`` (path to the weights file) and ``image``
            (path to the image file) attributes

    Raises:
    OSError -- if the image cannot be read
    """
    weights_path = args.weights
    image_path   = args.image

    # set some parameters
    net_h, net_w = 416, 416
    obj_thresh, nms_thresh = 0.5, 0.45
    # one list of anchor sizes per network output, in output order
    anchors = [[116,90,  156,198,  373,326],  [30,61, 62,45,  59,119], [10,13,  16,30,  33,23]]
    labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", \
              "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", \
              "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", \
              "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", \
              "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", \
              "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", \
              "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", \
              "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", \
              "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", \
              "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"]

    # make the yolov3 model to predict 80 classes on COCO
    yolov3 = make_yolov3_model()

    # load the weights trained on COCO into the model
    weight_reader = WeightReader(weights_path)
    weight_reader.load_weights(yolov3)

    # preprocess the image
    image = cv2.imread(image_path)
    # cv2.imread signals failure by returning None instead of raising
    if image is None:
        raise OSError('could not read image: ' + str(image_path))
    image_h, image_w, _ = image.shape
    new_image = preprocess_input(image, net_h, net_w)

    # run the prediction
    yolos = yolov3.predict(new_image)
    boxes = []

    for i in range(len(yolos)):
        # decode the output of the network
        boxes += decode_netout(yolos[i][0], anchors[i], obj_thresh, nms_thresh, net_h, net_w)

    # correct the sizes of the bounding boxes
    correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w)

    # suppress non-maximal boxes
    do_nms(boxes, nms_thresh)

    # draw bounding boxes on the image using labels
    draw_boxes(image, boxes, labels, obj_thresh)

    # write the image with bounding boxes to file; splitext keeps the real
    # extension intact whatever its length (.jpg, .jpeg, ...)
    root, extension = os.path.splitext(image_path)
    cv2.imwrite(root + '_detected' + extension, (image).astype('uint8'))

# if __name__ == '__main__':
#     args = argparser.parse_args()
#     _main_(args)

In [None]:
# Build the YOLOv3 architecture defined above; no weights are loaded in this cell.
model_version1 =  make_yolov3_model()