In [1]:
cd models/research/object_detection

/home/scar3crow/Dropbox/WorkStation-Subrata/python/models/research/object_detection


In [3]:
import numpy as np
import math
import pandas as pd
import cv2
import os
import tqdm
from scipy.io import loadmat

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

from PIL import Image
import pytesseract

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import backend as K

from utils import *

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam, SGD
from keras.layers import *

from keras.applications import MobileNetV2
from keras.applications import InceptionResNetV2

from keras.models import Model
from keras.models import model_from_json


In [4]:
target_size = [480, 480]
target_w = 480 # target sizes of image in model input
target_h = 480 #target sizes of image in model input

grid_size = [15, 15]
grid_y_axis = 15  # each image is to be segmented to 13 x 13 grid
grid_x_axis = 15  # # each image is to be segmented to 13 x 13 grid

grid_w = target_w / grid_x_axis  # grid cell width
grid_h = target_h / grid_y_axis  # grid cell height

channels = 3
num_anchors = 3
class_num = 5 # vendor, invoice, inv_date, po, buyer
info = 5 + class_num    # pc, x, y, h, w, and class probabilities

categories = ['vendor', 'invoice', 'inv_date', 'po', 'buyer'] # details of classes

In [5]:
# Making a list of image path

inv_directory = '/home/scar3crow/Downloads/8-6-new-scan'  ## 'invoices' is a zip file of jpg images in ...../Downloads 
                                                        
inv_new_image = ['/home/scar3crow/Downloads/8-6-new-scan/{}'.format(i) for i in os.listdir(inv_directory)] # making the list
inv_new_image.sort() # Sorting the list

num_images = len(inv_new_image)

print('Number of images = ', num_images)
inv_new_image[24]

Number of images =  36


'/home/scar3crow/Downloads/8-6-new-scan/50a.jpg'

In [6]:
# Check sizes of exiting images & Create a Dataframe with image id and height(row) and width(column):

rows = []
columns = []
image_sl = []
df_new = pd.DataFrame()

for i in range(len(inv_new_image)):
    image = cv2.imread(inv_new_image[i]) ## Loading image
    height, width, _ = image.shape
    rows.append(height)
    columns.append(width)
    image_sl.append(inv_new_image[i])
    
row_values = pd.Series(rows)
col_values = pd.Series(columns)
image_num = pd.Series(image_sl)


df_new.insert(loc=0, column='image_serial', value=image_num)
df_new.insert(loc=1, column='rows', value=row_values)
df_new.insert(loc=2, column='columns', value=col_values)

df_new.head(3)

Unnamed: 0,image_serial,rows,columns
0,/home/scar3crow/Downloads/8-6-new-scan/101a.jpg,160,416
1,/home/scar3crow/Downloads/8-6-new-scan/102a.jpg,406,870
2,/home/scar3crow/Downloads/8-6-new-scan/103a.jpg,260,416


In [7]:
# Loading output of VGG Image Annotation tool and create a dataframe

r_new_data = pd.read_csv('/home/scar3crow/Downloads/via_new_data.csv')
num_obj = r_new_data['region_count'][0] # number of objects in each photo
r_new_data.drop(r_new_data.columns[[1, 2, 3, 4]], axis=1, inplace=True) # reduce unnecessary columns
r_new_data.sort_values(by=['#filename'], ascending=True) # Sorting based on image-id
num_images = r_new_data["#filename"].nunique() # Find out number of unique images

print('Number of classes = ', num_obj)
print('Number of unique images = ', num_images)
r_new_data[58:61]

Number of classes =  5
Number of unique images =  36


Unnamed: 0,#filename,region_shape_attributes,region_attributes
58,63a.jpg,"{""name"":""rect"",""x"":211,""y"":64,""width"":76,""heig...","{""text"":""po""}"
59,63a.jpg,"{""name"":""rect"",""x"":2,""y"":68,""width"":165,""heigh...","{""text"":""buyer""}"
60,101a.jpg,"{""name"":""rect"",""x"":6,""y"":23,""width"":119,""heigh...","{""text"":""vendor""}"


In [8]:
# Making a dataframe for Image_id, x, y, width, height, class, image_width and image_height

x = []
y = []
width = []
height = []
obj_class = []
i_width = []
i_height = []
img_path = []
img_index = []

for i in range(len(r_new_data)):
    
    r_size = r_new_data.values[i, 1][1:(len(r_new_data.values[i, 1])-1)]
    r_size_par = r_size.split(",")
    
    x.append(int("".join(filter(str.isdigit, r_size_par[1]))))
    y.append(int("".join(filter(str.isdigit, r_size_par[2]))))
    width.append(int("".join(filter(str.isdigit, r_size_par[3]))))
    height.append(int("".join(filter(str.isdigit, r_size_par[4]))))
    
    r_attribs = r_new_data.values[i, 2][1:(len(r_new_data.values[i, 2])-1)]
    r_attribs_par = r_attribs.split(':')[1]
    obj_class.append(r_attribs_par[1:(len(r_attribs_par)-1)])
    
    foto_id = r_new_data['#filename'][i]
    i_path = '/home/scar3crow/Downloads/8-6-new-scan/' + foto_id
    foto_index = int(df_new[df_new['image_serial'] == i_path].index[0])
    foto_width = df_new['columns'][foto_index]
    foto_height = df_new['rows'][foto_index]
    i_width.append(foto_width)
    i_height.append(foto_height)
    img_path.append(i_path)
    img_index.append(foto_index)
    
x_values = pd.Series(x)
y_values = pd.Series(y)
width_values = pd.Series(width)
height_values = pd.Series(height)
class_values = pd.Series(obj_class)
i_width_values = pd.Series(i_width)
i_height_values = pd.Series(i_height)
img_path_values = pd.Series(img_path)
img_index_values = pd.Series(img_index)

r_new_data.insert(loc=1, column='img_idx', value=img_index_values)
r_new_data.insert(loc=2, column='i_path', value=img_path_values)
r_new_data.insert(loc=3, column='x', value=x_values)
r_new_data.insert(loc=4, column='y', value=y_values)
r_new_data.insert(loc=5, column='width', value=width_values)
r_new_data.insert(loc=6, column='height', value=height_values)
r_new_data.insert(loc=7, column='obj_class', value=class_values)
r_new_data.insert(loc=8, column='img_wd', value=i_width_values)
r_new_data.insert(loc=9, column='img_ht', value=i_height_values)

r_new_data.drop(r_new_data.columns[[10, 11]], axis=1, inplace=True) # reduce unnecessary columns

r_new_data.rename({'#filename': 'img_id'}, axis=1, inplace=True) # changing column name

r_new_data[3:6]


Unnamed: 0,img_id,img_idx,i_path,x,y,width,height,obj_class,img_wd,img_ht
3,50a.jpg,24,/home/scar3crow/Downloads/8-6-new-scan/50a.jpg,221,59,103,24,po,416,209
4,50a.jpg,24,/home/scar3crow/Downloads/8-6-new-scan/50a.jpg,5,57,206,56,buyer,416,209
5,51a.jpg,25,/home/scar3crow/Downloads/8-6-new-scan/51a.jpg,5,0,120,56,vendor,416,194


In [9]:
print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images
print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts())

Number of unique images =  36
Number of classes in diff. categories =  buyer      38
invoice    36
vendor     36
date       36
po         33
order       1
Name: obj_class, dtype: int64


In [10]:
# We have to correct above :

# To find smallest width & height boxes in 'buyer' which should be 'po'
gb = r_new_data.groupby('obj_class')    
[gb.get_group('buyer') for x in gb.groups]

[       img_id  img_idx                                           i_path    x  \
 4     50a.jpg       24   /home/scar3crow/Downloads/8-6-new-scan/50a.jpg    5   
 9     51a.jpg       25   /home/scar3crow/Downloads/8-6-new-scan/51a.jpg    4   
 14    52a.jpg       26   /home/scar3crow/Downloads/8-6-new-scan/52a.jpg    1   
 19    53a.jpg       27   /home/scar3crow/Downloads/8-6-new-scan/53a.jpg    0   
 24    54a.jpg       28   /home/scar3crow/Downloads/8-6-new-scan/54a.jpg   31   
 29    55a.jpg       29   /home/scar3crow/Downloads/8-6-new-scan/55a.jpg    1   
 34    56a.jpg       30   /home/scar3crow/Downloads/8-6-new-scan/56a.jpg    1   
 39    59a.jpg       31   /home/scar3crow/Downloads/8-6-new-scan/59a.jpg    3   
 44    60a.jpg       32   /home/scar3crow/Downloads/8-6-new-scan/60a.jpg    0   
 49    61a.jpg       33   /home/scar3crow/Downloads/8-6-new-scan/61a.jpg    1   
 54    62a.jpg       34   /home/scar3crow/Downloads/8-6-new-scan/62a.jpg    4   
 59    63a.jpg       35   /h

In [11]:
# Correcting above wrong spelling & converting buyer to po of object classes and rechecking

id_1 = r_new_data.index[r_new_data['obj_class'] == 'order'] # Finding the index
id_2 = r_new_data.index[r_new_data['obj_class'] == 'date'] # to change 'date' to 'inv_date' to be consistent with old data

r_new_data.at[id_1, 'obj_class'] = 'po' # writing the correct spelling 
r_new_data.at[88, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[163, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[id_2, 'obj_class'] = 'inv_date' # # 'date' to 'inv_date'

print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images
print('Number of unique classes = ', r_new_data['obj_class'].nunique())
print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts())

# r_new_data.drop(r_new_data.columns[[0]], axis=1, inplace=True) # reduce unnecessary columns

Number of unique images =  36
Number of unique classes =  5
Number of classes in diff. categories =  inv_date    36
po          36
invoice     36
vendor      36
buyer       36
Name: obj_class, dtype: int64


In [12]:
## For each image, we have to find : (a) line_index = integer, (b) img_path = string, (c) boxes = shape [N, 4], 
## N is the ground truth count, elements in the second dimension are [x_min, y_min, x_max, y_max] (d) labels = shape
## [N]. class index. (e) img_width = int.  =f) img_height = int

def single_image_info(lines):
    
    ## lines will be a dataframe like, for i in range(num_images), lines = r_new_data[i*5:(i+1)*5]
    
    line_idx = lines.iat[0, 1]
    pic_path = lines.iat[0, 2]
    img_width = lines.iat[0, 8]
    img_height = lines.iat[0, 9]
    
    boxes = []
    labels = []
    for i in range(len(lines)):
        label, x_min, y_min, x_max, y_max = int(i), float(lines.iat[i,3]), float(lines.iat[i,4]), float(lines.iat[i,3]+lines.iat[i,5]), float(lines.iat[i,4]+lines.iat[i,6])
        boxes.append([x_min, y_min, x_max, y_max])
        labels.append(label)
        
    boxes = np.asarray(boxes, np.float32)
    labels = np.asarray(labels, np.int64)
    
    return line_idx, pic_path, boxes, labels, img_width, img_height  ## boxes are in format xmin, ymin, xmax, ymax


In [13]:
## Creating the complete data set :

all_image_line = []
for i in range(num_images):
    image_line = []
    limit_lower = i*5
    limit_upper = limit_lower+5
    lines = r_new_data[limit_lower:limit_upper]
    line_idx, pic_path, boxes, labels, img_width, img_height = single_image_info(lines)
    image_line.append(line_idx)
    image_line.append(pic_path)
    image_line.append(boxes)
    image_line.append(labels)
    image_line.append(img_width)
    image_line.append(img_height)
    all_image_line.append(image_line)
    
print(len(all_image_line))
print(all_image_line[1])   ##  boxes are in format xmin, ymin, xmax, ymax

36
[25, '/home/scar3crow/Downloads/8-6-new-scan/51a.jpg', array([[  5.,   0., 125.,  56.],
       [239.,   1., 279.,  20.],
       [328.,   1., 382.,  21.],
       [238.,  51., 302.,  74.],
       [  4.,  53., 156., 117.]], dtype=float32), array([0, 1, 2, 3, 4]), 416, 194]


In [14]:
# Train and Test split

data_train, data_val = train_test_split(all_image_line, train_size = 0.8 , shuffle = True)

num_all_bbox = len(all_image_line) * len(all_image_line[0][2])
num_bb_train = len(data_train) * len(data_train[0][2])
num_bb_val = len(data_val) * len(data_val[0][2])
print(num_all_bbox, num_bb_train, num_bb_val)

180 140 40




In [15]:
## calculating anchors from true boundary boxes :

def iou_kmeans(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k, 0) where k is the number of clusters
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou = intersection / (box_area + cluster_area - intersection)

    return iou

def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: distance function
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]


    while True:
        for row in range(rows):
            distances[row] = 1 - iou_kmeans(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)

        last_clusters = nearest_clusters

    return clusters

In [16]:
## Finding out anchors :
## Firstly, converting true boundary box width, height to width & height with respect to target image :
## finaly find anchors. Anchors here are in absolute size w.r.t. target image but not as % of target image or 
## as multiple of unit grids.

# num_all_bb = len(r_new_data) # if no. of bboxes varies for images, this formula should be used 

anchors_wrt_target = np.zeros((3,2))

num_all_bb = len(all_image_line) * len(all_image_line[0][2])  ## from all image line data

b_box_wrt_target = np.zeros((num_all_bb,2))

for i in range(num_all_bb):
    
    image_w = r_new_data['img_wd'][i]
    image_h = r_new_data['img_ht'][i]

    x_ratio = target_w / image_w 
    y_ratio = target_h / image_h
    
    anchor_w = r_new_data['width'][i] * x_ratio
    anchor_h = r_new_data['height'][i] * y_ratio
    b_box_wrt_target[i, 0] = anchor_w
    b_box_wrt_target[i, 1] = anchor_h
    
anchors_wrt_target = kmeans(b_box_wrt_target, num_anchors)

print(anchors_wrt_target.shape)
print(anchors_wrt_target)     ## anchors wrt target image in abs. value and in format width, height


(3, 2)
[[129.23076923  37.92592593]
 [ 68.07692308  65.30612245]
 [189.80769231 160.36697248]]


In [18]:
## Anchors w.r.t target image but in terms of no. of grids and in format width and height

grid_stride = target_size[0] / grid_size[0]
anchors_wrt_ti = (anchors_wrt_target / grid_stride).astype(np.float32)
print(anchors_wrt_ti)

[[4.0384617 1.1851852]
 [2.1274037 2.0408163]
 [5.9314904 5.011468 ]]


In [148]:
## Pre-processing the original data to get y_true :

def process_box(ori_boxes, ori_img_width, ori_img_height, labels, target_size, class_num, anchors_wrt_target):
    '''
    Generate the y_true label, i.e. the ground truth feature_map.
    params:
        boxes: [N, 5] shape, float32 dtype. `x_min, y_min, x_max, y_mix, mixup_weight`.
        labels: [N] shape, int64 dtype.
        class_num: int64 num.
        anchors: [3,2] shape, float32 dtype.
    '''
    
    img_width = ori_img_width
    img_height = ori_img_height
    boxes = ori_boxes           ## boxes in format xmin, ymin, xmax, ymax in absolute value
    anchors = anchors_wrt_target
    
    x_ratio = target_size[1] / img_width
    y_ratio = target_size[0] / img_height
    
    boxes_wrt_target = np.zeros((5,4)).astype(np.float32)
    box_centers_target = np.zeros((5,2)).astype(np.float32)

    boxes_wrt_target[:,0] = boxes[:,0] * x_ratio  # xmin absolute value wrt target image
    boxes_wrt_target[:,1] = boxes[:,1] * y_ratio  # ymin absolute value wrt target image
    boxes_wrt_target[:,2] = boxes[:,2] * x_ratio  # xmax absolute value wrt target image
    boxes_wrt_target[:,3] = boxes[:,3] * y_ratio  # ymax absolute value wrt target image
    
    # In above, boxes_wrt_target shape is (5, 4), now this will be taken to (5. 5) by adding 1 at end
#    boxes_wrt_target = np.concatenate((boxes_wrt_target, np.full(shape=(boxes_wrt_target.shape[0], 1), fill_value=1., dtype=np.float32)), axis=-1)
    box_centers_target = (boxes_wrt_target[:, 0:2] + boxes_wrt_target[:, 2:4]) / 2  ## centers wrt target, abs values
    
    box_sizes = boxes[:, 2:4] - boxes[:, 0:2]  #xmax-xmin = width and ymax-ymin = height wrt original image
    box_sizes[:,0] = box_sizes[:,0] * x_ratio  # width w.r.t target image in absolute value
    box_sizes[:,1] = box_sizes[:,1] * y_ratio  # width w.r.t target image in absolute value
    
#    y_true_13 = np.zeros((target_size[1] // 32, target_size[0] // 32, 3, 6 + class_num), np.float32)
    y_true_13 = np.zeros((target_size[1] // 32, target_size[0] // 32, 3, 5 + class_num), np.float32)

#    y_true = [y_true_13]
    
    box_sizes_exp = np.expand_dims(box_sizes, 1)
    mins = np.maximum(- box_sizes_exp / 2, - anchors / 2)
    maxs = np.minimum(box_sizes_exp / 2, anchors / 2)
    whs = maxs - mins

    iou = (whs[:, :, 0] * whs[:, :, 1]) / (
                box_sizes_exp[:, :, 0] * box_sizes_exp[:, :, 1] + anchors[:, 0] * anchors[:, 1] - whs[:, :, 0] * whs[:, :,
                                                                                                         1] + 1e-10)
    best_match_idx = np.argmax(iou, axis=1)

    anchor_mask = np.zeros((target_size[1] // 32, target_size[0] // 32, 3))

    grid_stride = 32  ## = targetsize / no. of grid cells
    
    for i, idx in enumerate(best_match_idx):
        
        c_x = box_centers_target[i, 0] / grid_stride
        c_y = box_centers_target[i, 1] / grid_stride
        
        b_w = box_sizes[i, 0] / grid_stride
        b_h = box_sizes[i, 1] / grid_stride

        x = int(c_x)
        y = int(c_y)
        k = int(idx)
        c = int(labels[i])

        print(x, y, k, c)

# Very Imp : Now preparing y_true: all values x_center, y_cemter, width & height are being taken to % of target image

        c_x_x = c_x - x
        if c_x_x == 0.:
            c_x_x = (c_x - x) + .0000001
            
        c_y_y = c_y - y
        
        if c_y_y == 0.:
            c_y_y = (c_y - y) + .0000001


        y_true_13[y, x, k, 0] = np.log((c_x_x / (1 - c_x_x)))
        y_true_13[y, x, k, 1] = np.log((c_y_y / (1 - c_y_y)))
        
        y_true_13[y, x, k, 2] = np.log(b_w / anchors[k,0])
        y_true_13[y, x, k, 3] = np.log(b_h / anchors[k,1])

        y_true_13[y, x, k, 4] = 1.
        y_true_13[y, x, k, 5 + c] = 1.

        anchor_mask[y, x, k] = 1

    return y_true_13, anchor_mask  ## all data are  w,r,t, grid cells

In [149]:
## Single image-wise image/boundary box preprocessing:

def parse_data(line, class_num, target_size, anchors):   ## (mode, letterbox_resize):
    '''
    param:
        line: a line from the training/test txt file
        class_num: totol class nums.
        target_size: the size of image to be resized to. [width, height] format.
        anchors: anchors.
        mode: 'train' or 'val'. When set to 'train', data_augmentation will be applied.
        letterbox_resize: whether to use the letterbox resize, i.e., keep the original aspect ratio in the resized image.
    '''
    
    img_idx, pic_path, boxes, labels,img_width, img_height = line  # boxes in format xmin, ymin, xmax, ymax
    img = cv2.imread(pic_path)
    img_resized = cv2.resize(img,(target_size[0], target_size[1]))
    
    # expand the 2nd dimension, mix up weight default to 1.
    boxes = np.concatenate((boxes, np.full(shape=(boxes.shape[0], 1), fill_value=1., dtype=np.float32)), axis=-1)

    img_resized = cv2.cvtColor(img_resized, cv2.COLOR_BGR2RGB).astype(np.float32)

    # the input of yolo_v3 should be in range 0~1, lets change to -0.5 to +0.5
    
#    y = (x - min) / (max - min)
    
    img_resized = (img_resized - 127.5)/ 255.

    y_true_13, anchor_mask = process_box(boxes, img_width, img_height, labels, target_size, class_num, anchors)

    return img_idx, img_resized, y_true_13, anchor_mask


In [150]:
## Making the data ready for entering into network :

anchors = anchors_wrt_target
image_index = []
image_resized = []
image_y_true = []
image_anchor_mask = []

for i in range(len(data_train)):

    line = data_train[i]
    
    img_idx, img_resized, y_true, anchor_mask = parse_data(line, class_num, target_size, anchors)
    
    
    image_index.append(img_idx)
    image_resized.append(img_resized)
    image_y_true.append(y_true)
    image_anchor_mask.append(anchor_mask)
    
train_image_index = image_index
X_train = np.array(image_resized).astype(np.float32)
Y_train = np.array(image_y_true).astype(np.float32)
train_anchor_mask = np.array(image_anchor_mask).astype(np.float32)

image_index = []
image_resized = []
image_y_true = []
image_anchor_mask = []

for i in range(len(data_val)):
    line = data_val[i]
    
    img_idx, img_resized, y_true, anchor_mask = parse_data(line, class_num, target_size, anchors)
    image_index.append(img_idx)
    image_resized.append(img_resized)
    image_y_true.append(y_true)
    image_anchor_mask.append(anchor_mask)
val_image_index = image_index
X_val = np.array(image_resized).astype(np.float32)
Y_val = np.array(image_y_true).astype(np.float32)
val_anchor_mask = np.array(image_anchor_mask).astype(np.float32)

image_index = []
image_resized = []
image_y_true = []


3 2 2 0
9 1 1 1
12 1 1 2
9 5 0 3
3 6 2 4
3 6 2 0
9 5 1 1
12 5 1 2
9 8 0 3
3 9 2 4
2 5 2 0
8 3 1 1
12 3 1 2
9 8 1 3
3 11 2 4
4 3 2 0
9 1 1 1
12 1 1 2
9 5 1 3
3 10 2 4
7 2 2 0
11 9 0 1
11 10 0 2
11 12 0 3
4 11 2 4
3 2 2 0
9 1 1 1
12 1 1 2
9 5 1 3
2 6 2 4
2 3 2 0
9 1 1 1
12 1 1 2
10 7 1 3
3 10 2 4
2 2 2 0
9 0 1 1
12 0 1 2
9 3 1 3
3 6 2 4
11 3 2 0
9 9 1 1
13 9 0 2
10 12 0 3
4 11 2 4
5 2 2 0
2 5 0 1
2 4 0 2
2 7 0 3
10 12 2 4
7 5 2 0
10 10 0 1
10 11 0 2
11 13 0 3
4 12 2 4
3 2 2 0
9 1 1 1
12 1 1 2
9 5 1 3
3 6 2 4
2 2 2 0
9 0 1 1
12 0 1 2
9 4 1 3
2 6 2 4
4 3 2 0
9 1 1 1
12 1 1 2
9 4 1 3
3 10 2 4
2 2 2 0
8 1 1 1
12 1 1 2
8 6 1 3
3 7 2 4
2 3 0 0
9 5 0 1
9 7 0 2
9 12 0 3
3 9 2 4
2 3 2 0
9 1 1 1
12 1 1 2
10 7 1 3
3 11 2 4
2 2 2 0
9 1 1 1
12 0 1 2
9 5 1 3
3 6 2 4
2 4 2 0
9 3 1 1
12 2 1 2
10 7 1 3
2 8 2 4
2 3 2 0
8 1 1 1
12 1 1 2
9 7 1 3
2 10 2 4
2 2 2 0
9 1 1 1
12 1 1 2
9 6 1 3
2 9 2 4
2 2 2 0
8 1 1 1
12 1 1 2
9 6 1 3
3 7 2 4
2 3 0 0
9 5 0 1
9 7 0 2
9 12 0 3
3 10 2 4
3 2 2 0
9 0 1 1
12 0 1 2
10 5 1

In [96]:
Y_train[0,1, 12]

array([[ 0.       ,  0.       ,  0.       ,  0.       ,  0.       ,
         0.       ,  0.       ,  0.       ,  0.       ,  0.       ],
       [ 0.6466272, -2.0660553, -3.5179217, -3.6778758,  1.       ,
         0.       ,  0.       ,  1.       ,  0.       ,  0.       ],
       [ 0.       ,  0.       ,  0.       ,  0.       ,  0.       ,
         0.       ,  0.       ,  0.       ,  0.       ,  0.       ]],
      dtype=float32)

In [123]:
all_image_line[25]

[13,
 '/home/scar3crow/Downloads/8-6-new-scan/114a.jpg',
 array([[  3.,  14., 145.,  78.],
        [218.,  21., 264.,  45.],
        [320.,  16., 379.,  41.],
        [217.,  61., 310.,  90.],
        [  6.,  83., 195., 129.]], dtype=float32),
 array([0, 1, 2, 3, 4]),
 416,
 138]

In [124]:
true_box = all_image_line[25][2]
true_box

array([[  3.,  14., 145.,  78.],
       [218.,  21., 264.,  45.],
       [320.,  16., 379.,  41.],
       [217.,  61., 310.,  90.],
       [  6.,  83., 195., 129.]], dtype=float32)

In [146]:
true_y, _ = process_box(true_box, 416., 138., labels, target_size, class_num, anchors_wrt_target)

2 5 2 0
8 3 1 1
12 3 1 2
9 8 1 3
3 11 2 4


In [147]:
true_y[5,2]

array([[  0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
          0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
          0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.7003671, -16.118095 ,  -3.6128194,  -3.1377852,   1.       ,
          1.       ,   0.       ,   0.       ,   0.       ,   0.       ]],
      dtype=float32)

In [151]:
print(len(X_train), len(Y_train), len(train_image_index), len(X_val), len(Y_val), len(val_image_index))
print(train_image_index)
print(X_train.shape, Y_train.shape)
print(train_image_index[0], X_train[0].shape, Y_train[0].shape)
print(train_anchor_mask.shape, 'and', train_anchor_mask[0].shape)

28 28 28 8 8 8
[24, 2, 13, 19, 21, 26, 30, 31, 10, 7, 9, 11, 25, 18, 35, 12, 22, 16, 0, 33, 4, 20, 23, 27, 17, 32, 34, 28]
(28, 480, 480, 3) (28, 15, 15, 3, 10)
24 (480, 480, 3) (15, 15, 3, 10)
(28, 15, 15, 3) and (15, 15, 3)


In [152]:
# https://github.com/ethanyanjiali/deep-vision/blob/master/YOLO/tensorflow/utils.py

def xywh_to_x1y1x2y2(box):
    xy = box[..., 0:2]
    wh = box[..., 2:4]

    x1y1 = xy - wh / 2
    x2y2 = xy + wh / 2

    y_box = K.concatenate([x1y1, x2y2], axis=-1)
    return y_box

def broadcast_iou(box_a, box_b):
    """
    calculate iou between box_a and multiple box_b in a broadcast way
    inputs: box_a: a tensor full of boxes, eg. (B, N, 4), box is in x1y1x2y2
            box_b: another tensor full of boxes, eg. (B, M, 4)
    """

    # (B, N, 1, 4)
    box_a = tf.expand_dims(box_a, -2)
    # (B, 1, M, 4)
    box_b = tf.expand_dims(box_b, -3)
    # (B, N, M, 4)
    new_shape = tf.broadcast_dynamic_shape(tf.shape(box_a), tf.shape(box_b))

    # (B, N, M, 4)
    # (B, N, M, 4)
    box_a = tf.broadcast_to(box_a, new_shape)
    box_b = tf.broadcast_to(box_b, new_shape)

    # (B, N, M, 1)
    al, at, ar, ab = tf.split(box_a, 4, -1)
    bl, bt, br, bb = tf.split(box_b, 4, -1)

    # (B, N, M, 1)
    left = tf.math.maximum(al, bl)
    right = tf.math.minimum(ar, br)
    top = tf.math.maximum(at, bt)
    bot = tf.math.minimum(ab, bb)

    # (B, N, M, 1)
    iw = tf.clip_by_value(right - left, 0, 1)
    ih = tf.clip_by_value(bot - top, 0, 1)
    i = iw * ih

    # (B, N, M, 1)
    area_a = (ar - al) * (ab - at)
    area_b = (br - bl) * (bb - bt)
    union = area_a + area_b - i

    # (B, N, M)
    iou = tf.squeeze(i / (union + 1e-7), axis=-1)

    return iou

## https://github.com/ethanyanjiali/deep-vision/blob/master/YOLO/tensorflow/yolov3.py#L213

def calc_ignore_mask(ignore_thresh, true_box, pred_box):
    
        # YOLOv3:
        # "If the bounding box prior is not the best but does overlap a ground
        # truth object by more than some threshold we ignore the prediction,
        # following [17]. We use the threshold of .5."
        # calculate the iou for each pair of pred bbox and true bbox, then find the best among them

        # (None, 13, 13, 3, 4)
        
        true_box_reorganised = xywh_to_x1y1x2y2(true_box)  # reorganised to x1, y1, x2, y2
        pred_box_reorganised = xywh_to_x1y1x2y2(pred_box)
        
        true_box_shape = tf.shape(true_box_reorganised)  
        # (None, 13, 13, 3, 4)
        pred_box_shape = tf.shape(pred_box_reorganised)  
        # (None, 507, 4)
        true_box_reorganised = tf.reshape(true_box_reorganised, [true_box_shape[0], -1, 4])
        # sort true_box to have non-zero boxes rank first
        true_box_reorganised = tf.sort(true_box_reorganised, axis=1, direction="DESCENDING")
        # (None, 100, 4)
        # only use maximum 100 boxes per groundtruth to calcualte IOU, otherwise
        # GPU emory comsumption would explode for a matrix like (16, 52*52*3, 52*52*3, 4)
        true_box_reorganised = true_box_reorganised[:, 0:100, :]
        # (None, 507, 4)
        pred_box_reorganised = tf.reshape(pred_box_reorganised, [pred_box_shape[0], -1, 4])

        # https://github.com/dmlc/gluon-cv/blob/06bb7ec2044cdf3f433721be9362ab84b02c5a90/gluoncv/model_zoo/yolo/yolo_target.py#L198
        # (None, 507, 507)
        iou = broadcast_iou(pred_box_reorganised, true_box_reorganised)
        # (None, 507)
        best_iou = tf.reduce_max(iou, axis=-1)
        # (None, 13, 13, 3)
        best_iou = tf.reshape(best_iou, [pred_box_shape[0], pred_box_shape[1], pred_box_shape[2], pred_box_shape[3]])
        # ignore_mask = 1 => don't ignore
        # ignore_mask = 0 => should ignore
        ignore_mask = tf.cast(best_iou < ignore_thresh, tf.float32)
        # (None, 13, 13, 3, 1)
        ignore_mask = tf.expand_dims(ignore_mask, axis=-1)
        
        return ignore_mask

In [156]:
## made on 16/7/2020 at 8:48 pm

from functools import partial

anchors = anchors_wrt_target

def my_custom_loss(y_true, y_pred):
    
#    def pre_loss(my_custom_loss, anchors):
        
    num_anchors = len(anchors)
    num_classes = 5
    ignore_thresh = 0.5
    grid_size = [15., 15.]
    grid_stride = 480. / grid_size[0]
    batch_size = 4
    
#    batch_shape = y_pred.get_shape()
#    batch_size = batch_shape[0]
    
    
#        scaled_anchors = anchors / grid_stride
    
    Lambda_Coord = 5.0
    Lambda_no_obj = 0.5
    
#    grid_x = np.arange(grid_size[1])
#    grid_y = np.arange(grid_size[0])
    
#        a = np.array(np.meshgrid(grid_x, grid_y))
#        b = np.array(np.meshgrid(grid_x, grid_y))
#        c = np.array(np.meshgrid(grid_x, grid_y))
#        d = np.concatenate((a,b,c), axis = 0)
#        e = d.transpose(2, 1, 0)
#        grid_final = np.reshape(e,[1,15,15,3,2])
    
    tot_loss = tf.zeros(1, dtype='float32')

    obj_mask = y_true[..., 4:5]
    no_obj_mask = 1. - obj_mask

## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 

    t_x_pred = y_pred[..., 0:1]
    t_y_pred = y_pred[..., 1:2]
    t_w_pred = y_pred[..., 2:3]
    t_h_pred = y_pred[..., 3:4]
        
    t_x_true = y_true[..., 0:1]
    t_y_true = y_true[..., 1:2]
    t_w_true = y_true[..., 2:3]
    t_h_true = y_true[..., 3:4]
        
    box_loss = K.square(t_x_pred - t_x_true) + K.square(t_y_pred - t_y_true) + K.square(t_w_pred - t_w_true) + K.square(t_h_pred - t_h_true)
    box_loss = Lambda_Coord * K.sum(box_loss * obj_mask) / batch_size
    
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 

    t_obj_pred = K.sigmoid(y_pred[..., 4:5])  # shape = 28, 15, 15, 3, 1
    
    obj_loss = K.sum((-K.log(t_obj_pred)) * obj_mask) / batch_size
        
    noobj_loss = Lambda_no_obj * K.sum((-K.log(1-t_obj_pred)) * no_obj_mask) / batch_size
                                          
        
        
#        true_box_wrt_ti = K.concatenate([true_box_xy_wrt_target_image, true_box_wdht], axis = -1)  ## in x,y,w,h format
#        pred_box_wrt_ti = K.concatenate([pred_box_xy_wrt_target_image, pred_box_wdht], axis = -1)  ## in x,y,w,h format
    
#        ignore_mask = calc_ignore_mask(ignore_thresh, true_box_wrt_ti, pred_box_wrt_ti)
        
#        bce = tf.keras.losses.BinaryCrossentropy()        
#        obj_loss = K.sum(bce(obj_mask, pred_obj_mask) * obj_mask)

##    obj_loss_arr = K.square(t_obj_pred - obj_mask)
##    obj_loss = K.sum(obj_loss_arr * obj_mask) / batch_size
    
#        no_obj_mask = 1. - obj_mask
                
##    noobj_loss_arr = Lambda_no_obj * K.square(t_obj_pred - obj_mask)
##    noobj_loss = K.sum(noobj_loss_arr * no_obj_mask) / batch_size
    
#    noobj_loss = K.sum(noobj_loss_arr * no_obj_mask * ignore_mask) / batch_size
        
        
## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

    true_classes = y_true[..., 5:10]
    
    pred_classes = K.sigmoid(y_pred[..., 5:10]) * t_obj_pred
        
        
    bce = tf.keras.losses.BinaryCrossentropy()
    class_loss = K.sum(bce(pred_classes, true_classes) * obj_mask) / batch_size
        
        
#        cce = tf.keras.losses.CategoricalCrossentropy()
    
#        class_loss = K.sum(bce(true_classes, pred_classes) * obj_mask)

#        class_loss_arr = K.square(true_classes - pred_classes)
#        class_loss = K.sum(class_loss_arr * obj_mask) / batch_size

    tot_loss = box_loss + obj_loss + noobj_loss + class_loss

##    return box_loss
        
    return tot_loss
    
#    loss = pre_loss(my_custom_loss, anchors)
    
#    return loss
    
    

In [55]:
def _conv_block(inp, convs, skip=True):
    x = inp
    count = 0
    
    for conv in convs:
        if count == (len(convs) - 2) and skip:
            skip_connection = x
        count += 1
        
        if conv['stride'] > 1: x = ZeroPadding2D(((1,0),(1,0)))(x) # peculiar padding as darknet prefer left and top
        x = Conv2D(conv['filter'], 
                   conv['kernel'], 
                   strides=conv['stride'], 
                   padding='valid' if conv['stride'] > 1 else 'same', # peculiar padding as darknet prefer left and top
                   name='conv_' + str(conv['layer_idx']), 
                   use_bias=False if conv['bnorm'] else True)(x)
        if conv['bnorm']: x = BatchNormalization(epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x)
        if conv['leaky']: x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x)

    return add([skip_connection, x]) if skip else x


In [56]:
def make_yolov3_model():
    input_image = Input(shape=(480, 480, 3))

    # Layer  0 => 4
    x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0},
                                  {'filter': 64, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 1},
                                  {'filter': 32, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 2},
                                  {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}])

    # Layer  5 => 8
    x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5},
                        {'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 6},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}])

    # Layer  9 => 11
    x = _conv_block(x, [{'filter':  64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9},
                        {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}])

    # Layer 12 => 15
    x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12},
                        {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 13},
                        {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}])

    # Layer 16 => 36
    for i in range(7):
        x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3},
                            {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}])
        
    skip_36 = x
        
    # Layer 37 => 40
    x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37},
                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 38},
                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}])

    # Layer 41 => 61
    for i in range(7):
        x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3},
                            {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}])
        
    skip_61 = x
        
#    # Layer 62 => 65
#    x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
#                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
#                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])
    
    # Layer 62 => 65
    x = _conv_block(x, [{'filter':  256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62},
                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 63},
                        {'filter':  256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}])


#    # Layer 66 => 74
#    for i in range(3):
#        x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
#                            {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])
    
     # Layer 66 => 74
    for i in range(3):
        x = _conv_block(x, [{'filter':  128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3},
                            {'filter':  256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}])

#    # Layer 75 => 79
#    x = _conv_block(x, [{'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
#                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
#                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
#                        {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
#                        {'filter':  512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
    
    # Layer 75 => 79
    x = _conv_block(x, [{'filter':  128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75},
                        {'filter':  256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 76},
                        {'filter':  128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 77},
                        {'filter':  256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 78},
                        {'filter':  128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False)
    
    # Layer 80 => 82
    yolo_82 = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 80},
                              {'filter':  30, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False)

#    # Layer 83 => 86
#    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False)
#    x = UpSampling2D(2)(x)
#    x = concatenate([x, skip_61])

#    # Layer 87 => 91
#    x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87},
#                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 88},
#                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 89},
#                        {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 90},
#                        {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False)

#    # Layer 92 => 94
#    yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 92},
#                              {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False)

#    # Layer 95 => 98
#    x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True,   'layer_idx': 96}], skip=False)
#    x = UpSampling2D(2)(x)
#    x = concatenate([x, skip_36])

#    # Layer 99 => 106
#    yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 99},
#                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 100},
#                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 101},
#                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 102},
#                               {'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 103},
#                               {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True,  'leaky': True,  'layer_idx': 104},
#                               {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False)

#    model = Model(input_image, [yolo_82, yolo_94, yolo_106])

    final = Reshape((grid_y_axis,grid_x_axis,num_anchors,info))(yolo_82)
    model = Model(input_image, final)
    return model


In [168]:
input_size = (target_w, target_h, 3)


my_model_1 = make_yolov3_model()
my_model_2 = make_yolov3_model()
my_model_3 = make_yolov3_model()
my_model_4 = make_yolov3_model()

print(my_model_1.summary())


Model: "model_9"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_9 (InputLayer)            (None, 480, 480, 3)  0                                            
__________________________________________________________________________________________________
conv_0 (Conv2D)                 (None, 480, 480, 32) 864         input_9[0][0]                    
__________________________________________________________________________________________________
bnorm_0 (BatchNormalization)    (None, 480, 480, 32) 128         conv_0[0][0]                     
__________________________________________________________________________________________________
leaky_0 (LeakyReLU)             (None, 480, 480, 32) 0           bnorm_0[0][0]                    
____________________________________________________________________________________________

In [64]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_model_1.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [65]:
my_model_1.fit(X_train ,Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))


Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f0f58ba7550>

In [66]:
opt = SGD(lr=0.0001, momentum = 0.9)

my_model_2.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [67]:
my_model_1.fit(X_train ,Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f0f54866048>

In [68]:
my_model_2.fit(X_train ,Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f0f5454f588>

In [70]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_model_3.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [71]:
my_model_3.fit(X_train ,Y_train, epochs= 2, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7f0f50d78be0>

In [72]:
pred_1 = my_model_3.predict(X_train)
print(pred_1.shape)
pred_1

(28, 15, 15, 3, 10)


array([[[[[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         ...,

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]],

         [[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan]]],


        [[[nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., nan, nan, nan],
          [nan, nan, nan, ..., 

In [77]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_model_1.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [78]:
my_model_1.fit(X_train ,Y_train, epochs= 2, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7f0f3f704c50>

In [154]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_model_3.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [155]:
my_model_3.fit(X_train ,Y_train, epochs= 2, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7f0f9e1ac710>

In [157]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_yolo_invoice_model.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [158]:
my_yolo_invoice_model.fit(X_train ,Y_train, epochs= 2, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/2
Epoch 2/2


<keras.callbacks.callbacks.History at 0x7f0f22b23668>

In [159]:
my_yolo_invoice_model.fit(X_train ,Y_train, epochs= 200, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/200
Epoch 2/200
Epoch 3/200
Epoch 4/200
Epoch 5/200
Epoch 6/200
Epoch 7/200
Epoch 8/200
Epoch 9/200
Epoch 10/200
Epoch 11/200
Epoch 12/200
Epoch 13/200
Epoch 14/200
Epoch 15/200
Epoch 16/200
Epoch 17/200
Epoch 18/200
Epoch 19/200
Epoch 20/200
Epoch 21/200
Epoch 22/200
Epoch 23/200
Epoch 24/200
Epoch 25/200
Epoch 26/200
Epoch 27/200
Epoch 28/200
Epoch 29/200
Epoch 30/200
Epoch 31/200
Epoch 32/200
Epoch 33/200
Epoch 34/200
Epoch 35/200
Epoch 36/200
Epoch 37/200
Epoch 38/200
Epoch 39/200
Epoch 40/200
Epoch 41/200
Epoch 42/200
Epoch 43/200
Epoch 44/200
Epoch 45/200
Epoch 46/200
Epoch 47/200
Epoch 48/200
Epoch 49/200
Epoch 50/200
Epoch 51/200
Epoch 52/200
Epoch 53/200
Epoch 54/200
Epoch 55/200
Epoch 56/200


Epoch 57/200
Epoch 58/200
Epoch 59/200
Epoch 60/200
Epoch 61/200
Epoch 62/200
Epoch 63/200
Epoch 64/200
Epoch 65/200
Epoch 66/200
Epoch 67/200
Epoch 68/200
Epoch 69/200
Epoch 70/200
Epoch 71/200
Epoch 72/200
Epoch 73/200
Epoch 74/200
Epoch 75/200
Epoch 76/200
Epoch 77/200
Epoch 78/200
Epoch 79/200
Epoch 80/200
Epoch 81/200
Epoch 82/200
Epoch 83/200
Epoch 84/200
Epoch 85/200
Epoch 86/200
Epoch 87/200
Epoch 88/200
Epoch 89/200
Epoch 90/200
Epoch 91/200
Epoch 92/200
Epoch 93/200
Epoch 94/200
Epoch 95/200
Epoch 96/200
Epoch 97/200
Epoch 98/200
Epoch 99/200
Epoch 100/200
Epoch 101/200
Epoch 102/200
Epoch 103/200
Epoch 104/200
Epoch 105/200
Epoch 106/200
Epoch 107/200
Epoch 108/200
Epoch 109/200
Epoch 110/200
Epoch 111/200
Epoch 112/200
Epoch 113/200


Epoch 114/200
Epoch 115/200
Epoch 116/200
Epoch 117/200
Epoch 118/200
Epoch 119/200
Epoch 120/200
Epoch 121/200
Epoch 122/200
Epoch 123/200
Epoch 124/200
Epoch 125/200
Epoch 126/200
Epoch 127/200
Epoch 128/200
Epoch 129/200
Epoch 130/200
Epoch 131/200
Epoch 132/200
Epoch 133/200
Epoch 134/200
Epoch 135/200
Epoch 136/200
Epoch 137/200
Epoch 138/200
Epoch 139/200
Epoch 140/200
Epoch 141/200
Epoch 142/200
Epoch 143/200
Epoch 144/200
Epoch 145/200
Epoch 146/200
Epoch 147/200
Epoch 148/200
Epoch 149/200
Epoch 150/200
Epoch 151/200
Epoch 152/200
Epoch 153/200
Epoch 154/200
Epoch 155/200
Epoch 156/200
Epoch 157/200
Epoch 158/200
Epoch 159/200
Epoch 160/200
Epoch 161/200
Epoch 162/200
Epoch 163/200
Epoch 164/200
Epoch 165/200
Epoch 166/200
Epoch 167/200
Epoch 168/200


Epoch 169/200
Epoch 170/200
Epoch 171/200
Epoch 172/200
Epoch 173/200
Epoch 174/200
Epoch 175/200
Epoch 176/200
Epoch 177/200
Epoch 178/200
Epoch 179/200
Epoch 180/200
Epoch 181/200
Epoch 182/200
Epoch 183/200
Epoch 184/200
Epoch 185/200
Epoch 186/200
Epoch 187/200
Epoch 188/200
Epoch 189/200
Epoch 190/200
Epoch 191/200
Epoch 192/200
Epoch 193/200
Epoch 194/200
Epoch 195/200
Epoch 196/200
Epoch 197/200
Epoch 198/200
Epoch 199/200
Epoch 200/200


<keras.callbacks.callbacks.History at 0x7f0f18cc3c88>

In [160]:
my_yolo_invoice_model.fit(X_train ,Y_train, epochs= 100, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x7f0f18c56c88>

In [87]:
obj_mask = Y_train[..., 4:5]
no_obj_mask = 1. - obj_mask

## ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 
Y_pred = np.zeros((28, 6750))
Y_pred = np.reshape(Y_pred, [28, 15, 15, 3, 10])


t_x_pred = Y_pred[..., 0:1]
t_y_pred = Y_pred[..., 1:2]
t_w_pred = Y_pred[..., 2:3]
t_h_pred = Y_pred[..., 3:4]
        
t_x_true = Y_train[..., 0:1]
t_y_true = Y_train[..., 1:2]
t_w_true = Y_train[..., 2:3]
t_h_true = Y_train[..., 3:4]
        
box_loss = K.square(t_x_pred - t_x_true) + K.square(t_y_pred - t_y_true) + K.square(t_w_pred - t_w_true) + K.square(t_h_pred - t_h_true)
box_loss = 5. * K.sum(box_loss * obj_mask) / 4


In [107]:
print(K.eval(K.sum(K.square(t_x_pred - t_x_true) * obj_mask)))

464.0127166977621


In [108]:
print(K.eval(K.sum(K.square(t_y_pred - t_y_true) * obj_mask)))

inf


In [109]:
print(K.eval(K.sum(K.square(t_w_pred - t_w_true) * obj_mask)))

1679.946553050103


In [110]:
print(K.eval(K.sum(K.square(t_h_pred - t_h_true) * obj_mask)))

1657.7068980551937


In [117]:
Y_train[2,...,1:2]

array([[[[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0.        ],
         [ 0.        ]],

        [[ 0.        ],
         [ 0. 

In [122]:
all_image_line[25]

[13,
 '/home/scar3crow/Downloads/8-6-new-scan/114a.jpg',
 array([[  3.,  14., 145.,  78.],
        [218.,  21., 264.,  45.],
        [320.,  16., 379.,  41.],
        [217.,  61., 310.,  90.],
        [  6.,  83., 195., 129.]], dtype=float32),
 array([0, 1, 2, 3, 4]),
 416,
 138]

In [162]:
xx = X_train[2]
XXX = np.expand_dims(xx, axis=0)
XXX.shape

(1, 480, 480, 3)

In [163]:
pred_1 = my_yolo_invoice_model.predict(XXX)

In [164]:
Y_train[2, 5, 2]

array([[  0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
          0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.       ,   0.       ,   0.       ,   0.       ,   0.       ,
          0.       ,   0.       ,   0.       ,   0.       ,   0.       ],
       [  0.7003671, -16.118095 ,  -3.6128194,  -3.1377852,   1.       ,
          1.       ,   0.       ,   0.       ,   0.       ,   0.       ]],
      dtype=float32)

In [167]:
pred_1[0,5,2]

array([[  0.7255685 ,  -0.74695927,  -1.9000641 ,  -1.6721799 ,
         -5.877833  , -10.6284685 ,  -1.8506002 ,  -7.5909595 ,
         -1.7050235 ,  -6.371971  ],
       [ -1.2239833 ,   2.140383  ,   1.2755095 ,   0.6673165 ,
         -9.753118  ,  -8.85055   ,  -5.78881   ,  -2.8534703 ,
         -1.9281044 ,  -6.7498527 ],
       [  0.8463026 , -13.92913   ,  -3.3317375 ,  -2.5032406 ,
          2.034382  ,  -6.839702  ,  -8.581668  ,  -9.369587  ,
         -5.6633873 ,  -6.6871963 ]], dtype=float32)

In [169]:
opt = SGD(lr=0.0001)

my_model_1.compile(optimizer= opt, loss = my_custom_loss, metrics = ['accuracy'])

In [170]:
my_model_1.fit(X_train ,Y_train, epochs= 100, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100


Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78/100
Epoch 79/100
Epoch 80/100
Epoch 81/100
Epoch 82/100
Epoch 83/100
Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


<keras.callbacks.callbacks.History at 0x7f0f0b1968d0>

In [171]:
pred_ = my_model_1.predict(XXX)

In [172]:
pred_[0,5,2]

array([[  0.11884286,  -0.29294455,  -3.8280103 ,  -4.58905   ,
         -7.485161  ,  -1.4699419 ,   1.3593652 ,  -2.5104914 ,
         -0.4770757 ,   1.8778927 ],
       [ -1.7047958 ,   1.9640722 ,  -2.9230008 ,  -4.355827  ,
         -8.039365  ,  -1.657332  ,  -1.5517566 ,  -3.4764194 ,
         -1.9580714 ,   1.997337  ],
       [  0.8648745 , -15.780776  ,  -3.6722846 ,  -3.1603458 ,
         -3.936662  ,   0.6864209 ,  -2.154814  ,   0.01957085,
         -2.6341605 ,   4.421023  ]], dtype=float32)