In [1]:
cd models/research/object_detection

/home/scar3crow/Dropbox/WorkStation-Subrata/python/models/research/object_detection


In [3]:
import numpy as np
import pandas as pd
import cv2
import os
import tqdm
from scipy.io import loadmat

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

from PIL import Image
import pytesseract

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import backend as K

from utils import *

from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.optimizers import RMSprop
from keras.layers import *

from keras.applications import MobileNetV2
from keras.applications import InceptionResNetV2

from keras.models import Model
from keras.models import model_from_json


In [15]:
target_size = [448, 448]
target_w = 448 # target sizes of image in model input
target_h = 448 #target sizes of image in model input

grid_y_axis = 14  # each image is to be segmented to 14 x 14 grid
grid_x_axis = 14  # # each image is to be segmented to 14 x 14 grid

grid_w = target_w / grid_x_axis  # grid cell width
grid_h = target_h / grid_y_axis  # grid cell height

channels = 3
num_anchors = 2
classes = 5 # vendor, invoice, inv_date, po, buyer
info = 5 + classes    # pc, x, y, h, w, and class probabilities

categories = ['vendor', 'invoice', 'inv_date', 'po', 'buyer'] # details of classes

In [4]:
# Making a list of image path

inv_directory = '/home/scar3crow/Downloads/8-6-new-scan'  ## 'invoices' is a zip file of jpg images in ...../Downloads 
                                                        
inv_new_image = ['/home/scar3crow/Downloads/8-6-new-scan/{}'.format(i) for i in os.listdir(inv_directory)] # making the list
inv_new_image.sort() # Sorting the list

print('Number of images = ', len(inv_new_image))
inv_new_image[20]

Number of images =  36


'/home/scar3crow/Downloads/8-6-new-scan/121a.jpg'

In [5]:
# Check sizes of exiting images & Create a Dataframe with image id and height(row) and width(column):

rows = []
columns = []
image_sl = []
df_new = pd.DataFrame()

for i in range(len(inv_new_image)):
    image = cv2.imread(inv_new_image[i]) ## Loading image
    height, width, _ = image.shape
    rows.append(height)
    columns.append(width)
    image_sl.append(inv_new_image[i])
    
row_values = pd.Series(rows)
col_values = pd.Series(columns)
image_num = pd.Series(image_sl)


df_new.insert(loc=0, column='image_serial', value=image_num)
df_new.insert(loc=1, column='rows', value=row_values)
df_new.insert(loc=2, column='columns', value=col_values)

df_new.head(3)

Unnamed: 0,image_serial,rows,columns
0,/home/scar3crow/Downloads/8-6-new-scan/101a.jpg,160,416
1,/home/scar3crow/Downloads/8-6-new-scan/102a.jpg,406,870
2,/home/scar3crow/Downloads/8-6-new-scan/103a.jpg,260,416


In [6]:
# Loading output of VGG Image Annotation tool and create a dataframe

r_new_data = pd.read_csv('/home/scar3crow/Downloads/via_new_data.csv')
num_obj = r_new_data['region_count'][0] # number of objects in each photo
r_new_data.drop(r_new_data.columns[[1, 2, 3, 4]], axis=1, inplace=True) # reduce unnecessary columns
r_new_data.sort_values(by=['#filename'], ascending=True) # Sorting based on image-id
num_images = r_new_data["#filename"].nunique() # Find out number of unique images

print('Number of classes = ', num_obj)
print('Number of unique images = ', num_images)
r_new_data[58:61]

Number of classes =  5
Number of unique images =  36


Unnamed: 0,#filename,region_shape_attributes,region_attributes
58,63a.jpg,"{""name"":""rect"",""x"":211,""y"":64,""width"":76,""heig...","{""text"":""po""}"
59,63a.jpg,"{""name"":""rect"",""x"":2,""y"":68,""width"":165,""heigh...","{""text"":""buyer""}"
60,101a.jpg,"{""name"":""rect"",""x"":6,""y"":23,""width"":119,""heigh...","{""text"":""vendor""}"


In [7]:
# Making a dataframe for Image_id, x, y, width, height, class, image_width and image_height

x = []
y = []
width = []
height = []
obj_class = []
i_width = []
i_height = []


for i in range(len(r_new_data)):
    
    r_size = r_new_data.values[i, 1][1:(len(r_new_data.values[i, 1])-1)]
    r_size_par = r_size.split(",")
    
    x.append(int("".join(filter(str.isdigit, r_size_par[1]))))
    y.append(int("".join(filter(str.isdigit, r_size_par[2]))))
    width.append(int("".join(filter(str.isdigit, r_size_par[3]))))
    height.append(int("".join(filter(str.isdigit, r_size_par[4]))))
    
    r_attribs = r_new_data.values[i, 2][1:(len(r_new_data.values[i, 2])-1)]
    r_attribs_par = r_attribs.split(':')[1]
    obj_class.append(r_attribs_par[1:(len(r_attribs_par)-1)])
    
    foto_id = r_new_data['#filename'][i]
    img_path = '/home/scar3crow/Downloads/8-6-new-scan/' + foto_id
    foto_index = df_new.index[df_new['image_serial'] == img_path]
    foto_width = df_new['columns'][foto_index].tolist()
    foto_height = df_new['rows'][foto_index].tolist()
    i_width.append(foto_width[0])
    i_height.append(foto_height[0])
    
x_values = pd.Series(x)
y_values = pd.Series(y)
width_values = pd.Series(width)
height_values = pd.Series(height)
class_values = pd.Series(obj_class)
i_width_values = pd.Series(i_width)
i_height_values = pd.Series(i_height)

r_new_data.insert(loc=1, column='x', value=x_values)
r_new_data.insert(loc=2, column='y', value=y_values)
r_new_data.insert(loc=3, column='width', value=width_values)
r_new_data.insert(loc=4, column='height', value=height_values)
r_new_data.insert(loc=5, column='obj_class', value=class_values)
r_new_data.insert(loc=8, column='image_width', value=i_width_values)
r_new_data.insert(loc=9, column='image_height', value=i_height_values)


r_new_data.drop(r_new_data.columns[[6, 7]], axis=1, inplace=True) # reduce unnecessary columns

r_new_data.rename({'#filename': 'img_id'}, axis=1, inplace=True) # changing column name

r_new_data[3:6]

Unnamed: 0,img_id,x,y,width,height,obj_class,image_width,image_height
3,50a.jpg,221,59,103,24,po,416,209
4,50a.jpg,5,57,206,56,buyer,416,209
5,51a.jpg,5,0,120,56,vendor,416,194


In [8]:
print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images

print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts())

Number of unique images =  36
Number of classes in diff. categories =  buyer      38
invoice    36
vendor     36
date       36
po         33
order       1
Name: obj_class, dtype: int64


In [9]:
# We have to correct above :

# To find smallest width & height boxes in 'buyer' which should be 'po'
gb = r_new_data.groupby('obj_class')    
[gb.get_group('buyer') for x in gb.groups]


[       img_id    x    y  width  height obj_class  image_width  image_height
 4     50a.jpg    5   57    206      56     buyer          416           209
 9     51a.jpg    4   53    152      64     buyer          416           194
 14    52a.jpg    1   50    161      74     buyer          416           188
 19    53a.jpg    0   50    177      76     buyer          416           194
 24    54a.jpg   31  103    186      61     buyer          416           168
 29    55a.jpg    1   56    183      74     buyer          416           144
 34    56a.jpg    1   56    166      62     buyer          416           123
 39    59a.jpg    3   58    175      62     buyer          416           200
 44    60a.jpg    0   44    165      52     buyer          416           106
 49    61a.jpg    1   56    155      63     buyer          416           121
 54    62a.jpg    4   58    163      61     buyer          416           123
 59    63a.jpg    2   68    165      55     buyer          416           191

In [10]:
# Correcting above wrong spelling & converting buyer to po of object classes and rechecking

id_1 = r_new_data.index[r_new_data['obj_class'] == 'order'] # Finding the index
id_2 = r_new_data.index[r_new_data['obj_class'] == 'date'] # to change 'date' to 'inv_date' to be consistent with old data

r_new_data.at[id_1, 'obj_class'] = 'po' # writing the correct spelling 
r_new_data.at[88, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[163, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[id_2, 'obj_class'] = 'inv_date' # # 'date' to 'inv_date'

print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images
print('Number of unique classes = ', r_new_data['obj_class'].nunique())
print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts()) 


Number of unique images =  36
Number of unique classes =  5
Number of classes in diff. categories =  invoice     36
vendor      36
inv_date    36
buyer       36
po          36
Name: obj_class, dtype: int64


In [11]:
# Converting categories into one-hot-coding :

categories = ['vendor', 'invoice', 'inv_date', 'po', 'buyer']

values = np.array(categories)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)

onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
cat_encoded = onehot_encoder.fit_transform(integer_encoded)
                               
print(cat_encoded)

[[0. 0. 0. 0. 1.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]
 [1. 0. 0. 0. 0.]]


In [12]:
## iou based on width and height for the purpose of calculating anchors through k-means :

def iou_kmeans(box, clusters):
    """
    Calculates the Intersection over Union (IoU) between a box and k clusters.
    :param box: tuple or array, shifted to the origin (i. e. width and height)
    :param clusters: numpy array of shape (k, 2) where k is the number of clusters
    :return: numpy array of shape (k, 0) where k is the number of clusters
    """
    x = np.minimum(clusters[:, 0], box[0])
    y = np.minimum(clusters[:, 1], box[1])
    if np.count_nonzero(x == 0) > 0 or np.count_nonzero(y == 0) > 0:
        raise ValueError("Box has no area")

    intersection = x * y
    box_area = box[0] * box[1]
    cluster_area = clusters[:, 0] * clusters[:, 1]

    iou = intersection / (box_area + cluster_area - intersection)

    return iou


In [13]:
## calculating anchors from true boundary boxes :

def kmeans(boxes, k, dist=np.median):
    """
    Calculates k-means clustering with the Intersection over Union (IoU) metric.
    :param boxes: numpy array of shape (r, 2), where r is the number of rows
    :param k: number of clusters
    :param dist: distance function
    :return: numpy array of shape (k, 2)
    """
    rows = boxes.shape[0]

    distances = np.empty((rows, k))
    last_clusters = np.zeros((rows,))

    np.random.seed()

    # the Forgy method will fail if the whole array contains the same rows
    clusters = boxes[np.random.choice(rows, k, replace=False)]


    while True:
        for row in range(rows):
            distances[row] = 1 - iou_kmeans(boxes[row], clusters)

        nearest_clusters = np.argmin(distances, axis=1)

        if (last_clusters == nearest_clusters).all():
            break

        for cluster in range(k):
            clusters[cluster] = dist(boxes[nearest_clusters == cluster], axis=0)

        last_clusters = nearest_clusters

    return clusters

In [16]:
## Finding out anchors :
## Firstly, converting true boundary box width, height to width & height with respect to grid cells :
## and then convert dimensions w.r.t. target dimensions and then devide by cell dimension finalyy dind anchors

num_anchors = 3

num_all_bb = len(r_new_data) # total number of boundary boxes = no. of images * 5

b_box_wrt_cell = np.zeros((num_all_bb,2))

for i in range(num_all_bb):
    
    image_w = r_new_data['image_width'][i]
    image_h = r_new_data['image_height'][i]

    x_ratio = target_w / image_w 
    y_ratio = target_h / image_h
    
    anchor_w = r_new_data['width'][i] * x_ratio / (target_w/grid_x_axis)
    anchor_h = r_new_data['height'][i] * y_ratio / (target_h/grid_y_axis)
    b_box_wrt_cell[i, 0] = anchor_w
    b_box_wrt_cell[i, 1] = anchor_h
    
anchors_wrt_cell = kmeans(b_box_wrt_cell, num_anchors)

print(anchors_wrt_cell.shape)
print(anchors_wrt_cell)


(3, 2)
[[2.5        1.89845361]
 [6.30769231 6.77419968]
 [6.23076923 4.54528915]]


In [58]:
## Creating y_true for training or here, it is called matching_true_boxes :

def preprocess_true_boxes(true_boxes, anchors, target_size):
    """Find detector in YOLO where ground truth box should appear.
    Parameters
    ----------
    true_boxes : array
        List of ground truth boxes in form of relative x, y, w, h, class.
        Relative coordinates are in the range [0, 1] indicating a percentage
        of the original image dimensions.
    anchors : array
        List of anchors in form of w, h.
        Anchors are assumed to be in the range [0, conv_size] where conv_size
        is the spatial dimension of the final convolutional features.
    image_size : array-like
        List of image dimensions in form of h, w in pixels.
    Returns
    -------
    detectors_mask : array
        0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1]
        that should be compared with a matching ground truth box.
    matching_true_boxes: array
        Same shape as detectors_mask with the corresponding ground truth box
        adjusted for comparison with predicted parameters at training time.
        
    """
    
    height, width = target_size
    num_anchors = len(anchors)
    
    conv_height = height // 32  ## cell dimension is 32 X 32 and no. of cells are 14 X 14
    conv_width = width // 32   ## cell dimension is 32 X 32 and no. of cells are 14 x 14
        
    detectors_mask = np.zeros((conv_height, conv_width, num_anchors, 1), dtype=np.float32)
    matching_true_boxes = np.zeros((conv_height, conv_width, num_anchors, info),dtype=np.float32)

    for box in true_boxes:
        
        # scale box to convolutional feature spatial dimensions
        box_class = int(box[4:5])
        box = box[0:4] * np.array([conv_width, conv_height, conv_width, conv_height])
        
        m = np.floor(box[1]).astype('int')
             
#        n = min(np.floor(box[0]).astype('int'),1)
        n = np.floor(box[0]).astype('int')
                
        best_iou = 0
        best_anchor = 0
                
        for k, anchor in enumerate(anchors):
            # Find IOU between box shifted to origin and anchor box.
            box_maxes = box[2:4] / 2.
            box_mins = -box_maxes
            anchor_maxes = (anchor / 2.)
            anchor_mins = -anchor_maxes

            intersect_mins = np.maximum(box_mins, anchor_mins)
            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
            intersect_area = intersect_wh[0] * intersect_wh[1]
            box_area = box[2] * box[3]
            anchor_area = anchor[0] * anchor[1]
            iou = intersect_area / (box_area + anchor_area - intersect_area)
            if iou > best_iou:
                best_iou = iou
                best_anchor = k
                
        if best_iou > 0:
            detectors_mask[m, n, best_anchor] = 1
            
            adjusted_box = np.array(
                [   1,
                    box[0] - n, box[1] - m,
                    np.log(round(box[2], 5) / round(anchors[best_anchor][0], 5)),
                    np.log(round(box[3], 5) / round(anchors[best_anchor][1], 5))
                ],
                dtype=np.float32)
            matching_true_boxes[m, n, best_anchor] = np.hstack((adjusted_box, cat_encoded[box_class]))
    return detectors_mask, matching_true_boxes

In [186]:
# Preparing Input(X) and Target(Y) file for training :

X_final = [] # X_final list to convert to np array later
Y_true_final = [] # Y_final list to convert to np array later
Y_mask_final = []

Y_true = np.zeros((grid_y_axis,grid_x_axis,num_anchors,info))
Y_mask = np.zeros((grid_y_axis,grid_x_axis,num_anchors,1))

true_boxes = np.zeros((num_images, num_obj, 5))


image_list = r_new_data['img_id'].unique() # make a list of unique images

for i in range(len(image_list)):
    
    image_path = '/home/scar3crow/Downloads/8-6-new-scan/' + image_list[i]
    
    x = cv2.imread(image_path)
    x_ratio = target_w / x.shape[1]
    y_ratio = target_h / x.shape[0]
    img = cv2.resize(x,(target_w, target_h))
    
    X_final.append(img)
    
# Y_true = np.zeros((grid_y_axis,grid_x_axis,num_anchors,info))
# Y_mask = np.zeros((grid_y_axis,grid_x_axis,num_anchors,1))

# for j in range(len(image_list)):

    r_new_data_slice = r_new_data.loc[r_new_data['img_id'].isin([image_list[i]])].reset_index(drop=True)

    obj = 0

    for j in range(len(r_new_data_slice)):
    
        image_w = r_new_data_slice['image_width'][j]
        image_h = r_new_data_slice['image_height'][j]
    
        x_ratio = target_size[1] / image_w
        y_ratio = target_size[0] / image_h
    
        xmin = r_new_data_slice['x'][j] * x_ratio
        ymin = r_new_data_slice['y'][j] * y_ratio
        
        xmax = (r_new_data_slice['x'][j] + r_new_data_slice['width'][j]) * x_ratio
        ymax = (r_new_data_slice['y'][j] + r_new_data_slice['height'][j]) * y_ratio
        
        w = (r_new_data_slice['width'][j] * x_ratio) / target_size[1]
        h = (r_new_data_slice['height'][j] * y_ratio) / target_size[0]
    
        x = (xmin + (xmax-xmin)/2) / target_size[1]
        y = (ymin + (ymax-ymin)/2) / target_size[0] 
        
        true_boxes[i, j][0] = x
        true_boxes[i, j][1] = y
        true_boxes[i, j][2] = w
        true_boxes[i, j][3] = h
        true_boxes[i, j][4] = obj
        obj = obj+1

    print('==========', i, '++', j)
    print(true_boxes.shape)
    print(image_list[i])
    print(true_boxes[i])
    Y_mask, Y_true = preprocess_true_boxes(true_boxes[i], anchors_wrt_cell, target_size)
    
    print('\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\')
    print(Y_true)
    
    Y_true_final.append(Y_true)
    Y_mask_final.append(Y_mask)
    

print('##############################################################################')
print(Y_true_final[0])
X = np.array(X_final) 
#X_final = []
Y_true_target = np.array(Y_true_final)
Y_mask_target = np.array(Y_mask_final)
#Y_final = []

# X = (X - 127.5)/127.5  # X normalising since pixels vary from 0 to 255

X = X / 255  # X normalising since pixels vary from 0 to 255
    
# np.save('/home/scar3crow/Downloads/Data1/X_short.npy',X)
# np.save('/home/scar3crow/Downloads/Data1/Y_short.npy',Y)


(36, 5, 5)
50a.jpg
[[0.24038462 0.14114833 0.45673077 0.27272727 0.        ]
 [0.60096154 0.06698565 0.12980769 0.09569378 1.        ]
 [0.84375    0.07416268 0.13461538 0.11004785 2.        ]
 [0.65504808 0.33971292 0.24759615 0.11483254 3.        ]
 [0.25961538 0.40669856 0.49519231 0.26794258 4.        ]]
[3.36538462 1.97607656 6.39423077 3.81818182]
mmmmmm =  1     nnnnnn =  3
[8.41346154 0.93779904 1.81730769 1.33971292]
mmmmmm =  0     nnnnnn =  8
[11.8125      1.03827751  1.88461538  1.54066986]
mmmmmm =  1     nnnnnn =  11
[9.17067308 4.75598086 3.46634615 1.6076555 ]
mmmmmm =  4     nnnnnn =  9
[3.63461538 5.6937799  6.93269231 3.75119617]
mmmmmm =  5     nnnnnn =  3
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[[[[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.        ]]

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.      

\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[[[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  ...

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  ...

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]]


 [[[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  ...

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0. 0. 0.]]

  [[0. 0. 0. ... 0. 0. 0.]
   [0. 0. 0. ... 0.

[9.40625    3.875      3.12980769 1.91666667]
mmmmmm =  3     nnnnnn =  9
[3.19711538 7.375      6.32692308 5.41666667]
mmmmmm =  7     nnnnnn =  3
\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
[[[[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.        ]]

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.        ]]

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.        ]]

  ...

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [1.         0.84615386 0.8333333  ... 0.         0.
    0.        ]]

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.         0.         ... 0.         0.
    0.        ]]

  [[0.         0.         0.         ... 0.         0.
    0.        ]
   [0.         0.    

In [63]:
print(X.shape)
print(Y_true_target.shape)
print(Y_mask_target.shape)

(36, 448, 448, 3)
(36, 14, 14, 2, 10)
(36, 14, 14, 2, 1)


In [187]:
X[0]

array([[[0.99607843, 0.99607843, 0.99607843],
        [1.        , 1.        , 1.        ],
        [1.        , 1.        , 1.        ],
        ...,
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843]],

       [[0.99607843, 0.99607843, 0.99607843],
        [1.        , 1.        , 1.        ],
        [1.        , 1.        , 1.        ],
        ...,
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843]],

       [[0.99607843, 0.99607843, 0.99607843],
        [1.        , 1.        , 1.        ],
        [0.99607843, 0.99607843, 0.99607843],
        ...,
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843],
        [0.99607843, 0.99607843, 0.99607843]],

       ...,

       [[1.        , 1.        , 1.        ],
        [1.        , 1.        , 1.        ],
        [0.99607843, 0

In [188]:
X[16]

array([[[0.83529412, 0.86666667, 0.86666667],
        [0.90980392, 0.94509804, 0.94509804],
        [0.77647059, 0.80784314, 0.80784314],
        ...,
        [0.82745098, 0.8745098 , 0.89803922],
        [0.82745098, 0.8745098 , 0.89803922],
        [0.82745098, 0.8745098 , 0.89803922]],

       [[0.83529412, 0.86666667, 0.86666667],
        [0.91372549, 0.94509804, 0.94509804],
        [0.77647059, 0.80784314, 0.80784314],
        ...,
        [0.82745098, 0.8745098 , 0.89803922],
        [0.82745098, 0.8745098 , 0.89803922],
        [0.82745098, 0.8745098 , 0.89803922]],

       [[0.87843137, 0.90980392, 0.90980392],
        [0.89411765, 0.9254902 , 0.9254902 ],
        [0.74117647, 0.77254902, 0.77254902],
        ...,
        [0.82745098, 0.87843137, 0.89803922],
        [0.82745098, 0.87843137, 0.89803922],
        [0.82745098, 0.87843137, 0.89803922]],

       ...,

       [[0.93333333, 0.95686275, 0.95294118],
        [0.91372549, 0.9372549 , 0.93333333],
        [0.93333333, 0

In [189]:
XX = X
YY = Y_true_target
ZZ = Y_mask_target

X_train , X_val , Y_train , Y_val  = train_test_split(XX, YY,train_size = 0.8 , shuffle = True)




In [24]:
def my_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = InceptionResNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    last_3 = Flatten()(last_layer)
    last_2 = Dense(640, activation = 'relu')(last_3)
    last_1 = Dense(320, activation = 'relu')(last_2)
    last_0 = Dense(5, activation = 'softmax')(last_1)
        
    model = Model(inp,last_0)
    
    return model

input_size = (target_h,target_w,3)

my_invoice_yolo_model = my_model(input_size)

#  print(my_invoice_yolo_model.summary())


model_yolo = my_invoice_yolo_model
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()

#  model_yolo.summary()

last_2 = Conv2D(768,(5,5) , activation='relu' , padding='same')(model_yolo.layers[-1].output)
    
last_1  = Conv2D(384,(3,3) , activation='relu' , padding='same')(last_2)
    
last_0 = Conv2D(1690,(3,3) , activation='relu' , padding='valid')(last_1)

# last = Conv2D(5070,(3,3) , activation='relu', padding='valid')(last_0)

last = Conv2D(3380,(3,3), padding='valid')(last_0)
    
final = Reshape((13, 13, 2, 10))(last)
        
model_yolo_1 = Model(model_yolo.input, final)
    

model_yolo_1.summary()



Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 208, 208, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 103, 103, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 103, 103, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 103, 103, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

batch_normalization_135 (BatchN (None, 11, 11, 160)  480         conv2d_135[0][0]                 
__________________________________________________________________________________________________
activation_135 (Activation)     (None, 11, 11, 160)  0           batch_normalization_135[0][0]    
__________________________________________________________________________________________________
conv2d_133 (Conv2D)             (None, 11, 11, 192)  208896      block17_14_ac[0][0]              
__________________________________________________________________________________________________
conv2d_136 (Conv2D)             (None, 11, 11, 192)  215040      activation_135[0][0]             
__________________________________________________________________________________________________
batch_normalization_133 (BatchN (None, 11, 11, 192)  576         conv2d_133[0][0]                 
__________________________________________________________________________________________________
batch_norm

__________________________________________________________________________________________________
conv2d_163 (Conv2D)             (None, 5, 5, 320)    829440      activation_162[0][0]             
__________________________________________________________________________________________________
batch_normalization_158 (BatchN (None, 5, 5, 384)    1152        conv2d_158[0][0]                 
__________________________________________________________________________________________________
batch_normalization_160 (BatchN (None, 5, 5, 288)    864         conv2d_160[0][0]                 
__________________________________________________________________________________________________
batch_normalization_163 (BatchN (None, 5, 5, 320)    960         conv2d_163[0][0]                 
__________________________________________________________________________________________________
activation_158 (Activation)     (None, 5, 5, 384)    0           batch_normalization_158[0][0]    
__________

In [25]:
opt = Adam(lr=0.000001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
model_yolo_1.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
model_yolo_1.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7ff3badc4278>

In [21]:
def my_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = InceptionResNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    last_3 = Flatten()(last_layer)
    last_2 = Dense(640, activation = 'relu')(last_3)
    last_1 = Dense(320, activation = 'relu')(last_2)
    last_0 = Dense(5, activation = 'softmax')(last_1)
        
    model = Model(inp,last_0)
    
    return model

input_size = (target_h,target_w,3)

my_invoice_yolo_model = my_model(input_size)

#  print(my_invoice_yolo_model.summary())


model_yolo = my_invoice_yolo_model
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()

#  model_yolo.summary()

last_2 = Conv2D(768,(5,5) , activation='relu' , padding='same')(model_yolo.layers[-1].output)
    
last_1  = Conv2D(384,(3,3) , activation='relu' , padding='same')(last_2)
    
last_0 = Conv2D(1690,(3,3) , activation='relu' , padding='valid')(last_1)

# last = Conv2D(5070,(3,3) , activation='relu', padding='valid')(last_0)

last = Conv2D(3380,(3,3), padding='valid')(last_0)
    
final = Reshape((13, 13, 2, 10))(last)
        
model_yolo_2 = Model(model_yolo.input, final)
    

model_yolo_2.summary()



Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 208, 208, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 103, 103, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 103, 103, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 103, 103, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

In [23]:
opt = RMSprop(lr=0.000001, epsilon=0.00001)
model_yolo_2.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
model_yolo_2.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f945cbdf860>

In [24]:
opt = RMSprop(lr=0.00001, epsilon=0.0001)
model_yolo_2.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
model_yolo_2.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f944cee0978>

In [50]:
def my_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = InceptionResNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    last_3 = Flatten()(last_layer)
    last_2 = Dense(640, activation = 'relu')(last_3)
    last_1 = Dense(320, activation = 'relu')(last_2)
    last_0 = Dense(5, activation = 'softmax')(last_1)
        
    model = Model(inp,last_0)
    
    return model

input_size = (target_h,target_w,3)

my_invoice_yolo_model = my_model(input_size)

#  print(my_invoice_yolo_model.summary())


model_yolo = my_invoice_yolo_model
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()
model_yolo.layers.pop()

#  model_yolo.summary()

last_2 = Conv2D(768,(5,5) , activation='relu' , padding='same')(model_yolo.layers[-1].output)
    
last_1  = Conv2D(384,(3,3) , activation='relu' , padding='same')(last_2)
    
last_0 = Conv2D(1690,(3,3) , activation='relu' , padding='valid')(last_1)

# last = Conv2D(5070,(3,3) , activation='relu', padding='valid')(last_0)

last = Conv2D(3380,(3,3), padding='valid')(last_0)
    
final = Reshape((13, 13, 2, 10))(last)
        
model_yolo_3 = Model(model_yolo.input, final)
    

model_yolo_3.summary()

Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 208, 208, 3)  0                                            
__________________________________________________________________________________________________
conv2d_208 (Conv2D)             (None, 103, 103, 32) 864         input_2[0][0]                    
__________________________________________________________________________________________________
batch_normalization_204 (BatchN (None, 103, 103, 32) 96          conv2d_208[0][0]                 
__________________________________________________________________________________________________
activation_204 (Activation)     (None, 103, 103, 32) 0           batch_normalization_204[0][0]    
____________________________________________________________________________________________

In [51]:
opt = RMSprop(lr=0.00001, epsilon=0.0001)
model_yolo_3.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
model_yolo_3.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f93eff139b0>

In [52]:
opt = RMSprop(lr=0.0001, epsilon=0.0001)
model_yolo_3.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
model_yolo_3.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f93e67cbf60>

In [53]:
opt = RMSprop(lr=0.001, epsilon=0.0001) 
loss_fn = tf.keras.losses.mean_squared_error
model_yolo_3.compile(optimizer= opt,loss = loss_fn, metrics=['accuracy'])
model_yolo_3.fit(X_train, Y_train, epochs= 50, batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


<keras.callbacks.callbacks.History at 0x7f93c97a49e8>

In [66]:
def yolo_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = MobileNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    conv = Conv2D(512,(3,3) , activation='relu' , padding='same')(last_layer)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)
    
    
    conv = Conv2D(128,(5,5) , activation='relu' , padding='same')(lr)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)
    
    
    conv = Conv2D(20,(3,3) , activation='relu' , padding='same')(lr)
    
    final = Reshape((grid_y_axis,grid_x_axis,num_anchors,info))(conv)
   
    model = Model(inp,final)
    
    return model


input_size = (target_h,target_w,channels)
yolo_invoice_model = yolo_model(input_size)

print(yolo_invoice_model.summary())

# save_model(yolo_invoice_model)
 



Model: "model_2"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 448, 448, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 449, 449, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 224, 224, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 224, 224, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

In [67]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
yolo_invoice_model.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
yolo_invoice_model.fit(X_train ,Y_train ,epochs= 10 ,batch_size = 4, validation_data=(X_val,Y_val))

Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f87b858a588>

In [158]:
Y_train.shape

(28, 14, 14, 2, 10)

In [164]:
Y_train[5,13]

array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0.,

In [203]:
def yolo_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = MobileNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    conv = Conv2D(512,(3,3) , activation='relu' , padding='same')(last_layer)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)
    
    
    conv = Conv2D(128,(5,5) , activation='relu' , padding='same')(lr)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)
    
    
    conv = Conv2D(20,(3,3) , activation='relu' , padding='same')(lr)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)

    
    final = Reshape((grid_y_axis,grid_x_axis,num_anchors,info))(lr)
   
    model = Model(inp,final)
    
    return model


input_size = (target_h,target_w,channels)
my_yolo_invoice_model_1 = yolo_model(input_size)

print(my_yolo_invoice_model_1.summary())




Model: "model_6"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 448, 448, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 449, 449, 3)  0           input_6[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 224, 224, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 224, 224, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

In [199]:
opt = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_yolo_invoice_model.compile(optimizer= opt,loss=tf.keras.losses.MeanAbsoluteError(),metrics=['accuracy'])
my_yolo_invoice_model.fit(X_train ,Y_train ,epochs= 5 ,batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f873369aeb8>

In [191]:
print(np.any(np.isnan(X_train)))
print(np.any(np.isnan(Y_train)))


False
False


In [200]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_yolo_invoice_model.compile(optimizer= opt,loss=tf.keras.losses.MeanAbsoluteError(),metrics=['accuracy'])
my_yolo_invoice_model.fit(X_train ,Y_train ,epochs= 5 ,batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f87240d6a20>

In [201]:

opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_yolo_invoice_model.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
my_yolo_invoice_model.fit(X_train ,Y_train ,epochs= 10 ,batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7f87336f9748>

In [None]:
opt = Adam(lr=0.00001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_yolo_invoice_model_1.compile(optimizer= opt,loss='mean_squared_error',metrics=['accuracy'])
my_yolo_invoice_model_1.fit(X_train ,Y_train ,epochs= 5 ,batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5

In [205]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_yolo_invoice_model.compile(optimizer= opt,loss='mean_squared_error',metrics=['accuracy'])
my_yolo_invoice_model.fit(X_train ,Y_train ,epochs= 5 ,batch_size = 4, validation_data=(X_val,Y_val))

Train on 28 samples, validate on 8 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7f86edb41630>

In [15]:
def yolo_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = InceptionResNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    new_last_layer = UpSampling2D(2)(last_layer)
   
    conv = Conv2D(512,(5,5) , activation='relu' , padding='valid')(new_last_layer)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)
    
    
    conv = Conv2D(128,(5,5) , activation='relu' , padding='valid')(lr)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)

    conv = Conv2D(64,(5,5) , activation='relu' , padding='valid')(lr)
    conv = Dropout(0.4)(conv)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)

  
    conv = Conv2D(30,(3, 3) , activation='relu' , padding='same')(lr)
    bn = BatchNormalization()(conv)
    lr = LeakyReLU(alpha=0.1)(bn)

    
    final = Reshape((grid_y_axis, grid_x_axis, num_anchors,info))(lr)
   
    model = Model(inp,final)
    
    return model


input_size = (512, 512, 3)
my_yolo_invoice_model_4 = yolo_model(input_size)

print(my_yolo_invoice_model_4.summary())




Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 512, 512, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 255, 255, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 255, 255, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 255, 255, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

activation_170 (Activation)     (None, 14, 14, 224)  0           batch_normalization_170[0][0]    
__________________________________________________________________________________________________
conv2d_168 (Conv2D)             (None, 14, 14, 192)  399360      block8_1_ac[0][0]                
__________________________________________________________________________________________________
conv2d_171 (Conv2D)             (None, 14, 14, 256)  172032      activation_170[0][0]             
__________________________________________________________________________________________________
batch_normalization_168 (BatchN (None, 14, 14, 192)  576         conv2d_168[0][0]                 
__________________________________________________________________________________________________
batch_normalization_171 (BatchN (None, 14, 14, 256)  768         conv2d_171[0][0]                 
__________________________________________________________________________________________________
activation

In [14]:
target_size = [512, 512]
target_w = 512 # target sizes of image in model input
target_h = 512 #target sizes of image in model input

grid_y_axis = 16  # each image is to be segmented to 14 x 14 grid
grid_x_axis = 16  # # each image is to be segmented to 14 x 14 grid

grid_w = target_w / grid_x_axis  # grid cell width
grid_h = target_h / grid_y_axis  # grid cell height

channels = 3
num_anchors = 3
classes = 5 # vendor, invoice, inv_date, po, buyer
info = 5 + classes    # pc, x, y, h, w, and class probabilities

categories = ['vendor', 'invoice', 'inv_date', 'po', 'buyer'] # details of classes

In [23]:
# Preparing Input(X) and Target(Y) file for training :

X_final = [] # X_final list to convert to np array later
Y_true_final = [] # Y_final list to convert to np array later
Y_mask_final = []

Y_true = np.zeros((grid_y_axis,grid_x_axis,num_anchors,info))
Y_mask = np.zeros((grid_y_axis,grid_x_axis,num_anchors,1))

true_boxes = np.zeros((num_images, num_obj, 5))


image_list = r_new_data['img_id'].unique() # make a list of unique images

for i in range(len(image_list)):
    
    image_path = '/home/scar3crow/Downloads/8-6-new-scan/' + image_list[i]
    
    x = cv2.imread(image_path)
    x_ratio = target_w / x.shape[1]
    y_ratio = target_h / x.shape[0]
    img = cv2.resize(x,(target_w, target_h))
    
    X_final.append(img)
    
# Y_true = np.zeros((grid_y_axis,grid_x_axis,num_anchors,info))
# Y_mask = np.zeros((grid_y_axis,grid_x_axis,num_anchors,1))

# for j in range(len(image_list)):

    r_new_data_slice = r_new_data.loc[r_new_data['img_id'].isin([image_list[i]])].reset_index(drop=True)

    obj = 0

    for j in range(len(r_new_data_slice)):
    
        image_w = r_new_data_slice['image_width'][j]
        image_h = r_new_data_slice['image_height'][j]
    
        x_ratio = target_size[1] / image_w
        y_ratio = target_size[0] / image_h
    
        xmin = r_new_data_slice['x'][j] * x_ratio
        ymin = r_new_data_slice['y'][j] * y_ratio
        
        xmax = (r_new_data_slice['x'][j] + r_new_data_slice['width'][j]) * x_ratio
        ymax = (r_new_data_slice['y'][j] + r_new_data_slice['height'][j]) * y_ratio
        
        w = (r_new_data_slice['width'][j] * x_ratio) / target_size[1]
        h = (r_new_data_slice['height'][j] * y_ratio) / target_size[0]
    
        x = (xmin + (xmax-xmin)/2) / target_size[1]
        y = (ymin + (ymax-ymin)/2) / target_size[0] 
        
#        true_boxes[i, j][0] = x
#        true_boxes[i, j][1] = y
#        true_boxes[i, j][2] = w
#        true_boxes[i, j][3] = h
#        true_boxes[i, j][4] = obj
#        obj = obj+1
        
        true_boxes[i, j][0] = xmin
        true_boxes[i, j][1] = ymin
        true_boxes[i, j][2] = xmax
        true_boxes[i, j][3] = ymax
        true_boxes[i, j][4] = obj
        obj = obj+1



In [24]:
print(true_boxes.shape)
print(anchors_wrt_cell.shape)

(36, 5, 5)
(3, 2)


In [25]:
true_boxes[0]

array([[  6.15384615,   2.44976077, 240.        , 142.0861244 ,
          0.        ],
       [274.46153846,   9.79904306, 340.92307692,  58.79425837,
          1.        ],
       [397.53846154,   9.79904306, 466.46153846,  66.14354067,
          2.        ],
       [272.        , 144.53588517, 398.76923077, 203.33014354,
          3.        ],
       [  6.15384615, 139.63636364, 259.69230769, 276.82296651,
          4.        ]])

In [22]:
anchors_wrt_cell

array([[2.5       , 1.89845361],
       [6.30769231, 6.77419968],
       [6.23076923, 4.54528915]])

In [41]:
box_centers = (true_boxes[:, :, 0:2] + true_boxes[:, :, 2:4]) / 2
box_sizes = true_boxes[:, :, 2:4] - true_boxes[:, :, 0:2]
print(box_centers.shape)
print(box_sizes.shape)

(36, 5, 2)
(36, 5, 2)


In [36]:
box_centers[0]

array([[123.07692308,  72.26794258],
       [307.69230769,  34.29665072],
       [432.        ,  37.97129187],
       [335.38461538, 173.93301435],
       [132.92307692, 208.22966507]])

In [37]:
y_true_13 = np.zeros((target_w // 32, target_h // 32, 3, 5 + classes+1), np.float32)
y_true_13.shape

(16, 16, 3, 11)

In [38]:
y_true_13[..., -1] = 1.
y_true_13[0, 0]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.],
       [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.]], dtype=float32)

In [42]:
box_sizes = np.expand_dims(box_sizes, 1)
print(box_sizes.shape)
mins = np.maximum(- box_sizes / 2, - anchors_wrt_cell / 2)
maxs = np.minimum(box_sizes / 2, anchors / 2)


(36, 1, 5, 2)


ValueError: operands could not be broadcast together with shapes (36,1,5,2) (3,2) 

In [43]:
def process_box(boxes, labels, img_size, class_num, anchors):
    '''
    Generate the y_true label, i.e. the ground truth feature_maps in 3 different scales.
    params:
        boxes: [N, 5] shape, float32 dtype. `x_min, y_min, x_max, y_mix, mixup_weight`.
        labels: [N] shape, int64 dtype.
        class_num: int64 num.
        anchors: [9, 4] shape, float32 dtype.
    '''
    anchors_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]

    # convert boxes form:
    # shape: [N, 2]
    # (x_center, y_center)
    box_centers = (boxes[:, 0:2] + boxes[:, 2:4]) / 2
    # (width, height)
    box_sizes = boxes[:, 2:4] - boxes[:, 0:2]

    # [13, 13, 3, 5+num_class+1] `5` means coords and labels. `1` means mix up weight. 
    y_true_13 = np.zeros((img_size[1] // 32, img_size[0] // 32, 3, 6 + class_num), np.float32)
    y_true_26 = np.zeros((img_size[1] // 16, img_size[0] // 16, 3, 6 + class_num), np.float32)
    y_true_52 = np.zeros((img_size[1] // 8, img_size[0] // 8, 3, 6 + class_num), np.float32)

    # mix up weight default to 1.
    y_true_13[..., -1] = 1.
    y_true_26[..., -1] = 1.
    y_true_52[..., -1] = 1.

    y_true = [y_true_13, y_true_26, y_true_52]

    # [N, 1, 2]
    box_sizes = np.expand_dims(box_sizes, 1)
    # broadcast tricks
    # [N, 1, 2] & [9, 2] ==> [N, 9, 2]
    mins = np.maximum(- box_sizes / 2, - anchors / 2)
    maxs = np.minimum(box_sizes / 2, anchors / 2)
    # [N, 9, 2]
    whs = maxs - mins

    # [N, 9]
    iou = (whs[:, :, 0] * whs[:, :, 1]) / (
                box_sizes[:, :, 0] * box_sizes[:, :, 1] + anchors[:, 0] * anchors[:, 1] - whs[:, :, 0] * whs[:, :,
                                                                                                         1] + 1e-10)
    # [N]
    best_match_idx = np.argmax(iou, axis=1)

    ratio_dict = {1.: 8., 2.: 16., 3.: 32.}
    for i, idx in enumerate(best_match_idx):
        # idx: 0,1,2 ==> 2; 3,4,5 ==> 1; 6,7,8 ==> 0
        feature_map_group = 2 - idx // 3
        # scale ratio: 0,1,2 ==> 8; 3,4,5 ==> 16; 6,7,8 ==> 32
        ratio = ratio_dict[np.ceil((idx + 1) / 3.)]
        x = int(np.floor(box_centers[i, 0] / ratio))
        y = int(np.floor(box_centers[i, 1] / ratio))
        k = anchors_mask[feature_map_group].index(idx)
        c = labels[i]
        # print(feature_map_group, '|', y,x,k,c)

        y_true[feature_map_group][y, x, k, :2] = box_centers[i]
        y_true[feature_map_group][y, x, k, 2:4] = box_sizes[i]
        y_true[feature_map_group][y, x, k, 4] = 1.
        y_true[feature_map_group][y, x, k, 5 + c] = 1.
        y_true[feature_map_group][y, x, k, -1] = boxes[i, -1]

    return y_true_13, y_true_26, y_true_52

In [None]:
8832.
























































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































