In [1]:
cd models/research/object_detection

/home/scar3crow/Dropbox/WorkStation-Subrata/python/models/research/object_detection


In [3]:
import numpy as np
import pandas as pd
import cv2
import os
import tqdm
from scipy.io import loadmat

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

from PIL import Image
import pytesseract

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import backend as K

from utils import *

from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.layers import *

from keras.applications import MobileNetV2
from keras.applications import InceptionResNetV2

from keras.models import Model
from keras.models import model_from_json


In [4]:
# Making a list of image path

inv_directory = '/home/scar3crow/Downloads/8-6-new-scan'  ## 'invoices' is a zip file of jpg images in ...../Downloads 
                                                        
inv_new_image = ['/home/scar3crow/Downloads/8-6-new-scan/{}'.format(i) for i in os.listdir(inv_directory)] # making the list
inv_new_image.sort() # Sorting the list

print('Number of images = ', len(inv_new_image))
inv_new_image[20]

Number of images =  36


'/home/scar3crow/Downloads/8-6-new-scan/121a.jpg'

In [5]:
# Check sizes of exiting images :

# Create a Dataframe with image id and height(row) and width(column):

rows = []
columns = []
image_sl = []
df_new = pd.DataFrame()

for i in range(len(inv_new_image)):
    image = cv2.imread(inv_new_image[i]) ## Loading image
    height, width, _ = image.shape
    rows.append(height)
    columns.append(width)
    image_sl.append(inv_new_image[i])
    
row_values = pd.Series(rows)
col_values = pd.Series(columns)
image_num = pd.Series(image_sl)


df_new.insert(loc=0, column='image_serial', value=image_num)
df_new.insert(loc=1, column='rows', value=row_values)
df_new.insert(loc=2, column='columns', value=col_values)

df_new.head()


Unnamed: 0,image_serial,rows,columns
0,/home/scar3crow/Downloads/8-6-new-scan/101a.jpg,160,416
1,/home/scar3crow/Downloads/8-6-new-scan/102a.jpg,406,870
2,/home/scar3crow/Downloads/8-6-new-scan/103a.jpg,260,416
3,/home/scar3crow/Downloads/8-6-new-scan/104a.jpg,405,911
4,/home/scar3crow/Downloads/8-6-new-scan/105a.jpg,147,416


In [6]:
# Loading output of VGG Image Annotation tool and create a dataframe

r_new_data = pd.read_csv('/home/scar3crow/Downloads/via_new_data.csv')
num_obj = r_new_data['region_count'][0] # number of objects in each photo
r_new_data.drop(r_new_data.columns[[1, 2, 3, 4]], axis=1, inplace=True) # reduce unnecessary columns
r_new_data.sort_values(by=['#filename'], ascending=True) # Sorting based on image-id
num_images = r_new_data["#filename"].nunique() # Find out number of unique images

print('Number of classes = ', num_obj)
print('Number of unique images = ', num_images)
r_new_data[55:65]

Number of classes =  5
Number of unique images =  36


Unnamed: 0,#filename,region_shape_attributes,region_attributes
55,63a.jpg,"{""name"":""rect"",""x"":1,""y"":2,""width"":140,""height...","{""text"":""vendor""}"
56,63a.jpg,"{""name"":""rect"",""x"":210,""y"":1,""width"":70,""heigh...","{""text"":""invoice""}"
57,63a.jpg,"{""name"":""rect"",""x"":314,""y"":1,""width"":63,""heigh...","{""text"":""date""}"
58,63a.jpg,"{""name"":""rect"",""x"":211,""y"":64,""width"":76,""heig...","{""text"":""po""}"
59,63a.jpg,"{""name"":""rect"",""x"":2,""y"":68,""width"":165,""heigh...","{""text"":""buyer""}"
60,101a.jpg,"{""name"":""rect"",""x"":6,""y"":23,""width"":119,""heigh...","{""text"":""vendor""}"
61,101a.jpg,"{""name"":""rect"",""x"":254,""y"":23,""width"":40,""heig...","{""text"":""invoice""}"
62,101a.jpg,"{""name"":""rect"",""x"":331,""y"":21,""width"":52,""heig...","{""text"":""date""}"
63,101a.jpg,"{""name"":""rect"",""x"":251,""y"":70,""width"":71,""heig...","{""text"":""po""}"
64,101a.jpg,"{""name"":""rect"",""x"":6,""y"":66,""width"":142,""heigh...","{""text"":""buyer""}"


In [7]:
# Making a dataframe for Image_id, x, y, width, height and class

x = []
y = []
width = []
height = []
obj_class = []


for i in range(len(r_new_data)):
    
    r_size = r_new_data.values[i, 1][1:(len(r_new_data.values[i, 1])-1)]
    r_size_par = r_size.split(",")
    
    x.append(int("".join(filter(str.isdigit, r_size_par[1]))))
    y.append(int("".join(filter(str.isdigit, r_size_par[2]))))
    width.append(int("".join(filter(str.isdigit, r_size_par[3]))))
    height.append(int("".join(filter(str.isdigit, r_size_par[4]))))
    
    r_attribs = r_new_data.values[i, 2][1:(len(r_new_data.values[i, 2])-1)]
    r_attribs_par = r_attribs.split(':')[1]
    obj_class.append(r_attribs_par[1:(len(r_attribs_par)-1)])
    
x_values = pd.Series(x)
y_values = pd.Series(y)
width_values = pd.Series(width)
height_values = pd.Series(height)
class_values = pd.Series(obj_class)

r_new_data.insert(loc=1, column='x', value=x_values)
r_new_data.insert(loc=2, column='y', value=y_values)
r_new_data.insert(loc=3, column='width', value=width_values)
r_new_data.insert(loc=4, column='height', value=height_values)
r_new_data.insert(loc=5, column='obj_class', value=class_values)


r_new_data.drop(r_new_data.columns[[6, 7]], axis=1, inplace=True) # reduce unnecessary columns

r_new_data.rename({'#filename': 'img_id'}, axis=1, inplace=True) # changing column name

r_new_data.head()

Unnamed: 0,img_id,x,y,width,height,obj_class
0,50a.jpg,5,1,190,57,vendor
1,50a.jpg,223,4,54,20,invoice
2,50a.jpg,323,4,56,23,date
3,50a.jpg,221,59,103,24,po
4,50a.jpg,5,57,206,56,buyer


In [8]:
print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images

print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts()) 


Number of unique images =  36
Number of classes in diff. categories =  buyer      38
date       36
invoice    36
vendor     36
po         33
order       1
Name: obj_class, dtype: int64


In [9]:
# To find smallest width & height boxes in 'buyer' which should be 'po'
gb = r_new_data.groupby('obj_class')    
[gb.get_group('buyer') for x in gb.groups]


[       img_id    x    y  width  height obj_class
 4     50a.jpg    5   57    206      56     buyer
 9     51a.jpg    4   53    152      64     buyer
 14    52a.jpg    1   50    161      74     buyer
 19    53a.jpg    0   50    177      76     buyer
 24    54a.jpg   31  103    186      61     buyer
 29    55a.jpg    1   56    183      74     buyer
 34    56a.jpg    1   56    166      62     buyer
 39    59a.jpg    3   58    175      62     buyer
 44    60a.jpg    0   44    165      52     buyer
 49    61a.jpg    1   56    155      63     buyer
 54    62a.jpg    4   58    163      61     buyer
 59    63a.jpg    2   68    165      55     buyer
 64   101a.jpg    6   66    142      47     buyer
 69   102a.jpg  431  140    307     164     buyer
 74   103a.jpg   12  126    154      68     buyer
 79   104a.jpg   21  249    431     152     buyer
 84   105a.jpg    4   53    158      80     buyer
 88   106a.jpg  230   63     89      22     buyer
 89   106a.jpg    1   53    154      72     buyer


In [10]:
# Correcting above wrong spelling of object classes and rechecking

id_1 = r_new_data.index[r_new_data['obj_class'] == 'order'] # Finding the index
id_2 = r_new_data.index[r_new_data['obj_class'] == 'date'] # to change 'date' to 'inv_date' to be consistent with old data

r_new_data.at[id_1, 'obj_class'] = 'po' # writing the correct spelling 
r_new_data.at[88, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[163, 'obj_class'] = 'po' # # 'buyer' to 'po'
r_new_data.at[id_2, 'obj_class'] = 'inv_date' # # 'date' to 'inv_date'

print('Number of unique images = ', r_new_data['img_id'].nunique())  # print total no, of unique images
print('Number of unique classes = ', r_new_data['obj_class'].nunique())
print('Number of classes in diff. categories = ', r_new_data['obj_class'].value_counts()) 


Number of unique images =  36
Number of unique classes =  5
Number of classes in diff. categories =  inv_date    36
po          36
invoice     36
buyer       36
vendor      36
Name: obj_class, dtype: int64


In [11]:
## Creating train images by cropping original images as per x, y, width, height of bbox created in VGG I.A. tool

my_class_image = [] # List of class images with proper id
kount = 0

for i in range(len(inv_new_image)):
    
    id = inv_new_image[i].split('/')[-1] # finding the image_id
    indices = r_new_data.index[r_new_data['img_id'] == id].tolist() # making the list of indices of r_data where same image_id exists
    
    if len(indices) != 0:
        
        image = cv2.imread(inv_new_image[i]) ## Loading image
    
        kount = kount + 1
        
        # Cropping, making a training image list and saving training images in 'train_invoice' folder
    
        for j in indices:
        
            image_new_crop = image[r_new_data['y'][j]:r_new_data['y'][j]+r_new_data['height'][j], r_new_data['x'][j]:r_new_data['x'][j]+r_new_data['width'][j]]
        
            class_sl_id = '/home/scar3crow/Downloads/train_invoice/train_val_data/' + id.split('.')[0] + '-' + str(r_new_data['obj_class'][j]) + str(kount) + '.jpg'
        
            my_class_image.append(class_sl_id)
            
            cv2.imwrite(class_sl_id, image_new_crop)

In [12]:
# Read images for classification

image_directory = '/home/scar3crow/Downloads/train_invoice/train_val_data'  ## 'invoices' is a zip file of jpg images in ...../Downloads 
                                                        
inv_crop_image = ['/home/scar3crow/Downloads/train_invoice/train_val_data/{}'.format(i) for i in os.listdir(image_directory)] # making the list
inv_crop_image.sort() # Sorting the list

print('Number of images = ', len(inv_crop_image))
inv_crop_image[50]

Number of images =  330


'/home/scar3crow/Downloads/train_invoice/train_val_data/109a-buyer9.jpg'

In [13]:
# Check sizes of exiting images :

# Create a Dataframe with image id and height(row) and width(column):

rows = []
columns = []
image_sl = []
df_1 = pd.DataFrame()

for i in range(len(inv_crop_image)):
    image = cv2.imread(inv_crop_image[i]) ## Loading image
    height, width, _ = image.shape
    rows.append(height)
    columns.append(width)
    image_sl.append(inv_crop_image[i])
    
row_values = pd.Series(rows)
col_values = pd.Series(columns)
image_num = pd.Series(image_sl)


df_1.insert(loc=0, column='image_serial', value=image_num)
df_1.insert(loc=1, column='rows', value=row_values)
df_1.insert(loc=2, column='columns', value=col_values)

df_1.head()


Unnamed: 0,image_serial,rows,columns
0,/home/scar3crow/Downloads/train_invoice/train_...,86,248
1,/home/scar3crow/Downloads/train_invoice/train_...,14,126
2,/home/scar3crow/Downloads/train_invoice/train_...,16,134
3,/home/scar3crow/Downloads/train_invoice/train_...,18,136
4,/home/scar3crow/Downloads/train_invoice/train_...,60,342


In [14]:
# Finding maximum sizes :

i = df_1[['rows']].idxmax()
j = df_1[['columns']].idxmax()

print(i)
print(j)

print(df_1.loc[i])
print(df_1.loc[j])

rows    15
dtype: int64
columns    19
dtype: int64
                                         image_serial  rows  columns
15  /home/scar3crow/Downloads/train_invoice/train_...   164      307
                                         image_serial  rows  columns
19  /home/scar3crow/Downloads/train_invoice/train_...    94      601


In [15]:
categories = ['vendor', 'buyer', 'invoice', 'inv_date', 'po']

values = np.array(categories)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)

onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
y_encoded = onehot_encoder.fit_transform(integer_encoded)

print(categories)                               
print(y_encoded)


['vendor', 'buyer', 'invoice', 'inv_date', 'po']
[[0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 1. 0.]]


In [11]:
def letterbox_image(image, size):
    '''resize image with unchanged aspect ratio using padding'''
    iw, ih = image.size
    w, h = size
    scale = min(w/iw, h/ih)
    nw = int(iw*scale)
    nh = int(ih*scale)

    image = image.resize((nw,nh), Image.BICUBIC)
    new_image = Image.new('RGB', size, (128,128,128))
    new_image.paste(image, ((w-nw)//2, (h-nh)//2))
    return new_image

In [12]:
# Preparing Input(X) and Target(Y) file for training :

# Variable Definition

img_w = 212 # target sizes of image during classification
img_h = 212 # target sizes of image during classification

# Start :

X_final = [] # X_final list to convert to np array later
Y_final = [] # Y_final list to convert to np array later


for i in range(len(inv_crop_image)):
    
    image_path = inv_crop_image[i]
    
    x = Image.open(image_path)
    
#    x = cv2.imread(image_path)
    x_ratio = img_w / x.size[0]
    y_ratio = img_h / x.size[1]
    
    img = letterbox_image(x, [img_w, img_h])
    
    open_cv_image = np.array(img) 

    open_cv_image = open_cv_image[:, :, ::-1].copy() # Convert RGB to BGR
    
    
#    img = cv2.resize(x,(img_w, img_h))
    
    X_final.append(open_cv_image)
    
    Y = np.zeros((5))
    
    y_1 = inv_crop_image[i].split('/')
    y_2 = y_1[-1].split('-')[-1].split('.')
    y_3 = ''.join(i for i in y_2[0] if not i.isdigit())
    
    Y =  y_encoded[categories.index(y_3), :]
                    
    Y_final.append(Y)
    
X = np.array(X_final)
X_final = []
Y = np.array(Y_final)
Y_final = []

X = (X - 127.5)/127.5  # X normalising since pixels vary from 0 to 255
    
np.save('/home/scar3crow/Downloads/Data1/X_2.npy',X)
np.save('/home/scar3crow/Downloads/Data1/Y_2.npy',Y)


In [13]:
#import data
#X and Y numpy arrays are created using the Prepocess.py file
X = np.load('/home/scar3crow/Downloads/Data1/X_2.npy')
Y = np.load('/home/scar3crow/Downloads/Data1/Y_2.npy')

X_train , X_val , Y_train , Y_val  = train_test_split(X,Y,train_size = 0.8 , shuffle = True)
X = []
Y = []
X_train.shape



(264, 212, 212, 3)

In [16]:
def my_model(input_shape):
    
    
    inp = Input(input_shape)
   
    model = InceptionResNetV2( input_tensor= inp , include_top=False, weights='imagenet')
    last_layer = model.output
    
    last_3 = Flatten()(last_layer)
    last_2 = Dense(640, activation = 'relu')(last_3)
    last_1 = Dense(320, activation = 'relu')(last_2)
    last_0 = Dense(5, activation = 'softmax')(last_1)
        
    model = Model(inp,last_0)
    
    return model


In [15]:
input_size = (img_h,img_w,3)

my_invoice_class_model= my_model(input_size)

print(my_invoice_class_model.summary())




Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 212, 212, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 105, 105, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 105, 105, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 105, 105, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

In [16]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_invoice_class_model.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
my_invoice_class_model.fit(X_train, Y_train, epochs= 20, batch_size = 4, validation_data=(X_val,Y_val))


Train on 264 samples, validate on 66 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7fb40d59dc18>

In [17]:
my_invoice_class_model.fit(X_train, Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))

Train on 264 samples, validate on 66 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7fb40d66c898>

In [18]:
my_invoice_class_model.fit(X_train, Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))

Train on 264 samples, validate on 66 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7fb478d84860>

In [19]:
my_invoice_class_model.fit(X_train, Y_train, epochs= 5, batch_size = 4, validation_data=(X_val,Y_val))

Train on 264 samples, validate on 66 samples
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.callbacks.History at 0x7fb3fcaf4e48>

In [17]:
# Preparing Input(X) and Target(Y) file for training :

# Variable Definition

img_w = 208 # target sizes of image during classification
img_h = 208 # target sizes of image during classification

# Start :

X_final = [] # X_final list to convert to np array later
Y_final = [] # Y_final list to convert to np array later


for i in range(len(inv_crop_image)):
    
    image_path = inv_crop_image[i]
    
#    x = Image.open(image_path)
    
    x = cv2.imread(image_path)
    x_ratio = img_w / x.shape[1]
    y_ratio = img_h / x.shape[0]
    
#    img = letterbox_image(x, [img_w, img_h])
    
#    open_cv_image = np.array(img) 

#    open_cv_image = open_cv_image[:, :, ::-1].copy() # Convert RGB to BGR
    
    
    img = cv2.resize(x,(img_w, img_h))
    
    X_final.append(img)
    
    Y = np.zeros((5))
    
    y_1 = inv_crop_image[i].split('/')
    y_2 = y_1[-1].split('-')[-1].split('.')
    y_3 = ''.join(i for i in y_2[0] if not i.isdigit())
    
    Y =  y_encoded[categories.index(y_3), :]
                    
    Y_final.append(Y)
    
X = np.array(X_final)
X_final = []
Y = np.array(Y_final)
Y_final = []

X = (X - 127.5)/127.5  # X normalising since pixels vary from 0 to 255
    
np.save('/home/scar3crow/Downloads/Data1/X_3.npy',X)
np.save('/home/scar3crow/Downloads/Data1/Y_3.npy',Y)


In [18]:
#import data
#X and Y numpy arrays are created using the Prepocess.py file
X = np.load('/home/scar3crow/Downloads/Data1/X_3.npy')
Y = np.load('/home/scar3crow/Downloads/Data1/Y_3.npy')

X_train , X_val , Y_train , Y_val  = train_test_split(X,Y,train_size = 0.8 , shuffle = True)
X = []
Y = []
X_train.shape



(264, 208, 208, 3)

In [19]:
input_size = (img_h,img_w,3)

my_invoice_class_model_5= my_model(input_size)

print(my_invoice_class_model_5.summary())




Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 208, 208, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 103, 103, 32) 864         input_1[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 103, 103, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 103, 103, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

In [None]:
opt = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)
my_invoice_class_model_5.compile(optimizer= opt,loss='categorical_crossentropy',metrics=['accuracy'])
my_invoice_class_model_5.fit(X_train, Y_train, epochs= 20, batch_size = 4, validation_data=(X_val,Y_val))


Train on 264 samples, validate on 66 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
 28/264 [==>...........................] - ETA: 3:38 - loss: 0.1764 - accuracy: 0.8929