In [45]:
import numpy as np
import pandas as pd
import cv2
import os
import tqdm
from scipy.io import loadmat

from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import LabelEncoder

from PIL import Image
import pytesseract

import matplotlib.pyplot as plt

import tensorflow as tf
from tensorflow import keras
from keras import backend as K

from utils import *

from sklearn.model_selection import train_test_split
from keras.callbacks import ModelCheckpoint
from keras.optimizers import Adam
from keras.layers import *

from keras.applications import MobileNetV2
from keras.applications import InceptionResNetV2

from keras.models import Model
from keras.models import model_from_json


In [3]:
# Change the kernel's working directory (path is relative to the current one).
# NOTE(review): presumably done so the following `from utils import *` resolves — confirm.
cd models/research/object_detection

/home/scar3crow/Dropbox/WorkStation-Subrata/python/models/research/object_detection


In [4]:
from utils import *

In [5]:
image_directory = '/home/scar3crow/Downloads/train_invoice'  ## 'invoices' is a zip file of jpg images in ...../Downloads

# Build the sorted list of full image paths from `image_directory` itself, so the
# folder is spelled out in exactly one place and the two cannot drift apart.
inv_crop_image = sorted(
    os.path.join(image_directory, file_name)
    for file_name in os.listdir(image_directory)
)

print('Number of images = ', len(inv_crop_image))
inv_crop_image[50]

Number of images =  210


'/home/scar3crow/Downloads/train_invoice/16-inv_date8.jpg'

In [7]:
# Check sizes of existing images:
# build a DataFrame with one row per image holding its path, height (rows) and width (columns).

rows = []      # image heights
columns = []   # image widths
image_sl = []  # image paths, in the same order as `inv_crop_image`

for image_file in inv_crop_image:
    image = cv2.imread(image_file)  ## Loading image
    # cv2.imread signals an unreadable file by returning None instead of raising,
    # so fail here with the offending path rather than on `.shape` below.
    if image is None:
        raise IOError('Could not read image: {}'.format(image_file))
    height, width = image.shape[:2]
    rows.append(height)
    columns.append(width)
    image_sl.append(image_file)

# Assemble the frame in one step; `columns=` pins the column order.
df = pd.DataFrame(
    {'image_serial': image_sl, 'rows': rows, 'columns': columns},
    columns=['image_serial', 'rows', 'columns'],
)

df.head()


Unnamed: 0,image_serial,rows,columns
0,/home/scar3crow/Downloads/train_invoice/1-buye...,86,248
1,/home/scar3crow/Downloads/train_invoice/1-inv_...,14,126
2,/home/scar3crow/Downloads/train_invoice/1-invo...,16,134
3,/home/scar3crow/Downloads/train_invoice/1-item...,59,481
4,/home/scar3crow/Downloads/train_invoice/1-po1.jpg,18,136


In [8]:
# Finding minimum sizes:
# idxmin() on a one-column frame gives a Series holding the index label of the
# smallest value; that Series is then used to pull the matching row out of `df`.

i = df.loc[:, ['rows']].idxmin()
j = df.loc[:, ['columns']].idxmin()

# First the index labels, then the corresponding rows of the frame.
print(i, j, sep='\n')
print(df.loc[i], df.loc[j], sep='\n')

rows    120
dtype: int64
columns    86
dtype: int64
                                          image_serial  rows  columns
120  /home/scar3crow/Downloads/train_invoice/25-inv...    13      169
                                         image_serial  rows  columns
86  /home/scar3crow/Downloads/train_invoice/20-inv...    26       44


In [9]:
# Finding maximum sizes:
# idxmax() on a one-column frame gives a Series holding the index label of the
# largest value; that Series is then used to pull the matching row out of `df`.

i = df.loc[:, ['rows']].idxmax()
j = df.loc[:, ['columns']].idxmax()

# First the index labels, then the corresponding rows of the frame.
print(i, j, sep='\n')
print(df.loc[i], df.loc[j], sep='\n')

rows    87
dtype: int64
columns    185
dtype: int64
                                         image_serial  rows  columns
87  /home/scar3crow/Downloads/train_invoice/20-ite...   235      460
                                          image_serial  rows  columns
185  /home/scar3crow/Downloads/train_invoice/6-item...    46      511


In [12]:
# Load the stock MobileNetV2 with ImageNet weights and print its layer table;
# the table shows the network's default input of 224 x 224 x 3.
new_model = MobileNetV2(weights='imagenet')
new_model.summary()

Model: "mobilenetv2_1.00_224"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
_______________________________________________________________________________

In [18]:
categories = ['vendor', 'buyer', 'invoice', 'inv_date', 'po', 'item_detail', 'totals']

# Map every category name to an integer id. LabelEncoder numbers the names in
# sorted (alphabetical) order, not in the order they appear in `categories`.
values = np.array(categories)
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)

# scikit-learn 1.2 renamed `sparse` to `sparse_output` and 1.4 removed the old
# keyword; try the new name first and fall back for older releases.
try:
    onehot_encoder = OneHotEncoder(sparse_output=False)
except TypeError:
    onehot_encoder = OneHotEncoder(sparse=False)

# OneHotEncoder wants a 2-D column of ids. Row k of `y_encoded` is the one-hot
# vector belonging to categories[k].
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
y_encoded = onehot_encoder.fit_transform(integer_encoded)

print(categories)
print(y_encoded)


['vendor', 'buyer', 'invoice', 'inv_date', 'po', 'item_detail', 'totals']
<class 'list'>
['vendor', 'buyer', 'invoice', 'inv_date', 'po', 'item_detail', 'totals']
[[0. 0. 0. 0. 0. 0. 1.]
 [1. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 0.]
 [0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0.]
 [0. 0. 0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0.]]


In [23]:
# Preparing Input(X) and Target(Y) file for training :

# Variable Definition

img_w = 224 # target sizes as per MobileNetV2
img_h = 224 # target sizes as per MobileNetV2

# Start :

X_final = [] # X_final list to convert to np array later
Y_final = [] # Y_final list to convert to np array later

for image_path in inv_crop_image:

    x = cv2.imread(image_path)
    # cv2.imread returns None for an unreadable file; stop with the path in the message.
    if x is None:
        raise IOError('Could not read image: {}'.format(image_path))

    # Every crop is stretched to the fixed network input size (aspect ratio is not kept).
    # NOTE(review): cv2 loads channels in BGR order and they are kept that way; the
    # prediction cell does the same, so any switch to RGB must be made in both places.
    img = cv2.resize(x, (img_w, img_h))
    X_final.append(img)

    # The category is read from the file name: take the part after the last '-',
    # cut it at the first '.', then strip the digits
    # (e.g. '16-inv_date8.jpg' -> 'inv_date8' -> 'inv_date').
    file_name = os.path.basename(image_path)
    label_token = file_name.rsplit('-', 1)[-1].split('.', 1)[0]
    category = ''.join(ch for ch in label_token if not ch.isdigit())

    if category not in categories:
        raise ValueError(
            'Unknown category {!r} derived from file name {!r}'.format(category, file_name)
        )

    # Row k of y_encoded is the one-hot target for categories[k].
    Y_final.append(y_encoded[categories.index(category), :])

X = np.array(X_final)
X_final = []  # drop the list so the images are not held twice
Y = np.array(Y_final)
Y_final = []

X = (X - 127.5)/127.5  # X normalising: pixels vary from 0 to 255, result lies in [-1, 1]

np.save('/home/scar3crow/Downloads/Data1/X_1.npy',X)
np.save('/home/scar3crow/Downloads/Data1/Y_1.npy',Y)


In [24]:
# Sanity check: (number of images, img_h, img_w, channels)
X.shape

(210, 224, 224, 3)

In [26]:
# Sanity check: (number of images, number of categories)
Y.shape

(210, 7)

In [27]:
#import data
#X and Y numpy arrays are created using the Prepocess.py file
X = np.load('/home/scar3crow/Downloads/Data1/X_1.npy')
Y = np.load('/home/scar3crow/Downloads/Data1/Y_1.npy')

# Fix the seed so the 75/25 train/validation split is identical on every run;
# without it each kernel restart trains and validates on different images and
# results of the models trained later cannot be compared.
X_train, X_val, Y_train, Y_val = train_test_split(
    X, Y, train_size=0.75, shuffle=True, random_state=42
)
X = []  # release the references to the full arrays
Y = []
X_train.shape



(157, 224, 224, 3)

In [29]:
def my_model(input_shape, num_classes=7):
    """Build an image classifier: MobileNetV2 backbone followed by a dense head.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input image, handed to ``Input``.
    num_classes : int, optional
        Number of units of the final softmax layer (default 7, the value
        that was previously hard-coded).

    Returns
    -------
    Model
        Uncompiled Keras model from the input tensor to the softmax output.
    """
    inp = Input(input_shape)

    # include_top=False leaves out MobileNetV2's own classification layers;
    # the ImageNet weights are loaded for the remaining layers.
    backbone = MobileNetV2(input_tensor=inp, include_top=False, weights='imagenet')

    # Dense head: flatten -> 640 -> 320 -> num_classes
    features = Flatten()(backbone.output)
    hidden = Dense(640, activation='relu')(features)
    hidden = Dense(320, activation='relu')(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    return Model(inp, outputs)


In [31]:
input_size = (img_h, img_w, 3)  # (height, width, channels)

my_invoice_model = my_model(input_size)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
my_invoice_model.summary()




Model: "model_1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_2 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_2[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

In [32]:
# Adam optimizer with a small learning rate and no learning-rate decay.
opt = Adam(
    lr=0.0001,
    beta_1=0.9,
    beta_2=0.999,
    epsilon=1e-08,
    decay=0.0,
)

# Targets are one-hot vectors, hence categorical cross-entropy; accuracy is reported per epoch.
my_invoice_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

In [34]:
# Train for 10 epochs in mini-batches of 4; (X_val, Y_val) is evaluated after every epoch.
my_invoice_model.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))


Train on 157 samples, validate on 53 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fae6f6ddc88>

In [35]:
# Run 10 further epochs on the same model object; fit() continues from the weights reached so far.
my_invoice_model.fit(X_train, Y_train, epochs= 10, batch_size = 4, validation_data=(X_val,Y_val))

Train on 157 samples, validate on 53 samples
Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10


<keras.callbacks.callbacks.History at 0x7fae6ae40dd8>

In [38]:
def my_model(input_shape, num_classes=7, dropout_rate=0.2):
    """Build an image classifier: MobileNetV2 backbone plus a dense head with dropout.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input image, handed to ``Input``.
    num_classes : int, optional
        Number of units of the final softmax layer (default 7, the value
        that was previously hard-coded).
    dropout_rate : float, optional
        Rate passed to the ``Dropout`` layer placed after each hidden dense
        layer (default 0.2, the value that was previously hard-coded).

    Returns
    -------
    Model
        Uncompiled Keras model from the input tensor to the softmax output.
    """
    inp = Input(input_shape)

    # include_top=False leaves out MobileNetV2's own classification layers;
    # the ImageNet weights are loaded for the remaining layers.
    backbone = MobileNetV2(input_tensor=inp, include_top=False, weights='imagenet')

    # Dense head: flatten -> 640 -> dropout -> 320 -> dropout -> num_classes
    features = Flatten()(backbone.output)
    hidden = Dense(640, activation='relu')(features)
    hidden = Dropout(dropout_rate)(hidden)
    hidden = Dense(320, activation='relu')(hidden)
    hidden = Dropout(dropout_rate)(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    return Model(inp, outputs)


In [39]:
input_size = (img_h, img_w, 3)  # (height, width, channels)

my_invoice_model_1 = my_model(input_size)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
my_invoice_model_1.summary()




Model: "model_3"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_4 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
Conv1_pad (ZeroPadding2D)       (None, 225, 225, 3)  0           input_4[0][0]                    
__________________________________________________________________________________________________
Conv1 (Conv2D)                  (None, 112, 112, 32) 864         Conv1_pad[0][0]                  
__________________________________________________________________________________________________
bn_Conv1 (BatchNormalization)   (None, 112, 112, 32) 128         Conv1[0][0]                      
____________________________________________________________________________________________

In [40]:
# Compile the first model again and train it for 20 more epochs.
# NOTE(review): `my_invoice_model` is the object already fitted in the earlier
# cells; compile() does not rebuild it, so training resumes from those weights.
my_invoice_model.compile(
    optimizer=opt,
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
my_invoice_model.fit(
    X_train,
    Y_train,
    epochs=20,
    batch_size=4,
    validation_data=(X_val, Y_val),
)

Train on 157 samples, validate on 53 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7fae6f6ddb00>

In [41]:
# Give this model its own optimizer. An optimizer instance carries training state
# (step counter, moment estimates), and `opt` has already been used to train
# `my_invoice_model`; sharing it would start this model with a stale step count,
# and recent Keras releases refuse to reuse an optimizer on different variables.
# Hyper-parameters are the same as for `opt`.
opt_1 = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_invoice_model_1.compile(optimizer=opt_1, loss='categorical_crossentropy', metrics=['accuracy'])
my_invoice_model_1.fit(X_train, Y_train, epochs=20, batch_size=4, validation_data=(X_val, Y_val))

Train on 157 samples, validate on 53 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7fae3fd95b38>

In [43]:
def my_model(input_shape, num_classes=7):
    """Build an image classifier: InceptionResNetV2 backbone followed by a dense head.

    Parameters
    ----------
    input_shape : tuple
        Shape of a single input image, handed to ``Input``.
    num_classes : int, optional
        Number of units of the final softmax layer (default 7, the value
        that was previously hard-coded).

    Returns
    -------
    Model
        Uncompiled Keras model from the input tensor to the softmax output.
    """
    inp = Input(input_shape)

    # include_top=False leaves out the backbone's own classification layers;
    # the ImageNet weights are loaded for the remaining layers.
    backbone = InceptionResNetV2(input_tensor=inp, include_top=False, weights='imagenet')

    # Dense head: flatten -> 640 -> 320 -> num_classes
    features = Flatten()(backbone.output)
    hidden = Dense(640, activation='relu')(features)
    hidden = Dense(320, activation='relu')(hidden)
    outputs = Dense(num_classes, activation='softmax')(hidden)

    return Model(inp, outputs)


In [46]:
input_size = (img_h, img_w, 3)  # (height, width, channels)

my_invoice_model_2 = my_model(input_size)

# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line after the table.
my_invoice_model_2.summary()




Model: "model_4"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_6 (InputLayer)            (None, 224, 224, 3)  0                                            
__________________________________________________________________________________________________
conv2d_1 (Conv2D)               (None, 111, 111, 32) 864         input_6[0][0]                    
__________________________________________________________________________________________________
batch_normalization_1 (BatchNor (None, 111, 111, 32) 96          conv2d_1[0][0]                   
__________________________________________________________________________________________________
activation_1 (Activation)       (None, 111, 111, 32) 0           batch_normalization_1[0][0]      
__________________________________________________________________________________________

In [47]:
# Give this model its own optimizer. An optimizer instance carries training state
# (step counter, moment estimates), and `opt` has already been used to train other
# models; sharing it would start this model with a stale step count, and recent
# Keras releases refuse to reuse an optimizer on different variables.
# Hyper-parameters are the same as for `opt`.
opt_2 = Adam(lr=0.0001, beta_1=0.9, beta_2=0.999, epsilon=1e-08, decay=0.0)

my_invoice_model_2.compile(optimizer=opt_2, loss='categorical_crossentropy', metrics=['accuracy'])
my_invoice_model_2.fit(X_train, Y_train, epochs=20, batch_size=4, validation_data=(X_val, Y_val))

Train on 157 samples, validate on 53 samples
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.callbacks.History at 0x7fadceb102b0>

In [83]:
image_path = inv_crop_image[50]

x = cv2.imread(image_path)
# cv2.imread returns None for an unreadable file; stop with the path in the message.
if x is None:
    raise IOError('Could not read image: {}'.format(image_path))

# Same preprocessing as the training data: resize to the network input size,
# then scale the pixel values from [0, 255] to [-1, 1].
img_1 = (cv2.resize(x, (img_w, img_h)) - 127.5) / 127.5

# predict() works on batches, so add a leading batch axis of length 1.
img_1 = np.expand_dims(img_1, axis=0)
x = img_1  # the cell has always left `x` bound to the batch as well; kept unchanged

# One probability per class. Columns follow the one-hot layout of `y_encoded`
# (alphabetical category order), so column 1 stands for 'inv_date'.
preds = my_invoice_model_2.predict(img_1)
print(image_path)
preds

/home/scar3crow/Downloads/train_invoice/16-inv_date8.jpg


array([[5.0491750e-10, 9.9998641e-01, 1.2620322e-05, 9.6850101e-07,
        1.8576269e-09, 3.0928668e-08, 1.0682160e-08]], dtype=float32)