In [2]:
%matplotlib inline
seed = 1234

import pandas as pd
import os, glob
import numpy as np
np.random.seed(seed)
import random
random.seed(seed)
# fix random seed for reproducibility
import tensorflow as tf
tf.compat.v1.random.set_random_seed(seed)
from numpy import array
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import text_utilities as tu
import modeling_utils as mu
import image_utilities as iu
import ocr
import doc_classifier_model as dcm
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.optimizers import Adagrad
from keras import backend as K
import json
import keras
from keras_self_attention import SeqSelfAttention

# Build one TF1 session that runs ops on a single thread with a single CPU
# device entry, and register it as the session Keras uses.
# (Presumably done to reduce run-to-run nondeterminism, alongside the seeds above.)
sess_config = tf.ConfigProto(
    intra_op_parallelism_threads=1,
    inter_op_parallelism_threads=1,
    allow_soft_placement=True,
    device_count={'CPU': 1},
)
sess = tf.Session(config=sess_config, graph=tf.get_default_graph())
K.set_session(sess)


# Plot styling, then record the library versions this notebook ran against.
plt.style.use('ggplot')
print(tf.__version__)
print(keras.__version__)

1.15.0
2.3.0


In [3]:
# Load the labelled documents, one-hot encode the label column and drop
# incomplete rows.
data_df = pd.read_csv('data/data_df_new.csv')
data_df['y_oneh'] = mu.onehot_encode(data_df.y)
data_df = data_df.dropna()

# Clean the text column; keep only alphabets
data_df['x_txt_cleaned'] = data_df.x_txt.apply(tu.clean_string)
max_len = 70
vocab = tu.make_vocab(data_df.x_txt_cleaned)
w2i = tu.make_w2i(vocab)

# Shuffle with an explicit seed. The cached image features loaded in the next
# cell (data/img_features_*.npy) are matched to these rows by position only,
# so the row order must be identical on every run -- including re-runs of this
# cell, where the global NumPy RNG has already been advanced.
# NOTE(review): on a fresh kernel this should give the same order as the
# previous implicit shuffle (global RNG seeded with the same value); if the
# caches were built under a different RNG state, regenerate them.
data_df = data_df.sample(frac=1, random_state=seed)

# Train/test split (70/30) over the image path and cleaned text columns.
x_train, x_test, y_train, y_test = train_test_split(
    data_df[['path', 'x_txt_cleaned']],
    data_df.y_oneh,
    test_size=0.3,
    random_state=201,
)

print("----------------------------------------")
print("Train X Size:", x_train.shape)
print("Train Y Size:", y_train.shape)
print("----------------------------------------")
print("Test X size:", x_test.shape)
print("Test Y Size:", y_test.shape)



Vocab size: 29366
----------------------------------------
Train X Size: (1316, 2)
Train Y Size: (1316,)
----------------------------------------
Test X size: (565, 2)
Test Y Size: (565,)


In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.


In [4]:
# Build the model inputs: padded text tensors from the cleaned text, image
# features from the on-disk cache, and a 2-D label array.
#
# The image caches were produced once with the (currently disabled) lines:
#   x_img_train = np.stack(x_train.path.apply(generate_image_features, args=[resnet50]))
#   np.save('data/img_features_train.npy', x_img_train)
#   x_img_test = np.stack(x_test.path.apply(generate_image_features, args=[resnet50]))
#   np.save('data/img_features_test.npy', x_img_test)
x_txt_train = tu.make_tensor_np(x_train.x_txt_cleaned, w2i, max_len)
x_img_train = np.load('data/img_features_train.npy')
# list(...) works both for the Series of per-row vectors coming from the split
# and for the already-stacked array, so re-running this cell does not fail.
y_train = np.stack(list(y_train))

x_txt_test = tu.make_tensor_np(x_test.x_txt_cleaned, w2i, max_len)
x_img_test = np.load('data/img_features_test.npy')
y_test = np.stack(list(y_test))

# The cached features are aligned with the split by position only; a row-count
# mismatch means the cache is stale. (This cannot detect a changed row order.)
assert len(x_img_train) == len(x_txt_train) == len(y_train), "stale train image-feature cache"
assert len(x_img_test) == len(x_txt_test) == len(y_test), "stale test image-feature cache"

print("X text train tensor shape:", x_txt_train.shape)
print("X images train tensor shape:", x_img_train.shape)
print("y train shape:", y_train.shape)
print("----------------------------------------")
print("X text test tensor shape:", x_txt_test.shape)
print("X images test tensor shape:", x_img_test.shape)
print("y test shape:", y_test.shape)

X text train tensor shape: (1316, 70)
X images train tensor shape: (1316, 131072)
y train shape: (1316, 10)
----------------------------------------
X text test tensor shape: (565, 70)
X images test tensor shape: (565, 131072)
y test shape: (565, 10)


In [5]:


# Restore the classifier from its JSON architecture + HDF5 weights and report
# accuracy on the train and test tensors built above.
tf.reset_default_graph()

with tf.Session() as sess:

    # Context manager so the file handle is released even if reading fails.
    with open('model2.json', 'r') as json_file:
        loaded_model_json = json_file.read()
    loaded_model = keras.models.model_from_json(
        loaded_model_json,
        custom_objects={'SeqSelfAttention': SeqSelfAttention},
    )
    loaded_model.load_weights('model2.h5')

    # A model loaded from JSON is uncompiled; evaluate() needs loss and metrics.
    opt = Adagrad(lr = 1e-3)
    loaded_model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=["accuracy"])

    graph = tf.get_default_graph()

    # Re-evaluate the model
    with graph.as_default():
        loss,acc = loaded_model.evaluate([x_img_train, x_txt_train], y_train, verbose=2)
    print("Restored model, train accuracy: {:5.2f}%".format(100*acc))
    with graph.as_default():
        loss,acc = loaded_model.evaluate([x_img_test, x_txt_test], y_test, verbose=2)
    print("Restored model, test accuracy: {:5.2f}%".format(100*acc))

Instructions for updating:
If using Keras pass *_constraint arguments to layers.

Restored model, train accuracy: 89.51%
Restored model, test accuracy: 72.74%


In [9]:
# Same check as the JSON-based restore, but the architecture is rebuilt in
# code from the sizes stored in `model_config` before loading the weights.
with tf.Session() as sess:
    graph = tf.get_default_graph()

    with open('model_config', 'r') as configfile:
        config = json.load(configfile)

    loaded_model = dcm.build_doc_classifier(
        config['input_text_shape'],
        config['input_img_shape'],
        config['n_classes'],
        config['vocab_size'],
    )
    loaded_model.load_weights('model2.h5')

    opt = Adagrad(lr=1e-3)
    loaded_model.compile(
        optimizer=opt,
        loss='categorical_crossentropy',
        metrics=["accuracy"],
    )

    # Re-evaluate the model on both splits
    with graph.as_default():
        loss, acc = loaded_model.evaluate([x_img_train, x_txt_train], y_train, verbose=2)
        print("Restored model, train accuracy: {:5.2f}%".format(100*acc))
        loss, acc = loaded_model.evaluate([x_img_test, x_txt_test], y_test, verbose=2)
        print("Restored model, test accuracy: {:5.2f}%".format(100*acc))

Restored model, train accuracy: 89.74%
Restored model, test accuracy: 72.57%


In [6]:
# Predict on the test split and compute accuracy by hand from the arg-max of
# the predicted scores versus the arg-max of the one-hot labels.
# NOTE(review): this cell loads 'model.h5' while the other cells load
# 'model2.h5' -- confirm which weights are intended.
with tf.Session() as sess:
    # Bind the graph here instead of relying on a `graph` variable left over
    # from another cell, so the cell also runs on a fresh kernel.
    graph = tf.get_default_graph()

    with open('model_config', 'r') as configfile:
        config = json.load(configfile)

    loaded_model = dcm.build_doc_classifier(config['input_text_shape'], config['input_img_shape'], config['n_classes'], config['vocab_size'])
    loaded_model.load_weights('model.h5')
    opt = Adagrad(lr = 1e-3)
    loaded_model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=["accuracy"])
    with graph.as_default():
        scores = loaded_model.predict([x_img_test, x_txt_test], verbose=2)

# Per-row predicted / actual class index.
y_hat = np.asarray(scores).argmax(axis=1)
y_act = np.asarray(y_test).argmax(axis=1)

# Vectorised count of matching rows (replaces the element-by-element loop).
correct = int((y_hat == y_act).sum())
total = len(y_hat)

correct/total

0.7292035398230089

In [1]:
seed = 1234
import pandas as pd
import os, glob
import numpy as np
np.random.seed(seed)
import random
random.seed(seed)
# fix random seed for reproducibility
import tensorflow as tf
tf.compat.v1.random.set_random_seed(seed)
import keras
from numpy import array
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
import text_utilities as tu
import modeling_utils as mu
import image_utilities as iu
import ocr
from keras_self_attention import SeqSelfAttention
import doc_classifier_model as dcm
from keras.callbacks import EarlyStopping, ModelCheckpoint, ReduceLROnPlateau, TensorBoard
from keras.optimizers import Adagrad
from keras import backend as K
import json
import time
import resnet50_feature_extractor as rfe
import operator
from PIL import Image
import base64
from PIL import Image
from io import BytesIO
from keras.applications import ResNet50

# Single-threaded TF1 session with one CPU device entry, registered as the
# session Keras uses.
sess_config = tf.ConfigProto(intra_op_parallelism_threads=1, inter_op_parallelism_threads=1,
allow_soft_placement=True, device_count = {'CPU': 1})
sess = tf.Session(graph=tf.get_default_graph(),config=sess_config)
K.set_session(sess)

# Model hyper-parameters and lookup tables; the code below reads the keys
# 'input_text_shape', 'input_img_shape', 'n_classes', 'vocab_size' and 'w2i'.
with open('model_config', 'r') as configfile:
    config = json.load(configfile)

# (A disabled model-loading experiment that lived here inside a triple-quoted
# string was removed: it was evaluated as a no-op string expression on every run.)

def decode_image(image_path):
    """Open the image stored as base64 inside a JSON payload file.

    The file at ``image_path`` is parsed as JSON, the first entry of its
    ``base64Data`` list is base64-decoded, and the resulting bytes are handed
    to ``Image.open`` through an in-memory buffer.

    Returns:
        Whatever ``Image.open`` returns for the decoded bytes.
    """
    with open(image_path, 'r') as payload_file:
        payload = json.load(payload_file)

    encoded = payload['base64Data'][0]
    raw_bytes = base64.b64decode(encoded)
    return Image.open(BytesIO(raw_bytes))

def predict(image_path):
    """Classify the document image stored in a base64 JSON payload file.

    Steps: decode the image, OCR it and turn the cleaned text into a tensor,
    extract image features, rebuild the classifier and load its weights, then
    score the document. The predicted class and the elapsed time are printed.

    Note: the classifier is rebuilt and its weights reloaded on every call.

    Args:
        image_path: path to a JSON file readable by ``decode_image``.

    Returns:
        A JSON string mapping each class name to its predicted score.
    """
    start = time.time()

    image = decode_image(image_path)

    # Create text features
    x_txt = tu.clean_string(ocr.get_text_from_image(image))
    x_txt = tu.make_tensor_np(x_txt, config['w2i'], config['input_text_shape'])

    # Create image features
    x_img = np.squeeze(rfe.generate_image_features3(image))

    # Make model prediction
    with tf.Session():
        loaded_model = dcm.build_doc_classifier(config['input_text_shape'], config['input_img_shape'], config['n_classes'], config['vocab_size'])
        loaded_model.load_weights("model2.h5")

        opt = Adagrad(lr = 1e-3)
        loaded_model.compile(loss='categorical_crossentropy',
              optimizer=opt,
              metrics=["accuracy"])

        graph = tf.get_default_graph()
        with graph.as_default():
            # Reshape both inputs to a batch of one before scoring.
            scores = loaded_model.predict([x_img.reshape(-1, config['input_img_shape']), x_txt.reshape(-1, config['input_text_shape'])]).tolist()

    # Class names are paired with score columns by position.
    # NOTE(review): 'Phramacy Bill' looks like a typo, but it is a key of the
    # returned JSON, so it is left untouched here.
    classes = ['Aadhar Card', 'Diagnostic Bill', 'Discharge Summary', 'Insurance Card', 'Internal Case Papers', 'Pan Card', 'Phramacy Bill', 'Policy Copy', 'Prescriptions', 'Receipts']

    # Prepare response and return
    response = dict(zip(classes, scores[0]))
    print('Prediction:', max(response.items(), key=operator.itemgetter(1))[0])

    dur = time.time() - start

    # Contiguous ranges: a duration of exactly 60 s is reported in minutes
    # (the previous `dur>60 and dur<3600` test sent it to the hours branch).
    if dur < 60:
        print("Execution Time:", dur, "seconds")
    elif dur < 3600:
        print("Execution Time:", dur / 60, "minutes")
    else:
        print("Execution Time:", dur / (60 * 60), "hours")

    return json.dumps(response)

Using TensorFlow backend.


In [6]:
# Smoke-test predict() on a sample payload file.
# NOTE(review): hard-coded absolute path under /home/sureclaim -- it only
# resolves on a machine with that directory layout.
predict('/home/sureclaim/Documents/Claims/sampleJsonData.txt')



Prediction: Insurance Card
Execution Time: 18.91075325012207 seconds


'{"Aadhar Card": 0.011417805217206478, "Diagnostic Bill": 0.0021242836955934763, "Discharge Summary": 0.16710777580738068, "Insurance Card": 0.6064165234565735, "Internal Case Papers": 0.010215921327471733, "Pan Card": 0.02433878183364868, "Phramacy Bill": 0.047197531908750534, "Policy Copy": 0.00018836144590750337, "Prescriptions": 0.12591440975666046, "Receipts": 0.005078513640910387}'

In [28]:

# Session options with no overrides; LoadModel.__init__ reads this name when it
# creates its session. This rebinds `sess_config`, which the setup cell had
# bound to a single-threaded configuration.
sess_config = tf.ConfigProto()

class LoadModel:
    """Keeps ResNet50 and the document classifier loaded for repeated predictions.

    Both networks are built once in ``__init__``; ``predict`` then only runs
    OCR, feature extraction and a forward pass.

    Intermediate values of each call (decoded payload, features, scores,
    timings) are stored as instance attributes, so concurrent calls on the
    same instance would overwrite each other's intermediates.
    """

    def __init__(self, model_path):
        """Create a session, load ResNet50 and the classifier weights.

        Args:
            model_path: path of the classifier weights file passed to
                ``load_weights``.
        """
        #with K.get_session().graph.as_default():
        self.sess  = tf.Session(config=sess_config)
        K.set_session(self.sess)
        print('Loading ResNet50...')
        # The original passed the builtin function `max` as `pooling`; Keras
        # only recognises the strings 'avg' / 'max', so no pooling layer was
        # ever added. `pooling=None` states that explicitly without changing
        # the output. Do NOT switch to 'max': the 131072-wide cached features
        # shown earlier in this notebook are consistent with the unpooled
        # 8x8x2048 map for a 225x225 input, so pooling would change the
        # feature width (NOTE(review): confirm against config['input_img_shape']).
        self.resnet50 = ResNet50(weights='imagenet', pooling=None, include_top=False)
        print('ResNet50 Loaded...')
        print('Loading Doc Classifier...')
        self.model = dcm.build_doc_classifier(config['input_text_shape'], config['input_img_shape'], config['n_classes'], config['vocab_size'])
        self.model.load_weights(model_path)
        print('Doc Classifier Loaded...')
        self.opt = Adagrad(lr = 1e-3)
        self.model.compile(loss='categorical_crossentropy',
              optimizer=self.opt,
              metrics=["accuracy"])
        # Remember the graph both networks were built in; the predict calls
        # below re-enter it explicitly.
        self.graph = tf.get_default_graph()

    def decode_image(self, image_path):
        """Open the image stored as base64 inside a JSON payload file.

        Reads ``image_path`` as JSON and decodes the first entry of its
        ``base64Data`` list. The parsed payload is kept on ``self.data``.

        Returns:
            Whatever ``Image.open`` returns for the decoded bytes.
        """
        with open(image_path, 'r') as data_file:
            self.data = data_file.read()
        self.data = json.loads(self.data)
        return Image.open(BytesIO(base64.b64decode(self.data['base64Data'][0])))


    def generate_image_features(self, image, reshape=(225, 225)):
        """Run an image through ResNet50 and return the flattened feature vector.

        Args:
            image: image object exposing ``mode``, ``convert``, ``resize`` and
                ``load`` (e.g. the object returned by ``decode_image``).
            reshape: ``(width, height)`` passed to ``image.resize``.

        Returns:
            1-D array of ResNet50 output values.
        """
        # Grayscale / palette / alpha images do not have three channels and
        # would be rejected by the network; RGB inputs are left untouched.
        if image.mode != "RGB":
            image = image.convert("RGB")

        # GENERATING FEATURES
        self.image = image.resize(reshape)
        self.image.load()
        self.x = np.asarray(self.image, dtype="int32" )
        self.x = np.expand_dims(self.x, axis=0)  # add the batch axis
        with self.graph.as_default():
            self.img_features = self.resnet50.predict(self.x, verbose=2)
        self.img_features = self.img_features.squeeze()
        self.img_features = self.img_features.flatten()
        return self.img_features

    def predict(self, image_path):
        """Classify the document image stored in a base64 JSON payload file.

        Prints the predicted class and the elapsed time.

        Args:
            image_path: path to a JSON file readable by ``decode_image``.

        Returns:
            A JSON string mapping each class name to its predicted score.
        """
        self.start = time.time()

        self.img = self.decode_image(image_path)

        # Create text features
        self.x_txt = tu.clean_string(ocr.get_text_from_image(self.img))
        self.x_txt = tu.make_tensor_np(self.x_txt, config['w2i'], config['input_text_shape'])

        # Create image features
        self.x_img = np.squeeze(self.generate_image_features(self.img))

        # Make model prediction (both inputs reshaped to a batch of one)
        with self.graph.as_default():
            self.scores = self.model.predict([self.x_img.reshape(-1, config['input_img_shape']), self.x_txt.reshape(-1, config['input_text_shape'])]).tolist()

        # Class names are paired with score columns by position.
        # NOTE(review): 'Phramacy Bill' looks like a typo, but it is a key of
        # the returned JSON, so it is left untouched here.
        self.classes = ['Aadhar Card', 'Diagnostic Bill', 'Discharge Summary', 'Insurance Card', 'Internal Case Papers', 'Pan Card', 'Phramacy Bill', 'Policy Copy', 'Prescriptions', 'Receipts']

        # Prepare response and return
        self.response = dict(zip(self.classes, self.scores[0]))
        print('Prediction:', max(self.response.items(), key=operator.itemgetter(1))[0])

        self.end = time.time()
        self.dur = self.end - self.start

        # Contiguous ranges (exactly 60 s is reported in minutes). The minutes
        # branch previously read an undefined name `dur` and raised NameError.
        if self.dur < 60:
            print("Execution Time:", self.dur,"seconds")
        elif self.dur < 3600:
            self.dur = self.dur/60
            print("Execution Time:", self.dur,"minutes")
        else:
            self.dur = self.dur/(60*60)
            print("Execution Time:", self.dur,"hours")

        return json.dumps(self.response)

In [29]:
# Build the inference wrapper once; its constructor loads ResNet50 and the
# classifier weights from 'model2.h5'.
# NOTE(review): this rebinds `loaded_model`, which earlier cells use for a bare
# Keras model -- a distinct name would avoid confusion on out-of-order runs.
loaded_model = LoadModel('model2.h5')

Loading ResNet50...




ResNet50 Loaded...
Loading Doc Classifier...
Doc Classifier Loaded...


In [31]:
# Classify the sample payload with the already-loaded networks.
# NOTE(review): hard-coded absolute path under /home/sureclaim. The recorded
# output also shows "Restored model ... accuracy" lines, while the evaluate
# calls in LoadModel.predict are commented out -- the output appears to come
# from an earlier version of the class; re-run to refresh it.
loaded_model.predict('/home/sureclaim/Documents/Claims/sampleJsonData.txt')

Restored model, train accuracy: 89.51%
Restored model, test accuracy: 72.74%
Prediction: Insurance Card
Execution Time: 13.2495596408844 seconds


'{"Aadhar Card": 0.011417805217206478, "Diagnostic Bill": 0.0021242836955934763, "Discharge Summary": 0.16710777580738068, "Insurance Card": 0.6064165234565735, "Internal Case Papers": 0.010215921327471733, "Pan Card": 0.02433878183364868, "Phramacy Bill": 0.047197531908750534, "Policy Copy": 0.00018836144590750337, "Prescriptions": 0.12591440975666046, "Receipts": 0.005078513640910387}'

In [32]:
import flask