In [1]:
import tensorflow as tf
gpu = tf.config.experimental.list_physical_devices('GPU')[0]
tf.config.experimental.set_memory_growth(gpu, True)
import numpy as np
import cv2
import os

## Model Architecture

In [3]:
input_ = tf.keras.layers.Input(shape=(360,360,1), name ="input")

conv_1 = tf.keras.layers.Conv2D(filters = 64, kernel_size = (3,3), padding = "same", name="conv_1")(input_)
act_1 = tf.keras.layers.Activation('relu', name='act_1')(conv_1)
pool_1 = tf.keras.layers.MaxPool2D(pool_size = (2,2), name = "pool_1")(act_1)

conv_2 = tf.keras.layers.Conv2D(filters = 64, kernel_size = (3,3), padding = "same" , name="conv_2")(pool_1)
act_2 = tf.keras.layers.Activation('relu', name='act_2')(conv_2)
pool_2 = tf.keras.layers.MaxPool2D(pool_size = (2,2), name = "pool_2")(act_2)

conv_3 = tf.keras.layers.Conv2D(filters = 32, kernel_size = (3,3), padding = "same",  name="conv_3")(pool_2)
act_3 = tf.keras.layers.Activation('relu', name='act_3')(conv_3)
pool_3 = tf.keras.layers.MaxPool2D(pool_size = (2,2), name = "pool_3")(act_3)

conv_4 = tf.keras.layers.Conv2D(filters = 16, kernel_size = (3,3), padding = "same", name="conv_4")(pool_3)
act_4 = tf.keras.layers.Activation('relu', name='act_4')(conv_4)
pool_4 = tf.keras.layers.MaxPool2D(pool_size = (2,2), name = "pool_4")(act_4)
pool_5 = tf.keras.layers.MaxPool2D(pool_size = (2,2), name = "pool_5")(pool_4)

flatten = tf.keras.layers.Flatten(name="flatten")(pool_5)


dense_1 = tf.keras.layers.Dense(64, activation='relu', name = "dense_1")(flatten)
dense_2 = tf.keras.layers.Dense(32, activation='relu', name = "dense_2")(dense_1)

shaded = tf.keras.layers.Dense(1,activation='sigmoid', name="shaded")(dense_2)
gridline = tf.keras.layers.Dense(1,activation='sigmoid', name="gridline")(dense_2)
good = tf.keras.layers.Dense(1,activation='sigmoid', name="good")(dense_2)
black_border = tf.keras.layers.Dense(1,activation='sigmoid', name="black_border")(dense_2)

model_outputs = [black_border, good, gridline, shaded]
model = tf.keras.models.Model(input_, model_outputs, name ="model_v1")
model.summary()

Model: "model_v1"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input (InputLayer)              [(None, 360, 360, 1) 0                                            
__________________________________________________________________________________________________
conv_1 (Conv2D)                 (None, 360, 360, 64) 640         input[0][0]                      
__________________________________________________________________________________________________
act_1 (Activation)              (None, 360, 360, 64) 0           conv_1[0][0]                     
__________________________________________________________________________________________________
pool_1 (MaxPooling2D)           (None, 180, 180, 64) 0           act_1[0][0]                      
___________________________________________________________________________________________

## Compile 

In [4]:
model.compile(
    loss={
        "shaded": 'binary_crossentropy',
        "gridline": "binary_crossentropy",
        "good": "binary_crossentropy",
        "black_border": "binary_crossentropy"
    },
    optimizer='adam',
    metrics=['accuracy']
)

## Load Data

In [5]:
def load_data(file_directory, shape):
    classes = os.listdir(file_directory)
    indices = {}
    data = []
    y = []
    for i in range(len(classes)):
        sub_dir = os.path.join(file_directory, classes[i])
        filename = os.listdir(sub_dir)
        for file_ in filename:
            file_path = os.path.join(sub_dir, file_)
            img = cv2.imread(file_path,0)
            img= cv2.resize(img, shape)
            img = img / 255.0
            img = np.expand_dims(img, axis=-1)
            data.append(img)
            y.append(i)
        indices[i] = classes[i]
    print(indices)
    return np.array(data), tf.one_hot(np.array(y), depth= 4)

In [6]:
x_data, y_data = load_data('processed_data/', shape=(360,360))

{0: 'black_border', 1: 'good', 2: 'gridline', 3: 'shaded'}


In [7]:
y_data[0].shape, x_data[0].shape

(TensorShape([4]), (360, 360, 1))

In [8]:
x_data.shape

(713, 360, 360, 1)

## Seperating Targets for model input

In [9]:
black = np.zeros((len(y_data), 1))
goo = np.zeros((len(y_data), 1))
grid =np.zeros((len(y_data), 1))
shad = np.zeros((len(y_data), 1))
for i, nu in enumerate(y_data):
    black[i] = nu[0]
    goo[i] = nu[1]
    grid[i] = nu[2]
    shad[i] = nu[3]

In [10]:
black.shape

(713, 1)

## Train

In [12]:
history = model.fit(
    x_data, [black, goo, grid, shad],
    steps_per_epoch= 713 // 4,
    batch_size = 4,
    epochs=3)

Epoch 1/3
Epoch 2/3
Epoch 3/3


In [18]:
# model.save('DocuNet.h5')

## Prediction

In [12]:
loaded_model = tf.keras.models.load_model('mtm5.h5')

In [32]:
img = cv2.imread('document_data/black_border/00001431.tif')
img = cv2.resize(img , (360,360))

In [33]:
img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
_, img = cv2.threshold(img, 220, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU)

In [34]:
img = np.expand_dims(img, 0)
img = np.expand_dims(img, -1)

In [35]:
black_border_op, good_op, grid_op, shad_op = model.predict(img)

In [36]:
print(black_border_op, good_op, grid_op, shad_op)

[[1.]] [[0.]] [[0.]] [[0.]]
