# Image Classification of Documents

### 1.0 Import packages and libraries
Import the packages and libraries that you'll use:

In [1]:
import os, random
import numpy as np
import pandas as pd
import PIL
import keras
import itertools
from PIL import Image

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# from skimage import feature, data, io, measure
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras import backend as K
from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Conv2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop, Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img


Using TensorFlow backend.


### 2 Global Variables 
Set the batch sizes for the training, validation and test datasets, together with the number of classes and the standard image size.

In [2]:
# Batch sizes for the training, validation and test image iterators.
batch_size_train = 20
batch_size_val = 10
batch_size_test = 10

# Number of target classes.
num_classes = 3

# intereseted_folder='Documents'

# Size handed to PIL's `resize` when external test images are loaded.
STANDARD_SIZE = (224, 224)

# 3. Classification

## 3.1 Create the Dataset

In [3]:
# Class names handed to `flow_from_directory` through its `classes=` argument.
classes_required = [
    'Cheque',
    'Driving_License',
    'Pancard',
]

In [4]:
# Image generator configured with the data format reported by the active
# Keras backend.
datagen = ImageDataGenerator(data_format=K.image_data_format())

In [5]:
#Input the Training Data
# train_datagen = ImageDataGenerator()
# test_datagen = ImageDataGenerator()

# train_batches = train_datagen.flow_from_directory (
#     r'C:\Users\Archit\Desktop\CV_AA\CV2\Images\Train',\
#     target_size=(224, 224),\
#     batch_size=10,\
#     class_mode='categorical')

In [6]:
# val_batches = test_datagen.flow_from_directory(
#     r'C:\Users\Archit\Desktop\CV_AA\CV2\Images\Val',\
#         target_size=(224, 224),\
#         batch_size=10,\
#         class_mode='categorical')

In [7]:
# Iterator that yields batches of training images read from `train_path`.
train_path = r'D:\Training\CV\Images\Train'
train_batches = ImageDataGenerator().flow_from_directory(
    directory=train_path,
    target_size=(224, 224),
    classes=classes_required,
    batch_size=batch_size_train,
)
type(train_batches)

Found 45 images belonging to 3 classes.


keras.preprocessing.image.DirectoryIterator

In [8]:
# Iterator that yields batches of validation images read from `val_path`.
val_path = r'D:\Training\CV\Images\Val'
val_batches = ImageDataGenerator().flow_from_directory(
    directory=val_path,
    target_size=(224, 224),
    classes=classes_required,
    batch_size=batch_size_val,
)
type(val_batches)

Found 12 images belonging to 3 classes.


keras.preprocessing.image.DirectoryIterator

In [9]:
# #Input the Test Data
# test_path = r'C:\Users\Archit\Desktop\CV_AA\CV2\Images\Test'
# test_batches = ImageDataGenerator().flow_from_directory(test_path, target_size=(224,224), classes=classes_required, batch_size=batch_size_test)
# type(test_batches)

In [10]:
# next(test_batches)

In [11]:
# Pull one (images, labels) batch from the training iterator for inspection.
# Note that this advances the iterator by one batch.
train_imgs, train_labels = next(train_batches)


In [12]:
# train_imgs

In [13]:
# train_labels

In [14]:
# test_imgs, test_labels = next(test_batches)

In [15]:
# test_imgs

In [16]:
# test_labels

In [17]:
# y_test= [ np.where(r==1)[0][0] for r in test_labels ]
# y_test

## 3.2 Build the Model

In [18]:
# model_ince = keras.applications.InceptionV3()

In [19]:
# model_ince.summary()

In [20]:
# model_ince.layers

In [21]:
# Instantiate VGG16 with the library's default arguments and print its layer
# table. NOTE(review): the Keras defaults are presumably ImageNet weights with
# the original classification top included - confirm for the installed version.
vgg16_model = keras.applications.vgg16.VGG16()
vgg16_model.summary()

Model: "vgg16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
input_1 (InputLayer)         (None, 224, 224, 3)       0         
_________________________________________________________________
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0     

In [22]:
type(vgg16_model) # VGG16 is built with the Keras functional API (a Model, not a Sequential); its layers are copied into a Sequential model in a later cell

keras.engine.training.Model

In [23]:
vgg16_model.layers

[<keras.engine.input_layer.InputLayer at 0x1ed40ead668>,
 <keras.layers.convolutional.Conv2D at 0x1ed40eadb38>,
 <keras.layers.convolutional.Conv2D at 0x1ed40eadf98>,
 <keras.layers.pooling.MaxPooling2D at 0x1ed40f00860>,
 <keras.layers.convolutional.Conv2D at 0x1ed40f005c0>,
 <keras.layers.convolutional.Conv2D at 0x1ed40f08da0>,
 <keras.layers.pooling.MaxPooling2D at 0x1ed40f0f390>,
 <keras.layers.convolutional.Conv2D at 0x1ed40f0f710>,
 <keras.layers.convolutional.Conv2D at 0x1ed40f0ff98>,
 <keras.layers.convolutional.Conv2D at 0x1ed41020ef0>,
 <keras.layers.pooling.MaxPooling2D at 0x1ed41020748>,
 <keras.layers.convolutional.Conv2D at 0x1ed4102acc0>,
 <keras.layers.convolutional.Conv2D at 0x1ed41031fd0>,
 <keras.layers.convolutional.Conv2D at 0x1ed41031f98>,
 <keras.layers.pooling.MaxPooling2D at 0x1ed4103cdd8>,
 <keras.layers.convolutional.Conv2D at 0x1ed4103ca20>,
 <keras.layers.convolutional.Conv2D at 0x1ed41048be0>,
 <keras.layers.convolutional.Conv2D at 0x1ed41051a90>,
 <keras.

In [24]:
# VGG16 is a functional-API model; rebuild it as a Sequential stack so that a
# new classification head can be appended with `model.add`.
# The last VGG16 layer (its own output layer) is deliberately left out:
# copying it would make the new head learn from that layer's class scores
# rather than from the fully connected features beneath it.
model = Sequential()
for layer in vgg16_model.layers[:-1]:
    model.add(layer)

In [25]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)      

In [None]:
# model.get_weights()

In [None]:
# last=model.get

In [26]:
# The copied layers already carry trained weights; mark each one as
# non-trainable so that fitting does not change them.
for pretrained_layer in model.layers:
    pretrained_layer.trainable = False

In [27]:
# Append the new softmax output layer. Its width comes from the `num_classes`
# constant in the global-variables cell instead of a repeated literal, so the
# head follows the configuration if the number of classes changes.
model.add(Dense(num_classes, activation='softmax'))

In [28]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
block1_conv1 (Conv2D)        (None, 224, 224, 64)      1792      
_________________________________________________________________
block1_conv2 (Conv2D)        (None, 224, 224, 64)      36928     
_________________________________________________________________
block1_pool (MaxPooling2D)   (None, 112, 112, 64)      0         
_________________________________________________________________
block2_conv1 (Conv2D)        (None, 112, 112, 128)     73856     
_________________________________________________________________
block2_conv2 (Conv2D)        (None, 112, 112, 128)     147584    
_________________________________________________________________
block2_pool (MaxPooling2D)   (None, 56, 56, 128)       0         
_________________________________________________________________
block3_conv1 (Conv2D)        (None, 56, 56, 256)      

In [29]:
# Compile the model: Adam optimiser with a small learning rate, categorical
# cross-entropy as the loss, and accuracy as the reported metric.
model.compile(
    optimizer=Adam(lr=.00015),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## 3.3 Train the Model

The model will take about 30-45 minutes to train. 

In [29]:
# One epoch should be one pass over each iterator. `len()` of a directory
# iterator is its number of batches, so the step counts now follow the dataset
# and batch size instead of being hard-coded (the previous values, 10 and 4,
# exceeded the 3 training and 2 validation batches available per pass).
model.fit_generator(train_batches,
                    steps_per_epoch=len(train_batches),
                    validation_data=val_batches,
                    validation_steps=len(val_batches),
                    epochs=20, verbose=1)

Epoch 1/20
Epoch 2/20

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.

ERROR:root:Internal Python error in the inspect module.
Below is the traceback from this internal error.



Traceback (most recent call last):
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\IPython\core\interactiveshell.py", line 3343, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-30-7792a4cd4fe7>", line 5, in <module>
    epochs=20, verbose=1)
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\keras\legacy\interfaces.py", line 91, in wrapper
    return func(*args, **kwargs)
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\keras\engine\training.py", line 1732, in fit_generator
    initial_epoch=initial_epoch)
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\keras\engine\training_generator.py", line 220, in fit_generator
    reset_metrics=False)
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\keras\engine\training.py", line 1514, in train_on_batch
    outputs = self.train_function(ins)
  File "C:\Users\Archit\anaconda3\envs\AIlabs\lib\site-packages\tensorflow_core\python\kera

TypeError: object of type 'NoneType' has no len()

In [None]:
# Write the model's current weights to a .h5 file (relative path, so it lands
# in the kernel's working directory).
model.save_weights('my_model_weights.h5')
# Read the same file straight back into the model.
# NOTE(review): immediately after saving this looks redundant - confirm whether
# the load is meant for a later session that skips training.
model.load_weights('my_model_weights.h5')

In [None]:
model.summary()

In [None]:
model.get_weights()

## 3.4 Test the Model with External Test Images

In [None]:
# zip_ref = r'C:\Users\Archit\Desktop\CV_AA\CV2\Images'
# paths = [zip_ref+"/gh.jpg", zip_ref+"/th.jpg",zip_ref+"/th1.jpg",zip_ref+"/th2.jpg",zip_ref+"/th3.jpg"]

In [None]:
# Image files that `convert_to_image` iterates over.
path = [
    r'D:\Training\CV\Images\new1.jpg',
    r'D:\Training\CV\Images\th3.jpg',
    r"D:\Training\CV\Images\th.jpg",
]

In [None]:
def convert_to_image(X, paths=None):
    '''Load image files, resize them to STANDARD_SIZE and append them to X.

    Parameters
    ----------
    X : list
        List that receives one numpy array per loaded image. It is extended
        in place.
    paths : iterable of str, optional
        Files to load. When omitted, the notebook-level `path` list is used,
        which is what the original single-argument call read.

    Returns
    -------
    list
        The same list object that was passed in as `X`.

    Notes
    -----
    Entries that are directories are skipped. Every image is converted to
    3-channel RGB before resizing, so grayscale, palette or RGBA files yield
    arrays of the same shape as ordinary colour images and can be stacked
    into a single batch.
    '''
    if paths is None:
        paths = path  # fall back to the notebook-level list of image files
    for f in paths:
        if os.path.isdir(f):
            continue
        # The context manager closes the underlying file once the pixel data
        # has been read; `convert`/`resize` return independent in-memory images.
        with PIL.Image.open(f) as img:
            resized = img.convert('RGB').resize(STANDARD_SIZE)
        X.append(np.array(resized))
    return X

In [None]:
# Load the external images into a fresh list and stack them into one array.
X_test = np.array(convert_to_image([]))

# NOTE(review): `datagen` was created without any extra options and is not
# used again in the visible cells - confirm this fit call is still needed.
datagen.fit(X_test)

In [None]:
# X_test

In [None]:
# Run the model on the external images; the result is indexed per image in
# the following cell, which takes the argmax of each entry.
predictions= model.predict(X_test)
predictions

In [None]:
# Index of the highest-scoring class for each prediction row.
y_pred = [np.argmax(row) for row in predictions]

In [None]:
y_pred

In [None]:
# Class names again, same values as the list defined in section 3.1.
classes_required = [
    'Cheque',
    'Driving_License',
    'Pancard',
]

In [None]:
train_batches.classes

In [None]:
train_batches.class_indices