# Image Classification of Documents

### 1.0 Import packages and libraries
Import the packages and libraries that you'll use:

In [1]:
import os, random
import numpy as np
import pandas as pd
import PIL
import keras
import itertools
from PIL import Image

from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split
from sklearn import preprocessing
# from skimage import feature, data, io, measure
from sklearn.metrics import confusion_matrix

import matplotlib.pyplot as plt
from matplotlib import ticker
import seaborn as sns
%matplotlib inline 

from keras import backend as K
from keras.models import Sequential
from keras.layers import Input, Dropout, Flatten, Conv2D, MaxPooling2D, Dense, Activation
from keras.optimizers import RMSprop, Adam
from keras.callbacks import ModelCheckpoint, Callback, EarlyStopping
from keras.utils import np_utils
from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img


Using TensorFlow backend.


### 2 Global Variables 
Set the batch sizes for the training, validation, and test datasets, along with the other notebook-wide settings.

In [2]:
# Number of images drawn per batch by each directory iterator.
batch_size_train = 20
batch_size_val = 10
batch_size_test = 25

# Presumably the number of document categories; equals the length of the
# class list defined later in the notebook -- TODO confirm.
num_classes = 5

# Not referenced in the visible cells; the name (spelling included) is kept
# as-is in case other code relies on it.
intereseted_folder = 'Documents'

# Size that every image is resized to before it is fed to the network.
STANDARD_SIZE = (224, 224)

# 3. Classification

## 3.1 Create the Dataset

In [4]:
# Class names handed to every flow_from_directory() call below via `classes=`.
classes_required = [
    'Cheque',
    'Documents',
    'Driving_License',
    'Pancard',
    'Passport',
]

In [5]:
# Generator configured with the image data format reported by the Keras backend.
datagen = ImageDataGenerator(data_format=K.image_data_format())

In [6]:
# Training data: directory iterator over the training folder, with images
# sized to STANDARD_SIZE and delivered batch_size_train at a time.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
train_path = r'C:\Users\ark11\Desktop\Ark Teaching\Alabs\DL\Class 6-7 Files\CNN\Image Classification Case Study-2 (Using Transfer Learning)\Data\Train_Data'
train_batches = ImageDataGenerator().flow_from_directory(
    train_path,
    target_size=STANDARD_SIZE,
    classes=classes_required,
    batch_size=batch_size_train,
)
type(train_batches)

Found 20 images belonging to 5 classes.


keras.preprocessing.image.DirectoryIterator

In [7]:
# Validation data: directory iterator over the validation folder, with images
# sized to STANDARD_SIZE and delivered batch_size_val at a time.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
val_path = r'C:\Users\ark11\Desktop\Ark Teaching\Alabs\DL\Class 6-7 Files\CNN\Image Classification Case Study-2 (Using Transfer Learning)\Data\Val_Data'
val_batches = ImageDataGenerator().flow_from_directory(
    val_path,
    target_size=STANDARD_SIZE,
    classes=classes_required,
    batch_size=batch_size_val,
)


Found 5 images belonging to 5 classes.


In [8]:
# Test data: directory iterator over the test folder, with images sized to
# STANDARD_SIZE and delivered batch_size_test at a time.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
test_path = r'C:\Users\ark11\Desktop\Ark Teaching\Alabs\DL\Class 6-7 Files\CNN\Image Classification Case Study-2 (Using Transfer Learning)\Data\Test_Data'
test_batches = ImageDataGenerator().flow_from_directory(
    test_path,
    target_size=STANDARD_SIZE,
    classes=classes_required,
    batch_size=batch_size_test,
)


Found 5 images belonging to 5 classes.


In [9]:
# next(test_batches)

In [13]:
# Pull one batch from the training iterator and unpack it into images and labels.
train_imgs, train_labels = next(train_batches)


In [None]:
# train_imgs

In [None]:
# train_labels

In [None]:
# Pull one batch from the test iterator and unpack it into images and labels.
test_imgs, test_labels = next(test_batches)

In [None]:
# test_imgs

In [None]:
# test_labels

In [None]:
# y_test= [ np.where(r==1)[0][0] for r in test_labels ]
# y_test

## 3.2 Build the Model

In [None]:
# Load InceptionV3 with its default constructor arguments so that it can be
# inspected in the following cells.
# NOTE(review): `model` is reassigned to a VGG16-based Sequential model further
# down, so this InceptionV3 instance is not the one that gets trained.
model = keras.applications.InceptionV3()

In [None]:
# Print the layer-by-layer summary of the InceptionV3 model loaded above.
model.summary()

In [None]:
# Display the list of layer objects of the InceptionV3 model.
model.layers

In [None]:
# Load VGG16 with its default constructor arguments (the captured output below
# shows the pretrained weight file being downloaded) and print its summary.
vgg16_model = keras.applications.vgg16.VGG16()
vgg16_model.summary()

Downloading data from https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5
 30892032/553467096 [>.............................] - ETA: 1:18:08

In [None]:
# VGG16 is built with the Keras functional API; the cells below re-stack its
# layers into a Sequential model.
type(vgg16_model)

In [None]:
# Display the list of layer objects that make up VGG16.
vgg16_model.layers

In [None]:
# Re-stack the VGG16 layers into a Sequential model so that a new output layer
# can be appended with model.add().
# The last VGG16 layer (its own output/classification layer) is deliberately
# left out: the document classifier gets its own output layer, and stacking
# that on top of VGG16's original class probabilities would throw away the
# richer features of the preceding layer.
model = Sequential()
for layer in vgg16_model.layers[:-1]:
    model.add(layer)

In [None]:
# Print the summary of the Sequential model assembled from the VGG16 layers.
model.summary()

In [None]:
# The layers copied from VGG16 already carry trained weights that should stay
# unchanged, so mark every layer currently in the model as non-trainable.
for layer in model.layers:
    layer.trainable = False

In [None]:
# Append the new softmax output layer with one unit per document class.
model.add(Dense(units=num_classes, activation='softmax'))

In [None]:
# Print the summary again, now including the newly added output layer.
model.summary()

In [None]:
# Compile the model: Adam optimizer, categorical cross-entropy loss,
# accuracy reported as the metric.
model.compile(
    optimizer=Adam(lr=.00015),
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

## 3.3 Train the Model

The model will take about 30-45 minutes to train. 

In [None]:
# Train for 2 epochs from train_batches, using val_batches as validation data.
# NOTE(review): the loader output above reports 20 training images with
# batch_size_train = 20, so steps_per_epoch=20 appears to draw the same 20
# images 20 times per epoch; likewise validation_steps=20 runs against only
# 5 validation images -- confirm this repetition is intended.
model.fit_generator(train_batches, steps_per_epoch=20, 
                    validation_data=val_batches, validation_steps=20, epochs=2, verbose=1)

In [None]:
# Show the current working directory (the weights file below is written
# with a relative path).
%pwd

In [None]:
# Write the model weights to 'my_model_weights.h5' (relative path), then read
# the same file straight back into the model -- a save/load round trip.
model.save_weights('my_model_weights.h5')
model.load_weights('my_model_weights.h5')

In [None]:
# Print the model summary after the weights have been reloaded.
model.summary()

In [None]:
# Display the model's weights.
# NOTE(review): this shows every weight array of the network; consider
# displaying only their shapes to keep the notebook output readable.
model.get_weights()

## 3.4 Test the Model with External Test Images

In [None]:
# Folder that holds the external test images.
# NOTE(review): absolute, machine-specific path -- adjust before running elsewhere.
zip_ref = 'C:/Users/HP/Desktop'

In [None]:
# Three entries that all point at the same image file under zip_ref.
paths = [zip_ref + "/cheque copy 2.jpg"] * 3

In [None]:
# Display the list of external test image paths.
paths

In [None]:
# Empty accumulator list; convert_to_image() appends one image array per file to it.
X_test=[]
def convert_to_image(X, image_paths=None, size=None):
    '''Load image files, resize them, and append them to ``X`` as arrays.

    Parameters
    ----------
    X : list
        Accumulator. One ``numpy`` array per loaded image is appended to it
        in place.
    image_paths : iterable of str, optional
        Files to load. Defaults to the notebook-level ``paths`` list.
        Entries that are directories are skipped.
    size : tuple, optional
        Target size handed to ``PIL.Image.resize``. Defaults to the
        notebook-level ``STANDARD_SIZE``.

    Returns
    -------
    list
        The same ``X`` object that was passed in.
    '''
    # Fall back to the notebook globals so existing calls of the form
    # convert_to_image(X) keep working unchanged.
    if image_paths is None:
        image_paths = paths
    if size is None:
        size = STANDARD_SIZE

    for f in image_paths:
        if os.path.isdir(f):
            continue
        # The context manager closes the underlying file once the pixel data
        # has been copied into the converted/resized image.
        with PIL.Image.open(f) as img:
            # Force three channels so grayscale, RGBA and palette files all
            # produce arrays of the same shape.
            resized = img.convert('RGB').resize(size)
        X.append(np.array(resized))
    return X
# Load the external images and stack them into a single array.
X_test=np.array(convert_to_image(X_test))
# NOTE(review): datagen was created with only data_format set, and the
# prediction cell below passes X_test to the model directly, so this fit call
# presumably does not affect the predictions -- TODO confirm it is needed.
datagen.fit(X_test)

In [None]:
# Display the array of external test images.
X_test

In [None]:
# Run the model on the external test images and display the raw model outputs.
predictions= model.predict(X_test)
predictions

In [None]:
# For every prediction row, keep the index of its highest-scoring entry.
y_pred = [np.argmax(scores) for scores in predictions]

In [None]:
# Display the predicted class index for each external test image.
y_pred