In [14]:
from matplotlib import pyplot as plt
from __future__ import print_function
import numpy as np
import tensorflow as tf
from six.moves import cPickle as pickle
from six.moves import range
import os
import cv2

In [15]:
pickle_file = os.path.join('.','dataset_text_detection.pickle')

with open(pickle_file, 'rb') as f:
    save = pickle.load(f)
    dataset = save['dataset']
    labels = save['label']
    del save  # hint to help gc free up memory
    print('Dataset', dataset.shape, labels.shape)

#cv2.imshow("img",dataset[20050])
#cv2.waitKey(0)

Dataset (40000, 32, 32) (40000, 1)


In [17]:
image_size = 32
num_labels = 2
num_channels = 1

def reformat(dataset, labels):
    dataset = dataset.reshape(
        (-1, image_size, image_size, num_channels)).astype(np.float32)
    labels = (np.arange(num_labels) == labels[:]).astype(np.float32)
    return dataset, labels

dataset,labels = reformat(dataset,labels)
print(dataset.shape,labels.shape)
print(labels[4])

(40000, 32, 32, 1) (40000, 2)
[1. 0.]


In [18]:
split = int(40000 * 0.2)
train = int(40000 - 2*split)

train_dataset = dataset[:train]
train_labels = labels[:train]
valid_dataset = dataset[train:train+split]
valid_labels = labels[train:train+split]
test_dataset = dataset[train+split:train+2*split]
test_labels = labels[train+split:train+2*split]

print('Training set', train_dataset.shape,train_labels.shape)
print('Validation set', valid_dataset.shape, valid_labels.shape)
print('Test set', test_dataset.shape, test_labels.shape)



Training set (24000, 32, 32, 1) (24000, 2)
Validation set (8000, 32, 32, 1) (8000, 2)
Test set (8000, 32, 32, 1) (8000, 2)


In [19]:
def accuracy(predictions, labels):
    return (100.0 * np.sum(np.argmax(predictions, 1) == np.argmax(labels, 1))
          / predictions.shape[0])

In [27]:
batch_size = 1000

##Network graph params
filter_size_conv1 = 5 
num_filters_conv1 = 32

filter_size_conv2 = 5
num_filters_conv2 = 32

filter_size_conv3 = 5
num_filters_conv3 = 64
    
fc_layer_size = 64

In [28]:
def create_weights(shape):
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def create_biases(shape):
    return tf.Variable(tf.constant(1.0,shape=shape))

def create_convolutional_layer(input, weights, biases ,poolType='max'):
    
    #Creating the convolutional layer 
    layer = tf.nn.conv2d(input=input, filter=weights, strides=[1,1,1,1], padding='SAME')
    
    layer += biases
    
    if poolType == 'max':
        layer = tf.nn.max_pool(value=layer, ksize=[1,3,3,1], strides=[1,2,2,1],padding='VALID')
    else:
        layer = tf.nn.avg_pool(value=layer, ksize=[1,3,3,1], strides=[1,2,2,1],padding='VALID')
    
    #Relu activation function
    layer = tf.nn.relu(layer)
    
    return layer

def create_flatten_layer(layer):
    shape = layer.get_shape().as_list()
    layer = tf.reshape(layer, [-1, shape[1]*shape[2]*shape[3]])
    
    return layer 

def create_fullyConnected_layer(input, weights, biases, use_relu=True):
    layer = tf.matmul(input, weights) + biases
    
    if use_relu:
        layer = tf.nn.relu(layer)
        
    return layer

In [31]:
graph = tf.Graph()

with graph.as_default():
    
    #Input data
    tf_train_dataset = tf.placeholder(tf.float32, shape=(None, image_size, image_size, num_channels),name = 'tf_train_dataset')
    tf_train_labels = tf.placeholder( 
        tf.float32, shape=(None, num_labels),name='tf_train_labels')
    tf_valid_dataset = tf.constant(valid_dataset)
    tf_test_dataset = tf.constant(test_dataset)
    
    #Variables 
    layer1_weights = create_weights([filter_size_conv1,filter_size_conv1,num_channels,num_filters_conv1])
    layer1_biases = tf.Variable(tf.zeros([num_filters_conv1]))
    layer2_weights = create_weights([filter_size_conv2,filter_size_conv2,num_filters_conv1,num_filters_conv2])
    layer2_biases = create_biases([num_filters_conv2])
    layer3_weights = create_weights([filter_size_conv3,filter_size_conv3,num_filters_conv2,num_filters_conv3])
    layer3_biases = create_biases([num_filters_conv3])
    layer4_weights = create_weights([3*3*num_filters_conv3, fc_layer_size])
    layer4_biases = create_biases([fc_layer_size])
    layer5_weights = create_weights([fc_layer_size,num_labels])
    layer5_biases = create_biases([num_labels])
   
    # Model
    def model(data):
        conv_layer1 = create_convolutional_layer(data, layer1_weights, layer1_biases,'max')
        conv_layer2 = create_convolutional_layer(conv_layer1, layer2_weights, layer2_biases, 'avg')
        conv_layer3 = create_convolutional_layer(conv_layer2, layer3_weights,layer3_biases, 'avg')
        flatten_layer = create_flatten_layer(conv_layer3)
        fully_connected_layer = create_fullyConnected_layer(flatten_layer,layer4_weights,layer4_biases)
        
        return tf.matmul(fully_connected_layer,layer5_weights) + layer5_biases

    #Training Computation
    logits = model(tf_train_dataset)
    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(labels=tf_train_labels, logits=logits))
    reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    reg_constant = 1.5  # Choose an appropriate one.
    #1.0 acc- 83.7 with overfitting
    #loss = loss + reg_constant * sum(reg_losses)
    
    #Optimizer
    #optimizer = tf.train.GradientDescentOptimizer(0.01).minimize(loss)
    optimizer = tf.train.MomentumOptimizer(0.001,momentum=0.9).minimize(loss)
    
    # Predictions for the training, validation, and test data
    train_prediction = tf.nn.softmax(logits,name = 'train_prediction')
    valid_prediction = tf.nn.softmax(model(tf_valid_dataset))
    test_prediction = tf.nn.softmax(model(tf_test_dataset))
    

In [32]:
num_steps = 500
# 1650 acc - 83.7 with overfitting
with tf.Session(graph=graph) as session:
    tf.global_variables_initializer().run()
    print('Initialized')
    saver = tf.train.Saver()
    for step in range(num_steps):
        offset = (step * batch_size) % (train_labels.shape[0] - batch_size)
        batch_data = train_dataset[offset:(offset + batch_size), :, :, :]
        batch_labels = train_labels[offset:(offset + batch_size), :]
        feed_dict = {tf_train_dataset : batch_data, tf_train_labels : batch_labels}
        _, l, predictions = session.run(
          [optimizer, loss, train_prediction], feed_dict=feed_dict)
        if (step % 50 == 0):
            print('Minibatch loss at step %d: %f' % (step, l))
            print('Minibatch accuracy: %.1f%%' % accuracy(predictions, batch_labels))
            print('Validation accuracy: %.1f%%' % accuracy(
            valid_prediction.eval(), valid_labels))
        if(step % 500 == 0):
            saver.save(session, './DetectionCNN/my_text_detection_model')
    print('Test accuracy: %.1f%%' % accuracy(test_prediction.eval(), test_labels))
    values = np.argmax(test_prediction.eval(), 1) == np.argmax(test_labels, 1)
    true_Values = np.count_nonzero(values == True)
    print("true values : ",true_Values)
    print("False values : ",len(values) - true_Values)
    

Initialized
Minibatch loss at step 0: 1.267111
Minibatch accuracy: 8.8%
Validation accuracy: 100.0%


KeyboardInterrupt: 

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See tf.nn.softmax_cross_entropy_with_logits_v2.

