In [2]:
import numpy as np
import scipy.io as sio
import sklearn.metrics as metrics
from skimage.feature import hog
from sklearn.svm import LinearSVC
import matplotlib.pyplot as plt

In [3]:
from fea_util import *

# ------------------------------------------------------------------------------
# Step 0: load the train/test samples and their labels
# ------------------------------------------------------------------------------
train_data, train_label, test_data, test_label = get_data()

# ------------------------------------------------------------------------------
# Step 1: extract HOG feature vectors
# ------------------------------------------------------------------------------

# HOG parameters (passed straight through to get_hog_feature; presumably the
# orientation-bin count, pixels per cell and cells per block -- see fea_util)
orient = 9
pix_per_cell = 8
cell_per_block = 2

train_hog_feature, test_hog_feature = get_hog_feature(
    train_data, test_data, orient, pix_per_cell, cell_per_block
)

# Flatten to one row per sample. The training rows are counted from the labels,
# the test rows from the feature array itself.
train_hog_feature = np.array(train_hog_feature).reshape(len(train_label), -1)
test_hog_feature = np.array(test_hog_feature)
test_hog_feature = test_hog_feature.reshape(len(test_hog_feature), -1)

print(train_hog_feature.shape)
print(test_hog_feature.shape)

C:\Users\asiaynrf\AppData\Local\Continuum\anaconda3\envs\tensorflowpy36\lib\site-packages\skimage\feature\_hog.py:150: skimage_deprecation: Default value of `block_norm`==`L1` is deprecated and will be changed to `L2-Hys` in v0.15. To supress this message specify explicitly the normalization method.
  skimage_deprecation)
C:\Users\asiaynrf\AppData\Local\Continuum\anaconda3\envs\tensorflowpy36\lib\site-packages\skimage\feature\_hog.py:248: skimage_deprecation: Argument `visualise` is deprecated and will be changed to `visualize` in v0.16
  'be changed to `visualize` in v0.16', skimage_deprecation)


(10262, 1764)
(1292, 1764)


### Understanding the original data
The original training images form a (10262 x 64 x 64) tensor stored as a NumPy array, where the first dimension (10262) is the number of samples. After HOG extraction, each 64x64-pixel image is represented by a row vector of length 1764, so the training features form a (10262 x 1764) matrix with one row per sample. Since this project uses a CNN for the classification task, a flat row vector of length 1764 is not an appropriate input format. To feed a sample into a CNN model, it must be a tensor of shape either (width x height x num_channel) or (num_channel x width x height). The choice is yours (see TensorFlow's conv2d). In this project I use the first layout because it is the default for TensorFlow's CNN operations. Because 1764 = 42 x 42, each HOG vector is reshaped into a 42 x 42 x 1 array.

num_channel=1 for this case

[O] **need to be modified into a new shape**

#### Understanding the original labels
The label data is just a list of 10262 numbers in the range 1-68, which corresponds to each of the 68 classes

[X] **need to be modified into a new shape**

In [4]:
import pickle

def load_data_changed(train_x_feature, test_x_feature, height=42, width=42, channels=1):
    """Reshape flat per-sample feature rows into channels-last 4-D arrays.

    Each input is a 2-D NumPy array with one flattened sample per row. Every row
    is interpreted as a (channels, height, width) block and then transposed to
    (height, width, channels), so the result has shape
    (num_samples, height, width, channels).

    train_x_feature: array of shape (n_train, channels * height * width)
    test_x_feature:  array of shape (n_test,  channels * height * width)
    height, width, channels: target sample shape; the defaults (42, 42, 1)
        reproduce the original hard-coded behaviour for 1764-long rows.
    return: (train_features, test_features)
    raises: ValueError (from NumPy) if a row length does not equal
        channels * height * width.
    """
    def _to_channels_last(flat):
        # Reshape as channels-first, then move the channel axis to the end.
        stacked = flat.reshape((len(flat), channels, height, width))
        return stacked.transpose(0, 2, 3, 1)

    return _to_channels_last(train_x_feature), _to_channels_last(test_x_feature)


In [5]:
def display_stats(train_feature, test_feature):
    """Reshape both feature sets with ``load_data_changed`` and return them.

    Despite the name, nothing is printed or plotted here: the call is simply
    forwarded and the two results are handed back as a (train, test) tuple.
    """
    reshaped_train, reshaped_test = load_data_changed(train_feature, test_feature)
    return (reshaped_train, reshaped_test)

In [6]:
# Reshape the flat HOG rows into 4-D arrays for the CNN input.
# NOTE: `test_feauture` is misspelled, but later cells reference this exact name,
# so it must be renamed everywhere at once or not at all.
train_feature, test_feauture = display_stats(train_hog_feature, test_hog_feature)

In [7]:
# Print the shapes of the feature arrays and the number of labels per split.
# The label line reports len(...), i.e. a sample count rather than a full shape.
print("Train set shape = {}, Test set shape = {}".format(train_feature.shape, test_feauture.shape))
print("Train labels shape = {}, Test labels shape = {}".format(len(train_label), len(test_label)))

Train set shape = (10262, 42, 42, 1), Test set shape = (1292, 42, 42, 1)
Train labels shape = 10262, Test labels shape = 1292


## Preprocessing

In [8]:
# normalize
def normalize(x):
    """
    Min-max scale x into the range [0, 1].

    x: input feature data or image data in numpy array [42, 42, 1]
       (any numeric array-like is accepted)
    output: normalized x, same shape as the input. The global minimum maps
            to 0 and the global maximum to 1. If every element of x is equal
            (max == min) an all-zero array is returned instead of dividing
            by zero and producing NaNs.
    raises: ValueError (from NumPy) if x is empty.
    """
    x = np.asarray(x)
    min_val = np.min(x)
    max_val = np.max(x)
    value_range = max_val - min_val

    if value_range == 0:
        # Constant input: the scaled value is undefined (0/0), so use 0.
        out_dtype = x.dtype if np.issubdtype(x.dtype, np.floating) else np.float64
        return np.zeros(x.shape, dtype=out_dtype)

    return (x - min_val) / value_range

#### One Hot Encode of the Labels
The `one_hot_encoding` function takes an input, x, which is a list of ground-truth labels. The number of elements in the list is the number of samples in the batch. The function returns a 2-dimensional array in which the number of rows is the batch size and the number of columns is the number of image classes (68).

In [9]:
# One hot encode
def one_hot_encoding(x, num_classes=68):
    """
    Convert 1-based integer class labels into a one-hot matrix.

    x: A list (or array) of integer labels in the range 1..num_classes.
       A column vector of shape (n, 1) is flattened to n labels.
    num_classes: number of classes / output columns (default 68, the value
       that was previously hard-coded).
    return one hot encoded matrix [len(x), num_classes] of floats, where row i
       has a single 1 in column x[i] - 1.
    raises: TypeError if the labels are not integers;
            ValueError if any label lies outside 1..num_classes. Previously a
            label of 0 or a negative label silently wrapped around to a wrong
            column through negative indexing.
    """
    labels = np.asarray(x).reshape(-1)
    if labels.size == 0:
        return np.zeros((0, num_classes))

    if not np.issubdtype(labels.dtype, np.integer):
        raise TypeError(
            "labels must be integers, got dtype {}".format(labels.dtype))

    # Widen first so that `labels - 1` cannot wrap around for unsigned dtypes.
    labels = labels.astype(np.int64)
    if labels.min() < 1 or labels.max() > num_classes:
        raise ValueError(
            "labels must lie in the range 1..{}, got values in {}..{}".format(
                num_classes, labels.min(), labels.max()))

    encoded = np.zeros((labels.size, num_classes))
    encoded[np.arange(labels.size), labels - 1] = 1
    return encoded

In [10]:
# One hot encode the train and test labels.
# NOTE: normalize() is defined above but is not applied in this cell.
# NOTE: the label variables are overwritten in place, so re-running this cell
# would pass the already-encoded matrices back into one_hot_encoding.
train_label = one_hot_encoding(train_label)
test_label = one_hot_encoding(test_label)

In [11]:
# Expect (number of training samples, 68): one row per label, one column per class.
print(train_label.shape) # now the labels are one hot encoded

(10262, 68)


### Start the tensorflow part

In [12]:
import tensorflow as tf
# Remove previous weights, bias, inputs, etc..
# (resets the default graph so re-running this cell starts from an empty graph)
tf.reset_default_graph()

# Inputs
# Batch of 42x42 single-channel feature maps; None leaves the batch size open.
x = tf.placeholder(tf.float32, shape=(None, 42, 42, 1), name='input_x')
# One-hot labels over the 68 classes.
y =  tf.placeholder(tf.float32, shape=(None, 68), name='output_y')
# Dropout keep probability: the training step feeds keep_probability,
# the evaluation code feeds 1.
keep_prob = tf.placeholder(tf.float32, name='keep_prob')

In [13]:
def convnet(x, keep_prob, is_training=False):
    """Build the CNN classifier graph and return the un-normalised class scores.

    Architecture: three blocks of (3x3 conv, stride 1, SAME padding -> ReLU ->
    2x2 max-pool, stride 2 -> batch norm) with 32, 64 and 128 filters, followed
    by flatten -> fully connected (128, ReLU) -> dropout -> batch norm ->
    fully connected (68, no activation).

    x: 4-D float tensor in NHWC layout with a single channel
       (the notebook feeds a (None, 42, 42, 1) placeholder).
    keep_prob: scalar tensor or float, probability of keeping a unit in the
       dropout layer.
    is_training: bool or boolean tensor forwarded to every
       tf.layers.batch_normalization call. The default (False) matches the
       previous behaviour, where the argument was omitted and the layers
       therefore always normalised with their moving statistics. When True,
       batch statistics are used and the moving-average update ops are added
       to tf.GraphKeys.UPDATE_OPS; the caller must run those ops together
       with the train op for the moving statistics to be updated.
    return: logits tensor of shape (batch, 68).
    """
    # Convolution kernels: [filter_h, filter_w, in_channels, out_channels].
    # The convolutions have no bias terms.
    conv1_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 1, 32],   mean=0, stddev=0.08))
    conv2_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 32, 64],  mean=0, stddev=0.08))
    conv3_filter = tf.Variable(tf.truncated_normal(shape=[3, 3, 64, 128], mean=0, stddev=0.08))

    # 1, 2
    conv1 = tf.nn.conv2d(x, conv1_filter, strides=[1,1,1,1], padding='SAME')
    conv1 = tf.nn.relu(conv1)
    conv1_pool = tf.nn.max_pool(conv1, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv1_bn = tf.layers.batch_normalization(conv1_pool, training=is_training)

    # 3, 4
    conv2 = tf.nn.conv2d(conv1_bn, conv2_filter, strides=[1,1,1,1], padding='SAME')
    conv2 = tf.nn.relu(conv2)
    conv2_pool = tf.nn.max_pool(conv2, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv2_bn = tf.layers.batch_normalization(conv2_pool, training=is_training)

    # 5, 6
    conv3 = tf.nn.conv2d(conv2_bn, conv3_filter, strides=[1,1,1,1], padding='SAME')
    conv3 = tf.nn.relu(conv3)
    conv3_pool = tf.nn.max_pool(conv3, ksize=[1,2,2,1], strides=[1,2,2,1], padding='SAME')
    conv3_bn = tf.layers.batch_normalization(conv3_pool, training=is_training)

    # 7
    flat = tf.contrib.layers.flatten(conv3_bn)

    # 8
    full1 = tf.contrib.layers.fully_connected(inputs=flat, num_outputs=128, activation_fn=tf.nn.relu)
    full1 = tf.nn.dropout(full1, keep_prob)
    full1 = tf.layers.batch_normalization(full1, training=is_training)

    # 9 -- raw scores; softmax is applied inside the loss, not here
    out = tf.contrib.layers.fully_connected(inputs=full1, num_outputs=68, activation_fn=None)
    return out

### Hyperparameters

In [14]:
# Training hyperparameters.
# NOTE(review): `epochs` is not referenced by the training cell shown in this
# notebook, which runs for a fixed number of steps (max_steps) instead.
epochs  =  10
# Mini-batch size; the training cell passes this exact (oddly spelled) name to gen_batch.
batch_sizebatch_s  = 128
# Dropout keep probability fed during training steps.
keep_probability = 0.7
# Learning rate handed to the Adam optimizer.
learning_rate = 0.001

In [15]:
# Build the network and expose its output under a stable tensor name ('logits').
logits = convnet(x, keep_prob)
model = tf.identity(logits, 'logits')

# loss and optimizer
# Mean softmax cross-entropy over the batch. The _v2 op replaces the deprecated
# softmax_cross_entropy_with_logits; the labels come from a fed placeholder, so
# letting gradients flow into them changes nothing for the trained variables.
cost = tf.reduce_mean(
    tf.nn.softmax_cross_entropy_with_logits_v2(logits=logits, labels=y))
optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost)

# Accuracy
# Fraction of samples whose highest-scoring class matches the one-hot label.
# tf.argmax replaces the deprecated tf.arg_max alias.
correct_pred = tf.equal(tf.argmax(logits, 1), tf.argmax(y, 1))
accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32), name='accuracy')

Instructions for updating:

Future major versions of TensorFlow will allow gradients to flow
into the labels input on backprop by default.

See @{tf.nn.softmax_cross_entropy_with_logits_v2}.

Instructions for updating:
Use `argmax` instead


### Train the neural Network

In [16]:
def train_nn(session, optimizer, keep_probability, feature_batch, label_batch):
    """Run a single optimisation step on one mini-batch.

    session: object whose ``run(fetches, feed_dict=...)`` executes the graph
    optimizer: the training op to execute
    keep_probability: value fed to the ``keep_prob`` placeholder
    feature_batch: values fed to the ``x`` placeholder
    label_batch: values fed to the ``y`` placeholder

    The placeholders ``x``, ``y`` and ``keep_prob`` are read from the enclosing
    notebook namespace. Nothing is returned.
    """
    feeds = {
        x: feature_batch,
        y: label_batch,
        keep_prob: keep_probability,
    }
    session.run(optimizer, feed_dict=feeds)

In [17]:
def show_stats(session, feature_batch, label_batch, cost, accuracy):
    """Print the loss and accuracy of the model on one batch.

    session: session used to evaluate the tensors. This argument is now
        actually used; the function previously ignored it and relied on a
        global named ``sess``.
    feature_batch: values fed to the ``x`` placeholder
    label_batch: values fed to the ``y`` placeholder
    cost: loss tensor to evaluate
    accuracy: accuracy tensor to evaluate

    Dropout is disabled for the evaluation (keep_prob is fed as 1). Both
    tensors are fetched in a single ``run`` call so the network is evaluated
    once instead of twice. The placeholders ``x``, ``y`` and ``keep_prob`` are
    read from the enclosing notebook namespace.
    """
    loss, batch_acc = session.run(
        [cost, accuracy],
        feed_dict={
            x: feature_batch,
            y: label_batch,
            keep_prob: 1,
        })
    print('Loss: {:>10.4f}, Training set accuracy: {:.6f}'.format(loss, batch_acc))

In [18]:
def _as_indexable_array(data):
    """Convert data to an ndarray that can be fancy-indexed along axis 0."""
    try:
        return np.array(data)
    except ValueError:
        # Samples with mismatched shapes (e.g. (feature, label) tuples) cannot
        # be stacked; recent NumPy versions raise instead of building an object
        # array. Fall back to a 1-D object array holding each sample as-is.
        ragged = np.empty(len(data), dtype=object)
        for position, sample in enumerate(data):
            ragged[position] = sample
        return ragged


def gen_batch(data, batch_size, num_iter):
    """Yield num_iter mini-batches of batch_size samples, reshuffling per pass.

    data: sequence of samples (anything np.array accepts, or a list of
        tuples such as (feature, label) pairs with differing shapes)
    batch_size: number of samples per yielded batch
    num_iter: total number of batches to yield

    The data is shuffled with np.random before the first batch and again each
    time the remaining samples cannot fill a complete batch, so a trailing
    partial batch is dropped rather than yielded. If batch_size exceeds
    len(data), every batch is a freshly shuffled copy of the whole data set.
    Each yielded batch is a slice of an ndarray along its first axis.
    """
    data = _as_indexable_array(data)
    cursor = len(data)  # guarantees a shuffle on the very first iteration
    for _ in range(num_iter):
        cursor += batch_size
        if cursor + batch_size > len(data):
            cursor = 0
            shuffled_indices = np.random.permutation(np.arange(len(data)))
            data = data[shuffled_indices]
        yield data[cursor:cursor + batch_size]

In [19]:
from datetime import datetime
# Root directory for TensorBoard logs and checkpoints.
train_dir='tf_logs'
# Per-run log directory named after the current timestamp (YYYYMMDD-HHMMSS).
logdir = train_dir + '/' + datetime.now().strftime('%Y%m%d-%H%M%S') + '/'

# Operation merging summary data for TensorBoard
# NOTE(review): no tf.summary.* ops are created in the cells shown here, so
# merge_all() presumably returns None, and `summary` is not used by the
# training cell -- confirm before relying on it.
summary = tf.summary.merge_all()

# Define saver to save model state at checkpoints
saver = tf.train.Saver()

In [23]:
# Train
import os
max_steps=2000
# NOTE(review): not referenced anywhere in the cells shown here; kept in case
# another cell relies on it.
save_model_pathsave_mod  = './image_classification'
print(".... Training the model")
with tf.Session() as sess:
    # Initializing the variables
    sess.run(tf.global_variables_initializer())
    # Writes the graph definition to logdir for TensorBoard.
    summary_writer = tf.summary.FileWriter(logdir, sess.graph)

    try:
        # Generate input data batches: pair every sample with its one-hot label
        # so that shuffling keeps features and labels aligned.
        zipped_data = zip(train_feature, train_label)
        batches=gen_batch(list(zipped_data), batch_sizebatch_s,max_steps)

        for i in range(max_steps):
            batch = next(batches)
            # Split the batch of (feature, label) pairs back into two sequences.
            batch_features, batch_labels = zip(*batch)

            train_nn(sess, optimizer, keep_probability, batch_features, batch_labels)

            # Periodically print out the model's current accuracy
            # (measured on the batch that was just trained on)
            if i % 100 == 0: show_stats(sess, batch_features, batch_labels, cost, accuracy)

            # Periodically save checkpoint; the step number is appended to the prefix
            if (i + 1) % 1000 == 0:
                checkpoint_file = os.path.join(train_dir, 'checkpoint')
                saver.save(sess, checkpoint_file, global_step=i)
                print('Saved checkpoint')

        # After finishing the training, evaluate on the test set
        # (the whole test set in one pass, with dropout disabled)
        test_accuracy = sess.run(accuracy, feed_dict={
            x: test_feauture,
            y: test_label,
            keep_prob:1})
        print('Test accuracy {:g}'.format(test_accuracy))
    finally:
        # Flush and release the event file even if training is interrupted;
        # the writer was previously never closed.
        summary_writer.close()

.... Training the model
Loss:     4.2004, Training set accuracy: 0.046875
Loss:     2.7770, Training set accuracy: 0.476562
Loss:     0.6083, Training set accuracy: 0.882812
Loss:     0.3488, Training set accuracy: 0.929688
Loss:     0.0566, Training set accuracy: 0.984375
Loss:     0.0257, Training set accuracy: 1.000000
Loss:     0.0116, Training set accuracy: 1.000000
Loss:     0.0041, Training set accuracy: 1.000000
Loss:     0.0020, Training set accuracy: 1.000000
Loss:     0.0026, Training set accuracy: 1.000000
Saved checkpoint
Loss:     0.0017, Training set accuracy: 1.000000
Loss:     0.0005, Training set accuracy: 1.000000
Loss:     0.0005, Training set accuracy: 1.000000
Loss:     0.0009, Training set accuracy: 1.000000
Loss:     0.0002, Training set accuracy: 1.000000
Loss:     0.0003, Training set accuracy: 1.000000
Loss:     0.0004, Training set accuracy: 1.000000
Loss:     0.0009, Training set accuracy: 1.000000
Loss:     0.0001, Training set accuracy: 1.000000
Loss:    

### The test accuracy is found to be 97%, which is very high; this model performs the best of all the models presented