In [1]:
import glob
import cv2
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt 
from PIL import Image
from random import shuffle

# Constants

In [2]:
DATASET_PATH = "Dataset/**/*.jpg"
TRAIN_VAL_TEST = {'train': 0.6, 'val': 0.8, 'test': 1} # Train: 60%, Val: 20%, Test: 20%

CLASS_LABELS = ['cardboard', 'metal', 'paper']
NUM_CHANNELS = 3 # RGB
NUM_CLASSES = 3 # Compost, Landfill & Recyclable

IMAGE_SIZE = 256
IMAGE_RESIZE_SHAPE = (IMAGE_SIZE, IMAGE_SIZE)
IMAGE_RESIZE_SHAPE_LIST = [IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS]

NUM_EPOCHS = 10
BATCH_SIZE = 5

LEARNING_RATE = 0.001
# tf.train.shuffle_batch() - batch_size, capacity, num_threads, min_after_dequeue

# Collect & Split Data

In [3]:
def collect_split_data():
    
    labels = []
    
    files = glob.glob(DATASET_PATH)
    for file in files:
        if CLASS_LABELS[0] in file:
            labels.append(0)
        elif CLASS_LABELS[1] in file:
            labels.append(1)
        elif CLASS_LABELS[2] in file:
            labels.append(2)
        else:
            print("Error: Image filename does not contain correct label.")
                 
    c = list(zip(files, labels))
    shuffle(c)
    files, labels = zip(*c)
    
    train_img = files[0:int(TRAIN_VAL_TEST['train'] * len(files))]
    train_labels = labels[0:int(TRAIN_VAL_TEST['train'] * len(files))]
    val_img = files[int(TRAIN_VAL_TEST['train'] * len(files)) : int(TRAIN_VAL_TEST['val'] * len(files))]
    val_labels = labels[int(TRAIN_VAL_TEST['train'] * len(files)) : int(TRAIN_VAL_TEST['val'] * len(files))]
    test_img = files[int(TRAIN_VAL_TEST['val'] * len(files)):]
    test_labels = labels[int(TRAIN_VAL_TEST['val'] * len(files)):]
        
    return len(files), train_img, train_labels, val_img, val_labels, test_img, test_labels

# Write data to tfrecords file

In [4]:
def load_image(addr):
    # Read, resize and convert to RGB (since cv2 loads images as BGR)
    img = Image.open(addr)
    img = cv2.imread(addr)
    img = cv2.resize(img, IMAGE_RESIZE_SHAPE, interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    return img

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def create_tfrecord(files, labels, train_val_test):
        
    # Open .tfrecords file
    writer = tf.python_io.TFRecordWriter(train_val_test+'.tfrecords')
    
    for i in range(len(files)):
    
        # Load image and its label
        img = load_image(files[i])
        label = labels[i]

        # Create a feature
        feature = { train_val_test+'/label': _int64_feature(label),
                    train_val_test+'/image': _bytes_feature(tf.compat.as_bytes(img.tobytes())) }

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write to file
        writer.write(example.SerializeToString())
    
    writer.close()
    sys.stdout.flush()

def create_tfrecords(train_img, train_labels, val_img, val_labels, test_img, test_labels):
    
    create_tfrecord(train_img, train_labels, 'train')
    create_tfrecord(val_img, val_labels, 'val')
    create_tfrecord(test_img, test_labels, 'test')

# Read data from tfrecords file

In [5]:
def read_from_tfrecords(train_val_test):

    file = glob.glob(train_val_test+'.tfrecords')
    

    feature = { train_val_test+'/image': tf.FixedLenFeature([], tf.string),
                train_val_test+'/label': tf.FixedLenFeature([], tf.int64) }

    # Enqueue train.tfrecords
    filename_queue = tf.train.string_input_producer(file, num_epochs=None)

    # Define reader and read file from queue
    reader = tf.TFRecordReader()
    _, serialized_example = reader.read(filename_queue)

    # Decode the record read by the reader
    features = tf.parse_single_example(serialized_example, features=feature)
    
    # Convert serialized data back to arrays and numbers
    image = tf.decode_raw(features[train_val_test+'/image'], tf.float32)
    label = tf.cast(features[train_val_test+'/label'], tf.int32)
    
    # Reshape image data to original shape
    image = tf.reshape(image, IMAGE_RESIZE_SHAPE_LIST)
    
    return image, label


def input_pipeline():
    
    image, label = read_from_tfrecords('train')
    image_batch, label_batch = tf.train.shuffle_batch([image, label], batch_size=BATCH_SIZE, capacity=10, 
                                                num_threads=2, min_after_dequeue=2, allow_smaller_final_batch=True)
    
    return image_batch, label_batch

# Convolutional Neural Network Model

In [6]:
def conv_net(x, reuse):
        
    with tf.name_scope("conv"):
        # First Layer
        w1 = tf.Variable(tf.truncated_normal([11, 11, 3, 96], stddev=0.03), name="W")
        b1 = tf.Variable(tf.truncated_normal([96]), name="b")
        conv2d_layer1 = tf.nn.conv2d(x, w1, [1, 4, 4, 1], padding='SAME')
        conv2d_layer1 += b1
        conv2d_layer1 = tf.nn.relu(conv2d_layer1)
        conv2d_layer1 = tf.nn.max_pool(conv2d_layer1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        tf.summary.histogram("weights", w1)
        tf.summary.histogram("biases", b1)
        tf.summary.histogram("activations", conv2d_layer1)
        
        # Second Layer
        w2 = tf.Variable(tf.truncated_normal([5, 5, 96, 192], stddev=0.03), name="W")
        b2 = tf.Variable(tf.truncated_normal([192]), name="b")
        conv2d_layer2 = tf.nn.conv2d(conv2d_layer1, w2, [1, 1, 1, 1], padding='SAME')
        conv2d_layer2 += b2
        conv2d_layer2 = tf.nn.relu(conv2d_layer2)
        conv2d_layer2 = tf.nn.max_pool(conv2d_layer2, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')

        # Third Layer
        w3 = tf.Variable(tf.truncated_normal([3, 3, 192, 288], stddev=0.03), name="W")
        b3 = tf.Variable(tf.truncated_normal([288]), name="b")
        conv2d_layer3 = tf.nn.conv2d(conv2d_layer2, w3, [1, 1, 1, 1], padding='SAME')
        conv2d_layer3 += b3

        # Fourth Layer
        w4 = tf.Variable(tf.truncated_normal([3, 3, 288, 288], stddev=0.03), name="W")
        b4 = tf.Variable(tf.truncated_normal([288]), name="b")
        conv2d_layer4 = tf.nn.conv2d(conv2d_layer3, w4, [1, 1, 1, 1], padding='SAME')
        conv2d_layer4 += b4

        # Fifth Layer
        w5 = tf.Variable(tf.truncated_normal([3, 3, 288, 192], stddev=0.03), name="W")
        b5 = tf.Variable(tf.truncated_normal([192]), name="b")
        conv2d_layer5 = tf.nn.conv2d(conv2d_layer4, w5, [1, 1, 1, 1], padding='SAME')
        conv2d_layer5 += b5
        conv2d_layer5 = tf.nn.relu(conv2d_layer5)
        conv2d_layer5 = tf.nn.max_pool(conv2d_layer5, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1], padding='SAME')
        
    with tf.name_scope("fc"):
        # Fully Connected Layers
        flattened_layer = tf.reshape(conv2d_layer5, [-1, 8*8*192])

        # First Layer 
        fc_w1 = tf.Variable(tf.truncated_normal([8*8*192, 4096], stddev=0.03), name="W")
        fc_b1 = tf.Variable(tf.truncated_normal([4096], stddev=0.01), name="b")
        layer1 = tf.matmul(flattened_layer, fc_w1) + fc_b1

        tf.summary.histogram("fc_weights", fc_w1)
        tf.summary.histogram("fc_biases", fc_b1)
        tf.summary.histogram("fc_activations", layer1)
        
        # Second Layer
        fc_w2 = tf.Variable(tf.truncated_normal([4096, 4096], stddev=0.03), name="W")
        fc_b2 = tf.Variable(tf.truncated_normal([4096], stddev=0.01), name="b")
        layer2 = tf.matmul(layer1, fc_w2) + fc_b2

        # Third Layer
        fc_w3 = tf.Variable(tf.truncated_normal([4096, 3], stddev=0.03), name="W")
        fc_b3 = tf.Variable(tf.truncated_normal([3], stddev=0.01), name="b")
        layer3 = tf.matmul(layer2, fc_w3) + fc_b3

        # Run softmax on final layer
        y = tf.nn.softmax(layer3)
    
    return y

# Training Convolutional Neural Network

In [7]:
def train_cnn(train_size):
        
    img_batch, lbl_batch = input_pipeline()
    prediction = conv_net(img_batch, reuse=True)
    
    with tf.name_scope("cost"):
        cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, 
                                                                    labels=tf.one_hot(lbl_batch, NUM_CLASSES)))
    with tf.name_scope("optimizer"):
        optimizer = tf.train.AdamOptimizer(LEARNING_RATE).minimize(cost)
    
    with tf.name_scope("accuracy"):
        correct_prediction = tf.equal(tf.argmax(prediction, 1), tf.argmax(tf.one_hot(lbl_batch, NUM_CLASSES), 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
    tf.summary.scalar("cost", cost)
    tf.summary.scalar("accuracy", accuracy)
    merged_summary = tf.summary.merge_all()
    
    with tf.Session() as sess:
        
        sess.run(tf.group(tf.global_variables_initializer(), tf.local_variables_initializer()))
        writer = tf.summary.FileWriter("./logdir/", sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        
        for i in range(NUM_EPOCHS):
            epoch_loss = 0
            
            for _ in range(int(train_size/BATCH_SIZE)):
                c, _ = sess.run([cost, optimizer])
                epoch_loss += c
                
            if (i % 2 == 0):
                # [train_acc] = sess.run([accuracy])
                s = sess.run(merged_summary)
                writer.add_summary(s, i)
                # print("Step %d, training accuracy %g" % (i, train_acc))
                        
            print('Epoch', i, 'completed out of', NUM_EPOCHS, 'Loss:', epoch_loss)
                 
        coord.request_stop()
        coord.join(threads)

# Main

In [8]:
# Collect data from "Dataset/", shuffle, and split it into training, validation & testing set.
data_size, train_img, train_labels, val_img, val_labels, test_img, test_labels = collect_split_data()

# Creates train.tfrecords, val.tfrecords & test.tfrecords
create_tfrecords(train_img, train_labels, val_img, val_labels, test_img, test_labels)

# Training of CNN
train_cnn(data_size*TRAIN_VAL_TEST['train'])

Epoch 0 completed out of 10 Loss: 0.951444625854
Epoch 1 completed out of 10 Loss: 1.5514446497
Epoch 2 completed out of 10 Loss: 0.751444637775
Epoch 3 completed out of 10 Loss: 1.15144467354
Epoch 4 completed out of 10 Loss: 1.15144467354
Epoch 5 completed out of 10 Loss: 1.15144467354
Epoch 6 completed out of 10 Loss: 0.951444625854
Epoch 7 completed out of 10 Loss: 0.951444625854
Epoch 8 completed out of 10 Loss: 0.951444625854
Epoch 9 completed out of 10 Loss: 1.15144467354
