In [1]:
import glob
import random
import sys
from random import shuffle

import cv2
import matplotlib.pyplot as plt
import numpy as np
import tensorflow as tf
from PIL import Image

# Parameters

In [2]:
# ---- Dataset discovery and labelling ----
input_data_path = "Dataset/**/*.jpg"
trash_type = ['compost', 'landfill', 'recyclable']  # Keep 3 types only
shuffle_data = True
# Cumulative split boundaries as fractions of the dataset:
# train = [0, 0.6), val = [0.6, 0.8), test = [0.8, 1] -> 60% / 20% / 20%
tvt = {'train': 0.6, 'val': 0.8, 'test': 1}

# ---- TFRecord output files (one per split) ----
train_filename, val_filename, test_filename = (
    'train.tfrecords',
    'val.tfrecords',
    'test.tfrecords',
)

# ---- Image geometry ----
IMAGE_HEIGHT = 224
IMAGE_WIDTH = 224
NUM_CHANNELS = 3  # RGB
# Target size handed to cv2.resize when loading an image
resize_image = (IMAGE_WIDTH, IMAGE_HEIGHT)
# Shape used to restore a decoded record to height x width x channels
resize_image_set = [IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS]

# ---- Batching and network hyper-parameters ----
BATCH_SIZE = 5
NUM_CLASSES = 3  # Compost, Landfill & Recyclable
FILTER_SHAPE = [5, 5]
NUM_FILTERS = 32
POOL_SHAPE = [2, 2]

LEARNING_RATE = 0.0001

# Split dataset into training, validation & test set.

In [3]:
def split_data(input_data_path, trash_type, shuffle_data, tvt, return_type, seed=0):
    """Label every file matching a glob pattern and return one of the train/val/test splits.

    Args:
        input_data_path: Glob pattern selecting the image files.
        trash_type: Sequence of class names. A file gets the index of the first
            name that occurs as a substring of its path, or -1 if none matches.
        shuffle_data: When true, files and labels are shuffled together before
            the split boundaries are applied.
        tvt: Dict of cumulative split boundaries (fractions of the file count);
            the keys 'train' and 'val' are read, the test split is the remainder.
        return_type: Which split to return: 'train', 'val' or 'test'.
        seed: Seed for the shuffle. The fixed default makes repeated calls
            (one per split) see the same ordering, so the three splits never
            overlap. Pass None for a different shuffle on every call.

    Returns:
        A (files, labels) pair for the requested split. Both are tuples when a
        shuffle took place and lists otherwise.

    Raises:
        ValueError: If return_type is not 'train', 'val' or 'test'.
    """
    if return_type not in ('train', 'val', 'test'):
        raise ValueError(
            "return_type must be 'train', 'val' or 'test', got {!r}".format(return_type))

    # glob order depends on the filesystem; sorting makes the seeded shuffle
    # (and therefore the split membership) reproducible.
    files = sorted(glob.glob(input_data_path))

    # Labelling data: index of the first class name found in the path, else -1
    labels = [
        next((index for index, name in enumerate(trash_type) if name in path), -1)
        for path in files
    ]

    # Shuffling data (skipped for an empty match, where zip(*...) cannot unpack)
    if shuffle_data and files:
        paired = list(zip(files, labels))
        random.Random(seed).shuffle(paired)
        files, labels = zip(*paired)

    # Dividing data at the cumulative boundaries given by tvt
    total = len(files)
    train_end = int(tvt['train'] * total)
    val_end = int(tvt['val'] * total)
    bounds = {
        'train': (0, train_end),
        'val': (train_end, val_end),
        'test': (val_end, total),
    }
    start, stop = bounds[return_type]
    return files[start:stop], labels[start:stop]

# Write to tfrecords file

In [4]:
def load_image(addr):
    """Read an image from disk, resize it and return it as a float32 RGB array.

    Args:
        addr: Path of the image file to load.

    Returns:
        The image resized to the module-level `resize_image` size, with its
        channels converted from BGR to RGB and its dtype cast to float32.

    Raises:
        IOError: If OpenCV cannot read the file at `addr`.
    """
    # cv2.imread reports failure by returning None rather than raising, so
    # check explicitly instead of letting cv2.resize fail with an opaque error.
    img = cv2.imread(addr)
    if img is None:
        raise IOError("Could not read image file: {}".format(addr))

    img = cv2.resize(img, resize_image, interpolation=cv2.INTER_CUBIC)
    # cv2 loads images as BGR; converting it to RGB
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    return img.astype(np.float32)

def _int64_feature(value):
    """Wrap a single value in a tf.train.Feature backed by an Int64List."""
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Wrap a single value in a tf.train.Feature backed by a BytesList."""
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

def write_to_tfrecords(filename, files, label, tvt):
    """Serialize images and their labels into a single .tfrecords file.

    Args:
        filename: Path of the .tfrecords file to create.
        files: Sequence of image paths to load with load_image.
        label: Sequence of integer labels, one per entry of `files`.
        tvt: Split name used as the feature-key prefix, giving the keys
            '<tvt>/label' and '<tvt>/image'.

    Raises:
        ValueError: If `files` and `label` have different lengths.
    """
    if len(files) != len(label):
        raise ValueError(
            "files and label must have the same length, got {} and {}".format(
                len(files), len(label)))

    # Open .tfrecords file
    writer = tf.python_io.TFRecordWriter(filename)
    try:
        # Iterate over the arguments themselves so each split is written from
        # its own data rather than from module-level variables.
        for path, image_label in zip(files, label):
            # Load image
            img = load_image(path)

            # Create a feature
            feature = { tvt+'/label': _int64_feature(image_label),
                        tvt+'/image': _bytes_feature(tf.compat.as_bytes(img.tobytes()))}

            # Create an example protocol buffer
            example = tf.train.Example(features=tf.train.Features(feature=feature))

            # Serialize to string and write on the file
            writer.write(example.SerializeToString())
    finally:
        # Close the writer even if loading or serializing an image fails
        writer.close()

    sys.stdout.flush()

# Read from tfrecords file

In [5]:
def read_from_tfrecords(tvt):
    """Build the queue-based input pipeline for one split.

    Args:
        tvt: Split name; records are read from '<tvt>.tfrecords' using the
            feature keys '<tvt>/image' and '<tvt>/label'.

    Returns:
        An (images, labels) pair of batched tensors produced by
        tf.train.shuffle_batch with batch size BATCH_SIZE.
    """
    record_files = glob.glob(tvt+'.tfrecords')

    image_key = tvt+'/image'
    label_key = tvt+'/label'
    schema = { image_key: tf.FixedLenFeature([], tf.string),
               label_key: tf.FixedLenFeature([], tf.int64)}

    # Queue of record files; num_epochs=1 means each file is offered once.
    # NOTE(review): TensorFlow documents that num_epochs creates a local
    # variable, so the caller has to run tf.local_variables_initializer().
    file_queue = tf.train.string_input_producer(record_files, num_epochs=1)

    # Pull one serialized example off the queue
    record_reader = tf.TFRecordReader()
    _, raw_record = record_reader.read(file_queue)

    # Parse the example according to the schema above
    parsed = tf.parse_single_example(raw_record, features=schema)

    # Raw bytes -> float32 values, label -> int32
    pixels = tf.decode_raw(parsed[image_key], tf.float32)
    target = tf.cast(parsed[label_key], tf.int32)

    # Restore the image to the shape given by resize_image_set
    pixels = tf.reshape(pixels, resize_image_set)

    # Group single examples into randomly shuffled batches
    batched_images, batched_labels = tf.train.shuffle_batch(
        [pixels, target],
        batch_size=BATCH_SIZE,
        capacity=10,
        num_threads=1,
        min_after_dequeue=5)

    return batched_images, batched_labels

# CNN

In [6]:
def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
    """Build one convolution -> bias -> ReLU -> max-pool stage.

    Args:
        input_data: Input tensor passed to tf.nn.conv2d.
        num_input_channels: Number of channels in `input_data`.
        num_filters: Number of convolution filters (output channels).
        filter_shape: Two-element sequence giving the filter's spatial size.
        pool_shape: Two-element sequence giving the pooling window size.
        name: Prefix for the variable names ('<name>_W' and '<name>_b').

    Returns:
        The max-pooled output tensor of the stage.
    """
    # Filter shape in the layout tf.nn.conv2d expects
    kernel_h, kernel_w = filter_shape[0], filter_shape[1]
    kernel_dims = [kernel_h, kernel_w, num_input_channels, num_filters]

    # Trainable filter weights and per-filter bias
    kernel = tf.Variable(tf.truncated_normal(kernel_dims, stddev=0.03),
                         name=name+'_W')
    offset = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b')

    # Stride-1 convolution with SAME padding
    convolved = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding='SAME')

    # Bias, then ReLU non-linearity
    activated = tf.nn.relu(convolved + offset)

    # Max pooling with a fixed stride of 2 in both spatial dimensions
    window = [1, pool_shape[0], pool_shape[1], 1]
    pooled = tf.nn.max_pool(activated, ksize=window, strides=[1, 2, 2, 1],
                            padding='SAME')

    return pooled


def cnn():
    """Build a two-conv-layer CNN and run three training steps on the 'train' records.

    Each step dequeues one batch from the input pipeline, then feeds exactly
    that batch (images and one-hot labels) to the network through the
    placeholders, so the images and labels used for an update always belong
    together. Progress is printed after every step and the graph is written to
    ./logs for TensorBoard.
    """
    image_batch_out, label_batch_out = read_from_tfrecords('train')

    # Tensors dequeued from the input pipeline; they are fetched once per step
    # and fed back in through the placeholders below.
    image_batch = tf.reshape(image_batch_out, (BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS))
    # Labels are assigned as 0-based class indices when the dataset is split,
    # so they are one-hot encoded as they are (an offset of -1 would turn
    # class 0 into an all-zero row and never produce the last class).
    label_batch_one_hot = tf.one_hot(label_batch_out,
                                     depth=NUM_CLASSES, on_value=1.0, off_value=0.0)

    image_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])
    label_batch_placeholder = tf.placeholder(tf.float32, shape=[BATCH_SIZE, NUM_CLASSES])

    # The network reads from the placeholder, not from the queue tensor;
    # otherwise running the train step would dequeue a second, unrelated batch.
    layer1 = create_new_conv_layer(image_batch_placeholder, NUM_CHANNELS, NUM_FILTERS, FILTER_SHAPE, POOL_SHAPE, name='layer1')
    layer2 = create_new_conv_layer(layer1, NUM_FILTERS, NUM_FILTERS*2,
                                   FILTER_SHAPE, POOL_SHAPE, name='layer2')

    # Flattened feature count taken from the static shape of layer2
    # (56 * 56 * 64 with the default 224x224 input and 32 base filters).
    flattened_size = int(np.prod(layer2.get_shape().as_list()[1:]))
    flattened = tf.reshape(layer2, [-1, flattened_size])

    # NOTE(review): the hidden layer width reuses BATCH_SIZE, as the original
    # code did; it looks like a dedicated hidden-unit constant was intended.
    hidden_units = BATCH_SIZE

    # setup some weights and bias values for this layer, then activate with ReLU
    wd1 = tf.Variable(tf.truncated_normal([flattened_size, hidden_units], stddev=0.03), name='wd1')
    bd1 = tf.Variable(tf.truncated_normal([hidden_units], stddev=0.01), name='bd1')
    dense_layer1 = tf.nn.relu(tf.matmul(flattened, wd1) + bd1)

    # another layer with softmax activations
    wd2 = tf.Variable(tf.truncated_normal([hidden_units, NUM_CLASSES], stddev=0.03), name='wd2')
    bd2 = tf.Variable(tf.truncated_normal([NUM_CLASSES], stddev=0.01), name='bd2')
    dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2
    y_ = tf.nn.softmax(dense_layer2)

    loss = tf.losses.mean_squared_error(labels=label_batch_placeholder, predictions=y_)
    # NOTE(review): the step size is hard-coded here; LEARNING_RATE is not used.
    train_step = tf.train.GradientDescentOptimizer(0.0005).minimize(loss)
    saver = tf.train.Saver()

    with tf.Session() as sess:
        # Visualize the graph through tensorboard.
        file_writer = tf.summary.FileWriter("./logs", sess.graph)

        # The input producer is created with num_epochs, which adds a local
        # epoch-counter variable; it must be initialized along with the globals.
        sess.run([tf.global_variables_initializer(), tf.local_variables_initializer()])
#         saver.restore(sess, "./log.ckpt")
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        try:
            for i in range(3):
                # Dequeue one batch ...
                image_out, label_out, label_batch_one_hot_out = sess.run([image_batch, label_batch_out, label_batch_one_hot])

                # ... and train on exactly that batch
                _, infer_out, loss_out = sess.run([train_step, y_, loss], feed_dict={image_batch_placeholder: image_out, label_batch_placeholder: label_batch_one_hot_out})

                print(i)
                print(image_out.shape)
                print("label_out: ")
                print(label_out)
                print(label_batch_one_hot_out)
                print("infer_out: ")
                print(infer_out)
                print("loss: ")
                print(loss_out)
#                 if(i%50 == 0):
#                     saver.save(sess, "./log.ckpt")
        except tf.errors.OutOfRangeError:
            # Raised when the single-epoch input queue runs dry
            print("Input queue exhausted before all training steps completed.")
        finally:
            # Always stop the queue-runner threads and flush the summary writer
            coord.request_stop()
            coord.join(threads)
            file_writer.close()

    
    
    
#     # x (image) - flattened image data; x_shaped - reshaped to 4D tensor
#     x = tf.placeholder(image_batch, shape=[BATCH_SIZE, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])
#     x_shaped = tf.reshape(x, [-1, IMAGE_HEIGHT, IMAGE_WIDTH, NUM_CHANNELS])
    
#     # y (label) - one_hot_encoded
#     y = tf.placeholder(tf.int64, shape=[BATCH_SIZE, NUM_CLASSES])
#     y = tf.one_hot(y, depth=NUM_CLASSES)
    
#     layer1 = create_new_conv_layer(x_shaped, NUM_CHANNELS, NUM_FILTERS, FILTER_SHAPE, POOL_SHAPE, name='layer1')
#     layer2 = create_new_conv_layer(layer1, NUM_FILTERS, NUM_FILTERS*2, 
#                                    FILTER_SHAPE, POOL_SHAPE, name='layer2')
    
#     flattened = tf.reshape(layer2, [-1, 56 * 56 * 64])
    
#     # setup some weights and bias values for this layer, then activate with ReLU
#     wd1 = tf.Variable(tf.truncated_normal([56 * 56 * 64, BATCH_SIZE], stddev=0.03), name='wd1')
#     bd1 = tf.Variable(tf.truncated_normal([BATCH_SIZE], stddev=0.01), name='bd1')
#     dense_layer1 = tf.matmul(flattened, wd1) + bd1
#     dense_layer1 = tf.nn.relu(dense_layer1)
    
#     # another layer with softmax activations
#     wd2 = tf.Variable(tf.truncated_normal([BATCH_SIZE, NUM_CLASSES], stddev=0.03), name='wd2')
#     bd2 = tf.Variable(tf.truncated_normal([NUM_CLASSES], stddev=0.01), name='bd2')
#     dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2
#     y_ = tf.nn.softmax(dense_layer2)
    
#     cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=dense_layer2, labels=y))

#     # add an optimiser
#     optimiser = tf.train.AdamOptimizer(learning_rate=LEARNING_RATE).minimize(cross_entropy)

#     # define an accuracy assessment operation
#     correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
#     accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    
#     # setup the initialisation operator
#     init_op = tf.global_variables_initializer()

#     # setup recording variables
#     # add a summary to store the accuracy
#     tf.summary.scalar('accuracy', accuracy)

# Main

In [7]:
# train_files, train_labels = split_data(input_data_path, trash_type, shuffle_data, tvt, 'train')
# val_files, val_labels = split_data(input_data_path, trash_type, shuffle_data, tvt, 'val')
# test_files, test_labels = split_data(input_data_path, trash_type, shuffle_data, tvt, 'test')

# write_to_tfrecords(train_filename, train_files, train_labels, 'train')
# write_to_tfrecords(val_filename, val_files, val_labels, 'val')
# write_to_tfrecords(test_filename, test_files, test_labels, 'test')

# cnn()