# To Do

In [1]:
# 1. Change collect_data() to implement the following dataset structure,
# Dataset -> Compost -> one.jpg, two.jpg, ...
#            Landfill -> one.jpg, two.jpg, ...

# 2. Check if cnn_model() -> layer2 = create_new_conv_layer(..., NUM_FILTERS*2, ...) is correct
# 3. Change constants in cnn_model() -> fully connected layer

In [2]:
import glob
import cv2
import sys
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt 
from PIL import Image
from random import shuffle

# Parameters

In [3]:
DATASET_PATH = "Dataset/**/*.jpg"
CLASS_LABELS = ['compost', 'landfill', 'recyclable']
TRAIN_VAL_TEST = {'train': 0.6, 'val': 0.8, 'test': 1} # Train: 60%, Val: 20%, Test: 20%

IMAGE_RESIZE_SHAPE = (28, 28)
IMAGE_RESIZE_SHAPE_LIST = [28, 28, 3]
IMAGE_SIZE = 224
NUM_CHANNELS = 3 # RGB
NUM_CLASSES = 3 # Compost, Landfill & Recyclable
NUM_EPOCHS = 5
BATCH_SIZE = 5
NUM_FILTERS = 32
FILTER_SHAPE = [5, 5]
POOL_SHAPE = [2, 2]

# Collect & Split Data

In [4]:
def collect_split_data():
    
    labels = []
    
    files = glob.glob(DATASET_PATH)
    for file in files:
        if CLASS_LABELS[0] in file:
            labels.append(0)
        elif CLASS_LABELS[1] in file:
            labels.append(1)
        elif CLASS_LABELS[2] in file:
            labels.append(2)
        else:
            print("Error: Image filename does not contain correct label.")

    c = list(zip(files, labels))
    shuffle(c)
    files, labels = zip(*c)
    
    train_img = files[0:int(TRAIN_VAL_TEST['train'] * len(files))]
    train_labels = labels[0:int(TRAIN_VAL_TEST['train'] * len(files))]
    val_img = files[int(TRAIN_VAL_TEST['train'] * len(files)) : int(TRAIN_VAL_TEST['val'] * len(files))]
    val_labels = labels[int(TRAIN_VAL_TEST['train'] * len(files)) : int(TRAIN_VAL_TEST['val'] * len(files))]
    test_img = files[int(TRAIN_VAL_TEST['val'] * len(files)):]
    test_labels = labels[int(TRAIN_VAL_TEST['val'] * len(files)):]
    
    return train_img, train_labels, len(train_labels), val_img, val_labels, test_img, test_labels

# Write data to tfrecords file

In [5]:
def load_image(addr):
    # Read, resize and convert to RGB (since cv2 loads images as BGR)
    img = Image.open(addr)
    img = cv2.imread(addr)
    img = cv2.resize(img, IMAGE_RESIZE_SHAPE, interpolation=cv2.INTER_CUBIC)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = img.astype(np.float32)
    return img

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def create_tfrecord(files, labels, train_val_test):
        
    # Open .tfrecords file
    writer = tf.python_io.TFRecordWriter(train_val_test+'.tfrecords')
    
    for i in range(len(files)):
    
        # Load image and its label
        img = load_image(files[i])
        label = labels[i]

        # Create a feature
        feature = { train_val_test+'/label': _int64_feature(label),
                    train_val_test+'/image': _bytes_feature(tf.compat.as_bytes(img.tobytes()))}

        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write to file
        writer.write(example.SerializeToString())
    
    writer.close()
    sys.stdout.flush()

def create_tfrecords(train_img, train_labels, val_img, val_labels, test_img, test_labels):
    
    create_tfrecord(train_img, train_labels, 'train')
    create_tfrecord(val_img, val_labels, 'val')
    create_tfrecord(test_img, test_labels, 'test')

# Read data from tfrecords file

In [6]:
def read_from_tfrecords(train_val_test):
    
    file = glob.glob(train_val_test+'.tfrecords')
    
    if (not file):
        print("Error: train.tfrecords does not exist in current directory.")
    else:
        feature = { train_val_test+'/image': tf.FixedLenFeature([], tf.string),
                    train_val_test+'/label': tf.FixedLenFeature([], tf.int64) }

        # Enqueue train.tfrecords
        filename_queue = tf.train.string_input_producer(file, num_epochs=1)

        # Define reader and read file from queue
        reader = tf.TFRecordReader()
        _, serialized_example = reader.read(filename_queue)

        # Decode the record read by the reader
        features = tf.parse_single_example(serialized_example, features=feature)

        # Convert serialized data back to arrays and numbers
        images = tf.decode_raw(features[train_val_test+'/image'], tf.float32)
        labels = tf.cast(features[train_val_test+'/label'], tf.int32)

        # Reshape image data to original shape
        image = tf.reshape(images, IMAGE_RESIZE_SHAPE_LIST)
        
        return image, label

# Convolutional Neural Network Model

In [7]:
def create_new_conv_layer(input_data, num_input_channels, num_filters, filter_shape, pool_shape, name):
    
    # Setup the filter input shape for tf.nn.conv_2d
    conv_filt_shape = [filter_shape[0], filter_shape[1], num_input_channels, num_filters]

    # Initialise weights and bias for the filter
    weights = tf.Variable(tf.truncated_normal(conv_filt_shape, stddev=0.03), name=name+'_W')
    bias = tf.Variable(tf.truncated_normal([num_filters]), name=name+'_b')

    # Setup the convolutional layer operation
    out_layer = tf.nn.conv2d(input_data, weights, [1, 1, 1, 1], padding='SAME')

    # Add the bias
    out_layer += bias

    # Apply a ReLU non-linear activation
    out_layer = tf.nn.relu(out_layer)

    # now perform max pooling
    # ksize is the argument which defines the size of the max pooling window (i.e. the area over which the maximum is
    # calculated).  It must be 4D to match the convolution - in this case, for each image we want to use a 2 x 2 area
    # applied to each channel
    ksize = [1, pool_shape[0], pool_shape[1], 1]
    # strides defines how the max pooling area moves through the image - a stride of 2 in the x direction will lead to
    # max pooling areas starting at x=0, x=2, x=4 etc. through your image.  If the stride is 1, we will get max pooling
    # overlapping previous max pooling areas (and no reduction in the number of parameters).  In this case, we want
    # to do strides of 2 in the x and y directions.
    strides = [1, 2, 2, 1]
    out_layer = tf.nn.max_pool(out_layer, ksize=ksize, strides=strides, padding='SAME')

    return out_layer


def cnn_model(x):
    layer1 = create_new_conv_layer(x, NUM_CHANNELS, NUM_FILTERS, FILTER_SHAPE, POOL_SHAPE, name='layer1')
    layer2 = create_new_conv_layer(layer1, NUM_CHANNELS, NUM_FILTERS*2, FILTER_SHAPE, POOL_SHAPE, name='layer2')
    
    # flatten the output ready for the fully connected output stage - after two layers of stride 2 pooling, we go
    # from 28 x 28, to 14 x 14 to 7 x 7 x,y co-ordinates, but with 64 output channels.  To create the fully connected,
    # "dense" layer, the new shape needs to be [-1, 7 x 7 x 64]
    flattened = tf.reshape(layer2, [-1, 7 * 7 * 64])

    # setup some weights and bias values for this layer, then activate with ReLU
    wd1 = tf.Variable(tf.truncated_normal([7 * 7 * 64, 1000], stddev=0.03), name='wd1')
    bd1 = tf.Variable(tf.truncated_normal([1000], stddev=0.01), name='bd1')
    dense_layer1 = tf.matmul(flattened, wd1) + bd1
    dense_layer1 = tf.nn.relu(dense_layer1)

    # another layer with softmax activations
    wd2 = tf.Variable(tf.truncated_normal([1000, NUM_CLASSES], stddev=0.03), name='wd2')
    bd2 = tf.Variable(tf.truncated_normal([NUM_CLASSES], stddev=0.01), name='bd2')
    dense_layer2 = tf.matmul(dense_layer1, wd2) + bd2
    y_ = tf.nn.softmax(dense_layer2)
    
    return y_

# Training Convolutional Neural Network

In [8]:
def train_cnn(train_size):
    
    x = tf.placeholder(tf.float32, shape=IMAGE_RESIZE_SHAPE_LIST.insert(0, None))
    x_shaped = tf.reshape(x, [-1, IMAGE_SIZE, IMAGE_SIZE, NUM_CHANNELS])

    y = tf.placeholder(tf.float32, [None, NUM_CLASSES])
    
    images, labels = read_from_tfrecords('train')
    prediction = cnn_model(x_shaped)
    
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=prediction, labels=y))
    optimizer = tf.train.AdamOptimizer().minimize(cost)
    
    with tf.Session() as sess:
        
        sess.run(tf.global_variables_initializer())
        for epoch in range(NUM_EPOCHS):
            epoch_loss = 0
            for _ in range(int(train_size/BATCH_SIZE)):
                epoch_x, epoch_y = tf.train.shuffle_batch([images, labels], batch_size=BATCH_SIZE, capacity=10, 
                                                          num_threads=1, min_after_dequeue=5)
                _, loss = sess.run([optimizer, cost], feed_dict={x: epoch_x, y: epoch_y})
                epoch_loss += loss
                
            print('Epoch ', epoch, ' completed out of ', NUM_EPOCHS, '. Loss: ', epoch_loss)

# Main

In [9]:
# Collect data from "Dataset/", shuffle, and split it into training, validation & testing set.
train_img, train_labels, train_size, val_img, val_labels, test_img, test_labels = collect_split_data()

# Creates train.tfrecords, val.tfrecords & test.tfrecords
create_tfrecords(train_img, train_labels, val_img, val_labels, test_img, test_labels)

# Training of CNN
train_cnn(train_size)

TypeError: Failed to convert object of type <class 'list'> to Tensor. Contents: [None, 224, 224, 3]. Consider casting elements to a supported type.