# Environment settings

In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

import numpy as np
import tensorflow as tf
from matplotlib.pyplot import imshow
from matplotlib import pyplot as plot
import cv2 as cv
import pandas as pd

  from ._conv import register_converters as _register_converters


In [2]:
# Root folder of the training data (train.csv and the per-image PNG files are read from here).
TRAIN_PATH = 'data/train/'
# Presumably the root folder of the held-out test images; not read by the cells shown here.
TEST_PATH = 'data/test/'
# Length of the multi-hot label vector built for every image.
NUM_OF_CLASSES = 28
# Pixel statistics used to normalise images when the tfrecords are parsed.
# The cell that creates the training tfrecords recomputes and overwrites both values when it runs.
mean_image = 14.114832
std_image = 29.73986

In [3]:
# Set tf basic settings
print(tf.__version__)
# Show TensorFlow log messages of level INFO and above.
tf.logging.set_verbosity(tf.logging.INFO)
# The training cell iterates the dataset with a plain Python for-loop, which relies on eager mode.
tf.enable_eager_execution()

1.10.0


# Dataset manipulation and handling

#### Feature encoding for creating tfrecords 

In [4]:
def _int64_feature(value):
    """Wrap a sequence of integers in a ``tf.train.Feature``.

    ``value`` is handed to ``Int64List`` as-is, so it must already be an
    iterable of integers (it is not wrapped in a list here).
    """
    int_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int_list)
def _bytes_feature(value):
    """Wrap a single bytes object in a ``tf.train.Feature``.

    ``value`` is placed in a one-element ``BytesList``.
    """
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

#### Function for loading an image from a given file name

In [5]:
def load_image_specific_filter(file):
    """Load an image from disk and return its first channel as float32.

    Args:
        file: Path of the image file to read.

    Returns:
        2-D ``np.float32`` array holding channel 0 of the image as decoded
        by ``cv.imread``.

    Raises:
        IOError: If OpenCV cannot read or decode ``file``.
    """
    img = cv.imread(file)
    # cv.imread reports failure by returning None instead of raising, which
    # would otherwise surface later as an unrelated AttributeError.
    if img is None:
        raise IOError("Could not read image file: {}".format(file))
    # Select the channel first so only one channel is converted to float32.
    return img[:, :, 0].astype(np.float32)

## Creation of 2 tfrecords files, 1 for training set and 1 for validation set
Run this section only once: the tfrecords files it creates are reused by all later cells for training and validation.

In [6]:
# Index of the training set; its 'Id' and 'Target' columns are read when the tfrecords are built.
train_labels = pd.read_csv(TRAIN_PATH + "train.csv")

In [17]:
train_filename = 'data/tfrec/train.tfrecords_full'
num_of_examples = train_labels.count()['Id']
# 80/20 split: the first 80% of the rows are written to the training tfrecords file.
num_train_set = int(num_of_examples * 0.8)
num_validation_set = num_of_examples - num_train_set

# The normalisation statistics are estimated from the first 1500 training images only.
# The buffer is allocated once at its final size: growing it with np.append copied the
# whole array on every iteration, and its all-zero seed image biased the mean and std.
num_stats_images = min(1500, num_train_set)
entire_train_ds = np.empty((num_stats_images, 512, 512), dtype=np.float32)

# The context manager closes the writer even if an image fails to load.
with tf.python_io.TFRecordWriter(train_filename) as writer:
    for i in range(num_train_set):
        if i % 1000 == 0:
            print("Reached image {} out of {}".format(i, num_train_set))

        # Load the image belonging to this row
        file_name = TRAIN_PATH + train_labels['Id'][i] + '_green.png'
        img = load_image_specific_filter(file_name)

        # Multi-hot label vector: 'Target' is a space-separated list of class indices
        labels = np.zeros(shape=(NUM_OF_CLASSES), dtype=int)
        train_label = [int(s) for s in train_labels['Target'][i].split(' ')]
        labels[train_label] = 1

        # Create a feature (the image is stored as raw float32 bytes)
        feature = {'train/label': _int64_feature(labels),
                   'train/image': _bytes_feature(tf.compat.as_bytes(img.tobytes()))}
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

        # Keep the image for the mean/std estimate used during preprocessing
        if i < num_stats_images:
            entire_train_ds[i] = img

# Leave the configured statistics untouched when there was nothing to measure.
if num_stats_images > 0:
    mean_image = np.mean(entire_train_ds)
    std_image = np.std(entire_train_ds)
del entire_train_ds

Reached image 0 out of 24857
Reached image 1000 out of 24857
Reached image 2000 out of 24857
Reached image 3000 out of 24857
Reached image 4000 out of 24857
Reached image 5000 out of 24857
Reached image 6000 out of 24857
Reached image 7000 out of 24857
Reached image 8000 out of 24857
Reached image 9000 out of 24857
Reached image 10000 out of 24857
Reached image 11000 out of 24857
Reached image 12000 out of 24857
Reached image 13000 out of 24857
Reached image 14000 out of 24857
Reached image 15000 out of 24857
Reached image 16000 out of 24857
Reached image 17000 out of 24857
Reached image 18000 out of 24857
Reached image 19000 out of 24857
Reached image 20000 out of 24857
Reached image 21000 out of 24857
Reached image 22000 out of 24857
Reached image 23000 out of 24857
Reached image 24000 out of 24857


In [9]:
test_filename = 'data/tfrec/train.tfrecords_test'
# Relies on num_of_examples and num_train_set computed by the training-tfrecords cell.
num_validation_set = num_of_examples - num_train_set

# The context manager closes the writer even if an image fails to load.
with tf.python_io.TFRecordWriter(test_filename) as writer:
    # The rows after the training split form the validation set.
    for i in range(int(num_train_set), num_of_examples):
        if i % 1000 == 0:
            print("Reached image {} out of {}".format(i, num_of_examples))

        # Load the image belonging to this row
        file_name = TRAIN_PATH + train_labels['Id'][i] + '_green.png'
        img = load_image_specific_filter(file_name)

        # Multi-hot label vector: 'Target' is a space-separated list of class indices
        labels = np.zeros(shape=(NUM_OF_CLASSES), dtype=int)
        train_label = [int(s) for s in train_labels['Target'][i].split(' ')]
        labels[train_label] = 1

        # Create a feature (the image is stored as raw float32 bytes)
        feature = {'test/label': _int64_feature(labels),
                   'test/image': _bytes_feature(tf.compat.as_bytes(img.tobytes()))}
        # Create an example protocol buffer
        example = tf.train.Example(features=tf.train.Features(feature=feature))

        # Serialize to string and write on the file
        writer.write(example.SerializeToString())

Reached image 25000 out of 31072
Reached image 26000 out of 31072
Reached image 27000 out of 31072
Reached image 28000 out of 31072
Reached image 29000 out of 31072
Reached image 30000 out of 31072
Reached image 31000 out of 31072


#### Function for extracting features from tfrecords

In [7]:
def extract_fn_train(data_record):
    """Parse one serialized training example and normalise its image.

    The record is decoded with the 'train/label' and 'train/image' keys.
    The image bytes are reinterpreted as float32, reshaped to 512x512 and
    standardised with the module-level ``mean_image`` / ``std_image``.

    Returns:
        Dict with 'train/label' (28 int64 values) and 'train/image'
        (512x512 float32 tensor).
    """
    # Keys and types mirror the ones used when the tfrecords were written
    feature_spec = {
        'train/label': tf.FixedLenFeature([28], tf.int64),
        'train/image': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(data_record, feature_spec)

    pixels = tf.decode_raw(parsed['train/image'], tf.float32)
    pixels = tf.reshape(pixels, (512, 512))
    parsed['train/image'] = (pixels - mean_image) / std_image

    return parsed

def extract_fn_test(data_record):
    """Parse one serialized validation example and normalise its image.

    The record is decoded with the 'test/label' and 'test/image' keys.
    The image bytes are reinterpreted as float32, reshaped to 512x512 and
    standardised with the module-level ``mean_image`` / ``std_image``.

    Returns:
        Dict with 'test/label' (28 int64 values) and 'test/image'
        (512x512 float32 tensor).
    """
    # Keys and types mirror the ones used when the tfrecords were written
    feature_spec = {
        'test/label': tf.FixedLenFeature([28], tf.int64),
        'test/image': tf.FixedLenFeature([], tf.string),
    }
    parsed = tf.parse_single_example(data_record, feature_spec)

    pixels = tf.decode_raw(parsed['test/image'], tf.float32)
    pixels = tf.reshape(pixels, (512, 512))
    parsed['test/image'] = (pixels - mean_image) / std_image

    return parsed



## Creating 2 dataset objects, 1 for training set and 1 for validation set

In [8]:
# Hyper-parameters of the training input pipeline
num_epochs = 3
batch_size = 50
buffer_size = 1000

# Validation split: parsed one example at a time, neither shuffled nor batched
test_dataset = tf.data.TFRecordDataset(['data/tfrec/train.tfrecords_test']).map(extract_fn_test)

# Training split: parse -> shuffle and repeat for num_epochs -> batch
train_dataset = (
    tf.data.TFRecordDataset(['data/tfrec/train.tfrecords_full'])
    .map(extract_fn_train)
    .apply(tf.contrib.data.shuffle_and_repeat(buffer_size, num_epochs))
    .batch(batch_size)
)

# Model Creation

In [14]:
class ProteinClassifier(tf.keras.Model):
    """Convolutional classifier producing 28 logits per 512x512 single-channel image.

    The network is five blocks of 3x3 'same' convolutions with leaky-ReLU
    activations (8, 16, 24, 32 and 64 filters), each followed by a 2x2
    max-pool, then a 4096-unit ReLU dense layer and a 28-unit linear output
    layer. The model owns its Adam optimizer and exposes a single-step
    ``optimize`` method.
    """

    def __init__(self):
        super().__init__()

        # Block1
        self.block1_conv1 = self._conv3x3(8)
        self.block1_conv2 = self._conv3x3(8)
        self.block1_pool = self._max_pool2x2()

        # Block2
        self.block2_conv1 = self._conv3x3(16)
        self.block2_conv2 = self._conv3x3(16)
        self.block2_pool = self._max_pool2x2()

        # Block3
        self.block3_conv1 = self._conv3x3(24)
        self.block3_conv2 = self._conv3x3(24)
        self.block3_conv3 = self._conv3x3(24)
        self.block3_pool = self._max_pool2x2()

        # Block4
        self.block4_conv1 = self._conv3x3(32)
        self.block4_conv2 = self._conv3x3(32)
        self.block4_conv3 = self._conv3x3(32)
        self.block4_pool = self._max_pool2x2()

        # Block5
        self.block5_conv1 = self._conv3x3(64)
        self.block5_conv2 = self._conv3x3(64)
        self.block5_conv3 = self._conv3x3(64)
        self.block5_pool = self._max_pool2x2()

        # Collapses the feature maps to one vector per example
        self.flatten = tf.keras.layers.Flatten()

        # Dense output layer
        self.fc1 = tf.keras.layers.Dense(4096, activation=tf.nn.relu)

        # Dense layer for classes (linear: the sigmoid is applied inside the loss)
        self.fc2 = tf.keras.layers.Dense(28)

        # Optimizer
        self.optimizer = tf.train.AdamOptimizer(learning_rate = 1e-4)

    @staticmethod
    def _conv3x3(filters):
        """Build the 3x3, stride-1, 'same' leaky-ReLU convolution used by every block."""
        return tf.keras.layers.Conv2D(filters=filters,
                                      kernel_size=[3, 3],
                                      strides=(1, 1),
                                      padding='same',
                                      activation=tf.nn.leaky_relu,
                                      use_bias=True,
                                      kernel_initializer=tf.contrib.layers.xavier_initializer_conv2d()
                                     )

    @staticmethod
    def _max_pool2x2():
        """Build the 2x2, stride-2 max-pool that closes every block."""
        return tf.keras.layers.MaxPool2D(pool_size=(2, 2),
                                         strides=(2, 2),
                                         padding='valid'
                                        )

    def call(self, inputs, training=True, **kwargs):
        """Run the forward pass.

        Args:
            inputs: Tensor that can be reshaped to ``[-1, 512, 512, 1]``.
            training: Accepted for Keras compatibility; no layer of this
                model behaves differently between training and inference.

        Returns:
            Tensor of shape ``[batch, 28]`` holding un-normalised logits.
        """
        # Input Layer: add the channel axis expected by Conv2D
        input_layer = tf.reshape(inputs, [-1, 512, 512, 1])

        # Block1
        x_1 = self.block1_conv1(input_layer)
        x_1 = self.block1_conv2(x_1)
        x_1 = self.block1_pool(x_1)

        # Block2
        x_2 = self.block2_conv1(x_1)
        x_2 = self.block2_conv2(x_2)
        x_2 = self.block2_pool(x_2)

        # Block3
        x_3 = self.block3_conv1(x_2)
        x_3 = self.block3_conv2(x_3)
        x_3 = self.block3_conv3(x_3)
        x_3 = self.block3_pool(x_3)

        # Block4
        x_4 = self.block4_conv1(x_3)
        x_4 = self.block4_conv2(x_4)
        x_4 = self.block4_conv3(x_4)
        x_4 = self.block4_pool(x_4)

        # Block5 (pooled with its own layer; block3_pool was used here by mistake)
        x_5 = self.block5_conv1(x_4)
        x_5 = self.block5_conv2(x_5)
        x_5 = self.block5_conv3(x_5)
        x_5 = self.block5_pool(x_5)
        # Flatten does not need a statically known batch size
        x_5 = self.flatten(x_5)

        x_fc_1 = self.fc1(x_5)
        logits = self.fc2(x_fc_1)

        return logits

    def loss(self, logits, labels):
        """Return the sigmoid cross-entropy between ``labels`` and ``logits``.

        Args:
            logits: Model outputs, one value per class.
            labels: Multi-hot targets with the same shape as ``logits``.
        """
        return tf.losses.sigmoid_cross_entropy(labels, logits)

    def optimize(self, inputs, labels, training = True):
        """Compute the loss for one batch and, when training, apply one Adam step.

        Args:
            inputs: Batch of images accepted by ``call``.
            labels: Multi-hot targets for the batch.
            training: When False the loss is only evaluated; no gradients are
                computed and the weights are left unchanged.

        Returns:
            Scalar loss tensor for the batch.
        """
        if not training:
            # No tape needed when the weights are not updated
            return self.loss(self(inputs, training=False), labels)

        with tf.GradientTape() as tape:
            logits = self(inputs, training=True)
            loss = self.loss(logits, labels)

        # Read the variables after the forward pass: the layers are only built on first call
        variables = self.trainable_variables
        gradients = tape.gradient(loss, variables)
        self.optimizer.apply_gradients(zip(gradients, variables))
        return loss

    def test(self, inputs, labels):
        """Placeholder evaluation: runs a forward pass and currently always returns 0.

        Args:
            inputs: Batch of images accepted by ``call``.
            labels: Multi-hot targets for the batch (not used yet).
        """
        logits = self(inputs, training=False)

        # TODO: add the accuracy check (logits and labels are not compared yet)
        accuracy = 0

        return accuracy

# Model training

In [None]:
# NOTE(review): this empty list is shadowed by the scalar loss assigned inside the loop below.
loss = []
# Pin model creation and the training step to the CPU.
with tf.device("/device:CPU:0"):
    model = ProteinClassifier()
    # Iterator over the shuffled, repeated and batched training set
    itr = train_dataset.make_one_shot_iterator()
    for x in itr:
        # One optimisation step on a single batch
        loss = model.optimize(x['train/image'], x['train/label'])
        print(loss)
        # Stops after the first batch, so exactly one step is executed
        break
        