In [10]:
%cd H:/SINS dataset

H:\SINS dataset


In [11]:
import numpy as np
import tensorflow as tf
import sklearn
import cv2
import random
import math
import os

from itertools import chain
from collections import Counter
from sklearn.metrics import f1_score

# one seed shared by `random` (fold assignment) and the weight initialisers
seed = 42

# --- dataset constants ---
# folder holding the images, and the filename suffix of each image type
# (the `index` argument of cnn()/get_folds() selects one of these)
img_folder = 'img2'
img_name = [
    '_pressure.png',
    '_spec1.png',
    '_spec2.png',
    '_spec3.png',
]

# --- image / label geometry ---
im_size = 64                        # images are fed to the network as im_size x im_size
im_size_flat = im_size ** 2         # length of one flattened image
n_labels = 9                        # width of the one-hot label placeholder
n_channels = 1                      # grayscale
# standard deviation used when drawing the initial weights
sd = np.sqrt(2) / np.sqrt(im_size_flat)

# --- training-loop constants ---
req_improve = 500                   # epochs without improvement tolerated before stopping early
update_cnt = 50                     # not referenced in the visible cells

In [12]:
# cross-validation parameters
n_folds = 4

# network architecture
# An earlier, fixed design (kept here for reference only) was:
#   conv1 (32 * 5x5, 2x2 pool) -> conv2 (64 * 5x5, 2x2 pool)
#   -> fully connected (1024 nodes) -> fully connected (output)
# i.e. filter_size_1 = 5, n_filters_1 = 32, filter_size_2 = 5, n_filters_2 = 64, fc_size = 1024
#
# The network is now described as data so that CNNs can be generated procedurally.
# Each entry is [layer_id, {parameter_name: value, ...}]; the layer ids understood by
# train_test() are 'conv', 'relu', 'pool', 'flat' and 'full'.
layer_vars = [
    ['conv', dict(filter_size = 3, n_filters = 16)],
    ['relu', dict()],
    ['conv', dict(filter_size = 5, n_filters = 32)],
    ['relu', dict()],
    ['pool', dict(ksize = 2, strides = 2)],
    ['conv', dict(filter_size = 5, n_filters = 32)],
    ['relu', dict()],
    ['pool', dict(ksize = 2, strides = 2)],
    ['flat', dict()],
    ['full', dict(n_outputs = 2500)],
    ['relu', dict()],
    ['full', dict(n_outputs = n_labels)],   # output layer: one unit per class
]

# optimiser parameters
learning_rate = 0.001

In [13]:
def cnn(index, do_all = False, spec_fold = 0, epochs = 50, train_batch = 50, test_batch = 50, verbose = True, save_model = False, name = "default"):
    """Train and evaluate the CNN on one image type.

    Args:
        index: position in `img_name` selecting which image type to load.
        do_all: when True, run the full `n_folds`-fold cross-validation and print
            the averaged scores; when False, only hold out fold `spec_fold`.
        spec_fold: 0-indexed fold used as the test set when `do_all` is False.
        epochs, train_batch, test_batch, verbose: forwarded to `train_test`.
        save_model: forwarded to `train_test` in both modes.
        name: checkpoint name suffix forwarded to `train_test`. In cross-validation
            mode "_fold<k>" is appended so the folds do not overwrite each other.

    Returns:
        (accuracy, f1) for the held-out fold, or the per-fold averages when
        `do_all` is True. None if `train_test` could not build the network.
    """
    # reseed on every call so the fold assignment is reproducible
    # (the TF weight initialisers are seeded separately, per op, in new_weights)
    random.seed(seed)
    folds = get_folds(index)

    if not do_all:
        trX, trY, tX, tY = get_sets(folds, spec_fold)
        return train_test(trX, trY, tX, tY, epochs, train_batch, test_batch, verbose, save_model, name)

    tl_acc = tl_f1 = 0
    for f in range(n_folds):
        trX, trY, tX, tY = get_sets(folds, f)
        fold_name = "{}_fold{}".format(name, f)
        result = train_test(trX, trY, tX, tY, epochs, train_batch, test_batch, verbose, save_model, fold_name)
        if result is None:
            # train_test already printed why the network could not be built
            return None
        acc, f1 = result
        tl_acc += acc
        tl_f1 += f1

    avg_acc = tl_acc / n_folds
    avg_f1 = tl_f1 / n_folds
    print("{}-fold cross validation. Average accuracy: {:.4%}. Average F1: {:.4%}".format(n_folds, avg_acc, avg_f1))
    return avg_acc, avg_f1

In [14]:
def get_folds(index):
    """Build class-balanced cross-validation folds of (image path, label) pairs.

    Reads `meta.txt` from the current working directory (tab-separated, three
    fields per line; the first is used as a file path and the second as the
    label), down-samples every class to the size of the rarest class, and
    deals each class's samples across `n_folds` folds.

    Args:
        index: position in `img_name` selecting the image suffix to append.

    Returns:
        A list of `n_folds` lists of (image_path, label) tuples.
    """
    # close the file deterministically and ignore blank lines (e.g. a trailing newline)
    with open('meta.txt') as meta:
        rows = [line.rstrip('\n').split('\t') for line in meta if line.strip()]
    afs, labels, _ = zip(*rows)

    # NOTE(review): af[5:-4] presumably drops a 5-character folder prefix and a
    # 4-character extension from the path in meta.txt - confirm against the file
    afs_imlinks = [img_folder + af[5:-4] + img_name[index] for af in afs]

    # number of samples in the rarest class
    mincnt = min(Counter(labels).values())

    # group image paths by label (labels keep their first-seen order)
    afs_label_sep = {}
    for af, label in zip(afs_imlinks, labels):
        afs_label_sep.setdefault(label, []).append(af)

    # balance the classes by sampling mincnt items from each
    afs_label_sep_sampled = {label: random.sample(files, mincnt) for label, files in afs_label_sep.items()}

    # deal each class across the folds; because of the ceil, the last fold may be smaller
    samples = int(math.ceil(mincnt / n_folds))
    folds = [[] for _ in range(n_folds)]

    for label, files in afs_label_sep_sampled.items():
        random.shuffle(files)
        for fold in range(n_folds):
            folds[fold] += [(af, label) for af in files[fold*samples:(fold+1)*samples]]

    return folds

# 0-indexed get training/testing sets
def get_sets(folds, fold = -1):
    """Split `folds` into a shuffled training set and a held-out test set.

    Args:
        folds: list of folds, each a list of (image_path, label) pairs.
        fold: index of the fold to hold out for testing. Negative values count
            from the end, so the default -1 holds out the last fold.

    Returns:
        (trX, trY, tX, tY): trX/tX are tuples of image paths; trY/tY are one-hot
        label arrays with one column per distinct label in `meta.txt`, columns
        ordered by sorted label name.
    """
    with open('meta.txt') as meta:
        rows = [line.rstrip('\n').split('\t') for line in meta if line.strip()]
    _, labels, _ = zip(*rows)

    # sort so the label -> column mapping is the same in every Python process;
    # plain set iteration order for strings changes between interpreter runs
    sces = sorted(set(labels))

    # normalise a negative index first: otherwise folds[fold+1:] with fold == -1
    # is folds[0:], which would put the held-out fold back into the training data
    if fold < 0:
        fold += len(folds)

    ex_fold = folds[fold]
    ot_fold = folds[:fold] + folds[fold+1:]
    chainfold = list(chain.from_iterable(ot_fold))
    random.shuffle(chainfold)
    trX, trY = zip(*chainfold)
    tX, tY = zip(*ex_fold)

    # one-hot encode; a label that is not in meta.txt becomes an all-zero row
    trY = np.array([[1 if sce == label else 0 for sce in sces] for label in trY])
    tY = np.array([[1 if sce == label else 0 for sce in sces] for label in tY])

    return (trX, trY, tX, tY)

In [15]:
# helper function to initialise weights and biases
def new_weights(shape):
    """Return a TF variable of the given shape, initialised from a normal
    distribution with standard deviation `sd` and op-level seed `seed`."""
    initial_values = tf.random_normal(shape, stddev = sd, seed = seed)
    return tf.Variable(initial_values)

def new_biases(length):
    """Return a TF variable holding `length` biases, all initialised to 0.0."""
    zeros = tf.constant(0.0, shape = [length])
    return tf.Variable(zeros)

def flatten_layer(layer):
    """Reshape `layer` to 2-D, merging dimensions 1..3 into one.

    Returns:
        (flattened tensor, n_features) where n_features is the product of the
        sizes of dimensions 1, 2 and 3 of the input.
    """
    # dimension 0 is left to be inferred (-1 below); presumably the batch axis
    trailing_dims = layer.get_shape()[1:4]
    n_features = np.array(trailing_dims, dtype=int).prod()
    return tf.reshape(layer, [-1, n_features]), n_features

# helper functions to define the layers
def new_conv_layer(input_data, n_channels, filter_size, n_filters, strides = 1):
    """Append a 2-D convolution (SAME padding) plus bias to `input_data`.

    Args:
        input_data: input tensor passed to tf.nn.conv2d.
        n_channels: number of channels in `input_data`.
        filter_size: width and height of each (square) filter.
        n_filters: number of filters, i.e. output channels.
        strides: stride applied along both spatial dimensions.

    Returns:
        (output tensor, filter weights variable).
    """
    kernel_shape = [filter_size, filter_size, n_channels, n_filters]
    kernels = new_weights(kernel_shape)
    offsets = new_biases(n_filters)

    stride_spec = [1, strides, strides, 1]
    convolved = tf.nn.conv2d(input_data, kernels, strides = stride_spec, padding = 'SAME')

    return convolved + offsets, kernels

def new_relu_layer(layer):
    """Apply a ReLU activation to `layer` and return the result."""
    activated = tf.nn.relu(layer)
    return activated

def new_pool_layer(input_data, ksize = 2, strides = 2):
    """Max-pool `input_data` (SAME padding) with a ksize x ksize window and the
    given stride along both spatial dimensions."""
    window = [1, ksize, ksize, 1]
    step = [1, strides, strides, 1]
    return tf.nn.max_pool(input_data, ksize = window, strides = step, padding = 'SAME')

def new_fc_layer(input_data, n_inputs, n_outputs):
    """Append a fully-connected layer (matmul plus bias, no activation).

    Args:
        input_data: 2-D input tensor whose second dimension is `n_inputs`.
        n_inputs: number of input features.
        n_outputs: number of output units.

    Returns:
        The layer's output tensor.
    """
    fc_weights = new_weights([n_inputs, n_outputs])
    fc_biases = new_biases(n_outputs)
    return tf.matmul(input_data, fc_weights) + fc_biases

# helper function to get images
def get_ims(links):
    """Load the images at `links` as flattened, inverted grayscale rows.

    Each image is read in grayscale, divided by 255, inverted (1 - value) and
    reshaped to a vector of length im_size * im_size.

    Args:
        links: iterable of image file paths.

    Returns:
        NumPy array with one flattened image per row.

    Raises:
        FileNotFoundError: if an image is missing or cannot be decoded.
    """
    flat_images = []
    for link in links:
        image = cv2.imread(link, 0)   # flag 0: read as grayscale
        if image is None:
            # cv2.imread reports a missing/unreadable file by returning None, not by
            # raising; fail here with the offending path instead of an opaque TypeError
            raise FileNotFoundError("Could not read image: {}".format(link))
        flat_images.append((1 - image / 255).reshape(im_size * im_size))
    return np.array(flat_images)

In [17]:
def train_test(xtrain, ytrain, xtest, ytest, epochs = 50, train_batch = 50, test_batch = 50, verbose = True, save_model = False, name = "default"):
    """Build the CNN described by `layer_vars`, train it, and score it on the test set.

    Args:
        xtrain, xtest: sequences of image paths (loaded batch-wise via get_ims).
        ytrain, ytest: one-hot label arrays aligned with xtrain / xtest.
        epochs: maximum number of passes over the training set.
        train_batch, test_batch: batch sizes for training and testing.
        verbose: print training accuracy during each epoch and the loss after
            every epoch (otherwise the loss is printed every 5th epoch only).
        save_model: checkpoint the model to 'checkpoints/best_val<name>' whenever
            the epoch loss improves (checked every 3rd epoch and on the last one),
            and stop early after `req_improve` epochs without improvement.
        name: suffix of the checkpoint file name.

    Returns:
        (accuracy, macro F1) on the test set, or None (after printing an error)
        when `layer_vars` starts with a fully-connected layer.

    Raises:
        ValueError: if `layer_vars` contains an unknown layer id.
    """
    if layer_vars[0][0] == 'full':
        print("Error: Cannot start with a fully-connected layer without first flattening the image.")
        return

    # Build each model in its own graph. Using the shared default graph would add
    # another copy of the network on every call (cross-validation folds, repeated
    # notebook cells), growing memory and making the Saver store stale variables.
    graph = tf.Graph()
    with graph.as_default():
        # tensorflow placeholder vars
        tfX = tf.placeholder(tf.float32, [None, im_size * im_size], name = 'tfX')
        x_image = tf.reshape(tfX, [-1, im_size, im_size, n_channels])

        y_true = tf.placeholder(tf.float32, [None, n_labels], name = 'y_true')
        y_true_cls = tf.argmax(y_true, axis = 1)

        # build cnn: `layer` is the running output, `next_in` the channel/feature
        # count the next conv / fully-connected layer must accept
        layer = x_image
        next_in = n_channels
        for (l_id, param) in layer_vars:
            if l_id == 'pool':
                layer = new_pool_layer(layer, **param)
            elif l_id == 'conv':
                layer, _ = new_conv_layer(layer, next_in, **param)
                next_in = param['n_filters']
            elif l_id == 'relu':
                layer = new_relu_layer(layer)
            elif l_id == 'flat':
                layer, next_in = flatten_layer(layer)
            elif l_id == 'full':
                layer = new_fc_layer(layer, next_in, **param)
                next_in = param['n_outputs']
            else:
                # a misspelt id would otherwise silently drop a layer
                raise ValueError("Unknown layer id: {!r}".format(l_id))

        y_pred = tf.nn.softmax(layer)
        y_pred_cls = tf.argmax(y_pred, axis = 1)

        # cost function and optimiser (the cross-entropy op takes the raw logits)
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(logits = layer, labels = y_true)
        cost = tf.reduce_mean(cross_entropy)
        optimiser = tf.train.AdamOptimizer(learning_rate = learning_rate).minimize(cost)

        correct_prediction = tf.equal(y_pred_cls, y_true_cls)
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        init = tf.global_variables_initializer()
        # created after the optimiser so that its variables are saved as well
        saver = tf.train.Saver() if save_model else None

    with tf.Session(graph = graph) as sess:
        sess.run(init)

        # saving vars
        if save_model:
            save_dir = 'checkpoints/'
            # make sure the target directory exists before the first save
            os.makedirs(save_dir, exist_ok = True)
            save_path = os.path.join(save_dir, 'best_val'+name)
            best_loss = float('inf')
            last_improvement = 0

        # training
        for i in range(epochs):
            epoch_loss = 0
            index = 0
            while index < len(xtrain):
                x_batch_links = xtrain[index:index + train_batch]
                y_true_batch = ytrain[index:index + train_batch]
                x_batch = get_ims(x_batch_links)

                feed_dict_train = {tfX: x_batch, y_true: y_true_batch}
                _, c = sess.run([optimiser, cost], feed_dict = feed_dict_train)
                epoch_loss += c
                index += train_batch

                # progress report each time the sample counter hits a multiple of 1000
                if verbose and index % 1000 == 0:
                    acc = sess.run(accuracy, feed_dict = feed_dict_train)
                    print("{:.0%} complete. Training accuracy: {:.4%}".format(index / len(xtrain), acc))

            # checkpoint on improvement, checked every 3rd epoch and on the final one
            if save_model and (i % 3 == 0 or i==epochs-1):
                if best_loss > epoch_loss:
                    print("Improvement found")
                    best_loss = epoch_loss
                    last_improvement = i
                    saver.save(sess = sess, save_path = save_path)

            if verbose or i % 5 == 0:
                print("Epoch {} completed out of {}. Loss: {}".format(i+1, epochs, epoch_loss))

            # early stopping
            if save_model:
                if i - last_improvement > req_improve:
                    print("No improvements found in the last {} epochs. Stopping optimisation.".format(req_improve))
                    break

        # testing
        n_test = len(xtest)
        # builtin int: the np.int alias no longer exists in current NumPy releases
        cls_pred = np.zeros(shape = n_test, dtype = int)

        index = 0
        while index < n_test:
            x_batch_links = xtest[index:index + test_batch]
            y_true_batch = ytest[index: index + test_batch]
            x_batch = get_ims(x_batch_links)

            feed_dict_test = {tfX: x_batch, y_true: y_true_batch}
            cls_pred[index:index + test_batch] = sess.run(y_pred_cls, feed_dict = feed_dict_test)
            index += test_batch

        # ytest is already a NumPy array, so no graph op is needed for the argmax
        cls_true = np.argmax(ytest, axis = 1)
        correct = (cls_true == cls_pred)
        acc = correct.sum() / n_test
        f1 = f1_score(cls_true, cls_pred, average = 'macro')

        print("Accuracy on Test-Set: {0:.4%} ({1} / {2}). F1 score: {3:.4%}.".format(acc, correct.sum(), n_test, f1))
        return acc, f1

In [8]:
# quick 2-epoch smoke run on each of the four image types, in order
for im_index in (0, 1, 2, 3):
    cnn(im_index, epochs = 2)

15% complete. Training accuracy: 44.0000%
30% complete. Training accuracy: 50.0000%
46% complete. Training accuracy: 54.0000%
61% complete. Training accuracy: 36.0000%
76% complete. Training accuracy: 56.0000%
91% complete. Training accuracy: 44.0000%
Epoch 1 completed out of 2. Loss: 199.1177077293396
15% complete. Training accuracy: 50.0000%
30% complete. Training accuracy: 60.0000%
46% complete. Training accuracy: 60.0000%
61% complete. Training accuracy: 46.0000%
76% complete. Training accuracy: 66.0000%
91% complete. Training accuracy: 48.0000%
Epoch 2 completed out of 2. Loss: 165.06072610616684
Accuracy on Test-Set: 51.8519% (1134 / 2187). F1 score: 49.4362%.
15% complete. Training accuracy: 10.0000%
30% complete. Training accuracy: 12.0000%
46% complete. Training accuracy: 14.0000%
61% complete. Training accuracy: 26.0000%
76% complete. Training accuracy: 26.0000%
91% complete. Training accuracy: 40.0000%
Epoch 1 completed out of 2. Loss: 261.8935272693634
15% complete. Trainin

In [21]:
# Long run on image type 3 ('_spec3.png'), holding out fold 0 (the default spec_fold).
# save_model = True checkpoints to 'checkpoints/best_valnew' on improvement;
# verbose = False limits output to the loss of every 5th epoch.
cnn(3, epochs = 500, save_model = True, name = "new", verbose = False)

Improvement found
Epoch 1 completed out of 500. Loss: 195.73581194877625
Improvement found
Epoch 6 completed out of 500. Loss: 49.81787860393524
Improvement found
Improvement found
Epoch 11 completed out of 500. Loss: 23.72707566898316
Improvement found
Improvement found
Epoch 16 completed out of 500. Loss: 12.072815595194697
Improvement found
Epoch 21 completed out of 500. Loss: 6.519338534624694
Improvement found
Improvement found
Epoch 26 completed out of 500. Loss: 5.027654196041112
Improvement found
Epoch 31 completed out of 500. Loss: 1.3042422788036987
Improvement found
Epoch 36 completed out of 500. Loss: 0.48787882811686245
Epoch 41 completed out of 500. Loss: 5.046820111710261
Epoch 46 completed out of 500. Loss: 5.053301744102953
Epoch 51 completed out of 500. Loss: 0.4746635504582599
Improvement found
Epoch 56 completed out of 500. Loss: 0.46570907641583403
Improvement found
Improvement found
Epoch 61 completed out of 500. Loss: 0.2662339185503697
Improvement found
Epoch 66

In [21]:
# Short verbose run (5 epochs) on image type 3 ('_spec3.png'), fold 0 held out.
cnn(3, epochs = 5)

15% complete. Training accuracy: 28.0000%
30% complete. Training accuracy: 52.0000%
46% complete. Training accuracy: 50.0000%
61% complete. Training accuracy: 36.0000%
76% complete. Training accuracy: 54.0000%
91% complete. Training accuracy: 54.0000%
Epoch 1 completed out of 5. Loss: 183.35951507091522
15% complete. Training accuracy: 80.0000%
30% complete. Training accuracy: 76.0000%
46% complete. Training accuracy: 74.0000%
61% complete. Training accuracy: 62.0000%
76% complete. Training accuracy: 78.0000%
91% complete. Training accuracy: 66.0000%
Epoch 2 completed out of 5. Loss: 94.74127236008644
15% complete. Training accuracy: 88.0000%
30% complete. Training accuracy: 82.0000%
46% complete. Training accuracy: 78.0000%
61% complete. Training accuracy: 72.0000%
76% complete. Training accuracy: 80.0000%
91% complete. Training accuracy: 72.0000%
Epoch 3 completed out of 5. Loss: 73.40512526035309
15% complete. Training accuracy: 86.0000%
30% complete. Training accuracy: 84.0000%
46%

In [23]:
# 200-epoch run on image type 0 ('_pressure.png'), fold 0 held out; loss printed every 5th epoch.
cnn(0, epochs = 200, verbose = False)

Epoch 1 completed out of 200. Loss: 202.0862513780594
Epoch 6 completed out of 200. Loss: 115.51690223813057
Epoch 11 completed out of 200. Loss: 66.47385893017054
Epoch 16 completed out of 200. Loss: 45.45010309293866
Epoch 21 completed out of 200. Loss: 34.66224016435444
Epoch 26 completed out of 200. Loss: 32.064170776167884
Epoch 31 completed out of 200. Loss: 29.244369554537116
Epoch 36 completed out of 200. Loss: 23.306455018722772
Epoch 41 completed out of 200. Loss: 20.851939070271328
Epoch 46 completed out of 200. Loss: 19.72495153475029
Epoch 51 completed out of 200. Loss: 19.76574745295966
Epoch 56 completed out of 200. Loss: 17.08873333502561
Epoch 61 completed out of 200. Loss: 17.702685355587164
Epoch 66 completed out of 200. Loss: 14.463029035883665
Epoch 71 completed out of 200. Loss: 14.379956649703672
Epoch 76 completed out of 200. Loss: 13.118327643955126
Epoch 81 completed out of 200. Loss: 15.130038704551794
Epoch 86 completed out of 200. Loss: 10.054874070485795
E

In [24]:
# 200-epoch run on image type 1 ('_spec1.png'), fold 0 held out; loss printed every 5th epoch.
cnn(1, epochs = 200, verbose = False)

Epoch 1 completed out of 200. Loss: 261.4665811061859
Epoch 6 completed out of 200. Loss: 86.70396772027016
Epoch 11 completed out of 200. Loss: 54.996858447790146
Epoch 16 completed out of 200. Loss: 36.51385881751776
Epoch 21 completed out of 200. Loss: 20.463196581229568
Epoch 26 completed out of 200. Loss: 11.835750481288414
Epoch 31 completed out of 200. Loss: 7.555114217335358
Epoch 36 completed out of 200. Loss: 6.053574820281938
Epoch 41 completed out of 200. Loss: 1.698956900670055
Epoch 46 completed out of 200. Loss: 2.229933993314262
Epoch 51 completed out of 200. Loss: 1.9789735964732245
Epoch 56 completed out of 200. Loss: 0.5888542545526434
Epoch 61 completed out of 200. Loss: 9.067863720178138
Epoch 66 completed out of 200. Loss: 0.3728846343983605
Epoch 71 completed out of 200. Loss: 0.7449031285023011
Epoch 76 completed out of 200. Loss: 0.4591764505548781
Epoch 81 completed out of 200. Loss: 0.4602456727811841
Epoch 86 completed out of 200. Loss: 0.38084018102836126
E

In [25]:
# 200-epoch run on image type 2 ('_spec2.png'), fold 0 held out; loss printed every 5th epoch.
cnn(2, epochs = 200, verbose = False)

Epoch 1 completed out of 200. Loss: 148.78365647792816
Epoch 6 completed out of 200. Loss: 38.36873853765428
Epoch 11 completed out of 200. Loss: 19.13821093412116
Epoch 16 completed out of 200. Loss: 8.041380075592315
Epoch 21 completed out of 200. Loss: 4.212144990545312
Epoch 26 completed out of 200. Loss: 4.209489224025447
Epoch 31 completed out of 200. Loss: 3.2927138511968224
Epoch 36 completed out of 200. Loss: 1.9552249196240155
Epoch 41 completed out of 200. Loss: 1.6141524005670362
Epoch 46 completed out of 200. Loss: 1.408691834185447
Epoch 51 completed out of 200. Loss: 6.774413686505795
Epoch 56 completed out of 200. Loss: 1.4447593315501308
Epoch 61 completed out of 200. Loss: 1.2143592613838905
Epoch 66 completed out of 200. Loss: 1.1563331105364796
Epoch 71 completed out of 200. Loss: 1.1527110767475008
Epoch 76 completed out of 200. Loss: 1.2379798593074156
Epoch 81 completed out of 200. Loss: 1.2222909873152616
Epoch 86 completed out of 200. Loss: 1.1153911849015685
E

In [26]:
# 200-epoch run on image type 3 ('_spec3.png'), fold 0 held out; loss printed every 5th epoch.
cnn(3, epochs = 200, verbose = False)

Epoch 1 completed out of 200. Loss: 215.71210664510727
Epoch 6 completed out of 200. Loss: 64.89219778776169
Epoch 11 completed out of 200. Loss: 33.605582505464554
Epoch 16 completed out of 200. Loss: 15.370372039149515
Epoch 21 completed out of 200. Loss: 5.088374967730488
Epoch 26 completed out of 200. Loss: 2.2736340897681657
Epoch 31 completed out of 200. Loss: 1.182904278381102
Epoch 36 completed out of 200. Loss: 1.1278397807636793
Epoch 41 completed out of 200. Loss: 0.7087203543109126
Epoch 46 completed out of 200. Loss: 0.9084192884402
Epoch 51 completed out of 200. Loss: 1.540683937506401
Epoch 56 completed out of 200. Loss: 6.010517307469854
Epoch 61 completed out of 200. Loss: 0.4234687371727546
Epoch 66 completed out of 200. Loss: 0.2801914910436949
Epoch 71 completed out of 200. Loss: 0.2066887490984186
Epoch 76 completed out of 200. Loss: 0.1634776263774711
Epoch 81 completed out of 200. Loss: 0.18885761284536784
Epoch 86 completed out of 200. Loss: 0.48213239250253537
