In [50]:
import numpy as np
import pandas as pd
import tensorflow as tf
import modutils
import pickle
import time, datetime
import sklearn, sklearn.metrics, sklearn.decomposition
import collections
import matplotlib.pyplot as plt
import seaborn

# Format template for the dev batch files; `.format(i)` fills in a zero-padded,
# three-digit batch index.
# NOTE(review): absolute, machine-specific path - consider a configurable data directory.
dev_batches = 'D:/Jupyter/DataSets/prv/dev0_batch{0:03d}.npy'
# Batch indices iterated by the training loop (0..4).
train_batches = range(5)
# Batch indices iterated by the per-epoch evaluation loop (5..6).
test_batches = range(5, 7)

In [5]:
def build_cnn_graph(input_shape, cnn_arch, fc_arch, num_classes, learning_rate=1e-3):
    """Build a TF1 graph: conv/max-pool stages, dense layers and a softmax head.

    The default graph is reset first, so any graph built earlier in the
    process is discarded.

    Args:
        input_shape: (height, width) of the single-channel input images.
        cnn_arch: iterable of
            (conv_filters, conv_size, conv_stride, pool_size, pool_stride)
            tuples, one per conv + max-pool stage, applied in order.
        fc_arch: iterable of hidden dense layer sizes applied after flattening.
        num_classes: number of output logits; must be at least 2 because the
            AUC metric reads column 1 of the softmax output. The AUC / Gini
            values are only meaningful for a binary problem whose positive
            class has index 1.
        learning_rate: step size passed to the Adam optimizer.

    Returns:
        Nested dict of graph handles:
            'in':  {'data': image placeholder, 'label': int32 label placeholder}
            'out': {'logit': final dense output, 'prob': softmax of the logits}
            'run': {'loss', 'upd_metrics', 'gini', 'accuracy', 'train', 'summary'}

    Raises:
        ValueError: if num_classes is smaller than 2.
    """
    # Validate before resetting so a bad call does not wipe the current graph.
    if num_classes < 2:
        raise ValueError(
            'num_classes must be >= 2 (column 1 of the softmax output is used '
            'for the AUC metric), got {}'.format(num_classes))

    tf.reset_default_graph()

    with tf.name_scope('Input'):
        tf_in_x = tf.placeholder(tf.float32, shape=(None, input_shape[0], input_shape[1], 1))
        tf_in_y = tf.placeholder(tf.int32, shape=(None,))

    tf_temp = tf_in_x
    for (i, (conv_filters, conv_size, conv_stride, pool_size, pool_stride)) in enumerate(cnn_arch):
        with tf.name_scope('Conv-MaxPool-{:02d}'.format(i)):
            tf_temp = tf.layers.conv2d(tf_temp, conv_filters, conv_size, conv_stride, activation=tf.nn.relu)
            tf_temp = tf.layers.max_pooling2d(tf_temp, pool_size, pool_stride)

    with tf.name_scope('FC'):
        tf_temp = tf.contrib.layers.flatten(tf_temp)
        for sz in fc_arch:
            tf_temp = tf.layers.dense(tf_temp, sz, activation=tf.nn.elu)

        # Raw logits feed the loss; the softmax is only used for outputs and metrics.
        tf_final = tf.layers.dense(tf_temp, num_classes)
        tf_prob = tf.nn.softmax(tf_final)
        tf_predicted = tf.cast(tf.argmax(tf_prob, axis=1), dtype=tf.int32)

    with tf.name_scope('LOSS'):
        tf_loss = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=tf_in_y, logits=tf_final))
        tf_train = tf.train.AdamOptimizer(learning_rate).minimize(tf_loss)

        # tf.metrics.* are streaming metrics backed by local variables: the
        # value tensors only change when the paired update ops are run, and
        # tf.local_variables_initializer() must be run before using them.
        tf_rocauc, tf_upd_rocauc = tf.metrics.auc(labels=tf_in_y, predictions=tf_prob[:,1], num_thresholds=10000)
        tf_gini = tf_rocauc * 2 - 1
        tf_accuracy, tf_upd_accuracy = tf.metrics.accuracy(labels=tf_in_y, predictions=tf_predicted)
        tf_update_metrics = tf.group(tf_upd_rocauc, tf_upd_accuracy)

        # Summaries are logged as "1 - metric" so that lower is better for all three.
        tfsummary_logloss = tf.summary.scalar('Log-Loss', tf_loss)
        tfsummary_gini = tf.summary.scalar('1-Gini', 1-tf_gini)
        tfsummary_accuracy = tf.summary.scalar('1-Accuracy', 1-tf_accuracy)
        tfsummary = tf.summary.merge([tfsummary_logloss, tfsummary_gini, tfsummary_accuracy])

    return {'in':{'data':tf_in_x, 'label':tf_in_y},
            'out':{'logit':tf_final, 'prob':tf_prob},
            'run':{'loss': tf_loss, 'upd_metrics':tf_update_metrics,
                   'gini':tf_gini, 'accuracy':tf_accuracy,
                   'train': tf_train, 'summary':tfsummary}}

In [57]:
# Model identity; reused below for the log directory and the checkpoint path.
model_name = '25EasyCNN01'

# 128x128 inputs, three conv/max-pool stages, one 20-unit dense layer, 2 classes.
graph_descr = build_cnn_graph(
    input_shape=(128, 128),
    cnn_arch=[
        (20, 5, 1, 3, 3),
        (100, 5, 1, 3, 3),
        (200, 5, 1, 3, 3),
    ],
    fc_arch=[20],
    num_classes=2,
)

# Dump the graph definition so it can be inspected in TensorBoard.
tffw_graph = tf.summary.FileWriter(
    'D:/Jupyter/Logs/Graph_{}'.format(model_name),
    tf.get_default_graph(),
)

# Checkpoint path template; the remaining '{:02d}' slot takes the epoch number.
model_ckpt_name = '../Models/{0}/model'.format(model_name) + '-{:02d}.ckpt'

print('Graph created')

# Batch-size settings.
batch_steps = 1
batch_size = 64
calc_batch_size = 1024


def set2dict(x):
    """Turn an (inputs, labels) pair into a feed dict for the graph placeholders."""
    placeholders = graph_descr['in']
    return {placeholders['data']: x[0],
            placeholders['label']: x[1]}


print('Preparation complete')

Graph created
Preparation complete


In [58]:
num_epochs = 50


def _load_dev_batch(batch):
    """Load one dev batch file and split it into (images, labels).

    Column 1 of the stored array is read as the int32 label and columns 2
    onward are reshaped into 128x128 single-channel images. Column 0 is not
    used here.
    """
    data = np.load(dev_batches.format(batch))
    images = data[:,2:].reshape(-1,128,128,1)
    labels = data[:,1].reshape(-1).astype(np.int32)
    return images, labels


# One timestamped pair of log directories per run (train / validation).
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
tffw_train = tf.summary.FileWriter('D:/Jupyter/Logs/Run_{0}-{1}-T'.format(model_name, dt_now), tf.get_default_graph())
tffw_valid = tf.summary.FileWriter('D:/Jupyter/Logs/Run_{0}-{1}-V'.format(model_name, dt_now), tf.get_default_graph())
tfsSaver = tf.train.Saver(max_to_keep=5)

try:
    with tf.Session() as tfs:
        tfs.run(tf.global_variables_initializer())
        # Local variables back the streaming metrics defined in the graph.
        tfs.run(tf.local_variables_initializer())

        for n in range(num_epochs):
            t0 = time.perf_counter()

            # Training pass: one batch file at a time, so only one is in memory.
            for batch in train_batches:
                train_x, train_y = _load_dev_batch(batch)
                modutils.runEpoch(tfs, (train_x, train_y), batch_size, set2dict,
                                  graph_descr['run']['train'],
                                  op_loss=graph_descr['run']['loss'], verbatim=True)

            # Evaluation pass over the held-out batch files.
            test_res = []
            test_yf = []
            for batch in test_batches:
                valid_x, valid_y = _load_dev_batch(batch)
                test_yf.append(valid_y)
                test_res += modutils.runDataset(tfs, (valid_x, valid_y), batch_size, set2dict,
                                                [graph_descr['run']['loss'], graph_descr['out']['prob']])

            # NOTE(review): presumably each runDataset record holds the mini-batch
            # sample count in x[1] and the fetched [loss, prob] values in x[2] -
            # confirm against modutils. Under that reading this is the
            # sample-weighted mean loss.
            test_loss = np.sum([x[1] * x[2][0] for x in test_res]) / np.sum([x[1] for x in test_res])
            test_p = np.concatenate([x[2][1] for x in test_res])
            test_y = np.concatenate(test_yf)
            # Gini = 2 * AUC - 1; roc_auc_score raises ValueError when test_y
            # contains only one class.
            test_gini = sklearn.metrics.roc_auc_score(test_y, test_p[:,1])*2-1
            # The timing covers training and evaluation, not the checkpoint save.
            t1 = time.perf_counter()

            p = tfsSaver.save(tfs, model_ckpt_name.format(n))
            print('Model saved at checkpoint: {0}'.format(p))
            print('Epoch {0}: {1:.3f} in {2:.2f} sec, gini={3:.3f}'.format(n, test_loss, t1-t0, test_gini))
finally:
    # Release the event files even when the run is interrupted
    # (e.g. KeyboardInterrupt from the notebook).
    tffw_train.close()
    tffw_valid.close()
print('\nDone')

































































































































































Model saved at checkpoint: ../Models/25EasyCNN01/model-00.ckpt
Epoch 0: 0.280 in 544.97 sec, gini=0.276
Model saved at checkpoint: ../Models/25EasyCNN01/model-01.ckpt
Epoch 1: 0.271 in 405.97 sec, gini=0.325
Model saved at checkpoint: ../Models/25EasyCNN01/model-02.ckpt
Epoch 2: 0.273 in 434.57 sec, gini=0.341
Model saved at checkpoint: ../Models/25EasyCNN01/model-03.ckpt
Epoch 3: 0.278 in 401.79 sec, gini=0.335


KeyboardInterrupt: 

In [10]:
# Show the training batch indices as an array.
np.array(train_batches)

array([0, 1, 2])

In [24]:
# Shape of the concatenated predicted probabilities from the last evaluation pass.
test_p.shape

(1024, 2)

In [25]:
# Shape of the concatenated evaluation labels; its length should match test_p's first axis.
test_y.shape

(1024,)

In [51]:
# Mean predicted probability of class 1 over the evaluation samples.
test_p[:,1].mean()

0.075779587

In [52]:
# Mean of the evaluation labels, for comparison with the mean predicted probability.
test_y.mean()

0.078125

In [47]:
# Gini coefficient (2 * ROC AUC - 1) of the class-1 probabilities against the labels.
sklearn.metrics.roc_auc_score(test_y, test_p[:,1])*2-1

0.078972457627118686

In [64]:
# Mean label among samples whose predicted class-1 probability exceeds 0.2.
test_y[test_p[:,1]>0.2].mean()

0.15846994535519127

In [65]:
# Mean label among samples whose predicted class-1 probability is below 0.05.
test_y[test_p[:,1]<0.05].mean()

0.010563380281690141