In [7]:
import collections
import os
import pickle
import time, datetime

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
import sklearn, sklearn.metrics, sklearn.decomposition
import tensorflow as tf

import modutils

# Filename template for the pre-built .npy data batches; it is formatted with the
# integer batch index (zero-padded to three digits).
# NOTE(review): absolute, machine-specific path — consider a configurable data dir.
dev_batches = 'D:/Jupyter/DataSets/prv/dev0_batch{0:03d}.npy'
# Batch indices used for training (0, 1, 2) ...
train_batches = range(3)
# ... and the single batch index (3) held out for testing.
test_batches = range(3, 4)

In [5]:
def build_cnn_graph(input_shape, cnn_arch, fc_arch, num_classes):
    """Assemble a TF1 classification graph: conv/max-pool stack, dense layers, softmax head.

    The default graph is reset first, so anything built before this call is discarded.

    Args:
        input_shape: indexable with the image height at [0] and width at [1]; the
            input placeholder has shape (None, height, width, 1), float32.
        cnn_arch: iterable of 5-tuples
            (conv_filters, conv_size, conv_stride, pool_size, pool_stride), one per
            convolution (ReLU) + max-pooling stage.
        fc_arch: iterable of unit counts for the ELU dense layers applied after
            flattening; may be empty.
        num_classes: number of output logits.

    Returns:
        Nested dict of graph handles:
            'in':  {'data': image placeholder, 'label': int32 label placeholder}
            'out': {'logit': raw logits, 'prob': softmax probabilities}
            'run': {'loss', 'upd_metrics', 'gini', 'accuracy', 'train', 'summary'}
    """
    tf.reset_default_graph()

    with tf.name_scope('Input'):
        images = tf.placeholder(tf.float32, shape=(None, input_shape[0], input_shape[1], 1))
        labels = tf.placeholder(tf.int32, shape=(None,))

    # Convolutional feature extractor: one conv + max-pool pair per entry of cnn_arch.
    net = images
    for stage_idx, stage in enumerate(cnn_arch):
        n_filters, kernel, stride, pool, pool_step = stage
        with tf.name_scope('Conv-MaxPool-{:02d}'.format(stage_idx)):
            net = tf.layers.conv2d(net, filters=n_filters, kernel_size=kernel, strides=stride,
                                   activation=tf.nn.relu)
            net = tf.layers.max_pooling2d(net, pool_size=pool, strides=pool_step)

    with tf.name_scope('FC'):
        net = tf.contrib.layers.flatten(net)
        for units in fc_arch:
            net = tf.layers.dense(net, units, activation=tf.nn.elu)

        # Final linear layer yields the logits; softmax / argmax are derived from it.
        logits = tf.layers.dense(net, num_classes)
        probs = tf.nn.softmax(logits)
        predicted = tf.cast(tf.argmax(probs, axis=1), dtype=tf.int32)

    with tf.name_scope('LOSS'):
        xent = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels, logits=logits)
        loss = tf.reduce_mean(xent)
        train_op = tf.train.AdamOptimizer(1e-3).minimize(loss)

        # tf.metrics.* return a (value, update_op) pair. The AUC is computed from the
        # probability in column 1 only — presumably meant for num_classes == 2; confirm
        # before using with more classes.
        rocauc, upd_rocauc = tf.metrics.auc(labels=labels, predictions=probs[:, 1],
                                            num_thresholds=10000)
        gini = rocauc * 2 - 1
        accuracy, upd_accuracy = tf.metrics.accuracy(labels=labels, predictions=predicted)
        update_metrics = tf.group(upd_rocauc, upd_accuracy)

        # Gini and accuracy are logged as "1 - value".
        summary = tf.summary.merge([
            tf.summary.scalar('Log-Loss', loss),
            tf.summary.scalar('1-Gini', 1 - gini),
            tf.summary.scalar('1-Accuracy', 1 - accuracy),
        ])

    inputs = {'data': images, 'label': labels}
    outputs = {'logit': logits, 'prob': probs}
    runnables = {'loss': loss, 'upd_metrics': update_metrics,
                 'gini': gini, 'accuracy': accuracy,
                 'train': train_op, 'summary': summary}
    return {'in': inputs, 'out': outputs, 'run': runnables}

In [17]:
# Build the model graph: 128x128 inputs, a single conv stage
# (20 filters, size 5, stride 1, followed by 3/3 max-pooling),
# no hidden dense layers, two output classes.
graph_descr = build_cnn_graph(
    input_shape=(128, 128),
    cnn_arch=[(20, 5, 1, 3, 3)],
    fc_arch=[],
    num_classes=2,
)
model_name = '25EasyCNN01'

# Write the graph definition for TensorBoard.
tffw_graph = tf.summary.FileWriter(
    'D:/Jupyter/Logs/Graph_{}'.format(model_name),
    tf.get_default_graph(),
)
# Checkpoint path template; the remaining '{:02d}' slot is filled in later.
model_ckpt_name = '../Models/{0}/model'.format(model_name) + '-{:02d}.ckpt'

print('Graph created')

batch_steps = 1
batch_size = 64
calc_batch_size = 1024


def set2dict(x):
    """Turn an (inputs, labels) pair into a feed dict for the graph's placeholders."""
    placeholders = graph_descr['in']
    return {placeholders['data']: x[0],
            placeholders['label']: x[1]}


print('Preparation complete')

Graph created
Preparation complete


In [18]:
num_epochs = 50


def _load_batch(batch):
    """Load one .npy batch file and split it into (images, labels).

    Column 1 of the stored array is used as the label and columns 2 onward are
    reshaped into (n, 128, 128, 1) images; column 0 is not used here.
    """
    data = np.load(dev_batches.format(batch))
    images = data[:, 2:].reshape(-1, 128, 128, 1)
    labels = data[:, 1].reshape(-1).astype(np.int32)
    return images, labels


# Saver.save() raises if the checkpoint's parent directory does not exist,
# so make sure it is there before training starts.
ckpt_dir = os.path.dirname(model_ckpt_name)
if ckpt_dir:
    os.makedirs(ckpt_dir, exist_ok=True)

dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
tffw_train = tf.summary.FileWriter('D:/Jupyter/Logs/Run_{0}-{1}-T'.format(model_name, dt_now), tf.get_default_graph())
tffw_valid = tf.summary.FileWriter('D:/Jupyter/Logs/Run_{0}-{1}-V'.format(model_name, dt_now), tf.get_default_graph())
tfsSaver = tf.train.Saver(max_to_keep=5)

try:
    with tf.Session() as tfs:
        tfs.run(tf.global_variables_initializer())
        # Local variables back the tf.metrics.* ops created by build_cnn_graph.
        tfs.run(tf.local_variables_initializer())

        for n in range(num_epochs):
            t0 = time.perf_counter()

            # One pass over every training batch file.
            for batch in train_batches:
                train_x, train_y = _load_batch(batch)
                modutils.runEpoch(tfs, (train_x, train_y), batch_size, set2dict,
                                  graph_descr['run']['train'],
                                  op_loss=graph_descr['run']['loss'], verbatim=True)

            # Evaluate loss and class probabilities on the held-out batch files.
            test_res = []
            for batch in test_batches:
                test_x, test_y = _load_batch(batch)
                test_res.append(modutils.runDataset(tfs, (test_x, test_y), batch_size, set2dict,
                                                    [graph_descr['run']['loss'], graph_descr['out']['prob']]))

            # TODO: test_res is collected but not aggregated yet. An earlier draft
            # computed a size-weighted test loss, Gini (2 * ROC-AUC - 1) and accuracy
            # from it with sklearn.metrics, and wrote the graph's 'summary' op to
            # tffw_train / tffw_valid. It relied on names that do not exist in this
            # notebook (run_tf_calc, test_set, stat_train_dict, stat_valid_dict).
            # NOTE(review): confirm the return layout of modutils.runDataset before
            # re-enabling; until then the epoch line below logs a placeholder 0.

            t1 = time.perf_counter()

            p = tfsSaver.save(tfs, model_ckpt_name.format(n))
            print('Model saved at checkpoint: {0}'.format(p))
            print('Epoch {0}: {1:.3f} in {2:.2f} sec'.format(n, 0, t1-t0))
finally:
    # Flush and release the event files even if training is interrupted.
    tffw_train.close()
    tffw_valid.close()
print('\nDone')

































































































NameError: name 'run_tf_calc' is not defined

In [10]:
# Scratch check: materialise the train_batches range to see which batch indices it holds.
np.array(train_batches)

array([0, 1, 2])