In [20]:
import tensorflow as tf
import numpy as np
import datetime, time
import sklearn
import sklearn.metrics

def shuffleBatches(tensorTuple, batchSize=64):
    """Yield mini-batches of the input rows in a freshly shuffled order.

    Parameters
    ----------
    tensorTuple : array, or list/tuple of arrays
        Either a single array or several arrays that are sliced with the same
        row indices. The number of rows is taken from ``shape[0]`` of the
        (first) array. Each array must accept NumPy-style indexing with a
        list of row indices.
    batchSize : int
        Maximum number of rows per batch; the last batch may be shorter.

    Yields
    ------
    numpy.ndarray or tuple of numpy.ndarray
        For a list/tuple input, a tuple holding one array per input array;
        otherwise a single array.
    """
    isGroup = isinstance(tensorTuple, (list, tuple))
    numRows = (tensorTuple[0] if isGroup else tensorTuple).shape[0]

    ids = list(range(numRows))
    np.random.shuffle(ids)

    for start in range(0, numRows, batchSize):
        # Slicing clamps at the end of the list, so the last batch is simply shorter.
        batchIds = ids[start:start + batchSize]
        if isGroup:
            # Build the tuple eagerly. A lazy generator expression here would
            # look up the batch indices only when consumed, so batches that are
            # collected first and read later would all see the final slice.
            yield tuple(np.array(x[batchIds,]) for x in tensorTuple)
        else:
            yield np.array(tensorTuple[batchIds,])

In [2]:
def genObs(length, max_depth=5):
    """Generate one synthetic observation: a state string and a binary target.

    A random number of steps is simulated. At every step the state either
    climbs one level (capped at ``max_depth - 1``) or falls back to 0. The
    returned string is left-padded with ``'X'`` to ``length`` characters,
    followed by one character per simulated state.

    Parameters
    ----------
    length : int
        Total length of the returned string.
    max_depth : int
        Number of distinct states (0 .. max_depth - 1). States are rendered
        with ``str()``, so values above 10 would produce multi-character
        states and break the fixed string length.

    Returns
    -------
    tuple
        ``(sequence, target, params)`` where ``sequence`` is the padded state
        string, ``target`` is a single Bernoulli draw with probability ``p``,
        and ``params`` is a dict with keys ``'p0'``, ``'pa'``, ``'dp'``,
        ``'ps0'`` and ``'p'``.
    """
    # Number of simulated states; the remainder of the string is padding.
    num = np.random.negative_binomial(int(length*0.7), 0.5)

    p0 = np.random.uniform() ** 3                     # initial climb probability from state 0
    pa = np.random.uniform(low=0.1, high=0.5) ** 2    # adaptation rate applied to ps0
    dp = np.random.uniform(low=0.3, high=0.8)         # exponent linking consecutive levels

    # ps[k] = ps[k-1] ** dp: climb probability per level, starting from p0.
    ps = [p0]
    for _ in range(max_depth - 1):
        ps.append(np.power(ps[-1], dp))
    ps1 = ps[1:]
    ps0 = p0

    states = [0]
    for _ in range(min(length, num) - 1):
        current = states[-1]
        if current > 0:
            up = min(0.85, ps1[current - 1])
        else:
            up = ps0
        if np.random.uniform() > up:
            # Fall back to state 0 and adjust the probability of leaving it.
            if current > 1:
                ps0 = max(0.15, ps0 * 0.3)
            else:
                ps0 = max(0.05, ps0 * (1 - pa) + 0 * pa)
            states.append(0)
        else:
            # Climb one level and move ps0 towards 1 by the fraction pa.
            states.append(min(max_depth - 1, current + 1))
            ps0 = min(0.8, ps0 * (1 - pa) + 1 * pa)

    # NOTE(review): unlike inside the loop, the probability used for the target
    # is not capped at 0.85 -- confirm that this asymmetry is intended.
    if states[-1] > 0:
        up = ps1[states[-1] - 1]
    else:
        up = ps0
    target = np.random.binomial(n=1, p=up)
    params = {'p0':p0, 'pa':pa, 'dp':dp, 'ps0': ps0, 'p':up}

    # Pad by the number of states actually emitted. `states` always holds the
    # initial 0, so padding with `length - num` produced length + 1 characters
    # whenever num was 0.
    padding = 'X' * (length - len(states))
    return (padding + ''.join([str(x) for x in states]), target, params)

def genSample(num, length=40):
    """Draw ``num`` observations with ``genObs`` and return them column-wise.

    Returns a triple of NumPy arrays: the state strings, the binary targets,
    and the probability ``params['p']`` each target was drawn with.
    """
    sequences, targets, probs = [], [], []
    for _ in range(num):
        seq, target, params = genObs(length)
        sequences.append(seq)
        targets.append(target)
        probs.append(params['p'])
    return np.array(sequences), np.array(targets), np.array(probs)

In [3]:
# Seed NumPy's global generator so the synthetic data set (and the batch
# shuffling later on, which draws from the same generator) is reproducible
# after a kernel restart.
DATA_SEED = 42
np.random.seed(DATA_SEED)

# Training and validation samples; the third return value (the true target
# probabilities) is not needed here.
train_s, train_y, _ = genSample(500000)
valid_s, valid_y, _ = genSample(50000)
valid_s[:10], valid_y[:10]

(array(['XXXXXXXXXXXXXXXXXXXXX0123444001234444444',
        'XXXXXXXXXXXXXXXXXXXXXXXXX000010120001000',
        'XXXXXXXXXXXXXXX0000000000000000000000000',
        'XXXXXXXX00101234444444444444444444444444',
        'XX00000001000000000000000000000000000000',
        'XXXXXX0000000000000000000000000000000000',
        'XX00000000000000010000000000000100000000',
        'XXXXXXXXXXXXXXXX012000001234000012344444',
        'XXXXXXXXXXXXXXXXXXXXX0123400000000001000',
        'XXXXXXXXXXXXXXX0123440000000000000000000'],
       dtype='<U40'), array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0]))

In [8]:
# Vocabulary: every distinct character occurring in the training sequences
# (despite its name, task_dict is a set). Scanning all rows instead of a
# 1000-row prefix guarantees that no symbol is missed by the lookup below.
task_dict = set(''.join(train_s))
# Enumerate in sorted order: iteration order of a set of strings is not stable
# across interpreter sessions, which would make the symbol -> id mapping (and
# therefore the encoded inputs) differ from run to run.
task_digitizer = {symbol: idx for idx, symbol in enumerate(sorted(task_dict))}

In [15]:
# Encode every sequence as a row of integer symbol ids, one id per character.
train_x = np.array([list(map(task_digitizer.__getitem__, seq)) for seq in train_s])
valid_x = np.array([list(map(task_digitizer.__getitem__, seq)) for seq in valid_s])

In [33]:
# Number of time steps fed to the network. Must match the width of the slices
# fed in the training cell (sequences of length 40 with the first x_offset=10
# characters dropped).
SEQ_LEN = 30
# One one-hot channel per vocabulary symbol. tf.one_hot encodes an index that
# is >= depth as an all-zero vector, so a hard-coded depth of 5 with the six
# symbols produced by genObs (default max_depth: 'X' and '0'..'4') silently
# left one arbitrary symbol without a channel of its own.
DICT_SIZE = len(task_digitizer)
# Units per stacked GRU layer.
RNN_SIZE = [4]


def InnerCell(n):
    """Build a single GRU cell with ``n`` units and ELU activation."""
    return tf.nn.rnn_cell.GRUCell(num_units=n, activation=tf.nn.elu)


tf.reset_default_graph()

tfi_x = tf.placeholder(shape=(None, SEQ_LEN), dtype=tf.int64)
# `(None,)` declares a 1-D label vector. The former `(None)` is just `None`,
# i.e. a placeholder of completely unknown shape.
tfi_y = tf.placeholder(shape=(None,), dtype=tf.int64)

tfX = tf.one_hot(tfi_x, DICT_SIZE, dtype=tf.float32)
tfY = tf.one_hot(tfi_y, 2, dtype=tf.float32)


rnnCell = tf.nn.rnn_cell.MultiRNNCell([InnerCell(s) for s in RNN_SIZE])

# dynamic_rnn returns (outputs, final_state); only the final state is used.
# For a MultiRNNCell it holds one entry per layer, so tfO[-1] is the final
# state of the top layer.
_, tfO = tf.nn.dynamic_rnn(rnnCell, inputs=tfX, dtype=tf.float32)

# Two logits: class 0 / class 1.
tfOut0 = tf.layers.dense(tfO[-1], 2)

tfLoss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=tfY, logits=tfOut0))
tfTrain = tf.train.AdamOptimizer(1e-3).minimize(tfLoss)

# Predicted probability of the positive class (1-D, one value per row).
tfOutP = tf.nn.softmax(tfOut0)[:,1]


#tfsLoss = tf.summary.scalar('RMSE', tfLoss)
#tfsSaver = tf.train.Saver()

#tffw = tf.summary.FileWriter('D:/Jupyter/Logs/00_A', tf.get_default_graph())
print('Graph creation complete')

Graph creation complete


In [34]:
# Timestamp for log directory names (only referenced by the disabled
# FileWriter line below).
dt_now = datetime.datetime.now().strftime("%Y-%m-%d_%H%M%S")
#tffw = tf.summary.FileWriter('D:/Jupyter/Logs/12RNN07-{0}'.format(dt_now), tf.get_default_graph())

batch_size = 1000   # rows per mini-batch
num_steps  = 5      # optimizer steps taken on each mini-batch
num_epochs = 100
checkpoints = 500   # only referenced by the disabled checkpoint code below
x_offset = 10       # leading characters dropped from every sequence

fmtstr = 'Epoch {0} ({1:1.3} sec): \t\tVL:{2:1.3f}\t\tGini:{3:1.3f}'
valid_batch = {tfi_x: valid_x[:,x_offset:], tfi_y: valid_y}
with tf.Session() as tfs:
    tfs.run(tf.global_variables_initializer())
    for i in range(num_epochs):
        te0 = time.perf_counter()
        for (mini_x, mini_y) in shuffleBatches((train_x, train_y), batchSize=batch_size):
            train_batch = {tfi_x:mini_x[:,x_offset:], tfi_y:mini_y}

            for j in range(num_steps):
                tfs.run(tfTrain, feed_dict=train_batch)

        te1 = time.perf_counter()
        # Fetch loss and predictions together so the validation set passes
        # through the network once per epoch instead of twice.
        lv, valid_p = tfs.run([tfLoss, tfOutP], feed_dict=valid_batch)
        # Disabled TensorBoard logging / checkpointing:
        #tffw.add_summary(summary, i)
        #if i%checkpoints == 0 and i > 0:
        #    p = tfsSaver.save(tfs, 'D:/Jupyter/mltest/Models-12RNN07/model-{0:04d}.ckpt'.format(i))
        #    print('Model saved at checkpoint: {0}'.format(p))

        # Gini = 2 * AUC - 1 on the validation set.
        gini = sklearn.metrics.roc_auc_score(y_true=valid_y, y_score=valid_p) * 2 - 1
        print(fmtstr.format(i,te1-te0,lv,gini))

Epoch 0 (51.0 sec): 		VL:0.319		Gini:0.731
Epoch 1 (50.4 sec): 		VL:0.314		Gini:0.740


KeyboardInterrupt: 

In [25]:
# tfOutP already selects the positive-class column, so valid_p coming out of
# the training cell is 1-D and `valid_p[:, 1]` would raise IndexError. Slice
# only when a full two-column probability matrix is present.
if np.ndim(valid_p) == 2:
    valid_p = valid_p[:, 1]

In [26]:
# First ten validation labels next to their predicted positive-class probabilities.
(valid_y[:10], valid_p[:10])

(array([1, 0, 0, 1, 0, 0, 0, 1, 0, 0]),
 array([ 0.96357483,  0.14869218,  0.04372051,  0.94771868,  0.04704701,
         0.04704701,  0.06584086,  0.95934361,  0.10914036,  0.06956425], dtype=float32))

In [28]:
# Validation Gini coefficient, derived from ROC AUC as 2 * AUC - 1.
2 * sklearn.metrics.roc_auc_score(y_true=valid_y, y_score=valid_p) - 1

0.73872465767600914

In [None]:
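# Experiment log (architecture: time per epoch, validation Gini x 100):
# 10-gru:    ~80 sec per epoch,  Gini = 74 (2 epochs, no change between them)
# 20-20-gru: ~200 sec per epoch, Gini = 74 (2 epochs, no change between them)
# 4-gru:     ~50 sec per epoch,  Gini = 73 after epoch 0, 74 after epoch 1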