In [1]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from wgomoku import GomokuBoard
from wgomoku import Heuristics
from wgomoku import GomokuTools as gt
from wgomoku import HeuristicGomokuPolicy
from google.cloud import bigquery
import google.datalab.bigquery as bq
#import tensorflow_transform.tf_metadata as metadata
import datetime
import tempfile
#import tensorflow_transform.beam.impl as beam_impl
#import tensorflow_transform as tft
import apache_beam as beam

  'Running the Apache Beam SDK on Python 3 is not yet fully supported. '


In [2]:
# NOTE(review): the comma (rather than a colon) makes this a two-element set,
# not a dict, and the assignment rebinds the builtin name `input`. Nothing in
# the visible cells reads this value — confirm the intent before relying on it.
input = {'game', "A"}

In [3]:
# Side length of the square grid; flat 'state' vectors are reshaped to
# (N_p, N_p, 2) further down in this notebook.
N_p = 5

# Parsing spec for serialized examples: two fixed-length float32 vectors,
# 'state' with N_p*N_p*2 entries and 'qvalue' with N_p*N_p entries.
feature_spec = {
    key: tf.FixedLenFeature([N_p * N_p * depth], tf.float32)
    for key, depth in (('state', 2), ('qvalue', 1))
}

In [40]:
#schema = metadata.dataset_schema.from_feature_spec(feature_spec)

In [4]:
def create_data(ignore_me, n_games=3, n_p=5):
    """Generate a random batch of (states, qvalues) arrays.

    Args:
        ignore_me: Unused. Kept so the function can be called with a single
            positional argument.
        n_games: Size of the leading (batch) axis. Defaults to 3.
        n_p: Side length of each square grid. Defaults to 5.

    Returns:
        A 2-tuple ``(states, qvalues)``. ``states`` is an integer array of
        0/1 values with shape ``(n_games, n_p, n_p, 2)``; ``qvalues`` is a
        float array drawn uniformly from [0, 1) with shape
        ``(n_games, n_p, n_p, 1)``.
    """
    # Draw order (randint first, then uniform) is kept as-is so that a seeded
    # global RNG yields the same values as before for the default sizes.
    states = np.random.randint(0, 2, size=[n_games, n_p, n_p, 2])
    qvalues = np.random.uniform(size=[n_games, n_p, n_p, 1])
    return states, qvalues

data = create_data("whatever")
data[0].shape, data[1].shape

((3, 5, 5, 2), (3, 5, 5, 1))

In [5]:
# Third state of the batch with its last axis moved to the front:
# (5, 5, 2) -> (2, 5, 5).
state2 = np.transpose(data[0][2], (2, 0, 1))

In [12]:
# Project, bucket and directory settings used by the Beam pipeline cells.
PROJECT = 'going-tfx'
BUCKET = 'going-tfx'
LOCAL_TMPDIR = "/tmp"
OUTPUT_DIR = "./out"
runner = 'DirectRunner'

# Job name: fixed prefix plus a timestamp taken when this cell is executed.
job_name = '-'.join([
    'tournament_data',
    datetime.datetime.now().strftime('%y%m%d-%H%M%S'),
])

# Keyword overrides handed to PipelineOptions below.
options = dict(
    staging_location=os.path.join(OUTPUT_DIR, 'tmp', 'staging'),
    temp_location=os.path.join(OUTPUT_DIR, 'tmp'),
    job_name=job_name,
    project=PROJECT,
    max_num_workers=24,
    teardown_policy='TEARDOWN_ALWAYS',
    no_save_main_session=True,
    requirements_file='requirements.txt',
)
opts = beam.pipeline.PipelineOptions(flags=[], **options)

In [6]:
def _floats_feature(value):
    """Wrap an iterable of numbers in a ``tf.train.Feature`` holding a ``FloatList``."""
    float_list = tf.train.FloatList(value=value)
    return tf.train.Feature(float_list=float_list)

In [7]:
data[0].shape, data[1].shape

((3, 5, 5, 2), (3, 5, 5, 1))

In [8]:
s_and_q = list(zip(data[0], data[1]))

In [9]:
# First (state, qvalue) pair of the zipped batch, followed by the shape of each part.
s0, q0 = s_and_q[0]
s0.shape, q0.shape

((5, 5, 2), (5, 5, 1))

In [17]:
f = _floats_feature(q0.flatten())
f

float_list {
  value: 0.9810678362846375
  value: 0.09030850231647491
  value: 0.6971864700317383
  value: 0.7230231761932373
  value: 0.17483244836330414
  value: 0.180739164352417
  value: 0.915027379989624
  value: 0.67628413438797
  value: 0.2386297583580017
  value: 0.3330509662628174
  value: 0.685763955116272
  value: 0.5620337128639221
  value: 0.7280870676040649
  value: 0.1601569652557373
  value: 0.08854532241821289
  value: 0.926276683807373
  value: 0.5031418204307556
  value: 0.44263941049575806
  value: 0.993732750415802
  value: 0.8726805448532104
  value: 0.1694561094045639
  value: 0.777827799320221
  value: 0.24583850800991058
  value: 0.7371916174888611
  value: 0.03603409230709076
}

In [10]:
# Serialize every (state, qvalue) pair as a tf.train.Example and write the
# records to a local TFRecord file.
tfr_filename = "deleteme.tfr"
with tf.python_io.TFRecordWriter(tfr_filename) as writer:
    for state, qvalue in s_and_q:
        # Both arrays are flattened to 1-D before being stored as float lists.
        features = tf.train.Features(feature={
            'state': _floats_feature(state.flatten()),
            'qvalue' : _floats_feature(qvalue.flatten()),
        })
        example = tf.train.Example(features=features)
        writer.write(example.SerializeToString())

### Read from File

In [11]:
def _parse_function(example):
    """Parse one serialized example into a dict of tensors using ``feature_spec``."""
    return tf.parse_single_example(serialized=example, features=feature_spec)

In [12]:
# Read back the file written by the TFRecordWriter cell; reusing tfr_filename
# keeps the reader and the writer pointing at the same path.
dataset = tf.data.TFRecordDataset(tfr_filename)

In [13]:
decoded = dataset.map(_parse_function).make_one_shot_iterator().get_next()

In [14]:
decoded

{'qvalue': <tf.Tensor 'IteratorGetNext:0' shape=(25,) dtype=float32>,
 'state': <tf.Tensor 'IteratorGetNext:1' shape=(50,) dtype=float32>}

In [15]:
# Pull three consecutive records from the iterator within one session and
# keep only the last (third) one.
with tf.Session() as sess:
    res2 = [sess.run(decoded) for _ in range(3)][-1]

In [16]:
res2['state'].shape, res2['qvalue'].shape

((50,), (25,))

In [25]:
np.rollaxis(res2['state'].reshape(N_p,N_p,2), 2, 0).shape

(2, 5, 5)

In [26]:
# Round-trip check: rebuild the third state from the record that was read back
# from the TFRecord file (res2) and compare it with state2, which was derived
# from the in-memory batch. Recomputing state2_p from data[0][2] would compare
# the source array with itself and could never fail.
state2_p = np.rollaxis(res2['state'].reshape(N_p, N_p, 2), 2, 0)
np.equal(state2, state2_p).all()

True

### Pipelines


In [27]:
# NOTE(review): this rebinds `data`, which earlier cells use as the
# (states, qvalues) tuple returned by create_data. From here on it is a plain
# list whose elements are fed through the Beam transforms below.
data = [1,2,3]

In [28]:
def create_games(ignore_me, n_games=3, n_p=5):
    """Generate a random batch of game states and matching q-values.

    Args:
        ignore_me: Unused. Present so the function can be applied to each
            element of a collection (e.g. through ``beam.Map``).
        n_games: Size of the leading (batch) axis. Defaults to 3.
        n_p: Side length of each square grid. Defaults to 5.

    Returns:
        A 2-tuple ``(states, qvalues)``. ``states`` is an integer array of
        0/1 values with shape ``(n_games, n_p, n_p, 2)``; ``qvalues`` is a
        float array drawn uniformly from [0, 1) with shape
        ``(n_games, n_p, n_p, 1)``.
    """
    # Keep the original draw order (randint, then uniform) so seeded runs with
    # the default sizes produce the same values as before.
    states = np.random.randint(0, 2, size=[n_games, n_p, n_p, 2])
    qvalues = np.random.uniform(size=[n_games, n_p, n_p, 1])
    return states, qvalues

In [29]:
games = create_games("whatever")

In [30]:
games[0].shape, games[1].shape

((3, 5, 5, 2), (3, 5, 5, 1))

In [31]:
def recwise(games):
    """Split a batched (states, qvalues) pair into one flat record per sample.

    ``games[0][i]`` is paired with ``games[1][i]``; each pair becomes a dict
    with keys 'state' and 'qvalue' holding the flattened arrays.

    Returns:
        A list of such dicts, in batch order.
    """
    records = []
    for state, qvalue in zip(games[0], games[1]):
        records.append({'state': state.flatten(), 'qvalue': qvalue.flatten()})
    return records

In [32]:
res = data | beam.Map(create_games) | beam.FlatMap(recwise)

In [33]:
res[0]['qvalue']

array([0.65185799, 0.48063934, 0.32286172, 0.58717156, 0.38729992,
       0.83176181, 0.76039484, 0.59144584, 0.98204481, 0.50449394,
       0.98084978, 0.02232674, 0.230388  , 0.53772263, 0.01088425,
       0.23732806, 0.62667281, 0.62892179, 0.45871991, 0.01001931,
       0.80749942, 0.58844473, 0.32425795, 0.49634598, 0.70180918])

In [34]:
res[0]['state']

array([1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0,
       1, 1, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0,
       1, 1, 0, 0, 0, 1])

In [35]:
# NOTE(review): this raises NameError as written — the
# `import tensorflow_transform as tft` line and the `schema` definition are
# both commented out earlier in this notebook.
tfr_encoder = tft.coders.ExampleProtoCoder(schema)

NameError: name 'tft' is not defined

In [None]:
tfr_encoder.encode(res[0])

### Pipe to TFRecord

In [None]:
# Read game ids from BigQuery, generate records for each and write them out as
# TFRecord shards under LOCAL_TMPDIR.
query = "select distinct(game) from `going-tfx.gomoku.tournaments` limit 2"
out_name = "games"
out_prefix = os.path.join(LOCAL_TMPDIR, out_name)
phase = 'train'


def _encode_example(record):
    """Serialize one {'state', 'qvalue'} record as a tf.train.Example byte string.

    This replaces the tensorflow_transform ExampleProtoCoder: neither `tft`
    nor `schema` is defined in this notebook (their definitions are commented
    out), so the encoding is done with plain TensorFlow, mirroring the
    TFRecordWriter cell.
    """
    example = tf.train.Example(features=tf.train.Features(feature={
        'state': _floats_feature(record['state']),
        'qvalue': _floats_feature(record['qvalue']),
    }))
    return example.SerializeToString()


# No beam_impl.Context is opened here: `beam_impl` is not imported in this
# notebook and no tensorflow_transform step runs in this pipeline, so the
# extra temp directory is not needed either.
with beam.Pipeline(runner, options=opts) as p:

    #   Read from Big Query
    #
    from_bq = p | "ReadFromBigQuery"  >> beam.io.Read(beam.io.BigQuerySource(
        query=query, use_standard_sql=True))

    # Encode back to file(s)
    #
    res = (from_bq
           | beam.Map(create_games)
           | beam.FlatMap(recwise)
           | ('EncodeTFRecord_' + phase) >> beam.Map(_encode_example)
           | ('WriteTFRecord_' + phase) >> beam.io.WriteToTFRecord(out_prefix+'_tfr'))

out_prefix + '_tfr'

### Read from File

In [None]:
def _parse_function(example):
    """Decode a single serialized example according to ``feature_spec``."""
    parsed = tf.parse_single_example(example, feature_spec)
    return parsed

In [None]:
# Path is derived from out_prefix (LOCAL_TMPDIR + out_name) instead of being
# hard-coded, so it follows any change to those settings.
# NOTE(review): assumes the pipeline wrote exactly one shard — confirm.
dataset = tf.data.TFRecordDataset(out_prefix + '_tfr-00000-of-00001')

In [None]:
dataset

In [None]:
record = dataset.take(1)

In [None]:
decoded = dataset.map(_parse_function).make_one_shot_iterator().get_next()

In [None]:
decoded

In [None]:
# Advance the iterator three times inside a single session; res2 ends up
# holding the third record.
with tf.Session() as sess:
    res2 = [sess.run(decoded) for _ in range(3)][-1]

In [None]:
res2['state'].shape, res2['qvalue'].shape

In [None]:
np.rollaxis(res2['state'].reshape(N_p,N_p,2), 2, 0)