In [1]:
import numpy as np
import tensorflow as tf
import keras_tuner as kt
from tensorflow import keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
import datetime
import pandas as pd

In [2]:
print(tf.config.list_physical_devices('CPU'))

[PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU')]


In [3]:
IMPORT_COUNT = 1990000
TEST_COUNT = 10000

In [4]:
# Generate random seed
#myrand=np.random.randint(1, 99999 + 1)
myrand=71926
np.random.seed(myrand)
tf.random.set_seed(myrand)
print("Random seed is:",myrand)

Random seed is: 71926


In [5]:
PREVIOUS_TIMESTEP_COUNT = 4
TOTAL_DATA_NUM = IMPORT_COUNT-PREVIOUS_TIMESTEP_COUNT

In [6]:
# convert the sequence of generated numbers to 4 inputs and one output
def strided(a, L):
	shp = a.shape
	s  = a.strides
	nd0 = shp[0]-L+1
	shp_in = (nd0,L)+shp[1:]
	strd_in = (s[0],) + s
	return np.lib.stride_tricks.as_strided(a, shape=shp_in, strides=strd_in)

In [7]:
HD_OUTPUT_FILENAME="hashdice.txt"
df = np.genfromtxt(HD_OUTPUT_FILENAME,delimiter='\n',dtype='uint64')[:IMPORT_COUNT]
print(df)

[   135410    110121      8382 ... 399249997 603589998 943969999]


In [8]:
# calculates how many bits are in the output.
BIT_WIDTH = np.ceil(np.log2(np.amax(df))).astype(int)
print(BIT_WIDTH)

30


In [9]:
# convert the generated numbers to binary sequences
df_as_bits =(df[:,None] & (1 << np.arange(BIT_WIDTH,dtype='uint64')) > 0).astype(int)
df_as_frames = strided(df_as_bits, PREVIOUS_TIMESTEP_COUNT+1)

In [10]:
indicies = np.arange(TOTAL_DATA_NUM,dtype='uint64')
np.random.shuffle(indicies)
df_as_frames=df_as_frames[indicies]

In [11]:
# convert the data into inputs and outputs
y = df_as_frames[:,-1,:]
X = df_as_frames[:,:-1,]
X = X.reshape([X.shape[0], X.shape[1]*X.shape[2]])
print(X[0])
print(y[0])

[0 1 0 1 0 1 0 0 0 1 0 1 1 1 0 0 0 0 0 1 1 1 1 0 1 1 0 1 0 1 1 1 0 1 1 1 0
 1 1 0 1 0 0 0 1 0 0 1 1 0 0 1 0 1 1 0 0 0 1 0 0 0 1 1 0 1 1 1 1 0 1 1 0 1
 1 1 1 0 0 1 1 1 0 1 0 1 0 0 0 0 1 0 1 1 0 0 0 1 1 1 0 1 1 0 0 1 1 0 1 0 1
 0 1 1 1 1 1 0 0 0]
[0 1 1 1 0 1 1 1 1 0 0 0 0 0 0 1 1 0 1 1 0 1 1 1 0 1 1 0 0 1]


In [12]:
# Convert the data into train and test data
X_train = X[TEST_COUNT:]
X_test = X[:TEST_COUNT]
y_train = y[TEST_COUNT:]
y_test = y[:TEST_COUNT]

In [13]:
def build_model(hp):
	LOSS="binary_crossentropy"
	model = Sequential()
	model.add(Dense(10000, activation='relu',input_shape=[X.shape[1]] ))
	model.add(Dense(y.shape[1], activation='sigmoid'))
	opt = keras.optimizers.Nadam(
		learning_rate=hp.Float("learning_rate", 10**(-5), 10**(-3),sampling="log"),
		epsilon=hp.Float("epsilon",1e-7,1e-5,sampling="log"),
		beta_1=hp.Float("beta_1",.8,.9,sampling="reverse_log"),
		beta_2=hp.Float("beta_2",.8,.9,sampling="reverse_log"),
		)
	model.compile(optimizer=opt, loss=LOSS,metrics=['binary_accuracy'])
	return model

In [14]:
#define call back functions
stopEarly = tf.keras.callbacks.EarlyStopping(
	monitor='binary_accuracy', min_delta=.001, patience=3, verbose=0, mode='auto', restore_best_weights=False
)

log_dir = "hyperparameters/"+datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1,profile_batch=0)

In [15]:
# extract a short set from the training for hyper parameter tuning
X_train_short= X_train[:600000]
y_train_short= y_train[:600000]

In [16]:
%%time
# Tuning the hyper parameters

tuner = kt.tuners.bayesian.BayesianOptimization(build_model,'binary_accuracy',20,project_name="bayes", seed=None)
tuner.search(X_train_short, y_train_short,batch_size=256, epochs=100, validation_data=(X_test,y_test),callbacks=[stopEarly,tensorboard_callback])
tuner.results_summary()

INFO:tensorflow:Reloading Tuner from .\bayes\tuner0.json
INFO:tensorflow:Oracle triggered exit
Results summary
Results in .\bayes
Showing 10 best trials
<keras_tuner.engine.objective.Objective object at 0x000001FB5F35DE40>
Trial summary
Hyperparameters:
learning_rate: 0.0005308197455333206
epsilon: 1e-05
beta_1: 0.8000000000000002
beta_2: 0.9000000000000001
Score: 0.7126719951629639
Trial summary
Hyperparameters:
learning_rate: 0.0004896115205085131
epsilon: 8.518788077958315e-06
beta_1: 0.8275699434776518
beta_2: 0.8613670943370839
Score: 0.7124506831169128
Trial summary
Hyperparameters:
learning_rate: 0.001
epsilon: 1e-05
beta_1: 0.8406796408298027
beta_2: 0.9000000000000001
Score: 0.7080880403518677
Trial summary
Hyperparameters:
learning_rate: 0.0003110759805819733
epsilon: 1.7080390761761855e-06
beta_1: 0.8732406468046287
beta_2: 0.8765349740693021
Score: 0.7076530456542969
Trial summary
Hyperparameters:
learning_rate: 0.00016718413174765734
epsilon: 5.85020145627166e-07
beta_1: 0

In [17]:
best_hps = tuner.get_best_hyperparameters(num_trials = 1)[0]
# use the best model for training
model = tuner.hypermodel.build(best_hps)
best_hps.values

{'learning_rate': 0.0005308197455333206,
 'epsilon': 1e-05,
 'beta_1': 0.8000000000000002,
 'beta_2': 0.9000000000000001}

In [18]:
def train_model(model, X = X_train, Y=y_train, epochs=10, batch_size=512,verbose=0, log_dir = "dense_model/"):
    log_dir += datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
    tensorboard_callback = keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1,profile_batch=0)
    model.fit(X, Y, validation_data=(X_test, y_test), epochs=epochs, batch_size=batch_size,callbacks=[tensorboard_callback],verbose=verbose)
    return model

In [19]:
%%time
my_model_trained = train_model(model, epochs=100)

CPU times: total: 1d 6h 5min 2s
Wall time: 9h 7min 31s


In [20]:
results = model.evaluate(X_test, y_test, batch_size=256)
print("test loss: %f, test acc: %s" % tuple(results))

test loss: 0.627516, test acc: 0.5727199912071228


In [21]:
my_model_trained.save("hashdice_model.h5")