In [1]:
import gym
import numpy as np
import random
import tensorflow as tf
import tensorflow.contrib.slim as slim
import matplotlib.pyplot as plt
import scipy.misc
import os
from environment import SM_env
from environment import random_normal_trunc
from environment import eth_env
from environment import SM_env_with_stale
from environment import random_normal_trunc
import mdptoolbox
import pandas as pd
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'  # or any {'0', '1', '2'}
os.environ["CUDA_VISIBLE_DEVICES"] = "1"
gpu_options = tf.GPUOptions(allow_growth=True)

  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])





In [2]:
class Qnetwork():
    def __init__(self, h_size, state_space_n, state_vector_n, action_space_n):

        self.state_space_n = state_space_n
        self.action_space_n = action_space_n
        self.state_vector_n = state_vector_n

        # The network recieves a state number from
        # It then resizes it and processes it through four convolutional layers.
        self.vectorIn = tf.placeholder(shape=[None, state_vector_n], dtype=tf.float32)
        #print(self.scalarInput)
        #self.vectorIn = tf.one_hot(self.scalarInput, state_space_n, dtype=tf.float32)
        #print(self.vectorIn)
        self.fc1 = tf.layers.dense(self.vectorIn, h_size, activation=tf.nn.relu)
        #print(self.fc1)
        self.fc2 = tf.layers.dense(self.fc1, h_size, activation=tf.nn.relu)
        #print(self.fc2)

        '''
        self.imageIn = tf.reshape(self.scalarInput, shape=[-1, 84, 84, 3])
        self.conv1 = slim.conv2d( \
            inputs=self.imageIn, num_outputs=32, kernel_size=[8, 8], stride=[4, 4], padding='VALID',
            biases_initializer=None)
        self.conv2 = slim.conv2d( \
            inputs=self.conv1, num_outputs=64, kernel_size=[4, 4], stride=[2, 2], padding='VALID',
            biases_initializer=None)
        self.conv3 = slim.conv2d( \
            inputs=self.conv2, num_outputs=64, kernel_size=[3, 3], stride=[1, 1], padding='VALID',
            biases_initializer=None)
        self.conv4 = slim.conv2d( \
            inputs=self.conv3, num_outputs=h_size, kernel_size=[7, 7], stride=[1, 1], padding='VALID',
            biases_initializer=None)
        '''

        # We take the output from the final layer and split it into separate advantage and value streams.
        #self.streamAC, self.streamVC = tf.split(self.conv4, 2, 3)
        #self.streamA = slim.flatten(self.streamAC)
        #self.streamV = slim.flatten(self.streamVC)

        #print(self.fc2)

        self.streamA, self.streamV = tf.split(self.fc2, 2, 1)
        xavier_init = tf.contrib.layers.xavier_initializer()
        self.AW = tf.Variable(xavier_init([h_size // 2, action_space_n]))
        self.VW = tf.Variable(xavier_init([h_size // 2, 1]))
        self.Advantage = tf.matmul(self.streamA, self.AW)
        self.Value = tf.matmul(self.streamV, self.VW)
        # Then combine them together to get our final Q-values.
        self.Qout = self.Value + tf.subtract(self.Advantage, tf.reduce_mean(self.Advantage, axis=1, keep_dims=True))
        #print(self.Qout)
        self.predict = tf.argmax(self.Qout, 1)

        # Below we obtain the loss by taking the sum of squares difference between the target and prediction Q values.
        self.targetQ = tf.placeholder(shape=[None], dtype=tf.float32)
        self.actions = tf.placeholder(shape=[None], dtype=tf.int32)
        self.actions_onehot = tf.one_hot(self.actions, action_space_n, dtype=tf.float32)

        self.Q = tf.reduce_sum(tf.multiply(self.Qout, self.actions_onehot), axis=1)

        self.td_error = tf.square(self.targetQ - self.Q)
        self.loss = tf.reduce_mean(self.td_error)
        self.trainer = tf.train.AdamOptimizer(learning_rate=0.0001)
        self.updateModel = self.trainer.minimize(self.loss)

    def get_Q_table(self, sess, s):
        Q = sess.run(self.Qout, feed_dict={self.vectorIn:[s]})
        Q = np.reshape(Q, [-1])
        return Q

    def act_epsilon_greedy(self, sess, s, e = 0):

        #legal_move_list = env.legal_move_list(s)
        #legal_move_list = range(env._action_space_n)

        if np.random.rand(1) < e:
            a = np.random.choice(self.action_space_n)
        else:
            Q = self.get_Q_table(sess, s)
            a = np.argmax(Q)

            '''
            #print(Q)
            a = 0
            val = -100000
            for i in range(self.):
                if (Q[i] > val):
                    val = Q[i]
                    a = i
            '''

        return a

    def get_policy_table(self, sess, env):
        policy = np.zeros(self.state_space_n, dtype = np.int32)
        for i in range(0, self.state_space_n):
            ss = env._index_to_vector(i)
            policy[i] = self.act_epsilon_greedy(sess, ss, 0)
        return policy


In [3]:
HIDDEN_BLOCK = 20 # maximum hidden block of attacker
rule = "longest" # "longest" -- bitcoin rule, "GHOST" -- GHOST rule
h_size = 100 #The size of the final convolutional layer before splitting it into Advantage and Value streams.
path = "./btc_" + rule + str(HIDDEN_BLOCK) + "_" + str(h_size) #The path to save our model to.
know_alpha = True # if the agent knows the current alpha.
# if know_alpha == True: path += "know_alpha"
best_path = path + "_cp/model_best.ckpt" # best model path

stale_rate = 0.0 # stale block rate, 0 means no stale block -- the classical selfish mining setting
ALPHA = 0.4 # the hash power fraction of attacker
GAMMA = 0.5 # the follower's fraction
DEV = 0.0 # the alpha's fluctuation rate, 0 means fixed alpha
know_alpha = True # if the agent knows the current alpha.
random_process = "iid" # or "brown" -- brownian process
interval = (0, 0.5) # the range of the alpha


In [4]:
env = SM_env_with_stale(max_hidden_block = HIDDEN_BLOCK, attacker_fraction = ALPHA, follower_fraction = GAMMA, rule = rule, stale_rate=stale_rate, dev = DEV, know_alpha = know_alpha, random_interval=interval, random_process = random_process, frequency=6)

state space size =  733


In [5]:
h_size = 100 #The size of the final convolutional layer before splitting it into Advantage and Value streams.
load_best_model = True # load a saved best model
init = tf.global_variables_initializer()
mainQN = Qnetwork(h_size, env._state_space_n, env._state_vector_n, env._action_space_n)
targetQN = Qnetwork(h_size, env._state_space_n, env._state_vector_n, env._action_space_n)
saver = tf.train.Saver()



Instructions for updating:
Use keras.layers.dense instead.
Instructions for updating:
Call initializer instance with the dtype argument instead of passing it to the constructor




In [6]:
num_tra = 500
seed_trial = 5

In [8]:
index_file = 'episode_15/train_index.txt'
index_pd = pd.read_table(index_file, sep=';', header=None)
index_loc = index_pd[0].values

In [9]:
with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
    if load_best_model == True:
        print('Loading Best Model..')
        saver.restore(sess, best_path)

    reward_origin = []
    for index in range(num_tra):
        env.seed(index_loc[index])
        s = env.reset()
        s_0 = s
        r_total = 0
        for i in range(100):
            a = mainQN.act_epsilon_greedy(sess, s, 0)
            s, r, d, _ = env.step(s, a, move = True)
            r_total += r
            if s == s_0:
                break
        reward_origin.append(r_total)
    sess.close()

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt


In [9]:
summary = 'summary_explain/15e/smooth_5.txt'
summary_pd = pd.read_table(summary, sep=';', header=None)
max_loc = summary_pd[0].values
min_loc = summary_pd[1].values
relative_reward = summary_pd[1].values
f1 = summary_pd[2].values
f2 = summary_pd[3].values
start = 5
end = 10

In [10]:
# pad = './episode_15_front/padding_index.txt'
# pad_index = pd.read_table(pad, sep=';', header=None)
# pad_num = pad_index[0].values
# # max_loc -= pad_num
# # min_loc -= pad_num

In [11]:
from random import choice
def random_run(choice_state,reward_origin,f1,f2):
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if load_best_model == True:
            print('Loading Best Model..')
            saver.restore(sess, best_path)

        reward_random = []
        for index in range(num_tra):
            # print(index)
            break_state = choice_state[index] - 1
            # break_state = 180
            flag = False
            average = []
            for t in range(seed_trial):
                env.seed(index_loc[index])
                s = env.reset()
                s_0 = s
                if flag:
                    continue
                r_total = 0
                modifiable = False
                for i in range(200):
                    a = mainQN.act_epsilon_greedy(sess, s, 0)
                    if (i==break_state):
                        modifiable = True
                        legal_state = env.legal_move_list(s)
                        if a not in legal_state:
                            a = legal_state[0]
                        legal_state.remove(a)
                        if len(legal_state)==0:
                            flag = True
                            break
                        elif len(legal_state)==1:
                            a = legal_state[0]
                        elif len(legal_state)==2:
                            a = choice(legal_state)
                        # print("The action is: ", a)
                    s, r, d, _ = env.step(s, a, move = True)
                    r_total+=r
                    # line = ("The block index is " + str(i) + "         "+ str(s[0]) + ";" + str(s[1]) + ";" + str(s[3]) + ";")
                    # print(line)
                    if flag==False and s == s_0:
                        break
                    
                    # if (i==break_state):
                    #     env.seed(2021+index+100+t)
                if flag==False and modifiable==True:
                    average.append(abs(r_total-reward_origin[index]))

            if len(average)==0:
                r=-1
            else:
                r = sum(average) / len(average)
                # r = max(average)
            reward_random.append(r)
        sess.close()
    return reward_random

In [19]:
reward_max = random_run(max_loc, reward_origin,f1,f2)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt


In [20]:
reward_min = random_run(min_loc, reward_origin,f1,f2)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt


In [26]:
loc1 = np.random.randint(start,end,size=[num_tra])
# for i in range(len(loc1)):
#     loc1[i] = choice([9,11,13,10,12])
# loc1 -= pad_num
# for i in range(len(loc1)):
#     if loc1[i]==max_loc[i] or loc1[i]==min_loc[i]:
#         legal_list = [5,6,7,8,9]
#         legal_list.remove(max_loc[i])
#         legal_list.remove(min_loc[i])
#         loc1[i] = choice(legal_list)
rand1 = random_run(loc1, reward_origin,f1,f2)    

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt


In [12]:
summary = 'summary_explain/15e/concept_5.txt'
summary_pd = pd.read_table(summary, sep=';', header=None)
max_loc_1 = summary_pd[0].values
min_loc_1 = summary_pd[1].values
relative_reward = summary_pd[1].values
reward_max_1 = random_run(max_loc_1, reward_origin,f1,f2)
reward_min_1 = random_run(min_loc_1, reward_origin,f1,f2)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt
Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt


In [117]:
def compute_prob(reward_max, reward_min, rand1):
    reward_max = np.array(reward_max)
    reward_min = np.array(reward_min)
    rand1 = np.array(rand1)

    cnt = 0
    num = 0
    for i in range(len(reward_max)):
        if (reward_max[i]!=-1 and rand1[i]!=-1 and reward_min[i]!=-1 and reward_max[i]!=rand1[i]):
            cnt+=1
            if reward_max[i] > rand1[i]:
                num+=1
    print("Most important vs rand", num/cnt, " num trajectories", cnt)

    cnt = 0
    num = 0
    for i in range(len(reward_max)):
        if (reward_min[i]!=-1 and rand1[i]!=-1 and reward_max[i]!=-1 and reward_min[i]!=rand1[i]):
            cnt+=1
            if reward_min[i] < rand1[i]:
                num+=1
    print("Least important vs rand", num/cnt, " num trajectories", cnt)

    cnt = 0
    num = 0
    for i in range(len(reward_max)):
        if (reward_max[i]!=-1 and reward_min[i]!=-1 and rand1[i]!=-1 and reward_min[i]!=reward_max[i]):
            cnt+=1
            if reward_max[i] > reward_min[i]:
                num+=1
    print("Most important vs Least important", num/cnt, " num trajectories", cnt)
    

In [118]:
# compute_prob(reward_max, reward_min, rand1)

In [119]:
# import matplotlib.pyplot as plt
# # plt.hist(max_loc,bins=np.unique(max_loc), color = 'red', lw=0, alpha=0.4,label = 'max')
# plt.hist(min_loc,bins=np.unique(min_loc), color = 'indigo', lw=0, alpha=0.4,label = 'min')
# # plt.hist(loc1,bins=np.unique(loc1)+1, color = 'yellow', lw=0, alpha=0.2)
# plt.legend()

In [25]:
sum_ = 0
cnt = 0
for e in reward_max_1:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Concept Max average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in reward_min_1:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Concept Min average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in reward_max:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Saliency Max average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in reward_min:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Saliency Min average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in rand1:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Rand average gap: ", sum_/cnt)

Concept Max average gap:  1.9130800904714944
Concept Min average gap:  1.221104905047221
Saliency Max average gap:  1.652565800273599
Saliency Min average gap:  1.4104399748585779
Rand average gap:  1.6018372817503443


In [121]:
sum_ = 0
cnt = 0
for e in reward_max:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Legal trajectories: ", cnt, " average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in reward_min:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Legal trajectories: ", cnt, " average gap: ", sum_/cnt)
sum_ = 0
cnt = 0
for e in rand1:
    if e!=-1:
        cnt+=1
        sum_+=e
print("Legal trajectories: ", cnt, " average gap: ", sum_/cnt)

Legal trajectories:  404  average gap:  1.6581671655537649
Legal trajectories:  391  average gap:  1.652128709926842
Legal trajectories:  423  average gap:  1.5360030787838805


In [93]:
def conclude_state(num,loc):
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if load_best_model == True:
            print('Loading Best Model..')
            saver.restore(sess, best_path)

        states = []
        append_num = 0
        for index in range(num):
            env.seed(index_loc[index])
            s = env.reset()
            s_0 = s
            r_total = 0
            for i in range(50):
                a = mainQN.act_epsilon_greedy(sess, s, 0)
                if 5<=i<=9:
                    to_append = [s[0],s[1],s[3]]
                    states.append(to_append)
                    append_num+=1
                if i!=0 and s==s_0:
                    break

                
                # if (i==loc[index]):
                #     to_append = [s[0],s[1],s[3]]
                #     states.append(to_append)
                #     break
                s, r, d, _ = env.step(s, a, move = True)

    sess.close()
    print(append_num)
    x = np.array(states)
    mask = np.unique(x, axis=0)
    cnt = np.zeros(mask.shape[0])
    c = 0
    for c in range(mask.shape[0]):
        tmp=0
        for i in range(x.shape[0]):
            if (x[i]==mask[c]).all():
                tmp+=1
        cnt[c] = tmp
    for i in range(cnt.shape[0]):
        print(mask[i], cnt[i])

In [94]:
conclude_state(500,min_loc_1)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt
2500
[0 1 0] 23.0
[1 1 0] 191.0
[1 1 2] 191.0
[1 2 0] 11.0
[2 1 0] 96.0
[2 1 2] 145.0
[2 2 0] 180.0
[2 2 2] 180.0
[2 3 0] 22.0
[3 1 0] 21.0
[3 1 2] 210.0
[3 2 0] 193.0
[3 2 2] 273.0
[3 3 0] 99.0
[3 3 1] 22.0
[3 3 2] 99.0
[3 4 0] 23.0
[4 1 0] 63.0
[4 1 2] 48.0
[4 2 0] 75.0
[4 2 2] 130.0
[4 3 0] 100.0
[4 3 2] 61.0
[5 0 0] 1.0
[5 1 0] 4.0
[5 1 2] 7.0
[5 2 0] 6.0
[5 2 2] 15.0
[5 3 0] 11.0


In [44]:
total = np.array(reward_origin)
a = total.mean()
a

0.9358790697674406

In [17]:
def stat_state(num):
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if load_best_model == True:
            print('Loading Best Model..')
            saver.restore(sess, best_path)

        states = []
        average = []
        traj_num = 0
        for index in range(num):
            env.seed(index_loc[index])
            s = env.reset()
            s_0 = s
            r_total = 0
            flag=False
            traj_flag = False
            for i in range(50):
                a = mainQN.act_epsilon_greedy(sess, s, 0)
                if 5<=i<=9 and s[0]==3 and s[1]==2 and s[3]==2:
                    flag = True
                    if traj_flag==False:
                        traj_num+=1
                        traj_flag=True
                s, r, d, _ = env.step(s, a, move = True)
                r_total+=r
                # print(s)
                
                if s==s_0:
                    break
            if flag:
                average.append(r_total)
    sess.close()
    if len(average)!=0:
        print(sum(average)/len(average))
        print(traj_num)


In [18]:
stat_state(500)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt
0.9341340829712927
273


In [15]:
from random import choice
def patch_state(num):
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if load_best_model == True:
            print('Loading Best Model..')
            saver.restore(sess, best_path)

        states = []
        average = []
        traj_num = 0
        for index in range(num):
            env.seed(index_loc[index])
            s = env.reset()
            s_0 = s
            r_total = 0
            flag=False
            traj_flag = False
            for i in range(50):
                a = mainQN.act_epsilon_greedy(sess, s, 0)
                if 5<=i<=9 and s[0]==3 and s[1]==2 and s[3]==2:
                    a = choice([0,2])
                    flag = True
                    if traj_flag==False:
                        traj_num+=1
                        traj_flag=True
                s, r, d, _ = env.step(s, a, move = True)
                r_total+=r
                # print(s)
                
                if s==s_0:
                    break
            if flag:
                average.append(r_total)
    sess.close()
    if len(average)!=0:
        print(sum(average)/len(average))
        print(traj_num)

In [16]:
patch_state(500)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt
1.2228128460686565
273


In [26]:
from random import choice
def all_patch_state(num):
    with tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) as sess:
        if load_best_model == True:
            print('Loading Best Model..')
            saver.restore(sess, best_path)

        states = []
        average = []
        traj_num = 0
        for index in range(num):
            env.seed(index_loc[500+index])
            s = env.reset()
            s_0 = s
            r_total = 0
            traj_flag = False
            for i in range(50):
                a = mainQN.act_epsilon_greedy(sess, s, 0)
                if 5<=i<=9 and s[0]==3 and s[1]==2 and s[3]==2:
                    a = choice([0,2])
                if 5<=i<=9 and s[0]==2 and s[1]==1 and s[3]==2:
                    a = choice([0,2])
                s, r, d, _ = env.step(s, a, move = True)
                r_total+=r
                # print(s)
                
                if s==s_0:
                    break
            average.append(r_total)
    sess.close()
    if len(average)!=0:
        print(sum(average)/len(average))

In [27]:
all_patch_state(2000)

Loading Best Model..
INFO:tensorflow:Restoring parameters from ./btc_longest20_100_cp/model_best.ckpt
0.8863790697674006
