<a href="https://colab.research.google.com/github/kenzarh/APBFT/blob/main/Less_parameters.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [22]:
#!pip install tensorflow
#!pip install gym==0.21.0
#!pip install keras
#!pip install keras-rl2

import numpy as np
from gym import Env
from gym.spaces import Box, MultiDiscrete
from random import randrange
import random

Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/
Looking in indexes: https://pypi.org/simple, https://us-python.pkg.dev/colab-wheels/public/simple/


## Building the RL environment with OpenAI Gym

In [23]:
N = 10 # Number of nodes in the simulated network (fixed for the whole run)
e = 10 # Length of one episode, in environment steps (initial value of the episode countdown)

class Node:
    """Generic network node described by two randomly drawn parameters.

    Attributes:
        mean_validation_delay: value drawn uniformly from [0, 1].
        rate_faulty_messages: value drawn uniformly from [0, 1].
    """

    def __init__(self):
        # The delay is drawn before the faulty-message rate; keeping this
        # order makes results reproducible under a seeded RNG.
        drawn_delay = random.uniform(0, 1)
        drawn_rate = random.uniform(0, 1)
        self.mean_validation_delay = drawn_delay
        self.rate_faulty_messages = drawn_rate

class ConsensusNode(Node):
    """Node with a fixed validation delay of 0.3 and no faulty messages."""

    def __init__(self):
        # Node.__init__ is not invoked here, so no random draw happens:
        # both parameters start from fixed values.
        self.mean_validation_delay, self.rate_faulty_messages = 0.3, 0

class SlowNode(Node):
    """Node with a fixed validation delay of 0.6 and no faulty messages."""

    def __init__(self):
        # Node.__init__ is not invoked here, so no random draw happens:
        # both parameters start from fixed values.
        self.mean_validation_delay, self.rate_faulty_messages = 0.6, 0

class UnavailableNode(Node):
    """Node with the maximum validation delay (1) and no faulty messages."""

    def __init__(self):
        # Node.__init__ is not invoked here, so no random draw happens:
        # both parameters start from fixed values.
        self.mean_validation_delay, self.rate_faulty_messages = 1, 0

class MaliciousNode(Node):
    """Node with a validation delay of 0.5 and a faulty-message rate of 0.7."""

    def __init__(self):
        # Node.__init__ is not invoked here, so no random draw happens:
        # both parameters start from fixed values.
        self.mean_validation_delay, self.rate_faulty_messages = 0.5, 0.7


def initial_state(N):
    """Draw a random population of N nodes and build the matching state array.

    The population is split at random into four node kinds:

    * honest nodes: between ``(2*N - 1)//3 + 1`` and ``N - 1`` of them, split
      into consensus nodes (0 to honest - 1) and slow nodes (the remainder);
    * faulty nodes: the other ``N - honest`` nodes, split into unavailable
      nodes (0 to faulty - 1) and malicious nodes (the remainder).

    Nodes are created in the order consensus, slow, unavailable, malicious.

    Args:
        N: total number of nodes in the network.

    Returns:
        A ``(state, nodes_list)`` tuple. ``state`` is a NumPy array with two
        rows and N columns: row 0 holds each node's mean validation delay and
        row 1 its rate of faulty messages. ``nodes_list`` holds the node
        objects in the same column order.

    Raises:
        ValueError: propagated from ``randrange`` when the honest-node range
            is empty (this happens for N < 3).
    """
    honest_total = randrange(((2 * N - 1) // 3) + 1, N)
    faulty_total = N - honest_total

    consensus_total = randrange(0, honest_total)
    slow_total = honest_total - consensus_total

    if faulty_total != 0:
        unavailable_total = randrange(faulty_total)
        malicious_total = faulty_total - unavailable_total
    else:
        unavailable_total = 0
        malicious_total = 0

    # (node class, how many to create), in the column order of the state.
    population = (
        (ConsensusNode, consensus_total),
        (SlowNode, slow_total),
        (UnavailableNode, unavailable_total),
        (MaliciousNode, malicious_total),
    )

    nodes_list = []
    for node_cls, count in population:
        nodes_list.extend(node_cls() for _ in range(count))

    delays = [node.mean_validation_delay for node in nodes_list]
    faulty_rates = [node.rate_faulty_messages for node in nodes_list]

    return np.array([delays, faulty_rates]), nodes_list
   

class ConsensusEnvironment(Env):
    """Gym environment in which the agent assigns every node to one of 3 sets.

    The action is a vector with one entry per node, each in {0, 1, 2}. Entry 0
    means the node takes part in the consensus; entries 0 and 1 are both
    counted as "honest" in the log output. (Set 2 is presumably the faulty
    set; the code never tests for it explicitly.)

    The observation is a 2 x N array: row 0 holds the nodes' mean validation
    delays and row 1 their rates of faulty messages.

    Attributes:
        N: number of nodes, as passed to the constructor.
        action_space: MultiDiscrete space with 3 choices for each node.
        observation_space: Box of shape (2, N) with values in [0, 1].
        episod_length: remaining steps in the current episode.
        state: current observation array.
        nodes_list: node objects, in the same order as the state columns.
    """

    def __init__(self, N):
        """Create the spaces and draw an initial random population.

        Args:
            N: number of nodes in the network.
        """
        super(ConsensusEnvironment, self).__init__()

        # Keep the size on the instance so step() and reset() stay consistent
        # with the spaces below, whatever the module-level N is.
        self.N = N

        self.action_space = MultiDiscrete(np.array([3 for _ in range(N)]))

        # 1 column per node, 1 row for delays and 1 row for the rate of
        # faulty messages.
        self.observation_space = Box(low=0, high=1, shape=(2, N,), dtype=np.float64)

        self.episod_length = e

        self.state, self.nodes_list = initial_state(N)

    def step(self, action):
        """Apply one action, update the selected nodes and compute the reward.

        The base reward is -1 when fewer than 4 nodes, or fewer than
        ``(2*N - 1)//3`` nodes, are selected for consensus; otherwise it is
        ``1 / number_of_selected_nodes``. Each selected node then adjusts it
        according to its real type: +0.5 for a consensus node, -0.1 for a slow
        node, -0.3 for an unavailable node and -0.5 for a malicious node.
        Selected nodes (except unavailable ones) also get new random
        parameters, which are written back into the state.

        Progress is logged with ``print``.

        Args:
            action: sequence of N integers in {0, 1, 2}, one per node.

        Returns:
            ``(observation, reward, done, info)`` where ``observation`` is a
            copy of the updated state, ``done`` is True once the episode
            countdown reaches zero and ``info`` is an empty dict.
        """
        print ("Actual state:" , self.state)
        print ("Proposed action:" , action)

        self.episod_length -= 1

        # Accept lists/tuples as well as NumPy arrays.
        nodes_sets = np.asarray(action).tolist()
        print("#####################",nodes_sets)

        honest_nodes = sum(1 for node_set in nodes_sets if node_set in (0, 1))
        consensus_nodes = sum(1 for node_set in nodes_sets if node_set == 0)

        print("Number of honest nodes:", honest_nodes)
        print("Number of consensus nodes:", consensus_nodes)

        if consensus_nodes < 4 or consensus_nodes < ((2 * self.N - 1) // 3):
            reward = -1
        else:
            # consensus_nodes >= 4 here, so the division is safe.
            reward = 1 / consensus_nodes

        # Logged before the per-node adjustments below are applied.
        print("Reward:",reward)

        done = self.episod_length <= 0

        # The environment state changes depending on the selected nodes:
        # node parameters only change if the node participates in the consensus.
        for i in range(self.N):
            if nodes_sets[i] != 0:
                continue

            node = self.nodes_list[i]
            node_type = node.__class__.__name__

            if node_type == "ConsensusNode":
                # The rate of faulty messages does not change.
                node.mean_validation_delay = random.uniform(0.1, 0.3)
                self.state[0][i] = node.mean_validation_delay
                reward += 0.5

            elif node_type == "SlowNode":
                node.mean_validation_delay = random.uniform(0.5, 0.8)
                self.state[0][i] = node.mean_validation_delay
                reward -= 0.1

            elif node_type == "UnavailableNode":
                # Unavailable nodes parameters do not change: they never
                # respond to requests.
                reward -= 0.3

            elif node_type == "MaliciousNode":
                node.mean_validation_delay = random.uniform(0.1, 0.6)
                self.state[0][i] = node.mean_validation_delay
                node.rate_faulty_messages = random.uniform(0.3, 0.9)
                # Row 1 is the faulty-message rate, so it must mirror
                # rate_faulty_messages (not the delay).
                self.state[1][i] = node.rate_faulty_messages
                reward -= 0.5

        info = {}

        print ("New state:",self.state)

        return np.array(self.state), reward, done, info

    def reset(self):
        """Draw a fresh random population and restart the episode countdown.

        Returns:
            The new state array (2 rows, N columns).
        """
        self.state, self.nodes_list = initial_state(self.N)

        print("State returned by reset:" , self.state)

        self.episod_length = e
        return self.state



In [None]:
#!pip install stable_baselines3
#from stable_baselines3.common.env_checker import check_env
#env = ConsensusEnvironment(N)
#check_env(env)