In [1]:
# !pip install gym~=0.17.3
# !pip install numpy==1.19.4
# !pip install boolean.py@git+https://github.com/bastikr/boolean.py/@74063a8588875c058e6dbbb85b69ed052e1f2099#egg=boolpy_stubs
# !pip install networkx==2.4
# !pip install pyyaml~=5.4.1
# !pip install setuptools~=49.2.1
# !pip install matplotlib~=3.2.1
# !pip install plotly~=4.11.0
# !pip install tabulate~=0.8.7
# !pip install ordered_set==4.0.2
# !pip install progressbar2==3.51.4
# !pip install decorator==4.3

In [1]:
import sys
import gym
from gym.utils import seeding
import math
from copy import deepcopy

import cyberbattle._env.cyberbattle_env as cyberbattle_env
gym_env = gym.make('CyberBattleToyCtf-v0')

import cyberbattle.simulation.model as model
import cyberbattle.simulation.commandcontrol as commandcontrol
import cyberbattle.samples.toyctf.toy_ctf as ctf
import plotly.offline as plo
plo.init_notebook_mode(connected=True)

from cyberbattle.agents.baseline.agent_wrapper import EnvironmentBounds
import cyberbattle.agents.baseline.agent_wrapper as w

import logging
logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s: %(message)s")

import numpy as np

In [2]:
#############
# gymid = 'CyberBattleTiny-v0'
#############
gymid = "CyberBattleToyCtf-v0"
env_size = None
# iteration_count = 1500
iteration_count = 10
training_episode_count = 20
eval_episode_count = 10
maximum_node_count = 12
maximum_total_credentials = 10



# Load the Gym environment
if env_size:
    gym_env = gym.make(gymid, size=env_size)
else:
    gym_env = gym.make(gymid)

ep = w.EnvironmentBounds.of_identifiers(
    maximum_node_count=maximum_node_count,
    maximum_total_credentials=maximum_total_credentials,
    identifiers=gym_env.identifiers
)

In [3]:
def randomPolicy(state, env, random):
#     for debug
# def randomPolicy(state,numiters, env, random):

    '''
    Policy followed in MCTS simulation for playout
    '''
    reward = 0.
    count=0
    while not state.isDone():
# for debug
#     for i in range(count):
#         count=count+1
#         print("count")
#         print(count)

        action = gym_env.sample_valid_action()
        
# for debug
#         state = state.simulateStep(env=env,ep=ep,count=count,numiters=numiters,action=action)
        state = state.simulateStep(env=env,ep=ep,action=action)
        reward += state.getReward()

    
    return reward

In [4]:
class CyberBattleStateActionModel:
    """ Define an abstraction of the state and action space
        for a CyberBattle environment, to be used to train a Q-function.
    """

    def __init__(self, ep: EnvironmentBounds):
        self.ep = ep

        self.global_features = w.ConcatFeatures(ep, [
            # w.Feature_discovered_node_count(ep),
            # w.Feature_owned_node_count(ep),
            w.Feature_discovered_notowned_node_count(ep, None)

            # w.Feature_discovered_ports(ep),
            # w.Feature_discovered_ports_counts(ep),
            # w.Feature_discovered_ports_sliding(ep),
            # w.Feature_discovered_credential_count(ep),
            # w.Feature_discovered_nodeproperties_sliding(ep),
        ])

        self.node_specific_features = w.ConcatFeatures(ep, [
            # w.Feature_actions_tried_at_node(ep),
            w.Feature_success_actions_at_node(ep),
            w.Feature_failed_actions_at_node(ep),
            w.Feature_active_node_properties(ep),
            w.Feature_active_node_age(ep)
            # w.Feature_active_node_id(ep)
        ])

        self.state_space = w.ConcatFeatures(ep, self.global_features.feature_selection +
                                            self.node_specific_features.feature_selection)

        self.action_space = w.AbstractAction(ep)

    def get_state_astensor(self, state: w.StateAugmentation):
        state_vector = self.state_space.get(state, node=None)
        state_vector_float = np.array(state_vector, dtype=np.float32)
        state_tensor = torch.from_numpy(state_vector_float).unsqueeze(0)
        return state_tensor

    def implement_action(
            self,
            wrapped_env: w.AgentWrapper,
            actor_features: np.ndarray,
#             abstract_action: np.int32) -> Tuple[str, Optional[cyberbattle_env.Action], Optional[int]]:
            abstract_action: np.int32):
        """Specialize an abstract model action into a CyberBattle gym action.

            actor_features -- the desired features of the actor to use (source CyberBattle node)
            abstract_action -- the desired type of attack (connect, local, remote).

            Returns a gym environment implementing the desired attack at a node with the desired embedding.
        """

        observation = wrapped_env.state.observation

        # Pick source node at random (owned and with the desired feature encoding)
        potential_source_nodes = [
            from_node
            for from_node in w.owned_nodes(observation)
            if np.all(actor_features == self.node_specific_features.get(wrapped_env.state, from_node))
        ]

        if len(potential_source_nodes) > 0:
            source_node = np.random.choice(potential_source_nodes)

            gym_action = self.action_space.specialize_to_gymaction(
                source_node, observation, np.int32(abstract_action))

            if not gym_action:
                return "exploit[undefined]->explore", None, None

            elif wrapped_env.env.is_action_valid(gym_action, observation['action_mask']):
                return "exploit", gym_action, source_node
            else:
                return "exploit[invalid]->explore", None, None
        else:
            return "exploit[no_actor]->explore", None, None

In [5]:
class NetworkState():
# for debug
#     def __init__(self, state, ep,count,numiters, done=False, reward = 0):
    def __init__(self, state, ep, done=False, reward = 0):
        '''
        Data structure to represent state of the environment
        self.env : Environment of gym_network_environment simulator
        self.state : State of the gym_network_environment
        self.is_done : Denotes whether the NetworkState is terminal
        self.num_lanes : Number of nodes in gym_network_environment
        self.width : Width of lanes in gym_network_environment
        self.reward : Reward of the state
        '''
        self.state = deepcopy(state)
        self.is_done = done
        '''
        TO ADD IS DONE CONDITION
        '''     
# for debug
#         self.count=count+1
#         if self.count>numiters:
#             self.is_done=True
        
        self.reward = reward
        
        self.ep = ep

        self.global_features = w.ConcatFeatures(ep, [
            # w.Feature_discovered_node_count(ep),
            # w.Feature_owned_node_count(ep),
            w.Feature_discovered_notowned_node_count(ep, None)

            # w.Feature_discovered_ports(ep),
            # w.Feature_discovered_ports_counts(ep),
            # w.Feature_discovered_ports_sliding(ep),
            # w.Feature_discovered_credential_count(ep),
            # w.Feature_discovered_nodeproperties_sliding(ep),
        ])

        self.node_specific_features = w.ConcatFeatures(ep, [
            # w.Feature_actions_tried_at_node(ep),
            w.Feature_success_actions_at_node(ep),
            w.Feature_failed_actions_at_node(ep),
            w.Feature_active_node_properties(ep),
            w.Feature_active_node_age(ep)
            # w.Feature_active_node_id(ep)
        ])

        self.state_space = w.ConcatFeatures(ep, self.global_features.feature_selection +
                                            self.node_specific_features.feature_selection)

        self.action_space = w.AbstractAction(ep)
        
    def get_state_astensor(self, state: w.StateAugmentation):
        state_vector = self.state_space.get(state, node=None)
        state_vector_float = np.array(state_vector, dtype=np.float32)
        state_tensor = torch.from_numpy(state_vector_float).unsqueeze(0)
        return state_tensor

    def implement_action(
            self,
            wrapped_env: w.AgentWrapper,
            actor_features: np.ndarray,
#             abstract_action: np.int32) -> Tuple[str, Optional[cyberbattle_env.Action], Optional[int]]:
            abstract_action: np.int32): 
        """Specialize an abstract model action into a CyberBattle gym action.

            actor_features -- the desired features of the actor to use (source CyberBattle node)
            abstract_action -- the desired type of attack (connect, local, remote).

            Returns a gym environment implementing the desired attack at a node with the desired embedding.
        """

        observation = wrapped_env.state.observation

        # Pick source node at random (owned and with the desired feature encoding)
        potential_source_nodes = [
            from_node
            for from_node in w.owned_nodes(observation)
            if np.all(actor_features == self.node_specific_features.get(wrapped_env.state, from_node))
        ]

        if len(potential_source_nodes) > 0:
            source_node = np.random.choice(potential_source_nodes)

            gym_action = self.action_space.specialize_to_gymaction(
                source_node, observation, np.int32(abstract_action))

            if not gym_action:
                return "exploit[undefined]->explore", None, None

            elif wrapped_env.env.is_action_valid(gym_action, observation['action_mask']):
                return "exploit", gym_action, source_node
            else:
                return "exploit[invalid]->explore", None, None
        else:
            return "exploit[no_actor]->explore", None, None
        
#         for debug
#     def simulateStep(self, env,ep,count,numiters, action):
    def simulateStep(self, env,ep,action):
        '''
        Simulates action at self.state and returns the next state
        '''
        observation, reward, done, info = env.step(action=action)
# for debug
#         print(reward)
#         print(done)
#         print(info)
#         print(count)
                
#         if count>numiters:
#             self.is_done=True
#       newState  = NetworkState(state=observation,ep=ep,count=count,numiters=numiters, done=self.is_done, reward=reward)
        
        newState  = NetworkState(state=observation,ep=ep,done=self.is_done, reward=reward)
        return newState

    def isDone(self):
        '''
        Returns whether the state is terminal
        '''
        return self.is_done

    def getReward(self):
        '''
        Returns reward of the state
        '''
        return self.reward
    
    def actions(self):
        _actions = [{a:e,}for a, e in self.state.action_space.spaces.items()]
        return _actions

In [6]:
class Node:
    def __init__(self, state, parent=None):
        '''
        Data structure for a node of the MCTS tree
        self.state : GridWorld state represented by the node
        self.parent : Parent of the node in the MCTS tree
        self.numVisits : Number of times the node has been visited
        self.totalReward : Sum of all rewards backpropagated to the node
        self.isDone : Denotes whether the node represents a terminal state
        self.allChildrenAdded : Denotes whether all actions from the node have been explored
        self.children : Set of children of the node in the MCTS tree
        '''
        self.state = state
        self.parent = parent
        self.numVisits = 0
        self.totalReward = state.reward #0
        self.isDone = state.isDone()
        self.allChildrenAdded = state.isDone()
        self.children = {}

In [7]:
class MonteCarloTreeSearch:
    def __init__(self, env, numiters, explorationParam, playoutPolicy=randomPolicy, random_seed=None):
        '''
        self.numiters : Number of MCTS iterations
        self.explorationParam : exploration constant used in computing value of node
        self.playoutPolicy : Policy followed by agent to simulate rollout from leaf node
        self.root : root node of MCTS tree
        '''
        self.env = env
        self.numiters = numiters
        self.explorationParam = explorationParam
        self.playoutPolicy = playoutPolicy
        self.root = None
        self.random, self.seed = seeding.np_random(random_seed)

    def buildTreeAndReturnBestAction(self, initialState):
        '''
        Function to build MCTS tree and return best action at initialState
        '''
        self.root = Node(state=initialState, parent=None)
        for i in range(self.numiters):
            self.addNodeAndBackpropagate()
        bestChild = self.chooseBestActionNode(self.root, 0)
        for action, cur_node in self.root.children.items():
            if cur_node is bestChild:
               return action

    def addNodeAndBackpropagate(self):
        '''
        Function to run a single MCTS iteration
        '''
        node = self.addNode()
        reward = self.playoutPolicy(node.state, self.env, self.random)
# for debug
#         reward = self.playoutPolicy(node.state,self.numiters, self.env, self.random)
        self.backpropagate(node, reward)
        

    def addNode(self):
        '''
        Function to add a node to the MCTS tree
        '''
        cur_node = self.root
        while not cur_node.isDone:
# for debug
#         for i in range(numiters):
            if cur_node.allChildrenAdded:
                cur_node = self.chooseBestActionNode(cur_node, self.explorationParam)
            else:
#                 actions = self.env.actions
#                 actions=list(gym_env.action_space)
#                 for action in actions:
                action= gym_env.action_space.sample()
                action_str=str(action.keys())
                print(action_str)
                if action_str not in cur_node.children:
#                     for debug
#                     childnode = cur_node.state.simulateStep(env=self.env,ep=ep,count=0,numiters=numiters,action=action)
                    childnode = cur_node.state.simulateStep(env=self.env,ep=ep,action=action)
                    newNode = Node(state=childnode, parent=cur_node)
#                     cur_node.children[action] = newNode
                    cur_node.children[action_str] = newNode
#                     if len(actions) == len(cur_node.children):
                    if len(list(gym_env.action_space)) == len(cur_node.children):
                        cur_node.allChildrenAdded = True
                    return newNode
        return cur_node

    def backpropagate(self, node, reward):
        '''
        FILL ME : This function should implement the backpropation step of MCTS.
                  Update the values of relevant variables in Node Class to complete this function
        '''
        while True:
            # Add values to node
            node.totalReward += reward
            node.numVisits += 1

            # Terminating Condition
            if node.parent is None:
                break

            # Go to parent node
            node = node.parent

    def chooseBestActionNode(self, node, explorationValue):
        random = self.random
        bestValue = float("-inf")
        bestNodes = []
        for child in node.children.values():
            '''
            FILL ME : Populate the list bestNodes with all children having maximum value

                       Value of all nodes should be computed as mentioned in question 3(b).
                       All the nodes that have the largest value should be included in the list bestNodes.
                       We will then choose one of the nodes in this list at random as the best action node.
            '''
            # Get Child values
            try:
                child_value = (child.totalReward/child.numVisits) + \
                    explorationValue * math.sqrt((math.log(node.numVisits) / child.numVisits))
            except ZeroDivisionError:  # Case if division by zero
                child_value = 0

            # Case if child value more than best value
            if child_value > bestValue:
                bestNodes = [child,]
                bestValue = child_value
            elif child_value == bestValue:  # Case if child value is best value
                bestNodes.append(child)

        return random.choice(bestNodes)

In [8]:
try:
    from runner.abstracts import Agent
except:
    class Agent(object): pass

In [9]:
class MCTSAgent(Agent):
    def initialize(self, env, numiters, random_seed):
        self.env = env
        self.numiters = numiters
        self.random_seed = random_seed
        self.explorationParam = 1.
        self.mcts = MonteCarloTreeSearch(env=self.env, numiters=self.numiters,
                explorationParam=self.explorationParam, random_seed=self.random_seed)

    def step(self, state,numiters, *args, **kwargs) :
        _state = NetworkState(state,numiters)
        action = self.mcts.buildTreeAndReturnBestAction(initialState=_state)
        return action

def create_agent(test_case_env, *args, **kwargs):
    return MCTSAgent()

In [10]:
### Sample test cases.

RANDOM_SEED = 3
# numiters = 500
numiters = 10
stochasticity = 1.

gym_env = gym.make('CyberBattleToyCtf-v0')

actions = gym_env.action_space.spaces
gym_env.render()
done = False
mcts = MonteCarloTreeSearch(env=deepcopy(gym_env), numiters=numiters, explorationParam=1., random_seed=RANDOM_SEED)
state = gym_env.reset()

#for debug
# count=0;
# total_reward = 0




while not done:
    
# for debug
# for t in range(iteration_count):

    net_state = NetworkState(state,ep,done=done)
#     net_state = NetworkState(state,ep,0,numiters,done=done)

#     action = mcts.buildTreeAndReturnBestAction(initialState=net_state)
# for debug
    action = gym_env.sample_valid_action()
    state, reward, done, info = gym_env.step(action)


    total_reward += reward
        
    if reward>0:
        print('####### rewarded action: {action}')
        print(f'total_reward={total_reward} reward={reward}')
        gym_env.render()
    
# for debug
#     count=count+1
#     if count==iteration_count:
#         done = True
#         break
    
    
print ("simulation done")

Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]


INFO: Resetting the CyberBattle environment
odict_keys(['connect'])


AssertionError: 