In [2]:
!pip install gym~=0.17.3
!pip install numpy==1.19.4
!pip install boolean.py@git+https://github.com/bastikr/boolean.py/@74063a8588875c058e6dbbb85b69ed052e1f2099#egg=boolpy_stubs
!pip install networkx==2.4
!pip install pyyaml~=5.4.1
!pip install setuptools~=49.2.1
!pip install matplotlib~=3.2.1
!pip install plotly~=4.11.0
!pip install tabulate~=0.8.7
!pip install ordered_set==4.0.2
!pip install progressbar2==3.51.4
!pip install decorator==4.3

Collecting boolean.py@ git+https://github.com/bastikr/boolean.py/@74063a8588875c058e6dbbb85b69ed052e1f2099#egg=boolpy_stubs
  Using cached boolean.py-3.9-py2.py3-none-any.whl


In [1]:
import ast
import math
import sys
from copy import deepcopy

import gym
import numpy as np
from gym.utils import seeding

import cyberbattle._env.cyberbattle_env
from cyberbattle._env.cyberbattle_env import AttackerGoal, Action
# Build the CyberBattle toy CTF gym environment that the later cells inspect and step.
# NOTE(review): own_atleast_percent=0.3 presumably means the attacker goal is reached
# once 30% of the nodes are owned - confirm against the AttackerGoal definition.
gym_env = gym.make('CyberBattleToyCtf-v0', attacker_goal=AttackerGoal(own_atleast_percent=0.3))

import logging
# logging.basicConfig(stream=sys.stdout, level=logging.INFO, format="%(levelname)s: %(message)s")

In [2]:
# Module-level cache of action strings (format "<key>-<list>") whose simulated step was
# flagged as not valid; MonteCarloTreeSearch.addNode reads and extends this set.
invalid_actions = set()

In [3]:
def randomPolicy(state, env, random):
    '''
    Rollout policy used for the playout phase of MCTS.

    Keeps drawing an action from ``env.sample_valid_action()`` and applying it
    through ``state.simulateStep`` until the current state reports it is done.
    Only steps that ``simulateStep`` flags as valid contribute their reward.

    state  : starting state; must expose isDone(), simulateStep() and getReward()
    env    : environment that is sampled and stepped during the playout
    random : accepted for interface compatibility; not used by this policy

    Returns the accumulated reward (float).
    '''
    total = 0.
    while True:
        # Stop as soon as the current state is terminal
        if state.isDone():
            return total

        sampled = env.sample_valid_action()
        state, accepted = state.simulateStep(env=env, action=sampled)

        # Rejected steps leave the state unchanged and add nothing to the return
        if not accepted:
            continue
        total += state.getReward()

def string_to_action(action_str):
    '''
    Convert an action string of the form "<key>-<list literal>" back into an Action.

    action_str : e.g. "connect-[0, 1, 2, 3]"; the text before the first '-' selects
                 the action kind, the remainder is parsed as a Python literal.

    Returns an Action whose single entry is an int32 numpy array. The keys
    'connect' and 'local_vulnerability' map to the fields of the same name; any
    other key is mapped to 'remote_vulnerability'.

    Raises ValueError if the string contains no '-' or the payload is not a
    valid Python literal.
    '''
    # Split on the first '-' only, so negative numbers in the payload do not break parsing
    key, value_str = action_str.split('-', 1)

    # literal_eval only accepts Python literals; unlike eval it cannot run arbitrary code
    value = np.array(ast.literal_eval(value_str), np.int32)

    if key == 'connect':
        return Action(connect=value)
    if key == 'local_vulnerability':
        return Action(local_vulnerability=value)
    return Action(remote_vulnerability=value)

In [4]:
class NetworkState():
    def __init__(self, state, done=False, env=None, reward=0):
        '''
        Data structure to represent a state of the environment
        self.state : Deep copy of the observation passed in as `state`
        self.is_done : Denotes whether the NetworkState is terminal
        self.reward : Reward associated with reaching this state
        self.env : Environment object associated with this state (may be None)
        '''
        self.state = deepcopy(state)
        self.is_done = done
        self.reward = reward

        self.env = env

    def simulateStep(self, env, action):
        '''
        Applies `action` to `env` (which is mutated by env.step) and returns a tuple
        (next_state, valid_action_flag).

        A step is accepted when it yields a positive reward or ends the episode;
        in that case a new NetworkState wrapping the observation is returned with
        flag True. Otherwise the current state is returned unchanged with flag False.
        '''
        observation, reward, done, info = env.step(action=action)

        # A terminal transition must be propagated even when its reward is not
        # positive; otherwise callers looping on isDone() never see the episode end.
        if reward > 0 or done:
            newState = NetworkState(state=observation, done=done, env=env, reward=reward)
            return newState, True
        return self, False

    def isDone(self):
        '''
        Returns whether the state is terminal
        '''
        return self.is_done

    def getReward(self):
        '''
        Returns reward of the state
        '''
        return self.reward

In [5]:
class Node:
    def __init__(self, state, parent=None):
        '''
        A single vertex of the MCTS search tree.

        state  : state object wrapped by this node; it must expose a `reward`
                 attribute and an `isDone()` method
        parent : parent Node, or None for the root

        Attributes set here:
        self.state            : the wrapped state
        self.parent           : parent of the node in the tree
        self.children         : mapping from a key to the child Node, empty at creation
        self.numVisits        : visit counter, starts at 0
        self.totalReward      : running reward sum, seeded with state.reward
        self.isDone           : result of state.isDone() at creation time
        self.allChildrenAdded : expansion flag, initialised from state.isDone()
        '''
        # Wrapped state and tree linkage
        self.state = state
        self.parent = parent
        self.children = dict()

        # Search statistics
        self.numVisits = 0
        self.totalReward = state.reward

        # A terminal state cannot be expanded, so it starts out fully expanded
        self.isDone = state.isDone()
        self.allChildrenAdded = state.isDone()

In [9]:
class MonteCarloTreeSearch(object):
    '''
    Monte Carlo Tree Search planner: repeatedly selects/expands a node, runs a
    playout from it and backpropagates the playout reward, then returns the action
    that leads to the best child of the root.
    '''

    def __init__(self, env, numiters, explorationParam, playoutPolicy=randomPolicy, random_seed=None,
                 maxExpansionAttempts=250, verbose=True):
        '''
        self.env : fallback environment, used for nodes whose state carries no environment
        self.numiters : Number of MCTS iterations
        self.explorationParam : exploration constant used in computing value of node
        self.playoutPolicy : Policy followed by agent to simulate rollout from leaf node
        self.root : root node of MCTS tree
        self.random, self.seed : random generator and seed returned by seeding.np_random(random_seed)
        self.maxExpansionAttempts : number of unsuccessful samples tolerated before a node is
                                    declared fully expanded (default 250, the former hard-coded value)
        self.verbose : when True (default) progress messages are printed
        '''
        self.env = env
        self.numiters = numiters
        self.explorationParam = explorationParam
        self.playoutPolicy = playoutPolicy
        self.root = None
        self.random, self.seed = seeding.np_random(random_seed)
        self.maxExpansionAttempts = maxExpansionAttempts
        self.verbose = verbose

    def _log(self, message):
        '''Print a progress message unless verbose output was disabled.'''
        if self.verbose:
            print(message)

    def _envForNode(self, node):
        '''
        Return the environment matching the state of `node`.

        Each state may carry the environment it was produced in (its `env`
        attribute); sampling and simulating from that environment keeps the
        simulation consistent with the node's depth in the tree. Falls back to
        self.env when the state carries no environment.
        '''
        node_env = getattr(node.state, 'env', None)
        return self.env if node_env is None else node_env

    def buildTreeAndReturnBestAction(self, initialState):
        '''
        Function to build MCTS tree and return best action at initialState

        Raises ValueError (from chooseBestActionNode) if no child could be added
        to the root.
        '''
        self.root = Node(state=initialState, parent=None)
        for i in range(self.numiters):
            self._log(f'Iteration {i}')
            self.addNodeAndBackpropagate()

        # Exploration is switched off for the final choice: pure average reward
        bestChild = self.chooseBestActionNode(self.root, 0)
        for action_str, cur_node in self.root.children.items():
            if cur_node is bestChild:
                return string_to_action(action_str)

    def addNodeAndBackpropagate(self):
        '''
        Function to run a single MCTS iteration
        '''
        node = self.addNode()
        self._log('Node ADDED')
        # The playout runs on a copy so the environment stored with the node stays untouched
        reward = self.playoutPolicy(node.state, deepcopy(self._envForNode(node)), self.random)
        self._log('reward GOTTEN')
        self.backpropagate(node, reward)
        self._log('Backpropagated!')

    def addNode(self):
        '''
        Function to add a node to the MCTS tree

        Walks down from the root through fully expanded nodes (choosing the best
        child with the exploration term) until a node can be expanded, and returns
        the newly created child. Returns the reached node itself when it is
        terminal or when it is fully expanded without any child.
        '''
        cur_node = self.root
        while not cur_node.isDone:
            if cur_node.allChildrenAdded:
                if not cur_node.children:
                    # Dead end: no sampled action was accepted here, so treat the node
                    # as a leaf instead of choosing from an empty set of children.
                    return cur_node
                self._log('\nEntered Another Node!\n')
                cur_node = self.chooseBestActionNode(cur_node, self.explorationParam)
                continue

            newNode = self._expand(cur_node)
            if newNode is not None:
                return newNode

        return cur_node

    def _expand(self, cur_node):
        '''
        Try to add one new child to `cur_node`.

        Returns the new child Node, or None after marking the node as fully
        expanded once more than self.maxExpansionAttempts samples were unsuccessful.
        '''
        node_env = self._envForNode(cur_node)
        failed_attempts = 0
        while failed_attempts <= self.maxExpansionAttempts:
            # Get action and action string; tolist() yields plain Python ints so the
            # string can be parsed back by string_to_action
            action = node_env.sample_valid_action()
            key, value = next(iter(action.items()))
            action_str = f"{key}-{np.asarray(value).tolist()}"

            # Skip actions that were already tried here or are cached as invalid.
            # NOTE(review): invalid_actions is shared by all nodes, so an action rejected
            # in one state is never retried in another - confirm this is intended.
            if action_str in cur_node.children or action_str in invalid_actions:
                failed_attempts += 1
                continue

            # Simulate on a copy so the environment stored with the node is not advanced
            childState, valid_action_flag = cur_node.state.simulateStep(env=deepcopy(node_env), action=action)
            if valid_action_flag:
                newNode = Node(state=childState, parent=cur_node)
                cur_node.children[action_str] = newNode
                return newNode

            invalid_actions.add(action_str)
            failed_attempts += 1

        cur_node.allChildrenAdded = True
        self._log('\nAll Children Added!\n')
        return None

    def backpropagate(self, node, reward):
        '''
        Backpropagation step of MCTS: adds `reward` to totalReward and increments
        numVisits for `node` and every ancestor up to the root.
        '''
        while node is not None:
            node.totalReward += reward
            node.numVisits += 1
            node = node.parent

    def chooseBestActionNode(self, node, explorationValue):
        '''
        Returns a child of `node` with maximal value, ties broken at random.

        The value of a child is
            totalReward / numVisits + explorationValue * sqrt(ln(node.numVisits) / numVisits)
        and 0 for a child that has not been visited yet.

        Raises ValueError if `node` has no children.
        '''
        if not node.children:
            raise ValueError('Cannot choose the best action: the node has no children')

        # math.log(0) raises ValueError, so an unvisited parent contributes no exploration bonus
        logParentVisits = math.log(node.numVisits) if node.numVisits > 0 else 0.0

        bestValue = float("-inf")
        bestNodes = []
        for child in node.children.values():
            if child.numVisits > 0:
                child_value = (child.totalReward / child.numVisits) + \
                    explorationValue * math.sqrt(logParentVisits / child.numVisits)
            else:
                # Explicit check instead of catching ZeroDivisionError
                child_value = 0

            # Keep every child that shares the best value seen so far
            if child_value > bestValue:
                bestNodes = [child]
                bestValue = child_value
            elif child_value == bestValue:
                bestNodes.append(child)

        return self.random.choice(bestNodes)

In [7]:
try:
    from runner.abstracts import Agent
except ImportError:
    # The runner package is optional: fall back to a stand-in base class when it is
    # not installed. Only ImportError is caught so that other failures stay visible.
    class Agent(object):
        '''Placeholder base class used when runner.abstracts cannot be imported.'''
        pass

In [7]:
# Inspect the attacker goal stored on the environment. The attribute is private to
# CyberBattleEnv, hence the name-mangled access.
gym_env._CyberBattleEnv__attacker_goal

AttackerGoal(reward=0.0, low_availability=1.0, own_atleast=0, own_atleast_percent=0.5)

In [2]:
# List the attribute names available on the environment object.
# NOTE(review): the saved output is a NameError, so this cell was last run in a kernel
# where gym_env had not been defined yet.
dir(gym_env)

NameError: name 'gym_env' is not defined

In [10]:
### Sample test cases.
# Drives the environment for at most 4 steps; each step's action is planned by a fresh MCTS tree.

RANDOM_SEED = 10
numiters = 3
# NOTE(review): stochasticity is not read anywhere else in this cell.
stochasticity = 1.

# gym_env = gym.make('CyberBattleToyCtf-v0')

gym_env.render()
done = False
# NOTE(review): the planner receives a copy of gym_env taken before reset() is called below.
mcts = MonteCarloTreeSearch(env=deepcopy(gym_env), numiters=numiters, explorationParam=1., random_seed=RANDOM_SEED)
state = gym_env.reset()

# net_state = NetworkState(state, done=done)
for i in range(4):
    print(f'Episode {i}')
    # Wrap the latest observation together with a snapshot of the live environment
    net_state = NetworkState(state, done=done, env=deepcopy(gym_env))
    # Plan with MCTS, then apply the chosen action to the real environment
    action = mcts.buildTreeAndReturnBestAction(initialState=net_state)
    state, reward, done, info = gym_env.step(action)
    gym_env.render()
    if done == True:
        break
print ("simulation done")

Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]
1,Website,discovered,,,"[ScanPageContent, ScanPageSource]"


Episode 0
Iteration 0
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 1
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 2

All Children Added!


Entered Another Node!

Node ADDED




reward GOTTEN
Backpropagated!


Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]


Episode 1
Iteration 0
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 1
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 2

All Children Added!


Entered Another Node!

Node ADDED




reward GOTTEN
Backpropagated!


Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]


Episode 2
Iteration 0
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 1
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 2

All Children Added!


Entered Another Node!

Node ADDED




reward GOTTEN
Backpropagated!


Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]


Episode 3
Iteration 0
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 1
Node ADDED
reward GOTTEN
Backpropagated!
Iteration 2

All Children Added!


Entered Another Node!

Node ADDED




reward GOTTEN
Backpropagated!


Unnamed: 0,id,status,properties,local_attacks,remote_attacks
0,client,owned,[],[SearchEdgeHistory],[]


simulation done


In [21]:
# Display the value currently bound to `action`.
# NOTE(review): the saved output is a plain string, unlike what string_to_action returns -
# presumably produced by an earlier version of the notebook; re-run to refresh.
action

'remote_vulnerability_[0 0 0]'

In [10]:
# Show the keys of the observation dictionary stored in the current NetworkState.
net_state.state.keys()

dict_keys(['newly_discovered_nodes_count', 'leaked_credentials', 'lateral_move', 'customer_data_found', 'escalation', 'action_mask', 'probe_result', 'credential_cache_matrix', 'credential_cache_length', 'discovered_node_count', 'discovered_nodes_properties', 'nodes_privilegelevel', 'credential_cache', 'discovered_nodes', 'explored_network'])

In [None]:
# Command-line style driver for a 'GridDriving-v0' environment.
# NOTE(review): LaneSpec, Point and GridWorldState are not defined or imported in the
# visible part of this notebook, so this cell would fail with NameError as written;
# it looks like leftover code from a different environment - confirm before running.
# NOTE(review): the parser requires a positional 'testcase' argument, which a notebook
# kernel's argv presumably does not provide - confirm.
if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('testcase', type=int, help='test case number')
    args = parser.parse_args()

    ### Sample test cases.
    # Each entry gives the lane layout, grid width, random seed and number of MCTS iterations.
    test_config = [{'lanes' : [LaneSpec(1, [-1, -1])] *3,'width' :5, 'seed' : 10, 'iters': 300},
                   {'lanes' : [LaneSpec(2, [-2, -1])] *3,'width' :7, 'seed' : 15, 'iters': 100},
                   {'lanes' : [LaneSpec(2, [-2, -1])] *4,'width' :8, 'seed' : 125, 'iters': 500},
                   {'lanes' : [LaneSpec(2, [-3, -2])] *4,'width' :10, 'seed' : 44, 'iters': 300},
                   {'lanes' : [LaneSpec(2, [-3, -1])] *4,'width' :10, 'seed' : 125, 'iters': 400},
                   {'lanes' : [LaneSpec(2, [-3, -1])] *4,'width' :10, 'seed' : 25, 'iters': 300}]

    # Select the configuration by index from the command-line argument
    test_case_number = args.testcase
    LANES = test_config[test_case_number]['lanes']
    WIDTH = test_config[test_case_number]['width']
    RANDOM_SEED = test_config[test_case_number]['seed']
    numiters = test_config[test_case_number]['iters']
    stochasticity = 1.
    env = gym.make('GridDriving-v0', lanes=LANES, width=WIDTH,
                   agent_speed_range=(-3,-1), finish_position=Point(0,0), #agent_ pos_init=Point(4,2),
                   stochasticity=stochasticity, tensor_state=False, flicker_rate=0., mask=None, random_seed=RANDOM_SEED)

    actions = env.actions
    env.render()
    done = False
    mcts = MonteCarloTreeSearch(env=deepcopy(env), numiters=numiters, explorationParam=1.,random_seed=RANDOM_SEED)
    state = env.reset()
    # Plan one action per step with MCTS until the environment reports the episode is done
    while not done:
        gw_state = GridWorldState(state)
        action = mcts.buildTreeAndReturnBestAction(initialState=gw_state)
        print (action)
        state, reward, done, info = env.step(action)
        env.render()
        if done == True:
            break
    print ("simulation done")