In [1]:
import datetime
import queue

import gym
import joblib
import matplotlib.pyplot as plt
import numpy as np
import torch
import torch.multiprocessing as mp
from tqdm import tqdm

import const
import ctfsql
import evaluate

In [2]:
from typing import Mapping, Any

import numpy as np
from ctfsql.agents.random_agent import Agent


class RandomAgent(Agent):
    """Agent that randomly selects a command from the admissible ones.

    Args:
        seed: Seed for the agent's private ``np.random.RandomState``; two
            agents built with the same seed pick the same sequence of
            command indices for the same sequence of command-list lengths.
    """

    def __init__(self, seed=1234):
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]):
        """Pick one admissible command uniformly at random.

        Args:
            obs: Current observation (not used by this agent).
            score: Last reward (not used by this agent).
            done: Whether the episode has ended (not used by this agent).
            infos: Mapping that must contain ``"admissible_commands"``, an
                indexable, non-empty sequence of commands.

        Returns:
            ``(command_id, command)`` where ``command_id`` is the index of
            ``command`` inside ``infos["admissible_commands"]``.

        Raises:
            ValueError: If the list of admissible commands is empty
                (raised by ``randint`` because the range is empty).
        """
        admissible = infos["admissible_commands"]
        # Sample from the seeded generator created in __init__ rather than
        # the global np.random state, so that `seed` actually takes effect.
        command_id = self.rng.randint(0, len(admissible))
        return command_id, admissible[command_id]

In [3]:
class Worker(mp.Process):
    """Process that plays ``ctfsql-v0`` episodes with the given agent.

    The worker keeps starting episodes while the shared episode counter is
    below ``max_episodes``. After each episode it bumps the shared counter,
    folds the episode's total score into the shared running reward and puts
    that running reward on ``res_queue``. When it stops, for any reason, it
    closes its environment and puts ``None`` on ``res_queue`` as an
    end-of-work sentinel.

    Args:
        agent: Object whose ``act(obs, score, done, infos)`` returns
            ``(command_id, command)``.
        name: Integer worker index; the process name is ``'w%02i' % name``.
        global_ep: Shared episode counter exposing ``.value`` and
            ``.get_lock()``.
        global_ep_r: Shared running reward exposing ``.value`` and
            ``.get_lock()``.
        res_queue: Queue-like object exposing ``put``.
        max_episodes: The worker stops starting new episodes once the shared
            counter reaches this value. Defaults to 5, the limit that was
            previously hard-coded.
    """

    def __init__(self, agent, name, global_ep, global_ep_r, res_queue, max_episodes=5):
        super(Worker, self).__init__()
        self.agent = agent
        self.name = 'w%02i' % name
        self.g_ep, self.g_ep_r, self.res_queue = global_ep, global_ep_r, res_queue
        self.max_episodes = max_episodes
        self.env = gym.make('ctfsql-v0')

    def run(self):
        """Play episodes until the shared counter reaches ``max_episodes``.

        The environment is always closed and the ``None`` sentinel is always
        queued, even when an episode raises, so a consumer of ``res_queue``
        is never left waiting on a worker that has already died.
        """
        try:
            while self.g_ep.value < self.max_episodes:
                print(self.name)
                obs, infos = self.env.reset()  # Start new episode.
                print('ok')

                score = 0
                sum_score = 0
                done = False
                print(self.name, self.env.url, 'started')
                while True:
                    command_id, command = self.agent.act(obs, score, done, infos)
                    print(self.name, command)
                    obs, score, done, infos = self.env.step(command_id, command)

                    sum_score += score

                    if done:
                        print(self.name, 'finished')
                        # Let the agent know the game is done.
                        self.agent.act(obs, score, done, infos)
                        print(self.name, '合計報酬: {}'.format(sum_score))
                        self.record(sum_score)
                        break
        finally:
            try:
                self.env.close()
            finally:
                # Sentinel: tells whoever reads the queue that this worker
                # will not produce any more results.
                self.res_queue.put(None)

    def record(self, sum_score):
        """Publish the result of one finished episode.

        Increments the shared episode counter, updates the shared running
        reward (first episode: the raw score; afterwards
        ``0.99 * previous + 0.01 * sum_score``), puts the new running reward
        on ``res_queue`` and prints a one-line summary.

        Args:
            sum_score: Total score accumulated during the episode.
        """
        # Snapshot both values while their locks are held so the queue entry
        # and the log line describe this update, not a later one made by
        # another worker.
        with self.g_ep.get_lock():
            self.g_ep.value += 1
            episode = self.g_ep.value
        with self.g_ep_r.get_lock():
            if self.g_ep_r.value == 0.:
                self.g_ep_r.value = sum_score
            else:
                self.g_ep_r.value = self.g_ep_r.value * 0.99 + sum_score * 0.01
            running_reward = self.g_ep_r.value
        self.res_queue.put(running_reward)
        print(
            self.name,
            "Ep:", episode,
            "| Ep_r: %.0f" % running_reward,
        )

In [4]:
from ctfsql.agents.agent import NeuralAgent
from ctfsql.agents.command_scorer import CommandScorer
from ctfsql.agents.share_adam import SharedAdam

# Global model: share_memory() shares its parameters across processes.
g_model = CommandScorer(hidden_size=128, input_size=const.MAX_VOCAB_SIZE)
g_model.share_memory()

# Global optimizer built over the global model's parameters.
g_optimizer = SharedAdam(g_model.parameters(), betas=(0.92, 0.999), lr=1e-4)

# Cross-process bookkeeping handed to every Worker: an int episode counter,
# a double running reward, and a queue for per-episode results.
global_ep = mp.Value('i', 0)
global_ep_r = mp.Value('d', 0.)
res_queue = mp.Queue()

agent = NeuralAgent(g_model, g_optimizer)
agent.train()  # presumably switches the agent to training mode — TODO confirm

In [5]:
# Start one worker process per CPU core.
processes = []
for i in range(mp.cpu_count()):
    worker = Worker(agent, i, global_ep, global_ep_r, res_queue)
    worker.start()
    processes.append(worker)

# Drain res_queue BEFORE joining: a process that has put items on a
# multiprocessing queue may not exit until they are consumed, so joining
# first can deadlock. Each worker posts None once when it is finished.
episode_rewards = []
pending_workers = len(processes)
while pending_workers:
    try:
        item = res_queue.get(timeout=1.0)
    except queue.Empty:
        if not any(worker.is_alive() for worker in processes):
            break  # every worker has exited and nothing is left to read
        continue
    if item is None:
        pending_workers -= 1
    else:
        episode_rewards.append(item)

for worker in processes:
    worker.join()

w00
reset
reset2
w01
http://127.0.0.1/ctf_2/ctf_2_3.php
reset
reset3
ok
reset2
w00 http://127.0.0.1/ctf_2/ctf_2_3.php started
http://127.0.0.1/ctf_2/ctf_2_3.php
act
w02
reset3
hoge
ok
reset
w01 http://127.0.0.1/ctf_2/ctf_2_3.php started
reset2
act
http://127.0.0.1/ctf_2/ctf_2_3.php
w03
hoge
reset3
reset
ok
reset2
hoge1
w04
hoge1
w02 http://127.0.0.1/ctf_2/ctf_2_3.php started
http://127.0.0.1/ctf_2/ctf_2_3.php
reset
act
reset3
hoge
reset2
ok
w05
http://127.0.0.1/ctf_2/ctf_2_3.php
w03 http://127.0.0.1/ctf_2/ctf_2_3.php started
reset
hoge1
reset3
act
ok
reset2
hoge
http://127.0.0.1/ctf_2/ctf_2_3.php
w06
w04 http://127.0.0.1/ctf_2/ctf_2_3.php started
reset3
ok
act
reset
hoge
hoge1
w05 http://127.0.0.1/ctf_2/ctf_2_3.php started
w07
act
reset2
hoge
http://127.0.0.1/ctf_2/ctf_2_3.php
reset
hoge1
reset2
reset3
ok
http://127.0.0.1/ctf_2/ctf_2_3.php
reset3
w06 http://127.0.0.1/ctf_2/ctf_2_3.php started
hoge1
act
ok
hoge
w07 http://127.0.0.1/ctf_2/ctf_2_3.php started
act
hoge
hoge1
hoge1
model2 t

          0,  0,  0,  0,  0,  0,  0, 11]])model2 tensor([[ 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  9, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12,  9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  9, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12],
        [10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13,
         13, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13,
         13, 13, 13, 13, 13, 13, 13, 13],
        [10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 10, 10, 10, 10, 10, 10, 18,
         18, 18, 18, 18, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 10, 10, 10,
         10, 10, 10, 18, 18, 18, 18, 18],
        [11, 11, 14, 14, 14, 14, 19, 19, 19, 19, 19, 11, 11, 14, 14, 14, 14, 19,
         19, 19, 19, 19, 11, 11, 14, 14, 14, 14, 19, 19, 19, 19, 19, 11, 11, 14,
         14, 14, 14, 19, 19, 19, 19, 19],
        [ 0,  0, 11, 15, 15, 15, 20, 20, 20, 20, 20,  0,  0, 11, 15, 15, 15, 20,
         20, 20, 20, 20,  0,  0, 11, 15, 15, 15, 20, 2

          0,  0,  0,  0,  0,  0,  0, 11]])model2 tensor([[ 9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  9, 12, 12, 12, 12, 12, 12,
         12, 12, 12, 12,  9, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,  9, 12, 12,
         12, 12, 12, 12, 12, 12, 12, 12],
        [10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13,
         13, 13, 13, 13, 10, 13, 13, 13, 13, 13, 13, 13, 13, 13, 13, 10, 13, 13,
         13, 13, 13, 13, 13, 13, 13, 13],
        [10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 10, 10, 10, 10, 10, 10, 18,
         18, 18, 18, 18, 10, 10, 10, 10, 10, 10, 18, 18, 18, 18, 18, 10, 10, 10,
         10, 10, 10, 18, 18, 18, 18, 18],
        [11, 11, 14, 14, 14, 14, 19, 19, 19, 19, 19, 11, 11, 14, 14, 14, 14, 19,
         19, 19, 19, 19, 11, 11, 14, 14, 14, 14, 19, 19, 19, 19, 19, 11, 11, 14,
         14, 14, 14, 19, 19, 19, 19, 19],
        [ 0,  0, 11, 15, 15, 15, 20, 20, 20, 20, 20,  0,  0, 11, 15, 15, 15, 20,
         20, 20, 20, 20,  0,  0, 11, 15, 15, 15, 20, 2

Process w07:
Process w05:
Process w06:
Process w04:
Process w02:
Process w01:
Process w03:
Process w00:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/opt/anaconda3/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
 

KeyboardInterrupt
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2043, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2043, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
  File "/opt/anaconda3/lib/python3.7/site-packages/torch/nn/functional.py", line 2043, in embedding
    return torch.embedding(weight, input, padding_idx, scale_grad_by_freq, sparse)
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt
KeyboardInterrupt


KeyboardInterrupt: 

In [None]:
# fig = plt.figure()
# plt.plot(steps)
# plt.title('neural agent plot')
# plt.xlabel('nb_episodes')
# plt.ylabel('steps per episode')
# fig.savefig("nn_agent.png")