In [None]:
import numpy as np
import const
import matplotlib.pyplot as plt
import datetime
import joblib
from tqdm import tqdm
import torch
import torch.multiprocessing as mp

import gym
import ctfsql
import evaluate

In [None]:
from typing import Mapping, Any

import numpy as np
from ctfsql.agents.random_agent import Agent


class RandomAgent(Agent):
    """ Agent that randomly selects a command from the admissible ones.

    Sampling is done with a private ``np.random.RandomState`` seeded from
    ``seed``, so two agents built with the same seed pick the same sequence of
    command indices when offered command lists of the same lengths.
    """
    def __init__(self, seed=1234):
        """Store the seed and build the agent's own random generator.

        Args:
            seed: Seed for the private ``RandomState`` used by ``act``.
        """
        self.seed = seed
        self.rng = np.random.RandomState(self.seed)

    def act(self, obs: str, score: int, done: bool, infos: Mapping[str, Any]):
        """Pick one of the admissible commands at random.

        Args:
            obs: Current observation (unused by this agent).
            score: Current score (unused by this agent).
            done: Whether the episode has ended (unused by this agent).
            infos: Mapping that must contain ``"admissible_commands"``, an
                indexable, non-empty sequence of commands.

        Returns:
            A ``(command_id, command)`` pair, where ``command_id`` is the index
            of ``command`` in ``infos["admissible_commands"]``.

        Raises:
            ValueError: If the list of admissible commands is empty
                (``randint`` is called with an empty range).
        """
        admissible = infos["admissible_commands"]
        # Draw from the seeded generator rather than the global np.random state;
        # otherwise the seed given to __init__ would have no effect.
        command_id = self.rng.randint(0, len(admissible))
        return command_id, admissible[command_id]

In [None]:
import torch
from tqdm import tqdm

            # NOTE(review): orphaned fragment. These lines are indented, but no enclosing
            # `def`/`class` header is visible in this cell, and they use `self`, `obs` and
            # `infos`, none of which are defined here. As written the cell cannot be parsed
            # (unexpected indent). Presumably this is the body of a worker method whose
            # header was lost -- TODO: restore the header or delete the fragment.
            score = 0
            sum_score = 0
            done = False
            nb_steps = 0
            print(self.name, self.env.url, 'started')
            # No exit condition is visible in this fragment: `done` is reassigned by
            # `env.step` but never checked, and there is no `break`.
            while True:
                command_id, command = self.agent.act(obs, score, done, infos)
                print(self.name, command)
                obs, score, done, infos = self.env.step(command_id, command)

def _load_history(path, success_message):
    """Load a saved per-episode history array, or return an empty one if unavailable."""
    try:
        history = np.load(path)
    except (OSError, ValueError, EOFError):
        # Missing or unreadable file: resuming is best-effort, so start from scratch.
        return np.array([], dtype=float)
    print(success_message)
    return history


def _save_checkpoint(agent, steps, mean_scores):
    """Write the step/score histories and the pickled agent to the working directory."""
    np.save('steps', steps)  # np.save appends '.npy', matching the 'steps.npy' that is loaded
    np.save('mean_scores', mean_scores)
    joblib.dump(agent, 'trained_agent.pkl', compress=True)


def run_episode(agent, max_step=2000, nb_episodes=1, verbose=True):
    """Run the agent on the 'ctfsql-v0' environment, resuming from saved history.

    Histories are loaded from 'steps.npy' and 'mean_scores.npy' when present.
    Episodes start at index ``len(steps)``, so episodes already recorded are
    not replayed. A checkpoint (both histories plus the agent) is written every
    100th episode index and once more after the last episode, so no completed
    episode is lost when the run finishes.

    Args:
        agent: Object exposing ``act(obs, score, done, infos)`` that returns a
            ``(command_id, command)`` pair.
        max_step: Maximum number of environment steps per episode.
        nb_episodes: Total number of episodes the history should contain when
            the function returns (including previously saved ones).
        verbose: If true, print the step count and score of each episode.

    Returns:
        ``(agent, steps, mean_scores)`` where ``steps`` and ``mean_scores`` are
        arrays with one entry per recorded episode.
    """
    torch.manual_seed(20211021)  # For reproducibility when using action sampling.

    steps = _load_history('steps.npy', 'ステップ数ロード成功')
    mean_scores = _load_history('mean_scores.npy', '平均報酬ロード成功')

    env = gym.make('ctfsql-v0')
    try:
        unsaved = False  # True while there are finished episodes not yet written to disk.
        for no_episode in tqdm(range(len(steps), nb_episodes)):
            obs, infos = env.reset()  # Start new episode.

            score = 0
            sum_score = 0
            done = False
            nb_steps = 0
            print(env.url)
            # Strict '<' so an episode takes at most max_step steps
            # ('<=' allowed max_step + 1).
            while not done and nb_steps < max_step:
                command_id, command = agent.act(obs, score, done, infos)
                obs, score, done, infos = env.step(command_id, command)

                nb_steps += 1
                sum_score += score
            agent.act(obs, score, done, infos)  # Let the agent know the game is done.

            steps = np.append(steps, nb_steps)
            # NOTE(review): sum_score is the running total of `score` over the episode.
            # If `score` is a scalar, np.mean leaves it unchanged, so this records the
            # episode total rather than a per-step mean -- confirm which is intended.
            mean_score = round(np.mean(sum_score), 3)
            mean_scores = np.append(mean_scores, mean_score)
            unsaved = True
            if verbose:
                print('ステップ数:{0}, 平均報酬:{1}'.format(nb_steps, mean_score))

            if no_episode % 100 == 0:
                _save_checkpoint(agent, steps, mean_scores)
                unsaved = False

        # Persist episodes finished since the last periodic checkpoint.
        if unsaved:
            _save_checkpoint(agent, steps, mean_scores)
    finally:
        # Release the environment even if an episode raised.
        env.close()
    return agent, steps, mean_scores

In [None]:
from ctfsql.agents.agent import NeuralAgent
# Resume from a previously saved agent when one can be loaded; otherwise start fresh.
# NOTE(review): joblib.load unpickles the file, which can execute arbitrary code --
# only load 'trained_agent.pkl' from a trusted source.
try:
    agent = joblib.load('trained_agent.pkl')
    print('エージェントロード成功')
except Exception:
    # Any load failure still falls back to a new agent, but KeyboardInterrupt and
    # SystemExit are no longer swallowed as they were by a bare `except:`.
    agent = NeuralAgent()
agent.train()  # presumably switches the agent to training mode -- confirm in NeuralAgent
trained_agent, steps, mean_scores = run_episode(agent)

In [None]:
# Plot the number of steps taken in each episode and save the figure as a PNG.
fig, ax = plt.subplots()
ax.plot(steps)
ax.set_title('neural agent plot')
ax.set_xlabel('nb_episodes')
ax.set_ylabel('steps per episode')
fig.savefig("nn_agent.png")