In [1]:
import numpy as np
import pandas as pd


import json
import os

import subprocess

from matplotlib import pyplot as plt

In [6]:
%matplotlib inline
%config Completer.use_jedi = False

In [3]:
import torch
from torch import nn
from torch import optim

from tensorboardX import SummaryWriter

In [643]:
from importlib import reload
from utils import dataset

reload(dataset)

<module 'utils.dataset' from '/Users/sergmiller/Documents/my/lux-ai-v1/research/utils/dataset.py'>

In [573]:
def _mean_val_loss(val, model_ff, criterion, batch_size):
    """Average `criterion` over `val` in consecutive mini-batches (no shuffling).

    The forward pass runs under `torch.no_grad()`. The caller is responsible
    for switching the model to eval mode before and back to train mode after.
    Returns NaN when `val` contains no rows.
    """
    total, n_batches = 0.0, 0
    for start in np.arange(0, val.targets.shape[0], batch_size):
        X_val = torch.FloatTensor(val.features[start:start + batch_size])
        y_val = torch.FloatTensor(val.weights[start:start + batch_size])
        a_val = torch.LongTensor(val.targets[start:start + batch_size])
        with torch.no_grad():
            val_logits = model_ff(X_val)
        total += criterion(val_logits, y_val, a_val, X_val).item()
        n_batches += 1
    return total / n_batches if n_batches else float('nan')


def learn(train, val, model_ff, criterion, epochs=5, batch_size=64, shuffle=True, freq=10, lr=1e-3, l2=1e-5, use_tb=True):
    """Train `model_ff` in place with Adam, logging train/val loss every `freq` batches.

    Args:
        train, val: objects exposing array attributes `features`, `weights`
            (passed to the criterion as the reward/advantage) and `targets`
            (passed as the action index).
        model_ff: torch module mapping a float feature batch to the payload
            expected by `criterion`.
        criterion: callable `(model_output, weights, targets, features) -> scalar loss`.
        epochs: number of passes over `train`.
        batch_size: mini-batch size for both training and validation.
        shuffle: when True (default) the row order is reshuffled with NumPy's
            global RNG at the start of every epoch.
        freq: report the loss and run validation every `freq` training batches.
        lr, l2: Adam learning rate and weight decay.
        use_tb: when True, also log the scalars to a tensorboardX SummaryWriter.

    Returns:
        None. The model is updated in place.
    """
    writer = SummaryWriter() if use_tb else None

    n_train = train.targets.shape[0]
    ids_nn = np.arange(n_train)
    optimizer = optim.Adam(model_ff.parameters(), lr=lr, weight_decay=l2)

    n_iter = 0

    try:
        for epoch in np.arange(epochs):
            if shuffle:
                np.random.shuffle(ids_nn)

            model_ff.train(True)

            for b in np.arange(0, n_train, batch_size):
                batch_ids = ids_nn[b:b + batch_size]
                X_batch = torch.FloatTensor(train.features[batch_ids])
                y_batch = torch.FloatTensor(train.weights[batch_ids])  # reward (advantage)
                a_batch = torch.LongTensor(train.targets[batch_ids])  # action

                optimizer.zero_grad()
                y_pred_logits = model_ff(X_batch)

                loss = criterion(y_pred_logits, y_batch, a_batch, X_batch)
                loss.backward()

                optimizer.step()

                batch_no = b // batch_size + 1
                if batch_no % freq != 0:
                    continue

                print('train loss in %d epoch in %d batch: %.5f' %
                      (epoch + 1, batch_no, loss.item()))

                if writer is not None:
                    writer.add_scalar('data/train_loss', loss.item(), n_iter)
                    writer.add_scalar('data/epoch', epoch + 1, n_iter)
                    writer.add_scalar('data/batch', batch_no, n_iter)

                # Validate in eval mode (dropout off), then switch back so the
                # remaining batches of this epoch are trained with dropout on.
                model_ff.train(False)
                val_loss = _mean_val_loss(val, model_ff, criterion, batch_size)
                model_ff.train(True)
                print('val loss in %d epoch: %.5f' % (epoch + 1, val_loss))

                if writer is not None:
                    writer.add_scalar('data/val_loss', val_loss, n_iter)
                n_iter += 1
    finally:
        # Flush and release the event file even if training raises.
        if writer is not None:
            writer.close()


In [96]:
# datasets = dataset.read_datasets_from_dir("features_v3/")

In [97]:
# dataset.read_columns_from_random_file("features_v3")

[(0, 'cargo_vol_total'),
 (1, 'cargo_fuel_total'),
 (2, 'unit_can_build'),
 (3, 'unit_routine'),
 (4, 'unit_last_action'),
 (5, 'near_city_dist'),
 (6, 'near_city_dir'),
 (7, 'near_city_fuel'),
 (8, 'near_city_light_upkeep'),
 (9, 'city_size'),
 (10, 'opp_near_city_dist'),
 (11, 'opp_near_city_dir'),
 (12, 'opp_near_city_fuel'),
 (13, 'opp_near_city_light_upkeep'),
 (14, 'opp_city_size'),
 (15, 'near_resource_dist'),
 (16, 'near_resource_dir'),
 (17, 'near_resource_type'),
 (18, 'near_resource_amount'),
 (19, 'my_city_count'),
 (20, 'opp_city_count'),
 (21, 'turn'),
 (22, 'is_night'),
 (23, 'time_to_night'),
 (24, 'width'),
 (25, 'height'),
 (26, 'my_research'),
 (27, 'opp_research'),
 (28, 'my_research_coal'),
 (29, 'opp_research_coal'),
 (30, 'my_research_uran'),
 (31, 'opp_research_uran'),
 (32, 'action'),
 (33, 'my_tiles'),
 (34, 'opp_tiles')]

In [100]:
# Display the categorical column indices exported by utils.dataset.
# NOTE(review): the cells below index with dataset.CAT_FEATURES_V4, not this list.
dataset.CAT_FEATURES

[2, 3, 4, 6, 11, 16, 17, 22, 28, 29, 30, 31]

In [409]:
# Ascending list of every column index that is not in dataset.CAT_FEATURES_V4.
# NOTE(review): 42 is presumably the number of per-unit columns preceding the
# flattened 32x32x7 map (MAP_F further down) - confirm against the V4 schema.
FLOAT_FEATURES = sorted(set(range(42 + 32*32*7)).difference(dataset.CAT_FEATURES_V4))

In [410]:
# Sanity check: show the first 20 and the last 10 numeric column indices.
FLOAT_FEATURES[:20], FLOAT_FEATURES[-10:]

([0, 1, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 21, 23, 26, 27, 28],
 [7200, 7201, 7202, 7203, 7204, 7205, 7206, 7207, 7208, 7209])

In [87]:
import pickle
from sklearn.preprocessing import OneHotEncoder

In [392]:
# Load the previously fitted encoder object that prepare_features() and the bot flags refer to as "ohe_v2".
# NOTE(review): pickle.load executes arbitrary code from the file - only load artifacts you created yourself.
with open("../submissions/simple/models/ohe_v2", "rb") as f:
    OHE = pickle.load(f)

In [419]:
def prepare_features(t: dataset.Dataset, v: dataset.Dataset, ohe=None, categories=None) -> (dataset.Dataset, dataset.Dataset, OneHotEncoder):
    """One-hot encode the categorical columns of two datasets and cast everything to float.

    Args:
        t: dataset treated as the training split; a freshly created encoder is fitted on it.
        v: dataset treated as the validation split; always transformed only.
        ohe: already fitted encoder to reuse. When None a new OneHotEncoder is created.
        categories: optional explicit per-column category lists for a newly created
            encoder; None lets the encoder infer them from `t`.

    Returns:
        (prepared_t, prepared_v, ohe). In each prepared dataset the one-hot block
        comes first, followed by the FLOAT_FEATURES columns.
    """
    create_ohe = ohe is None
    if create_ohe:
        cats = 'auto' if categories is None else categories
        try:
            # scikit-learn >= 1.2 spells the dense-output flag `sparse_output`.
            ohe = OneHotEncoder(sparse_output=False, categories=cats)
        except TypeError:
            # Older scikit-learn only knows `sparse`.
            ohe = OneHotEncoder(sparse=False, categories=cats)

    def prepare(d, is_train):
        # Fancy indexing copies, so the in-place clean-up below never touches d.features.
        cf = d.features[:, dataset.CAT_FEATURES_V4]
        ff = d.features[:, FLOAT_FEATURES]
        # Normalise string-typed raw values to the category values the encoder expects.
        cf[cf == "False"] = False
        cf[cf == "True"] = True
        cf[cf == None] = "None"  # noqa: E711 - element-wise comparison on an array
        cf[cf == "1"] = 1
        cf[cf == "2"] = 2
        cf[cf == "3"] = 3
        ff[ff == "None"] = 0
        cf_o = ohe.fit_transform(cf) if is_train and create_ohe else ohe.transform(cf)
        # np.float was only an alias of the builtin float (float64) and no longer exists
        # in current NumPy; np.float64 gives the same dtype.
        return dataset.Dataset(
            features=np.array(np.concatenate([cf_o, ff], axis=1), dtype=np.float64),
            targets=np.array(d.targets, dtype=np.float64),
            weights=np.array(d.weights, dtype=np.float64),
            next_state_id=d.next_state_id
        )

    t = prepare(t, True)
    v = prepare(v, False)
    return (t, v, ohe)

In [184]:
# dt,dv,OHE = prepare_features(data, data, None, [
#      np.array(["None", False, True], dtype=object),
#      np.array([1, 2, 3], dtype=object),
#      np.array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(['None', 'coal', 'uranium', 'wood'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object)])

In [185]:
# Inspect the per-column category lists stored in the loaded encoder.
OHE.categories_

[array(['None', False, True], dtype=object),
 array([1, 2, 3], dtype=object),
 array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', 'coal', 'uranium', 'wood'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object)]

In [188]:
# with open("../submissions/simple/models/ohe_v2", "wb") as f:
#     pickle.dump(OHE, f)

In [142]:
# with open("../submissions/simple/models/ohe_v1", "wb") as f:
#     pickle.dump(ohe, f)

In [626]:
MAP_F = 32 * 32 * 7


class NNWithCustomFeatures(nn.Module):
    """Two-headed network over scalar features plus a flattened 32x32x7 map.

    A small CNN turns the trailing MAP_F inputs into a 128-d embedding, which is
    concatenated with the remaining inputs and fed to two MLP heads:
    `model_q` (A outputs, final ReLU so they are non-negative) and
    `model_p` (A unbounded outputs).

    Args:
        INPUT_F: number of non-map input columns of one state.
        DROP_P: dropout probability used throughout.
        H: hidden width of both heads.
        A: number of outputs per head.
    """

    def __init__(self, INPUT_F, DROP_P, H, A=6):
        super().__init__()
        head_in = INPUT_F + 128  # scalar columns + map embedding

        def build_head(relu_on_output):
            # Layer order is fixed so that state_dict keys stay stable.
            stack = [
                nn.Dropout(DROP_P),
                nn.Linear(head_in, H),
                nn.LayerNorm(H),
                nn.ReLU(),
                nn.Dropout(DROP_P),
                nn.Linear(H, H),
                nn.ReLU(),
                nn.Dropout(DROP_P),
                nn.Linear(H, H),
                nn.ReLU(),
                nn.Linear(H, A),
            ]
            if relu_on_output:
                stack.append(nn.ReLU())
            return nn.Sequential(*stack)

        self.model_q = build_head(True)
        self.model_p = build_head(False)

        # Three unpadded 3x3 conv + 2x2 max-pool stages. For a 32x32 input the
        # spatial size goes 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2.
        conv_stages = []
        for c_in, c_out in ((7, 64), (64, 128), (128, 256)):
            conv_stages.extend([
                nn.Conv2d(c_in, c_out, 3),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
        self.map_model = nn.Sequential(*conv_stages)

        # Brings the conv output to a fixed 4x4 grid so `proj` always sees 256*4*4 inputs.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.proj = nn.Sequential(
            nn.Dropout(p=DROP_P),
            nn.Linear(256 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=DROP_P),
            nn.Linear(256, 128),
        )

    def forward(self, x):
        """Score a (current state, next state) pair stored side by side in `x`.

        The columns of `x` are split in half; each half goes through
        `forward_impl`. Output columns: [q_cur, p_cur, q_next, p_next].
        """
        half = x.shape[1] // 2
        states = (x[:, :half], x[:, half:])
        return torch.cat([self.forward_impl(state) for state in states], dim=1)

    def forward_impl(self, x):
        """Score one state; returns `model_q` outputs followed by `model_p` outputs."""
        scalars = x[:, :-MAP_F]
        grid = x[:, -MAP_F:].reshape(-1, 32, 32, 7)
        # Swap axes 1 and 3 so the 7 planes become the channel axis for Conv2d.
        grid = grid.transpose(1, -1)
        pooled = self.avgpool(self.map_model(grid))
        map_embedding = self.proj(torch.flatten(pooled, 1))
        joint = torch.cat([scalars, map_embedding], dim=1)
        return torch.cat([self.model_q(joint), self.model_p(joint)], dim=1)

In [627]:
# Smoke-test instance: INPUT_F=63 non-map columns, dropout 0.05, hidden width 64.
model = NNWithCustomFeatures(63, 0.05, 64)

In [630]:
# Shape check of a single-state forward pass on a batch of 4.
# NOTE(review): torch.Tensor(4, n) allocates without initialising the values, so the
# printed numbers are not reproducible; torch.zeros / torch.randn would be.
model.forward_impl(torch.Tensor(4, 63 + 32*32*7))

tensor([[ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450],
        [ 0.0000,  0.0000,  0.1094,  0.0437,  0.0000,  0.0924, -0.0958, -0.0997,
         -0.1076,  0.0741,  0.0568, -0.0414],
        [ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0927, -0.0949,
         -0.1044,  0.0684,  0.0588, -0.0371],
        [ 0.0000,  0.0000,  0.1147,  0.0556,  0.0000,  0.0966, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450]], grad_fn=<CatBackward>)

In [648]:
ENTROPY_REG = 1e-4
def policy_loss(pi_logits, reward_batch, a_batch, X_batch):
    """Reward-weighted cross-entropy of the taken actions plus an entropy term.

    Args:
        pi_logits: (batch, n_actions) unnormalised action scores.
        reward_batch: (batch,) weight applied to each sample's cross-entropy.
        a_batch: (batch,) integer index of the action taken.
        X_batch: unused; present so all criteria share one signature.

    Returns:
        Scalar tensor: mean over the batch of
        `CE(pi_logits, a) * reward - ENTROPY_REG * sum_a p_a * log p_a`.

    log-probabilities come from log_softmax instead of log(softmax(.)), so a
    probability that underflows to 0 contributes 0 instead of 0 * -inf = NaN.
    """
    # NOTE(review): -sum(p * log p) is the entropy, so this term *adds*
    # ENTROPY_REG * entropy to the loss, i.e. it penalises entropy. Confirm that
    # this sign is intended; an exploration bonus would subtract it.
    log_probs = torch.nn.functional.log_softmax(pi_logits, dim=1)
    probs = log_probs.exp()
    weighted_ce = torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch) * reward_batch
    return torch.mean(weighted_ce
                      - torch.sum(probs * log_probs * ENTROPY_REG, dim=1))

def q_loss(q_vals, reward_batch, a_batch, X_batch):
    """Scaled MSE between the Q-value of the taken action and the observed reward.

    Picks `q_vals[i, a_batch[i]]` for every row, compares it with `reward_batch`
    using a mean squared error and multiplies the result by 0.01.
    `X_batch` is unused; it only keeps the criterion signature uniform.
    """
    row_ids = np.arange(q_vals.shape[0])
    chosen_q = q_vals[row_ids, a_batch]
    mse = torch.nn.functional.mse_loss(chosen_q, reward_batch)
    return mse * 0.01

gamma = 0.99

def get_is_last_state(x):
    """Return a float mask (1.0 / 0.0) per row: 1.0 where every entry is close to -1.

    All -1 is the placeholder written for "no next state" by add_next_features.
    """
    t = torch.sum(torch.isclose(x, torch.ones_like(x) * (-1)), dim=1) == x.shape[1]
    return t.float()

def q_loss_pair(q_vals_cur_and_next, reward_batch, a_batch, X_batch):
    """One-step TD loss for a model that outputs [Q(s, .), Q(s', .)] side by side.

    Args:
        q_vals_cur_and_next: (batch, >=12); columns 0..5 are read as Q-values of
            the current state, columns 6..11 as Q-values of the next state.
        reward_batch: (batch,) immediate reward.
        a_batch: (batch,) integer index of the action taken.
        X_batch: (batch, 2 * F) current and next state features side by side;
            the second half is only used to detect terminal transitions.

    Returns:
        Scalar SmoothL1 loss between Q(s, a) and `reward + gamma * max_a' Q(s', a')`,
        with the bootstrap term zeroed for terminal transitions.

    The bootstrap target is detached so gradients flow only through Q(s, a)
    (same convention as actor_critic_loss). Previously the prediction was
    detached instead, which trained the next-state values towards the current
    estimate.
    """
    q_vals = q_vals_cur_and_next[:, :6]
    q_vals_next = q_vals_cur_and_next[:, 6:12]
    q_vals_per_reward_cur = q_vals[np.arange(q_vals.shape[0]), a_batch]
    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    best_q_vals_next = torch.max(q_vals_next, dim=1)[0] * (1 - get_is_last_state(X_batch_next))
    td_target = (reward_batch + gamma * best_q_vals_next).detach()
    return torch.nn.SmoothL1Loss()(input=q_vals_per_reward_cur, target=td_target)


def actor_critic_loss(q_pi_payload, reward_batch, a_batch, X_batch):
    """Combined critic (one-step TD) and actor (Q-weighted cross-entropy) loss.

    Args:
        q_pi_payload: (batch, >=18) model output; columns 0..5 are read as Q(s, .),
            6..11 as policy logits for s, and 12..17 as Q(s', .).
        reward_batch: (batch,) immediate reward.
        a_batch: (batch,) integer index of the action taken.
        X_batch: (batch, 2 * F) current and next state features side by side;
            the second half is only used to detect terminal transitions.

    Returns:
        Scalar tensor `critic_loss + actor_loss`.

    The entropy term uses log_softmax so that a probability underflowing to 0
    contributes 0 instead of 0 * -inf = NaN.
    """
    q_vals = q_pi_payload[:, :6]
    pi_logits = q_pi_payload[:, 6:12]
    q_vals_next = q_pi_payload[:, 12:18]

    log_pi = torch.nn.functional.log_softmax(pi_logits, dim=1)
    pi_probs = log_pi.exp()

    q_vals_per_reward_cur = q_vals[np.arange(q_vals.shape[0]), a_batch]
    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    # Terminal transitions get no bootstrap value.
    best_q_vals_next = torch.max(q_vals_next, dim=1)[0] * (1 - get_is_last_state(X_batch_next))

    # Critic: the bootstrap target is detached, gradients flow through Q(s, a) only.
    critic_loss = torch.nn.SmoothL1Loss()(target=reward_batch + gamma * best_q_vals_next.detach(),
                                          input=q_vals_per_reward_cur)
    # Actor: cross-entropy of the taken action weighted by the (detached) critic estimate.
    # NOTE(review): as written the entropy term is added to the loss (entropy is
    # penalised); confirm the sign is intended.
    actor_loss = torch.mean(
        torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch) * q_vals_per_reward_cur.detach()
        - torch.sum(pi_probs * log_pi * ENTROPY_REG, dim=1))
    return critic_loss + actor_loss

In [429]:
simple_bot = "../submissions/simple/main.py"
replays = "replays"

def run_game(left_bot=simple_bot, right_bot=simple_bot, seed=42, loglevel=2):
    """Play one match with the `lux-ai-2021` CLI and return its replay.

    Args:
        left_bot, right_bot: paths of the two bot entry points, passed to the
            CLI in this order.
        seed: value forwarded as `--seed`.
        loglevel: value forwarded as `--loglevel`; when > 0 the captured stdout
            is also printed.

    Returns:
        (replay, stdout): the parsed replay JSON and the decoded CLI output.

    Raises:
        AssertionError: if the CLI exits with a non-zero status.

    The replay file name and the (square) map size are drawn from NumPy's
    global RNG, in that order.
    """
    python_v = "python3.7"

    # Keep the draw order (file name first, then size) so seeded runs stay reproducible.
    replay_path = os.path.join(replays, str(np.random.randint(int(1e9))) + ".json")
    size = np.random.choice([12,16,24,32], size=1)[0]

    # Make sure the output directory exists before the CLI tries to write into it.
    os.makedirs(replays, exist_ok=True)

    res = subprocess.run([
        "lux-ai-2021",
        left_bot,
        right_bot,
#         "--statefulReplay",
        "--width={}".format(size),
        "--height={}".format(size),
        "--loglevel={}".format(loglevel),
        "--python={}".format(python_v),
        "--maxtime=100000",
        "--maxConcurrentMatches=1",
        "--seed={}".format(seed),
        "--out={}".format(replay_path)], stdout=subprocess.PIPE)

    stdout_text = res.stdout.decode()
    if loglevel > 0:
        print(stdout_text)

    assert res.returncode == 0, "lux-ai-2021 exited with status {}".format(res.returncode)

    with open(replay_path, "r") as f:
        result = json.load(f)
    return result, stdout_text

In [432]:
run_game(simple_bot, simple_bot)  # smoke test: play one game of the default bot against itself

In [98]:
import hashlib

def build_runnable_bot_with_flags(flags: dict, origin = simple_bot, base_path = '../submissions/simple/') -> str:
    """Materialise a bot script with `flags` baked in and return its path.

    The file at `origin` is used as a `str.format` template: its positional
    placeholder is replaced with the JSON encoding of `flags`. The result is
    written to `<base_path>/main_<hash>.py`, where the hash is derived from the
    JSON text, so equal flags always map to the same file.

    Args:
        flags: JSON-serialisable configuration for the bot.
        origin: path of the template script.
        base_path: directory the generated script is written to.

    Returns:
        Path of the generated script.
    """
    with open(origin, "r") as template_file:
        # Strip only the newline of each line; slicing off the last character
        # would eat real content when the final line has no trailing newline.
        text = '\n'.join(line.rstrip('\n') for line in template_file)
    flags_json = json.dumps(flags)
    text = text.format(flags_json)
    h = int(hashlib.sha256(flags_json.encode('utf-8')).hexdigest(), 16) % (10 ** 18)
    path = os.path.join(base_path, "main_" + str(h) + ".py")
    with open(path, "w") as bot_file:
        bot_file.write(text)
    return path

In [229]:
def count_series(results: list):
    """Score a series of games that alternate sides every game.

    Args:
        results: sequence of `(replay, stdout)` pairs as returned by run_game;
            only `replay['results']['ranks']` is read.

    Returns:
        One entry per game: 1 if the first-ranked agent's id equals `i % 2`
        (i = position of the game in the series), 0 if it does not, and 0.5
        when the ranks are not a clean 1st/2nd split (e.g. a tie).
    """
    wins = []
    for i, r in enumerate(results):
        ranks = r[0]['results']['ranks']
        decisive = ranks[0]['rank'] == 1 and ranks[1]['rank'] == 2
        if not decisive:
            wins.append(0.5)
        elif ranks[0]["agentID"] == i % 2:
            wins.append(1)
        else:
            wins.append(0)
    return wins

In [100]:
from joblib import Parallel, delayed
import tqdm

In [614]:
def sample_dataset(d, p=0.5):
    """Return a random subset of `d` holding `int(len(d.features) * p)` rows.

    Row indices are drawn with `np.random.choice` using its defaults, i.e. with
    replacement, so a row can appear more than once. Only `features`,
    `weights` and `targets` are carried over to the new dataset.
    """
    total = len(d.features)
    picked = np.random.choice(total, size=int(total * p))
    columns = {
        name: getattr(d, name)[picked]
        for name in ("features", "weights", "targets")
    }
    return dataset.Dataset(**columns)

In [492]:
def add_next_features(d):
    """Append each row's successor-state features to its own features.

    For row i the successor is `d.features[d.next_state_id[i]]`; when the id is
    -1 (no successor) a vector of -1 with the same shape is used instead. The
    returned dataset has twice as many feature columns and unchanged
    `weights` / `targets`.
    """
    assert d.next_state_id is not None

    def successor_of(row):
        nxt = d.next_state_id[row]
        if nxt == -1:
            # Placeholder for "no next state": all entries set to -1.
            return np.ones_like(d.features[row]) * (-1)
        return d.features[nxt]

    rows = np.arange(d.features.shape[0])
    paired = [np.concatenate([d.features[row], successor_of(row)]) for row in rows]
    return dataset.Dataset(
        features=np.array(paired),
        weights=np.array([d.weights[row] for row in rows]),
        targets=np.array([d.targets[row] for row in rows]))

In [None]:
# Self-play training loop. Every round: save the current weights, generate a bot
# script that loads them, play B games against simple_bot, then run one learn()
# pass on the features logged during the game.
# NOTE(review): the loop has no exit condition and `writer` is never closed;
# it runs until the kernel is interrupted.
t = 0  # round counter (original note: "1778 - value_iter")
B = 1  # games played per round

model = NNWithCustomFeatures(83, 0.05, 64)

writer = SummaryWriter()

while True:
    t += 1
    # Re-seed NumPy's global RNG each round; run_game() and sample_dataset() draw from it.
    np.random.seed(t)
    # Snapshot the weights under the same name that is handed to the bot as "model_path".
    torch.save(model.state_dict(), '../submissions/simple/models/ac_iter_v{}'.format(t))
    r = []
    for i in np.arange(B):
        seed = t * B + i
        _f = str(seed) + ".txt"
        bot = build_runnable_bot_with_flags({
            "model_path": "models/ac_iter_v{}".format(t), 
            "use_policy": True,
            "is_neural": True,
            "prob_use_default_agent": 0.5 / np.log(t + 1),
            "prob_use_random": 0.05,
            "ohe_path": "models/ohe_v2",
            "use_old_units_cargo_rules": False,
            "log_features_path": "../../research/features_iter/", "log_path_file_name": _f
        })
        # Alternate the side the learning bot plays on from round to round.
        if t % 2 == 0:
            _r = run_game(bot, simple_bot, loglevel=0, seed=seed)
        else:
            _r = run_game(simple_bot, bot, loglevel=0, seed=seed)
        r.append(_r)
    wins = np.mean(count_series(r))
    # In odd rounds the learning bot was passed as right_bot, so flip the score.
    # NOTE(review): count_series keys on the game index (i % 2) while the side is
    # chosen by t % 2; the two only agree while B == 1.
    if t % 2 == 1:
        wins = 1 - wins
    # NOTE(review): only the feature file of the last game in the round (_f) is read.
    trainD = dataset.get_dataset_from_file(os.path.join("features_iter/", _f), wins)
    reward = np.sum(trainD.weights)
    # The same data is passed as train and validation split; OHE is reused, not refitted.
    trainD_ohe, valD_ohe, _ = prepare_features(trainD, trainD, OHE)
    # NOTE(review): column 31 is presumably the turn counter - confirm against the feature schema.
    max_step = np.max(trainD.features[:, 31])
    trainD_ohe_with_next = add_next_features(trainD_ohe)
    valD_ohe_with_next = add_next_features(valD_ohe)
    # Train on a 10% sample; validate on the full round.
    trainD_ohe_with_next_sampled = sample_dataset(trainD_ohe_with_next, 0.1)
    print("Round {}, winrate: {}, max_step: {}, reward: {}, example: {}".format(t, wins, max_step, reward, r[0][0]['results']))
    writer.add_scalar('data/reward', reward, t)
    writer.add_scalar('data/winrate', wins, t)
    writer.add_scalar('data/max_step', max_step, t)
    try:
        # The learning rate decays as 0.1 / (t + 1); learn() builds a fresh Adam optimizer on every call.
        learn(trainD_ohe_with_next_sampled, valD_ohe_with_next, model, actor_critic_loss, lr=1e-1 / (t+1), batch_size=64, epochs=1, freq=1, l2=1e-5, use_tb=False)
    except Exception as e:
        # Best effort: a failed training pass is only printed and the loop moves on to the next round.
        print(e)

  if (await self.run_code(code, result,  async_=asy)):


Round 1, winrate: 0.0, max_step: 359, reward: 0.26289999999999836, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/717354021.json'}
train loss in 1 epoch in 1 batch: 0.04858
val loss in 1 epoch: 22.23305
train loss in 1 epoch in 2 batch: 31.29694
val loss in 1 epoch: 1.07178


  if (await self.run_code(code, result,  async_=asy)):


Round 2, winrate: 0.0, max_step: 310, reward: 1.7673000000000298, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/798842024.json'}
train loss in 1 epoch in 1 batch: 0.89470
val loss in 1 epoch: 6.25246
train loss in 1 epoch in 2 batch: 6.39071
val loss in 1 epoch: 18.49046
train loss in 1 epoch in 3 batch: 18.13600
val loss in 1 epoch: 13.67970


  if (await self.run_code(code, result,  async_=asy)):


Round 3, winrate: 0.0, max_step: 359, reward: 0.9316999999999964, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218175338.json'}
train loss in 1 epoch in 1 batch: 16.02784
val loss in 1 epoch: 46.59298
train loss in 1 epoch in 2 batch: 46.14453
val loss in 1 epoch: 57.88479
train loss in 1 epoch in 3 batch: 57.39045
val loss in 1 epoch: 86.11944
Round 4, winrate: 0.0, max_step: 359, reward: 0.6188999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/932136058.json'}
train loss in 1 epoch in 1 batch: 82.17757
val loss in 1 epoch: 89.83972
train loss in 1 epoch in 2 batch: 88.25482
val loss in 1 epoch: 71.09046


  if (await self.run_code(code, result,  async_=asy)):


Round 5, winrate: 0.0, max_step: 238, reward: 0.03640000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/953453411.json'}
train loss in 1 epoch in 1 batch: 65.57085
val loss in 1 epoch: 132.50782


  if (await self.run_code(code, result,  async_=asy)):


Round 6, winrate: 0.0, max_step: 350, reward: 0.20019999999999885, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/613579658.json'}
train loss in 1 epoch in 1 batch: 88.05369
val loss in 1 epoch: 64.98711
Round 7, winrate: 0.0, max_step: 110, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/327741615.json'}
train loss in 1 epoch in 1 batch: 31.80294
val loss in 1 epoch: 193.95966
Round 8, winrate: 0.0, max_step: 79, reward: 0.013100000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/530125251.json'}
train loss in 1 epoch in 1 batch: 33.32146
val loss in 1 epoch: 86.04759


  if (await self.run_code(code, result,  async_=asy)):


Round 9, winrate: 0.0, max_step: 319, reward: 0.045200000000000295, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/44556670.json'}
train loss in 1 epoch in 1 batch: 190.91922
val loss in 1 epoch: 84.81249
Round 10, winrate: 0.0, max_step: 196, reward: 0.0127, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/91571465.json'}
train loss in 1 epoch in 1 batch: 93.79315
val loss in 1 epoch: 111.41048


  if (await self.run_code(code, result,  async_=asy)):


Round 11, winrate: 1.0, max_step: 314, reward: 4.190800000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/774252441.json'}
train loss in 1 epoch in 1 batch: 909.19666
val loss in 1 epoch: 668.16833
Round 12, winrate: 0.0, max_step: 29, reward: 0.004200000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/662124363.json'}
train loss in 1 epoch in 1 batch: 1065.47693
val loss in 1 epoch: 690.68457
Round 13, winrate: 0.0, max_step: 156, reward: 0.011399999999999987, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/118980946.json'}
train loss in 1 epoch in 1 batch: 36.81111
val loss in 1 epoch: 29.70477


  if (await self.run_code(code, result,  async_=asy)):


Round 14, winrate: 0.0, max_step: 150, reward: 0.026599999999999947, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/59886187.json'}
train loss in 1 epoch in 1 batch: 27.43460
val loss in 1 epoch: 83.40161
Round 15, winrate: 0.0, max_step: 112, reward: 0.006100000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/424418760.json'}
train loss in 1 epoch in 1 batch: 18.55710
val loss in 1 epoch: 59.74951
Round 16, winrate: 0.0, max_step: 276, reward: 1.0378999999999987, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/959027881.json'}
train loss in 1 epoch in 1 batch: 1361.95618
val loss in 1 epoch: 1147.46030
train loss in 1 epoch in 2 batch: 1077.70996
val loss in 1 epoch: 970.36043
train loss in 1 epoch in 3 batch: 1071.70105
val loss in 1 epoch: 820.92862


  if (await self.run_code(code, result,  async_=asy)):


Round 17, winrate: 0.0, max_step: 279, reward: 0.02479999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/191834735.json'}
train loss in 1 epoch in 1 batch: 142.05771
val loss in 1 epoch: 116.36702


  if (await self.run_code(code, result,  async_=asy)):


Round 18, winrate: 0.0, max_step: 310, reward: 0.16959999999999825, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/645852458.json'}
train loss in 1 epoch in 1 batch: 481.06995
val loss in 1 epoch: 411.30109
train loss in 1 epoch in 2 batch: 410.80725
val loss in 1 epoch: 377.32683
Round 19, winrate: 0.0, max_step: 155, reward: 0.02629999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/418903645.json'}
train loss in 1 epoch in 1 batch: 433.30289
val loss in 1 epoch: 333.89712
Round 20, winrate: 0.0, max_step: 191, reward: 0.11519999999999932, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/378518883.json'}
train loss in 1 epoch in 1 batch: 344.16223
val loss in 1 epoch: 330.41922
Round 21, winrate: 0.0, max_step: 310, reward: 0.031599999999999906, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replay

  if (await self.run_code(code, result,  async_=asy)):


Round 22, winrate: 0.0, max_step: 190, reward: 0.01089999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/895331189.json'}
train loss in 1 epoch in 1 batch: 246.15454
val loss in 1 epoch: 209.60680
Round 23, winrate: 0.0, max_step: 196, reward: 0.9099000000000081, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/74293843.json'}
train loss in 1 epoch in 1 batch: 312.94333
val loss in 1 epoch: 260.40880
train loss in 1 epoch in 2 batch: 252.52475
val loss in 1 epoch: 246.79204
Round 24, winrate: 0.0, max_step: 30, reward: 0.0020000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/902017442.json'}
train loss in 1 epoch in 1 batch: 479.45038
val loss in 1 epoch: 566.68054
Round 25, winrate: 0.0, max_step: 359, reward: 0.6505999999999864, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFil

  if (await self.run_code(code, result,  async_=asy)):


Round 35, winrate: 0.0, max_step: 359, reward: 0.46479999999999866, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/893589193.json'}
train loss in 1 epoch in 1 batch: 226.86462
val loss in 1 epoch: 139.03641
train loss in 1 epoch in 2 batch: 166.25969
val loss in 1 epoch: 132.11613
Round 36, winrate: 0.0, max_step: 310, reward: 0.06390000000000017, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/981430917.json'}
train loss in 1 epoch in 1 batch: 132.91501
val loss in 1 epoch: 80.78207
Round 37, winrate: 0.0, max_step: 351, reward: 1.7245999999999282, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/835356559.json'}
train loss in 1 epoch in 1 batch: 58.49144
val loss in 1 epoch: 65.58780
train loss in 1 epoch in 2 batch: 55.34632
val loss in 1 epoch: 60.19590
train loss in 1 epoch in 3 batch: 39.38912
val loss in 1 epoch: 58.41866
train los

  if (await self.run_code(code, result,  async_=asy)):


Round 40, winrate: 0.0, max_step: 359, reward: 0.7285000000000156, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/677260614.json'}
train loss in 1 epoch in 1 batch: 76.34996
val loss in 1 epoch: 74.86792
train loss in 1 epoch in 2 batch: 60.64955
val loss in 1 epoch: 75.42678
train loss in 1 epoch in 3 batch: 12.33106
val loss in 1 epoch: 77.31007


  if (await self.run_code(code, result,  async_=asy)):


Round 41, winrate: 0.0, max_step: 359, reward: 0.18029999999999802, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/3966912.json'}
train loss in 1 epoch in 1 batch: 81.51180
val loss in 1 epoch: 53.99648
train loss in 1 epoch in 2 batch: 97.16074
val loss in 1 epoch: 52.28765


  if (await self.run_code(code, result,  async_=asy)):


Round 42, winrate: 0.0, max_step: 270, reward: 0.1100000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/534895718.json'}
train loss in 1 epoch in 1 batch: 128.21826
val loss in 1 epoch: 75.53202
Round 43, winrate: 1.0, max_step: 196, reward: 1.0093, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/494155588.json'}
train loss in 1 epoch in 1 batch: 539.17950
val loss in 1 epoch: 488.48856
Round 44, winrate: 0.0, max_step: 150, reward: 0.01079999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/364394260.json'}
train loss in 1 epoch in 1 batch: 84.18806
val loss in 1 epoch: 58.83724
Round 45, winrate: 0.0, max_step: 230, reward: 0.3414999999999966, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/234232222.json'}
train loss in 1 epoch in 1 batch: 75.91531
val loss in 1 epoc

  if (await self.run_code(code, result,  async_=asy)):


Round 47, winrate: 1.0, max_step: 355, reward: 1.0281999999999998, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/487429255.json'}
train loss in 1 epoch in 1 batch: 45.14914
val loss in 1 epoch: 38.67229
Round 48, winrate: 0.0, max_step: 151, reward: 0.032399999999999894, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/75120128.json'}
train loss in 1 epoch in 1 batch: 79.24583
val loss in 1 epoch: 44.12326
Round 49, winrate: 0.0, max_step: 231, reward: 0.0063000000000000035, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218890666.json'}
train loss in 1 epoch in 1 batch: 449.75928
val loss in 1 epoch: 434.57611
Round 50, winrate: 0.0, max_step: 156, reward: 0.0032999999999999982, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/554800608.json'}
train loss in 1 epoch in 1 batch: 95.48973
v

  if (await self.run_code(code, result,  async_=asy)):


Round 52, winrate: 0.0, max_step: 199, reward: 0.013399999999999974, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/314006517.json'}
train loss in 1 epoch in 1 batch: 506.74277
val loss in 1 epoch: 451.53254
Round 53, winrate: 0.0, max_step: 313, reward: 0.019499999999999938, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/415161881.json'}
train loss in 1 epoch in 1 batch: 466.05130
val loss in 1 epoch: 428.97362
Round 54, winrate: 0.0, max_step: 359, reward: 0.005400000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/730930287.json'}
train loss in 1 epoch in 1 batch: 407.63181
val loss in 1 epoch: 369.73022
Round 55, winrate: 0.0, max_step: 150, reward: 0.014699999999999967, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/399897037.json'}
train loss in 1 epoch in 1 batch: 61.94

  if (await self.run_code(code, result,  async_=asy)):


Round 62, winrate: 0.0, max_step: 275, reward: 0.008400000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/144975378.json'}
train loss in 1 epoch in 1 batch: 379.42545
val loss in 1 epoch: 348.50168


  if (await self.run_code(code, result,  async_=asy)):


Round 63, winrate: 0.0, max_step: 114, reward: 0.021499999999999922, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/231674924.json'}
train loss in 1 epoch in 1 batch: 334.49719
val loss in 1 epoch: 347.06238
Round 64, winrate: 0.0, max_step: 152, reward: 0.12719999999999967, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/554473924.json'}
train loss in 1 epoch in 1 batch: 68.55685
val loss in 1 epoch: 63.39416
Round 65, winrate: 0.0, max_step: 270, reward: 0.020199999999999933, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/939073326.json'}
train loss in 1 epoch in 1 batch: 331.26529
val loss in 1 epoch: 297.98415
Round 66, winrate: 0.0, max_step: 312, reward: 0.02739999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/662660116.json'}
train loss in 1 epoch in 1 batch: 293.85175

  if (await self.run_code(code, result,  async_=asy)):


Round 72, winrate: 0.0, max_step: 359, reward: 0.0886999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/458437080.json'}
train loss in 1 epoch in 1 batch: 91.83481
val loss in 1 epoch: 86.11523


  if (await self.run_code(code, result,  async_=asy)):


Round 73, winrate: 0.0, max_step: 359, reward: 0.03509999999999998, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/612848534.json'}
train loss in 1 epoch in 1 batch: 53.03247
val loss in 1 epoch: 32.32279


  if (await self.run_code(code, result,  async_=asy)):


Round 74, winrate: 0.0, max_step: 359, reward: 0.2472000000000016, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/868452254.json'}
train loss in 1 epoch in 1 batch: 63.84532
val loss in 1 epoch: 33.92993
train loss in 1 epoch in 2 batch: 35.42611
val loss in 1 epoch: 32.04917
Round 75, winrate: 0.0, max_step: 191, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/296095992.json'}
train loss in 1 epoch in 1 batch: 319.39938
val loss in 1 epoch: 319.85744
Round 76, winrate: 1.0, max_step: 350, reward: 2.1187000000000014, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/261261985.json'}
train loss in 1 epoch in 1 batch: 35.75071
val loss in 1 epoch: 42.33874
Round 77, winrate: 0.0, max_step: 350, reward: 0.028199999999999885, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile':

  if (await self.run_code(code, result,  async_=asy)):


Round 84, winrate: 0.0, max_step: 189, reward: 0.014499999999999968, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/197744554.json'}
train loss in 1 epoch in 1 batch: 321.31219
val loss in 1 epoch: 347.44617
Round 85, winrate: 0.0, max_step: 312, reward: 0.029599999999999876, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/517001578.json'}
train loss in 1 epoch in 1 batch: 356.40427
val loss in 1 epoch: 319.43919
Round 86, winrate: 0.0, max_step: 155, reward: 0.011499999999999986, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/870190228.json'}
train loss in 1 epoch in 1 batch: 354.87836
val loss in 1 epoch: 304.60735


  if (await self.run_code(code, result,  async_=asy)):


Round 87, winrate: 0.0, max_step: 359, reward: 0.08810000000000044, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/671149542.json'}
train loss in 1 epoch in 1 batch: 310.32373
val loss in 1 epoch: 272.61209


  if (await self.run_code(code, result,  async_=asy)):


Round 88, winrate: 0.0, max_step: 358, reward: 0.026199999999999897, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/633726936.json'}
train loss in 1 epoch in 1 batch: 281.87949
val loss in 1 epoch: 267.91309
Round 89, winrate: 0.0, max_step: 189, reward: 0.0017000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/591978782.json'}
train loss in 1 epoch in 1 batch: 288.68295
val loss in 1 epoch: 267.91318
Round 90, winrate: 0.0, max_step: 359, reward: 0.2784000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/657362779.json'}
train loss in 1 epoch in 1 batch: 83.94552
val loss in 1 epoch: 71.53402
train loss in 1 epoch in 2 batch: 72.84979
val loss in 1 epoch: 72.09185
Round 91, winrate: 0.0, max_step: 239, reward: 0.4616000000000009, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile

  if (await self.run_code(code, result,  async_=asy)):


Round 98, winrate: 0.0, max_step: 359, reward: 0.12539999999999893, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/997580250.json'}
train loss in 1 epoch in 1 batch: 37.56306
val loss in 1 epoch: 29.50850
train loss in 1 epoch in 2 batch: 9.52085
val loss in 1 epoch: 28.46802
Round 99, winrate: 0.0, max_step: 359, reward: 0.7094000000000018, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/739930753.json'}
train loss in 1 epoch in 1 batch: 32.68301
val loss in 1 epoch: 28.84313
train loss in 1 epoch in 2 batch: 15.54413
val loss in 1 epoch: 28.34348
Round 100, winrate: 0.0, max_step: 230, reward: 0.015299999999999963, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/186422792.json'}
train loss in 1 epoch in 1 batch: 366.83868
val loss in 1 epoch: 331.30687


  if (await self.run_code(code, result,  async_=asy)):


Round 101, winrate: 0.0, max_step: 359, reward: 0.536299999999993, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/70431583.json'}
train loss in 1 epoch in 1 batch: 27.51153
val loss in 1 epoch: 30.46040
train loss in 1 epoch in 2 batch: 21.59778
val loss in 1 epoch: 29.82834
Round 102, winrate: 0.0, max_step: 150, reward: 0.014499999999999968, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/419539200.json'}
train loss in 1 epoch in 1 batch: 13.07081
val loss in 1 epoch: 44.17689
Round 103, winrate: 0.0, max_step: 198, reward: 0.008200000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/782161671.json'}
train loss in 1 epoch in 1 batch: 459.66873
val loss in 1 epoch: 366.69472
Round 104, winrate: 0.0, max_step: 235, reward: 0.01739999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile

  if (await self.run_code(code, result,  async_=asy)):


Round 105, winrate: 0.0, max_step: 350, reward: 0.06730000000000079, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/359660224.json'}
train loss in 1 epoch in 1 batch: 350.21738
val loss in 1 epoch: 343.09093
Round 106, winrate: 0.0, max_step: 113, reward: 0.03599999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/36785518.json'}
train loss in 1 epoch in 1 batch: 24.14100
val loss in 1 epoch: 18.77297
Round 107, winrate: 0.0, max_step: 109, reward: 0.005500000000000001, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/698812478.json'}
train loss in 1 epoch in 1 batch: 306.73184
val loss in 1 epoch: 364.72993
Round 108, winrate: 0.0, max_step: 269, reward: 0.04140000000000015, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/675248919.json'}
train loss in 1 epoch in 1 batch: 333.957

  if (await self.run_code(code, result,  async_=asy)):


Round 111, winrate: 0.0, max_step: 234, reward: 0.01569999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/481767252.json'}
train loss in 1 epoch in 1 batch: 319.98761
val loss in 1 epoch: 298.38822
Round 112, winrate: 0.0, max_step: 275, reward: 0.17210000000000059, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/537116260.json'}
train loss in 1 epoch in 1 batch: 231.50226
val loss in 1 epoch: 196.45670


  if (await self.run_code(code, result,  async_=asy)):


Round 113, winrate: 0.0, max_step: 355, reward: 0.009000000000000001, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/438024357.json'}
train loss in 1 epoch in 1 batch: 299.90976
val loss in 1 epoch: 272.34822
Round 114, winrate: 0.0, max_step: 359, reward: 0.24890000000000279, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/651254346.json'}
train loss in 1 epoch in 1 batch: 38.34096
val loss in 1 epoch: 27.74250
train loss in 1 epoch in 2 batch: 8.26871
val loss in 1 epoch: 27.02403
Round 115, winrate: 0.0, max_step: 316, reward: 0.02059999999999993, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/842210066.json'}
train loss in 1 epoch in 1 batch: 279.13516
val loss in 1 epoch: 254.54746


  if (await self.run_code(code, result,  async_=asy)):


Round 116, winrate: 0.0, max_step: 359, reward: 0.08290000000000051, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/537595189.json'}
train loss in 1 epoch in 1 batch: 24.83594
val loss in 1 epoch: 28.77865


  if (await self.run_code(code, result,  async_=asy)):


Round 117, winrate: 0.0, max_step: 359, reward: 0.023099999999999916, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/865054190.json'}
train loss in 1 epoch in 1 batch: 263.84583
val loss in 1 epoch: 244.65430


  if (await self.run_code(code, result,  async_=asy)):


Round 118, winrate: 0.0, max_step: 230, reward: 0.03739999999999986, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/827432192.json'}
train loss in 1 epoch in 1 batch: 36.35129
val loss in 1 epoch: 18.85181


  if (await self.run_code(code, result,  async_=asy)):


Round 119, winrate: 0.0, max_step: 271, reward: 0.3794000000000002, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/420235714.json'}
train loss in 1 epoch in 1 batch: 25.76330
val loss in 1 epoch: 21.89825
train loss in 1 epoch in 2 batch: 21.24514
val loss in 1 epoch: 21.98521
Round 120, winrate: 0.0, max_step: 359, reward: 0.030999999999999868, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/764313255.json'}
train loss in 1 epoch in 1 batch: 286.32858
val loss in 1 epoch: 253.67193
Round 121, winrate: 0.0, max_step: 235, reward: 0.011699999999999985, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/478162242.json'}
train loss in 1 epoch in 1 batch: 312.98087
val loss in 1 epoch: 255.29223


  if (await self.run_code(code, result,  async_=asy)):


Round 122, winrate: 0.0, max_step: 316, reward: 0.23039999999999788, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/674274799.json'}
train loss in 1 epoch in 1 batch: 25.37051
val loss in 1 epoch: 25.96754
train loss in 1 epoch in 2 batch: 47.88850
val loss in 1 epoch: 25.59143


  if (await self.run_code(code, result,  async_=asy)):


Round 123, winrate: 0.0, max_step: 359, reward: 0.02239999999999992, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/843828734.json'}
train loss in 1 epoch in 1 batch: 251.07074
val loss in 1 epoch: 236.20892
Round 124, winrate: 0.0, max_step: 110, reward: 0.0020000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/455545294.json'}
train loss in 1 epoch in 1 batch: 227.45030
val loss in 1 epoch: 235.18953


  if (await self.run_code(code, result,  async_=asy)):


Round 125, winrate: 0.0, max_step: 359, reward: 0.026699999999999894, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/28887997.json'}
train loss in 1 epoch in 1 batch: 230.74202
val loss in 1 epoch: 216.95523
Round 126, winrate: 0.0, max_step: 239, reward: 0.15990000000000065, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/451710822.json'}
train loss in 1 epoch in 1 batch: 39.16140
val loss in 1 epoch: 46.94634
Round 127, winrate: 0.0, max_step: 111, reward: 0.0030999999999999986, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/102363337.json'}
train loss in 1 epoch in 1 batch: 213.09445
val loss in 1 epoch: 201.25038
Round 128, winrate: 0.0, max_step: 277, reward: 0.1752999999999982, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/500952274.json'}
train loss in 1 epoch in 1 batch: 22.438

  if (await self.run_code(code, result,  async_=asy)):


Round 136, winrate: 0.0, max_step: 359, reward: 0.06659999999999983, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/658057439.json'}
train loss in 1 epoch in 1 batch: 246.62772
val loss in 1 epoch: 238.32538
Round 137, winrate: 0.0, max_step: 236, reward: 0.05469999999999977, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/832580318.json'}
train loss in 1 epoch in 1 batch: 32.00589
val loss in 1 epoch: 17.81433


  if (await self.run_code(code, result,  async_=asy)):


Round 138, winrate: 0.0, max_step: 359, reward: 0.01079999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/251585001.json'}
train loss in 1 epoch in 1 batch: 242.77171
val loss in 1 epoch: 232.67378
Round 139, winrate: 0.0, max_step: 310, reward: 0.016599999999999955, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/802709328.json'}
train loss in 1 epoch in 1 batch: 223.03697
val loss in 1 epoch: 215.86674
Round 140, winrate: 0.0, max_step: 275, reward: 0.0948000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/785150630.json'}
train loss in 1 epoch in 1 batch: 38.38170
val loss in 1 epoch: 15.89114
Round 141, winrate: 0.0, max_step: 155, reward: 0.0019000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/718915190.json'}
train loss in 1 epoch in 1 batch: 225.5

  if (await self.run_code(code, result,  async_=asy)):


Round 142, winrate: 0.0, max_step: 359, reward: 0.025299999999999902, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/653099285.json'}
train loss in 1 epoch in 1 batch: 214.76292
val loss in 1 epoch: 196.79277


  if (await self.run_code(code, result,  async_=asy)):


Round 143, winrate: 0.0, max_step: 191, reward: 0.05619999999999978, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/100829337.json'}
train loss in 1 epoch in 1 batch: 196.42378
val loss in 1 epoch: 192.66084
Round 144, winrate: 0.0, max_step: 235, reward: 0.0017000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/400909671.json'}
train loss in 1 epoch in 1 batch: 188.10503
val loss in 1 epoch: 179.93124
Round 145, winrate: 0.0, max_step: 157, reward: 0.0427000000000001, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/141914101.json'}
train loss in 1 epoch in 1 batch: 196.06009
val loss in 1 epoch: 181.85143
Round 146, winrate: 0.0, max_step: 190, reward: 0.012299999999999981, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/833375074.json'}
train loss in 1 epoch in 1 batch: 186

  if (await self.run_code(code, result,  async_=asy)):


Round 154, winrate: 0.0, max_step: 358, reward: 0.05450000000000035, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/33173321.json'}
train loss in 1 epoch in 1 batch: 179.46124
val loss in 1 epoch: 174.20463
Round 155, winrate: 0.5, max_step: 234, reward: 0.5019, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/654236596.json'}
train loss in 1 epoch in 1 batch: 186.77565
val loss in 1 epoch: 168.59118


  if (await self.run_code(code, result,  async_=asy)):


Round 156, winrate: 0.0, max_step: 359, reward: 0.024899999999999905, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/273243692.json'}
train loss in 1 epoch in 1 batch: 159.86449
val loss in 1 epoch: 156.03981
Round 157, winrate: 0.0, max_step: 353, reward: 1.7563999999999755, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/203590312.json'}
train loss in 1 epoch in 1 batch: 24.24184
val loss in 1 epoch: 25.12333
train loss in 1 epoch in 2 batch: 22.78010
val loss in 1 epoch: 24.58016
train loss in 1 epoch in 3 batch: 12.33416
val loss in 1 epoch: 23.98329
train loss in 1 epoch in 4 batch: 28.55201
val loss in 1 epoch: 23.35567
Round 158, winrate: 0.0, max_step: 311, reward: 0.18319999999999806, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/413473693.json'}
train loss in 1 epoch in 1 batch: 18.95904
val loss in 1 epoch: 13.26962
train lo

  if (await self.run_code(code, result,  async_=asy)):


Round 163, winrate: 0.0, max_step: 359, reward: 0.17669999999999708, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/811593569.json'}
train loss in 1 epoch in 1 batch: 87.84927
val loss in 1 epoch: 82.71333
train loss in 1 epoch in 2 batch: 74.84451
val loss in 1 epoch: 80.73750


  if (await self.run_code(code, result,  async_=asy)):


Round 164, winrate: 0.0, max_step: 350, reward: 0.029899999999999875, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/459487352.json'}
train loss in 1 epoch in 1 batch: 27.71261
val loss in 1 epoch: 46.47875
Round 165, winrate: 0.0, max_step: 356, reward: 0.15639999999999837, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/327374784.json'}
train loss in 1 epoch in 1 batch: 30.79198
val loss in 1 epoch: 17.63973
train loss in 1 epoch in 2 batch: 4.89318
val loss in 1 epoch: 18.32399


  if (await self.run_code(code, result,  async_=asy)):


Round 166, winrate: 0.0, max_step: 270, reward: 0.023499999999999913, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/205037573.json'}
train loss in 1 epoch in 1 batch: 203.18803
val loss in 1 epoch: 184.49239


  if (await self.run_code(code, result,  async_=asy)):


Round 167, winrate: 0.0, max_step: 274, reward: 0.17849999999999813, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/637283137.json'}
train loss in 1 epoch in 1 batch: 20.00699
val loss in 1 epoch: 15.42611
train loss in 1 epoch in 2 batch: 4.87022
val loss in 1 epoch: 15.03031
Round 168, winrate: 0.0, max_step: 157, reward: 0.10579999999999948, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/420042453.json'}
train loss in 1 epoch in 1 batch: 14.08573
val loss in 1 epoch: 15.43819
Round 169, winrate: 0.0, max_step: 314, reward: 0.026099999999999898, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/728386851.json'}
train loss in 1 epoch in 1 batch: 190.63565
val loss in 1 epoch: 187.50548


  if (await self.run_code(code, result,  async_=asy)):


Round 170, winrate: 0.0, max_step: 359, reward: 0.09989999999999986, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/226110362.json'}
train loss in 1 epoch in 1 batch: 8.88234
val loss in 1 epoch: 7.96132


  if (await self.run_code(code, result,  async_=asy)):


Round 171, winrate: 0.0, max_step: 359, reward: 0.026399999999999896, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/677514685.json'}
train loss in 1 epoch in 1 batch: 198.07693
val loss in 1 epoch: 174.91721
Round 172, winrate: 0.0, max_step: 318, reward: 0.02709999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/641676058.json'}
train loss in 1 epoch in 1 batch: 187.85713
val loss in 1 epoch: 173.40923
Round 173, winrate: 0.0, max_step: 113, reward: 0.0067000000000000046, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/400537829.json'}
train loss in 1 epoch in 1 batch: 174.15350
val loss in 1 epoch: 170.64800


  if (await self.run_code(code, result,  async_=asy)):


Round 174, winrate: 0.0, max_step: 359, reward: 0.024999999999999904, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/902109485.json'}
train loss in 1 epoch in 1 batch: 173.20953
val loss in 1 epoch: 168.00826


  if (await self.run_code(code, result,  async_=asy)):


Round 175, winrate: 0.0, max_step: 359, reward: 0.030299999999999872, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/223715604.json'}
train loss in 1 epoch in 1 batch: 163.79254
val loss in 1 epoch: 153.07570
Round 176, winrate: 0.0, max_step: 270, reward: 0.02049999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/461441856.json'}
train loss in 1 epoch in 1 batch: 165.10257
val loss in 1 epoch: 159.30518
Round 177, winrate: 0.0, max_step: 350, reward: 0.03129999999999987, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/12159537.json'}
train loss in 1 epoch in 1 batch: 19.70830
val loss in 1 epoch: 11.72688


  if (await self.run_code(code, result,  async_=asy)):


Round 178, winrate: 0.0, max_step: 236, reward: 0.017999999999999947, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/105640118.json'}
train loss in 1 epoch in 1 batch: 171.94569
val loss in 1 epoch: 157.33082
Round 179, winrate: 1.0, max_step: 353, reward: 1.0756000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/860947865.json'}
train loss in 1 epoch in 1 batch: 16.01737
val loss in 1 epoch: 12.86755


  if (await self.run_code(code, result,  async_=asy)):


Round 180, winrate: 0.0, max_step: 278, reward: 0.08020000000000023, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/269393649.json'}
train loss in 1 epoch in 1 batch: 20.95848
val loss in 1 epoch: 15.85629
Round 181, winrate: 0.0, max_step: 359, reward: 0.033899999999999944, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/103178493.json'}
train loss in 1 epoch in 1 batch: 171.01277
val loss in 1 epoch: 154.58204


  if (await self.run_code(code, result,  async_=asy)):


Round 182, winrate: 0.0, max_step: 319, reward: 0.20190000000000155, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/543601981.json'}
train loss in 1 epoch in 1 batch: 13.51019
val loss in 1 epoch: 11.62022
train loss in 1 epoch in 2 batch: 5.45211
val loss in 1 epoch: 11.43913


  if (await self.run_code(code, result,  async_=asy)):


Round 183, winrate: 0.0, max_step: 314, reward: 0.023699999999999912, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/315050277.json'}
train loss in 1 epoch in 1 batch: 174.05118
val loss in 1 epoch: 156.76746
Round 184, winrate: 0.0, max_step: 315, reward: 0.030199999999999873, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/294016430.json'}
train loss in 1 epoch in 1 batch: 157.27991
val loss in 1 epoch: 144.41051


  if (await self.run_code(code, result,  async_=asy)):


Round 185, winrate: 0.0, max_step: 355, reward: 0.0019000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/560723200.json'}
train loss in 1 epoch in 1 batch: 157.31964
val loss in 1 epoch: 147.10859
Round 186, winrate: 0.0, max_step: 195, reward: 0.009799999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/183389357.json'}
train loss in 1 epoch in 1 batch: 141.74968
val loss in 1 epoch: 130.58144
Round 187, winrate: 0.0, max_step: 359, reward: 0.4113, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/730320491.json'}
train loss in 1 epoch in 1 batch: 17.46155
val loss in 1 epoch: 14.06850
train loss in 1 epoch in 2 batch: 18.26563
val loss in 1 epoch: 14.13271


  if (await self.run_code(code, result,  async_=asy)):


Round 188, winrate: 0.0, max_step: 352, reward: 0.5166999999999948, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/51443474.json'}
train loss in 1 epoch in 1 batch: 26.75248
val loss in 1 epoch: 12.38259
train loss in 1 epoch in 2 batch: 11.11670
val loss in 1 epoch: 11.98391


  if (await self.run_code(code, result,  async_=asy)):


Round 189, winrate: 0.0, max_step: 359, reward: 0.03309999999999992, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/741780543.json'}
train loss in 1 epoch in 1 batch: 144.78328
val loss in 1 epoch: 130.67666


  if (await self.run_code(code, result,  async_=asy)):


Round 190, winrate: 0.0, max_step: 359, reward: 0.439799999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/990780300.json'}
train loss in 1 epoch in 1 batch: 36.89403
val loss in 1 epoch: 14.63510
train loss in 1 epoch in 2 batch: 15.30898
val loss in 1 epoch: 14.62134
Round 191, winrate: 0.0, max_step: 274, reward: 0.16870000000000104, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/178712497.json'}
train loss in 1 epoch in 1 batch: 18.18189
val loss in 1 epoch: 12.88573
Round 192, winrate: 0.0, max_step: 359, reward: 1.2507999999999821, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/31351906.json'}
train loss in 1 epoch in 1 batch: 23.59774
val loss in 1 epoch: 14.08177
train loss in 1 epoch in 2 batch: 18.75040
val loss in 1 epoch: 14.13667
train loss in 1 epoch in 3 batch: 13.14098
val loss in 1 epoch: 13.91108
Round 193, win

  if (await self.run_code(code, result,  async_=asy)):


Round 194, winrate: 0.0, max_step: 196, reward: 0.2134999999999998, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/935007969.json'}
train loss in 1 epoch in 1 batch: 19.43395
val loss in 1 epoch: 13.67312
Round 195, winrate: 0.0, max_step: 359, reward: 0.29960000000000064, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/589817801.json'}
train loss in 1 epoch in 1 batch: 12.78534
val loss in 1 epoch: 14.93802
train loss in 1 epoch in 2 batch: 8.62531
val loss in 1 epoch: 14.70184


  if (await self.run_code(code, result,  async_=asy)):


Round 196, winrate: 0.0, max_step: 230, reward: 0.021099999999999928, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/813936156.json'}
train loss in 1 epoch in 1 batch: 143.24707
val loss in 1 epoch: 137.13120


  if (await self.run_code(code, result,  async_=asy)):


Round 197, winrate: 0.0, max_step: 157, reward: 0.03569999999999984, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/506405649.json'}
train loss in 1 epoch in 1 batch: 137.48009
val loss in 1 epoch: 131.27531
Round 198, winrate: 0.0, max_step: 151, reward: 0.054699999999999915, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/471594238.json'}
train loss in 1 epoch in 1 batch: 8.60408
val loss in 1 epoch: 12.64964
Round 199, winrate: 0.0, max_step: 159, reward: 0.0991999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/664825252.json'}
train loss in 1 epoch in 1 batch: 8.29838
val loss in 1 epoch: 8.59415
Round 200, winrate: 0.0, max_step: 154, reward: 0.0021000000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/848824090.json'}
train loss in 1 epoch in 1 batch: 158.09090


  if (await self.run_code(code, result,  async_=asy)):


Round 203, winrate: 0.0, max_step: 349, reward: 0.02879999999999988, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/420477548.json'}
train loss in 1 epoch in 1 batch: 147.95186
val loss in 1 epoch: 137.72968
Round 204, winrate: 0.0, max_step: 358, reward: 0.0255999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/775484481.json'}
train loss in 1 epoch in 1 batch: 123.56388
val loss in 1 epoch: 128.83465


  if (await self.run_code(code, result,  async_=asy)):


Round 205, winrate: 0.0, max_step: 350, reward: 0.031799999999999884, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/903062850.json'}
train loss in 1 epoch in 1 batch: 128.65034
val loss in 1 epoch: 112.53920
Round 206, winrate: 0.0, max_step: 359, reward: 0.7563999999999809, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/761601813.json'}
train loss in 1 epoch in 1 batch: 15.27048
val loss in 1 epoch: 11.40229
train loss in 1 epoch in 2 batch: 11.10644
val loss in 1 epoch: 11.28651
train loss in 1 epoch in 3 batch: 5.62243
val loss in 1 epoch: 11.11049
Round 207, winrate: 0.0, max_step: 235, reward: 0.13650000000000237, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/12733958.json'}
train loss in 1 epoch in 1 batch: 9.88887
val loss in 1 epoch: 8.48804
Round 208, winrate: 0.0, max_step: 359, reward: 0.16009999999999944, example: {'ranks

  if (await self.run_code(code, result,  async_=asy)):


Round 221, winrate: 0.0, max_step: 230, reward: 0.06759999999999985, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/200019788.json'}
train loss in 1 epoch in 1 batch: 9.19064
val loss in 1 epoch: 15.38028
Round 222, winrate: 0.0, max_step: 359, reward: 0.0019000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/111791238.json'}
train loss in 1 epoch in 1 batch: 144.82408
val loss in 1 epoch: 130.91537
Round 223, winrate: 0.0, max_step: 358, reward: 0.02709999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/280828382.json'}
train loss in 1 epoch in 1 batch: 139.60883
val loss in 1 epoch: 129.66895


  if (await self.run_code(code, result,  async_=asy)):


Round 224, winrate: 0.0, max_step: 359, reward: 0.028299999999999884, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/994005916.json'}
train loss in 1 epoch in 1 batch: 127.42282
val loss in 1 epoch: 121.93652
Round 225, winrate: 0.0, max_step: 359, reward: 0.0019000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/181100195.json'}
train loss in 1 epoch in 1 batch: 107.60152
val loss in 1 epoch: 99.06964


  if (await self.run_code(code, result,  async_=asy)):


Round 226, winrate: 0.5, max_step: 315, reward: 1.4776999999999896, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/622586688.json'}
train loss in 1 epoch in 1 batch: 14.00523
val loss in 1 epoch: 8.14620
train loss in 1 epoch in 2 batch: 10.97586
val loss in 1 epoch: 7.85252
Round 227, winrate: 0.0, max_step: 157, reward: 0.025199999999999955, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/547485728.json'}
train loss in 1 epoch in 1 batch: 106.11137
val loss in 1 epoch: 93.69712
Round 228, winrate: 0.0, max_step: 75, reward: 0.0025999999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/746511749.json'}
train loss in 1 epoch in 1 batch: 96.32982
val loss in 1 epoch: 89.02028


  if (await self.run_code(code, result,  async_=asy)):


Round 229, winrate: 0.0, max_step: 230, reward: 0.07760000000000017, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/416719193.json'}
train loss in 1 epoch in 1 batch: 19.72403
val loss in 1 epoch: 17.73049
Round 230, winrate: 0.0, max_step: 113, reward: 0.015300000000000025, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/562640640.json'}
train loss in 1 epoch in 1 batch: 42.75599
val loss in 1 epoch: 42.49960
Round 231, winrate: 0.0, max_step: 192, reward: 0.019899999999999935, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/135265879.json'}
train loss in 1 epoch in 1 batch: 24.32917
val loss in 1 epoch: 17.71991
Round 232, winrate: 0.0, max_step: 191, reward: 0.04289999999999985, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/331126199.json'}
train loss in 1 epoch in 1 batch: 12.12722


  if (await self.run_code(code, result,  async_=asy)):


Round 234, winrate: 0.0, max_step: 196, reward: 0.25259999999999977, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/133003720.json'}
train loss in 1 epoch in 1 batch: 12.48492
val loss in 1 epoch: 10.09239
Round 235, winrate: 0.0, max_step: 152, reward: 0.09809999999999962, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/775619992.json'}
train loss in 1 epoch in 1 batch: 11.30663
val loss in 1 epoch: 7.72098
Round 236, winrate: 0.0, max_step: 350, reward: 0.03439999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/334431105.json'}
train loss in 1 epoch in 1 batch: 13.67281
val loss in 1 epoch: 10.35243
Round 237, winrate: 0.0, max_step: 110, reward: 0.041799999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/607866348.json'}
train loss in 1 epoch in 1 batch: 10.38216
va

  if (await self.run_code(code, result,  async_=asy)):


Round 238, winrate: 0.0, max_step: 279, reward: 0.07740000000000026, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/280626723.json'}
train loss in 1 epoch in 1 batch: 28.77499
val loss in 1 epoch: 9.16688
Round 239, winrate: 0.0, max_step: 189, reward: 0.014999999999999965, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/256973224.json'}
train loss in 1 epoch in 1 batch: 108.50964
val loss in 1 epoch: 102.53119
Round 240, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/186859151.json'}
train loss in 1 epoch in 1 batch: 86.44294
val loss in 1 epoch: 78.24887


  if (await self.run_code(code, result,  async_=asy)):


Round 241, winrate: 0.0, max_step: 310, reward: 0.029799999999999927, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/493695404.json'}
train loss in 1 epoch in 1 batch: 11.65643
val loss in 1 epoch: 8.04308
Round 242, winrate: 0.0, max_step: 319, reward: 0.17479999999999807, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/793222286.json'}
train loss in 1 epoch in 1 batch: 12.16484
val loss in 1 epoch: 7.15429
train loss in 1 epoch in 2 batch: 2.83289
val loss in 1 epoch: 7.13494


  if (await self.run_code(code, result,  async_=asy)):


Round 243, winrate: 0.0, max_step: 359, reward: 0.028099999999999885, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/891787127.json'}
train loss in 1 epoch in 1 batch: 105.22289
val loss in 1 epoch: 100.85181


  if (await self.run_code(code, result,  async_=asy)):


Round 244, winrate: 0.0, max_step: 275, reward: 0.016499999999999956, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/174709226.json'}
train loss in 1 epoch in 1 batch: 10.15720
val loss in 1 epoch: 7.40940
Round 245, winrate: 1.0, max_step: 315, reward: 1.0312, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/673682229.json'}
train loss in 1 epoch in 1 batch: 12.68799
val loss in 1 epoch: 7.78711
Round 246, winrate: 0.0, max_step: 359, reward: 0.31129999999999597, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/986336164.json'}
train loss in 1 epoch in 1 batch: 7.78667
val loss in 1 epoch: 8.34383
train loss in 1 epoch in 2 batch: 4.88193
val loss in 1 epoch: 8.44020
Round 247, winrate: 0.0, max_step: 112, reward: 0.02559999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/4989907

  if (await self.run_code(code, result,  async_=asy)):


Round 248, winrate: 0.0, max_step: 199, reward: 0.018699999999999942, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/824576254.json'}
train loss in 1 epoch in 1 batch: 5.20413
val loss in 1 epoch: 3.72458
Round 249, winrate: 0.0, max_step: 359, reward: 1.0841000000000063, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/72742160.json'}
train loss in 1 epoch in 1 batch: 10.94092
val loss in 1 epoch: 7.38382
train loss in 1 epoch in 2 batch: 8.06776
val loss in 1 epoch: 7.29374
train loss in 1 epoch in 3 batch: 12.94835
val loss in 1 epoch: 7.37136
Round 250, winrate: 1.0, max_step: 154, reward: 1.0076, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/922470254.json'}
train loss in 1 epoch in 1 batch: 117.47575
val loss in 1 epoch: 108.81008


  if (await self.run_code(code, result,  async_=asy)):


Round 251, winrate: 0.0, max_step: 359, reward: 0.1346999999999995, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/591035737.json'}
train loss in 1 epoch in 1 batch: 6.25754
val loss in 1 epoch: 6.05529
Round 252, winrate: 0.0, max_step: 236, reward: 0.05599999999999974, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/23250075.json'}
train loss in 1 epoch in 1 batch: 10.54768
val loss in 1 epoch: 24.25507


  if (await self.run_code(code, result,  async_=asy)):


Round 253, winrate: 0.0, max_step: 315, reward: 0.025299999999999902, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/51796747.json'}
train loss in 1 epoch in 1 batch: 109.25944
val loss in 1 epoch: 101.99330


  if (await self.run_code(code, result,  async_=asy)):


Round 254, winrate: 0.0, max_step: 351, reward: 0.023099999999999916, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/51804998.json'}
train loss in 1 epoch in 1 batch: 101.73236
val loss in 1 epoch: 101.89784
Round 255, winrate: 0.0, max_step: 199, reward: 0.010599999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/918850355.json'}
train loss in 1 epoch in 1 batch: 111.47037
val loss in 1 epoch: 96.22057
Round 256, winrate: 0.0, max_step: 150, reward: 0.014699999999999967, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/196639904.json'}
train loss in 1 epoch in 1 batch: 3.77633
val loss in 1 epoch: 5.88007
Round 257, winrate: 0.0, max_step: 355, reward: 0.14649999999999855, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/772348806.json'}
train loss in 1 epoch in 1 batch: 10.06912

  if (await self.run_code(code, result,  async_=asy)):


Round 259, winrate: 0.0, max_step: 349, reward: 0.023399999999999914, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/816988264.json'}
train loss in 1 epoch in 1 batch: 96.57592
val loss in 1 epoch: 91.21517
Round 260, winrate: 0.0, max_step: 359, reward: 0.09419999999999931, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/414944266.json'}
train loss in 1 epoch in 1 batch: 6.94849
val loss in 1 epoch: 5.83231
Round 261, winrate: 0.0, max_step: 359, reward: 0.040500000000000154, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218768096.json'}
train loss in 1 epoch in 1 batch: 9.43448
val loss in 1 epoch: 8.67739


  if (await self.run_code(code, result,  async_=asy)):


Round 262, winrate: 1.0, max_step: 359, reward: 3.2111999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/610542285.json'}
train loss in 1 epoch in 1 batch: 5.73999
val loss in 1 epoch: 6.03939
train loss in 1 epoch in 2 batch: 1.87183
val loss in 1 epoch: 5.90039


  if (await self.run_code(code, result,  async_=asy)):


Round 263, winrate: 0.0, max_step: 319, reward: 0.1290999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/314751461.json'}
train loss in 1 epoch in 1 batch: 14.40554
val loss in 1 epoch: 7.25798


  if (await self.run_code(code, result,  async_=asy)):


Round 264, winrate: 0.0, max_step: 310, reward: 0.029599999999999876, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/113670318.json'}
train loss in 1 epoch in 1 batch: 17.80640
val loss in 1 epoch: 8.32572


  if (await self.run_code(code, result,  async_=asy)):


Round 265, winrate: 0.0, max_step: 359, reward: 0.04889999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/94157877.json'}
train loss in 1 epoch in 1 batch: 9.71427
val loss in 1 epoch: 4.77721
Round 266, winrate: 0.0, max_step: 310, reward: 0.1765999999999982, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/342154546.json'}
train loss in 1 epoch in 1 batch: 9.36287
val loss in 1 epoch: 4.96722
train loss in 1 epoch in 2 batch: 1.72342
val loss in 1 epoch: 4.81946
Round 267, winrate: 0.0, max_step: 234, reward: 0.0018000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/758621641.json'}
train loss in 1 epoch in 1 batch: 72.61855
val loss in 1 epoch: 74.23432


  if (await self.run_code(code, result,  async_=asy)):


Round 268, winrate: 0.0, max_step: 159, reward: 0.04349999999999985, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/639857071.json'}
train loss in 1 epoch in 1 batch: 98.16914
val loss in 1 epoch: 90.70955
Round 269, winrate: 0.0, max_step: 270, reward: 0.023599999999999913, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/35726716.json'}
train loss in 1 epoch in 1 batch: 5.47174
val loss in 1 epoch: 6.53862
Round 270, winrate: 0.0, max_step: 151, reward: 0.008500000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/833760288.json'}
train loss in 1 epoch in 1 batch: 74.26538
val loss in 1 epoch: 73.54098
Round 271, winrate: 0.0, max_step: 318, reward: 0.017999999999999947, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/596505449.json'}
train loss in 1 epoch in 1 batch: 98.24495
va

  if (await self.run_code(code, result,  async_=asy)):


Round 276, winrate: 0.0, max_step: 196, reward: 0.013499999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/918172834.json'}
train loss in 1 epoch in 1 batch: 4.81472
val loss in 1 epoch: 11.93752


  if (await self.run_code(code, result,  async_=asy)):


Round 277, winrate: 0.0, max_step: 359, reward: 0.08690000000000038, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/121744725.json'}
train loss in 1 epoch in 1 batch: 6.35639
val loss in 1 epoch: 4.98309
Round 278, winrate: 0.0, max_step: 349, reward: 0.024999999999999904, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/841052320.json'}
train loss in 1 epoch in 1 batch: 117.75623
val loss in 1 epoch: 104.25915


  if (await self.run_code(code, result,  async_=asy)):


Round 279, winrate: 0.0, max_step: 359, reward: 0.057799999999999706, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/368454705.json'}
train loss in 1 epoch in 1 batch: 9.16526
val loss in 1 epoch: 4.37678
Round 280, winrate: 0.0, max_step: 231, reward: 0.021199999999999927, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/572330789.json'}
train loss in 1 epoch in 1 batch: 100.97775
val loss in 1 epoch: 93.72052


  if (await self.run_code(code, result,  async_=asy)):


Round 281, winrate: 0.0, max_step: 279, reward: 0.028099999999999885, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/878061684.json'}
train loss in 1 epoch in 1 batch: 12.24134
val loss in 1 epoch: 5.39210
Round 282, winrate: 0.0, max_step: 199, reward: 0.013499999999999974, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/508637049.json'}
train loss in 1 epoch in 1 batch: 71.48084
val loss in 1 epoch: 73.43184
Round 283, winrate: 0.0, max_step: 318, reward: 0.08190000000000025, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/418394767.json'}
train loss in 1 epoch in 1 batch: 8.88478
val loss in 1 epoch: 5.49684


  if (await self.run_code(code, result,  async_=asy)):


Round 284, winrate: 0.0, max_step: 199, reward: 0.015299999999999963, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/411791308.json'}
train loss in 1 epoch in 1 batch: 88.46063
val loss in 1 epoch: 88.67244
Round 285, winrate: 0.0, max_step: 270, reward: 0.08720000000000044, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/10317473.json'}
train loss in 1 epoch in 1 batch: 14.59575
val loss in 1 epoch: 9.52555
Round 286, winrate: 0.0, max_step: 190, reward: 0.01889999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/622873037.json'}
train loss in 1 epoch in 1 batch: 4.58355
val loss in 1 epoch: 1.88027
Round 287, winrate: 0.0, max_step: 154, reward: 0.048499999999999786, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/620559351.json'}
train loss in 1 epoch in 1 batch: 27.36492
val 

  if (await self.run_code(code, result,  async_=asy)):


Round 289, winrate: 0.0, max_step: 352, reward: 0.031799999999999884, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/975522479.json'}
train loss in 1 epoch in 1 batch: 66.25668
val loss in 1 epoch: 61.88345
Round 290, winrate: 0.0, max_step: 158, reward: 0.01579999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/100556268.json'}
train loss in 1 epoch in 1 batch: 79.13387
val loss in 1 epoch: 73.88601
Round 291, winrate: 0.0, max_step: 238, reward: 0.0021000000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/422279215.json'}
train loss in 1 epoch in 1 batch: 70.18085
val loss in 1 epoch: 73.10686


  if (await self.run_code(code, result,  async_=asy)):


Round 292, winrate: 0.0, max_step: 236, reward: 0.2860999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/5563564.json'}
train loss in 1 epoch in 1 batch: 6.54659
val loss in 1 epoch: 7.51732
train loss in 1 epoch in 2 batch: 1.76589
val loss in 1 epoch: 7.58606
Round 293, winrate: 0.0, max_step: 359, reward: 0.08110000000000021, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/282341798.json'}
train loss in 1 epoch in 1 batch: 3.50639
val loss in 1 epoch: 2.75564


  if (await self.run_code(code, result,  async_=asy)):


Round 294, winrate: 0.0, max_step: 350, reward: 0.009499999999999998, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/721663450.json'}
train loss in 1 epoch in 1 batch: 79.25822
val loss in 1 epoch: 76.68885
Round 295, winrate: 0.0, max_step: 359, reward: 0.19449999999999776, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/524644576.json'}
train loss in 1 epoch in 1 batch: 10.85058
val loss in 1 epoch: 6.27752
train loss in 1 epoch in 2 batch: 1.73119
val loss in 1 epoch: 6.18231


  if (await self.run_code(code, result,  async_=asy)):


Round 296, winrate: 0.0, max_step: 276, reward: 0.023099999999999916, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/234810708.json'}
train loss in 1 epoch in 1 batch: 4.44381
val loss in 1 epoch: 6.18718


  if (await self.run_code(code, result,  async_=asy)):


Round 297, winrate: 0.0, max_step: 230, reward: 0.016699999999999954, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/857257094.json'}
train loss in 1 epoch in 1 batch: 62.58701
val loss in 1 epoch: 58.54592


  if (await self.run_code(code, result,  async_=asy)):


Round 298, winrate: 0.0, max_step: 270, reward: 0.013299999999999975, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/806161868.json'}
train loss in 1 epoch in 1 batch: 57.68322
val loss in 1 epoch: 52.76324
Round 299, winrate: 0.0, max_step: 110, reward: 0.01079999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/212837082.json'}
train loss in 1 epoch in 1 batch: 3.38099
val loss in 1 epoch: 6.27296


  if (await self.run_code(code, result,  async_=asy)):


Round 300, winrate: 0.0, max_step: 359, reward: 0.022099999999999922, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/863814865.json'}
train loss in 1 epoch in 1 batch: 71.88177
val loss in 1 epoch: 67.88151


  if (await self.run_code(code, result,  async_=asy)):


Round 301, winrate: 0.0, max_step: 358, reward: 0.028499999999999883, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/374864757.json'}
train loss in 1 epoch in 1 batch: 57.25742
val loss in 1 epoch: 50.39101
Round 302, winrate: 0.0, max_step: 359, reward: 0.0898000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/585323358.json'}
train loss in 1 epoch in 1 batch: 4.46972
val loss in 1 epoch: 3.95113
Round 303, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/637499.json'}
train loss in 1 epoch in 1 batch: 53.30433
val loss in 1 epoch: 47.79302
Round 304, winrate: 0.0, max_step: 358, reward: 0.15389999999999818, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/716111135.json'}
train loss in 1 epoch in 1 batch: 4.76321
val loss in 1 epoch: 

  if (await self.run_code(code, result,  async_=asy)):


Round 307, winrate: 0.0, max_step: 234, reward: 0.015299999999999963, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/303357813.json'}
train loss in 1 epoch in 1 batch: 65.95689
val loss in 1 epoch: 65.05501
Round 308, winrate: 0.0, max_step: 359, reward: 0.0981000000000007, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/148017079.json'}
train loss in 1 epoch in 1 batch: 5.47286
val loss in 1 epoch: 4.12136
Round 309, winrate: 0.0, max_step: 314, reward: 0.5026999999999944, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/163002273.json'}
train loss in 1 epoch in 1 batch: 4.25221
val loss in 1 epoch: 4.60439
train loss in 1 epoch in 2 batch: 1.40527
val loss in 1 epoch: 4.55869
Round 310, winrate: 0.0, max_step: 230, reward: 0.018599999999999943, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'rep

  if (await self.run_code(code, result,  async_=asy)):


Round 312, winrate: 0.0, max_step: 273, reward: 0.024799999999999905, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/930834848.json'}
train loss in 1 epoch in 1 batch: 49.71632
val loss in 1 epoch: 47.69840


  if (await self.run_code(code, result,  async_=asy)):


Round 313, winrate: 0.0, max_step: 190, reward: 0.015599999999999961, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/710987272.json'}
train loss in 1 epoch in 1 batch: 5.10888
val loss in 1 epoch: 7.13015


  if (await self.run_code(code, result,  async_=asy)):


Round 314, winrate: 0.0, max_step: 230, reward: 0.010499999999999992, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/716716552.json'}
train loss in 1 epoch in 1 batch: 63.96305
val loss in 1 epoch: 67.70107
Round 315, winrate: 0.0, max_step: 310, reward: 0.1187000000000017, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/501133161.json'}
train loss in 1 epoch in 1 batch: 6.06220
val loss in 1 epoch: 5.78986
Round 316, winrate: 0.0, max_step: 112, reward: 0.030299999999999928, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/346047293.json'}
train loss in 1 epoch in 1 batch: 4.93820
val loss in 1 epoch: 4.78024
Round 317, winrate: 1.0, max_step: 150, reward: 2.0389, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/233432823.json'}
train loss in 1 epoch in 1 batch: 2.67855
val loss in 1 epoch

  if (await self.run_code(code, result,  async_=asy)):


Round 325, winrate: 0.0, max_step: 359, reward: 0.014699999999999967, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/763364343.json'}
train loss in 1 epoch in 1 batch: 71.24455
val loss in 1 epoch: 66.57044
Round 326, winrate: 0.0, max_step: 198, reward: 0.005400000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/83845785.json'}
train loss in 1 epoch in 1 batch: 44.61588
val loss in 1 epoch: 42.79440
Round 327, winrate: 0.0, max_step: 315, reward: 0.009799999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/850225603.json'}
train loss in 1 epoch in 1 batch: 59.23050
val loss in 1 epoch: 58.50105
Round 328, winrate: 0.0, max_step: 359, reward: 0.11210000000000116, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/542491570.json'}
train loss in 1 epoch in 1 batch: 2.95947
v

  if (await self.run_code(code, result,  async_=asy)):


Round 329, winrate: 0.0, max_step: 316, reward: 0.0693999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/317070217.json'}
train loss in 1 epoch in 1 batch: 2.72825
val loss in 1 epoch: 4.06228
Round 330, winrate: 0.0, max_step: 156, reward: 0.010099999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/844262656.json'}
train loss in 1 epoch in 1 batch: 45.49133
val loss in 1 epoch: 39.00880
Round 331, winrate: 0.0, max_step: 189, reward: 0.016899999999999953, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/844699060.json'}
train loss in 1 epoch in 1 batch: 59.22445
val loss in 1 epoch: 52.86372


  if (await self.run_code(code, result,  async_=asy)):


Round 332, winrate: 0.0, max_step: 274, reward: 0.021099999999999928, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/722306095.json'}
train loss in 1 epoch in 1 batch: 37.27529
val loss in 1 epoch: 35.99054
Round 333, winrate: 0.0, max_step: 154, reward: 0.012899999999999977, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/185933804.json'}
train loss in 1 epoch in 1 batch: 38.28502
val loss in 1 epoch: 36.50463
Round 334, winrate: 0.0, max_step: 315, reward: 0.031099999999999867, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/849408786.json'}
train loss in 1 epoch in 1 batch: 21.48433
val loss in 1 epoch: 4.05931
Round 335, winrate: 0.0, max_step: 158, reward: 0.02589999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/45167558.json'}
train loss in 1 epoch in 1 batch: 35.25902
v

  if (await self.run_code(code, result,  async_=asy)):


Round 344, winrate: 0.0, max_step: 350, reward: 0.025399999999999902, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/274387573.json'}
train loss in 1 epoch in 1 batch: 27.13529
val loss in 1 epoch: 23.69228
Round 345, winrate: 0.0, max_step: 235, reward: 0.0036999999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/519376408.json'}
train loss in 1 epoch in 1 batch: 24.45272
val loss in 1 epoch: 23.64823


  if (await self.run_code(code, result,  async_=asy)):


Round 346, winrate: 0.0, max_step: 271, reward: 0.25770000000000115, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/402092820.json'}
train loss in 1 epoch in 1 batch: 3.14244
val loss in 1 epoch: 3.98109
train loss in 1 epoch in 2 batch: 1.65533
val loss in 1 epoch: 3.87503
Round 347, winrate: 0.0, max_step: 194, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/70704305.json'}
train loss in 1 epoch in 1 batch: 25.86508
val loss in 1 epoch: 22.22495
Round 348, winrate: 0.0, max_step: 190, reward: 0.012799999999999978, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/98173523.json'}
train loss in 1 epoch in 1 batch: 21.33298
val loss in 1 epoch: 25.73495
Round 349, winrate: 0.0, max_step: 79, reward: 0.0078000000000000074, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 

  if (await self.run_code(code, result,  async_=asy)):


Round 353, winrate: 0.0, max_step: 351, reward: 0.04859999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/799637117.json'}
train loss in 1 epoch in 1 batch: 3.87006
val loss in 1 epoch: 2.77584
Round 354, winrate: 0.0, max_step: 275, reward: 0.02749999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/994185185.json'}
train loss in 1 epoch in 1 batch: 23.37912
val loss in 1 epoch: 20.69512
Round 355, winrate: 0.0, max_step: 234, reward: 0.23530000000000265, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/993123070.json'}
train loss in 1 epoch in 1 batch: 3.35426
val loss in 1 epoch: 5.86751
train loss in 1 epoch in 2 batch: 1.21028
val loss in 1 epoch: 5.74538
Round 356, winrate: 0.0, max_step: 119, reward: 0.0078000000000000074, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'r

  if (await self.run_code(code, result,  async_=asy)):


Round 357, winrate: 0.0, max_step: 191, reward: 0.01909999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/958913704.json'}
train loss in 1 epoch in 1 batch: 46.18267
val loss in 1 epoch: 37.06425
Round 358, winrate: 0.0, max_step: 235, reward: 0.0018000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/917458633.json'}
train loss in 1 epoch in 1 batch: 39.55765
val loss in 1 epoch: 32.07188
Round 359, winrate: 0.0, max_step: 353, reward: 0.11860000000000134, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/553817060.json'}
train loss in 1 epoch in 1 batch: 5.01466
val loss in 1 epoch: 3.55482


  if (await self.run_code(code, result,  async_=asy)):


Round 360, winrate: 0.0, max_step: 231, reward: 0.022799999999999918, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/613461693.json'}
train loss in 1 epoch in 1 batch: 17.16581
val loss in 1 epoch: 17.12316
Round 361, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/473300733.json'}
train loss in 1 epoch in 1 batch: 19.43937
val loss in 1 epoch: 15.87923
Round 362, winrate: 0.5, max_step: 273, reward: 2.306800000000005, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/68509339.json'}
train loss in 1 epoch in 1 batch: 4.92297
val loss in 1 epoch: 3.83679
train loss in 1 epoch in 2 batch: 1.20835
val loss in 1 epoch: 3.76057
Round 363, winrate: 0.0, max_step: 359, reward: 0.2519000000000011, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/452721581

  if (await self.run_code(code, result,  async_=asy)):


Round 366, winrate: 0.0, max_step: 311, reward: 0.026199999999999897, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/450295223.json'}
train loss in 1 epoch in 1 batch: 30.11380
val loss in 1 epoch: 26.16417
Round 367, winrate: 0.0, max_step: 114, reward: 0.02619999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/389607854.json'}
train loss in 1 epoch in 1 batch: 5.21458
val loss in 1 epoch: 4.09256


  if (await self.run_code(code, result,  async_=asy)):


Round 368, winrate: 1.0, max_step: 314, reward: 1.0308, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/709353853.json'}
train loss in 1 epoch in 1 batch: 28.02476
val loss in 1 epoch: 23.44702
Round 369, winrate: 0.0, max_step: 310, reward: 0.02749999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/644242367.json'}
train loss in 1 epoch in 1 batch: 2.93081
val loss in 1 epoch: 3.18908
Round 370, winrate: 0.0, max_step: 199, reward: 0.009000000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/497334504.json'}
train loss in 1 epoch in 1 batch: 25.61839
val loss in 1 epoch: 22.04610
Round 371, winrate: 0.0, max_step: 359, reward: 0.07880000000000019, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/733397965.json'}
train loss in 1 epoch in 1 batch: 21.99701
val loss in 1 ep

  if (await self.run_code(code, result,  async_=asy)):


Round 374, winrate: 0.0, max_step: 359, reward: 0.6412999999999992, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/582479007.json'}
train loss in 1 epoch in 1 batch: 5.50375
val loss in 1 epoch: 4.21439
train loss in 1 epoch in 2 batch: 4.25710
val loss in 1 epoch: 4.20895


  if (await self.run_code(code, result,  async_=asy)):


Round 375, winrate: 0.0, max_step: 270, reward: 0.026999999999999892, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/872631779.json'}
train loss in 1 epoch in 1 batch: 13.71995
val loss in 1 epoch: 12.32800
Round 376, winrate: 0.0, max_step: 109, reward: 0.0039999999999999975, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/662163068.json'}
train loss in 1 epoch in 1 batch: 12.94692
val loss in 1 epoch: 13.27737
Round 377, winrate: 0.0, max_step: 277, reward: 0.027699999999999888, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/508885519.json'}
train loss in 1 epoch in 1 batch: 23.39262
val loss in 1 epoch: 20.08463
Round 378, winrate: 0.0, max_step: 309, reward: 0.01749999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/405119220.json'}
train loss in 1 epoch in 1 batch: 21.0025

  if (await self.run_code(code, result,  async_=asy)):


Round 383, winrate: 0.0, max_step: 277, reward: 0.01749999999999995, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/815683415.json'}
train loss in 1 epoch in 1 batch: 10.14004
val loss in 1 epoch: 5.76366
Round 384, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/152891622.json'}
train loss in 1 epoch in 1 batch: 34.33413
val loss in 1 epoch: 28.25877
Round 385, winrate: 0.0, max_step: 195, reward: 0.009799999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/356589118.json'}
train loss in 1 epoch in 1 batch: 27.21901
val loss in 1 epoch: 21.25464
Round 386, winrate: 0.0, max_step: 158, reward: 0.0017000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/965892534.json'}
train loss in 1 epoch in 1 batch: 20.49730

  if (await self.run_code(code, result,  async_=asy)):


Round 391, winrate: 0.0, max_step: 359, reward: 0.021699999999999924, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/251497322.json'}
train loss in 1 epoch in 1 batch: 11.48966
val loss in 1 epoch: 12.16344


  if (await self.run_code(code, result,  async_=asy)):


Round 392, winrate: 0.0, max_step: 359, reward: 0.09550000000000065, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/494073372.json'}
train loss in 1 epoch in 1 batch: 5.92867
val loss in 1 epoch: 4.71006
Round 393, winrate: 0.0, max_step: 359, reward: 0.0059000000000000025, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/856526519.json'}
train loss in 1 epoch in 1 batch: 12.31260
val loss in 1 epoch: 11.88714
Round 394, winrate: 0.0, max_step: 71, reward: 0.004099999999999998, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/40157939.json'}
train loss in 1 epoch in 1 batch: 2.52431
val loss in 1 epoch: 4.23897
Round 395, winrate: 0.0, max_step: 359, reward: 0.10470000000000092, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/50846189.json'}
train loss in 1 epoch in 1 batch: 2.21531
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 407, winrate: 0.0, max_step: 359, reward: 0.04439999999999979, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/439683900.json'}
train loss in 1 epoch in 1 batch: 30.00005
val loss in 1 epoch: 24.57972
Round 408, winrate: 0.0, max_step: 234, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/609605666.json'}
train loss in 1 epoch in 1 batch: 25.78847
val loss in 1 epoch: 20.07995
Round 409, winrate: 0.0, max_step: 359, reward: 0.030199999999999873, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/907259755.json'}
train loss in 1 epoch in 1 batch: 11.90615
val loss in 1 epoch: 9.89060
Round 410, winrate: 0.0, max_step: 359, reward: 0.11930000000000136, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/590135470.json'}
train loss in 1 epoch in 1 batch: 2.02524
v

  if (await self.run_code(code, result,  async_=asy)):


Round 412, winrate: 0.0, max_step: 359, reward: 0.03379999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/837378034.json'}
train loss in 1 epoch in 1 batch: 11.14174
val loss in 1 epoch: 9.79614
Round 413, winrate: 0.0, max_step: 359, reward: 0.03129999999999987, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/673384519.json'}
train loss in 1 epoch in 1 batch: 2.15843
val loss in 1 epoch: 4.26684


  if (await self.run_code(code, result,  async_=asy)):


Round 414, winrate: 0.0, max_step: 359, reward: 0.016899999999999953, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/123406564.json'}
train loss in 1 epoch in 1 batch: 22.65134
val loss in 1 epoch: 17.56223
Round 415, winrate: 0.0, max_step: 359, reward: 0.11500000000000064, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/270514505.json'}
train loss in 1 epoch in 1 batch: 3.18192
val loss in 1 epoch: 2.07880
Round 416, winrate: 0.0, max_step: 310, reward: 0.02739999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/548078540.json'}
train loss in 1 epoch in 1 batch: 2.24900
val loss in 1 epoch: 2.83736
Round 417, winrate: 0.0, max_step: 359, reward: 0.08980000000000048, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/81508979.json'}
train loss in 1 epoch in 1 batch: 10.20421
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 420, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/281947057.json'}
train loss in 1 epoch in 1 batch: 9.78907
val loss in 1 epoch: 8.26849
Round 421, winrate: 0.0, max_step: 230, reward: 0.06499999999999981, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/785042775.json'}
train loss in 1 epoch in 1 batch: 19.38627
val loss in 1 epoch: 14.24694
Round 422, winrate: 0.0, max_step: 313, reward: 0.9286000000000142, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/392104575.json'}
train loss in 1 epoch in 1 batch: 2.76808
val loss in 1 epoch: 2.58237
train loss in 1 epoch in 2 batch: 1.21602
val loss in 1 epoch: 2.59829
train loss in 1 epoch in 3 batch: 4.52614
val loss in 1 epoch: 2.57998
Round 423, winrate: 0.0, max_step: 153, reward: 0.015299999999999963, example: {'ranks': [{'rank': 1, 'ag

  if (await self.run_code(code, result,  async_=asy)):


Round 426, winrate: 0.0, max_step: 314, reward: 0.16999999999999824, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/522836138.json'}
train loss in 1 epoch in 1 batch: 1.73055
val loss in 1 epoch: 2.34033
train loss in 1 epoch in 2 batch: 0.67497
val loss in 1 epoch: 2.30347
Round 427, winrate: 0.0, max_step: 279, reward: 0.1640999999999984, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/876434722.json'}
train loss in 1 epoch in 1 batch: 4.25227
val loss in 1 epoch: 2.68151
Round 428, winrate: 0.0, max_step: 359, reward: 0.09670000000000067, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/293879180.json'}
train loss in 1 epoch in 1 batch: 1.61299
val loss in 1 epoch: 1.50113
Round 429, winrate: 0.0, max_step: 359, reward: 0.021699999999999924, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'repla

  if (await self.run_code(code, result,  async_=asy)):


Round 439, winrate: 0.0, max_step: 190, reward: 0.013899999999999971, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/385934420.json'}
train loss in 1 epoch in 1 batch: 7.76719
val loss in 1 epoch: 6.49745
Round 440, winrate: 0.0, max_step: 359, reward: 0.04259999999999987, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/738127222.json'}
train loss in 1 epoch in 1 batch: 1.53643
val loss in 1 epoch: 2.07457
Round 441, winrate: 0.0, max_step: 230, reward: 0.03409999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/949784717.json'}
train loss in 1 epoch in 1 batch: 1.61178
val loss in 1 epoch: 4.10923
Round 442, winrate: 0.0, max_step: 190, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/621379426.json'}
train loss in 1 epoch in 1 batch: 6.29474
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 443, winrate: 0.0, max_step: 351, reward: 0.03349999999999993, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/863567001.json'}
train loss in 1 epoch in 1 batch: 6.01510
val loss in 1 epoch: 6.35715


  if (await self.run_code(code, result,  async_=asy)):


Round 444, winrate: 0.0, max_step: 359, reward: 0.032899999999999915, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/383802755.json'}
train loss in 1 epoch in 1 batch: 6.97758
val loss in 1 epoch: 6.58897
Round 445, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/615303267.json'}
train loss in 1 epoch in 1 batch: 7.42994
val loss in 1 epoch: 6.14754
Round 446, winrate: 0.0, max_step: 350, reward: 0.03379999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/70480415.json'}
train loss in 1 epoch in 1 batch: 9.04558
val loss in 1 epoch: 2.85916


  if (await self.run_code(code, result,  async_=asy)):


Round 447, winrate: 0.0, max_step: 351, reward: 0.03299999999999992, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/272945505.json'}
train loss in 1 epoch in 1 batch: 6.64864
val loss in 1 epoch: 5.61925
Round 448, winrate: 0.0, max_step: 152, reward: 0.015199999999999964, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/328806364.json'}
train loss in 1 epoch in 1 batch: 6.61721
val loss in 1 epoch: 5.81270


  if (await self.run_code(code, result,  async_=asy)):


Round 449, winrate: 0.0, max_step: 272, reward: 0.02719999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/251817659.json'}
train loss in 1 epoch in 1 batch: 6.10639
val loss in 1 epoch: 6.28564
Round 450, winrate: 0.0, max_step: 359, reward: 0.0322999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/130639423.json'}
train loss in 1 epoch in 1 batch: 5.11888
val loss in 1 epoch: 4.78136
Round 451, winrate: 0.0, max_step: 276, reward: 0.025199999999999903, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/98734623.json'}
train loss in 1 epoch in 1 batch: 5.24622
val loss in 1 epoch: 4.44578
Round 452, winrate: 0.0, max_step: 115, reward: 0.030299999999999924, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/585956746.json'}
train loss in 1 epoch in 1 batch: 6.14735
val loss 

  if (await self.run_code(code, result,  async_=asy)):


Round 457, winrate: 0.0, max_step: 278, reward: 0.018299999999999945, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/781284706.json'}
train loss in 1 epoch in 1 batch: 4.00670
val loss in 1 epoch: 2.87579
Round 458, winrate: 0.0, max_step: 239, reward: 0.024899999999999905, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/240375229.json'}
train loss in 1 epoch in 1 batch: 6.55770
val loss in 1 epoch: 5.94958
Round 459, winrate: 0.0, max_step: 277, reward: 0.027699999999999888, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/542553298.json'}
train loss in 1 epoch in 1 batch: 6.48368
val loss in 1 epoch: 5.58819


  if (await self.run_code(code, result,  async_=asy)):


Round 460, winrate: 0.0, max_step: 197, reward: 0.018599999999999943, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/542898890.json'}
train loss in 1 epoch in 1 batch: 1.24025
val loss in 1 epoch: 0.74442
Round 461, winrate: 0.0, max_step: 159, reward: 0.013399999999999974, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/428039457.json'}
train loss in 1 epoch in 1 batch: 14.32427
val loss in 1 epoch: 12.52258
Round 462, winrate: 0.0, max_step: 193, reward: 0.01929999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/914971328.json'}
train loss in 1 epoch in 1 batch: 0.96620
val loss in 1 epoch: 2.38279
Round 463, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/252550776.json'}
train loss in 1 epoch in 1 batch: 6.05879
val loss in 1 epoc

  if (await self.run_code(code, result,  async_=asy)):


Round 470, winrate: 0.0, max_step: 359, reward: 0.03349999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/86647640.json'}
train loss in 1 epoch in 1 batch: 10.23349
val loss in 1 epoch: 8.66812


  if (await self.run_code(code, result,  async_=asy)):


Round 471, winrate: 0.0, max_step: 271, reward: 0.0892000000000005, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/370738517.json'}
train loss in 1 epoch in 1 batch: 4.97719
val loss in 1 epoch: 2.53926


  if (await self.run_code(code, result,  async_=asy)):


Round 472, winrate: 0.0, max_step: 357, reward: 0.06269999999999974, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/611349564.json'}
train loss in 1 epoch in 1 batch: 3.83666
val loss in 1 epoch: 1.84815
Round 473, winrate: 0.0, max_step: 314, reward: 0.0059000000000000025, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/200865510.json'}
train loss in 1 epoch in 1 batch: 6.24067
val loss in 1 epoch: 5.44681


  if (await self.run_code(code, result,  async_=asy)):


Round 474, winrate: 0.0, max_step: 270, reward: 0.008500000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/284082176.json'}
train loss in 1 epoch in 1 batch: 9.03726
val loss in 1 epoch: 7.96215
Round 475, winrate: 0.0, max_step: 277, reward: 0.02419999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/37985537.json'}
train loss in 1 epoch in 1 batch: 9.94946
val loss in 1 epoch: 3.49138
Round 476, winrate: 0.0, max_step: 359, reward: 0.09990000000000077, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/801034007.json'}
train loss in 1 epoch in 1 batch: 1.54998
val loss in 1 epoch: 1.83059
Round 477, winrate: 0.0, max_step: 359, reward: 0.09800000000000071, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/912823586.json'}
train loss in 1 epoch in 1 batch: 1.26071
val loss 

  if (await self.run_code(code, result,  async_=asy)):


Round 478, winrate: 0.0, max_step: 355, reward: 0.1509999999999985, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/763743906.json'}
train loss in 1 epoch in 1 batch: 3.21374
val loss in 1 epoch: 3.13956
train loss in 1 epoch in 2 batch: 0.58000
val loss in 1 epoch: 3.10793
Round 479, winrate: 0.0, max_step: 198, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/124173540.json'}
train loss in 1 epoch in 1 batch: 1.62903
val loss in 1 epoch: 1.39538
Round 480, winrate: 0.0, max_step: 359, reward: 0.11170000000000067, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/237762688.json'}
train loss in 1 epoch in 1 batch: 2.33704
val loss in 1 epoch: 1.95861
Round 481, winrate: 0.0, max_step: 359, reward: 0.12380000000000149, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'repla

  if (await self.run_code(code, result,  async_=asy)):


Round 487, winrate: 0.0, max_step: 354, reward: 0.03189999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/446056185.json'}
train loss in 1 epoch in 1 batch: 4.00020
val loss in 1 epoch: 2.56916
Round 488, winrate: 0.0, max_step: 277, reward: 0.021499999999999925, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/924659847.json'}
train loss in 1 epoch in 1 batch: 10.75512
val loss in 1 epoch: 9.55831
Round 489, winrate: 0.0, max_step: 234, reward: 0.0078000000000000074, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/979225765.json'}
train loss in 1 epoch in 1 batch: 9.65639
val loss in 1 epoch: 8.46480
Round 490, winrate: 0.0, max_step: 359, reward: 0.23709999999999948, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/453627034.json'}
train loss in 1 epoch in 1 batch: 4.94408
val l

  if (await self.run_code(code, result,  async_=asy)):


Round 491, winrate: 0.0, max_step: 359, reward: 0.07460000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/470808429.json'}
train loss in 1 epoch in 1 batch: 1.12221
val loss in 1 epoch: 0.95007
Round 492, winrate: 0.0, max_step: 316, reward: 0.11350000000000118, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/870128140.json'}
train loss in 1 epoch in 1 batch: 1.91575
val loss in 1 epoch: 1.18483


  if (await self.run_code(code, result,  async_=asy)):


Round 493, winrate: 0.0, max_step: 359, reward: 0.09479999999999948, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/748571090.json'}
train loss in 1 epoch in 1 batch: 1.96143
val loss in 1 epoch: 1.84100
Round 494, winrate: 0.0, max_step: 359, reward: 0.07820000000000013, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/319248652.json'}
train loss in 1 epoch in 1 batch: 2.32784
val loss in 1 epoch: 1.09321
Round 495, winrate: 0.0, max_step: 194, reward: 0.0043999999999999985, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/163480217.json'}
train loss in 1 epoch in 1 batch: 8.03052
val loss in 1 epoch: 6.92081
Round 496, winrate: 0.0, max_step: 189, reward: 0.017799999999999948, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/401275577.json'}
train loss in 1 epoch in 1 batch: 4.56588
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 498, winrate: 1.0, max_step: 316, reward: 2.0935000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/897878633.json'}
train loss in 1 epoch in 1 batch: 4.59352
val loss in 1 epoch: 3.96263
Round 499, winrate: 0.0, max_step: 359, reward: 0.12200000000000163, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/875360407.json'}
train loss in 1 epoch in 1 batch: 5.14246
val loss in 1 epoch: 2.14501
Round 500, winrate: 0.0, max_step: 270, reward: 0.0823000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/831847258.json'}
train loss in 1 epoch in 1 batch: 0.98480
val loss in 1 epoch: 2.15993
Round 501, winrate: 0.0, max_step: 359, reward: 0.13139999999999954, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/891313239.json'}
train loss in 1 epoch in 1 batch: 1.46204
val loss in

  if (await self.run_code(code, result,  async_=asy)):


Round 502, winrate: 0.0, max_step: 197, reward: 0.015299999999999963, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/382446995.json'}
train loss in 1 epoch in 1 batch: 4.32739
val loss in 1 epoch: 4.19834
Round 503, winrate: 0.0, max_step: 154, reward: 0.052099999999999806, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/89416765.json'}
train loss in 1 epoch in 1 batch: 0.76806
val loss in 1 epoch: 1.46463
Round 504, winrate: 0.0, max_step: 317, reward: 0.09909999999999937, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/263026055.json'}
train loss in 1 epoch in 1 batch: 3.80843
val loss in 1 epoch: 1.65961
Round 505, winrate: 0.0, max_step: 274, reward: 0.005800000000000002, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/432151433.json'}
train loss in 1 epoch in 1 batch: 4.14580
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 507, winrate: 0.0, max_step: 239, reward: 0.02389999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/65283696.json'}
train loss in 1 epoch in 1 batch: 4.25282
val loss in 1 epoch: 4.39555
Round 508, winrate: 0.0, max_step: 357, reward: 0.318200000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/324033189.json'}
train loss in 1 epoch in 1 batch: 2.98537
val loss in 1 epoch: 1.92733
train loss in 1 epoch in 2 batch: 3.14668
val loss in 1 epoch: 1.95322
Round 509, winrate: 0.0, max_step: 275, reward: 0.0022, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/25224570.json'}
train loss in 1 epoch in 1 batch: 5.02843
val loss in 1 epoch: 4.23202


  if (await self.run_code(code, result,  async_=asy)):


Round 510, winrate: 0.0, max_step: 359, reward: 0.03479999999999997, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/256995256.json'}
train loss in 1 epoch in 1 batch: 1.79499
val loss in 1 epoch: 0.88817


  if (await self.run_code(code, result,  async_=asy)):


Round 511, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/83161205.json'}
train loss in 1 epoch in 1 batch: 7.08470
val loss in 1 epoch: 8.12586
Round 512, winrate: 0.0, max_step: 195, reward: 0.0078000000000000074, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/460832195.json'}
train loss in 1 epoch in 1 batch: 9.30126
val loss in 1 epoch: 7.57132
Round 513, winrate: 0.0, max_step: 118, reward: 0.039800000000000224, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/425083540.json'}
train loss in 1 epoch in 1 batch: 0.71529
val loss in 1 epoch: 2.31945
Round 514, winrate: 0.5, max_step: 359, reward: 0.5327999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/977758534.json'}
train loss in 1 epoch in 1 batch: 8.21247
val loss in 1 epoch: 

  if (await self.run_code(code, result,  async_=asy)):


Round 516, winrate: 0.0, max_step: 352, reward: 0.018399999999999944, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/264424191.json'}
train loss in 1 epoch in 1 batch: 7.40693
val loss in 1 epoch: 6.05765
Round 517, winrate: 0.0, max_step: 354, reward: 0.15759999999999838, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/664551457.json'}
train loss in 1 epoch in 1 batch: 0.81983
val loss in 1 epoch: 2.58162
train loss in 1 epoch in 2 batch: 0.38239
val loss in 1 epoch: 2.56401


  if (await self.run_code(code, result,  async_=asy)):


Round 518, winrate: 0.0, max_step: 359, reward: 0.029299999999999878, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/65763433.json'}
train loss in 1 epoch in 1 batch: 7.34735
val loss in 1 epoch: 6.18695
Round 519, winrate: 0.5, max_step: 358, reward: 0.5249999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/508848152.json'}
train loss in 1 epoch in 1 batch: 6.28988
val loss in 1 epoch: 5.61913
Round 520, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/824285868.json'}
train loss in 1 epoch in 1 batch: 0.62567
val loss in 1 epoch: 0.74822
Round 521, winrate: 0.0, max_step: 195, reward: 0.05259999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/885692434.json'}
train loss in 1 epoch in 1 batch: 0.99274
val loss 

  if (await self.run_code(code, result,  async_=asy)):


Round 525, winrate: 0.0, max_step: 276, reward: 0.02759999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/1211914.json'}
train loss in 1 epoch in 1 batch: 5.03034
val loss in 1 epoch: 5.61982
Round 526, winrate: 0.0, max_step: 195, reward: 0.05299999999999973, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/330066297.json'}
train loss in 1 epoch in 1 batch: 4.10359
val loss in 1 epoch: 1.97499


  if (await self.run_code(code, result,  async_=asy)):


Round 527, winrate: 0.5, max_step: 359, reward: 0.5335, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/434384137.json'}
train loss in 1 epoch in 1 batch: 1.17891
val loss in 1 epoch: 1.01880
Round 528, winrate: 0.0, max_step: 154, reward: 0.010499999999999992, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/144219916.json'}
train loss in 1 epoch in 1 batch: 6.00354
val loss in 1 epoch: 5.23953
Round 529, winrate: 0.0, max_step: 156, reward: 0.013599999999999973, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/968628105.json'}
train loss in 1 epoch in 1 batch: 5.30672
val loss in 1 epoch: 4.81757


  if (await self.run_code(code, result,  async_=asy)):


Round 530, winrate: 0.0, max_step: 274, reward: 0.08630000000000042, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/255410497.json'}
train loss in 1 epoch in 1 batch: 0.81173
val loss in 1 epoch: 0.98191
Round 531, winrate: 0.0, max_step: 359, reward: 0.02709999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/815218976.json'}
train loss in 1 epoch in 1 batch: 3.26964
val loss in 1 epoch: 3.18421
Round 532, winrate: 0.0, max_step: 235, reward: 0.23970000000000002, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/682360505.json'}
train loss in 1 epoch in 1 batch: 1.64214
val loss in 1 epoch: 1.47817
train loss in 1 epoch in 2 batch: 0.30189
val loss in 1 epoch: 1.48321
Round 533, winrate: 0.0, max_step: 358, reward: 0.0358, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/586456516.

  if (await self.run_code(code, result,  async_=asy)):


Round 542, winrate: 0.0, max_step: 354, reward: 0.02889999999999988, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/352802423.json'}
train loss in 1 epoch in 1 batch: 4.14838
val loss in 1 epoch: 3.63839
Round 543, winrate: 0.0, max_step: 119, reward: 0.007100000000000006, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/16935015.json'}
train loss in 1 epoch in 1 batch: 11.33011
val loss in 1 epoch: 7.71581
Round 544, winrate: 0.0, max_step: 358, reward: 0.03469999999999997, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/700770333.json'}
train loss in 1 epoch in 1 batch: 3.64931
val loss in 1 epoch: 3.45232


  if (await self.run_code(code, result,  async_=asy)):


Round 545, winrate: 0.0, max_step: 316, reward: 0.029799999999999875, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/370686397.json'}
train loss in 1 epoch in 1 batch: 3.17314
val loss in 1 epoch: 3.07313


  if (await self.run_code(code, result,  async_=asy)):


Round 546, winrate: 0.0, max_step: 230, reward: 0.020399999999999932, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/390460796.json'}
train loss in 1 epoch in 1 batch: 1.88840
val loss in 1 epoch: 1.34966
Round 547, winrate: 0.0, max_step: 359, reward: 0.0877000000000004, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/488480879.json'}
train loss in 1 epoch in 1 batch: 0.80157
val loss in 1 epoch: 1.81418
Round 548, winrate: 0.0, max_step: 157, reward: 0.02449999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/853561253.json'}
train loss in 1 epoch in 1 batch: 1.05369
val loss in 1 epoch: 2.54788
Round 549, winrate: 0.0, max_step: 270, reward: 0.008000000000000007, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/207747704.json'}
train loss in 1 epoch in 1 batch: 20.90766
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 552, winrate: 0.0, max_step: 316, reward: 0.021899999999999923, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/254873570.json'}
train loss in 1 epoch in 1 batch: 3.95310
val loss in 1 epoch: 3.29359
Round 553, winrate: 0.0, max_step: 275, reward: 0.011199999999999988, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/613380938.json'}
train loss in 1 epoch in 1 batch: 9.74241
val loss in 1 epoch: 6.93749
Round 554, winrate: 0.0, max_step: 359, reward: 0.12020000000000139, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/805326271.json'}
train loss in 1 epoch in 1 batch: 1.95098
val loss in 1 epoch: 1.16592
Round 555, winrate: 0.0, max_step: 75, reward: 0.004699999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/935590298.json'}
train loss in 1 epoch in 1 batch: 5.06904
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 556, winrate: 0.0, max_step: 278, reward: 0.1649999999999984, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/344151183.json'}
train loss in 1 epoch in 1 batch: 0.94888
val loss in 1 epoch: 0.88486
Round 557, winrate: 0.0, max_step: 158, reward: 0.015599999999999961, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/685346530.json'}
train loss in 1 epoch in 1 batch: 10.77414
val loss in 1 epoch: 2.27398
Round 558, winrate: 0.0, max_step: 190, reward: 0.016899999999999953, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/521955945.json'}
train loss in 1 epoch in 1 batch: 3.31554
val loss in 1 epoch: 2.88884


  if (await self.run_code(code, result,  async_=asy)):


Round 559, winrate: 0.0, max_step: 232, reward: 0.022099999999999922, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/248131811.json'}
train loss in 1 epoch in 1 batch: 0.60799
val loss in 1 epoch: 0.96954


  if (await self.run_code(code, result,  async_=asy)):


Round 560, winrate: 0.0, max_step: 312, reward: 0.1708999999999981, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/343483530.json'}
train loss in 1 epoch in 1 batch: 1.20792
val loss in 1 epoch: 1.39009
train loss in 1 epoch in 2 batch: 0.26082
val loss in 1 epoch: 1.37503


  if (await self.run_code(code, result,  async_=asy)):


Round 561, winrate: 0.0, max_step: 355, reward: 0.03399999999999995, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/881265151.json'}
train loss in 1 epoch in 1 batch: 7.60452
val loss in 1 epoch: 5.69559
Round 562, winrate: 0.0, max_step: 359, reward: 0.21049999999999924, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/153112139.json'}
train loss in 1 epoch in 1 batch: 1.89523
val loss in 1 epoch: 1.00739
train loss in 1 epoch in 2 batch: 0.24293
val loss in 1 epoch: 1.00809
Round 563, winrate: 0.0, max_step: 194, reward: 0.23410000000000264, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/245202462.json'}
train loss in 1 epoch in 1 batch: 1.07284
val loss in 1 epoch: 2.25570


  if (await self.run_code(code, result,  async_=asy)):


Round 564, winrate: 0.0, max_step: 350, reward: 0.03159999999999988, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/224625087.json'}
train loss in 1 epoch in 1 batch: 0.73310
val loss in 1 epoch: 1.26496
Round 565, winrate: 0.0, max_step: 238, reward: 0.021499999999999925, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/873218308.json'}
train loss in 1 epoch in 1 batch: 3.06729
val loss in 1 epoch: 2.54648
Round 566, winrate: 0.0, max_step: 359, reward: 0.029299999999999878, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/611864660.json'}
train loss in 1 epoch in 1 batch: 2.63939
val loss in 1 epoch: 2.68018
Round 567, winrate: 0.0, max_step: 278, reward: 0.023299999999999915, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/235285364.json'}
train loss in 1 epoch in 1 batch: 0.51495
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 569, winrate: 0.0, max_step: 315, reward: 0.1660999999999983, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/541653026.json'}
train loss in 1 epoch in 1 batch: 0.65603
val loss in 1 epoch: 1.83741
train loss in 1 epoch in 2 batch: 0.23804
val loss in 1 epoch: 1.84933
Round 570, winrate: 0.0, max_step: 195, reward: 0.0039999999999999975, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/867971669.json'}
train loss in 1 epoch in 1 batch: 2.60115
val loss in 1 epoch: 2.90688


  if (await self.run_code(code, result,  async_=asy)):


Round 571, winrate: 0.0, max_step: 359, reward: 0.01899999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/725315049.json'}
train loss in 1 epoch in 1 batch: 8.44792
val loss in 1 epoch: 6.64237
Round 572, winrate: 0.0, max_step: 195, reward: 0.0017000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/49972857.json'}
train loss in 1 epoch in 1 batch: 6.73061
val loss in 1 epoch: 5.68705


  if (await self.run_code(code, result,  async_=asy)):


Round 573, winrate: 0.0, max_step: 359, reward: 0.04499999999999984, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/942916128.json'}
train loss in 1 epoch in 1 batch: 5.17646
val loss in 1 epoch: 1.56794


  if (await self.run_code(code, result,  async_=asy)):


Round 574, winrate: 0.0, max_step: 310, reward: 0.1842000000000007, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/247202987.json'}
train loss in 1 epoch in 1 batch: 4.67953
val loss in 1 epoch: 1.53768
train loss in 1 epoch in 2 batch: 2.29306
val loss in 1 epoch: 1.55290
Round 575, winrate: 0.0, max_step: 359, reward: 0.11870000000000135, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/629527449.json'}
train loss in 1 epoch in 1 batch: 3.24977
val loss in 1 epoch: 1.50753
Round 576, winrate: 0.0, max_step: 150, reward: 0.03899999999999988, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/574497171.json'}
train loss in 1 epoch in 1 batch: 0.48274
val loss in 1 epoch: 1.11061


  if (await self.run_code(code, result,  async_=asy)):


Round 577, winrate: 0.0, max_step: 234, reward: 0.02229999999999992, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/901790531.json'}
train loss in 1 epoch in 1 batch: 6.04584
val loss in 1 epoch: 4.31407


  if (await self.run_code(code, result,  async_=asy)):


Round 578, winrate: 0.0, max_step: 359, reward: 0.11260000000000117, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/553253805.json'}
train loss in 1 epoch in 1 batch: 1.08323
val loss in 1 epoch: 1.53774


  if (await self.run_code(code, result,  async_=asy)):


Round 579, winrate: 0.0, max_step: 352, reward: 0.1243000000000015, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/712209341.json'}
train loss in 1 epoch in 1 batch: 2.98117
val loss in 1 epoch: 3.07125
train loss in 1 epoch in 2 batch: 3.03318
val loss in 1 epoch: 3.22111
Round 580, winrate: 0.0, max_step: 190, reward: 0.016599999999999955, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/59615136.json'}
train loss in 1 epoch in 1 batch: 1.20345
val loss in 1 epoch: 0.69421
Round 581, winrate: 0.0, max_step: 232, reward: 0.019199999999999932, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/503366333.json'}
train loss in 1 epoch in 1 batch: 5.71901
val loss in 1 epoch: 6.16163
Round 582, winrate: 0.0, max_step: 150, reward: 0.02859999999999988, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'repla

  if (await self.run_code(code, result,  async_=asy)):


Round 587, winrate: 0.0, max_step: 314, reward: 0.10200000000000087, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/879796242.json'}
train loss in 1 epoch in 1 batch: 0.70767
val loss in 1 epoch: 2.33089


  if (await self.run_code(code, result,  async_=asy)):


Round 588, winrate: 0.0, max_step: 359, reward: 0.0868000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/956182263.json'}
train loss in 1 epoch in 1 batch: 2.33767
val loss in 1 epoch: 1.15025
Round 589, winrate: 0.0, max_step: 238, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/403614944.json'}
train loss in 1 epoch in 1 batch: 6.41706
val loss in 1 epoch: 5.18460
Round 590, winrate: 0.0, max_step: 156, reward: 0.07579999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/293792490.json'}
train loss in 1 epoch in 1 batch: 0.99190
val loss in 1 epoch: 1.30856
Round 591, winrate: 0.0, max_step: 79, reward: 0.007900000000000008, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/273192155.json'}
train loss in 1 epoch in 1 batch: 0.72505
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 596, winrate: 0.0, max_step: 195, reward: 0.013299999999999975, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/647626395.json'}
train loss in 1 epoch in 1 batch: 4.85072
val loss in 1 epoch: 5.59769


  if (await self.run_code(code, result,  async_=asy)):


Round 597, winrate: 0.0, max_step: 312, reward: 0.015099999999999964, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/895872160.json'}
train loss in 1 epoch in 1 batch: 6.41531
val loss in 1 epoch: 4.77302
Round 598, winrate: 0.0, max_step: 273, reward: 0.20399999999999774, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/207955727.json'}
train loss in 1 epoch in 1 batch: 0.77865
val loss in 1 epoch: 0.99977
train loss in 1 epoch in 2 batch: 0.27112
val loss in 1 epoch: 0.99073
Round 599, winrate: 0.0, max_step: 197, reward: 0.10349999999999948, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/867253489.json'}
train loss in 1 epoch in 1 batch: 1.35544
val loss in 1 epoch: 1.20961


  if (await self.run_code(code, result,  async_=asy)):


Round 600, winrate: 0.0, max_step: 356, reward: 0.01729999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/139016388.json'}
train loss in 1 epoch in 1 batch: 4.51958
val loss in 1 epoch: 3.06517
Round 601, winrate: 0.0, max_step: 72, reward: 0.004299999999999998, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/595312809.json'}
train loss in 1 epoch in 1 batch: 0.46597
val loss in 1 epoch: 1.58987
Round 602, winrate: 0.0, max_step: 194, reward: 0.0021000000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/949360585.json'}
train loss in 1 epoch in 1 batch: 3.96119
val loss in 1 epoch: 2.70302
Round 603, winrate: 0.0, max_step: 316, reward: 0.14519999999999875, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/277032607.json'}
train loss in 1 epoch in 1 batch: 1.62964
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 607, winrate: 0.0, max_step: 315, reward: 0.042999999999999795, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/747943270.json'}
train loss in 1 epoch in 1 batch: 2.05033
val loss in 1 epoch: 1.51992
Round 608, winrate: 0.0, max_step: 231, reward: 0.021299999999999927, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/903099021.json'}
train loss in 1 epoch in 1 batch: 9.71007
val loss in 1 epoch: 7.98963


  if (await self.run_code(code, result,  async_=asy)):


Round 609, winrate: 0.0, max_step: 359, reward: 0.04330000000000008, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/823883549.json'}
train loss in 1 epoch in 1 batch: 1.94355
val loss in 1 epoch: 1.64708
Round 610, winrate: 0.0, max_step: 359, reward: 0.11259999999999963, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/380828244.json'}
train loss in 1 epoch in 1 batch: 2.45969
val loss in 1 epoch: 1.30642
Round 611, winrate: 0.0, max_step: 116, reward: 0.0325999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/241843726.json'}
train loss in 1 epoch in 1 batch: 1.09231
val loss in 1 epoch: 1.42362
Round 612, winrate: 0.0, max_step: 359, reward: 0.032499999999999904, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/616205957.json'}
train loss in 1 epoch in 1 batch: 9.39803
val loss 

  if (await self.run_code(code, result,  async_=asy)):


Round 613, winrate: 0.0, max_step: 350, reward: 0.02729999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/163189602.json'}
train loss in 1 epoch in 1 batch: 3.01860
val loss in 1 epoch: 2.34502
Round 614, winrate: 0.0, max_step: 359, reward: 0.07949999999999943, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/392759405.json'}
train loss in 1 epoch in 1 batch: 0.98200
val loss in 1 epoch: 0.82761


  if (await self.run_code(code, result,  async_=asy)):


Round 615, winrate: 0.0, max_step: 278, reward: 0.027799999999999887, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/871283890.json'}
train loss in 1 epoch in 1 batch: 3.28485
val loss in 1 epoch: 2.62108
Round 616, winrate: 0.0, max_step: 359, reward: 0.0877000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/90985018.json'}
train loss in 1 epoch in 1 batch: 1.58256
val loss in 1 epoch: 0.98917
Round 617, winrate: 0.0, max_step: 359, reward: 0.10490000000000095, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/3264686.json'}
train loss in 1 epoch in 1 batch: 2.89103
val loss in 1 epoch: 2.45933
Round 618, winrate: 0.0, max_step: 276, reward: 0.010499999999999992, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/192513567.json'}
train loss in 1 epoch in 1 batch: 2.69893
val loss in

  if (await self.run_code(code, result,  async_=asy)):


Round 620, winrate: 0.0, max_step: 156, reward: 0.03369999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/242583715.json'}
train loss in 1 epoch in 1 batch: 2.93879
val loss in 1 epoch: 1.26052
Round 621, winrate: 0.0, max_step: 350, reward: 0.03269999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/178274211.json'}
train loss in 1 epoch in 1 batch: 0.65396
val loss in 1 epoch: 1.10316
Round 622, winrate: 0.0, max_step: 354, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/690014903.json'}
train loss in 1 epoch in 1 batch: 7.93902
val loss in 1 epoch: 7.36847


  if (await self.run_code(code, result,  async_=asy)):


Round 623, winrate: 0.0, max_step: 359, reward: 0.022099999999999922, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/875016726.json'}
train loss in 1 epoch in 1 batch: 0.60312
val loss in 1 epoch: 1.56287


  if (await self.run_code(code, result,  async_=asy)):


Round 624, winrate: 0.0, max_step: 359, reward: 0.03349999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/452090300.json'}
train loss in 1 epoch in 1 batch: 7.83546
val loss in 1 epoch: 6.57750
Round 625, winrate: 0.0, max_step: 349, reward: 0.03089999999999987, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/53784371.json'}
train loss in 1 epoch in 1 batch: 2.62473
val loss in 1 epoch: 2.43980
Round 626, winrate: 0.0, max_step: 357, reward: 0.12350000000000148, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/58829777.json'}
train loss in 1 epoch in 1 batch: 1.33783
val loss in 1 epoch: 0.95438
Round 627, winrate: 1.0, max_step: 359, reward: 2.0981000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/913463700.json'}
train loss in 1 epoch in 1 batch: 0.50568
val loss in 

  if (await self.run_code(code, result,  async_=asy)):


Round 629, winrate: 0.0, max_step: 236, reward: 0.021599999999999925, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/854446421.json'}
train loss in 1 epoch in 1 batch: 2.30929
val loss in 1 epoch: 2.40029


  if (await self.run_code(code, result,  async_=asy)):


Round 630, winrate: 0.0, max_step: 359, reward: 0.022699999999999918, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/230216792.json'}
train loss in 1 epoch in 1 batch: 2.50872
val loss in 1 epoch: 2.16919


  if (await self.run_code(code, result,  async_=asy)):


Round 631, winrate: 0.0, max_step: 359, reward: 0.03209999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/536616985.json'}
train loss in 1 epoch in 1 batch: 2.43361
val loss in 1 epoch: 2.20487
Round 632, winrate: 1.0, max_step: 77, reward: 2.0107, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/350265706.json'}
train loss in 1 epoch in 1 batch: 0.47907
val loss in 1 epoch: 0.55922
Round 633, winrate: 0.0, max_step: 236, reward: 0.021299999999999927, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/878101368.json'}
train loss in 1 epoch in 1 batch: 2.34573
val loss in 1 epoch: 2.25438
Round 634, winrate: 0.0, max_step: 190, reward: 0.0833999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/583350560.json'}
train loss in 1 epoch in 1 batch: 7.47759
val loss in 1 epoch: 5.

  if (await self.run_code(code, result,  async_=asy)):


Round 638, winrate: 0.0, max_step: 359, reward: 0.03409999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/820980108.json'}
train loss in 1 epoch in 1 batch: 2.09158
val loss in 1 epoch: 1.94313
Round 639, winrate: 0.0, max_step: 191, reward: 0.03179999999999988, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/706882780.json'}
train loss in 1 epoch in 1 batch: 0.65117
val loss in 1 epoch: 0.90984
Round 640, winrate: 0.0, max_step: 316, reward: 0.19849999999999787, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/145049628.json'}
train loss in 1 epoch in 1 batch: 1.67491
val loss in 1 epoch: 1.62562
train loss in 1 epoch in 2 batch: 0.20201
val loss in 1 epoch: 1.61697
Round 641, winrate: 0.0, max_step: 78, reward: 0.0078000000000000074, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'repl

  if (await self.run_code(code, result,  async_=asy)):


Round 647, winrate: 0.0, max_step: 356, reward: 0.03559999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/757025131.json'}
train loss in 1 epoch in 1 batch: 1.98572
val loss in 1 epoch: 1.77682


  if (await self.run_code(code, result,  async_=asy)):


Round 648, winrate: 0.0, max_step: 274, reward: 0.01749999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/641928918.json'}
train loss in 1 epoch in 1 batch: 4.37912
val loss in 1 epoch: 4.66347
Round 649, winrate: 0.0, max_step: 109, reward: 0.009999999999999995, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/249583918.json'}
train loss in 1 epoch in 1 batch: 1.80631
val loss in 1 epoch: 1.94084
Round 650, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/558083164.json'}
train loss in 1 epoch in 1 batch: 0.69720
val loss in 1 epoch: 0.73962
Round 651, winrate: 0.0, max_step: 274, reward: 0.056399999999999714, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/319318239.json'}
train loss in 1 epoch in 1 batch: 2.41170
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 653, winrate: 0.0, max_step: 359, reward: 0.11810000000000133, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/623134486.json'}
train loss in 1 epoch in 1 batch: 1.01832
val loss in 1 epoch: 0.65181
Round 654, winrate: 0.0, max_step: 359, reward: 0.03549999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/635521976.json'}
train loss in 1 epoch in 1 batch: 3.05021
val loss in 1 epoch: 2.13800


  if (await self.run_code(code, result,  async_=asy)):


Round 655, winrate: 1.0, max_step: 195, reward: 2.0564, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/859364263.json'}
train loss in 1 epoch in 1 batch: 1.34573
val loss in 1 epoch: 0.96984
Round 656, winrate: 0.0, max_step: 194, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/255132843.json'}
train loss in 1 epoch in 1 batch: 4.75440
val loss in 1 epoch: 4.04376
Round 657, winrate: 0.0, max_step: 197, reward: 0.018399999999999944, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/559402794.json'}
train loss in 1 epoch in 1 batch: 0.56609
val loss in 1 epoch: 1.74487
Round 658, winrate: 0.0, max_step: 235, reward: 0.02059999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/672069062.json'}
train loss in 1 epoch in 1 batch: 1.97627
val loss in 1 epoch

  if (await self.run_code(code, result,  async_=asy)):


Round 665, winrate: 0.0, max_step: 355, reward: 0.034599999999999964, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/253581545.json'}
train loss in 1 epoch in 1 batch: 0.69810
val loss in 1 epoch: 1.14840
Round 666, winrate: 0.0, max_step: 311, reward: 0.49350000000000266, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/860870892.json'}
train loss in 1 epoch in 1 batch: 0.43480
val loss in 1 epoch: 1.52561
train loss in 1 epoch in 2 batch: 0.21067
val loss in 1 epoch: 1.51560
Round 667, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/361961871.json'}
train loss in 1 epoch in 1 batch: 4.99577
val loss in 1 epoch: 3.86990
Round 668, winrate: 0.0, max_step: 359, reward: 0.11430000000000122, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/999712579

  if (await self.run_code(code, result,  async_=asy)):


Round 676, winrate: 0.0, max_step: 359, reward: 0.03209999999999995, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/501304664.json'}
train loss in 1 epoch in 1 batch: 1.49722
val loss in 1 epoch: 1.66896
Round 677, winrate: 0.0, max_step: 352, reward: 0.25239999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/23698688.json'}
train loss in 1 epoch in 1 batch: 0.71713
val loss in 1 epoch: 0.83345
train loss in 1 epoch in 2 batch: 0.46495
val loss in 1 epoch: 0.83337
Round 678, winrate: 0.0, max_step: 359, reward: 0.0407999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/94020689.json'}
train loss in 1 epoch in 1 batch: 0.57829
val loss in 1 epoch: 1.06115
Round 679, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays

  if (await self.run_code(code, result,  async_=asy)):


Round 680, winrate: 0.0, max_step: 359, reward: 0.13990000000000044, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/931354636.json'}
train loss in 1 epoch in 1 batch: 0.36436
val loss in 1 epoch: 0.55640
train loss in 1 epoch in 2 batch: 0.18605
val loss in 1 epoch: 0.53416
Round 681, winrate: 0.0, max_step: 359, reward: 0.22590000000000002, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/545762453.json'}
train loss in 1 epoch in 1 batch: 0.43985
val loss in 1 epoch: 1.46863
train loss in 1 epoch in 2 batch: 1.99844
val loss in 1 epoch: 1.46296
Round 682, winrate: 0.0, max_step: 118, reward: 0.006600000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/707725541.json'}
train loss in 1 epoch in 1 batch: 1.30781
val loss in 1 epoch: 2.67119
Round 683, winrate: 0.0, max_step: 359, reward: 0.07510000000000004, example: {'ranks': [{'r

  if (await self.run_code(code, result,  async_=asy)):


Round 690, winrate: 0.0, max_step: 278, reward: 0.02419999999999991, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/488721355.json'}
train loss in 1 epoch in 1 batch: 4.54993
val loss in 1 epoch: 3.94614
Round 691, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/196881768.json'}
train loss in 1 epoch in 1 batch: 1.57143
val loss in 1 epoch: 0.77172
Round 692, winrate: 0.0, max_step: 359, reward: 0.4954000000000029, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/169123872.json'}
train loss in 1 epoch in 1 batch: 0.44819
val loss in 1 epoch: 1.44564
train loss in 1 epoch in 2 batch: 0.12854
val loss in 1 epoch: 1.46018


  if (await self.run_code(code, result,  async_=asy)):


Round 693, winrate: 0.0, max_step: 310, reward: 0.106700000000001, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/487748850.json'}
train loss in 1 epoch in 1 batch: 1.71332
val loss in 1 epoch: 1.60401
Round 694, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/689595245.json'}
train loss in 1 epoch in 1 batch: 1.69118
val loss in 1 epoch: 1.71821


  if (await self.run_code(code, result,  async_=asy)):


Round 695, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/986146962.json'}
train loss in 1 epoch in 1 batch: 0.34026
val loss in 1 epoch: 0.51017
Round 696, winrate: 0.0, max_step: 359, reward: 0.3134000000000049, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/176004031.json'}
train loss in 1 epoch in 1 batch: 1.12640
val loss in 1 epoch: 0.67776
train loss in 1 epoch in 2 batch: 0.53671
val loss in 1 epoch: 0.71088
Round 697, winrate: 0.0, max_step: 359, reward: 0.016699999999999954, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/850219426.json'}
train loss in 1 epoch in 1 batch: 0.42748
val loss in 1 epoch: 2.16275


  if (await self.run_code(code, result,  async_=asy)):


Round 698, winrate: 0.0, max_step: 236, reward: 0.021299999999999927, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/360827050.json'}
train loss in 1 epoch in 1 batch: 1.84382
val loss in 1 epoch: 1.65595
Round 699, winrate: 0.0, max_step: 359, reward: 0.030999999999999868, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/354538713.json'}
train loss in 1 epoch in 1 batch: 5.31797
val loss in 1 epoch: 4.06026


  if (await self.run_code(code, result,  async_=asy)):


Round 700, winrate: 0.0, max_step: 355, reward: 0.06049999999999973, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/672361527.json'}
train loss in 1 epoch in 1 batch: 1.21396
val loss in 1 epoch: 0.86640
Round 701, winrate: 0.0, max_step: 275, reward: 0.0024, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/516015590.json'}
train loss in 1 epoch in 1 batch: 1.56808
val loss in 1 epoch: 1.59552


  if (await self.run_code(code, result,  async_=asy)):


Round 702, winrate: 0.0, max_step: 310, reward: 0.07890000000000015, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/664752119.json'}
train loss in 1 epoch in 1 batch: 1.69036
val loss in 1 epoch: 0.72470
Round 703, winrate: 0.0, max_step: 235, reward: 0.023399999999999914, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/802115870.json'}
train loss in 1 epoch in 1 batch: 0.26875
val loss in 1 epoch: 1.41884


  if (await self.run_code(code, result,  async_=asy)):


Round 704, winrate: 0.0, max_step: 231, reward: 0.006900000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/120869359.json'}
train loss in 1 epoch in 1 batch: 1.53473
val loss in 1 epoch: 1.49826
Round 705, winrate: 0.0, max_step: 190, reward: 0.017799999999999948, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/529534609.json'}
train loss in 1 epoch in 1 batch: 0.31659
val loss in 1 epoch: 0.57248
Round 706, winrate: 0.0, max_step: 349, reward: 0.024599999999999907, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/942526334.json'}
train loss in 1 epoch in 1 batch: 4.68442
val loss in 1 epoch: 4.08828
Round 707, winrate: 0.0, max_step: 229, reward: 0.015399999999999962, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/911401819.json'}
train loss in 1 epoch in 1 batch: 4.51181
val l

  if (await self.run_code(code, result,  async_=asy)):


Round 708, winrate: 0.0, max_step: 358, reward: 0.02079999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/65071272.json'}
train loss in 1 epoch in 1 batch: 1.62313
val loss in 1 epoch: 1.39395
Round 709, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/981560335.json'}
train loss in 1 epoch in 1 batch: 3.37040
val loss in 1 epoch: 2.63655
Round 710, winrate: 0.0, max_step: 314, reward: 0.09230000000000059, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/443508454.json'}
train loss in 1 epoch in 1 batch: 1.85457
val loss in 1 epoch: 2.03606
Round 711, winrate: 0.0, max_step: 359, reward: 0.10030000000000082, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/246344090.json'}
train loss in 1 epoch in 1 batch: 1.76092
val loss in 1 epoch: 1.

  if (await self.run_code(code, result,  async_=asy)):


Round 713, winrate: 0.0, max_step: 234, reward: 0.018599999999999943, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/992760525.json'}
train loss in 1 epoch in 1 batch: 3.79576
val loss in 1 epoch: 3.68602


  if (await self.run_code(code, result,  async_=asy)):


Round 714, winrate: 0.0, max_step: 310, reward: 0.02719999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/73290591.json'}
train loss in 1 epoch in 1 batch: 3.19150
val loss in 1 epoch: 2.33744
Round 715, winrate: 0.0, max_step: 72, reward: 0.0039999999999999975, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/618679734.json'}
train loss in 1 epoch in 1 batch: 0.31440
val loss in 1 epoch: 0.96807
Round 716, winrate: 0.0, max_step: 313, reward: 0.02899999999999988, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/409857046.json'}
train loss in 1 epoch in 1 batch: 0.44759
val loss in 1 epoch: 0.87839
Round 717, winrate: 0.0, max_step: 195, reward: 0.0029999999999999988, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/652079235.json'}
train loss in 1 epoch in 1 batch: 1.61514
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 720, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/462875311.json'}
train loss in 1 epoch in 1 batch: 1.28793
val loss in 1 epoch: 1.15631
Round 721, winrate: 1.0, max_step: 230, reward: 1.0078, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/314345657.json'}
train loss in 1 epoch in 1 batch: 2.89524
val loss in 1 epoch: 3.13194
Round 722, winrate: 0.0, max_step: 230, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/581656168.json'}
train loss in 1 epoch in 1 batch: 1.33107
val loss in 1 epoch: 1.21935


  if (await self.run_code(code, result,  async_=asy)):


Round 723, winrate: 0.0, max_step: 279, reward: 0.010199999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/957321711.json'}
train loss in 1 epoch in 1 batch: 3.28185
val loss in 1 epoch: 3.85702


  if (await self.run_code(code, result,  async_=asy)):


Round 724, winrate: 0.0, max_step: 359, reward: 0.028499999999999883, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/589087750.json'}
train loss in 1 epoch in 1 batch: 1.36704
val loss in 1 epoch: 1.29317


  if (await self.run_code(code, result,  async_=asy)):


Round 725, winrate: 0.0, max_step: 356, reward: 0.11430000000000119, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/90727939.json'}
train loss in 1 epoch in 1 batch: 1.36693
val loss in 1 epoch: 0.73967
Round 726, winrate: 0.0, max_step: 190, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/133491610.json'}
train loss in 1 epoch in 1 batch: 3.87306
val loss in 1 epoch: 2.90558


  if (await self.run_code(code, result,  async_=asy)):


Round 727, winrate: 0.0, max_step: 358, reward: 0.03279999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/158280314.json'}
train loss in 1 epoch in 1 batch: 1.19225
val loss in 1 epoch: 1.30065
Round 728, winrate: 0.0, max_step: 118, reward: 0.011799999999999984, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/53761797.json'}
train loss in 1 epoch in 1 batch: 0.43029
val loss in 1 epoch: 0.19924
Round 729, winrate: 0.0, max_step: 357, reward: 0.10130000000000078, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/145775741.json'}
train loss in 1 epoch in 1 batch: 1.19994
val loss in 1 epoch: 0.63907
Round 730, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/229777668.json'}
train loss in 1 epoch in 1 batch: 1.39368
val loss in 1 epoch: 1

  if (await self.run_code(code, result,  async_=asy)):


Round 733, winrate: 0.0, max_step: 199, reward: 0.01889999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/222426237.json'}
train loss in 1 epoch in 1 batch: 0.33771
val loss in 1 epoch: 1.56016
Round 734, winrate: 1.0, max_step: 350, reward: 1.0325, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/173883837.json'}
train loss in 1 epoch in 1 batch: 0.33024
val loss in 1 epoch: 1.29346
Round 735, winrate: 0.0, max_step: 150, reward: 0.006100000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/763541140.json'}
train loss in 1 epoch in 1 batch: 1.32711
val loss in 1 epoch: 1.07012
Round 736, winrate: 0.0, max_step: 359, reward: 0.02729999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/682208113.json'}
train loss in 1 epoch in 1 batch: 2.61783
val loss in 1 epoch: 

  if (await self.run_code(code, result,  async_=asy)):


Round 737, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/387509833.json'}
train loss in 1 epoch in 1 batch: 1.13843
val loss in 1 epoch: 1.11137
Round 738, winrate: 0.0, max_step: 152, reward: 0.024599999999999945, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/402768199.json'}
train loss in 1 epoch in 1 batch: 5.16954
val loss in 1 epoch: 1.63510


  if (await self.run_code(code, result,  async_=asy)):


Round 739, winrate: 0.0, max_step: 275, reward: 0.015499999999999962, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/578111633.json'}
train loss in 1 epoch in 1 batch: 1.12504
val loss in 1 epoch: 1.07063
Round 740, winrate: 1.0, max_step: 359, reward: 2.105900000000001, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/349025981.json'}
train loss in 1 epoch in 1 batch: 0.22966
val loss in 1 epoch: 0.44510
Round 741, winrate: 0.0, max_step: 230, reward: 0.020099999999999934, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/392411912.json'}
train loss in 1 epoch in 1 batch: 1.17379
val loss in 1 epoch: 0.98438
Round 742, winrate: 0.0, max_step: 195, reward: 0.010399999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/5509850.json'}
train loss in 1 epoch in 1 batch: 1.15338
val loss i

  if (await self.run_code(code, result,  async_=asy)):


Round 743, winrate: 0.0, max_step: 359, reward: 0.022899999999999917, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/354079510.json'}
train loss in 1 epoch in 1 batch: 0.89699
val loss in 1 epoch: 0.95896


  if (await self.run_code(code, result,  async_=asy)):


Round 744, winrate: 0.0, max_step: 351, reward: 0.0924999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/106288533.json'}
train loss in 1 epoch in 1 batch: 0.91553
val loss in 1 epoch: 0.87748


  if (await self.run_code(code, result,  async_=asy)):


Round 745, winrate: 0.0, max_step: 359, reward: 0.03349999999999993, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/880554519.json'}
train loss in 1 epoch in 1 batch: 1.20815
val loss in 1 epoch: 1.25934
Round 746, winrate: 0.0, max_step: 359, reward: 0.1430000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/368824940.json'}
train loss in 1 epoch in 1 batch: 1.21703
val loss in 1 epoch: 1.25607
train loss in 1 epoch in 2 batch: 1.18272
val loss in 1 epoch: 1.28523


  if (await self.run_code(code, result,  async_=asy)):


Round 747, winrate: 0.0, max_step: 195, reward: 0.10420000000000122, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/758801756.json'}
train loss in 1 epoch in 1 batch: 0.71771
val loss in 1 epoch: 0.66284


  if (await self.run_code(code, result,  async_=asy)):


Round 748, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/733986913.json'}
train loss in 1 epoch in 1 batch: 1.45105
val loss in 1 epoch: 1.02646
Round 749, winrate: 0.0, max_step: 275, reward: 0.026399999999999896, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/89113747.json'}
train loss in 1 epoch in 1 batch: 2.56732
val loss in 1 epoch: 0.91118
Round 750, winrate: 0.0, max_step: 311, reward: 0.031099999999999867, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/895147972.json'}
train loss in 1 epoch in 1 batch: 1.32675
val loss in 1 epoch: 1.17740
Round 751, winrate: 0.0, max_step: 190, reward: 0.11449999999999923, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/378246717.json'}
train loss in 1 epoch in 1 batch: 0.23387
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 754, winrate: 0.0, max_step: 319, reward: 0.1535999999999985, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/776396101.json'}
train loss in 1 epoch in 1 batch: 0.54584
val loss in 1 epoch: 0.99642
Round 755, winrate: 0.0, max_step: 236, reward: 0.04999999999999977, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/562464916.json'}
train loss in 1 epoch in 1 batch: 3.94821
val loss in 1 epoch: 0.71537
Round 756, winrate: 0.0, max_step: 156, reward: 0.0067000000000000046, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/284102340.json'}
train loss in 1 epoch in 1 batch: 1.67645
val loss in 1 epoch: 1.16676
Round 757, winrate: 0.0, max_step: 279, reward: 0.1801999999999981, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/846844194.json'}
train loss in 1 epoch in 1 batch: 0.82343
val loss 

  if (await self.run_code(code, result,  async_=asy)):


Round 758, winrate: 1.0, max_step: 235, reward: 1.0158, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/246994933.json'}
train loss in 1 epoch in 1 batch: 6.05356
val loss in 1 epoch: 4.72333
Round 759, winrate: 0.0, max_step: 231, reward: 0.01909999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/896287880.json'}
train loss in 1 epoch in 1 batch: 5.13003
val loss in 1 epoch: 3.95313
Round 760, winrate: 0.0, max_step: 190, reward: 0.015599999999999961, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/535122361.json'}
train loss in 1 epoch in 1 batch: 1.27322
val loss in 1 epoch: 1.28135


  if (await self.run_code(code, result,  async_=asy)):


Round 761, winrate: 0.0, max_step: 359, reward: 0.1171000000000013, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/58619677.json'}
train loss in 1 epoch in 1 batch: 2.11881
val loss in 1 epoch: 0.72105
Round 762, winrate: 0.0, max_step: 74, reward: 0.004299999999999998, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/773358646.json'}
train loss in 1 epoch in 1 batch: 1.37889
val loss in 1 epoch: 1.26056


  if (await self.run_code(code, result,  async_=asy)):


Round 763, winrate: 0.0, max_step: 271, reward: 0.07770000000000017, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/247896938.json'}
train loss in 1 epoch in 1 batch: 1.23854
val loss in 1 epoch: 1.14618
Round 764, winrate: 0.0, max_step: 191, reward: 0.03409999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/918085310.json'}
train loss in 1 epoch in 1 batch: 2.85513
val loss in 1 epoch: 1.23322
Round 765, winrate: 0.0, max_step: 359, reward: 0.12199999999999862, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/971648029.json'}
train loss in 1 epoch in 1 batch: 0.33795
val loss in 1 epoch: 0.66937
Round 766, winrate: 0.0, max_step: 235, reward: 0.0019000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/382611441.json'}
train loss in 1 epoch in 1 batch: 1.20109
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 771, winrate: 0.0, max_step: 359, reward: 0.18879999999999764, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/560291268.json'}
train loss in 1 epoch in 1 batch: 0.21805
val loss in 1 epoch: 0.57041
train loss in 1 epoch in 2 batch: 0.12423
val loss in 1 epoch: 0.56578
Round 772, winrate: 0.0, max_step: 359, reward: 0.33030000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/71929890.json'}
train loss in 1 epoch in 1 batch: 1.65038
val loss in 1 epoch: 1.18520
train loss in 1 epoch in 2 batch: 0.28829
val loss in 1 epoch: 1.17721


  if (await self.run_code(code, result,  async_=asy)):


Round 773, winrate: 0.0, max_step: 274, reward: 0.019999999999999934, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/613149209.json'}
train loss in 1 epoch in 1 batch: 0.62047
val loss in 1 epoch: 0.83078


  if (await self.run_code(code, result,  async_=asy)):


Round 774, winrate: 0.0, max_step: 359, reward: 0.033199999999999924, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/543468622.json'}
train loss in 1 epoch in 1 batch: 0.91872
val loss in 1 epoch: 1.01296
Round 775, winrate: 0.0, max_step: 154, reward: 0.05089999999999977, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/540027776.json'}
train loss in 1 epoch in 1 batch: 1.00733
val loss in 1 epoch: 0.88649
Round 776, winrate: 0.0, max_step: 235, reward: 0.021299999999999927, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/22076793.json'}
train loss in 1 epoch in 1 batch: 0.57730
val loss in 1 epoch: 0.25422


  if (await self.run_code(code, result,  async_=asy)):


Round 777, winrate: 0.0, max_step: 270, reward: 0.06489999999999979, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/655685735.json'}
train loss in 1 epoch in 1 batch: 1.90094
val loss in 1 epoch: 1.79543


  if (await self.run_code(code, result,  async_=asy)):


Round 778, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/718281167.json'}
train loss in 1 epoch in 1 batch: 3.13654
val loss in 1 epoch: 2.49569
Round 779, winrate: 0.0, max_step: 319, reward: 0.03189999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/690898706.json'}
train loss in 1 epoch in 1 batch: 2.46331
val loss in 1 epoch: 2.95730
Round 780, winrate: 0.0, max_step: 312, reward: 0.02219999999999992, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/998416085.json'}
train loss in 1 epoch in 1 batch: 3.08732
val loss in 1 epoch: 2.49611
Round 781, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/856964372.json'}
train loss in 1 epoch in 1 batch: 0.42180
val loss in 1 epoch: 0.22234
Round 

  if (await self.run_code(code, result,  async_=asy)):


Round 787, winrate: 0.0, max_step: 314, reward: 0.01399999999999997, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/453522989.json'}
train loss in 1 epoch in 1 batch: 0.87646
val loss in 1 epoch: 0.86147


  if (await self.run_code(code, result,  async_=asy)):


Round 788, winrate: 0.0, max_step: 359, reward: 0.06759999999999983, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/753041330.json'}
train loss in 1 epoch in 1 batch: 0.32569
val loss in 1 epoch: 1.01042


  if (await self.run_code(code, result,  async_=asy)):


Round 789, winrate: 0.0, max_step: 196, reward: 0.042999999999999865, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/314970675.json'}
train loss in 1 epoch in 1 batch: 0.79312
val loss in 1 epoch: 1.44152
Round 790, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/170877168.json'}
train loss in 1 epoch in 1 batch: 0.86945
val loss in 1 epoch: 0.96209
Round 791, winrate: 0.0, max_step: 359, reward: 0.09390000000000058, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/895124453.json'}
train loss in 1 epoch in 1 batch: 2.57495
val loss in 1 epoch: 0.82816


  if (await self.run_code(code, result,  async_=asy)):


Round 792, winrate: 0.0, max_step: 155, reward: 0.010199999999999994, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/865325181.json'}
train loss in 1 epoch in 1 batch: 0.17910
val loss in 1 epoch: 0.72232
Round 793, winrate: 0.0, max_step: 359, reward: 0.11830000000000133, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/153145981.json'}
train loss in 1 epoch in 1 batch: 0.58870
val loss in 1 epoch: 0.34441
Round 794, winrate: 0.0, max_step: 190, reward: 0.002799999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/378014899.json'}
train loss in 1 epoch in 1 batch: 1.12671
val loss in 1 epoch: 0.91899
Round 795, winrate: 0.0, max_step: 230, reward: 0.022899999999999917, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/445992228.json'}
train loss in 1 epoch in 1 batch: 0.17575
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 797, winrate: 0.0, max_step: 311, reward: 0.0735, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/64075633.json'}
train loss in 1 epoch in 1 batch: 2.64083
val loss in 1 epoch: 1.76247
Round 798, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/697048452.json'}
train loss in 1 epoch in 1 batch: 0.95196
val loss in 1 epoch: 0.86077
Round 799, winrate: 0.0, max_step: 278, reward: 0.026699999999999894, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/510321191.json'}
train loss in 1 epoch in 1 batch: 0.21126
val loss in 1 epoch: 0.57526
Round 800, winrate: 0.0, max_step: 359, reward: 0.11180000000000115, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/349519183.json'}
train loss in 1 epoch in 1 batch: 1.16330
val loss in 1 epoch: 

  if (await self.run_code(code, result,  async_=asy)):


Round 804, winrate: 0.0, max_step: 314, reward: 0.015499999999999962, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/938895169.json'}
train loss in 1 epoch in 1 batch: 0.81784
val loss in 1 epoch: 0.89964
Round 805, winrate: 0.0, max_step: 275, reward: 0.0093, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/112294984.json'}
train loss in 1 epoch in 1 batch: 1.06200
val loss in 1 epoch: 0.83038


  if (await self.run_code(code, result,  async_=asy)):


Round 806, winrate: 0.0, max_step: 190, reward: 0.01409999999999997, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/727406070.json'}
train loss in 1 epoch in 1 batch: 1.62007
val loss in 1 epoch: 2.03352
Round 807, winrate: 0.0, max_step: 236, reward: 0.02249999999999992, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/607123852.json'}
train loss in 1 epoch in 1 batch: 0.20064
val loss in 1 epoch: 0.41033
Round 808, winrate: 0.0, max_step: 231, reward: 0.0018000000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/897001934.json'}
train loss in 1 epoch in 1 batch: 2.42924
val loss in 1 epoch: 1.91773
Round 809, winrate: 0.0, max_step: 359, reward: 0.06219999999999968, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/198287149.json'}
train loss in 1 epoch in 1 batch: 3.26357
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 810, winrate: 0.0, max_step: 277, reward: 0.027699999999999888, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/192367988.json'}
train loss in 1 epoch in 1 batch: 0.93669
val loss in 1 epoch: 0.76859


  if (await self.run_code(code, result,  async_=asy)):


Round 811, winrate: 1.0, max_step: 359, reward: 1.0331, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/401576774.json'}
train loss in 1 epoch in 1 batch: 1.86966
val loss in 1 epoch: 1.44810


  if (await self.run_code(code, result,  async_=asy)):


Round 812, winrate: 0.0, max_step: 275, reward: 0.02749999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/21392363.json'}
train loss in 1 epoch in 1 batch: 0.76865
val loss in 1 epoch: 0.66518


  if (await self.run_code(code, result,  async_=asy)):


Round 813, winrate: 1.0, max_step: 359, reward: 1.0341, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/989306935.json'}
train loss in 1 epoch in 1 batch: 0.66182
val loss in 1 epoch: 0.71481
Round 814, winrate: 0.0, max_step: 155, reward: 0.032299999999999954, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/769686693.json'}
train loss in 1 epoch in 1 batch: 0.37163
val loss in 1 epoch: 0.55408
Round 815, winrate: 0.0, max_step: 155, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/248375264.json'}
train loss in 1 epoch in 1 batch: 0.78830
val loss in 1 epoch: 0.66315


  if (await self.run_code(code, result,  async_=asy)):


Round 816, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/49672788.json'}
train loss in 1 epoch in 1 batch: 0.69725
val loss in 1 epoch: 0.59676
Round 817, winrate: 0.0, max_step: 312, reward: 0.10500000000000095, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/41358614.json'}
train loss in 1 epoch in 1 batch: 0.25377
val loss in 1 epoch: 0.31059
Round 818, winrate: 0.0, max_step: 194, reward: 0.03820000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/486290293.json'}
train loss in 1 epoch in 1 batch: 1.47763
val loss in 1 epoch: 0.87897


  if (await self.run_code(code, result,  async_=asy)):


Round 819, winrate: 0.0, max_step: 358, reward: 0.03209999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/912325057.json'}
train loss in 1 epoch in 1 batch: 0.59393
val loss in 1 epoch: 0.64662
Round 820, winrate: 0.0, max_step: 310, reward: 0.03089999999999987, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/179739825.json'}
train loss in 1 epoch in 1 batch: 0.17403
val loss in 1 epoch: 0.28138
Round 821, winrate: 0.0, max_step: 195, reward: 0.009699999999999997, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/52308207.json'}
train loss in 1 epoch in 1 batch: 0.71826
val loss in 1 epoch: 0.59243
Round 822, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/860280102.json'}
train loss in 1 epoch in 1 batch: 0.35384
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 825, winrate: 0.0, max_step: 271, reward: 0.019399999999999938, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/605683318.json'}
train loss in 1 epoch in 1 batch: 0.55770
val loss in 1 epoch: 0.57220
Round 826, winrate: 0.0, max_step: 359, reward: 0.11750000000000131, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/448169677.json'}
train loss in 1 epoch in 1 batch: 0.36090
val loss in 1 epoch: 0.24782
Round 827, winrate: 0.0, max_step: 359, reward: 0.0323999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/288580745.json'}
train loss in 1 epoch in 1 batch: 1.32785
val loss in 1 epoch: 1.11270
Round 828, winrate: 0.0, max_step: 359, reward: 0.029299999999999878, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/227632929.json'}
train loss in 1 epoch in 1 batch: 0.56516
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 834, winrate: 0.0, max_step: 234, reward: 0.013599999999999973, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/485089308.json'}
train loss in 1 epoch in 1 batch: 0.54850
val loss in 1 epoch: 0.60192
Round 835, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/115462237.json'}
train loss in 1 epoch in 1 batch: 0.61831
val loss in 1 epoch: 0.55403
Round 836, winrate: 0.0, max_step: 319, reward: 0.08600000000000038, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/508757783.json'}
train loss in 1 epoch in 1 batch: 0.43656
val loss in 1 epoch: 0.36045
Round 837, winrate: 0.0, max_step: 359, reward: 0.027999999999999886, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/608633456.json'}
train loss in 1 epoch in 1 batch: 0.58265
val loss in 1 epoch:

  if (await self.run_code(code, result,  async_=asy)):


Round 841, winrate: 0.0, max_step: 356, reward: 0.1204000000000014, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/719040036.json'}
train loss in 1 epoch in 1 batch: 0.35429
val loss in 1 epoch: 0.40701
Round 842, winrate: 1.0, max_step: 274, reward: 1.0263, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/572140548.json'}
train loss in 1 epoch in 1 batch: 0.14915
val loss in 1 epoch: 0.36329
Round 843, winrate: 0.0, max_step: 315, reward: 0.0864000000000004, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/379708147.json'}
train loss in 1 epoch in 1 batch: 0.41307
val loss in 1 epoch: 0.64490
Round 844, winrate: 0.0, max_step: 197, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/730765061.json'}
train loss in 1 epoch in 1 batch: 0.69309
val loss in 1 epoch: 0.

  if (await self.run_code(code, result,  async_=asy)):


Round 845, winrate: 0.0, max_step: 358, reward: 0.13489999999999852, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/803521849.json'}
train loss in 1 epoch in 1 batch: 0.15876
val loss in 1 epoch: 0.51193
Round 846, winrate: 0.0, max_step: 359, reward: 0.12369999999999894, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/206579543.json'}
train loss in 1 epoch in 1 batch: 0.40314
val loss in 1 epoch: 0.84315
Round 847, winrate: 0.0, max_step: 274, reward: 0.006200000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/808762721.json'}
train loss in 1 epoch in 1 batch: 0.66177
val loss in 1 epoch: 0.56295
Round 848, winrate: 0.0, max_step: 110, reward: 0.005500000000000001, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/864482942.json'}
train loss in 1 epoch in 1 batch: 0.62992
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 854, winrate: 0.0, max_step: 199, reward: 0.018499999999999944, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/75352712.json'}
train loss in 1 epoch in 1 batch: 0.52672
val loss in 1 epoch: 0.49039
Round 855, winrate: 0.0, max_step: 350, reward: 0.025999999999999898, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/751067844.json'}
train loss in 1 epoch in 1 batch: 1.50106
val loss in 1 epoch: 1.47948
Round 856, winrate: 0.0, max_step: 233, reward: 0.02089999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/527824783.json'}
train loss in 1 epoch in 1 batch: 2.52410
val loss in 1 epoch: 0.77779
Round 857, winrate: 0.0, max_step: 277, reward: 0.01919999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/755243894.json'}
train loss in 1 epoch in 1 batch: 1.41422
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 862, winrate: 0.0, max_step: 359, reward: 0.12230000000000145, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/95418284.json'}
train loss in 1 epoch in 1 batch: 0.15185
val loss in 1 epoch: 0.74001
Round 863, winrate: 0.0, max_step: 359, reward: 0.035699999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/101837026.json'}
train loss in 1 epoch in 1 batch: 0.14059
val loss in 1 epoch: 0.63999
Round 864, winrate: 0.0, max_step: 315, reward: 0.004499999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/604892702.json'}
train loss in 1 epoch in 1 batch: 1.99056
val loss in 1 epoch: 1.53223
Round 865, winrate: 0.0, max_step: 350, reward: 0.03269999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/568978214.json'}
train loss in 1 epoch in 1 batch: 0.11799
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 868, winrate: 0.0, max_step: 359, reward: 0.024499999999999907, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/737784147.json'}
train loss in 1 epoch in 1 batch: 1.62732
val loss in 1 epoch: 1.30720


  if (await self.run_code(code, result,  async_=asy)):


Round 869, winrate: 0.0, max_step: 239, reward: 0.2518, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/579049488.json'}
train loss in 1 epoch in 1 batch: 0.18683
val loss in 1 epoch: 0.44856
train loss in 1 epoch in 2 batch: 0.07895
val loss in 1 epoch: 0.45352
Round 870, winrate: 0.0, max_step: 359, reward: 0.3976000000000003, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/966517467.json'}
train loss in 1 epoch in 1 batch: 0.88671
val loss in 1 epoch: 0.69134
train loss in 1 epoch in 2 batch: 0.15757
val loss in 1 epoch: 0.70937
Round 871, winrate: 0.0, max_step: 359, reward: 0.5321999999999959, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/606948454.json'}
train loss in 1 epoch in 1 batch: 0.84806
val loss in 1 epoch: 0.69277
train loss in 1 epoch in 2 batch: 0.08480
val loss in 1 epoch: 0.70234


  if (await self.run_code(code, result,  async_=asy)):


Round 872, winrate: 0.0, max_step: 354, reward: 0.007400000000000006, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/87228345.json'}
train loss in 1 epoch in 1 batch: 1.97631
val loss in 1 epoch: 1.52828
Round 873, winrate: 0.0, max_step: 274, reward: 0.02399999999999991, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/188240881.json'}
train loss in 1 epoch in 1 batch: 1.46737
val loss in 1 epoch: 1.78952


  if (await self.run_code(code, result,  async_=asy)):


Round 874, winrate: 0.0, max_step: 359, reward: 0.010399999999999993, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/776349007.json'}
train loss in 1 epoch in 1 batch: 2.14240
val loss in 1 epoch: 1.56493
Round 875, winrate: 0.0, max_step: 278, reward: 0.06559999999999977, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/150558954.json'}
train loss in 1 epoch in 1 batch: 0.60618
val loss in 1 epoch: 0.31487


  if (await self.run_code(code, result,  async_=asy)):


Round 876, winrate: 0.0, max_step: 359, reward: 0.01569999999999996, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/937552162.json'}
train loss in 1 epoch in 1 batch: 1.54640
val loss in 1 epoch: 1.74897
Round 877, winrate: 1.0, max_step: 196, reward: 1.0131999999999999, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/384916143.json'}
train loss in 1 epoch in 1 batch: 0.56044
val loss in 1 epoch: 0.54822
Round 878, winrate: 0.0, max_step: 230, reward: 0.021799999999999924, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/565784161.json'}
train loss in 1 epoch in 1 batch: 0.12665
val loss in 1 epoch: 0.40038
Round 879, winrate: 0.0, max_step: 190, reward: 0.013299999999999975, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/752520315.json'}
train loss in 1 epoch in 1 batch: 2.09076
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 882, winrate: 0.0, max_step: 315, reward: 0.02739999999999989, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/594148264.json'}
train loss in 1 epoch in 1 batch: 0.46850
val loss in 1 epoch: 0.45878
Round 883, winrate: 0.0, max_step: 358, reward: 0.09740000000000074, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/468114120.json'}
train loss in 1 epoch in 1 batch: 1.66623
val loss in 1 epoch: 1.50954
Round 884, winrate: 0.0, max_step: 194, reward: 0.0038999999999999972, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/354934740.json'}
train loss in 1 epoch in 1 batch: 1.39382
val loss in 1 epoch: 1.35806
Round 885, winrate: 0.0, max_step: 349, reward: 0.019799999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/225661242.json'}
train loss in 1 epoch in 1 batch: 0.52582
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 887, winrate: 0.0, max_step: 359, reward: 0.03369999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/470298387.json'}
train loss in 1 epoch in 1 batch: 0.68050
val loss in 1 epoch: 0.77178
Round 888, winrate: 0.0, max_step: 359, reward: 0.08600000000000035, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/470559230.json'}
train loss in 1 epoch in 1 batch: 1.50441
val loss in 1 epoch: 0.74106
Round 889, winrate: 0.0, max_step: 199, reward: 0.11259999999999931, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/766907018.json'}
train loss in 1 epoch in 1 batch: 0.32013
val loss in 1 epoch: 0.40727
Round 890, winrate: 0.0, max_step: 350, reward: 0.031499999999999875, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/702648030.json'}
train loss in 1 epoch in 1 batch: 0.13644
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 891, winrate: 0.0, max_step: 359, reward: 0.03349999999999993, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/900271817.json'}
train loss in 1 epoch in 1 batch: 0.48865
val loss in 1 epoch: 0.48250
Round 892, winrate: 0.0, max_step: 119, reward: 0.008500000000000004, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/215671913.json'}
train loss in 1 epoch in 1 batch: 1.68539
val loss in 1 epoch: 1.59089
Round 893, winrate: 0.0, max_step: 230, reward: 0.01899999999999994, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/421604850.json'}
train loss in 1 epoch in 1 batch: 0.49091
val loss in 1 epoch: 0.56089
Round 894, winrate: 0.0, max_step: 350, reward: 0.21909999999999744, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/835583282.json'}
train loss in 1 epoch in 1 batch: 0.14275
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 901, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/62348857.json'}
train loss in 1 epoch in 1 batch: 1.43325
val loss in 1 epoch: 1.34778


  if (await self.run_code(code, result,  async_=asy)):


Round 902, winrate: 0.0, max_step: 356, reward: 0.09510000000000064, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/74325058.json'}
train loss in 1 epoch in 1 batch: 1.15468
val loss in 1 epoch: 0.94121
Round 903, winrate: 0.0, max_step: 111, reward: 0.0031999999999999984, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/721378116.json'}
train loss in 1 epoch in 1 batch: 0.28076
val loss in 1 epoch: 0.32197


  if (await self.run_code(code, result,  async_=asy)):


Round 904, winrate: 0.0, max_step: 197, reward: 0.016399999999999956, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/865090517.json'}
train loss in 1 epoch in 1 batch: 0.15595
val loss in 1 epoch: 1.00347
Round 905, winrate: 0.0, max_step: 359, reward: 0.10479999999999903, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/401564872.json'}
train loss in 1 epoch in 1 batch: 0.92827
val loss in 1 epoch: 0.36837
Round 906, winrate: 0.0, max_step: 359, reward: 0.09890000000000074, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/461235659.json'}
train loss in 1 epoch in 1 batch: 0.19088
val loss in 1 epoch: 0.26755
Round 907, winrate: 0.0, max_step: 359, reward: 0.031499999999999875, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/499933072.json'}
train loss in 1 epoch in 1 batch: 1.55871
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 913, winrate: 0.0, max_step: 354, reward: 0.0036999999999999976, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/426937734.json'}
train loss in 1 epoch in 1 batch: 0.53996
val loss in 1 epoch: 0.46818
Round 914, winrate: 1.0, max_step: 359, reward: 1.0186, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/265679500.json'}
train loss in 1 epoch in 1 batch: 1.71101
val loss in 1 epoch: 1.40140


  if (await self.run_code(code, result,  async_=asy)):


Round 915, winrate: 0.0, max_step: 359, reward: 0.04290000000000009, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/679466974.json'}
train loss in 1 epoch in 1 batch: 0.13421
val loss in 1 epoch: 0.36172
Round 916, winrate: 0.0, max_step: 359, reward: 0.0359, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/154176748.json'}
train loss in 1 epoch in 1 batch: 1.53622
val loss in 1 epoch: 1.17282
Round 917, winrate: 0.0, max_step: 199, reward: 0.0504999999999998, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/829199959.json'}
train loss in 1 epoch in 1 batch: 0.12572
val loss in 1 epoch: 0.27129


  if (await self.run_code(code, result,  async_=asy)):


Round 918, winrate: 0.0, max_step: 237, reward: 0.018699999999999942, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/2595785.json'}
train loss in 1 epoch in 1 batch: 0.28566
val loss in 1 epoch: 0.28796
Round 919, winrate: 0.0, max_step: 232, reward: 0.010999999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/397137410.json'}
train loss in 1 epoch in 1 batch: 0.13019
val loss in 1 epoch: 0.27401
Round 920, winrate: 0.0, max_step: 270, reward: 0.026899999999999893, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/780392900.json'}
train loss in 1 epoch in 1 batch: 0.11536
val loss in 1 epoch: 0.46448
Round 921, winrate: 0.0, max_step: 359, reward: 0.07919999999999947, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/926072681.json'}
train loss in 1 epoch in 1 batch: 0.13842
val loss

  if (await self.run_code(code, result,  async_=asy)):


Round 928, winrate: 0.0, max_step: 359, reward: 0.056899999999999715, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/372180088.json'}
train loss in 1 epoch in 1 batch: 0.30861
val loss in 1 epoch: 0.70365


  if (await self.run_code(code, result,  async_=asy)):


Round 929, winrate: 0.0, max_step: 271, reward: 0.02709999999999989, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/231288974.json'}
train loss in 1 epoch in 1 batch: 1.34392
val loss in 1 epoch: 0.96444
Round 930, winrate: 0.0, max_step: 270, reward: 0.026799999999999893, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/812968797.json'}
train loss in 1 epoch in 1 batch: 0.15774
val loss in 1 epoch: 1.27177
Round 931, winrate: 0.0, max_step: 359, reward: 0.054599999999999725, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/484439426.json'}
train loss in 1 epoch in 1 batch: 0.14555
val loss in 1 epoch: 0.53116
Round 932, winrate: 0.0, max_step: 358, reward: 0.0016000000000000005, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/988932012.json'}
train loss in 1 epoch in 1 batch: 0.37637
val l

  if (await self.run_code(code, result,  async_=asy)):


Round 933, winrate: 0.0, max_step: 359, reward: 0.008600000000000003, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/534128023.json'}
train loss in 1 epoch in 1 batch: 1.36469
val loss in 1 epoch: 0.96446
Round 934, winrate: 0.0, max_step: 350, reward: 0.03039999999999987, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/982377712.json'}
train loss in 1 epoch in 1 batch: 0.47728
val loss in 1 epoch: 0.33973
Round 935, winrate: 0.0, max_step: 194, reward: 0.019399999999999938, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/960519203.json'}
train loss in 1 epoch in 1 batch: 0.14182
val loss in 1 epoch: 0.59059
Round 936, winrate: 0.0, max_step: 358, reward: 0.11550000000000125, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/973968834.json'}
train loss in 1 epoch in 1 batch: 0.14762
val los

  if (await self.run_code(code, result,  async_=asy)):


Round 937, winrate: 0.0, max_step: 234, reward: 0.013399999999999974, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/81155732.json'}
train loss in 1 epoch in 1 batch: 0.38712
val loss in 1 epoch: 0.40599


  if (await self.run_code(code, result,  async_=asy)):


Round 938, winrate: 1.0, max_step: 230, reward: 1.0211999999999999, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/75612687.json'}
train loss in 1 epoch in 1 batch: 2.08820
val loss in 1 epoch: 0.42528


  if (await self.run_code(code, result,  async_=asy)):


Round 939, winrate: 0.0, max_step: 234, reward: 0.019699999999999936, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/746675552.json'}
train loss in 1 epoch in 1 batch: 0.26445
val loss in 1 epoch: 0.54082


  if (await self.run_code(code, result,  async_=asy)):


Round 940, winrate: 0.0, max_step: 237, reward: 0.019499999999999938, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/792177597.json'}
train loss in 1 epoch in 1 batch: 2.07928
val loss in 1 epoch: 1.56407
Round 941, winrate: 0.0, max_step: 110, reward: 0.0091, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/332295752.json'}
train loss in 1 epoch in 1 batch: 0.15813
val loss in 1 epoch: 0.43771
Round 942, winrate: 0.0, max_step: 239, reward: 0.07640000000000025, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/218199952.json'}
train loss in 1 epoch in 1 batch: 0.37249
val loss in 1 epoch: 0.71265
Round 943, winrate: 0.0, max_step: 359, reward: 0.035699999999999996, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/134757515.json'}
train loss in 1 epoch in 1 batch: 0.10386
val loss in 1 epoch:

  if (await self.run_code(code, result,  async_=asy)):


Round 948, winrate: 0.0, max_step: 315, reward: 0.007700000000000007, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/53236521.json'}
train loss in 1 epoch in 1 batch: 0.41529
val loss in 1 epoch: 0.40069
Round 949, winrate: 0.0, max_step: 353, reward: 0.1042000000000009, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/225492101.json'}
train loss in 1 epoch in 1 batch: 1.12508
val loss in 1 epoch: 0.41568
Round 950, winrate: 0.0, max_step: 279, reward: 0.17329999999999826, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/313880279.json'}
train loss in 1 epoch in 1 batch: 0.14112
val loss in 1 epoch: 0.61537
train loss in 1 epoch in 2 batch: 0.07969
