In [1]:
import numpy as np
import pandas as pd


import json
import os

import subprocess

from matplotlib import pyplot as plt

In [6]:
%matplotlib inline
%config Completer.use_jedi = False

In [3]:
import torch
from torch import nn
from torch import optim

from tensorboardX import SummaryWriter

In [674]:
from importlib import reload
from utils import dataset

reload(dataset)

<module 'utils.dataset' from '/Users/sergmiller/Documents/my/lux-ai-v1/research/utils/dataset.py'>

In [573]:
def learn(train, val, model_ff, criterion, epochs=5, batch_size=64, shuffle=True, freq=10,lr=1e-3, l2=1e-5, use_tb=True): 
    if use_tb:
        writer = SummaryWriter()
    
#     np.random.seed(1)
    ids_nn = np.arange(train.targets.shape[0])
    
    reshape_to_last = lambda x: torch.reshape(x, [np.prod(x.shape[:-1]), x.shape[-1]])

    optimizer = optim.Adam(model_ff.parameters(), lr=lr, weight_decay=l2)

    time_for_print_loss = lambda i: (i + 1) % freq == 0
    
    n_iter = 0


    for epoch in np.arange(epochs):
        np.random.shuffle(ids_nn)

        model_ff.train(True)

        for b in np.arange(0, train.targets.shape[0], batch_size):
            X_batch = torch.FloatTensor(train.features[ids_nn[b:b+batch_size]])
            y_batch = torch.FloatTensor(train.weights[ids_nn[b:b+batch_size]])  # reward(advantage)
            a_batch = torch.LongTensor(train.targets[ids_nn[b:b+batch_size]])  # action

            optimizer.zero_grad()
            y_pred_logits = model_ff(X_batch)

            loss = criterion(y_pred_logits, y_batch, a_batch, X_batch)
            loss.backward()

            optimizer.step()

            if (b // batch_size + 1) % freq == 0:
                print('train loss in %d epoch in %d batch: %.5f' %
                  (epoch + 1, b // batch_size + 1, loss.item()))
                
                if use_tb:
                    writer.add_scalar('data/train_loss', loss.item(), n_iter)
                    writer.add_scalar('data/epoch', epoch + 1, n_iter)
                    writer.add_scalar('data/batch', b // batch_size + 1, n_iter)

                val_loss = 0
                its = 0
                model_ff.train(False)
                for b in np.arange(0, val.targets.shape[0], batch_size):
                    its += 1
                    X_batch = torch.FloatTensor(val.features[b:b+batch_size])
#                     X_batch = reshape_to_last(X_batch)

                    y_batch = torch.FloatTensor(val.weights[b:b+batch_size])
                    a_batch = torch.LongTensor(val.targets[b:b+batch_size])
                    with torch.no_grad():
                        y_pred_logits = model_ff(X_batch)
                    loss = criterion(y_pred_logits, y_batch, a_batch, X_batch)
                    val_loss += loss.item()
                val_loss /= its
                print('val loss in %d epoch: %.5f' % (epoch + 1, val_loss))
                
                if use_tb:
                    writer.add_scalar('data/val_loss', val_loss, n_iter)
                n_iter += 1


In [96]:
# datasets = dataset.read_datasets_from_dir("features_v3/")

In [97]:
# dataset.read_columns_from_random_file("features_v3")

[(0, 'cargo_vol_total'),
 (1, 'cargo_fuel_total'),
 (2, 'unit_can_build'),
 (3, 'unit_routine'),
 (4, 'unit_last_action'),
 (5, 'near_city_dist'),
 (6, 'near_city_dir'),
 (7, 'near_city_fuel'),
 (8, 'near_city_light_upkeep'),
 (9, 'city_size'),
 (10, 'opp_near_city_dist'),
 (11, 'opp_near_city_dir'),
 (12, 'opp_near_city_fuel'),
 (13, 'opp_near_city_light_upkeep'),
 (14, 'opp_city_size'),
 (15, 'near_resource_dist'),
 (16, 'near_resource_dir'),
 (17, 'near_resource_type'),
 (18, 'near_resource_amount'),
 (19, 'my_city_count'),
 (20, 'opp_city_count'),
 (21, 'turn'),
 (22, 'is_night'),
 (23, 'time_to_night'),
 (24, 'width'),
 (25, 'height'),
 (26, 'my_research'),
 (27, 'opp_research'),
 (28, 'my_research_coal'),
 (29, 'opp_research_coal'),
 (30, 'my_research_uran'),
 (31, 'opp_research_uran'),
 (32, 'action'),
 (33, 'my_tiles'),
 (34, 'opp_tiles')]

In [100]:
dataset.CAT_FEATURES

[2, 3, 4, 6, 11, 16, 17, 22, 28, 29, 30, 31]

In [409]:
FLOAT_FEATURES = [i for i in range(42 + 32*32*7) if i not in dataset.CAT_FEATURES_V4]

In [410]:
FLOAT_FEATURES[:20], FLOAT_FEATURES[-10:]

([0, 1, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 21, 23, 26, 27, 28],
 [7200, 7201, 7202, 7203, 7204, 7205, 7206, 7207, 7208, 7209])

In [87]:
import pickle
from sklearn.preprocessing import OneHotEncoder

In [392]:
with open("../submissions/simple/models/ohe_v2", "rb") as f:
    OHE = pickle.load(f)

In [419]:
def prepare_features(t: dataset.Dataset, v: dataset.Dataset, ohe=None, categories=None) -> (dataset.Dataset, dataset.Dataset):
    create_ohe = ohe is None
    if create_ohe:
         ohe = OneHotEncoder(sparse=False, categories=categories)
    def prepare(d, is_train):
        cf = d.features[:, dataset.CAT_FEATURES_V4]
        ff = d.features[:, FLOAT_FEATURES]
        cf[cf == "False"] = False
        cf[cf == "True"] = True
        cf[cf == None] = "None"
        cf[cf == "1"] = 1
        cf[cf == "2"] = 2
        cf[cf == "3"] = 3
        ff[ff == "None"] = 0
        cf_o = ohe.fit_transform(cf) if is_train and create_ohe else ohe.transform(cf)
        return dataset.Dataset(
            features=np.array(np.concatenate([cf_o, ff], axis=1), dtype=np.float),
            targets=np.array(d.targets, dtype=np.float),
            weights=np.array(d.weights, dtype=np.float),
            next_state_id = d.next_state_id
        )
    t = prepare(t, True)
    v = prepare(v, False)
    return (t,v, ohe)

In [184]:
# dt,dv,OHE = prepare_features(data, data, None, [
#      np.array(["None", False, True], dtype=object),
#      np.array([1, 2, 3], dtype=object),
#      np.array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(['None', 'coal', 'uranium', 'wood'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object)])

In [185]:
OHE.categories_

[array(['None', False, True], dtype=object),
 array([1, 2, 3], dtype=object),
 array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', 'coal', 'uranium', 'wood'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object)]

In [188]:
# with open("../submissions/simple/models/ohe_v2", "wb") as f:
#     pickle.dump(OHE, f)

In [142]:
# with open("../submissions/simple/models/ohe_v1", "wb") as f:
#     pickle.dump(ohe, f)

In [626]:
MAP_F = 32 * 32 * 7

class NNWithCustomFeatures(nn.Module):
    def __init__(self, INPUT_F, DROP_P, H, A=6):
        super().__init__()
        INPUT_F_C = INPUT_F + 128
        self.model_q =  nn.Sequential(
            nn.Dropout(DROP_P),
            nn.Linear(INPUT_F_C, H),
            nn.LayerNorm(H),
            nn.ReLU(),
            nn.Dropout(DROP_P),
            nn.Linear(H, H),
            nn.ReLU(),
            nn.Dropout(DROP_P),
            nn.Linear(H, H),
            nn.ReLU(),
            nn.Linear(H, A),
            nn.ReLU()
#             nn.Sigmoid()
        )
        
        self.model_p =  nn.Sequential(
            nn.Dropout(DROP_P),
            nn.Linear(INPUT_F_C, H),
            nn.LayerNorm(H),
            nn.ReLU(),
            nn.Dropout(DROP_P),
            nn.Linear(H, H),
            nn.ReLU(),
            nn.Dropout(DROP_P),
            nn.Linear(H, H),
            nn.ReLU(),
            nn.Linear(H, A)
#             nn.Sigmoid()
        )
        
        self.map_model = nn.Sequential(
            nn.Conv2d(7, 64, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # after -> (16,16)
            nn.Conv2d(64, 128, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # after -> (8, 8)
            nn.Conv2d(128, 256, 3),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),  # after -> (4, 4)
#             nn.Conv2d(128, 256, 3),
#             nn.ReLU(inplace=True),
#             nn.MaxPool2d(2),  # after -> (1, 1)
        )
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.proj = nn.Sequential(
            nn.Dropout(p=DROP_P),
            nn.Linear(256 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=DROP_P),
            nn.Linear(256, 128)
        )
        
    def forward(self, x):
        L = x.shape[1]
        cur_r = self.forward_impl(x[:, :L // 2])
        next_r =  self.forward_impl(x[:, L // 2:])
        return torch.cat([cur_r, next_r],dim=1)

    def forward_impl(self, x):
        mapp = x[:, -MAP_F:].reshape(-1, 32, 32, 7)
        rest = x[:, :-MAP_F]
        mapp = torch.transpose(mapp, 1, -1)
        mapp = self.avgpool(self.map_model(mapp))
        mapp = torch.flatten(mapp, 1)
        mapp_f = self.proj(mapp)
#         print(mapp_f.shape)
        input_x = torch.cat([rest, mapp_f], dim=1)
#         print(input_x.shape)
#         return self.model_q(input_x)
        return torch.cat([self.model_q(input_x), self.model_p(input_x)], dim=1)

In [627]:
model = NNWithCustomFeatures(63, 0.05, 64)

In [630]:
model.forward_impl(torch.Tensor(4, 63 + 32*32*7))

tensor([[ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450],
        [ 0.0000,  0.0000,  0.1094,  0.0437,  0.0000,  0.0924, -0.0958, -0.0997,
         -0.1076,  0.0741,  0.0568, -0.0414],
        [ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0927, -0.0949,
         -0.1044,  0.0684,  0.0588, -0.0371],
        [ 0.0000,  0.0000,  0.1147,  0.0556,  0.0000,  0.0966, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450]], grad_fn=<CatBackward>)

In [677]:
ENTROPY_REG = 1e-2
def policy_loss(pi_logits, reward_batch, a_batch, X_batch):
    pi_probs = torch.nn.Softmax(dim=1)(pi_logits)
    return torch.mean(torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch) * reward_batch 
                      - torch.sum(pi_probs * torch.log(pi_probs) * ENTROPY_REG, dim=1))

def q_loss(q_vals, reward_batch, a_batch, X_batch):
    q_vals_per_reward = q_vals[np.arange(q_vals.shape[0]), a_batch]
    return torch.nn.MSELoss()(q_vals_per_reward, reward_batch) * 0.01

gamma = 0.99

def get_is_last_state(x):
    t = torch.sum(torch.isclose(x, torch.ones_like(x) * (-1)), dim=1) == x.shape[1]
    return t.float()

def q_loss_pair(q_vals_cur_and_next, reward_batch, a_batch, X_batch):
    q_vals = q_vals_cur_and_next[:, :6]
    q_vals_next = q_vals_cur_and_next[:, 6:12]
    q_vals_per_reward_cur = q_vals[np.arange(q_vals.shape[0]), a_batch]
    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    best_q_vals_next = torch.max(q_vals_next,dim=1)[0] * (1 - get_is_last_state(X_batch_next))
#     print(list(enumerate([q_vals_per_reward_cur, reward_batch, best_q_vals_next, q_vals_next])))
#     print(0.99 * best_q_vals_next)
#     return torch.nn.MSELoss()(target=q_vals_per_reward_cur.detach(), input=reward_batch + gamma * best_q_vals_next)
    return torch.nn.SmoothL1Loss()(target=q_vals_per_reward_cur.detach(), input=reward_batch + gamma * best_q_vals_next)


def actor_critic_loss(q_pi_payload, reward_batch, a_batch, X_batch):
    q_vals = q_pi_payload[:, :6]
    pi_logits =  q_pi_payload[:, 6:12]
    pi_probs = torch.nn.Softmax(dim=1)(pi_logits)
    q_vals_next = q_pi_payload[:, 12:18]
    q_vals_per_reward_cur = q_vals[np.arange(q_vals.shape[0]), a_batch]
    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    best_q_vals_next = torch.max(q_vals_next,dim=1)[0] * (1 - get_is_last_state(X_batch_next))
    advantage = reward_batch + gamma * best_q_vals_next - q_vals_per_reward_cur
    q_loss = torch.nn.SmoothL1Loss()(target=reward_batch + gamma * best_q_vals_next.detach(), input=q_vals_per_reward_cur)
    pi_loss =  torch.mean(torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch) * advantage.detach()
                      - torch.sum(pi_probs * torch.log(pi_probs) * ENTROPY_REG, dim=1))
#     print("q_loss={}, pi_loss={}".format(q_loss.item(), pi_loss.item()))
    return q_loss + pi_loss

In [429]:
simple_bot = "../submissions/simple/main.py"
replays = "replays"

def run_game(left_bot=simple_bot, right_bot=simple_bot, seed=42, loglevel=2):
    replay_path = "replay.json"
    python_v = "python3.7"
    
    replay_path = os.path.join(replays, str(np.random.randint(1e9)) + ".json")
    
    size = np.random.choice([12,16,24,32], size=1)[0]
    
    res = subprocess.run([
        "lux-ai-2021",
        left_bot,
        right_bot,
#         "--statefulReplay",
        "--width={}".format(size),
        "--height={}".format(size),
        "--loglevel={}".format(loglevel),
        "--python={}".format(python_v),
        "--maxtime=100000",
        "--maxConcurrentMatches=1",
        "--seed={}".format(seed),
        "--out={}".format(replay_path)], stdout=subprocess.PIPE)
    
    if loglevel > 0:
        print(res.stdout.decode())

    assert res.returncode == 0

    with open(replay_path, "r") as f:
        result = json.load(f)
    return result, res.stdout.decode()

In [432]:
run_game(simple_bot, simple_bot)  # <-- test run one game with default bot

In [98]:
import hashlib

def build_runnable_bot_with_flags(flags: dict, origin = simple_bot, base_path = '../submissions/simple/') -> str:
    lines = []
    with open(origin, "r") as f:
        for line in f:
            lines.append(line[:-1])
    text = '\n'.join(lines)
    f = json.dumps(flags)
    text = text.format(f)
    h = int(hashlib.sha256(f.encode('utf-8')).hexdigest(), 16) % (10 ** 18)
    path = base_path + "main_" + str(h) + ".py"
    with open(path, "w") as f:
        f.write(text)
    return path

In [229]:
def count_series(results: list):
    wins = []
    for i, r in enumerate(results):
        ranks = r[0]['results']['ranks']
        teams = r[0]['teamDetails']
        if ranks[0]['rank'] == 1 and ranks[1]['rank'] == 2:
            if ranks[0]["agentID"] == i % 2:
                wins.append(1)
            else:
                wins.append(0)
        else:
            wins.append(0.5)
    return wins

In [100]:
from joblib import Parallel, delayed
import tqdm

In [614]:
def sample_dataset(d, p=0.5):
    N = len(d.features)
    ids = np.random.choice(N, size=int(N * p))
    return dataset.Dataset(features = d.features[ids], weights = d.weights[ids], targets = d.targets[ids])

In [492]:
def add_next_features(d):
    assert d.next_state_id is not None
    coupled_features = []
    weights = []
    targets = []
    for i in np.arange(d.features.shape[0]):
        next_i = d.next_state_id[i]
        if d.next_state_id[i] != -1:
            next_f = d.features[next_i]
        else:
            next_f = np.ones_like(d.features[i]) * (-1)
        coupled_features.append(np.concatenate([d.features[i], next_f]))
        weights.append(d.weights[i])
        targets.append(d.targets[i])
    return dataset.Dataset(
        features=np.array(coupled_features),
        weights=np.array(weights),
        targets=np.array(targets))

In [None]:
t = 0  #  1778 - value_iter
B = 1

model = NNWithCustomFeatures(83, 0.05, 128)

writer = SummaryWriter()

while True:
    t += 1
    np.random.seed(t)
    torch.save(model.state_dict(), '../submissions/simple/models/ac_iter_v{}'.format(t))
    r = []
    for i in np.arange(B):
        seed = t * B + i
#         _f = str(seed) + ".txt"
        _f = "log.txt"
        bot = build_runnable_bot_with_flags({
            "model_path": "models/ac_iter_v{}".format(t),
            "use_policy": True,
            "is_neural": True,
            "prob_use_default_agent": 0.5 / np.log(t + 1),
            "prob_use_random": 0.05,
            "ohe_path": "models/ohe_v2",
            "use_old_units_cargo_rules": False,
            "log_features_path": "../../research/features_iter/", "log_path_file_name": _f
        })
        if t % 2 == 0:
            _r = run_game(bot, simple_bot, loglevel=0, seed=seed)
        else:
            _r = run_game(simple_bot, bot, loglevel=0, seed=seed)
        r.append(_r)
    wins = np.mean(count_series(r))
    if t % 2 == 1:
        wins = 1 - wins
    trainD = dataset.get_dataset_from_file(os.path.join("features_iter/", _f), wins)
    reward = np.sum(trainD.weights)
    trainD_ohe, valD_ohe, _ = prepare_features(trainD, trainD, OHE)
    max_step = np.max(trainD.features[:, 31])
    trainD_ohe_with_next = add_next_features(trainD_ohe)
    valD_ohe_with_next = add_next_features(valD_ohe)
    trainD_ohe_with_next_sampled = sample_dataset(trainD_ohe_with_next, 0.1)
    print("Round {}, winrate: {}, max_step: {}, reward: {}, example: {}".format(t, wins, max_step, reward, r[0][0]['results']))
    writer.add_scalar('data/reward', reward, t)
    writer.add_scalar('data/winrate', wins, t)
    writer.add_scalar('data/max_step', max_step, t)
    try:
        learn(trainD_ohe_with_next_sampled, valD_ohe_with_next, model, actor_critic_loss, lr=1e-1 / (t + 1), batch_size=64, epochs=1, freq=1, l2=1e-5, use_tb=False)
    except Exception as e:
        print(e)

  if (await self.run_code(code, result,  async_=asy)):


Round 1, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/717354021.json'}
train loss in 1 epoch in 1 batch: 0.13380
val loss in 1 epoch: 79.47787
train loss in 1 epoch in 2 batch: 90.93904
val loss in 1 epoch: 100.33106
train loss in 1 epoch in 3 batch: 92.60023
val loss in 1 epoch: 213.08604
train loss in 1 epoch in 4 batch: 286.67859
val loss in 1 epoch: 449.73454
Round 2, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/798842024.json'}
train loss in 1 epoch in 1 batch: 646.99487
val loss in 1 epoch: 240.44003
Round 3, winrate: 0.0, max_step: 158, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218175338.json'}
train loss in 1 epoch in 1 batch: 108.87628
val loss in 1 epoch: 653.31531
Round 4, winrate: 0.0, max_step: 70, reward: 0, example: {'rank

  if (await self.run_code(code, result,  async_=asy)):


Round 9, winrate: 0.0, max_step: 269, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/44556670.json'}
train loss in 1 epoch in 1 batch: 609.60803
val loss in 1 epoch: 415.93657


  if (await self.run_code(code, result,  async_=asy)):


Round 10, winrate: 0.0, max_step: 192, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/91571465.json'}
train loss in 1 epoch in 1 batch: 1230.62231
val loss in 1 epoch: 625.68748
Round 11, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/774252441.json'}
train loss in 1 epoch in 1 batch: 925.72229
val loss in 1 epoch: 459.82204
Round 12, winrate: 0.0, max_step: 109, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/662124363.json'}
train loss in 1 epoch in 1 batch: 871.60339
val loss in 1 epoch: -805.75122
Round 13, winrate: 0.0, max_step: 71, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/118980946.json'}
train loss in 1 epoch in 1 batch: 415.10941
val loss in 1 epoch: 552.16748
Round 14, winrate: 0.0, max_step: 70, re

  if (await self.run_code(code, result,  async_=asy)):


Round 24, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/902017442.json'}
train loss in 1 epoch in 1 batch: 453.71527
val loss in 1 epoch: 314.02766
Round 25, winrate: 0.0, max_step: 71, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/515929220.json'}
train loss in 1 epoch in 1 batch: 398.65155
val loss in 1 epoch: 48.21088
Round 26, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/248828725.json'}
train loss in 1 epoch in 1 batch: 1058.64026
val loss in 1 epoch: 235.01193
Round 27, winrate: 0.0, max_step: 69, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/754717715.json'}
train loss in 1 epoch in 1 batch: 568.18237
val loss in 1 epoch: 265.14249
Round 28, winrate: 0.0, max_step: 31, rewa

  if (await self.run_code(code, result,  async_=asy)):


Round 33, winrate: 0.0, max_step: 149, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/814680455.json'}
train loss in 1 epoch in 1 batch: 8.44952
val loss in 1 epoch: 159.60605


  if (await self.run_code(code, result,  async_=asy)):


Round 34, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/165621153.json'}
train loss in 1 epoch in 1 batch: 223.34302
val loss in 1 epoch: 507.59639
Round 35, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/893589193.json'}
train loss in 1 epoch in 1 batch: 590.32208
val loss in 1 epoch: 523.32595


  if (await self.run_code(code, result,  async_=asy)):


Round 36, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/981430917.json'}
train loss in 1 epoch in 1 batch: 319.27985
val loss in 1 epoch: 153.97856
Round 37, winrate: 0.0, max_step: 315, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/835356559.json'}
train loss in 1 epoch in 1 batch: 377.61340
val loss in 1 epoch: 258.99935
Round 38, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/578846113.json'}
train loss in 1 epoch in 1 batch: 334.19467
val loss in 1 epoch: 404.68533
Round 39, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/201387401.json'}
train loss in 1 epoch in 1 batch: -109.90990
val loss in 1 epoch: 192.26283
Round 40, winrate: 0.0, max_step: 149, 

  if (await self.run_code(code, result,  async_=asy)):


Round 42, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/534895718.json'}
train loss in 1 epoch in 1 batch: 567.14795
val loss in 1 epoch: 280.16150
Round 43, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/494155588.json'}
train loss in 1 epoch in 1 batch: 812.22900
val loss in 1 epoch: 278.70825
Round 44, winrate: 0.0, max_step: 349, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/364394260.json'}
train loss in 1 epoch in 1 batch: 380.40613
val loss in 1 epoch: 154.50598


  if (await self.run_code(code, result,  async_=asy)):


Round 45, winrate: 0.0, max_step: 315, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/234232222.json'}
train loss in 1 epoch in 1 batch: 377.41553
val loss in 1 epoch: 163.27263


  if (await self.run_code(code, result,  async_=asy)):


Round 46, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/145308861.json'}
train loss in 1 epoch in 1 batch: 378.79324
val loss in 1 epoch: 155.71810
Round 47, winrate: 0.0, max_step: 70, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/487429255.json'}
train loss in 1 epoch in 1 batch: -188.90933
val loss in 1 epoch: 160.42128
Round 48, winrate: 0.0, max_step: 70, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/75120128.json'}
train loss in 1 epoch in 1 batch: -219.71075
val loss in 1 epoch: -6.40033
Round 49, winrate: 0.0, max_step: 37, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218890666.json'}
train loss in 1 epoch in 1 batch: 3063.46118
val loss in 1 epoch: 145.18521
Round 50, winrate: 0.0, max_step: 194, rew

  if (await self.run_code(code, result,  async_=asy)):


Round 56, winrate: 0.0, max_step: 238, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/412060132.json'}
train loss in 1 epoch in 1 batch: 347.99115
val loss in 1 epoch: 425.22127
Round 57, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/375163855.json'}
train loss in 1 epoch in 1 batch: -169.43140
val loss in 1 epoch: 467.17303
Round 58, winrate: 0.0, max_step: 69, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/494374691.json'}
train loss in 1 epoch in 1 batch: 1987.19458
val loss in 1 epoch: 577.78088
Round 59, winrate: 0.0, max_step: 154, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/747475121.json'}
train loss in 1 epoch in 1 batch: 468.19543
val loss in 1 epoch: 484.97946
Round 60, winrate: 0.0, max_step: 150, r

  if (await self.run_code(code, result,  async_=asy)):


Round 63, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/231674924.json'}
train loss in 1 epoch in 1 batch: 472.59058
val loss in 1 epoch: 5.18203
Round 64, winrate: 0.0, max_step: 117, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/554473924.json'}
train loss in 1 epoch in 1 batch: 358.97559
val loss in 1 epoch: -90.26247
Round 65, winrate: 0.0, max_step: 114, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/939073326.json'}
train loss in 1 epoch in 1 batch: 737.32837
val loss in 1 epoch: -593.73454
Round 66, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/662660116.json'}
train loss in 1 epoch in 1 batch: 710.02600
val loss in 1 epoch: 87.05142
Round 67, winrate: 0.0, max_step: 114, rew

  if (await self.run_code(code, result,  async_=asy)):


Round 71, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/797039723.json'}
train loss in 1 epoch in 1 batch: 1295.06958
val loss in 1 epoch: 293.12538
Round 72, winrate: 0.0, max_step: 232, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/458437080.json'}
train loss in 1 epoch in 1 batch: 729.92822
val loss in 1 epoch: 398.03915
Round 73, winrate: 1.0, max_step: 359, reward: 65, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/612848534.json'}
train loss in 1 epoch in 1 batch: 401.48038
val loss in 1 epoch: -167.32581
train loss in 1 epoch in 2 batch: 57.84192
val loss in 1 epoch: -291.19929
Round 74, winrate: 0.0, max_step: 269, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/868452254.json'}
train loss in 1 epoch in 1 batch: 279

  if (await self.run_code(code, result,  async_=asy)):


Round 80, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/94127279.json'}
train loss in 1 epoch in 1 batch: 386.55719
val loss in 1 epoch: -33.29944
Round 81, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/452771651.json'}
train loss in 1 epoch in 1 batch: 267.59042
val loss in 1 epoch: 71.04604


  if (await self.run_code(code, result,  async_=asy)):


Round 82, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/108343427.json'}
train loss in 1 epoch in 1 batch: 66.74005
val loss in 1 epoch: 110.11716
Round 83, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/30246994.json'}
train loss in 1 epoch in 1 batch: 231.10422
val loss in 1 epoch: 19.29817
Round 84, winrate: 0.0, max_step: 29, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/197744554.json'}
train loss in 1 epoch in 1 batch: 1506.34741
val loss in 1 epoch: -6.94116
Round 85, winrate: 0.0, max_step: 193, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/517001578.json'}
train loss in 1 epoch in 1 batch: 502.34308
val loss in 1 epoch: -47.13964


  if (await self.run_code(code, result,  async_=asy)):


Round 86, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/870190228.json'}
train loss in 1 epoch in 1 batch: 122.66536
val loss in 1 epoch: -8.03951
train loss in 1 epoch in 2 batch: -1157.46448
val loss in 1 epoch: -68.08679
Round 87, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/671149542.json'}
train loss in 1 epoch in 1 batch: -333.61865
val loss in 1 epoch: -262.21472
Round 88, winrate: 0.0, max_step: 74, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/633726936.json'}
train loss in 1 epoch in 1 batch: -453.68616
val loss in 1 epoch: -80.24548
Round 89, winrate: 0.0, max_step: 112, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/591978782.json'}
train loss in 1 epoch in 1 batch: 308.

  if (await self.run_code(code, result,  async_=asy)):


Round 91, winrate: 0.0, max_step: 149, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/863303858.json'}
train loss in 1 epoch in 1 batch: 230.17444
val loss in 1 epoch: -78.10455
Round 92, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/583602298.json'}
train loss in 1 epoch in 1 batch: -9.85858
val loss in 1 epoch: 98.54336
Round 93, winrate: 0.0, max_step: 69, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/456788389.json'}
train loss in 1 epoch in 1 batch: -797.71722
val loss in 1 epoch: 91.09818
Round 94, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/942254779.json'}
train loss in 1 epoch in 1 batch: 68.70245
val loss in 1 epoch: -118.51151


  if (await self.run_code(code, result,  async_=asy)):


Round 95, winrate: 0.0, max_step: 359, reward: 18, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/982703510.json'}
train loss in 1 epoch in 1 batch: 160.47943
val loss in 1 epoch: -2.52236
train loss in 1 epoch in 2 batch: 227.40619
val loss in 1 epoch: -106.16188
Round 96, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/922073172.json'}
train loss in 1 epoch in 1 batch: 210.80070
val loss in 1 epoch: 116.60186


  if (await self.run_code(code, result,  async_=asy)):


Round 97, winrate: 0.0, max_step: 349, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/373422170.json'}
train loss in 1 epoch in 1 batch: 424.14862
val loss in 1 epoch: -173.79406
Round 98, winrate: 0.0, max_step: 69, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/997580250.json'}
train loss in 1 epoch in 1 batch: 272.40332
val loss in 1 epoch: -65.17679


  if (await self.run_code(code, result,  async_=asy)):


Round 99, winrate: 0.0, max_step: 359, reward: 36, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/739930753.json'}
train loss in 1 epoch in 1 batch: 186.83447
val loss in 1 epoch: -152.33326
train loss in 1 epoch in 2 batch: 30.69837
val loss in 1 epoch: -204.18790
Round 100, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/186422792.json'}
train loss in 1 epoch in 1 batch: -1121.77466
val loss in 1 epoch: -235.58952
Round 101, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/70431583.json'}
train loss in 1 epoch in 1 batch: -1452.85107
val loss in 1 epoch: -423.66793


  if (await self.run_code(code, result,  async_=asy)):


Round 102, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/419539200.json'}
train loss in 1 epoch in 1 batch: 213.91466
val loss in 1 epoch: -183.05494
Round 103, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/782161671.json'}
train loss in 1 epoch in 1 batch: -246.28116
val loss in 1 epoch: -184.74321
Round 104, winrate: 0.0, max_step: 115, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/635314245.json'}
train loss in 1 epoch in 1 batch: -304.22040
val loss in 1 epoch: -663.38932
Round 105, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/359660224.json'}
train loss in 1 epoch in 1 batch: -2354.15479
val loss in 1 epoch: -393.44043
Round 106, winrate: 0.0, max_

  if (await self.run_code(code, result,  async_=asy)):


Round 108, winrate: 0.0, max_step: 274, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/675248919.json'}
train loss in 1 epoch in 1 batch: -20.52686
val loss in 1 epoch: -171.64708
Round 109, winrate: 0.0, max_step: 29, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/478719238.json'}
train loss in 1 epoch in 1 batch: 25.57269
val loss in 1 epoch: -205.23920
Round 110, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/498468992.json'}
train loss in 1 epoch in 1 batch: 338.80115
val loss in 1 epoch: 28.23657


  if (await self.run_code(code, result,  async_=asy)):


Round 111, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/481767252.json'}
train loss in 1 epoch in 1 batch: -201.74985
val loss in 1 epoch: -71.53996
Round 112, winrate: 0.0, max_step: 157, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/537116260.json'}
train loss in 1 epoch in 1 batch: 157.66861
val loss in 1 epoch: -57.61908


  if (await self.run_code(code, result,  async_=asy)):


Round 113, winrate: 0.0, max_step: 274, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/438024357.json'}
train loss in 1 epoch in 1 batch: 303.42468
val loss in 1 epoch: -47.53022
Round 114, winrate: 0.0, max_step: 359, reward: 3, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/651254346.json'}
train loss in 1 epoch in 1 batch: 1.91458
val loss in 1 epoch: -57.85299
train loss in 1 epoch in 2 batch: 5.12488
val loss in 1 epoch: -155.44301
train loss in 1 epoch in 3 batch: -579.29504
val loss in 1 epoch: -256.02218


  if (await self.run_code(code, result,  async_=asy)):


Round 115, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/842210066.json'}
train loss in 1 epoch in 1 batch: -68.40283
val loss in 1 epoch: -108.06287
Round 116, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/537595189.json'}
train loss in 1 epoch in 1 batch: 452.96014
val loss in 1 epoch: -89.34958


  if (await self.run_code(code, result,  async_=asy)):


Round 117, winrate: 0.0, max_step: 358, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/865054190.json'}
train loss in 1 epoch in 1 batch: 137.37228
val loss in 1 epoch: -212.34167
train loss in 1 epoch in 2 batch: -17.52719
val loss in 1 epoch: -304.58595
train loss in 1 epoch in 3 batch: -505.40152
val loss in 1 epoch: -391.73262
train loss in 1 epoch in 4 batch: -831.99530
val loss in 1 epoch: -426.09396
Round 118, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/827432192.json'}
train loss in 1 epoch in 1 batch: 119.16895
val loss in 1 epoch: -102.11057
Round 119, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/420235714.json'}
train loss in 1 epoch in 1 batch: 212.11002
val loss in 1 epoch: -198.07602
Round 120, winrate: 0.0, max_step: 158, reward

  if (await self.run_code(code, result,  async_=asy)):


Round 121, winrate: 1.0, max_step: 116, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/478162242.json'}
train loss in 1 epoch in 1 batch: -273.25308
val loss in 1 epoch: -284.97698
Round 122, winrate: 0.0, max_step: 71, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/674274799.json'}
train loss in 1 epoch in 1 batch: 446.43799
val loss in 1 epoch: -308.69910
Round 123, winrate: 0.0, max_step: 29, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/843828734.json'}
train loss in 1 epoch in 1 batch: -442.17108
val loss in 1 epoch: -461.82178
Round 124, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/455545294.json'}
train loss in 1 epoch in 1 batch: 345.92963
val loss in 1 epoch: -97.10822


  if (await self.run_code(code, result,  async_=asy)):


Round 125, winrate: 0.0, max_step: 193, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/28887997.json'}
train loss in 1 epoch in 1 batch: -1071.45569
val loss in 1 epoch: -130.56213


  if (await self.run_code(code, result,  async_=asy)):


Round 126, winrate: 0.0, max_step: 356, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/451710822.json'}
train loss in 1 epoch in 1 batch: -494.30518
val loss in 1 epoch: -375.69708


  if (await self.run_code(code, result,  async_=asy)):


Round 127, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/102363337.json'}
train loss in 1 epoch in 1 batch: -98.58282
val loss in 1 epoch: -375.31587
train loss in 1 epoch in 2 batch: 101.52409
val loss in 1 epoch: -427.68083
Round 128, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/500952274.json'}
train loss in 1 epoch in 1 batch: -992.61407
val loss in 1 epoch: -394.95960
Round 129, winrate: 0.0, max_step: 29, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/302931039.json'}
train loss in 1 epoch in 1 batch: 1268.49451
val loss in 1 epoch: -577.82379
Round 130, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/608297369.json'}
train loss in 1 epoch in 1 batc

  if (await self.run_code(code, result,  async_=asy)):


Round 132, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/132731233.json'}
train loss in 1 epoch in 1 batch: -225.44647
val loss in 1 epoch: -227.53976
train loss in 1 epoch in 2 batch: -641.44385
val loss in 1 epoch: -299.60548
Round 133, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/728033953.json'}
train loss in 1 epoch in 1 batch: -616.91498
val loss in 1 epoch: -669.95807
Round 134, winrate: 0.0, max_step: 29, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/263726563.json'}
train loss in 1 epoch in 1 batch: -324.53506
val loss in 1 epoch: -717.61987
Round 135, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/692902559.json'}
train loss in 1 epoch in 1 batc

  if (await self.run_code(code, result,  async_=asy)):


Round 137, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/832580318.json'}
train loss in 1 epoch in 1 batch: 82.54099
val loss in 1 epoch: -403.80395
Round 138, winrate: 0.0, max_step: 234, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/251585001.json'}
train loss in 1 epoch in 1 batch: 232.18469
val loss in 1 epoch: -45.05365
train loss in 1 epoch in 2 batch: 463.21204
val loss in 1 epoch: -146.28193
Round 139, winrate: 0.0, max_step: 152, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/802709328.json'}
train loss in 1 epoch in 1 batch: 4.33624
val loss in 1 epoch: -136.51286
Round 140, winrate: 0.0, max_step: 70, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/785150630.json'}
train loss in 1 epoch in 1 batch: 149

  if (await self.run_code(code, result,  async_=asy)):


Round 141, winrate: 0.0, max_step: 355, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/718915190.json'}
train loss in 1 epoch in 1 batch: 82.40332
val loss in 1 epoch: 21.45817
train loss in 1 epoch in 2 batch: 29.85175
val loss in 1 epoch: -61.25123
Round 142, winrate: 0.0, max_step: 119, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/653099285.json'}
train loss in 1 epoch in 1 batch: -89.02570
val loss in 1 epoch: -206.90023


  if (await self.run_code(code, result,  async_=asy)):


Round 143, winrate: 0.0, max_step: 197, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/100829337.json'}
train loss in 1 epoch in 1 batch: 666.21387
val loss in 1 epoch: -246.87484


  if (await self.run_code(code, result,  async_=asy)):


Round 144, winrate: 0.0, max_step: 234, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/400909671.json'}
train loss in 1 epoch in 1 batch: 273.84387
val loss in 1 epoch: -69.09929


  if (await self.run_code(code, result,  async_=asy)):


Round 145, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/141914101.json'}
train loss in 1 epoch in 1 batch: 603.61505
val loss in 1 epoch: 179.17505
train loss in 1 epoch in 2 batch: 581.09766
val loss in 1 epoch: 116.31418
Round 146, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/833375074.json'}
train loss in 1 epoch in 1 batch: 284.26160
val loss in 1 epoch: 459.29960


  if (await self.run_code(code, result,  async_=asy)):


Round 147, winrate: 0.0, max_step: 192, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/501203670.json'}
train loss in 1 epoch in 1 batch: -484.41895
val loss in 1 epoch: 138.65106
Round 148, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/323865683.json'}
train loss in 1 epoch in 1 batch: 334.82397
val loss in 1 epoch: -174.61787


  if (await self.run_code(code, result,  async_=asy)):


Round 149, winrate: 0.0, max_step: 192, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/563961649.json'}
train loss in 1 epoch in 1 batch: -319.64880
val loss in 1 epoch: -134.02981
Round 150, winrate: 0.0, max_step: 112, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/681112804.json'}
train loss in 1 epoch in 1 batch: 633.47632
val loss in 1 epoch: -49.55275


  if (await self.run_code(code, result,  async_=asy)):


Round 151, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/85015960.json'}
train loss in 1 epoch in 1 batch: 115.11035
val loss in 1 epoch: 0.70016
train loss in 1 epoch in 2 batch: 79.19208
val loss in 1 epoch: -37.77712
train loss in 1 epoch in 3 batch: 171.75017
val loss in 1 epoch: -76.06608
Round 152, winrate: 0.0, max_step: 196, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/853529308.json'}
train loss in 1 epoch in 1 batch: 249.82320
val loss in 1 epoch: -19.09306
train loss in 1 epoch in 2 batch: -592.02795
val loss in 1 epoch: -68.25268


  if (await self.run_code(code, result,  async_=asy)):


Round 153, winrate: 0.0, max_step: 357, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/634366047.json'}
train loss in 1 epoch in 1 batch: 229.80838
val loss in 1 epoch: 193.69910
train loss in 1 epoch in 2 batch: -4632.86426
val loss in 1 epoch: 161.99463


  if (await self.run_code(code, result,  async_=asy)):


Round 154, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/33173321.json'}
train loss in 1 epoch in 1 batch: 5.44342
val loss in 1 epoch: 121.94086
train loss in 1 epoch in 2 batch: -1176.79370
val loss in 1 epoch: 134.53789


  if (await self.run_code(code, result,  async_=asy)):


Round 155, winrate: 0.0, max_step: 316, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/654236596.json'}
train loss in 1 epoch in 1 batch: 546.45990
val loss in 1 epoch: -10.85592


  if (await self.run_code(code, result,  async_=asy)):


Round 156, winrate: 0.0, max_step: 356, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/273243692.json'}
train loss in 1 epoch in 1 batch: -394.34164
val loss in 1 epoch: 92.24391
Round 157, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/203590312.json'}
train loss in 1 epoch in 1 batch: -117.88739
val loss in 1 epoch: 43.13639
train loss in 1 epoch in 2 batch: 351.65051
val loss in 1 epoch: -26.50958
train loss in 1 epoch in 3 batch: -248.65076
val loss in 1 epoch: -77.43658
train loss in 1 epoch in 4 batch: 203.82767
val loss in 1 epoch: -103.46366
Round 158, winrate: 0.0, max_step: 192, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/413473693.json'}
train loss in 1 epoch in 1 batch: -300.43155
val loss in 1 epoch: 71.11595
train loss in 1 epoch in 2 batch: 168.14177
val lo

  if (await self.run_code(code, result,  async_=asy)):


Round 159, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/166843915.json'}
train loss in 1 epoch in 1 batch: 338.32822
val loss in 1 epoch: -60.54563
train loss in 1 epoch in 2 batch: -266.78149
val loss in 1 epoch: -69.43110
train loss in 1 epoch in 3 batch: 149.95190
val loss in 1 epoch: -116.23275


  if (await self.run_code(code, result,  async_=asy)):


Round 160, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/358169448.json'}
train loss in 1 epoch in 1 batch: -155.85687
val loss in 1 epoch: -42.39289
train loss in 1 epoch in 2 batch: -285.55377
val loss in 1 epoch: -92.03473
train loss in 1 epoch in 3 batch: -825.47638
val loss in 1 epoch: -139.66144
Round 161, winrate: 0.0, max_step: 113, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/818936910.json'}
train loss in 1 epoch in 1 batch: 180.61925
val loss in 1 epoch: 251.81545
Round 162, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/304943035.json'}
train loss in 1 epoch in 1 batch: 310.01996
val loss in 1 epoch: -211.47536
train loss in 1 epoch in 2 batch: 106.85158
val loss in 1 epoch: -211.02585
train loss in 1 epoch in 3 batch: -12.60692
va

  if (await self.run_code(code, result,  async_=asy)):


Round 164, winrate: 0.0, max_step: 271, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/459487352.json'}
train loss in 1 epoch in 1 batch: -454.18878
val loss in 1 epoch: -115.73987
train loss in 1 epoch in 2 batch: 19.02708
val loss in 1 epoch: -122.37126
Round 165, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/327374784.json'}
train loss in 1 epoch in 1 batch: 160.87440
val loss in 1 epoch: 40.80075
train loss in 1 epoch in 2 batch: 38.81901
val loss in 1 epoch: 38.96661
Round 166, winrate: 1.0, max_step: 157, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/205037573.json'}
train loss in 1 epoch in 1 batch: 215.06764
val loss in 1 epoch: 129.07002


  if (await self.run_code(code, result,  async_=asy)):


Round 167, winrate: 0.0, max_step: 311, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/637283137.json'}
train loss in 1 epoch in 1 batch: 327.01562
val loss in 1 epoch: -300.31722
Round 168, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/420042453.json'}
train loss in 1 epoch in 1 batch: 134.17361
val loss in 1 epoch: -33.18707


  if (await self.run_code(code, result,  async_=asy)):


Round 169, winrate: 0.0, max_step: 351, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/728386851.json'}
train loss in 1 epoch in 1 batch: 116.56005
val loss in 1 epoch: -92.27799
Round 170, winrate: 0.0, max_step: 197, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/226110362.json'}
train loss in 1 epoch in 1 batch: 125.20290
val loss in 1 epoch: -324.06801


  if (await self.run_code(code, result,  async_=asy)):


Round 171, winrate: 0.0, max_step: 355, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/677514685.json'}
train loss in 1 epoch in 1 batch: 115.95665
val loss in 1 epoch: -134.04427
train loss in 1 epoch in 2 batch: -866.15662
val loss in 1 epoch: -160.28304


  if (await self.run_code(code, result,  async_=asy)):


Round 172, winrate: 0.0, max_step: 238, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/641676058.json'}
train loss in 1 epoch in 1 batch: 231.81065
val loss in 1 epoch: -340.88498
train loss in 1 epoch in 2 batch: 75.77475
val loss in 1 epoch: -338.74682


  if (await self.run_code(code, result,  async_=asy)):


Round 173, winrate: 0.0, max_step: 231, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/400537829.json'}
train loss in 1 epoch in 1 batch: 86.89146
val loss in 1 epoch: -180.20535
train loss in 1 epoch in 2 batch: 55.17974
val loss in 1 epoch: -203.16697
Round 174, winrate: 1.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/902109485.json'}
train loss in 1 epoch in 1 batch: 454.98483
val loss in 1 epoch: 81.68029


  if (await self.run_code(code, result,  async_=asy)):


Round 175, winrate: 0.0, max_step: 275, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/223715604.json'}
train loss in 1 epoch in 1 batch: 43.60464
val loss in 1 epoch: -111.61023
train loss in 1 epoch in 2 batch: 92.69566
val loss in 1 epoch: -105.71421
train loss in 1 epoch in 3 batch: 12.76747
val loss in 1 epoch: -108.66532
Round 176, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/461441856.json'}
train loss in 1 epoch in 1 batch: 190.10289
val loss in 1 epoch: 27.18189
Round 177, winrate: 0.0, max_step: 195, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/12159537.json'}
train loss in 1 epoch in 1 batch: 127.47643
val loss in 1 epoch: -614.49786
Round 178, winrate: 0.0, max_step: 153, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'r

  if (await self.run_code(code, result,  async_=asy)):


Round 181, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/103178493.json'}
train loss in 1 epoch in 1 batch: -108.57428
val loss in 1 epoch: -155.73360
Round 182, winrate: 0.0, max_step: 359, reward: 12, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/543601981.json'}
train loss in 1 epoch in 1 batch: -103.29849
val loss in 1 epoch: -273.48520
train loss in 1 epoch in 2 batch: -643.42181
val loss in 1 epoch: -291.14951
Round 183, winrate: 0.0, max_step: 113, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/315050277.json'}
train loss in 1 epoch in 1 batch: 373.98291
val loss in 1 epoch: -94.63453


  if (await self.run_code(code, result,  async_=asy)):


Round 184, winrate: 0.0, max_step: 193, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/294016430.json'}
train loss in 1 epoch in 1 batch: 135.51241
val loss in 1 epoch: 109.08594


  if (await self.run_code(code, result,  async_=asy)):


Round 185, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/560723200.json'}
train loss in 1 epoch in 1 batch: 291.16394
val loss in 1 epoch: -89.21328


  if (await self.run_code(code, result,  async_=asy)):


Round 186, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/183389357.json'}
train loss in 1 epoch in 1 batch: -1328.60632
val loss in 1 epoch: -1177.83802


  if (await self.run_code(code, result,  async_=asy)):


Round 187, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/730320491.json'}
train loss in 1 epoch in 1 batch: 161.74493
val loss in 1 epoch: 652.64802


  if (await self.run_code(code, result,  async_=asy)):


Round 188, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/51443474.json'}
train loss in 1 epoch in 1 batch: -1.85724
val loss in 1 epoch: -364.55658
train loss in 1 epoch in 2 batch: 47.72827
val loss in 1 epoch: -372.45988


  if (await self.run_code(code, result,  async_=asy)):


Round 189, winrate: 0.0, max_step: 156, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/741780543.json'}
train loss in 1 epoch in 1 batch: 160.22984
val loss in 1 epoch: -258.33714


  if (await self.run_code(code, result,  async_=asy)):


Round 190, winrate: 0.0, max_step: 276, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/990780300.json'}
train loss in 1 epoch in 1 batch: -379.71448
val loss in 1 epoch: -211.55830
train loss in 1 epoch in 2 batch: -380.25323
val loss in 1 epoch: -222.27152
train loss in 1 epoch in 3 batch: -955.64148
val loss in 1 epoch: -239.72042


  if (await self.run_code(code, result,  async_=asy)):


Round 191, winrate: 1.0, max_step: 359, reward: 20, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/178712497.json'}
train loss in 1 epoch in 1 batch: -34.52887
val loss in 1 epoch: -256.57779
train loss in 1 epoch in 2 batch: -12.00784
val loss in 1 epoch: -260.07508


  if (await self.run_code(code, result,  async_=asy)):


Round 192, winrate: 0.0, max_step: 359, reward: 2, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/31351906.json'}
train loss in 1 epoch in 1 batch: 215.92108
val loss in 1 epoch: -182.71444
train loss in 1 epoch in 2 batch: -602.66174
val loss in 1 epoch: -195.90852
train loss in 1 epoch in 3 batch: 49.32351
val loss in 1 epoch: -204.10375
train loss in 1 epoch in 4 batch: 61.05553
val loss in 1 epoch: -209.44309
train loss in 1 epoch in 5 batch: -14.45989
val loss in 1 epoch: -213.59899


  if (await self.run_code(code, result,  async_=asy)):


Round 193, winrate: 0.0, max_step: 279, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/652786414.json'}
train loss in 1 epoch in 1 batch: 217.94122
val loss in 1 epoch: -78.41318
train loss in 1 epoch in 2 batch: -924.25946
val loss in 1 epoch: -45.29666
Round 194, winrate: 0.0, max_step: 195, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/935007969.json'}
train loss in 1 epoch in 1 batch: -648.45563
val loss in 1 epoch: -314.69879
Round 195, winrate: 0.0, max_step: 359, reward: 49, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/589817801.json'}
train loss in 1 epoch in 1 batch: 151.81238
val loss in 1 epoch: -107.04888
train loss in 1 epoch in 2 batch: 12.00720
val loss in 1 epoch: -94.17034
train loss in 1 epoch in 3 batch: -13.79320
val loss in 1 epoch: -86.16081


  if (await self.run_code(code, result,  async_=asy)):


Round 196, winrate: 0.0, max_step: 157, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/813936156.json'}
train loss in 1 epoch in 1 batch: 400.03571
val loss in 1 epoch: -5.97157


  if (await self.run_code(code, result,  async_=asy)):


Round 197, winrate: 0.0, max_step: 275, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/506405649.json'}
train loss in 1 epoch in 1 batch: 138.92212
val loss in 1 epoch: -305.18844
Round 198, winrate: 0.0, max_step: 232, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/471594238.json'}
train loss in 1 epoch in 1 batch: -298.25165
val loss in 1 epoch: -315.59127
train loss in 1 epoch in 2 batch: 13.50631
val loss in 1 epoch: -336.58522
Round 199, winrate: 0.0, max_step: 191, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/664825252.json'}
train loss in 1 epoch in 1 batch: 281.62067
val loss in 1 epoch: -280.57702
train loss in 1 epoch in 2 batch: 16.65395
val loss in 1 epoch: -262.16546
Round 200, winrate: 0.0, max_step: 115, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}

  if (await self.run_code(code, result,  async_=asy)):


Round 201, winrate: 0.0, max_step: 276, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/489216512.json'}
train loss in 1 epoch in 1 batch: 284.14590
val loss in 1 epoch: 60.90629
Round 202, winrate: 0.0, max_step: 309, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/962958227.json'}
train loss in 1 epoch in 1 batch: 424.69299
val loss in 1 epoch: -469.89140
Round 203, winrate: 0.0, max_step: 359, reward: 110, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/420477548.json'}
train loss in 1 epoch in 1 batch: 86.72206
val loss in 1 epoch: -153.02744
train loss in 1 epoch in 2 batch: 103.03090
val loss in 1 epoch: -188.76052
train loss in 1 epoch in 3 batch: 44.75507
val loss in 1 epoch: -190.86904
train loss in 1 epoch in 4 batch: 28.69922
val loss in 1 epoch: -197.73937
Round 204, winrate: 0.0, max_step: 359, reward: 1

  if (await self.run_code(code, result,  async_=asy)):


Round 205, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/903062850.json'}
train loss in 1 epoch in 1 batch: -41.61720
val loss in 1 epoch: 107.15024


  if (await self.run_code(code, result,  async_=asy)):


Round 206, winrate: 0.0, max_step: 313, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/761601813.json'}
train loss in 1 epoch in 1 batch: 387.16425
val loss in 1 epoch: 9.42766
train loss in 1 epoch in 2 batch: 70.48288
val loss in 1 epoch: -10.30745
train loss in 1 epoch in 3 batch: 5.32267
val loss in 1 epoch: -13.30121
Round 207, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/12733958.json'}
train loss in 1 epoch in 1 batch: 233.30338
val loss in 1 epoch: 145.63723
Round 208, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/476339614.json'}
train loss in 1 epoch in 1 batch: -944.75769
val loss in 1 epoch: -89.78421
train loss in 1 epoch in 2 batch: 153.12756
val loss in 1 epoch: -90.02822
train loss in 1 epoch in 3 batch: 172.63544
val loss in 1

  if (await self.run_code(code, result,  async_=asy)):


Round 212, winrate: 0.0, max_step: 358, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/715698749.json'}
train loss in 1 epoch in 1 batch: -4.39421
val loss in 1 epoch: -196.40729
train loss in 1 epoch in 2 batch: -425.39481
val loss in 1 epoch: -204.39864
train loss in 1 epoch in 3 batch: 35.69369
val loss in 1 epoch: -213.81489
train loss in 1 epoch in 4 batch: 13.71583
val loss in 1 epoch: -221.58446
Round 213, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/938134827.json'}
train loss in 1 epoch in 1 batch: 93.12062
val loss in 1 epoch: 95.59403
Round 214, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/207206517.json'}
train loss in 1 epoch in 1 batch: 453.39014
val loss in 1 epoch: -147.67287
Round 215, winrate: 0.0, max_step: 193, reward: 0, 

  if (await self.run_code(code, result,  async_=asy)):


Round 217, winrate: 0.0, max_step: 236, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/445223570.json'}
train loss in 1 epoch in 1 batch: -909.24475
val loss in 1 epoch: -628.01055
Round 218, winrate: 0.0, max_step: 359, reward: 72, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/361866829.json'}
train loss in 1 epoch in 1 batch: 62.07920
val loss in 1 epoch: -187.76192
train loss in 1 epoch in 2 batch: 253.56903
val loss in 1 epoch: -215.24084
train loss in 1 epoch in 3 batch: -652.48303
val loss in 1 epoch: -254.32250
train loss in 1 epoch in 4 batch: -5971.69336
val loss in 1 epoch: -287.21175
Round 219, winrate: 0.0, max_step: 113, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/57953729.json'}
train loss in 1 epoch in 1 batch: 84.77856
val loss in 1 epoch: 25.68327
Round 220, winrate: 0.0, max_step: 350, reward

  if (await self.run_code(code, result,  async_=asy)):


Round 222, winrate: 0.0, max_step: 359, reward: 4, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/111791238.json'}
train loss in 1 epoch in 1 batch: -624.73688
val loss in 1 epoch: -4320.19541
train loss in 1 epoch in 2 batch: -52.76836
val loss in 1 epoch: -4355.64005
Round 223, winrate: 0.0, max_step: 196, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/280828382.json'}
train loss in 1 epoch in 1 batch: 106.68177
val loss in 1 epoch: -263.03062


  if (await self.run_code(code, result,  async_=asy)):


Round 224, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/994005916.json'}
train loss in 1 epoch in 1 batch: 32.39151
val loss in 1 epoch: -486.25937
train loss in 1 epoch in 2 batch: 171.60330
val loss in 1 epoch: -505.18747
Round 225, winrate: 0.0, max_step: 114, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/181100195.json'}
train loss in 1 epoch in 1 batch: 459.53442
val loss in 1 epoch: 45.63751


  if (await self.run_code(code, result,  async_=asy)):


Round 226, winrate: 0.0, max_step: 233, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/622586688.json'}
train loss in 1 epoch in 1 batch: 106.23001
val loss in 1 epoch: -246.17197
train loss in 1 epoch in 2 batch: -791.24023
val loss in 1 epoch: -266.53522
train loss in 1 epoch in 3 batch: -10.91455
val loss in 1 epoch: -271.75182


  if (await self.run_code(code, result,  async_=asy)):


Round 227, winrate: 0.0, max_step: 274, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/547485728.json'}
train loss in 1 epoch in 1 batch: 97.21549
val loss in 1 epoch: 26.84118
Round 228, winrate: 1.0, max_step: 119, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/746511749.json'}
train loss in 1 epoch in 1 batch: 47.75165
val loss in 1 epoch: -541.16007


  if (await self.run_code(code, result,  async_=asy)):


Round 229, winrate: 0.0, max_step: 159, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/416719193.json'}
train loss in 1 epoch in 1 batch: 1151.16870
val loss in 1 epoch: -288.25939


  if (await self.run_code(code, result,  async_=asy)):


Round 230, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/562640640.json'}
train loss in 1 epoch in 1 batch: -699.84473
val loss in 1 epoch: -374.64988
train loss in 1 epoch in 2 batch: 12.38574
val loss in 1 epoch: -408.03319


  if (await self.run_code(code, result,  async_=asy)):


Round 231, winrate: 0.0, max_step: 156, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/135265879.json'}
train loss in 1 epoch in 1 batch: 1085.16919
val loss in 1 epoch: -513.98768


  if (await self.run_code(code, result,  async_=asy)):


Round 232, winrate: 0.0, max_step: 152, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/331126199.json'}
train loss in 1 epoch in 1 batch: 460.38397
val loss in 1 epoch: 104.78005
Round 233, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/641965031.json'}
train loss in 1 epoch in 1 batch: 623.26959
val loss in 1 epoch: -362.78962


  if (await self.run_code(code, result,  async_=asy)):


Round 234, winrate: 0.0, max_step: 279, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/133003720.json'}
train loss in 1 epoch in 1 batch: -1821.33081
val loss in 1 epoch: -230.44181
Round 235, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/775619992.json'}
train loss in 1 epoch in 1 batch: 145.66914
val loss in 1 epoch: -518.74971
Round 236, winrate: 0.0, max_step: 279, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/334431105.json'}
train loss in 1 epoch in 1 batch: 243.01839
val loss in 1 epoch: -165.90552
train loss in 1 epoch in 2 batch: -609.48224
val loss in 1 epoch: -193.89548
train loss in 1 epoch in 3 batch: 118.71855
val loss in 1 epoch: -221.70375
train loss in 1 epoch in 4 batch: -79.98703
val loss in 1 epoch: -255.89395
Round 237, winrate: 0.0, max_step: 199, rew

  if (await self.run_code(code, result,  async_=asy)):


Round 238, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/280626723.json'}
train loss in 1 epoch in 1 batch: 124.38712
val loss in 1 epoch: -192.12952
Round 239, winrate: 1.0, max_step: 114, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/256973224.json'}
train loss in 1 epoch in 1 batch: 30.90671
val loss in 1 epoch: -965.72048
Round 240, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/186859151.json'}
train loss in 1 epoch in 1 batch: -45.55646
val loss in 1 epoch: 28.75565


  if (await self.run_code(code, result,  async_=asy)):


Round 241, winrate: 0.0, max_step: 315, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/493695404.json'}
train loss in 1 epoch in 1 batch: 126.60608
val loss in 1 epoch: -21.18134
Round 242, winrate: 0.0, max_step: 317, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/793222286.json'}
train loss in 1 epoch in 1 batch: -1557.50830
val loss in 1 epoch: -477.84269
train loss in 1 epoch in 2 batch: -1639.59058
val loss in 1 epoch: -485.32818
train loss in 1 epoch in 3 batch: 7.27555
val loss in 1 epoch: -492.10600
Round 243, winrate: 0.0, max_step: 237, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/891787127.json'}
train loss in 1 epoch in 1 batch: 161.83153
val loss in 1 epoch: -92.12510
train loss in 1 epoch in 2 batch: 81.98151
val loss in 1 epoch: -104.32682
Round 244, winrate: 1.0, max_step: 198, reward:

  if (await self.run_code(code, result,  async_=asy)):


Round 245, winrate: 0.0, max_step: 359, reward: 20, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/673682229.json'}
train loss in 1 epoch in 1 batch: 23.47559
val loss in 1 epoch: -362.45604
train loss in 1 epoch in 2 batch: 102.51939
val loss in 1 epoch: -364.49843
train loss in 1 epoch in 3 batch: -33.32129
val loss in 1 epoch: -366.14471
train loss in 1 epoch in 4 batch: -30.49549
val loss in 1 epoch: -364.54313
Round 246, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/986336164.json'}
train loss in 1 epoch in 1 batch: 180.23970
val loss in 1 epoch: -208.86663
train loss in 1 epoch in 2 batch: -25.34607
val loss in 1 epoch: -180.95130
Round 247, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/498990719.json'}
train loss in 1 epoch in 1 batch: -129.26270
v

  if (await self.run_code(code, result,  async_=asy)):


Round 251, winrate: 0.0, max_step: 359, reward: 9, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/591035737.json'}
train loss in 1 epoch in 1 batch: 173.16452
val loss in 1 epoch: -131.02117
train loss in 1 epoch in 2 batch: 43.81367
val loss in 1 epoch: -141.03536
Round 252, winrate: 0.0, max_step: 236, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/23250075.json'}
train loss in 1 epoch in 1 batch: 172.43454
val loss in 1 epoch: -69.11194
Round 253, winrate: 0.0, max_step: 159, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/51796747.json'}
train loss in 1 epoch in 1 batch: 55.72289
val loss in 1 epoch: -1048.08600
Round 254, winrate: 0.0, max_step: 231, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/51804998.json'}
train loss in 1 epoch in 1 batch: 286

  if (await self.run_code(code, result,  async_=asy)):


Round 255, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/918850355.json'}
train loss in 1 epoch in 1 batch: 310.34070
val loss in 1 epoch: -32.11755
train loss in 1 epoch in 2 batch: 64.78797
val loss in 1 epoch: -18.45713
Round 256, winrate: 0.0, max_step: 114, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/196639904.json'}
train loss in 1 epoch in 1 batch: 147.51936
val loss in 1 epoch: -425.57439
Round 257, winrate: 0.0, max_step: 359, reward: 9, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/772348806.json'}
train loss in 1 epoch in 1 batch: 93.77234
val loss in 1 epoch: -154.50446
train loss in 1 epoch in 2 batch: 63.66265
val loss in 1 epoch: -186.91056
train loss in 1 epoch in 3 batch: -925.47272
val loss in 1 epoch: -223.04391
Round 258, winrate: 0.0, max_step: 190, reward: 0,

  if (await self.run_code(code, result,  async_=asy)):


Round 260, winrate: 0.0, max_step: 199, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/414944266.json'}
train loss in 1 epoch in 1 batch: 274.81903
val loss in 1 epoch: -1247.44137
Round 261, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/218768096.json'}
train loss in 1 epoch in 1 batch: -978.95520
val loss in 1 epoch: -302.73416


  if (await self.run_code(code, result,  async_=asy)):


Round 262, winrate: 0.0, max_step: 236, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/610542285.json'}
train loss in 1 epoch in 1 batch: 227.50027
val loss in 1 epoch: 25.41799
Round 263, winrate: 0.0, max_step: 359, reward: 3, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/314751461.json'}
train loss in 1 epoch in 1 batch: 339.85144
val loss in 1 epoch: -350.33020
train loss in 1 epoch in 2 batch: 24.70251
val loss in 1 epoch: -341.70529
Round 264, winrate: 0.0, max_step: 153, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/113670318.json'}
train loss in 1 epoch in 1 batch: 116.51313
val loss in 1 epoch: -56.72362
Round 265, winrate: 0.0, max_step: 314, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/94157877.json'}
train loss in 1 epoch in 1 batch: 298

  if (await self.run_code(code, result,  async_=asy)):


Round 270, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/833760288.json'}
train loss in 1 epoch in 1 batch: 338.05408
val loss in 1 epoch: -32.41704
Round 271, winrate: 1.0, max_step: 158, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/596505449.json'}
train loss in 1 epoch in 1 batch: -733.10114
val loss in 1 epoch: -304.47954


  if (await self.run_code(code, result,  async_=asy)):


Round 272, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/735954539.json'}
train loss in 1 epoch in 1 batch: 301.37701
val loss in 1 epoch: -99.01256


  if (await self.run_code(code, result,  async_=asy)):


Round 273, winrate: 0.0, max_step: 356, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/312366831.json'}
train loss in 1 epoch in 1 batch: 194.04044
val loss in 1 epoch: -302.88793
train loss in 1 epoch in 2 batch: -20.54819
val loss in 1 epoch: -310.39967
train loss in 1 epoch in 3 batch: 38.81747
val loss in 1 epoch: -323.12326
Round 274, winrate: 0.0, max_step: 358, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/139566706.json'}
train loss in 1 epoch in 1 batch: 168.83273
val loss in 1 epoch: -71.49102
train loss in 1 epoch in 2 batch: 143.05682
val loss in 1 epoch: -58.50363


  if (await self.run_code(code, result,  async_=asy)):


Round 275, winrate: 0.0, max_step: 359, reward: 2, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/804541783.json'}
train loss in 1 epoch in 1 batch: -398.78369
val loss in 1 epoch: -195.17406
train loss in 1 epoch in 2 batch: -12.79688
val loss in 1 epoch: -204.97360
train loss in 1 epoch in 3 batch: -38.25330
val loss in 1 epoch: -211.24904
train loss in 1 epoch in 4 batch: -94.13318
val loss in 1 epoch: -218.68017
Round 276, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/918172834.json'}
train loss in 1 epoch in 1 batch: 140.62895
val loss in 1 epoch: 25.13117
Round 277, winrate: 0.0, max_step: 231, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/121744725.json'}
train loss in 1 epoch in 1 batch: -1843.19543
val loss in 1 epoch: -655.72544
Round 278, winrate: 0.0, max_step: 235, rewar

  if (await self.run_code(code, result,  async_=asy)):


Round 281, winrate: 0.0, max_step: 232, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/878061684.json'}
train loss in 1 epoch in 1 batch: 185.06487
val loss in 1 epoch: -797.18399


  if (await self.run_code(code, result,  async_=asy)):


Round 282, winrate: 0.0, max_step: 239, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/508637049.json'}
train loss in 1 epoch in 1 batch: 93.54794
val loss in 1 epoch: -259.00255
train loss in 1 epoch in 2 batch: 252.09198
val loss in 1 epoch: -296.30295
Round 283, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/418394767.json'}
train loss in 1 epoch in 1 batch: 67.65028
val loss in 1 epoch: -532.26705


  if (await self.run_code(code, result,  async_=asy)):


Round 284, winrate: 0.0, max_step: 152, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/411791308.json'}
train loss in 1 epoch in 1 batch: -1248.85229
val loss in 1 epoch: -355.25307
Round 285, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/10317473.json'}
train loss in 1 epoch in 1 batch: 43.78827
val loss in 1 epoch: -138.03916
Round 286, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/622873037.json'}
train loss in 1 epoch in 1 batch: 434.06995
val loss in 1 epoch: 216.89113


  if (await self.run_code(code, result,  async_=asy)):


Round 287, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/620559351.json'}
train loss in 1 epoch in 1 batch: 19.87335
val loss in 1 epoch: -230.74061
Round 288, winrate: 0.0, max_step: 191, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/228791458.json'}
train loss in 1 epoch in 1 batch: -2113.25366
val loss in 1 epoch: -819.12495
Round 289, winrate: 0.0, max_step: 158, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/975522479.json'}
train loss in 1 epoch in 1 batch: -1251.88330
val loss in 1 epoch: -511.63214


  if (await self.run_code(code, result,  async_=asy)):


Round 290, winrate: 0.0, max_step: 191, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/100556268.json'}
train loss in 1 epoch in 1 batch: 663.41602
val loss in 1 epoch: 166.81427
Round 291, winrate: 0.0, max_step: 236, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/422279215.json'}
train loss in 1 epoch in 1 batch: -500.28427
val loss in 1 epoch: -260.83218
Round 292, winrate: 0.0, max_step: 159, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/5563564.json'}
train loss in 1 epoch in 1 batch: -20.86177
val loss in 1 epoch: -309.96477
Round 293, winrate: 1.0, max_step: 359, reward: 196, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/282341798.json'}
train loss in 1 epoch in 1 batch: 413.75879
val loss in 1 epoch: -85.92735
train loss in 1 epoch in 2 batch:

  if (await self.run_code(code, result,  async_=asy)):


Round 294, winrate: 0.0, max_step: 359, reward: 9, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/721663450.json'}
train loss in 1 epoch in 1 batch: -578.59198
val loss in 1 epoch: -1897.27177
train loss in 1 epoch in 2 batch: -0.16048
val loss in 1 epoch: -1924.62765
Round 295, winrate: 0.0, max_step: 197, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/524644576.json'}
train loss in 1 epoch in 1 batch: -1542.46179
val loss in 1 epoch: -288.90632


  if (await self.run_code(code, result,  async_=asy)):


Round 296, winrate: 0.0, max_step: 359, reward: 4, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/234810708.json'}
train loss in 1 epoch in 1 batch: -871.78418
val loss in 1 epoch: -338.35759
train loss in 1 epoch in 2 batch: 87.09397
val loss in 1 epoch: -334.98186
train loss in 1 epoch in 3 batch: 31.29208
val loss in 1 epoch: -346.52700


  if (await self.run_code(code, result,  async_=asy)):


Round 297, winrate: 0.0, max_step: 275, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/857257094.json'}
train loss in 1 epoch in 1 batch: 133.77203
val loss in 1 epoch: -580.63923


  if (await self.run_code(code, result,  async_=asy)):


Round 298, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/806161868.json'}
train loss in 1 epoch in 1 batch: -10.77434
val loss in 1 epoch: -434.77565
Round 299, winrate: 0.0, max_step: 117, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/212837082.json'}
train loss in 1 epoch in 1 batch: 39.63770
val loss in 1 epoch: -354.01107
Round 300, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/863814865.json'}
train loss in 1 epoch in 1 batch: -1026.23950
val loss in 1 epoch: -70.02166


  if (await self.run_code(code, result,  async_=asy)):


Round 301, winrate: 0.0, max_step: 199, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/374864757.json'}
train loss in 1 epoch in 1 batch: 197.10001
val loss in 1 epoch: 146.05913
Round 302, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/585323358.json'}
train loss in 1 epoch in 1 batch: 375.44656
val loss in 1 epoch: -4.51885


  if (await self.run_code(code, result,  async_=asy)):


Round 303, winrate: 0.0, max_step: 274, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/637499.json'}
train loss in 1 epoch in 1 batch: -70.21721
val loss in 1 epoch: -356.72995


  if (await self.run_code(code, result,  async_=asy)):


Round 304, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/716111135.json'}
train loss in 1 epoch in 1 batch: -650.17761
val loss in 1 epoch: -1208.38560
train loss in 1 epoch in 2 batch: 265.42596
val loss in 1 epoch: -1205.75599
Round 305, winrate: 1.0, max_step: 359, reward: 4, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/666434985.json'}
train loss in 1 epoch in 1 batch: -2092.57275
val loss in 1 epoch: -292.36554
Round 306, winrate: 0.5, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/172161760.json'}
train loss in 1 epoch in 1 batch: -782.12402
val loss in 1 epoch: -264.74580
train loss in 1 epoch in 2 batch: 237.71893
val loss in 1 epoch: -292.83840
train loss in 1 epoch in 3 batch: -3140.77368
val loss in 1 epoch: -322.40996


  if (await self.run_code(code, result,  async_=asy)):


Round 307, winrate: 0.0, max_step: 276, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/303357813.json'}
train loss in 1 epoch in 1 batch: 90.39635
val loss in 1 epoch: -160.16550
train loss in 1 epoch in 2 batch: -708.03821
val loss in 1 epoch: -191.83324
train loss in 1 epoch in 3 batch: 201.70366
val loss in 1 epoch: -220.76808
Round 308, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/148017079.json'}
train loss in 1 epoch in 1 batch: 385.88443
val loss in 1 epoch: -407.82201


  if (await self.run_code(code, result,  async_=asy)):


Round 309, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/163002273.json'}
train loss in 1 epoch in 1 batch: 212.47305
val loss in 1 epoch: -76.73810
Round 310, winrate: 0.0, max_step: 157, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/294288374.json'}
train loss in 1 epoch in 1 batch: 136.79268
val loss in 1 epoch: 86.97942


  if (await self.run_code(code, result,  async_=asy)):
