In [1]:
import numpy as np
import pandas as pd


import json
import os

import subprocess

from matplotlib import pyplot as plt

In [6]:
%matplotlib inline
%config Completer.use_jedi = False

In [3]:
import torch
from torch import nn
from torch import optim

from tensorboardX import SummaryWriter

In [674]:
from importlib import reload
from utils import dataset

reload(dataset)

<module 'utils.dataset' from '/Users/sergmiller/Documents/my/lux-ai-v1/research/utils/dataset.py'>

In [573]:
def _mean_val_loss(val, model_ff, criterion, batch_size):
    """Average `criterion` over consecutive `batch_size` slices of `val`.

    Returns NaN when `val` has no rows instead of dividing by zero.
    """
    total = 0.0
    n_batches = 0
    for start in np.arange(0, val.targets.shape[0], batch_size):
        stop = start + batch_size
        X_val = torch.FloatTensor(val.features[start:stop])
        y_val = torch.FloatTensor(val.weights[start:stop])
        a_val = torch.LongTensor(val.targets[start:stop])
        with torch.no_grad():
            val_logits = model_ff(X_val)
        total += criterion(val_logits, y_val, a_val, X_val).item()
        n_batches += 1
    return total / n_batches if n_batches else float('nan')


def learn(train, val, model_ff, criterion, epochs=5, batch_size=64, shuffle=True, freq=10,lr=1e-3, l2=1e-5, use_tb=True):
    """Train `model_ff` with Adam on `train`, reporting train/val loss every `freq` batches.

    Parameters
    ----------
    train, val : objects exposing row-indexable `features`, `weights` and `targets` arrays.
    model_ff : torch module called as `model_ff(features_batch)`.
    criterion : callable `(model_output, weights_batch, targets_batch, features_batch) -> scalar tensor`.
    epochs, batch_size : number of passes over `train` and rows per optimisation step.
    shuffle : when True (default) the row order is reshuffled with `np.random.shuffle`
        at the start of every epoch; when False rows are visited in stored order.
    freq : report (and validate) every `freq`-th batch of an epoch.
    lr, l2 : Adam learning rate and weight decay.
    use_tb : when True, scalars are also written through a `SummaryWriter`.

    Returns None; the model is updated in place.
    """
    writer = SummaryWriter() if use_tb else None

    ids_nn = np.arange(train.targets.shape[0])
    optimizer = optim.Adam(model_ff.parameters(), lr=lr, weight_decay=l2)

    # Counts reporting steps, not batches: it only advances when a report is emitted.
    n_iter = 0

    try:
        for epoch in np.arange(epochs):
            if shuffle:
                np.random.shuffle(ids_nn)

            model_ff.train(True)

            for b in np.arange(0, train.targets.shape[0], batch_size):
                batch_ids = ids_nn[b:b + batch_size]
                X_batch = torch.FloatTensor(train.features[batch_ids])
                y_batch = torch.FloatTensor(train.weights[batch_ids])  # reward(advantage)
                a_batch = torch.LongTensor(train.targets[batch_ids])  # action

                optimizer.zero_grad()
                y_pred_logits = model_ff(X_batch)

                loss = criterion(y_pred_logits, y_batch, a_batch, X_batch)
                loss.backward()

                optimizer.step()

                batch_no = b // batch_size + 1
                if batch_no % freq != 0:
                    continue

                train_loss = loss.item()
                print('train loss in %d epoch in %d batch: %.5f' %
                  (epoch + 1, batch_no, train_loss))

                if writer is not None:
                    writer.add_scalar('data/train_loss', train_loss, n_iter)
                    writer.add_scalar('data/epoch', epoch + 1, n_iter)
                    writer.add_scalar('data/batch', batch_no, n_iter)

                # Validate in eval mode, then switch back: otherwise every batch after
                # the first report would be trained with dropout disabled.
                model_ff.train(False)
                try:
                    val_loss = _mean_val_loss(val, model_ff, criterion, batch_size)
                finally:
                    model_ff.train(True)
                print('val loss in %d epoch: %.5f' % (epoch + 1, val_loss))

                if writer is not None:
                    writer.add_scalar('data/val_loss', val_loss, n_iter)
                n_iter += 1
    finally:
        # Flush and release the event file even if training raises.
        if writer is not None:
            writer.close()


In [96]:
# datasets = dataset.read_datasets_from_dir("features_v3/")

In [97]:
# dataset.read_columns_from_random_file("features_v3")

[(0, 'cargo_vol_total'),
 (1, 'cargo_fuel_total'),
 (2, 'unit_can_build'),
 (3, 'unit_routine'),
 (4, 'unit_last_action'),
 (5, 'near_city_dist'),
 (6, 'near_city_dir'),
 (7, 'near_city_fuel'),
 (8, 'near_city_light_upkeep'),
 (9, 'city_size'),
 (10, 'opp_near_city_dist'),
 (11, 'opp_near_city_dir'),
 (12, 'opp_near_city_fuel'),
 (13, 'opp_near_city_light_upkeep'),
 (14, 'opp_city_size'),
 (15, 'near_resource_dist'),
 (16, 'near_resource_dir'),
 (17, 'near_resource_type'),
 (18, 'near_resource_amount'),
 (19, 'my_city_count'),
 (20, 'opp_city_count'),
 (21, 'turn'),
 (22, 'is_night'),
 (23, 'time_to_night'),
 (24, 'width'),
 (25, 'height'),
 (26, 'my_research'),
 (27, 'opp_research'),
 (28, 'my_research_coal'),
 (29, 'opp_research_coal'),
 (30, 'my_research_uran'),
 (31, 'opp_research_uran'),
 (32, 'action'),
 (33, 'my_tiles'),
 (34, 'opp_tiles')]

In [100]:
dataset.CAT_FEATURES

[2, 3, 4, 6, 11, 16, 17, 22, 28, 29, 30, 31]

In [409]:
# Indices of every column that is not listed in dataset.CAT_FEATURES_V4, in ascending order.
# 32 * 32 * 7 equals MAP_F, the flattened map block the network reshapes to (32, 32, 7);
# the leading 42 is presumably the number of per-unit scalar columns — TODO confirm.
FLOAT_FEATURES = sorted(set(range(42 + 32 * 32 * 7)).difference(dataset.CAT_FEATURES_V4))

In [410]:
FLOAT_FEATURES[:20], FLOAT_FEATURES[-10:]

([0, 1, 5, 6, 7, 9, 10, 11, 12, 13, 15, 17, 18, 19, 20, 21, 23, 26, 27, 28],
 [7200, 7201, 7202, 7203, 7204, 7205, 7206, 7207, 7208, 7209])

In [87]:
import pickle
from sklearn.preprocessing import OneHotEncoder

In [392]:
# Load the one-hot encoder previously pickled as ohe_v2; it is passed to prepare_features
# so that the category-to-column mapping stays fixed between runs.
# NOTE: pickle.load can execute arbitrary code from the file — only load files you trust.
with open("../submissions/simple/models/ohe_v2", "rb") as f:
    OHE = pickle.load(f)

In [419]:
def prepare_features(t: dataset.Dataset, v: dataset.Dataset, ohe=None, categories=None) -> (dataset.Dataset, dataset.Dataset, object):
    """One-hot encode the categorical columns of two datasets and cast everything to float64.

    Parameters
    ----------
    t, v : datasets whose `features` is a 2-D array; `t` is treated as the training split.
    ohe : an already fitted encoder to reuse. When None a new `OneHotEncoder` is
        created and fitted on the categorical columns of `t`.
    categories : forwarded to `OneHotEncoder(categories=...)` when a new encoder is created.

    Returns
    -------
    (t_prepared, v_prepared, ohe) — note this is a 3-tuple: the encoder that was used
    is returned as well so it can be pickled and reused.
    """
    create_ohe = ohe is None
    if create_ohe:
        # NOTE(review): scikit-learn >= 1.2 renames `sparse` to `sparse_output`; the old
        # keyword is kept here to stay compatible with the environment that built ohe_v2.
        ohe = OneHotEncoder(sparse=False, categories=categories)

    def prepare(d, is_train):
        # Fancy indexing copies, so the in-place clean-up below does not touch `d.features`.
        cf = d.features[:, dataset.CAT_FEATURES_V4]
        ff = d.features[:, FLOAT_FEATURES]
        # Normalise the string spellings found in the logged features to the values
        # the encoder's categories use.
        cf[cf == "False"] = False
        cf[cf == "True"] = True
        cf[cf == None] = "None"  # elementwise comparison on an object array is intended
        cf[cf == "1"] = 1
        cf[cf == "2"] = 2
        cf[cf == "3"] = 3
        ff[ff == "None"] = 0
        cf_o = ohe.fit_transform(cf) if is_train and create_ohe else ohe.transform(cf)
        # np.float was removed in NumPy 1.24; np.float64 is the dtype it aliased.
        return dataset.Dataset(
            features=np.array(np.concatenate([cf_o, ff], axis=1), dtype=np.float64),
            targets=np.array(d.targets, dtype=np.float64),
            weights=np.array(d.weights, dtype=np.float64),
            next_state_id=d.next_state_id
        )

    t = prepare(t, True)
    v = prepare(v, False)
    return (t, v, ohe)

In [184]:
# dt,dv,OHE = prepare_features(data, data, None, [
#      np.array(["None", False, True], dtype=object),
#      np.array([1, 2, 3], dtype=object),
#      np.array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
#      np.array(['None', 'coal', 'uranium', 'wood'], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object),
#      np.array(["None", False, True], dtype=object)])

In [185]:
OHE.categories_

[array(['None', False, True], dtype=object),
 array([1, 2, 3], dtype=object),
 array(['None', 'bcity', 'e', 'n', 'p', 's', 'w'], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', 'c', 'e', 'n', 's', 'w'], dtype=object),
 array(['None', 'coal', 'uranium', 'wood'], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object),
 array(['None', False, True], dtype=object)]

In [188]:
# with open("../submissions/simple/models/ohe_v2", "wb") as f:
#     pickle.dump(OHE, f)

In [142]:
# with open("../submissions/simple/models/ohe_v1", "wb") as f:
#     pickle.dump(ohe, f)

In [626]:
MAP_F = 32 * 32 * 7


class NNWithCustomFeatures(nn.Module):
    """Two-headed network over scalar features plus a flattened 32x32x7 map.

    The trailing `MAP_F` values of a state row are run through a small CNN and
    projected to 128 features; these are concatenated with the remaining
    `INPUT_F` scalar columns and fed to two MLP heads of width `H`:
    `model_q` (ends in ReLU, so its `A` outputs are non-negative) and
    `model_p` (raw logits, `A` outputs).
    """

    def __init__(self, INPUT_F, DROP_P, H, A=6):
        super().__init__()
        head_in = INPUT_F + 128  # scalar columns + projected map embedding

        def hidden_layers():
            # Shared layout of both heads up to (not including) the output layer.
            return [
                nn.Dropout(DROP_P),
                nn.Linear(head_in, H),
                nn.LayerNorm(H),
                nn.ReLU(),
                nn.Dropout(DROP_P),
                nn.Linear(H, H),
                nn.ReLU(),
                nn.Dropout(DROP_P),
                nn.Linear(H, H),
                nn.ReLU(),
            ]

        self.model_q = nn.Sequential(*hidden_layers(), nn.Linear(H, A), nn.ReLU())
        self.model_p = nn.Sequential(*hidden_layers(), nn.Linear(H, A))

        # Three conv(3x3, no padding) -> ReLU -> maxpool(2) stages. On a 32x32 grid
        # the spatial size goes 32 -> 30 -> 15 -> 13 -> 6 -> 4 -> 2.
        conv_stages = []
        for c_in, c_out in ((7, 64), (64, 128), (128, 256)):
            conv_stages.extend([
                nn.Conv2d(c_in, c_out, 3),
                nn.ReLU(inplace=True),
                nn.MaxPool2d(2),
            ])
        self.map_model = nn.Sequential(*conv_stages)

        # Resizes whatever the conv stack produced to a fixed 4x4 grid for `proj`.
        self.avgpool = nn.AdaptiveAvgPool2d((4, 4))
        self.proj = nn.Sequential(
            nn.Dropout(p=DROP_P),
            nn.Linear(256 * 4 * 4, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(p=DROP_P),
            nn.Linear(256, 128),
        )

    def forward(self, x):
        """Apply `forward_impl` to each half of the columns of `x` and concatenate.

        The first half is the current state and the second half the next state, so the
        output columns are [q_cur, pi_cur, q_next, pi_next].
        """
        half = x.shape[1] // 2
        halves = (x[:, :half], x[:, half:])
        return torch.cat([self.forward_impl(part) for part in halves], dim=1)

    def forward_impl(self, x):
        """Return `[model_q(z), model_p(z)]` for one state row batch `x`."""
        scalars, grid = x[:, :-MAP_F], x[:, -MAP_F:]
        # (N, 32, 32, 7) -> swap axes 1 and 3 so the 7 channels come second.
        grid = grid.reshape(-1, 32, 32, 7).transpose(1, -1)
        pooled = self.avgpool(self.map_model(grid))
        grid_embedding = self.proj(torch.flatten(pooled, 1))
        joint = torch.cat([scalars, grid_embedding], dim=1)
        return torch.cat([head(joint) for head in (self.model_q, self.model_p)], dim=1)

In [627]:
model = NNWithCustomFeatures(63, 0.05, 64)

In [630]:
# Smoke test: one forward_impl call on a batch of 4 rows (63 scalar columns + flattened map).
# NOTE(review): torch.Tensor(4, n) allocates the tensor without initialising its values,
# so the printed numbers are not reproducible — consider torch.zeros or a seeded torch.randn.
model.forward_impl(torch.Tensor(4, 63 + 32*32*7))

tensor([[ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450],
        [ 0.0000,  0.0000,  0.1094,  0.0437,  0.0000,  0.0924, -0.0958, -0.0997,
         -0.1076,  0.0741,  0.0568, -0.0414],
        [ 0.0000,  0.0000,  0.1109,  0.0476,  0.0000,  0.0915, -0.0927, -0.0949,
         -0.1044,  0.0684,  0.0588, -0.0371],
        [ 0.0000,  0.0000,  0.1147,  0.0556,  0.0000,  0.0966, -0.0937, -0.0995,
         -0.1064,  0.0649,  0.0547, -0.0450]], grad_fn=<CatBackward>)

In [680]:
ENTROPY_REG = 1e-3


def policy_loss(pi_logits, reward_batch, a_batch, X_batch):
    """Reward-weighted cross-entropy policy loss with an entropy bonus.

    Parameters
    ----------
    pi_logits : (batch, n_actions) unnormalised action scores.
    reward_batch : (batch,) weight applied to each sample's negative log-likelihood.
    a_batch : (batch,) integer index of the action taken.
    X_batch : unused; kept so all criteria share the signature `learn` calls.

    Returns the scalar mean of `nll * reward + ENTROPY_REG * sum(p * log p)`.
    `sum(p * log p)` is the negative entropy, so minimising the loss favours
    higher-entropy policies — the same sign `actor_critic_loss` uses.
    """
    # log_softmax keeps log-probabilities finite; log(softmax(.)) gives 0 * -inf = NaN
    # once a probability underflows to zero.
    log_probs = torch.log_softmax(pi_logits, dim=1)
    probs = log_probs.exp()
    neg_entropy = torch.sum(probs * log_probs, dim=1)
    nll = torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch)
    return torch.mean(nll * reward_batch + ENTROPY_REG * neg_entropy)

def q_loss(q_vals, reward_batch, a_batch, X_batch):
    """Scaled MSE between the Q-value of the taken action and the observed reward.

    For each row the entry of `q_vals` at column `a_batch[row]` is compared with
    `reward_batch[row]`; the mean squared error is multiplied by 0.01.
    `X_batch` is unused.
    """
    batch_rows = np.arange(q_vals.shape[0])
    taken_q = q_vals[batch_rows, a_batch]
    return torch.nn.functional.mse_loss(taken_q, reward_batch) * 0.01

gamma = 0.99


def get_is_last_state(x):
    """Return a float mask with 1.0 for rows of `x` whose entries are all close to -1.

    add_next_features fills the "next state" half with -1 when there is no
    successor, so such a row marks a terminal transition.
    """
    t = torch.sum(torch.isclose(x, torch.ones_like(x) * (-1)), dim=1) == x.shape[1]
    return t.float()


def q_loss_pair(q_vals_cur_and_next, reward_batch, a_batch, X_batch):
    """One-step TD (Q-learning) loss on a model output holding current and next Q-values.

    Parameters
    ----------
    q_vals_cur_and_next : (batch, >=12); columns 0:6 are Q(s, .), columns 6:12 are Q(s', .).
    reward_batch : (batch,) immediate reward.
    a_batch : (batch,) integer index of the action taken in s.
    X_batch : (batch, 2 * F) features; the second half is the next state and is
        used only to detect terminal transitions.

    Returns SmoothL1 between Q(s, a) and the bootstrapped target
    `reward + gamma * max_a' Q(s', a')` (zero bootstrap on terminal rows).
    """
    q_vals = q_vals_cur_and_next[:, :6]
    q_vals_next = q_vals_cur_and_next[:, 6:12]
    q_taken = q_vals[np.arange(q_vals.shape[0]), a_batch]

    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    not_terminal = 1 - get_is_last_state(X_batch_next)
    best_q_next = torch.max(q_vals_next, dim=1)[0] * not_terminal

    # The bootstrapped target is held constant and the gradient flows through Q(s, a).
    # Previously Q(s, a) was detached instead, so only the next-state values were
    # trained; this now matches the critic term in actor_critic_loss.
    td_target = (reward_batch + gamma * best_q_next).detach()
    return torch.nn.SmoothL1Loss()(input=q_taken, target=td_target)


def actor_critic_loss(q_pi_payload, reward_batch, a_batch, X_batch):
    """Combined critic (TD) and actor (advantage-weighted policy) loss.

    Parameters
    ----------
    q_pi_payload : (batch, >=18) model output; columns 0:6 are Q(s, .), 6:12 are the
        policy logits for s, 12:18 are Q(s', .).
    reward_batch : (batch,) immediate reward.
    a_batch : (batch,) integer index of the action taken in s.
    X_batch : (batch, 2 * F) features; the second half is the next state and is
        used only to detect terminal transitions.

    Returns `critic_loss + actor_loss` where
    - critic_loss is SmoothL1 between Q(s, a) and `reward + gamma * max Q(s', .)`;
    - actor_loss is the mean of `nll * advantage + ENTROPY_REG * sum(p * log p)`,
      with the advantage treated as a constant.
    """
    q_vals = q_pi_payload[:, :6]
    pi_logits = q_pi_payload[:, 6:12]
    q_vals_next = q_pi_payload[:, 12:18]

    # log_softmax keeps log-probabilities finite; log(softmax(.)) gives 0 * -inf = NaN
    # once a probability underflows to zero.
    log_pi = torch.log_softmax(pi_logits, dim=1)
    pi_probs = log_pi.exp()

    q_taken = q_vals[np.arange(q_vals.shape[0]), a_batch]
    X_batch_next = X_batch[:, X_batch.shape[1] // 2:]
    best_q_next = torch.max(q_vals_next, dim=1)[0] * (1 - get_is_last_state(X_batch_next))

    # No gradient flows through the bootstrapped target or the advantage.
    td_target = (reward_batch + gamma * best_q_next).detach()
    advantage = td_target - q_taken.detach()

    # Named critic_loss/actor_loss so the module-level q_loss function is not shadowed.
    critic_loss = torch.nn.SmoothL1Loss()(input=q_taken, target=td_target)
    nll = torch.nn.CrossEntropyLoss(reduction='none')(pi_logits, a_batch)
    actor_loss = torch.mean(nll * advantage
                            + ENTROPY_REG * torch.sum(pi_probs * log_pi, dim=1))
    return critic_loss + actor_loss

In [429]:
simple_bot = "../submissions/simple/main.py"
replays = "replays"

def run_game(left_bot=simple_bot, right_bot=simple_bot, seed=42, loglevel=2):
    """Play one `lux-ai-2021` match between two bot scripts and load its replay.

    Parameters
    ----------
    left_bot, right_bot : paths of the two bot entry points, in the order passed to the CLI.
    seed : value passed as `--seed`.
    loglevel : value passed as `--loglevel`; when > 0 the captured stdout is also printed.

    The map is square with a side drawn from {12, 16, 24, 32} and the replay file
    name is a random integer; both use NumPy's global RNG.

    Returns `(replay, stdout_text)`: the parsed replay JSON and the CLI's stdout.
    Raises AssertionError when the CLI exits with a non-zero code.
    """
    python_v = "python3.7"

    # Make sure the output directory exists before the CLI is asked to write into it.
    os.makedirs(replays, exist_ok=True)
    replay_path = os.path.join(replays, str(np.random.randint(1e9)) + ".json")

    size = np.random.choice([12,16,24,32], size=1)[0]

    res = subprocess.run([
        "lux-ai-2021",
        left_bot,
        right_bot,
#         "--statefulReplay",
        "--width={}".format(size),
        "--height={}".format(size),
        "--loglevel={}".format(loglevel),
        "--python={}".format(python_v),
        "--maxtime=100000",
        "--maxConcurrentMatches=1",
        "--seed={}".format(seed),
        "--out={}".format(replay_path)], stdout=subprocess.PIPE)

    stdout_text = res.stdout.decode()
    if loglevel > 0:
        print(stdout_text)

    assert res.returncode == 0, "lux-ai-2021 exited with code {}".format(res.returncode)

    with open(replay_path, "r") as f:
        result = json.load(f)
    return result, stdout_text

In [432]:
run_game(simple_bot, simple_bot)  # <-- test run one game with default bot

In [98]:
import hashlib

def build_runnable_bot_with_flags(flags: dict, origin = simple_bot, base_path = '../submissions/simple/') -> str:
    """Write a copy of the bot template with `flags` substituted in and return its path.

    The text of `origin` is used as a `str.format` template: its positional
    placeholder receives `json.dumps(flags)`, so any literal braces in the
    template must be doubled. The copy is written to
    `base_path + "main_<hash>.py"`, where the hash is derived from the JSON
    text, so identical flags always map to (and overwrite) the same file.
    """
    with open(origin, "r") as template_file:
        # Strip only an actual newline: slicing off the last character would eat a
        # real character when the final line has no trailing newline.
        lines = [line.rstrip('\n') for line in template_file]
    template = '\n'.join(lines)

    flags_json = json.dumps(flags)
    text = template.format(flags_json)

    h = int(hashlib.sha256(flags_json.encode('utf-8')).hexdigest(), 16) % (10 ** 18)
    path = base_path + "main_" + str(h) + ".py"
    with open(path, "w") as out_file:
        out_file.write(text)
    return path

In [229]:
def count_series(results: list):
    """Score each game of a series as 1 (win), 0 (loss) or 0.5 (no strict 1st/2nd ranking).

    `results[i][0]['results']['ranks']` must be the two rank entries of game `i`.
    Game `i` counts as a win when the first-listed, rank-1 agent has
    `agentID == i % 2`. Only the `results` section of the replay is read.
    """
    wins = []
    for i, r in enumerate(results):
        ranks = r[0]['results']['ranks']
        if ranks[0]['rank'] == 1 and ranks[1]['rank'] == 2:
            wins.append(1 if ranks[0]["agentID"] == i % 2 else 0)
        else:
            wins.append(0.5)
    return wins

In [100]:
from joblib import Parallel, delayed
import tqdm

In [614]:
def sample_dataset(d, p=0.5):
    """Return a new Dataset made of `int(len(d.features) * p)` randomly drawn rows of `d`.

    Row indices come from `np.random.choice` with its default settings (rows may
    repeat). Only `features`, `weights` and `targets` are carried over.
    """
    total_rows = len(d.features)
    picked = np.random.choice(total_rows, size=int(total_rows * p))
    columns = {name: getattr(d, name)[picked] for name in ("features", "weights", "targets")}
    return dataset.Dataset(**columns)

In [492]:
def add_next_features(d):
    """Pair every row of `d.features` with the features of its successor state.

    Row `i` of the result is `concat(features[i], features[next_state_id[i]])`;
    when `next_state_id[i]` is -1 the second half is filled with -1 instead.
    `weights` and `targets` are copied row by row. Requires `d.next_state_id`.
    """
    assert d.next_state_id is not None

    def successor_features(row):
        successor = d.next_state_id[row]
        if successor == -1:
            # No successor: use an all -1 placeholder of the same shape.
            return np.ones_like(d.features[row]) * (-1)
        return d.features[successor]

    row_ids = np.arange(d.features.shape[0])
    paired = [np.concatenate([d.features[row], successor_features(row)]) for row in row_ids]
    row_weights = [d.weights[row] for row in row_ids]
    row_targets = [d.targets[row] for row in row_ids]

    return dataset.Dataset(
        features=np.array(paired),
        weights=np.array(row_weights),
        targets=np.array(row_targets))

In [None]:
# Self-play training loop: each round saves the current model, plays B games against
# simple_bot with a bot that loads that checkpoint, reads the features the bot logged,
# and runs one training epoch on a 10% sample of them.
#
# NOTE: `t`, `B`, `model` and `writer` must already exist in the kernel. Their
# initialisation is the commented-out code just below, so this cell does not run on a
# fresh kernel until those lines are restored. The loop has no exit condition and runs
# until interrupted.
# t = 0  #  1778 - value_iter
# B = 1

# model = NNWithCustomFeatures(83, 0.05, 128)

# writer = SummaryWriter()

while True:
    t += 1
    # Re-seed NumPy per round; run_game and sample_dataset draw from this global RNG.
    np.random.seed(t)
    # Checkpoint first so the generated bot can load exactly these weights.
    torch.save(model.state_dict(), '../submissions/simple/models/ac_iter_v{}'.format(t))
    r = []
    for i in np.arange(B):
        seed = t * B + i
#         _f = str(seed) + ".txt"
        # Every game of every round logs to the same file name.
        _f = "log.txt"
        bot = build_runnable_bot_with_flags({
            "model_path": "models/ac_iter_v{}".format(t),
            "use_policy": True,
            "is_neural": True,
            # Decays with the round number: 0.5 / log(t + 1).
            "prob_use_default_agent": 0.5 / np.log(t + 1),
            "prob_use_random": 0.05,
            "ohe_path": "models/ohe_v2",
            "use_old_units_cargo_rules": False,
            "log_features_path": "../../research/features_iter/", "log_path_file_name": _f
        })
        # Alternate sides by round parity: the trained bot plays left on even t, right on odd t.
        if t % 2 == 0:
            _r = run_game(bot, simple_bot, loglevel=0, seed=seed)
        else:
            _r = run_game(simple_bot, bot, loglevel=0, seed=seed)
        r.append(_r)
    wins = np.mean(count_series(r))
    # On odd rounds the bot was the right-hand player, so the score is flipped —
    # presumably to express it from the trained bot's side; verify this for B > 1,
    # where count_series also alternates the expected winner by game index.
    if t % 2 == 1:
        wins = 1 - wins
    # `_f` is the value left over from the last iteration of the loop above (undefined if B == 0).
    trainD = dataset.get_dataset_from_file(os.path.join("features_iter/", _f), wins)
    reward = np.sum(trainD.weights)
    # The same data is passed as both splits, so the "val" loss printed by learn is
    # computed on rows from the same game(s) as the training batches.
    trainD_ohe, valD_ohe, _ = prepare_features(trainD, trainD, OHE)
    # NOTE(review): column 31 is reported as max_step — presumably the turn counter in
    # this feature layout; confirm against the feature column list.
    max_step = np.max(trainD.features[:, 31])
    trainD_ohe_with_next = add_next_features(trainD_ohe)
    valD_ohe_with_next = add_next_features(valD_ohe)
    # Train on a random 10% of the (state, next state) rows; validate on all of them.
    trainD_ohe_with_next_sampled = sample_dataset(trainD_ohe_with_next, 0.1)
    print("Round {}, winrate: {}, max_step: {}, reward: {}, example: {}".format(t, wins, max_step, reward, r[0][0]['results']))
    writer.add_scalar('data/reward', reward, t)
    writer.add_scalar('data/winrate', wins, t)
    writer.add_scalar('data/max_step', max_step, t)
    # One epoch per round with a learning rate of 1e-1 / (t + 1).
    # NOTE(review): any exception raised by learn is only printed and the loop
    # continues, so a failing training step is easy to miss in the output.
    try:
        learn(trainD_ohe_with_next_sampled, valD_ohe_with_next, model, actor_critic_loss, lr=1e-1 / (t + 1), batch_size=64, epochs=1, freq=1, l2=1e-5, use_tb=False)
    except Exception as e:
        print(e)

Round 573, winrate: 0.0, max_step: 359, reward: 4, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/942916128.json'}
train loss in 1 epoch in 1 batch: 40.83069
val loss in 1 epoch: 59.94242
Round 574, winrate: 1.0, max_step: 314, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/247202987.json'}
train loss in 1 epoch in 1 batch: -121.76593
val loss in 1 epoch: 52.52039
train loss in 1 epoch in 2 batch: -7.54506
val loss in 1 epoch: 49.04658


  if (await self.run_code(code, result,  async_=asy)):


Round 575, winrate: 0.0, max_step: 154, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/629527449.json'}
train loss in 1 epoch in 1 batch: 588.56189
val loss in 1 epoch: -219.64782
Round 576, winrate: 0.0, max_step: 238, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/574497171.json'}
train loss in 1 epoch in 1 batch: 157.23141
val loss in 1 epoch: -290.68017
train loss in 1 epoch in 2 batch: -49.59634
val loss in 1 epoch: -290.82744
train loss in 1 epoch in 3 batch: 43.97684
val loss in 1 epoch: -291.89313
Round 577, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/901790531.json'}
train loss in 1 epoch in 1 batch: 449.53827
val loss in 1 epoch: -1497.95533
Round 578, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0

  if (await self.run_code(code, result,  async_=asy)):


Round 581, winrate: 0.0, max_step: 277, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/503366333.json'}
train loss in 1 epoch in 1 batch: 642.22485
val loss in 1 epoch: 78.06393
Round 582, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/431861209.json'}
train loss in 1 epoch in 1 batch: 539.09265
val loss in 1 epoch: -300.61381
Round 583, winrate: 0.0, max_step: 235, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/553093625.json'}
train loss in 1 epoch in 1 batch: -33.30542
val loss in 1 epoch: -183.00376
Round 584, winrate: 1.0, max_step: 79, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/441633633.json'}
train loss in 1 epoch in 1 batch: 343.89520
val loss in 1 epoch: -2375.89562
Round 585, winrate: 0.0, max_step:

  if (await self.run_code(code, result,  async_=asy)):


Round 586, winrate: 0.0, max_step: 353, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/792531345.json'}
train loss in 1 epoch in 1 batch: 687.06335
val loss in 1 epoch: -59.41430
train loss in 1 epoch in 2 batch: 144.06239
val loss in 1 epoch: -61.31743


  if (await self.run_code(code, result,  async_=asy)):


Round 587, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/879796242.json'}
train loss in 1 epoch in 1 batch: 369.49579
val loss in 1 epoch: -417.95461
Round 588, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/956182263.json'}
train loss in 1 epoch in 1 batch: -439.42944
val loss in 1 epoch: -7.34088
train loss in 1 epoch in 2 batch: 9.26074
val loss in 1 epoch: -8.98777


  if (await self.run_code(code, result,  async_=asy)):


Round 589, winrate: 0.0, max_step: 235, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/403614944.json'}
train loss in 1 epoch in 1 batch: 45.98357
val loss in 1 epoch: -300.84298
train loss in 1 epoch in 2 batch: -10.38044
val loss in 1 epoch: -300.54806
Round 590, winrate: 0.0, max_step: 233, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/293792490.json'}
train loss in 1 epoch in 1 batch: 1057.64807
val loss in 1 epoch: -480.22763


  if (await self.run_code(code, result,  async_=asy)):


Round 591, winrate: 0.0, max_step: 349, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/273192155.json'}
train loss in 1 epoch in 1 batch: 645.70050
val loss in 1 epoch: -445.46130
train loss in 1 epoch in 2 batch: 376.87769
val loss in 1 epoch: -448.86061


  if (await self.run_code(code, result,  async_=asy)):


Round 592, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/548863990.json'}
train loss in 1 epoch in 1 batch: -1169.01392
val loss in 1 epoch: -747.26821
Round 593, winrate: 0.0, max_step: 115, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/506381372.json'}
train loss in 1 epoch in 1 batch: 289.67783
val loss in 1 epoch: 63.66300
Round 594, winrate: 0.0, max_step: 119, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/962147311.json'}
train loss in 1 epoch in 1 batch: 247.09306
val loss in 1 epoch: 122.91373
Round 595, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/87590795.json'}
train loss in 1 epoch in 1 batch: 290.75772
val loss in 1 epoch: 88.63070


  if (await self.run_code(code, result,  async_=asy)):


Round 596, winrate: 0.0, max_step: 154, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/647626395.json'}
train loss in 1 epoch in 1 batch: 187.01886
val loss in 1 epoch: 629.00479
Round 597, winrate: 0.0, max_step: 315, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/895872160.json'}
train loss in 1 epoch in 1 batch: 212.64984
val loss in 1 epoch: -332.72406
train loss in 1 epoch in 2 batch: -74.79206
val loss in 1 epoch: -338.09233
Round 598, winrate: 0.0, max_step: 311, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/207955727.json'}
train loss in 1 epoch in 1 batch: -217.78748
val loss in 1 epoch: -361.73671
Round 599, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/867253489.json'}
train loss in 1 epoch in 1 batch

  if (await self.run_code(code, result,  async_=asy)):


Round 606, winrate: 0.0, max_step: 310, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/345509510.json'}
train loss in 1 epoch in 1 batch: 305.28030
val loss in 1 epoch: -163.46776


  if (await self.run_code(code, result,  async_=asy)):


Round 607, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/747943270.json'}
train loss in 1 epoch in 1 batch: -655.42004
val loss in 1 epoch: -341.21716
train loss in 1 epoch in 2 batch: -11.80509
val loss in 1 epoch: -327.94213


  if (await self.run_code(code, result,  async_=asy)):


Round 608, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/903099021.json'}
train loss in 1 epoch in 1 batch: 259.16006
val loss in 1 epoch: -327.76852


  if (await self.run_code(code, result,  async_=asy)):


Round 609, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/823883549.json'}
train loss in 1 epoch in 1 batch: 217.83253
val loss in 1 epoch: -460.31643
train loss in 1 epoch in 2 batch: -986.37573
val loss in 1 epoch: -464.84851
train loss in 1 epoch in 3 batch: -24.82981
val loss in 1 epoch: -462.33473


  if (await self.run_code(code, result,  async_=asy)):


Round 610, winrate: 0.0, max_step: 359, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/380828244.json'}
train loss in 1 epoch in 1 batch: 512.57922
val loss in 1 epoch: -203.16424
train loss in 1 epoch in 2 batch: -1300.77075
val loss in 1 epoch: -216.50474
train loss in 1 epoch in 3 batch: -2120.36206
val loss in 1 epoch: -218.00224
train loss in 1 epoch in 4 batch: 177.81682
val loss in 1 epoch: -215.18894
train loss in 1 epoch in 5 batch: -7.45139
val loss in 1 epoch: -212.57440
Round 611, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/241843726.json'}
train loss in 1 epoch in 1 batch: -2853.04517
val loss in 1 epoch: -369.70402
Round 612, winrate: 0.0, max_step: 191, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/616205957.json'}
train loss in 1 epoch in 1 batch: -67.934

  if (await self.run_code(code, result,  async_=asy)):


Round 617, winrate: 0.0, max_step: 239, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/3264686.json'}
train loss in 1 epoch in 1 batch: 288.36377
val loss in 1 epoch: -388.03643


  if (await self.run_code(code, result,  async_=asy)):


Round 618, winrate: 0.0, max_step: 199, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/192513567.json'}
train loss in 1 epoch in 1 batch: 944.55176
val loss in 1 epoch: -361.29231
Round 619, winrate: 0.0, max_step: 359, reward: 16, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/884164348.json'}
train loss in 1 epoch in 1 batch: -838.00427
val loss in 1 epoch: -357.08308
train loss in 1 epoch in 2 batch: 40.64564
val loss in 1 epoch: -368.47451
train loss in 1 epoch in 3 batch: 113.17244
val loss in 1 epoch: -368.49784


  if (await self.run_code(code, result,  async_=asy)):


Round 620, winrate: 0.0, max_step: 230, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/242583715.json'}
train loss in 1 epoch in 1 batch: 73.30986
val loss in 1 epoch: 1.66815
Round 621, winrate: 0.0, max_step: 152, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/178274211.json'}
train loss in 1 epoch in 1 batch: 1142.19861
val loss in 1 epoch: 160.10362


  if (await self.run_code(code, result,  async_=asy)):


Round 622, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/690014903.json'}
train loss in 1 epoch in 1 batch: 749.98218
val loss in 1 epoch: -524.79771
Round 623, winrate: 0.0, max_step: 359, reward: 3, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/875016726.json'}
train loss in 1 epoch in 1 batch: -1031.05225
val loss in 1 epoch: -874.19831
train loss in 1 epoch in 2 batch: 239.25305
val loss in 1 epoch: -888.42352


  if (await self.run_code(code, result,  async_=asy)):


Round 624, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/452090300.json'}
train loss in 1 epoch in 1 batch: 629.05304
val loss in 1 epoch: -182.37998
train loss in 1 epoch in 2 batch: -9.39334
val loss in 1 epoch: -170.31727
Round 625, winrate: 0.0, max_step: 312, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/53784371.json'}
train loss in 1 epoch in 1 batch: 170.42647
val loss in 1 epoch: -356.56254
Round 626, winrate: 0.0, max_step: 359, reward: 30, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/58829777.json'}
train loss in 1 epoch in 1 batch: 150.41827
val loss in 1 epoch: -393.17174
train loss in 1 epoch in 2 batch: 147.88171
val loss in 1 epoch: -403.28964
train loss in 1 epoch in 3 batch: 18.65964
val loss in 1 epoch: -404.37245


  if (await self.run_code(code, result,  async_=asy)):


Round 627, winrate: 0.0, max_step: 156, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/913463700.json'}
train loss in 1 epoch in 1 batch: -863.06750
val loss in 1 epoch: -640.60254
Round 628, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/326392043.json'}
train loss in 1 epoch in 1 batch: 197.53008
val loss in 1 epoch: 80.00874


  if (await self.run_code(code, result,  async_=asy)):


Round 629, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/854446421.json'}
train loss in 1 epoch in 1 batch: 339.62854
val loss in 1 epoch: -136.21115


  if (await self.run_code(code, result,  async_=asy)):


Round 630, winrate: 0.0, max_step: 277, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/230216792.json'}
train loss in 1 epoch in 1 batch: 328.86813
val loss in 1 epoch: -165.66997


  if (await self.run_code(code, result,  async_=asy)):


Round 631, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/536616985.json'}
train loss in 1 epoch in 1 batch: 106.92581
val loss in 1 epoch: -161.34508
train loss in 1 epoch in 2 batch: 3.11719
val loss in 1 epoch: -162.30573


  if (await self.run_code(code, result,  async_=asy)):


Round 632, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/350265706.json'}
train loss in 1 epoch in 1 batch: 398.31335
val loss in 1 epoch: -59.60318


  if (await self.run_code(code, result,  async_=asy)):


Round 633, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/878101368.json'}
train loss in 1 epoch in 1 batch: 223.96086
val loss in 1 epoch: 1.69141


  if (await self.run_code(code, result,  async_=asy)):


Round 634, winrate: 0.0, max_step: 155, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/583350560.json'}
train loss in 1 epoch in 1 batch: 279.43851
val loss in 1 epoch: -790.62368
Round 635, winrate: 0.0, max_step: 233, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/475613272.json'}
train loss in 1 epoch in 1 batch: -288.42990
val loss in 1 epoch: -84.54897


  if (await self.run_code(code, result,  async_=asy)):


Round 636, winrate: 0.0, max_step: 359, reward: 2, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/840565948.json'}
train loss in 1 epoch in 1 batch: 386.53946
val loss in 1 epoch: -387.39975
train loss in 1 epoch in 2 batch: 70.20407
val loss in 1 epoch: -385.50882
train loss in 1 epoch in 3 batch: -2583.67310
val loss in 1 epoch: -382.36630
Round 637, winrate: 0.0, max_step: 351, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/860919779.json'}
train loss in 1 epoch in 1 batch: 424.19171
val loss in 1 epoch: -881.20848


  if (await self.run_code(code, result,  async_=asy)):


Round 638, winrate: 0.0, max_step: 233, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/820980108.json'}
train loss in 1 epoch in 1 batch: 427.71118
val loss in 1 epoch: -187.16805
Round 639, winrate: 0.0, max_step: 30, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/706882780.json'}
train loss in 1 epoch in 1 batch: 231.77405
val loss in 1 epoch: 93.60442
Round 640, winrate: 0.0, max_step: 279, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/145049628.json'}
train loss in 1 epoch in 1 batch: 162.58940
val loss in 1 epoch: -244.01136
Round 641, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/744233467.json'}
train loss in 1 epoch in 1 batch: 81.15877
val loss in 1 epoch: -265.16287
train loss in 1 epoch in 2 batch: 41

  if (await self.run_code(code, result,  async_=asy)):


Round 648, winrate: 1.0, max_step: 159, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/641928918.json'}
train loss in 1 epoch in 1 batch: -108.77982
val loss in 1 epoch: -324.12580
Round 649, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/249583918.json'}
train loss in 1 epoch in 1 batch: 231.65419
val loss in 1 epoch: 87.72397
Round 650, winrate: 0.0, max_step: 278, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/558083164.json'}
train loss in 1 epoch in 1 batch: -2154.53345
val loss in 1 epoch: -189.99353
Round 651, winrate: 0.0, max_step: 354, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/319318239.json'}
train loss in 1 epoch in 1 batch: -38.07872
val loss in 1 epoch: -498.20043
Round 652, winrate: 0.0, max_st

  if (await self.run_code(code, result,  async_=asy)):


Round 656, winrate: 0.0, max_step: 150, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/255132843.json'}
train loss in 1 epoch in 1 batch: -2073.84595
val loss in 1 epoch: -384.61723


  if (await self.run_code(code, result,  async_=asy)):


Round 657, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/559402794.json'}
train loss in 1 epoch in 1 batch: -77.09784
val loss in 1 epoch: 28.96879
train loss in 1 epoch in 2 batch: 58.17724
val loss in 1 epoch: 21.67445


  if (await self.run_code(code, result,  async_=asy)):


Round 658, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/672069062.json'}
train loss in 1 epoch in 1 batch: 454.89893
val loss in 1 epoch: 118.59660
Round 659, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/266411815.json'}
train loss in 1 epoch in 1 batch: 248.68655
val loss in 1 epoch: -68.35709
Round 660, winrate: 0.0, max_step: 356, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/838218517.json'}
train loss in 1 epoch in 1 batch: 248.96854
val loss in 1 epoch: -1041.38328
train loss in 1 epoch in 2 batch: 67.45953
val loss in 1 epoch: -1027.35433
Round 661, winrate: 0.0, max_step: 191, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/310462940.json'}
train loss in 1 epoch in 1 batch:

  if (await self.run_code(code, result,  async_=asy)):


Round 671, winrate: 1.0, max_step: 359, reward: 64, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/879890721.json'}
train loss in 1 epoch in 1 batch: 0.02884
val loss in 1 epoch: 56.50146
train loss in 1 epoch in 2 batch: 126.23271
val loss in 1 epoch: 46.13435


  if (await self.run_code(code, result,  async_=asy)):


Round 672, winrate: 0.0, max_step: 231, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/343294743.json'}
train loss in 1 epoch in 1 batch: 475.47092
val loss in 1 epoch: -327.88345
train loss in 1 epoch in 2 batch: 12.46109
val loss in 1 epoch: -323.46066


  if (await self.run_code(code, result,  async_=asy)):


Round 673, winrate: 0.0, max_step: 229, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/809455129.json'}
train loss in 1 epoch in 1 batch: -2562.58569
val loss in 1 epoch: -1954.14795
Round 674, winrate: 0.5, max_step: 359, reward: 169, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 1, 'agentID': 1}], 'replayFile': 'replays/146231997.json'}
train loss in 1 epoch in 1 batch: 430.37900
val loss in 1 epoch: -212.92216
train loss in 1 epoch in 2 batch: 38.65897
val loss in 1 epoch: -208.09786
train loss in 1 epoch in 3 batch: -986.32605
val loss in 1 epoch: -207.56849
train loss in 1 epoch in 4 batch: 173.15326
val loss in 1 epoch: -207.73431


  if (await self.run_code(code, result,  async_=asy)):


Round 675, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/63113897.json'}
train loss in 1 epoch in 1 batch: 69.31630
val loss in 1 epoch: 39.53467


  if (await self.run_code(code, result,  async_=asy)):


Round 676, winrate: 0.0, max_step: 239, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/501304664.json'}
train loss in 1 epoch in 1 batch: 596.82672
val loss in 1 epoch: -22.90248
Round 677, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/23698688.json'}
train loss in 1 epoch in 1 batch: -1917.85291
val loss in 1 epoch: -479.06239
train loss in 1 epoch in 2 batch: -1066.79163
val loss in 1 epoch: -484.82002
train loss in 1 epoch in 3 batch: -938.56769
val loss in 1 epoch: -486.78466
train loss in 1 epoch in 4 batch: -71.88418
val loss in 1 epoch: -485.18294
Round 678, winrate: 0.0, max_step: 317, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/94020689.json'}
train loss in 1 epoch in 1 batch: -1420.08301
val loss in 1 epoch: -315.13847
train loss in 1 epoch in 2 batch: -28.6454

  if (await self.run_code(code, result,  async_=asy)):


Round 683, winrate: 0.0, max_step: 359, reward: 1, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/468885466.json'}
train loss in 1 epoch in 1 batch: 264.11493
val loss in 1 epoch: -1494.00112
train loss in 1 epoch in 2 batch: 231.61781
val loss in 1 epoch: -1480.05559
Round 684, winrate: 0.0, max_step: 118, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/7641146.json'}
train loss in 1 epoch in 1 batch: 179.87442
val loss in 1 epoch: 53.43691


  if (await self.run_code(code, result,  async_=asy)):


Round 685, winrate: 0.0, max_step: 350, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/349043665.json'}
train loss in 1 epoch in 1 batch: 234.25981
val loss in 1 epoch: 88.97838
Round 686, winrate: 0.0, max_step: 313, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/313301374.json'}
train loss in 1 epoch in 1 batch: 325.56668
val loss in 1 epoch: -387.25094
train loss in 1 epoch in 2 batch: -1316.00000
val loss in 1 epoch: -379.45848


  if (await self.run_code(code, result,  async_=asy)):


Round 687, winrate: 0.0, max_step: 277, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/462969300.json'}
train loss in 1 epoch in 1 batch: 244.25395
val loss in 1 epoch: -274.86834


  if (await self.run_code(code, result,  async_=asy)):


Round 688, winrate: 0.0, max_step: 359, reward: 20, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/797230513.json'}
train loss in 1 epoch in 1 batch: 138.10641
val loss in 1 epoch: -384.68814
train loss in 1 epoch in 2 batch: 444.80008
val loss in 1 epoch: -380.13317
Round 689, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/118953464.json'}
train loss in 1 epoch in 1 batch: -4652.45508
val loss in 1 epoch: -338.69275
Round 690, winrate: 0.0, max_step: 114, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/488721355.json'}
train loss in 1 epoch in 1 batch: 75.08498
val loss in 1 epoch: 457.69883
Round 691, winrate: 0.0, max_step: 194, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/196881768.json'}
train loss in 1 epoch in 1 batc

  if (await self.run_code(code, result,  async_=asy)):


Round 693, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/487748850.json'}
train loss in 1 epoch in 1 batch: -89.95288
val loss in 1 epoch: -401.20529
Round 694, winrate: 0.0, max_step: 239, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/689595245.json'}
train loss in 1 epoch in 1 batch: 378.29303
val loss in 1 epoch: -629.01949
train loss in 1 epoch in 2 batch: 0.04270
val loss in 1 epoch: -630.82435
Round 695, winrate: 0.0, max_step: 38, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/986146962.json'}
train loss in 1 epoch in 1 batch: 258.10858
val loss in 1 epoch: 106.89748
Round 696, winrate: 0.0, max_step: 275, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/176004031.json'}
train loss in 1 epoch in 1 batch: 26

  if (await self.run_code(code, result,  async_=asy)):


Round 699, winrate: 0.0, max_step: 279, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/354538713.json'}
train loss in 1 epoch in 1 batch: 232.24193
val loss in 1 epoch: -424.00465
train loss in 1 epoch in 2 batch: -67.70117
val loss in 1 epoch: -423.78846
Round 700, winrate: 0.0, max_step: 76, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/672361527.json'}
train loss in 1 epoch in 1 batch: 104.05782
val loss in 1 epoch: 287.90532


  if (await self.run_code(code, result,  async_=asy)):


Round 701, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/516015590.json'}
train loss in 1 epoch in 1 batch: -496.41495
val loss in 1 epoch: -64.20951
Round 702, winrate: 0.0, max_step: 277, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/664752119.json'}
train loss in 1 epoch in 1 batch: -192.63358
val loss in 1 epoch: -179.38091
train loss in 1 epoch in 2 batch: -6.41666
val loss in 1 epoch: -187.90253
train loss in 1 epoch in 3 batch: -129.54245
val loss in 1 epoch: -185.31076
Round 703, winrate: 0.0, max_step: 309, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/802115870.json'}
train loss in 1 epoch in 1 batch: 307.22726
val loss in 1 epoch: -3202.72055


  if (await self.run_code(code, result,  async_=asy)):


Round 704, winrate: 0.0, max_step: 313, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/120869359.json'}
train loss in 1 epoch in 1 batch: 200.89401
val loss in 1 epoch: -1513.68162


  if (await self.run_code(code, result,  async_=asy)):


Round 705, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/529534609.json'}
train loss in 1 epoch in 1 batch: -1770.36536
val loss in 1 epoch: -1639.73968


  if (await self.run_code(code, result,  async_=asy)):


Round 706, winrate: 0.0, max_step: 271, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/942526334.json'}
train loss in 1 epoch in 1 batch: 750.04846
val loss in 1 epoch: -209.37868
Round 707, winrate: 0.0, max_step: 158, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/911401819.json'}
train loss in 1 epoch in 1 batch: 333.81131
val loss in 1 epoch: -237.78481
Round 708, winrate: 0.0, max_step: 157, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/65071272.json'}
train loss in 1 epoch in 1 batch: 127.29951
val loss in 1 epoch: -4927.49292


  if (await self.run_code(code, result,  async_=asy)):


Round 709, winrate: 0.0, max_step: 192, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/981560335.json'}
train loss in 1 epoch in 1 batch: -399.78665
val loss in 1 epoch: -334.96003
train loss in 1 epoch in 2 batch: -1695.93152
val loss in 1 epoch: -320.77352
Round 710, winrate: 0.0, max_step: 198, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/443508454.json'}
train loss in 1 epoch in 1 batch: -571.52185
val loss in 1 epoch: -258.54473
Round 711, winrate: 0.0, max_step: 190, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/246344090.json'}
train loss in 1 epoch in 1 batch: 377.23853
val loss in 1 epoch: -484.71077
train loss in 1 epoch in 2 batch: 180.80745
val loss in 1 epoch: -474.59800
Round 712, winrate: 0.0, max_step: 189, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID

  if (await self.run_code(code, result,  async_=asy)):


Round 713, winrate: 0.0, max_step: 270, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/992760525.json'}
train loss in 1 epoch in 1 batch: 819.14490
val loss in 1 epoch: -482.79422
Round 714, winrate: 0.0, max_step: 115, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/73290591.json'}
train loss in 1 epoch in 1 batch: 209.37206
val loss in 1 epoch: -1247.27004
Round 715, winrate: 0.0, max_step: 113, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 0}, {'rank': 2, 'agentID': 1}], 'replayFile': 'replays/618679734.json'}
train loss in 1 epoch in 1 batch: 162.94649
val loss in 1 epoch: 6.59137
Round 716, winrate: 0.0, max_step: 110, reward: 0, example: {'ranks': [{'rank': 1, 'agentID': 1}, {'rank': 2, 'agentID': 0}], 'replayFile': 'replays/409857046.json'}
train loss in 1 epoch in 1 batch: -5776.57715
val loss in 1 epoch: -966.57802
