### Repeated Prisioner's Dilemma

- There are two agents that interact in repeated prisioner's dilemma. 
- Actions are: defect (D/0) or cooperate (C/1).
- Defect is dominant strategy, and (D,D) is Nash Eqbm of stage game. 
- Sustained Cooperation is best for everyone in repeated game. 
- Rewards for agents are 

    - (1,1) for (D,D) 
    - (3,0) for (D,C) 
    - (0,3) for (C,D) 
    - (2.5,2.5) for (C,C)

- There is just one state that does change
- There is "discounting". Agents value future profits - they do not seek to maximize discounted sum. 

### Import Packages

In [1]:
from itertools import count
import torch
import math
import torch.optim as optim 
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
plt.rcParams["figure.figsize"] = (30, 10)
plt.rcParams['font.size']=20

### Helper Functions

In [57]:
def gameReward(a1,a2):
    if (a1 == 0 and a2 == 0): # DD
        return (1,1)
    if (a1 == 0 and a2 == 1): # DC
        return (3,0)
    if (a1 == 1 and a2 == 0): # CD
        return (0,3)
    if (a1 == 1 and a2 == 1): # CC
        return (2.9,2.9)
    
import random
from collections import namedtuple, deque
Transition = namedtuple('Transition',('state1', 'state2', 'next_state1', 'next_state2','a1','a2','r1','r2'))

class ReplayMemory(object):
    # Holds transitions from experience and gives a random batch of transitions for training
    def __init__(self,capacity):
        self.memory = deque([], maxlen=capacity)
        
    def push(self,*args):
        "save transition"
        self.memory.append(Transition(*args))
        
    def sample(self, batch_size):
        return random.sample(self.memory, batch_size)
    
    def __len__(self):
        return len(self.memory)

class DQN(nn.Module):
    # Input: State
    # Output: 0,1
    # Network is going to try to predict the expected Return of taking each action given the INPUT
    def __init__(self, n_obs, n_actions):
        super(DQN,self).__init__()
        self.layer1 = nn.Linear(n_obs, 128)
        self.layer2 = nn.Linear(128,128)
        self.layer3 = nn.Linear(128, n_actions)
    
    def forward(self,x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        return self.layer3(x)   

def select_action(state, policy_net, idx):
    # Given state selects action either from the DQN or randomly. 
    global steps_done1, steps_done2
    sample = random.random()
    if idx == 0:
        eps_threshold = EPS_END + (EPS_START-EPS_END)*math.exp(-1.*steps_done1/EPS_DECAY)
        steps_done1 += 1
    else:
        eps_threshold = EPS_END + (EPS_START-EPS_END)*math.exp(-1.*steps_done2/EPS_DECAY)
        steps_done2 += 1
    
    if sample>eps_threshold:
        with torch.no_grad():
            #try: 
            #    return policy_net(state).max(1)[1].view(1,1) # pick up action with largest expected reward
            #except:
            #    return policy_net(state).max(0)[1].view(1,1) # pick up action with largest expected reward
            a = torch.tensor([0,1])
            p = torch.exp(policy_net(state)/BETA)/torch.sum(torch.exp(policy_net(state)/BETA))
            index = p.multinomial(num_samples=1, replacement=True)
            b = a[index]
            return torch.tensor([[b]],dtype=torch.long)
    else:
        randchoice = random.choice([0,1])
        return torch.tensor([[randchoice]],dtype=torch.long)
    
def update_target_net(target_net, policy_net):
    target_net_state_dict = target_net.state_dict()
    policy_net_state_dict = policy_net.state_dict()
    for key in policy_net_state_dict:
        target_net_state_dict[key] = policy_net_state_dict[key]*TAU+target_net_state_dict[key]*(1-TAU)
    target_net.load_state_dict(target_net_state_dict)

def optimize_model(policy_net,target_net,idx,optimizer):
    if len(memory)<BATCH_SIZE:
        return
    transitions = memory.sample(BATCH_SIZE)
    batch = Transition(*zip(*transitions))
    if idx == 1:
        action_batch = torch.cat(batch.a1)
        reward_batch = torch.cat(batch.r1)
         # we begin with a batch of transitions - (state, action, reward)
        state_batch = torch.cat(batch.state1)
        # final state is after simulation is done
        # these are the "s(t+1)" from the transitions
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state1)),dtype=torch.bool)
        non_final_next_states = torch.cat([s for s in batch.next_state1 if s is not None])
    elif idx == 2:
        action_batch = torch.cat(batch.a2)
        reward_batch = torch.cat(batch.r2)
         # we begin with a batch of transitions - (state, action, reward)
        state_batch = torch.cat(batch.state2)
        
        # final state is after simulation is done
        # these are the "s(t+1)" from the transitions
        non_final_mask = torch.tensor(tuple(map(lambda s: s is not None, batch.next_state2)),dtype=torch.bool)
        non_final_next_states = torch.cat([s for s in batch.next_state2 if s is not None])
    
    # For these batch states we compute optimal policy according to our policy-network
    # Compute Q(s,a) and get actions - these are the actions that would have been taken for each batch state 
    state_action_values = policy_net(state_batch.reshape(-1,1)).gather(1,action_batch)
    
    # Compute V(s_t+1)
    next_state_values = torch.zeros(BATCH_SIZE)
    with torch.no_grad():
        # Use the "old" target_net to obtain value (Expected Return) on optimal actions from sampled s(t+1)
        # This is Q_old(s',a')
        next_state_values[non_final_mask] = target_net(non_final_next_states.reshape(-1,1)).max(1)[0]
        
    # Here we have add r + max Q_old(s',a')
    expected_state_action_values = (next_state_values*GAMMA)+reward_batch

    # Compute the loss
    # Q_new(s,a) - r + max Q_old(s',a'): is the error
    # Huber loss function
    criterion = nn.SmoothL1Loss()
    loss = criterion(state_action_values,expected_state_action_values.unsqueeze(1))
    optimizer.zero_grad()
    loss.backward()
    torch.nn.utils.clip_grad_value_(policy_net.parameters(),100)
    optimizer.step()    
    print(f'Loss Agent{idx}:{loss.item()}')

### HyperParameters and Initializations

In [77]:
BATCH_SIZE = 512 # number of transitions sampled from replay buffer
GAMMA = 0.99 # Discount factor
EPS_START = 0.999 # Initial value of Epsilon
EPS_END = 0.05 # End value of Epsilon
EPS_DECAY = 1000 # controls decay rate of Epsilon
TAU = 0.0001 # update rate of target network
LR = 1e-5 # Learning rate of Adam
BETA = 1 # temperature of the greedy-exploratory policy
BETA_DECAY = 1 # temperature of the greedy-exploratory policy

n_obs = 1
n_actions = 2
policy_net1 = DQN(n_obs,n_actions)
target_net1 = DQN(n_obs,n_actions)
policy_net2 = DQN(n_obs,n_actions)
target_net2 = DQN(n_obs,n_actions)
steps_done1 = 0
steps_done2 = 0
optimizer1 = optim.AdamW(policy_net1.parameters(),lr=LR,amsgrad=True)
optimizer2 = optim.AdamW(policy_net2.parameters(),lr=LR,amsgrad=True)
memory=ReplayMemory(10000)

### Training Loop

In [78]:
state1 = torch.tensor(1,dtype=torch.float32).unsqueeze(0)
state2 = torch.tensor(1,dtype=torch.float32).unsqueeze(0)

actions = []
epochs = 10000
for t in range(epochs):
    BETA = 5*0.999**t
    print('\n Iteration:',t)
    # Take action
    a1 = select_action(state1, policy_net1, 1)
    a2 = select_action(state2, policy_net2, 2)

    # Obtain Rewards
    r1, r2 = gameReward(int(a1),int(a2))
    r1 = torch.tensor([r1])
    r2 = torch.tensor([r2])

    # Compute next state
    next_state1 = torch.tensor(a2.item(),dtype=torch.float32).unsqueeze(0)
    next_state2 = torch.tensor(a1.item(),dtype=torch.float32).unsqueeze(0)

    # store memory in transition
    memory.push(state1, state2, next_state1, next_state2, a1,a2,r1,r2)

    # move to next state
    state1 = next_state1
    state2 = next_state2
    
    # optimize
    optimize_model(policy_net1,target_net1, 1, optimizer1)
    optimize_model(policy_net2,target_net2, 2, optimizer2)

    # soft update target_net
    update_target_net(target_net1, policy_net1)
    update_target_net(target_net2, policy_net2)
    
    # record actions
    actions.append([a1.item(),a2.item()])
    
    with torch.no_grad():
        temp1 = torch.tensor(int(0),dtype=torch.float32).unsqueeze(0)
        temp2 = torch.tensor(int(1),dtype=torch.float32).unsqueeze(0)
        print(policy_net1(temp1))
        print(policy_net1(temp2))
        print(policy_net2(temp1))
        print(policy_net2(temp2))


 Iteration: 0
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 1
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 2
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 3
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 4
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 5
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 6
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 7
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 8
tensor([ 0.0291, -0.0596])
tensor

tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 76
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 77
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 78
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 79
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 80
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 81
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 82
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 83
tensor([ 0.0291, -0.0596])
tensor([-0.01

tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 158
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 159
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 160
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 161
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 162
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 163
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 164
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 165
tensor([ 0.0291, -0.0596])
tenso

tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 228
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 229
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 230
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 231
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 232
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 233
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 234
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 235
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor(

tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 308
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 309
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 310
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 311
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 312
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 313
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 314
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 315
tensor([ 0.0291, -0.0596])
tenso

tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 424
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 425
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 426
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 427
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 428
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 429
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 430
tensor([ 0.0291, -0.0596])
tensor([-0.0133,  0.0044])
tensor([0.1919, 0.1534])
tensor([0.3408, 0.2762])

 Iteration: 431
tensor([ 0.0291, -0.0596])
tenso

Loss Agent1:1.3114761114120483
Loss Agent2:1.1891663074493408
tensor([ 0.0398, -0.0544])
tensor([0.0017, 0.0123])
tensor([0.2038, 0.1562])
tensor([0.3567, 0.2804])

 Iteration: 522
Loss Agent1:1.304785132408142
Loss Agent2:1.1806328296661377
tensor([ 0.0408, -0.0540])
tensor([0.0031, 0.0130])
tensor([0.2049, 0.1564])
tensor([0.3582, 0.2808])

 Iteration: 523
Loss Agent1:1.3050596714019775
Loss Agent2:1.1912747621536255
tensor([ 0.0418, -0.0535])
tensor([0.0045, 0.0137])
tensor([0.2060, 0.1567])
tensor([0.3596, 0.2812])

 Iteration: 524
Loss Agent1:1.3041843175888062
Loss Agent2:1.1714911460876465
tensor([ 0.0427, -0.0530])
tensor([0.0058, 0.0144])
tensor([0.2070, 0.1569])
tensor([0.3610, 0.2816])

 Iteration: 525
Loss Agent1:1.300787091255188
Loss Agent2:1.1719131469726562
tensor([ 0.0437, -0.0525])
tensor([0.0072, 0.0151])
tensor([0.2081, 0.1572])
tensor([0.3625, 0.2820])

 Iteration: 526
Loss Agent1:1.305415153503418
Loss Agent2:1.1821644306182861
tensor([ 0.0447, -0.0521])
tensor([0

Loss Agent1:1.2900006771087646
Loss Agent2:1.1279332637786865
tensor([ 0.0842, -0.0327])
tensor([0.0650, 0.0427])
tensor([0.2532, 0.1691])
tensor([0.4230, 0.2985])

 Iteration: 568
Loss Agent1:1.2328130006790161
Loss Agent2:1.1495721340179443
tensor([ 0.0851, -0.0322])
tensor([0.0664, 0.0433])
tensor([0.2543, 0.1694])
tensor([0.4244, 0.2989])

 Iteration: 569
Loss Agent1:1.2706756591796875
Loss Agent2:1.1382044553756714
tensor([ 0.0861, -0.0318])
tensor([0.0678, 0.0439])
tensor([0.2553, 0.1697])
tensor([0.4258, 0.2992])

 Iteration: 570
Loss Agent1:1.253280520439148
Loss Agent2:1.1290018558502197
tensor([ 0.0871, -0.0313])
tensor([0.0691, 0.0445])
tensor([0.2564, 0.1700])
tensor([0.4273, 0.2996])

 Iteration: 571
Loss Agent1:1.2711570262908936
Loss Agent2:1.1250308752059937
tensor([ 0.0880, -0.0309])
tensor([0.0705, 0.0451])
tensor([0.2575, 0.1702])
tensor([0.4287, 0.3000])

 Iteration: 572
Loss Agent1:1.2411391735076904
Loss Agent2:1.138249397277832
tensor([ 0.0890, -0.0304])
tensor([

Loss Agent1:1.2685000896453857
Loss Agent2:1.0930119752883911
tensor([ 0.1321, -0.0090])
tensor([0.1346, 0.0742])
tensor([0.3075, 0.1872])
tensor([0.4965, 0.3187])

 Iteration: 619
Loss Agent1:1.2251120805740356
Loss Agent2:1.1154148578643799
tensor([ 0.1331, -0.0085])
tensor([0.1360, 0.0749])
tensor([0.3086, 0.1876])
tensor([0.4980, 0.3192])

 Iteration: 620
Loss Agent1:1.222554087638855
Loss Agent2:1.100656509399414
tensor([ 0.1340, -0.0081])
tensor([0.1374, 0.0755])
tensor([0.3096, 0.1880])
tensor([0.4994, 0.3196])

 Iteration: 621
Loss Agent1:1.2823817729949951
Loss Agent2:1.1648762226104736
tensor([ 0.1349, -0.0076])
tensor([0.1387, 0.0761])
tensor([0.3107, 0.1884])
tensor([0.5008, 0.3201])

 Iteration: 622
Loss Agent1:1.224510908126831
Loss Agent2:1.0961463451385498
tensor([ 0.1358, -0.0071])
tensor([0.1400, 0.0767])
tensor([0.3117, 0.1888])
tensor([0.5022, 0.3206])

 Iteration: 623
Loss Agent1:1.229599118232727
Loss Agent2:1.1032036542892456
tensor([ 0.1368, -0.0066])
tensor([0.

Loss Agent2:1.0549662113189697
tensor([0.1772, 0.0136])
tensor([0.1987, 0.1058])
tensor([0.3571, 0.2065])
tensor([0.5638, 0.3408])

 Iteration: 667
Loss Agent1:1.2405643463134766
Loss Agent2:1.0703939199447632
tensor([0.1782, 0.0141])
tensor([0.2001, 0.1065])
tensor([0.3581, 0.2069])
tensor([0.5652, 0.3413])

 Iteration: 668
Loss Agent1:1.2375752925872803
Loss Agent2:1.0372955799102783
tensor([0.1791, 0.0146])
tensor([0.2014, 0.1072])
tensor([0.3591, 0.2073])
tensor([0.5666, 0.3418])

 Iteration: 669
Loss Agent1:1.238468885421753
Loss Agent2:1.042696475982666
tensor([0.1801, 0.0150])
tensor([0.2028, 0.1079])
tensor([0.3601, 0.2077])
tensor([0.5679, 0.3423])

 Iteration: 670
Loss Agent1:1.2487236261367798
Loss Agent2:1.0531471967697144
tensor([0.1810, 0.0155])
tensor([0.2041, 0.1086])
tensor([0.3611, 0.2081])
tensor([0.5693, 0.3428])

 Iteration: 671
Loss Agent1:1.235248327255249
Loss Agent2:1.0531047582626343
tensor([0.1820, 0.0160])
tensor([0.2055, 0.1093])
tensor([0.3621, 0.2085])
te

Loss Agent2:1.0500253438949585
tensor([0.2232, 0.0380])
tensor([0.2632, 0.1389])
tensor([0.4022, 0.2255])
tensor([0.6297, 0.3677])

 Iteration: 714
Loss Agent1:1.19489324092865
Loss Agent2:1.0119669437408447
tensor([0.2242, 0.0385])
tensor([0.2646, 0.1395])
tensor([0.4032, 0.2259])
tensor([0.6312, 0.3683])

 Iteration: 715
Loss Agent1:1.2085379362106323
Loss Agent2:1.039217472076416
tensor([0.2252, 0.0391])
tensor([0.2660, 0.1401])
tensor([0.4041, 0.2263])
tensor([0.6327, 0.3690])

 Iteration: 716
Loss Agent1:1.2583613395690918
Loss Agent2:1.03155517578125
tensor([0.2262, 0.0396])
tensor([0.2675, 0.1408])
tensor([0.4051, 0.2268])
tensor([0.6343, 0.3696])

 Iteration: 717
Loss Agent1:1.2120441198349
Loss Agent2:1.0699411630630493
tensor([0.2272, 0.0402])
tensor([0.2689, 0.1414])
tensor([0.4060, 0.2272])
tensor([0.6358, 0.3702])

 Iteration: 718
Loss Agent1:1.2010266780853271
Loss Agent2:1.0188113451004028
tensor([0.2282, 0.0407])
tensor([0.2703, 0.1421])
tensor([0.4070, 0.2276])
tensor(

Loss Agent1:1.2053008079528809
Loss Agent2:1.0688869953155518
tensor([0.2772, 0.0695])
tensor([0.3388, 0.1771])
tensor([0.4549, 0.2489])
tensor([0.7143, 0.4010])

 Iteration: 770
Loss Agent1:1.1489216089248657
Loss Agent2:1.0266236066818237
tensor([0.2781, 0.0701])
tensor([0.3401, 0.1778])
tensor([0.4558, 0.2493])
tensor([0.7157, 0.4015])

 Iteration: 771
Loss Agent1:1.163593053817749
Loss Agent2:0.986351490020752
tensor([0.2791, 0.0707])
tensor([0.3414, 0.1785])
tensor([0.4567, 0.2497])
tensor([0.7172, 0.4021])

 Iteration: 772
Loss Agent1:1.1323497295379639
Loss Agent2:0.9904073476791382
tensor([0.2800, 0.0713])
tensor([0.3427, 0.1793])
tensor([0.4576, 0.2500])
tensor([0.7186, 0.4026])

 Iteration: 773
Loss Agent1:1.1781895160675049
Loss Agent2:1.0306882858276367
tensor([0.2810, 0.0719])
tensor([0.3440, 0.1800])
tensor([0.4586, 0.2504])
tensor([0.7200, 0.4031])

 Iteration: 774
Loss Agent1:1.1560763120651245
Loss Agent2:1.0142537355422974
tensor([0.2819, 0.0725])
tensor([0.3453, 0.18

tensor([0.3306, 0.1015])
tensor([0.4106, 0.2155])
tensor([0.5081, 0.2740])
tensor([0.7946, 0.4325])

 Iteration: 826
Loss Agent1:1.109147071838379
Loss Agent2:0.9993270635604858
tensor([0.3315, 0.1021])
tensor([0.4118, 0.2162])
tensor([0.5090, 0.2745])
tensor([0.7960, 0.4331])

 Iteration: 827
Loss Agent1:1.1466838121414185
Loss Agent2:0.9731424450874329
tensor([0.3325, 0.1027])
tensor([0.4131, 0.2170])
tensor([0.5100, 0.2750])
tensor([0.7975, 0.4338])

 Iteration: 828
Loss Agent1:1.1655231714248657
Loss Agent2:0.9560465812683105
tensor([0.3334, 0.1032])
tensor([0.4143, 0.2177])
tensor([0.5109, 0.2755])
tensor([0.7989, 0.4344])

 Iteration: 829
Loss Agent1:1.1775550842285156
Loss Agent2:0.983917236328125
tensor([0.3344, 0.1038])
tensor([0.4155, 0.2184])
tensor([0.5119, 0.2760])
tensor([0.8003, 0.4350])

 Iteration: 830
Loss Agent1:1.2135361433029175
Loss Agent2:1.0317885875701904
tensor([0.3353, 0.1044])
tensor([0.4168, 0.2192])
tensor([0.5129, 0.2764])
tensor([0.8017, 0.4356])

 Itera

Loss Agent1:1.1583998203277588
Loss Agent2:0.941382646560669
tensor([0.3765, 0.1299])
tensor([0.4713, 0.2511])
tensor([0.5547, 0.2988])
tensor([0.8647, 0.4636])

 Iteration: 875
Loss Agent1:1.137451410293579
Loss Agent2:0.9914182424545288
tensor([0.3774, 0.1305])
tensor([0.4726, 0.2518])
tensor([0.5557, 0.2993])
tensor([0.8662, 0.4643])

 Iteration: 876
Loss Agent1:1.098605751991272
Loss Agent2:0.9676690697669983
tensor([0.3783, 0.1310])
tensor([0.4738, 0.2525])
tensor([0.5566, 0.2998])
tensor([0.8676, 0.4649])

 Iteration: 877
Loss Agent1:1.0778286457061768
Loss Agent2:0.9605352878570557
tensor([0.3792, 0.1316])
tensor([0.4750, 0.2531])
tensor([0.5576, 0.3003])
tensor([0.8691, 0.4656])

 Iteration: 878
Loss Agent1:1.1006739139556885
Loss Agent2:0.9419828653335571
tensor([0.3802, 0.1321])
tensor([0.4763, 0.2538])
tensor([0.5585, 0.3009])
tensor([0.8706, 0.4663])

 Iteration: 879
Loss Agent1:1.1852803230285645
Loss Agent2:0.974754810333252
tensor([0.3811, 0.1327])
tensor([0.4776, 0.2545

Loss Agent2:0.9281616806983948
tensor([0.4242, 0.1574])
tensor([0.5379, 0.2838])
tensor([0.6040, 0.3246])
tensor([0.9399, 0.4966])

 Iteration: 927
Loss Agent1:1.0644354820251465
Loss Agent2:0.8900372982025146
tensor([0.4251, 0.1579])
tensor([0.5392, 0.2844])
tensor([0.6049, 0.3251])
tensor([0.9413, 0.4972])

 Iteration: 928
Loss Agent1:1.1104819774627686
Loss Agent2:0.9168094396591187
tensor([0.4261, 0.1584])
tensor([0.5405, 0.2850])
tensor([0.6058, 0.3255])
tensor([0.9428, 0.4977])

 Iteration: 929
Loss Agent1:1.1112695932388306
Loss Agent2:0.9417047500610352
tensor([0.4270, 0.1589])
tensor([0.5418, 0.2856])
tensor([0.6068, 0.3260])
tensor([0.9442, 0.4983])

 Iteration: 930
Loss Agent1:1.0878992080688477
Loss Agent2:0.9033777117729187
tensor([0.4279, 0.1594])
tensor([0.5431, 0.2862])
tensor([0.6077, 0.3264])
tensor([0.9456, 0.4989])

 Iteration: 931
Loss Agent1:1.100651502609253
Loss Agent2:0.905899703502655
tensor([0.4288, 0.1599])
tensor([0.5444, 0.2868])
tensor([0.6087, 0.3269])
t

Loss Agent1:1.053697109222412
Loss Agent2:0.9145991802215576
tensor([0.4723, 0.1864])
tensor([0.6044, 0.3195])
tensor([0.6546, 0.3532])
tensor([1.0145, 0.5330])

 Iteration: 981
Loss Agent1:1.0937895774841309
Loss Agent2:0.9253062605857849
tensor([0.4732, 0.1870])
tensor([0.6056, 0.3202])
tensor([0.6556, 0.3538])
tensor([1.0159, 0.5338])

 Iteration: 982
Loss Agent1:1.0272941589355469
Loss Agent2:0.8918649554252625
tensor([0.4741, 0.1875])
tensor([0.6068, 0.3208])
tensor([0.6565, 0.3545])
tensor([1.0172, 0.5345])

 Iteration: 983
Loss Agent1:1.0799107551574707
Loss Agent2:0.884477972984314
tensor([0.4749, 0.1880])
tensor([0.6080, 0.3215])
tensor([0.6574, 0.3551])
tensor([1.0185, 0.5352])

 Iteration: 984
Loss Agent1:1.0588366985321045
Loss Agent2:0.9056885242462158
tensor([0.4758, 0.1886])
tensor([0.6092, 0.3222])
tensor([0.6583, 0.3557])
tensor([1.0198, 0.5359])

 Iteration: 985
Loss Agent1:1.0874440670013428
Loss Agent2:0.9150449633598328
tensor([0.4767, 0.1892])
tensor([0.6104, 0.32

Loss Agent1:1.08299720287323
Loss Agent2:0.8961962461471558
tensor([0.5152, 0.2130])
tensor([0.6639, 0.3521])
tensor([0.7007, 0.3814])
tensor([1.0806, 0.5675])

 Iteration: 1030
Loss Agent1:1.0301917791366577
Loss Agent2:0.8639733791351318
tensor([0.5161, 0.2136])
tensor([0.6651, 0.3528])
tensor([0.7016, 0.3820])
tensor([1.0820, 0.5681])

 Iteration: 1031
Loss Agent1:1.0877485275268555
Loss Agent2:0.9288426041603088
tensor([0.5169, 0.2142])
tensor([0.6663, 0.3534])
tensor([0.7026, 0.3825])
tensor([1.0834, 0.5688])

 Iteration: 1032
Loss Agent1:1.0530295372009277
Loss Agent2:0.9102942943572998
tensor([0.5178, 0.2147])
tensor([0.6675, 0.3541])
tensor([0.7035, 0.3831])
tensor([1.0847, 0.5695])

 Iteration: 1033
Loss Agent1:1.0617607831954956
Loss Agent2:0.8825105428695679
tensor([0.5187, 0.2153])
tensor([0.6687, 0.3548])
tensor([0.7045, 0.3837])
tensor([1.0861, 0.5702])

 Iteration: 1034
Loss Agent1:1.0520433187484741
Loss Agent2:0.8824582099914551
tensor([0.5196, 0.2159])
tensor([0.6699,

Loss Agent1:1.0030817985534668
Loss Agent2:0.863122820854187
tensor([0.5633, 0.2433])
tensor([0.7290, 0.3894])
tensor([0.7532, 0.4105])
tensor([1.1571, 0.6061])

 Iteration: 1085
Loss Agent1:1.023113489151001
Loss Agent2:0.8297466039657593
tensor([0.5642, 0.2438])
tensor([0.7302, 0.3901])
tensor([0.7542, 0.4110])
tensor([1.1585, 0.6068])

 Iteration: 1086
Loss Agent1:1.0512248277664185
Loss Agent2:0.8624993562698364
tensor([0.5651, 0.2444])
tensor([0.7314, 0.3908])
tensor([0.7551, 0.4115])
tensor([1.1599, 0.6075])

 Iteration: 1087
Loss Agent1:1.008838176727295
Loss Agent2:0.9184802770614624
tensor([0.5660, 0.2449])
tensor([0.7326, 0.3914])
tensor([0.7561, 0.4121])
tensor([1.1613, 0.6082])

 Iteration: 1088
Loss Agent1:1.0406173467636108
Loss Agent2:0.8700896501541138
tensor([0.5668, 0.2455])
tensor([0.7337, 0.3921])
tensor([0.7570, 0.4126])
tensor([1.1626, 0.6089])

 Iteration: 1089
Loss Agent1:0.9759497046470642
Loss Agent2:0.882485568523407
tensor([0.5677, 0.2460])
tensor([0.7349, 0

Loss Agent1:1.0056380033493042
Loss Agent2:0.8504694700241089
tensor([0.6095, 0.2711])
tensor([0.7916, 0.4235])
tensor([0.8041, 0.4378])
tensor([1.2306, 0.6440])

 Iteration: 1138
Loss Agent1:0.948311448097229
Loss Agent2:0.8929835557937622
tensor([0.6104, 0.2716])
tensor([0.7928, 0.4242])
tensor([0.8051, 0.4383])
tensor([1.2320, 0.6448])

 Iteration: 1139
Loss Agent1:0.9758892059326172
Loss Agent2:0.8797981142997742
tensor([0.6112, 0.2721])
tensor([0.7939, 0.4248])
tensor([0.8060, 0.4389])
tensor([1.2333, 0.6456])

 Iteration: 1140
Loss Agent1:0.9844415783882141
Loss Agent2:0.8617942333221436
tensor([0.6121, 0.2726])
tensor([0.7951, 0.4254])
tensor([0.8070, 0.4394])
tensor([1.2347, 0.6463])

 Iteration: 1141
Loss Agent1:1.0112111568450928
Loss Agent2:0.860232949256897
tensor([0.6130, 0.2731])
tensor([0.7963, 0.4261])
tensor([0.8079, 0.4400])
tensor([1.2360, 0.6471])

 Iteration: 1142
Loss Agent1:0.9913805723190308
Loss Agent2:0.8751856088638306
tensor([0.6139, 0.2737])
tensor([0.7974,

Loss Agent1:0.926662027835846
Loss Agent2:0.8607678413391113
tensor([0.6563, 0.2993])
tensor([0.8549, 0.4578])
tensor([0.8551, 0.4651])
tensor([1.3030, 0.6821])

 Iteration: 1192
Loss Agent1:0.9852643013000488
Loss Agent2:0.8447771072387695
tensor([0.6572, 0.2998])
tensor([0.8561, 0.4585])
tensor([0.8561, 0.4656])
tensor([1.3044, 0.6828])

 Iteration: 1193
Loss Agent1:0.9777752161026001
Loss Agent2:0.8283292055130005
tensor([0.6581, 0.3003])
tensor([0.8572, 0.4591])
tensor([0.8570, 0.4661])
tensor([1.3057, 0.6836])

 Iteration: 1194
Loss Agent1:0.9273903965950012
Loss Agent2:0.8873506784439087
tensor([0.6589, 0.3008])
tensor([0.8584, 0.4597])
tensor([0.8580, 0.4666])
tensor([1.3071, 0.6843])

 Iteration: 1195
Loss Agent1:0.9617763757705688
Loss Agent2:0.8537059426307678
tensor([0.6598, 0.3013])
tensor([0.8596, 0.4603])
tensor([0.8589, 0.4671])
tensor([1.3084, 0.6850])

 Iteration: 1196
Loss Agent1:0.9138066172599792
Loss Agent2:0.787574052810669
tensor([0.6607, 0.3018])
tensor([0.8607,

tensor([0.7033, 0.3261])
tensor([0.9187, 0.4895])
tensor([0.9054, 0.4913])
tensor([1.3741, 0.7180])

 Iteration: 1246
Loss Agent1:0.9473744630813599
Loss Agent2:0.8268589973449707
tensor([0.7043, 0.3265])
tensor([0.9199, 0.4900])
tensor([0.9063, 0.4918])
tensor([1.3753, 0.7187])

 Iteration: 1247
Loss Agent1:0.9236277937889099
Loss Agent2:0.82549649477005
tensor([0.7052, 0.3270])
tensor([0.9211, 0.4906])
tensor([0.9073, 0.4923])
tensor([1.3766, 0.7194])

 Iteration: 1248
Loss Agent1:0.9140483140945435
Loss Agent2:0.8203129768371582
tensor([0.7061, 0.3275])
tensor([0.9223, 0.4911])
tensor([0.9082, 0.4928])
tensor([1.3779, 0.7202])

 Iteration: 1249
Loss Agent1:0.9253433346748352
Loss Agent2:0.8487948179244995
tensor([0.7070, 0.3280])
tensor([0.9235, 0.4917])
tensor([0.9091, 0.4933])
tensor([1.3792, 0.7209])

 Iteration: 1250
Loss Agent1:0.9832863211631775
Loss Agent2:0.7735325694084167
tensor([0.7079, 0.3285])
tensor([0.9247, 0.4923])
tensor([0.9101, 0.4938])
tensor([1.3805, 0.7217])

 

Loss Agent1:0.8832516074180603
Loss Agent2:0.7658163905143738
tensor([0.7471, 0.3491])
tensor([0.9745, 0.5181])
tensor([0.9477, 0.5126])
tensor([1.4324, 0.7466])

 Iteration: 1293
Loss Agent1:0.9159977436065674
Loss Agent2:0.8377610445022583
tensor([0.7480, 0.3496])
tensor([0.9757, 0.5187])
tensor([0.9486, 0.5130])
tensor([1.4336, 0.7472])

 Iteration: 1294
Loss Agent1:0.8957931995391846
Loss Agent2:0.7773280143737793
tensor([0.7490, 0.3500])
tensor([0.9768, 0.5193])
tensor([0.9495, 0.5135])
tensor([1.4348, 0.7478])

 Iteration: 1295
Loss Agent1:0.897969126701355
Loss Agent2:0.7607491612434387
tensor([0.7499, 0.3505])
tensor([0.9780, 0.5199])
tensor([0.9504, 0.5139])
tensor([1.4360, 0.7484])

 Iteration: 1296
Loss Agent1:0.9162291288375854
Loss Agent2:0.7813946008682251
tensor([0.7508, 0.3510])
tensor([0.9792, 0.5205])
tensor([0.9513, 0.5143])
tensor([1.4372, 0.7490])

 Iteration: 1297
Loss Agent1:0.9257676005363464
Loss Agent2:0.7751021981239319
tensor([0.7518, 0.3515])
tensor([0.9804

Loss Agent1:0.9284922480583191
Loss Agent2:0.7643268704414368
tensor([0.7897, 0.3716])
tensor([1.0279, 0.5466])
tensor([0.9882, 0.5329])
tensor([1.4867, 0.7725])

 Iteration: 1339
Loss Agent1:0.8810689449310303
Loss Agent2:0.7852771878242493
tensor([0.7906, 0.3722])
tensor([1.0291, 0.5474])
tensor([0.9890, 0.5334])
tensor([1.4878, 0.7731])

 Iteration: 1340
Loss Agent1:0.9628711938858032
Loss Agent2:0.7734149694442749
tensor([0.7915, 0.3727])
tensor([1.0302, 0.5481])
tensor([0.9899, 0.5338])
tensor([1.4889, 0.7736])

 Iteration: 1341
Loss Agent1:0.919297456741333
Loss Agent2:0.8090850710868835
tensor([0.7925, 0.3732])
tensor([1.0314, 0.5488])
tensor([0.9907, 0.5343])
tensor([1.4900, 0.7742])

 Iteration: 1342
Loss Agent1:0.9234771728515625
Loss Agent2:0.7866049408912659
tensor([0.7934, 0.3738])
tensor([1.0325, 0.5495])
tensor([0.9916, 0.5347])
tensor([1.4911, 0.7747])

 Iteration: 1343
Loss Agent1:0.900255024433136
Loss Agent2:0.7910379767417908
tensor([0.7943, 0.3743])
tensor([1.0337,

Loss Agent2:0.7568854689598083
tensor([0.8351, 0.3956])
tensor([1.0861, 0.5769])
tensor([1.0310, 0.5540])
tensor([1.5422, 0.7999])

 Iteration: 1389
Loss Agent1:0.803913414478302
Loss Agent2:0.8393862247467041
tensor([0.8359, 0.3960])
tensor([1.0872, 0.5775])
tensor([1.0318, 0.5544])
tensor([1.5433, 0.8004])

 Iteration: 1390
Loss Agent1:0.8757954835891724
Loss Agent2:0.8272475004196167
tensor([0.8368, 0.3965])
tensor([1.0883, 0.5781])
tensor([1.0327, 0.5548])
tensor([1.5444, 0.8010])

 Iteration: 1391
Loss Agent1:0.8594532608985901
Loss Agent2:0.8126121759414673
tensor([0.8376, 0.3969])
tensor([1.0894, 0.5787])
tensor([1.0336, 0.5553])
tensor([1.5455, 0.8016])

 Iteration: 1392
Loss Agent1:0.8425251245498657
Loss Agent2:0.7883033156394958
tensor([0.8385, 0.3974])
tensor([1.0905, 0.5793])
tensor([1.0344, 0.5557])
tensor([1.5466, 0.8022])

 Iteration: 1393
Loss Agent1:0.8479101061820984
Loss Agent2:0.7369190454483032
tensor([0.8393, 0.3978])
tensor([1.0916, 0.5799])
tensor([1.0353, 0.55

tensor([1.6014, 0.8282])

 Iteration: 1446
Loss Agent1:0.8542366623878479
Loss Agent2:0.7523631453514099
tensor([0.8854, 0.4201])
tensor([1.1516, 0.6092])
tensor([1.0791, 0.5759])
tensor([1.6024, 0.8287])

 Iteration: 1447
Loss Agent1:0.8589764833450317
Loss Agent2:0.7342662811279297
tensor([0.8863, 0.4205])
tensor([1.1527, 0.6098])
tensor([1.0799, 0.5763])
tensor([1.6035, 0.8291])

 Iteration: 1448
Loss Agent1:0.8885443210601807
Loss Agent2:0.7646164298057556
tensor([0.8871, 0.4209])
tensor([1.1538, 0.6103])
tensor([1.0807, 0.5766])
tensor([1.6045, 0.8295])

 Iteration: 1449
Loss Agent1:0.8199791312217712
Loss Agent2:0.7213174104690552
tensor([0.8880, 0.4212])
tensor([1.1549, 0.6108])
tensor([1.0815, 0.5770])
tensor([1.6056, 0.8299])

 Iteration: 1450
Loss Agent1:0.7947697043418884
Loss Agent2:0.7425681352615356
tensor([0.8889, 0.4216])
tensor([1.1561, 0.6113])
tensor([1.0824, 0.5773])
tensor([1.6066, 0.8302])

 Iteration: 1451
Loss Agent1:0.850850522518158
Loss Agent2:0.8053429126739

Loss Agent2:0.7754136323928833
tensor([0.9317, 0.4423])
tensor([1.2147, 0.6382])
tensor([1.1244, 0.5940])
tensor([1.6582, 0.8508])

 Iteration: 1502
Loss Agent1:0.8144571185112
Loss Agent2:0.7480184435844421
tensor([0.9325, 0.4427])
tensor([1.2158, 0.6388])
tensor([1.1253, 0.5944])
tensor([1.6592, 0.8513])

 Iteration: 1503
Loss Agent1:0.8561603426933289
Loss Agent2:0.7827231884002686
tensor([0.9333, 0.4432])
tensor([1.2169, 0.6393])
tensor([1.1261, 0.5947])
tensor([1.6602, 0.8517])

 Iteration: 1504
Loss Agent1:0.8703951835632324
Loss Agent2:0.7416679263114929
tensor([0.9342, 0.4436])
tensor([1.2181, 0.6399])
tensor([1.1269, 0.5951])
tensor([1.6612, 0.8522])

 Iteration: 1505
Loss Agent1:0.8077484369277954
Loss Agent2:0.7740882039070129
tensor([0.9350, 0.4440])
tensor([1.2192, 0.6404])
tensor([1.1278, 0.5954])
tensor([1.6622, 0.8527])

 Iteration: 1506
Loss Agent1:0.836093544960022
Loss Agent2:0.7445446848869324
tensor([0.9358, 0.4444])
tensor([1.2203, 0.6410])
tensor([1.1286, 0.5958]

Loss Agent1:0.7504060864448547
Loss Agent2:0.7191137075424194
tensor([0.9697, 0.4595])
tensor([1.2655, 0.6603])
tensor([1.1614, 0.6093])
tensor([1.7027, 0.8696])

 Iteration: 1548
Loss Agent1:0.8127994537353516
Loss Agent2:0.8021469712257385
tensor([0.9705, 0.4599])
tensor([1.2666, 0.6607])
tensor([1.1622, 0.6097])
tensor([1.7036, 0.8700])

 Iteration: 1549
Loss Agent1:0.8084673881530762
Loss Agent2:0.7165665030479431
tensor([0.9713, 0.4602])
tensor([1.2676, 0.6611])
tensor([1.1629, 0.6100])
tensor([1.7045, 0.8704])

 Iteration: 1550
Loss Agent1:0.8360784649848938
Loss Agent2:0.746279239654541
tensor([0.9721, 0.4606])
tensor([1.2687, 0.6616])
tensor([1.1637, 0.6103])
tensor([1.7054, 0.8708])

 Iteration: 1551
Loss Agent1:0.7840545177459717
Loss Agent2:0.740618109703064
tensor([0.9729, 0.4610])
tensor([1.2697, 0.6621])
tensor([1.1644, 0.6107])
tensor([1.7063, 0.8711])

 Iteration: 1552
Loss Agent1:0.8084496855735779
Loss Agent2:0.7222967147827148
tensor([0.9737, 0.4613])
tensor([1.2708,

tensor([1.0156, 0.4809])
tensor([1.3250, 0.6862])
tensor([1.2042, 0.6262])
tensor([1.7519, 0.8877])

 Iteration: 1605
Loss Agent1:0.7681219577789307
Loss Agent2:0.7483658194541931
tensor([1.0164, 0.4813])
tensor([1.3261, 0.6867])
tensor([1.2049, 0.6265])
tensor([1.7527, 0.8880])

 Iteration: 1606
Loss Agent1:0.8456677198410034
Loss Agent2:0.7562936544418335
tensor([1.0172, 0.4817])
tensor([1.3271, 0.6872])
tensor([1.2056, 0.6267])
tensor([1.7535, 0.8882])

 Iteration: 1607
Loss Agent1:0.8125219941139221
Loss Agent2:0.7274782657623291
tensor([1.0180, 0.4821])
tensor([1.3282, 0.6877])
tensor([1.2063, 0.6270])
tensor([1.7543, 0.8884])

 Iteration: 1608
Loss Agent1:0.7480229735374451
Loss Agent2:0.7347526550292969
tensor([1.0188, 0.4824])
tensor([1.3292, 0.6881])
tensor([1.2070, 0.6273])
tensor([1.7551, 0.8887])

 Iteration: 1609
Loss Agent1:0.7615987658500671
Loss Agent2:0.7721812725067139
tensor([1.0196, 0.4828])
tensor([1.3303, 0.6886])
tensor([1.2077, 0.6276])
tensor([1.7559, 0.8890])


Loss Agent2:0.7438486218452454
tensor([1.0608, 0.5001])
tensor([1.3823, 0.7095])
tensor([1.2446, 0.6402])
tensor([1.7971, 0.8999])

 Iteration: 1662
Loss Agent1:0.7669252753257751
Loss Agent2:0.7324152588844299
tensor([1.0616, 0.5004])
tensor([1.3833, 0.7099])
tensor([1.2453, 0.6404])
tensor([1.7978, 0.9002])

 Iteration: 1663
Loss Agent1:0.7487311363220215
Loss Agent2:0.7220872640609741
tensor([1.0623, 0.5007])
tensor([1.3842, 0.7102])
tensor([1.2459, 0.6407])
tensor([1.7985, 0.9004])

 Iteration: 1664
Loss Agent1:0.8032239675521851
Loss Agent2:0.7407358884811401
tensor([1.0631, 0.5010])
tensor([1.3852, 0.7106])
tensor([1.2466, 0.6409])
tensor([1.7993, 0.9007])

 Iteration: 1665
Loss Agent1:0.7198594212532043
Loss Agent2:0.7262054681777954
tensor([1.0639, 0.5013])
tensor([1.3862, 0.7110])
tensor([1.2473, 0.6412])
tensor([1.8000, 0.9009])

 Iteration: 1666
Loss Agent1:0.7503370642662048
Loss Agent2:0.701746940612793
tensor([1.0647, 0.5017])
tensor([1.3871, 0.7113])
tensor([1.2480, 0.64

Loss Agent2:0.7189955711364746
tensor([1.0973, 0.5160])
tensor([1.4273, 0.7282])
tensor([1.2776, 0.6513])
tensor([1.8332, 0.9105])

 Iteration: 1710
Loss Agent1:0.7354617714881897
Loss Agent2:0.7375279068946838
tensor([1.0981, 0.5163])
tensor([1.4282, 0.7285])
tensor([1.2783, 0.6516])
tensor([1.8340, 0.9106])

 Iteration: 1711
Loss Agent1:0.6888928413391113
Loss Agent2:0.7237741947174072
tensor([1.0988, 0.5166])
tensor([1.4291, 0.7288])
tensor([1.2790, 0.6518])
tensor([1.8347, 0.9109])

 Iteration: 1712
Loss Agent1:0.73695969581604
Loss Agent2:0.7168368101119995
tensor([1.0996, 0.5169])
tensor([1.4301, 0.7291])
tensor([1.2797, 0.6520])
tensor([1.8355, 0.9111])

 Iteration: 1713
Loss Agent1:0.7445985674858093
Loss Agent2:0.6765950322151184
tensor([1.1003, 0.5171])
tensor([1.4310, 0.7294])
tensor([1.2804, 0.6523])
tensor([1.8362, 0.9113])

 Iteration: 1714
Loss Agent1:0.7788290977478027
Loss Agent2:0.705780565738678
tensor([1.1011, 0.5174])
tensor([1.4319, 0.7298])
tensor([1.2811, 0.6524

tensor([1.1397, 0.5327])
tensor([1.4786, 0.7467])
tensor([1.3141, 0.6620])
tensor([1.8723, 0.9185])

 Iteration: 1767
Loss Agent1:0.7724323868751526
Loss Agent2:0.6624245643615723
tensor([1.1404, 0.5330])
tensor([1.4795, 0.7471])
tensor([1.3147, 0.6621])
tensor([1.8729, 0.9186])

 Iteration: 1768
Loss Agent1:0.6739037036895752
Loss Agent2:0.6531069278717041
tensor([1.1412, 0.5333])
tensor([1.4803, 0.7475])
tensor([1.3154, 0.6622])
tensor([1.8736, 0.9185])

 Iteration: 1769
Loss Agent1:0.7986327409744263
Loss Agent2:0.7029415369033813
tensor([1.1419, 0.5336])
tensor([1.4812, 0.7478])
tensor([1.3160, 0.6623])
tensor([1.8742, 0.9185])

 Iteration: 1770
Loss Agent1:0.7323651313781738
Loss Agent2:0.7074655294418335
tensor([1.1427, 0.5339])
tensor([1.4821, 0.7482])
tensor([1.3166, 0.6624])
tensor([1.8749, 0.9185])

 Iteration: 1771
Loss Agent1:0.7454843521118164
Loss Agent2:0.6984676122665405
tensor([1.1434, 0.5342])
tensor([1.4830, 0.7485])
tensor([1.3173, 0.6625])
tensor([1.8756, 0.9186])


tensor([1.9024, 0.9217])

 Iteration: 1820
Loss Agent1:0.7566933631896973
Loss Agent2:0.7210002541542053
tensor([1.1780, 0.5476])
tensor([1.5246, 0.7635])
tensor([1.3454, 0.6697])
tensor([1.9029, 0.9218])

 Iteration: 1821
Loss Agent1:0.7337026596069336
Loss Agent2:0.6964203715324402
tensor([1.1787, 0.5479])
tensor([1.5254, 0.7639])
tensor([1.3460, 0.6699])
tensor([1.9034, 0.9220])

 Iteration: 1822
Loss Agent1:0.7135458588600159
Loss Agent2:0.6837397813796997
tensor([1.1793, 0.5482])
tensor([1.5262, 0.7642])
tensor([1.3465, 0.6701])
tensor([1.9039, 0.9221])

 Iteration: 1823
Loss Agent1:0.7156457304954529
Loss Agent2:0.7109391689300537
tensor([1.1800, 0.5485])
tensor([1.5271, 0.7645])
tensor([1.3471, 0.6703])
tensor([1.9044, 0.9223])

 Iteration: 1824
Loss Agent1:0.7262753844261169
Loss Agent2:0.7151626944541931
tensor([1.1808, 0.5488])
tensor([1.5279, 0.7648])
tensor([1.3476, 0.6706])
tensor([1.9049, 0.9225])

 Iteration: 1825
Loss Agent1:0.7174912095069885
Loss Agent2:0.726775288581

Loss Agent1:0.72457355260849
Loss Agent2:0.6992397308349609
tensor([1.2148, 0.5629])
tensor([1.5681, 0.7811])
tensor([1.3752, 0.6811])
tensor([1.9295, 0.9312])

 Iteration: 1877
Loss Agent1:0.6887847781181335
Loss Agent2:0.6756104230880737
tensor([1.2155, 0.5632])
tensor([1.5689, 0.7814])
tensor([1.3757, 0.6812])
tensor([1.9298, 0.9313])

 Iteration: 1878
Loss Agent1:0.7047747373580933
Loss Agent2:0.6903271079063416
tensor([1.2161, 0.5634])
tensor([1.5696, 0.7817])
tensor([1.3761, 0.6814])
tensor([1.9302, 0.9314])

 Iteration: 1879
Loss Agent1:0.6853684782981873
Loss Agent2:0.6950603127479553
tensor([1.2167, 0.5636])
tensor([1.5704, 0.7819])
tensor([1.3766, 0.6815])
tensor([1.9306, 0.9314])

 Iteration: 1880
Loss Agent1:0.7102661728858948
Loss Agent2:0.7166799902915955
tensor([1.2174, 0.5639])
tensor([1.5711, 0.7821])
tensor([1.3771, 0.6817])
tensor([1.9310, 0.9316])

 Iteration: 1881
Loss Agent1:0.6877560615539551
Loss Agent2:0.6807861924171448
tensor([1.2180, 0.5641])
tensor([1.5719,

Loss Agent2:0.690639078617096
tensor([1.2433, 0.5736])
tensor([1.6014, 0.7924])
tensor([1.3974, 0.6884])
tensor([1.9475, 0.9351])

 Iteration: 1924
Loss Agent1:0.6277591586112976
Loss Agent2:0.7407233715057373
tensor([1.2439, 0.5737])
tensor([1.6020, 0.7926])
tensor([1.3979, 0.6886])
tensor([1.9479, 0.9352])

 Iteration: 1925
Loss Agent1:0.7367702722549438
Loss Agent2:0.6705528497695923
tensor([1.2444, 0.5739])
tensor([1.6027, 0.7927])
tensor([1.3984, 0.6888])
tensor([1.9482, 0.9353])

 Iteration: 1926
Loss Agent1:0.7159006595611572
Loss Agent2:0.6841242909431458
tensor([1.2450, 0.5741])
tensor([1.6034, 0.7929])
tensor([1.3988, 0.6890])
tensor([1.9486, 0.9355])

 Iteration: 1927
Loss Agent1:0.7199766039848328
Loss Agent2:0.7279618382453918
tensor([1.2456, 0.5742])
tensor([1.6041, 0.7930])
tensor([1.3993, 0.6892])
tensor([1.9489, 0.9357])

 Iteration: 1928
Loss Agent1:0.7219095826148987
Loss Agent2:0.6927642822265625
tensor([1.2462, 0.5744])
tensor([1.6048, 0.7932])
tensor([1.3997, 0.68

Loss Agent1:0.7141762971878052
Loss Agent2:0.6747229099273682
tensor([1.2712, 0.5837])
tensor([1.6333, 0.8025])
tensor([1.4173, 0.6945])
tensor([1.9610, 0.9371])

 Iteration: 1971
Loss Agent1:0.693973183631897
Loss Agent2:0.6983852982521057
tensor([1.2718, 0.5840])
tensor([1.6340, 0.8027])
tensor([1.4177, 0.6945])
tensor([1.9612, 0.9370])

 Iteration: 1972
Loss Agent1:0.6957043409347534
Loss Agent2:0.6617175340652466
tensor([1.2724, 0.5842])
tensor([1.6347, 0.8030])
tensor([1.4181, 0.6946])
tensor([1.9615, 0.9368])

 Iteration: 1973
Loss Agent1:0.6353577971458435
Loss Agent2:0.6818931102752686
tensor([1.2729, 0.5844])
tensor([1.6353, 0.8032])
tensor([1.4185, 0.6946])
tensor([1.9617, 0.9367])

 Iteration: 1974
Loss Agent1:0.6942914724349976
Loss Agent2:0.6634535789489746
tensor([1.2735, 0.5846])
tensor([1.6360, 0.8034])
tensor([1.4189, 0.6946])
tensor([1.9619, 0.9365])

 Iteration: 1975
Loss Agent1:0.6647778749465942
Loss Agent2:0.6585944294929504
tensor([1.2741, 0.5848])
tensor([1.6367

Loss Agent2:0.6734790205955505
tensor([1.3023, 0.5949])
tensor([1.6681, 0.8145])
tensor([1.4376, 0.6979])
tensor([1.9725, 0.9342])

 Iteration: 2028
Loss Agent1:0.718345582485199
Loss Agent2:0.6618883609771729
tensor([1.3028, 0.5951])
tensor([1.6687, 0.8147])
tensor([1.4379, 0.6980])
tensor([1.9726, 0.9341])

 Iteration: 2029
Loss Agent1:0.6904823780059814
Loss Agent2:0.6545444130897522
tensor([1.3033, 0.5953])
tensor([1.6693, 0.8149])
tensor([1.4381, 0.6981])
tensor([1.9726, 0.9340])

 Iteration: 2030
Loss Agent1:0.7044713497161865
Loss Agent2:0.6623967289924622
tensor([1.3039, 0.5956])
tensor([1.6699, 0.8151])
tensor([1.4384, 0.6982])
tensor([1.9727, 0.9340])

 Iteration: 2031
Loss Agent1:0.660399854183197
Loss Agent2:0.7025728225708008
tensor([1.3044, 0.5958])
tensor([1.6705, 0.8153])
tensor([1.4387, 0.6982])
tensor([1.9728, 0.9340])

 Iteration: 2032
Loss Agent1:0.6494801044464111
Loss Agent2:0.6899107694625854
tensor([1.3049, 0.5960])
tensor([1.6711, 0.8155])
tensor([1.4390, 0.698

Loss Agent1:0.6610081195831299
Loss Agent2:0.6958460211753845
tensor([1.3267, 0.6039])
tensor([1.6950, 0.8228])
tensor([1.4521, 0.7018])
tensor([1.9764, 0.9323])

 Iteration: 2078
Loss Agent1:0.6934105753898621
Loss Agent2:0.7110643982887268
tensor([1.3271, 0.6041])
tensor([1.6954, 0.8230])
tensor([1.4524, 0.7019])
tensor([1.9764, 0.9323])

 Iteration: 2079
Loss Agent1:0.7009425163269043
Loss Agent2:0.6854759454727173
tensor([1.3276, 0.6043])
tensor([1.6959, 0.8232])
tensor([1.4527, 0.7020])
tensor([1.9764, 0.9323])

 Iteration: 2080
Loss Agent1:0.6607794761657715
Loss Agent2:0.6718338131904602
tensor([1.3280, 0.6044])
tensor([1.6964, 0.8234])
tensor([1.4530, 0.7021])
tensor([1.9765, 0.9324])

 Iteration: 2081
Loss Agent1:0.6955479383468628
Loss Agent2:0.6465762853622437
tensor([1.3285, 0.6046])
tensor([1.6969, 0.8236])
tensor([1.4533, 0.7022])
tensor([1.9766, 0.9323])

 Iteration: 2082
Loss Agent1:0.6794183850288391
Loss Agent2:0.6885416507720947
tensor([1.3289, 0.6048])
tensor([1.697

Loss Agent1:0.6805992722511292
Loss Agent2:0.6768259406089783
tensor([1.3509, 0.6137])
tensor([1.7205, 0.8326])
tensor([1.4688, 0.7065])
tensor([1.9825, 0.9306])

 Iteration: 2133
Loss Agent1:0.686181902885437
Loss Agent2:0.6701139807701111
tensor([1.3513, 0.6138])
tensor([1.7209, 0.8328])
tensor([1.4690, 0.7065])
tensor([1.9825, 0.9305])

 Iteration: 2134
Loss Agent1:0.6844945549964905
Loss Agent2:0.6499055624008179
tensor([1.3517, 0.6140])
tensor([1.7213, 0.8330])
tensor([1.4693, 0.7066])
tensor([1.9825, 0.9305])

 Iteration: 2135
Loss Agent1:0.6231745481491089
Loss Agent2:0.6929476261138916
tensor([1.3521, 0.6142])
tensor([1.7217, 0.8332])
tensor([1.4695, 0.7067])
tensor([1.9825, 0.9304])

 Iteration: 2136
Loss Agent1:0.682476282119751
Loss Agent2:0.6514990925788879
tensor([1.3525, 0.6143])
tensor([1.7221, 0.8334])
tensor([1.4697, 0.7068])
tensor([1.9824, 0.9304])

 Iteration: 2137
Loss Agent1:0.6293773055076599
Loss Agent2:0.700269877910614
tensor([1.3529, 0.6145])
tensor([1.7225, 

Loss Agent2:0.6476912498474121
tensor([1.3723, 0.6213])
tensor([1.7414, 0.8399])
tensor([1.4812, 0.7108])
tensor([1.9815, 0.9289])

 Iteration: 2189
Loss Agent1:0.6606638431549072
Loss Agent2:0.6494014859199524
tensor([1.3726, 0.6213])
tensor([1.7417, 0.8399])
tensor([1.4814, 0.7108])
tensor([1.9816, 0.9287])

 Iteration: 2190
Loss Agent1:0.6730397343635559
Loss Agent2:0.6468153595924377
tensor([1.3730, 0.6213])
tensor([1.7420, 0.8400])
tensor([1.4816, 0.7108])
tensor([1.9815, 0.9286])

 Iteration: 2191
Loss Agent1:0.6720305681228638
Loss Agent2:0.6855818629264832
tensor([1.3733, 0.6214])
tensor([1.7423, 0.8400])
tensor([1.4818, 0.7109])
tensor([1.9814, 0.9285])

 Iteration: 2192
Loss Agent1:0.688418447971344
Loss Agent2:0.6307169198989868
tensor([1.3737, 0.6215])
tensor([1.7426, 0.8401])
tensor([1.4819, 0.7109])
tensor([1.9814, 0.9284])

 Iteration: 2193
Loss Agent1:0.6873270273208618
Loss Agent2:0.6508784294128418
tensor([1.3741, 0.6216])
tensor([1.7429, 0.8402])
tensor([1.4821, 0.71

Loss Agent2:0.6077911853790283
tensor([1.3894, 0.6277])
tensor([1.7583, 0.8463])
tensor([1.4909, 0.7129])
tensor([1.9807, 0.9254])

 Iteration: 2238
Loss Agent1:0.6584499478340149
Loss Agent2:0.6222761273384094
tensor([1.3898, 0.6278])
tensor([1.7586, 0.8465])
tensor([1.4911, 0.7128])
tensor([1.9806, 0.9252])

 Iteration: 2239
Loss Agent1:0.655635416507721
Loss Agent2:0.6870418787002563
tensor([1.3901, 0.6280])
tensor([1.7590, 0.8466])
tensor([1.4912, 0.7129])
tensor([1.9805, 0.9251])

 Iteration: 2240
Loss Agent1:0.6346970200538635
Loss Agent2:0.6665742993354797
tensor([1.3904, 0.6281])
tensor([1.7593, 0.8468])
tensor([1.4914, 0.7129])
tensor([1.9804, 0.9250])

 Iteration: 2241
Loss Agent1:0.6540637612342834
Loss Agent2:0.6819043159484863
tensor([1.3908, 0.6283])
tensor([1.7596, 0.8469])
tensor([1.4916, 0.7129])
tensor([1.9804, 0.9249])

 Iteration: 2242
Loss Agent1:0.6247578859329224
Loss Agent2:0.6447838544845581
tensor([1.3911, 0.6284])
tensor([1.7599, 0.8470])
tensor([1.4918, 0.71

Loss Agent1:0.6453279852867126
Loss Agent2:0.6726438999176025
tensor([1.4030, 0.6317])
tensor([1.7713, 0.8488])
tensor([1.4992, 0.7161])
tensor([1.9767, 0.9251])

 Iteration: 2285
Loss Agent1:0.6171364188194275
Loss Agent2:0.6650692224502563
tensor([1.4033, 0.6317])
tensor([1.7716, 0.8488])
tensor([1.4994, 0.7162])
tensor([1.9767, 0.9252])

 Iteration: 2286
Loss Agent1:0.6470715999603271
Loss Agent2:0.6558133363723755
tensor([1.4036, 0.6318])
tensor([1.7719, 0.8489])
tensor([1.4997, 0.7163])
tensor([1.9767, 0.9252])

 Iteration: 2287
Loss Agent1:0.6772202253341675
Loss Agent2:0.6362082958221436
tensor([1.4039, 0.6318])
tensor([1.7722, 0.8489])
tensor([1.4999, 0.7164])
tensor([1.9767, 0.9252])

 Iteration: 2288
Loss Agent1:0.6007979512214661
Loss Agent2:0.652092695236206
tensor([1.4041, 0.6319])
tensor([1.7725, 0.8490])
tensor([1.5000, 0.7165])
tensor([1.9766, 0.9252])

 Iteration: 2289
Loss Agent1:0.6510645747184753
Loss Agent2:0.6567379236221313
tensor([1.4044, 0.6320])
tensor([1.7727

Loss Agent2:0.6679723858833313
tensor([1.4158, 0.6356])
tensor([1.7838, 0.8522])
tensor([1.5052, 0.7180])
tensor([1.9707, 0.9216])

 Iteration: 2333
Loss Agent1:0.6323464512825012
Loss Agent2:0.6260923147201538
tensor([1.4161, 0.6357])
tensor([1.7840, 0.8523])
tensor([1.5053, 0.7180])
tensor([1.9705, 0.9214])

 Iteration: 2334
Loss Agent1:0.6225190758705139
Loss Agent2:0.6466944217681885
tensor([1.4163, 0.6358])
tensor([1.7843, 0.8523])
tensor([1.5054, 0.7180])
tensor([1.9704, 0.9213])

 Iteration: 2335
Loss Agent1:0.62317955493927
Loss Agent2:0.6378777623176575
tensor([1.4166, 0.6358])
tensor([1.7845, 0.8524])
tensor([1.5056, 0.7179])
tensor([1.9703, 0.9212])

 Iteration: 2336
Loss Agent1:0.6339330673217773
Loss Agent2:0.6394081711769104
tensor([1.4168, 0.6359])
tensor([1.7847, 0.8525])
tensor([1.5057, 0.7179])
tensor([1.9702, 0.9211])

 Iteration: 2337
Loss Agent1:0.6573867797851562
Loss Agent2:0.6465768218040466
tensor([1.4171, 0.6360])
tensor([1.7850, 0.8526])
tensor([1.5059, 0.717

Loss Agent1:0.6021857857704163
Loss Agent2:0.6166641116142273
tensor([1.4270, 0.6393])
tensor([1.7942, 0.8553])
tensor([1.5107, 0.7190])
tensor([1.9659, 0.9180])

 Iteration: 2380
Loss Agent1:0.6063580513000488
Loss Agent2:0.6279330849647522
tensor([1.4272, 0.6394])
tensor([1.7943, 0.8553])
tensor([1.5108, 0.7190])
tensor([1.9657, 0.9178])

 Iteration: 2381
Loss Agent1:0.6061438322067261
Loss Agent2:0.6027429699897766
tensor([1.4274, 0.6394])
tensor([1.7945, 0.8553])
tensor([1.5108, 0.7189])
tensor([1.9656, 0.9177])

 Iteration: 2382
Loss Agent1:0.6189619898796082
Loss Agent2:0.6637654900550842
tensor([1.4276, 0.6395])
tensor([1.7947, 0.8554])
tensor([1.5109, 0.7189])
tensor([1.9655, 0.9175])

 Iteration: 2383
Loss Agent1:0.5816949605941772
Loss Agent2:0.5906291604042053
tensor([1.4278, 0.6395])
tensor([1.7948, 0.8554])
tensor([1.5110, 0.7189])
tensor([1.9654, 0.9174])

 Iteration: 2384
Loss Agent1:0.6132203340530396
Loss Agent2:0.6134977340698242
tensor([1.4280, 0.6395])
tensor([1.795

Loss Agent1:0.6159822940826416
Loss Agent2:0.6060833930969238
tensor([1.4355, 0.6403])
tensor([1.8010, 0.8550])
tensor([1.5137, 0.7181])
tensor([1.9575, 0.9118])

 Iteration: 2426
Loss Agent1:0.647068440914154
Loss Agent2:0.6378489136695862
tensor([1.4356, 0.6404])
tensor([1.8011, 0.8550])
tensor([1.5137, 0.7181])
tensor([1.9573, 0.9117])

 Iteration: 2427
Loss Agent1:0.5966467261314392
Loss Agent2:0.6048679351806641
tensor([1.4358, 0.6404])
tensor([1.8012, 0.8550])
tensor([1.5138, 0.7180])
tensor([1.9572, 0.9115])

 Iteration: 2428
Loss Agent1:0.5897281169891357
Loss Agent2:0.6133649945259094
tensor([1.4359, 0.6404])
tensor([1.8013, 0.8549])
tensor([1.5139, 0.7179])
tensor([1.9570, 0.9113])

 Iteration: 2429
Loss Agent1:0.6179254651069641
Loss Agent2:0.5725734829902649
tensor([1.4360, 0.6404])
tensor([1.8014, 0.8549])
tensor([1.5139, 0.7178])
tensor([1.9568, 0.9110])

 Iteration: 2430
Loss Agent1:0.6200510263442993
Loss Agent2:0.6242395043373108
tensor([1.4362, 0.6404])
tensor([1.8014

Loss Agent1:0.6268141269683838
Loss Agent2:0.5983731746673584
tensor([1.4426, 0.6393])
tensor([1.8054, 0.8535])
tensor([1.5147, 0.7155])
tensor([1.9453, 0.9020])

 Iteration: 2481
Loss Agent1:0.6443395614624023
Loss Agent2:0.6211864948272705
tensor([1.4427, 0.6394])
tensor([1.8055, 0.8535])
tensor([1.5147, 0.7154])
tensor([1.9451, 0.9018])

 Iteration: 2482
Loss Agent1:0.5715742707252502
Loss Agent2:0.6019577383995056
tensor([1.4429, 0.6394])
tensor([1.8056, 0.8534])
tensor([1.5148, 0.7154])
tensor([1.9450, 0.9016])

 Iteration: 2483
Loss Agent1:0.6115572452545166
Loss Agent2:0.6440608501434326
tensor([1.4430, 0.6394])
tensor([1.8057, 0.8534])
tensor([1.5148, 0.7153])
tensor([1.9448, 0.9015])

 Iteration: 2484
Loss Agent1:0.6247472167015076
Loss Agent2:0.5802817344665527
tensor([1.4431, 0.6394])
tensor([1.8057, 0.8534])
tensor([1.5148, 0.7153])
tensor([1.9446, 0.9013])

 Iteration: 2485
Loss Agent1:0.6004199981689453
Loss Agent2:0.6312045454978943
tensor([1.4432, 0.6394])
tensor([1.805

Loss Agent2:0.612157940864563
tensor([1.4481, 0.6400])
tensor([1.8073, 0.8531])
tensor([1.5162, 0.7142])
tensor([1.9374, 0.8955])

 Iteration: 2532
Loss Agent1:0.6093700528144836
Loss Agent2:0.5928376317024231
tensor([1.4482, 0.6400])
tensor([1.8074, 0.8530])
tensor([1.5163, 0.7142])
tensor([1.9373, 0.8954])

 Iteration: 2533
Loss Agent1:0.6160452365875244
Loss Agent2:0.6442826986312866
tensor([1.4483, 0.6401])
tensor([1.8074, 0.8530])
tensor([1.5164, 0.7142])
tensor([1.9372, 0.8954])

 Iteration: 2534
Loss Agent1:0.5864636898040771
Loss Agent2:0.6566466093063354
tensor([1.4483, 0.6401])
tensor([1.8074, 0.8531])
tensor([1.5164, 0.7142])
tensor([1.9371, 0.8954])

 Iteration: 2535
Loss Agent1:0.5937676429748535
Loss Agent2:0.608700692653656
tensor([1.4484, 0.6402])
tensor([1.8074, 0.8531])
tensor([1.5165, 0.7143])
tensor([1.9369, 0.8953])

 Iteration: 2536
Loss Agent1:0.622025728225708
Loss Agent2:0.6045094728469849
tensor([1.4485, 0.6402])
tensor([1.8075, 0.8531])
tensor([1.5165, 0.7143

Loss Agent1:0.5918130874633789
Loss Agent2:0.5963686108589172
tensor([1.4510, 0.6410])
tensor([1.8061, 0.8529])
tensor([1.5172, 0.7133])
tensor([1.9269, 0.8902])

 Iteration: 2588
Loss Agent1:0.6059073209762573
Loss Agent2:0.6019203662872314
tensor([1.4510, 0.6410])
tensor([1.8061, 0.8529])
tensor([1.5172, 0.7133])
tensor([1.9266, 0.8901])

 Iteration: 2589
Loss Agent1:0.5828198790550232
Loss Agent2:0.5828613638877869
tensor([1.4510, 0.6410])
tensor([1.8060, 0.8529])
tensor([1.5172, 0.7132])
tensor([1.9264, 0.8899])

 Iteration: 2590
Loss Agent1:0.6077986359596252
Loss Agent2:0.5654218792915344
tensor([1.4511, 0.6410])
tensor([1.8060, 0.8529])
tensor([1.5171, 0.7131])
tensor([1.9262, 0.8897])

 Iteration: 2591
Loss Agent1:0.6259417533874512
Loss Agent2:0.5512253642082214
tensor([1.4511, 0.6410])
tensor([1.8060, 0.8529])
tensor([1.5171, 0.7130])
tensor([1.9259, 0.8895])

 Iteration: 2592
Loss Agent1:0.5764040946960449
Loss Agent2:0.6059411764144897
tensor([1.4512, 0.6410])
tensor([1.805

Loss Agent1:0.6165887713432312
Loss Agent2:0.5746237635612488
tensor([1.4539, 0.6405])
tensor([1.8062, 0.8522])
tensor([1.5134, 0.7101])
tensor([1.9122, 0.8824])

 Iteration: 2634
Loss Agent1:0.5977294445037842
Loss Agent2:0.6584217548370361
tensor([1.4540, 0.6405])
tensor([1.8062, 0.8523])
tensor([1.5133, 0.7102])
tensor([1.9119, 0.8823])

 Iteration: 2635
Loss Agent1:0.5849089026451111
Loss Agent2:0.5831084251403809
tensor([1.4542, 0.6405])
tensor([1.8063, 0.8523])
tensor([1.5133, 0.7102])
tensor([1.9116, 0.8823])

 Iteration: 2636
Loss Agent1:0.5764694809913635
Loss Agent2:0.573606550693512
tensor([1.4543, 0.6406])
tensor([1.8064, 0.8523])
tensor([1.5132, 0.7102])
tensor([1.9113, 0.8822])

 Iteration: 2637
Loss Agent1:0.5637580156326294
Loss Agent2:0.6571813225746155
tensor([1.4543, 0.6406])
tensor([1.8064, 0.8523])
tensor([1.5131, 0.7102])
tensor([1.9110, 0.8822])

 Iteration: 2638
Loss Agent1:0.614363968372345
Loss Agent2:0.5958072543144226
tensor([1.4544, 0.6406])
tensor([1.8065,

Loss Agent1:0.5866444110870361
Loss Agent2:0.5973082184791565
tensor([1.4559, 0.6402])
tensor([1.8055, 0.8511])
tensor([1.5110, 0.7093])
tensor([1.8994, 0.8781])

 Iteration: 2687
Loss Agent1:0.5515424609184265
Loss Agent2:0.5816538333892822
tensor([1.4559, 0.6402])
tensor([1.8055, 0.8510])
tensor([1.5109, 0.7093])
tensor([1.8992, 0.8780])

 Iteration: 2688
Loss Agent1:0.6029207706451416
Loss Agent2:0.5845378041267395
tensor([1.4559, 0.6402])
tensor([1.8054, 0.8511])
tensor([1.5109, 0.7092])
tensor([1.8991, 0.8779])

 Iteration: 2689
Loss Agent1:0.5752966403961182
Loss Agent2:0.5619354844093323
tensor([1.4559, 0.6402])
tensor([1.8054, 0.8511])
tensor([1.5109, 0.7092])
tensor([1.8989, 0.8778])

 Iteration: 2690
Loss Agent1:0.5564987063407898
Loss Agent2:0.594923734664917
tensor([1.4559, 0.6402])
tensor([1.8054, 0.8511])
tensor([1.5108, 0.7091])
tensor([1.8988, 0.8777])

 Iteration: 2691
Loss Agent1:0.5763646364212036
Loss Agent2:0.607085108757019
tensor([1.4559, 0.6401])
tensor([1.8053,

Loss Agent1:0.5906485319137573
Loss Agent2:0.5973543524742126
tensor([1.4559, 0.6391])
tensor([1.8032, 0.8494])
tensor([1.5078, 0.7082])
tensor([1.8873, 0.8743])

 Iteration: 2739
Loss Agent1:0.5467879772186279
Loss Agent2:0.5916620492935181
tensor([1.4558, 0.6391])
tensor([1.8031, 0.8494])
tensor([1.5078, 0.7082])
tensor([1.8870, 0.8743])

 Iteration: 2740
Loss Agent1:0.6042945384979248
Loss Agent2:0.5870295166969299
tensor([1.4558, 0.6390])
tensor([1.8029, 0.8494])
tensor([1.5077, 0.7082])
tensor([1.8867, 0.8742])

 Iteration: 2741
Loss Agent1:0.6156330108642578
Loss Agent2:0.6060083508491516
tensor([1.4558, 0.6390])
tensor([1.8028, 0.8494])
tensor([1.5076, 0.7082])
tensor([1.8864, 0.8741])

 Iteration: 2742
Loss Agent1:0.6123501062393188
Loss Agent2:0.5911109447479248
tensor([1.4557, 0.6391])
tensor([1.8027, 0.8494])
tensor([1.5075, 0.7082])
tensor([1.8861, 0.8740])

 Iteration: 2743
Loss Agent1:0.6020099520683289
Loss Agent2:0.5920708775520325
tensor([1.4557, 0.6391])
tensor([1.802

Loss Agent1:0.5889129638671875
Loss Agent2:0.5856611728668213
tensor([1.4530, 0.6383])
tensor([1.7970, 0.8482])
tensor([1.5030, 0.7070])
tensor([1.8732, 0.8695])

 Iteration: 2793
Loss Agent1:0.5817549824714661
Loss Agent2:0.5596309900283813
tensor([1.4530, 0.6383])
tensor([1.7969, 0.8481])
tensor([1.5029, 0.7071])
tensor([1.8729, 0.8695])

 Iteration: 2794
Loss Agent1:0.5435320138931274
Loss Agent2:0.5761076211929321
tensor([1.4530, 0.6382])
tensor([1.7969, 0.8480])
tensor([1.5027, 0.7071])
tensor([1.8725, 0.8695])

 Iteration: 2795
Loss Agent1:0.5812066793441772
Loss Agent2:0.5529484748840332
tensor([1.4529, 0.6382])
tensor([1.7968, 0.8479])
tensor([1.5026, 0.7071])
tensor([1.8722, 0.8694])

 Iteration: 2796
Loss Agent1:0.5968214869499207
Loss Agent2:0.5564477443695068
tensor([1.4529, 0.6381])
tensor([1.7967, 0.8478])
tensor([1.5024, 0.7070])
tensor([1.8719, 0.8693])

 Iteration: 2797
Loss Agent1:0.5988079905509949
Loss Agent2:0.5786277651786804
tensor([1.4529, 0.6381])
tensor([1.796

Loss Agent2:0.5445226430892944
tensor([1.4504, 0.6350])
tensor([1.7927, 0.8436])
tensor([1.4981, 0.7060])
tensor([1.8609, 0.8659])

 Iteration: 2840
Loss Agent1:0.5454516410827637
Loss Agent2:0.5697798728942871
tensor([1.4503, 0.6349])
tensor([1.7926, 0.8435])
tensor([1.4979, 0.7060])
tensor([1.8606, 0.8658])

 Iteration: 2841
Loss Agent1:0.5770976543426514
Loss Agent2:0.5794402360916138
tensor([1.4502, 0.6348])
tensor([1.7924, 0.8434])
tensor([1.4978, 0.7060])
tensor([1.8603, 0.8658])

 Iteration: 2842
Loss Agent1:0.5341067910194397
Loss Agent2:0.6177953481674194
tensor([1.4502, 0.6347])
tensor([1.7923, 0.8433])
tensor([1.4977, 0.7060])
tensor([1.8601, 0.8659])

 Iteration: 2843
Loss Agent1:0.5336248874664307
Loss Agent2:0.5651471018791199
tensor([1.4501, 0.6346])
tensor([1.7922, 0.8431])
tensor([1.4976, 0.7060])
tensor([1.8598, 0.8659])

 Iteration: 2844
Loss Agent1:0.5816864371299744
Loss Agent2:0.541408121585846
tensor([1.4500, 0.6345])
tensor([1.7921, 0.8430])
tensor([1.4975, 0.70

Loss Agent1:0.526561975479126
Loss Agent2:0.5456015467643738
tensor([1.4460, 0.6328])
tensor([1.7852, 0.8404])
tensor([1.4917, 0.7045])
tensor([1.8469, 0.8627])

 Iteration: 2897
Loss Agent1:0.5426429510116577
Loss Agent2:0.5818964838981628
tensor([1.4459, 0.6328])
tensor([1.7850, 0.8403])
tensor([1.4916, 0.7044])
tensor([1.8466, 0.8626])

 Iteration: 2898
Loss Agent1:0.5169839859008789
Loss Agent2:0.592704713344574
tensor([1.4458, 0.6327])
tensor([1.7849, 0.8402])
tensor([1.4915, 0.7044])
tensor([1.8464, 0.8626])

 Iteration: 2899
Loss Agent1:0.5836889147758484
Loss Agent2:0.5937551259994507
tensor([1.4457, 0.6327])
tensor([1.7847, 0.8401])
tensor([1.4914, 0.7044])
tensor([1.8462, 0.8625])

 Iteration: 2900
Loss Agent1:0.5788609385490417
Loss Agent2:0.5486122965812683
tensor([1.4456, 0.6326])
tensor([1.7845, 0.8401])
tensor([1.4913, 0.7043])
tensor([1.8459, 0.8624])

 Iteration: 2901
Loss Agent1:0.5573638677597046
Loss Agent2:0.5345469117164612
tensor([1.4455, 0.6326])
tensor([1.7844,

Loss Agent1:0.5729573965072632
Loss Agent2:0.5000153183937073
tensor([1.4420, 0.6313])
tensor([1.7783, 0.8379])
tensor([1.4854, 0.7023])
tensor([1.8338, 0.8592])

 Iteration: 2954
Loss Agent1:0.5823478698730469
Loss Agent2:0.5192712545394897
tensor([1.4419, 0.6313])
tensor([1.7782, 0.8379])
tensor([1.4852, 0.7023])
tensor([1.8336, 0.8590])

 Iteration: 2955
Loss Agent1:0.5679165720939636
Loss Agent2:0.5682979226112366
tensor([1.4418, 0.6313])
tensor([1.7780, 0.8379])
tensor([1.4851, 0.7022])
tensor([1.8334, 0.8589])

 Iteration: 2956
Loss Agent1:0.5838931202888489
Loss Agent2:0.5457453727722168
tensor([1.4418, 0.6313])
tensor([1.7779, 0.8379])
tensor([1.4850, 0.7022])
tensor([1.8332, 0.8588])

 Iteration: 2957
Loss Agent1:0.5365065336227417
Loss Agent2:0.5550406575202942
tensor([1.4417, 0.6313])
tensor([1.7778, 0.8379])
tensor([1.4849, 0.7021])
tensor([1.8331, 0.8587])

 Iteration: 2958
Loss Agent1:0.5633408427238464
Loss Agent2:0.5747138261795044
tensor([1.4416, 0.6313])
tensor([1.777

tensor([1.4791, 0.7003])
tensor([1.8215, 0.8555])

 Iteration: 3007
Loss Agent1:0.5717519521713257
Loss Agent2:0.5531461834907532
tensor([1.4365, 0.6293])
tensor([1.7701, 0.8348])
tensor([1.4790, 0.7002])
tensor([1.8212, 0.8553])

 Iteration: 3008
Loss Agent1:0.5866093635559082
Loss Agent2:0.5352898240089417
tensor([1.4364, 0.6293])
tensor([1.7701, 0.8347])
tensor([1.4788, 0.7002])
tensor([1.8210, 0.8553])

 Iteration: 3009
Loss Agent1:0.5064764022827148
Loss Agent2:0.565315306186676
tensor([1.4363, 0.6293])
tensor([1.7700, 0.8346])
tensor([1.4787, 0.7002])
tensor([1.8207, 0.8552])

 Iteration: 3010
Loss Agent1:0.5668004155158997
Loss Agent2:0.5487452149391174
tensor([1.4363, 0.6293])
tensor([1.7698, 0.8345])
tensor([1.4786, 0.7002])
tensor([1.8204, 0.8551])

 Iteration: 3011
Loss Agent1:0.5569021701812744
Loss Agent2:0.5529801249504089
tensor([1.4362, 0.6293])
tensor([1.7697, 0.8344])
tensor([1.4784, 0.7002])
tensor([1.8201, 0.8551])

 Iteration: 3012
Loss Agent1:0.5745944380760193
Lo

Loss Agent2:0.5195763111114502
tensor([1.4307, 0.6285])
tensor([1.7622, 0.8331])
tensor([1.4729, 0.6989])
tensor([1.8095, 0.8516])

 Iteration: 3056
Loss Agent1:0.5625873804092407
Loss Agent2:0.5536829829216003
tensor([1.4306, 0.6285])
tensor([1.7620, 0.8332])
tensor([1.4728, 0.6988])
tensor([1.8093, 0.8515])

 Iteration: 3057
Loss Agent1:0.5252930521965027
Loss Agent2:0.503071129322052
tensor([1.4304, 0.6285])
tensor([1.7617, 0.8332])
tensor([1.4726, 0.6987])
tensor([1.8091, 0.8513])

 Iteration: 3058
Loss Agent1:0.5456721782684326
Loss Agent2:0.5681824684143066
tensor([1.4303, 0.6286])
tensor([1.7615, 0.8332])
tensor([1.4725, 0.6987])
tensor([1.8089, 0.8511])

 Iteration: 3059
Loss Agent1:0.5183334946632385
Loss Agent2:0.5589970350265503
tensor([1.4301, 0.6286])
tensor([1.7613, 0.8332])
tensor([1.4724, 0.6986])
tensor([1.8087, 0.8510])

 Iteration: 3060
Loss Agent1:0.5622268319129944
Loss Agent2:0.5877975225448608
tensor([1.4299, 0.6286])
tensor([1.7610, 0.8333])
tensor([1.4723, 0.69

Loss Agent1:0.491866797208786
Loss Agent2:0.5214040279388428
tensor([1.4246, 0.6275])
tensor([1.7540, 0.8319])
tensor([1.4666, 0.6976])
tensor([1.7994, 0.8495])

 Iteration: 3107
Loss Agent1:0.4938870668411255
Loss Agent2:0.5674235820770264
tensor([1.4244, 0.6274])
tensor([1.7538, 0.8318])
tensor([1.4665, 0.6976])
tensor([1.7992, 0.8495])

 Iteration: 3108
Loss Agent1:0.5352649092674255
Loss Agent2:0.5168519020080566
tensor([1.4243, 0.6273])
tensor([1.7536, 0.8317])
tensor([1.4664, 0.6976])
tensor([1.7990, 0.8495])

 Iteration: 3109
Loss Agent1:0.560829758644104
Loss Agent2:0.5219112634658813
tensor([1.4241, 0.6272])
tensor([1.7535, 0.8316])
tensor([1.4663, 0.6976])
tensor([1.7988, 0.8495])

 Iteration: 3110
Loss Agent1:0.5319055914878845
Loss Agent2:0.5278756022453308
tensor([1.4240, 0.6272])
tensor([1.7533, 0.8316])
tensor([1.4661, 0.6975])
tensor([1.7987, 0.8495])

 Iteration: 3111
Loss Agent1:0.5481455326080322
Loss Agent2:0.5230542421340942
tensor([1.4239, 0.6271])
tensor([1.7532,

tensor([1.4181, 0.6260])
tensor([1.7468, 0.8298])
tensor([1.4599, 0.6938])
tensor([1.7888, 0.8443])

 Iteration: 3154
Loss Agent1:0.507886528968811
Loss Agent2:0.500624418258667
tensor([1.4179, 0.6259])
tensor([1.7466, 0.8297])
tensor([1.4598, 0.6938])
tensor([1.7886, 0.8443])

 Iteration: 3155
Loss Agent1:0.5575975775718689
Loss Agent2:0.4995662271976471
tensor([1.4177, 0.6259])
tensor([1.7464, 0.8297])
tensor([1.4596, 0.6937])
tensor([1.7884, 0.8442])

 Iteration: 3156
Loss Agent1:0.4956906735897064
Loss Agent2:0.5470972061157227
tensor([1.4176, 0.6258])
tensor([1.7461, 0.8296])
tensor([1.4595, 0.6937])
tensor([1.7883, 0.8442])

 Iteration: 3157
Loss Agent1:0.5314055681228638
Loss Agent2:0.5846952795982361
tensor([1.4174, 0.6258])
tensor([1.7459, 0.8296])
tensor([1.4594, 0.6937])
tensor([1.7881, 0.8442])

 Iteration: 3158
Loss Agent1:0.5157746076583862
Loss Agent2:0.4884088337421417
tensor([1.4172, 0.6257])
tensor([1.7456, 0.8295])
tensor([1.4593, 0.6937])
tensor([1.7880, 0.8442])

 

Loss Agent2:0.5201318264007568
tensor([1.4114, 0.6243])
tensor([1.7384, 0.8274])
tensor([1.4540, 0.6912])
tensor([1.7798, 0.8409])

 Iteration: 3202
Loss Agent1:0.5000051856040955
Loss Agent2:0.5088260769844055
tensor([1.4112, 0.6242])
tensor([1.7382, 0.8273])
tensor([1.4539, 0.6912])
tensor([1.7796, 0.8409])

 Iteration: 3203
Loss Agent1:0.4975707530975342
Loss Agent2:0.4779442548751831
tensor([1.4110, 0.6241])
tensor([1.7380, 0.8272])
tensor([1.4537, 0.6911])
tensor([1.7793, 0.8408])

 Iteration: 3204
Loss Agent1:0.5219205021858215
Loss Agent2:0.5378390550613403
tensor([1.4109, 0.6240])
tensor([1.7378, 0.8272])
tensor([1.4536, 0.6911])
tensor([1.7791, 0.8407])

 Iteration: 3205
Loss Agent1:0.5232663750648499
Loss Agent2:0.5519863367080688
tensor([1.4107, 0.6240])
tensor([1.7376, 0.8271])
tensor([1.4535, 0.6910])
tensor([1.7789, 0.8407])

 Iteration: 3206
Loss Agent1:0.5197938680648804
Loss Agent2:0.5346449017524719
tensor([1.4106, 0.6239])
tensor([1.7374, 0.8270])
tensor([1.4534, 0.6

Loss Agent1:0.5020582675933838
Loss Agent2:0.5280373692512512
tensor([1.4048, 0.6221])
tensor([1.7306, 0.8252])
tensor([1.4473, 0.6900])
tensor([1.7694, 0.8391])

 Iteration: 3253
Loss Agent1:0.46706461906433105
Loss Agent2:0.5226987600326538
tensor([1.4046, 0.6221])
tensor([1.7305, 0.8251])
tensor([1.4472, 0.6901])
tensor([1.7692, 0.8392])

 Iteration: 3254
Loss Agent1:0.5232077240943909
Loss Agent2:0.5306766629219055
tensor([1.4045, 0.6220])
tensor([1.7303, 0.8251])
tensor([1.4470, 0.6901])
tensor([1.7691, 0.8393])

 Iteration: 3255
Loss Agent1:0.5047728419303894
Loss Agent2:0.5096046924591064
tensor([1.4043, 0.6220])
tensor([1.7301, 0.8250])
tensor([1.4469, 0.6902])
tensor([1.7689, 0.8393])

 Iteration: 3256
Loss Agent1:0.5265634655952454
Loss Agent2:0.5138195753097534
tensor([1.4042, 0.6219])
tensor([1.7300, 0.8250])
tensor([1.4468, 0.6903])
tensor([1.7687, 0.8394])

 Iteration: 3257
Loss Agent1:0.5205831527709961
Loss Agent2:0.5438811182975769
tensor([1.4040, 0.6219])
tensor([1.72

Loss Agent1:0.5063709616661072
Loss Agent2:0.49402713775634766
tensor([1.3990, 0.6211])
tensor([1.7239, 0.8241])
tensor([1.4424, 0.6911])
tensor([1.7626, 0.8404])

 Iteration: 3299
Loss Agent1:0.498916894197464
Loss Agent2:0.5338535308837891
tensor([1.3989, 0.6211])
tensor([1.7237, 0.8241])
tensor([1.4422, 0.6911])
tensor([1.7624, 0.8404])

 Iteration: 3300
Loss Agent1:0.5037064552307129
Loss Agent2:0.4826430380344391
tensor([1.3987, 0.6211])
tensor([1.7236, 0.8240])
tensor([1.4421, 0.6911])
tensor([1.7623, 0.8404])

 Iteration: 3301
Loss Agent1:0.4580681622028351
Loss Agent2:0.49809110164642334
tensor([1.3986, 0.6211])
tensor([1.7234, 0.8240])
tensor([1.4420, 0.6911])
tensor([1.7622, 0.8404])

 Iteration: 3302
Loss Agent1:0.4713144302368164
Loss Agent2:0.4744970500469208
tensor([1.3984, 0.6210])
tensor([1.7233, 0.8239])
tensor([1.4419, 0.6910])
tensor([1.7621, 0.8403])

 Iteration: 3303
Loss Agent1:0.48896828293800354
Loss Agent2:0.48467814922332764
tensor([1.3983, 0.6210])
tensor([1.

Loss Agent1:0.5106548070907593
Loss Agent2:0.5147373080253601
tensor([1.3910, 0.6200])
tensor([1.7154, 0.8222])
tensor([1.4340, 0.6902])
tensor([1.7530, 0.8393])

 Iteration: 3351
Loss Agent1:0.5100054144859314
Loss Agent2:0.518805205821991
tensor([1.3909, 0.6200])
tensor([1.7153, 0.8222])
tensor([1.4338, 0.6902])
tensor([1.7527, 0.8393])

 Iteration: 3352
Loss Agent1:0.5186612606048584
Loss Agent2:0.5195437073707581
tensor([1.3908, 0.6199])
tensor([1.7151, 0.8221])
tensor([1.4337, 0.6901])
tensor([1.7525, 0.8393])

 Iteration: 3353
Loss Agent1:0.5051180720329285
Loss Agent2:0.5107298493385315
tensor([1.3907, 0.6199])
tensor([1.7150, 0.8221])
tensor([1.4335, 0.6901])
tensor([1.7524, 0.8394])

 Iteration: 3354
Loss Agent1:0.45740222930908203
Loss Agent2:0.5166440010070801
tensor([1.3906, 0.6198])
tensor([1.7149, 0.8220])
tensor([1.4334, 0.6901])
tensor([1.7522, 0.8394])

 Iteration: 3355
Loss Agent1:0.5178697109222412
Loss Agent2:0.5102665424346924
tensor([1.3904, 0.6198])
tensor([1.714

Loss Agent2:0.5005722641944885
tensor([1.3843, 0.6190])
tensor([1.7081, 0.8207])
tensor([1.4280, 0.6896])
tensor([1.7445, 0.8392])

 Iteration: 3399
Loss Agent1:0.5010402202606201
Loss Agent2:0.4662688970565796
tensor([1.3842, 0.6190])
tensor([1.7080, 0.8206])
tensor([1.4280, 0.6896])
tensor([1.7444, 0.8392])

 Iteration: 3400
Loss Agent1:0.5057727098464966
Loss Agent2:0.45629769563674927
tensor([1.3840, 0.6190])
tensor([1.7079, 0.8206])
tensor([1.4278, 0.6895])
tensor([1.7442, 0.8392])

 Iteration: 3401
Loss Agent1:0.45478904247283936
Loss Agent2:0.5156005620956421
tensor([1.3839, 0.6190])
tensor([1.7078, 0.8206])
tensor([1.4277, 0.6895])
tensor([1.7441, 0.8391])

 Iteration: 3402
Loss Agent1:0.4907355308532715
Loss Agent2:0.5244722366333008
tensor([1.3838, 0.6189])
tensor([1.7077, 0.8206])
tensor([1.4276, 0.6895])
tensor([1.7439, 0.8391])

 Iteration: 3403
Loss Agent1:0.49787670373916626
Loss Agent2:0.49015215039253235
tensor([1.3837, 0.6189])
tensor([1.7075, 0.8206])
tensor([1.4274,

Loss Agent2:0.502894401550293
tensor([1.3759, 0.6176])
tensor([1.6997, 0.8191])
tensor([1.4216, 0.6893])
tensor([1.7372, 0.8388])

 Iteration: 3451
Loss Agent1:0.4556066691875458
Loss Agent2:0.5161401629447937
tensor([1.3757, 0.6175])
tensor([1.6995, 0.8190])
tensor([1.4216, 0.6892])
tensor([1.7372, 0.8388])

 Iteration: 3452
Loss Agent1:0.5346381664276123
Loss Agent2:0.5134097337722778
tensor([1.3756, 0.6174])
tensor([1.6993, 0.8189])
tensor([1.4215, 0.6892])
tensor([1.7371, 0.8387])

 Iteration: 3453
Loss Agent1:0.5106547474861145
Loss Agent2:0.4972832202911377
tensor([1.3755, 0.6173])
tensor([1.6992, 0.8188])
tensor([1.4215, 0.6892])
tensor([1.7371, 0.8387])

 Iteration: 3454
Loss Agent1:0.4865803122520447
Loss Agent2:0.5273651480674744
tensor([1.3753, 0.6172])
tensor([1.6990, 0.8187])
tensor([1.4215, 0.6892])
tensor([1.7370, 0.8387])

 Iteration: 3455
Loss Agent1:0.47900545597076416
Loss Agent2:0.48477742075920105
tensor([1.3752, 0.6172])
tensor([1.6989, 0.8186])
tensor([1.4214, 0.

Loss Agent1:0.4713670313358307
Loss Agent2:0.5040507912635803
tensor([1.3693, 0.6145])
tensor([1.6937, 0.8160])
tensor([1.4167, 0.6880])
tensor([1.7313, 0.8376])

 Iteration: 3501
Loss Agent1:0.47628483176231384
Loss Agent2:0.48927950859069824
tensor([1.3692, 0.6145])
tensor([1.6936, 0.8160])
tensor([1.4167, 0.6880])
tensor([1.7312, 0.8376])

 Iteration: 3502
Loss Agent1:0.4709717035293579
Loss Agent2:0.5074798464775085
tensor([1.3690, 0.6145])
tensor([1.6934, 0.8160])
tensor([1.4166, 0.6880])
tensor([1.7311, 0.8376])

 Iteration: 3503
Loss Agent1:0.47620689868927
Loss Agent2:0.4964502453804016
tensor([1.3689, 0.6144])
tensor([1.6933, 0.8160])
tensor([1.4165, 0.6881])
tensor([1.7310, 0.8377])

 Iteration: 3504
Loss Agent1:0.45137131214141846
Loss Agent2:0.4976775646209717
tensor([1.3687, 0.6144])
tensor([1.6932, 0.8159])
tensor([1.4164, 0.6881])
tensor([1.7309, 0.8378])

 Iteration: 3505
Loss Agent1:0.4868968427181244
Loss Agent2:0.501325249671936
tensor([1.3686, 0.6143])
tensor([1.693

Loss Agent2:0.4546912908554077
tensor([1.3620, 0.6138])
tensor([1.6872, 0.8165])
tensor([1.4095, 0.6892])
tensor([1.7225, 0.8393])

 Iteration: 3557
Loss Agent1:0.4816666543483734
Loss Agent2:0.45107847452163696
tensor([1.3618, 0.6138])
tensor([1.6870, 0.8165])
tensor([1.4094, 0.6891])
tensor([1.7223, 0.8393])

 Iteration: 3558
Loss Agent1:0.4704417288303375
Loss Agent2:0.4922383427619934
tensor([1.3616, 0.6138])
tensor([1.6869, 0.8166])
tensor([1.4092, 0.6891])
tensor([1.7221, 0.8392])

 Iteration: 3559
Loss Agent1:0.4685191512107849
Loss Agent2:0.4711342751979828
tensor([1.3615, 0.6138])
tensor([1.6867, 0.8166])
tensor([1.4091, 0.6890])
tensor([1.7219, 0.8392])

 Iteration: 3560
Loss Agent1:0.48353084921836853
Loss Agent2:0.5043648481369019
tensor([1.3613, 0.6138])
tensor([1.6865, 0.8166])
tensor([1.4090, 0.6890])
tensor([1.7218, 0.8391])

 Iteration: 3561
Loss Agent1:0.49942952394485474
Loss Agent2:0.48078346252441406
tensor([1.3612, 0.6138])
tensor([1.6863, 0.8166])
tensor([1.4089,

Loss Agent2:0.44064873456954956
tensor([1.3543, 0.6122])
tensor([1.6793, 0.8156])
tensor([1.4038, 0.6870])
tensor([1.7153, 0.8373])

 Iteration: 3606
Loss Agent1:0.4375912547111511
Loss Agent2:0.46391037106513977
tensor([1.3542, 0.6121])
tensor([1.6792, 0.8156])
tensor([1.4037, 0.6869])
tensor([1.7151, 0.8373])

 Iteration: 3607
Loss Agent1:0.4516652822494507
Loss Agent2:0.48568305373191833
tensor([1.3541, 0.6121])
tensor([1.6791, 0.8155])
tensor([1.4036, 0.6869])
tensor([1.7150, 0.8372])

 Iteration: 3608
Loss Agent1:0.5317452549934387
Loss Agent2:0.47522130608558655
tensor([1.3540, 0.6120])
tensor([1.6790, 0.8155])
tensor([1.4034, 0.6869])
tensor([1.7148, 0.8372])

 Iteration: 3609
Loss Agent1:0.4651312828063965
Loss Agent2:0.4882779121398926
tensor([1.3539, 0.6120])
tensor([1.6789, 0.8155])
tensor([1.4033, 0.6869])
tensor([1.7146, 0.8373])

 Iteration: 3610
Loss Agent1:0.4678460955619812
Loss Agent2:0.48051828145980835
tensor([1.3538, 0.6119])
tensor([1.6788, 0.8154])
tensor([1.4032

Loss Agent2:0.5003306865692139
tensor([1.3474, 0.6102])
tensor([1.6726, 0.8138])
tensor([1.3991, 0.6865])
tensor([1.7104, 0.8379])

 Iteration: 3653
Loss Agent1:0.5089766979217529
Loss Agent2:0.47594714164733887
tensor([1.3473, 0.6102])
tensor([1.6725, 0.8138])
tensor([1.3990, 0.6865])
tensor([1.7103, 0.8380])

 Iteration: 3654
Loss Agent1:0.44307053089141846
Loss Agent2:0.5134936571121216
tensor([1.3471, 0.6101])
tensor([1.6723, 0.8138])
tensor([1.3989, 0.6866])
tensor([1.7102, 0.8380])

 Iteration: 3655
Loss Agent1:0.40861502289772034
Loss Agent2:0.4495829939842224
tensor([1.3470, 0.6101])
tensor([1.6722, 0.8137])
tensor([1.3988, 0.6866])
tensor([1.7100, 0.8381])

 Iteration: 3656
Loss Agent1:0.41380980610847473
Loss Agent2:0.4799218773841858
tensor([1.3469, 0.6100])
tensor([1.6720, 0.8136])
tensor([1.3987, 0.6866])
tensor([1.7099, 0.8382])

 Iteration: 3657
Loss Agent1:0.4815928041934967
Loss Agent2:0.426033079624176
tensor([1.3467, 0.6099])
tensor([1.6719, 0.8136])
tensor([1.3985, 

Loss Agent1:0.47561147809028625
Loss Agent2:0.4743528366088867
tensor([1.3416, 0.6096])
tensor([1.6674, 0.8138])
tensor([1.3931, 0.6876])
tensor([1.7043, 0.8404])

 Iteration: 3700
Loss Agent1:0.47974151372909546
Loss Agent2:0.4969192147254944
tensor([1.3415, 0.6096])
tensor([1.6673, 0.8138])
tensor([1.3930, 0.6876])
tensor([1.7043, 0.8404])

 Iteration: 3701
Loss Agent1:0.465923547744751
Loss Agent2:0.47821229696273804
tensor([1.3415, 0.6096])
tensor([1.6672, 0.8138])
tensor([1.3929, 0.6877])
tensor([1.7042, 0.8405])

 Iteration: 3702
Loss Agent1:0.46077439188957214
Loss Agent2:0.5060467720031738
tensor([1.3413, 0.6096])
tensor([1.6672, 0.8138])
tensor([1.3929, 0.6877])
tensor([1.7041, 0.8405])

 Iteration: 3703
Loss Agent1:0.47860974073410034
Loss Agent2:0.4161534607410431
tensor([1.3413, 0.6096])
tensor([1.6671, 0.8138])
tensor([1.3928, 0.6877])
tensor([1.7040, 0.8405])

 Iteration: 3704
Loss Agent1:0.4528409242630005
Loss Agent2:0.43180397152900696
tensor([1.3412, 0.6095])
tensor([

Loss Agent1:0.48109757900238037
Loss Agent2:0.47890087962150574
tensor([1.3361, 0.6077])
tensor([1.6623, 0.8129])
tensor([1.3880, 0.6855])
tensor([1.6977, 0.8375])

 Iteration: 3749
Loss Agent1:0.4535791277885437
Loss Agent2:0.4199070334434509
tensor([1.3360, 0.6077])
tensor([1.6622, 0.8129])
tensor([1.3879, 0.6854])
tensor([1.6976, 0.8374])

 Iteration: 3750
Loss Agent1:0.4821586012840271
Loss Agent2:0.4651508033275604
tensor([1.3358, 0.6077])
tensor([1.6621, 0.8129])
tensor([1.3878, 0.6853])
tensor([1.6974, 0.8373])

 Iteration: 3751
Loss Agent1:0.49190711975097656
Loss Agent2:0.40438634157180786
tensor([1.3357, 0.6077])
tensor([1.6620, 0.8129])
tensor([1.3876, 0.6852])
tensor([1.6973, 0.8371])

 Iteration: 3752
Loss Agent1:0.4493563771247864
Loss Agent2:0.41951096057891846
tensor([1.3356, 0.6076])
tensor([1.6618, 0.8129])
tensor([1.3875, 0.6851])
tensor([1.6971, 0.8370])

 Iteration: 3753
Loss Agent1:0.4457019865512848
Loss Agent2:0.47475630044937134
tensor([1.3355, 0.6076])
tensor(

Loss Agent2:0.4752352833747864
tensor([1.3299, 0.6061])
tensor([1.6566, 0.8121])
tensor([1.3832, 0.6855])
tensor([1.6933, 0.8377])

 Iteration: 3800
Loss Agent1:0.43616920709609985
Loss Agent2:0.4633633494377136
tensor([1.3298, 0.6060])
tensor([1.6565, 0.8120])
tensor([1.3831, 0.6855])
tensor([1.6933, 0.8377])

 Iteration: 3801
Loss Agent1:0.44223424792289734
Loss Agent2:0.4330493211746216
tensor([1.3296, 0.6060])
tensor([1.6564, 0.8120])
tensor([1.3831, 0.6854])
tensor([1.6933, 0.8377])

 Iteration: 3802
Loss Agent1:0.45535701513290405
Loss Agent2:0.45615318417549133
tensor([1.3295, 0.6059])
tensor([1.6563, 0.8120])
tensor([1.3830, 0.6854])
tensor([1.6932, 0.8378])

 Iteration: 3803
Loss Agent1:0.40841272473335266
Loss Agent2:0.466017484664917
tensor([1.3294, 0.6059])
tensor([1.6561, 0.8119])
tensor([1.3829, 0.6854])
tensor([1.6932, 0.8378])

 Iteration: 3804
Loss Agent1:0.47143858671188354
Loss Agent2:0.46829134225845337
tensor([1.3292, 0.6059])
tensor([1.6560, 0.8119])
tensor([1.382

Loss Agent1:0.47137197852134705
Loss Agent2:0.45164117217063904
tensor([1.3241, 0.6036])
tensor([1.6511, 0.8091])
tensor([1.3787, 0.6832])
tensor([1.6898, 0.8348])

 Iteration: 3859
Loss Agent1:0.4336845278739929
Loss Agent2:0.42427921295166016
tensor([1.3240, 0.6036])
tensor([1.6511, 0.8092])
tensor([1.3786, 0.6831])
tensor([1.6898, 0.8348])

 Iteration: 3860
Loss Agent1:0.4301372468471527
Loss Agent2:0.4777154326438904
tensor([1.3239, 0.6036])
tensor([1.6510, 0.8092])
tensor([1.3785, 0.6831])
tensor([1.6897, 0.8349])

 Iteration: 3861
Loss Agent1:0.41869187355041504
Loss Agent2:0.42352592945098877
tensor([1.3238, 0.6036])
tensor([1.6509, 0.8092])
tensor([1.3785, 0.6831])
tensor([1.6897, 0.8348])

 Iteration: 3862
Loss Agent1:0.4455992579460144
Loss Agent2:0.42106422781944275
tensor([1.3237, 0.6036])
tensor([1.6508, 0.8092])
tensor([1.3784, 0.6831])
tensor([1.6896, 0.8348])

 Iteration: 3863
Loss Agent1:0.4482181668281555
Loss Agent2:0.4456281065940857
tensor([1.3236, 0.6036])
tensor(

tensor([1.3195, 0.6030])
tensor([1.6471, 0.8092])
tensor([1.3751, 0.6831])
tensor([1.6867, 0.8356])

 Iteration: 3907
Loss Agent1:0.44674617052078247
Loss Agent2:0.502013087272644
tensor([1.3194, 0.6030])
tensor([1.6470, 0.8093])
tensor([1.3750, 0.6832])
tensor([1.6866, 0.8357])

 Iteration: 3908
Loss Agent1:0.4384290874004364
Loss Agent2:0.4834091067314148
tensor([1.3193, 0.6030])
tensor([1.6469, 0.8093])
tensor([1.3750, 0.6833])
tensor([1.6866, 0.8358])

 Iteration: 3909
Loss Agent1:0.4736543297767639
Loss Agent2:0.45625340938568115
tensor([1.3192, 0.6029])
tensor([1.6469, 0.8093])
tensor([1.3749, 0.6834])
tensor([1.6865, 0.8359])

 Iteration: 3910
Loss Agent1:0.45683208107948303
Loss Agent2:0.4431133270263672
tensor([1.3192, 0.6030])
tensor([1.6468, 0.8094])
tensor([1.3749, 0.6834])
tensor([1.6864, 0.8360])

 Iteration: 3911
Loss Agent1:0.4545172452926636
Loss Agent2:0.46599090099334717
tensor([1.3191, 0.6030])
tensor([1.6467, 0.8095])
tensor([1.3748, 0.6835])
tensor([1.6863, 0.8361

Loss Agent1:0.4432280659675598
Loss Agent2:0.4412863552570343
tensor([1.3147, 0.6035])
tensor([1.6429, 0.8113])
tensor([1.3697, 0.6820])
tensor([1.6809, 0.8346])

 Iteration: 3961
Loss Agent1:0.4505915343761444
Loss Agent2:0.4498065710067749
tensor([1.3147, 0.6035])
tensor([1.6429, 0.8113])
tensor([1.3696, 0.6819])
tensor([1.6808, 0.8345])

 Iteration: 3962
Loss Agent1:0.4146732687950134
Loss Agent2:0.48326733708381653
tensor([1.3146, 0.6035])
tensor([1.6429, 0.8113])
tensor([1.3695, 0.6819])
tensor([1.6807, 0.8343])

 Iteration: 3963
Loss Agent1:0.429304838180542
Loss Agent2:0.4650382697582245
tensor([1.3145, 0.6035])
tensor([1.6428, 0.8113])
tensor([1.3695, 0.6818])
tensor([1.6806, 0.8342])

 Iteration: 3964
Loss Agent1:0.4169817566871643
Loss Agent2:0.4784117639064789
tensor([1.3145, 0.6035])
tensor([1.6428, 0.8113])
tensor([1.3694, 0.6817])
tensor([1.6806, 0.8341])

 Iteration: 3965
Loss Agent1:0.425558477640152
Loss Agent2:0.4442361891269684
tensor([1.3144, 0.6035])
tensor([1.6428

Loss Agent2:0.4201713800430298
tensor([1.3112, 0.6025])
tensor([1.6399, 0.8103])
tensor([1.3659, 0.6805])
tensor([1.6783, 0.8335])

 Iteration: 4009
Loss Agent1:0.4261891543865204
Loss Agent2:0.4760853350162506
tensor([1.3111, 0.6026])
tensor([1.6398, 0.8104])
tensor([1.3658, 0.6805])
tensor([1.6782, 0.8335])

 Iteration: 4010
Loss Agent1:0.45941972732543945
Loss Agent2:0.46551358699798584
tensor([1.3110, 0.6026])
tensor([1.6397, 0.8105])
tensor([1.3658, 0.6805])
tensor([1.6782, 0.8335])

 Iteration: 4011
Loss Agent1:0.47171905636787415
Loss Agent2:0.4926658868789673
tensor([1.3110, 0.6027])
tensor([1.6397, 0.8105])
tensor([1.3657, 0.6805])
tensor([1.6781, 0.8335])

 Iteration: 4012
Loss Agent1:0.4291592240333557
Loss Agent2:0.44213777780532837
tensor([1.3109, 0.6027])
tensor([1.6397, 0.8106])
tensor([1.3657, 0.6805])
tensor([1.6781, 0.8335])

 Iteration: 4013
Loss Agent1:0.42526909708976746
Loss Agent2:0.44848471879959106
tensor([1.3109, 0.6027])
tensor([1.6396, 0.8106])
tensor([1.365

Loss Agent1:0.44884026050567627
Loss Agent2:0.4487224519252777
tensor([1.3074, 0.6022])
tensor([1.6369, 0.8104])
tensor([1.3619, 0.6811])
tensor([1.6741, 0.8358])

 Iteration: 4059
Loss Agent1:0.40032678842544556
Loss Agent2:0.4685972034931183
tensor([1.3073, 0.6022])
tensor([1.6368, 0.8104])
tensor([1.3618, 0.6810])
tensor([1.6740, 0.8358])

 Iteration: 4060
Loss Agent1:0.4183432161808014
Loss Agent2:0.4391930103302002
tensor([1.3072, 0.6022])
tensor([1.6368, 0.8103])
tensor([1.3618, 0.6810])
tensor([1.6739, 0.8357])

 Iteration: 4061
Loss Agent1:0.42437100410461426
Loss Agent2:0.41439345479011536
tensor([1.3071, 0.6021])
tensor([1.6367, 0.8103])
tensor([1.3617, 0.6809])
tensor([1.6739, 0.8357])

 Iteration: 4062
Loss Agent1:0.4329625964164734
Loss Agent2:0.40859729051589966
tensor([1.3071, 0.6021])
tensor([1.6367, 0.8102])
tensor([1.3616, 0.6809])
tensor([1.6738, 0.8356])

 Iteration: 4063
Loss Agent1:0.379182368516922
Loss Agent2:0.4256405532360077
tensor([1.3070, 0.6020])
tensor([1

Loss Agent1:0.4149186611175537
Loss Agent2:0.4333457946777344
tensor([1.3033, 0.6003])
tensor([1.6337, 0.8089])
tensor([1.3575, 0.6787])
tensor([1.6691, 0.8337])

 Iteration: 4110
Loss Agent1:0.4465593695640564
Loss Agent2:0.3976110816001892
tensor([1.3032, 0.6003])
tensor([1.6336, 0.8089])
tensor([1.3574, 0.6786])
tensor([1.6691, 0.8337])

 Iteration: 4111
Loss Agent1:0.42116785049438477
Loss Agent2:0.43797609210014343
tensor([1.3031, 0.6003])
tensor([1.6335, 0.8089])
tensor([1.3573, 0.6786])
tensor([1.6690, 0.8337])

 Iteration: 4112
Loss Agent1:0.4378414452075958
Loss Agent2:0.4082452654838562
tensor([1.3031, 0.6003])
tensor([1.6335, 0.8090])
tensor([1.3572, 0.6786])
tensor([1.6689, 0.8337])

 Iteration: 4113
Loss Agent1:0.4834330677986145
Loss Agent2:0.43151646852493286
tensor([1.3030, 0.6003])
tensor([1.6334, 0.8090])
tensor([1.3571, 0.6786])
tensor([1.6688, 0.8337])

 Iteration: 4114
Loss Agent1:0.4191644787788391
Loss Agent2:0.41434913873672485
tensor([1.3029, 0.6003])
tensor([1

Loss Agent1:0.4437131881713867
Loss Agent2:0.45239436626434326
tensor([1.3006, 0.6000])
tensor([1.6320, 0.8093])
tensor([1.3543, 0.6784])
tensor([1.6667, 0.8346])

 Iteration: 4158
Loss Agent1:0.3880871534347534
Loss Agent2:0.4118683934211731
tensor([1.3006, 0.6000])
tensor([1.6319, 0.8092])
tensor([1.3542, 0.6784])
tensor([1.6666, 0.8346])

 Iteration: 4159
Loss Agent1:0.3569001853466034
Loss Agent2:0.4090502858161926
tensor([1.3005, 0.5999])
tensor([1.6319, 0.8092])
tensor([1.3542, 0.6784])
tensor([1.6666, 0.8346])

 Iteration: 4160
Loss Agent1:0.46197542548179626
Loss Agent2:0.37855347990989685
tensor([1.3004, 0.5999])
tensor([1.6318, 0.8092])
tensor([1.3541, 0.6783])
tensor([1.6665, 0.8346])

 Iteration: 4161
Loss Agent1:0.44533228874206543
Loss Agent2:0.44265756011009216
tensor([1.3004, 0.5999])
tensor([1.6318, 0.8092])
tensor([1.3540, 0.6783])
tensor([1.6664, 0.8346])

 Iteration: 4162
Loss Agent1:0.38343724608421326
Loss Agent2:0.37340590357780457
tensor([1.3003, 0.5999])
tensor

Loss Agent2:0.44817790389060974
tensor([1.2977, 0.6006])
tensor([1.6301, 0.8106])
tensor([1.3487, 0.6766])
tensor([1.6616, 0.8344])

 Iteration: 4207
Loss Agent1:0.3728393316268921
Loss Agent2:0.4314090311527252
tensor([1.2976, 0.6006])
tensor([1.6300, 0.8107])
tensor([1.3486, 0.6766])
tensor([1.6616, 0.8344])

 Iteration: 4208
Loss Agent1:0.40638548135757446
Loss Agent2:0.44782859086990356
tensor([1.2975, 0.6006])
tensor([1.6300, 0.8107])
tensor([1.3485, 0.6766])
tensor([1.6615, 0.8344])

 Iteration: 4209
Loss Agent1:0.38607487082481384
Loss Agent2:0.4311216473579407
tensor([1.2974, 0.6006])
tensor([1.6299, 0.8108])
tensor([1.3484, 0.6766])
tensor([1.6615, 0.8344])

 Iteration: 4210
Loss Agent1:0.4603460729122162
Loss Agent2:0.4097556471824646
tensor([1.2973, 0.6007])
tensor([1.6298, 0.8109])
tensor([1.3483, 0.6766])
tensor([1.6614, 0.8344])

 Iteration: 4211
Loss Agent1:0.41705653071403503
Loss Agent2:0.43282586336135864
tensor([1.2972, 0.6007])
tensor([1.6297, 0.8109])
tensor([1.348

Loss Agent1:0.43384256958961487
Loss Agent2:0.41848695278167725
tensor([1.2945, 0.6004])
tensor([1.6276, 0.8111])
tensor([1.3453, 0.6770])
tensor([1.6590, 0.8351])

 Iteration: 4257
Loss Agent1:0.3985894024372101
Loss Agent2:0.38122180104255676
tensor([1.2945, 0.6004])
tensor([1.6276, 0.8111])
tensor([1.3452, 0.6770])
tensor([1.6589, 0.8350])

 Iteration: 4258
Loss Agent1:0.4142587184906006
Loss Agent2:0.44936567544937134
tensor([1.2944, 0.6004])
tensor([1.6275, 0.8111])
tensor([1.3452, 0.6769])
tensor([1.6589, 0.8350])

 Iteration: 4259
Loss Agent1:0.41362571716308594
Loss Agent2:0.38287168741226196
tensor([1.2944, 0.6003])
tensor([1.6275, 0.8111])
tensor([1.3451, 0.6769])
tensor([1.6588, 0.8349])

 Iteration: 4260
Loss Agent1:0.4130902588367462
Loss Agent2:0.4574849605560303
tensor([1.2943, 0.6003])
tensor([1.6275, 0.8111])
tensor([1.3450, 0.6769])
tensor([1.6587, 0.8349])

 Iteration: 4261
Loss Agent1:0.4289489984512329
Loss Agent2:0.4167894124984741
tensor([1.2943, 0.6003])
tensor(

Loss Agent2:0.41714122891426086
tensor([1.2924, 0.6002])
tensor([1.6267, 0.8115])
tensor([1.3419, 0.6779])
tensor([1.6573, 0.8367])

 Iteration: 4309
Loss Agent1:0.39775627851486206
Loss Agent2:0.39844241738319397
tensor([1.2923, 0.6002])
tensor([1.6267, 0.8116])
tensor([1.3418, 0.6780])
tensor([1.6573, 0.8368])

 Iteration: 4310
Loss Agent1:0.4198076128959656
Loss Agent2:0.3975543975830078
tensor([1.2923, 0.6003])
tensor([1.6267, 0.8117])
tensor([1.3417, 0.6780])
tensor([1.6572, 0.8368])

 Iteration: 4311
Loss Agent1:0.3893700838088989
Loss Agent2:0.3448264002799988
tensor([1.2923, 0.6003])
tensor([1.6267, 0.8117])
tensor([1.3415, 0.6779])
tensor([1.6570, 0.8368])

 Iteration: 4312
Loss Agent1:0.4561925530433655
Loss Agent2:0.3798220753669739
tensor([1.2922, 0.6004])
tensor([1.6266, 0.8118])
tensor([1.3414, 0.6779])
tensor([1.6569, 0.8368])

 Iteration: 4313
Loss Agent1:0.396762490272522
Loss Agent2:0.3987286686897278
tensor([1.2922, 0.6005])
tensor([1.6266, 0.8119])
tensor([1.3413, 0

Loss Agent1:0.40584659576416016
Loss Agent2:0.399897962808609
tensor([1.2892, 0.6023])
tensor([1.6246, 0.8142])
tensor([1.3370, 0.6764])
tensor([1.6530, 0.8352])

 Iteration: 4364
Loss Agent1:0.42601147294044495
Loss Agent2:0.3845437169075012
tensor([1.2891, 0.6022])
tensor([1.6245, 0.8142])
tensor([1.3369, 0.6764])
tensor([1.6530, 0.8351])

 Iteration: 4365
Loss Agent1:0.4097655415534973
Loss Agent2:0.42589694261550903
tensor([1.2891, 0.6022])
tensor([1.6245, 0.8142])
tensor([1.3369, 0.6764])
tensor([1.6529, 0.8351])

 Iteration: 4366
Loss Agent1:0.42484697699546814
Loss Agent2:0.37110376358032227
tensor([1.2890, 0.6022])
tensor([1.6245, 0.8143])
tensor([1.3368, 0.6763])
tensor([1.6529, 0.8351])

 Iteration: 4367
Loss Agent1:0.41115033626556396
Loss Agent2:0.4022786617279053
tensor([1.2890, 0.6023])
tensor([1.6245, 0.8143])
tensor([1.3368, 0.6762])
tensor([1.6529, 0.8350])

 Iteration: 4368
Loss Agent1:0.43040895462036133
Loss Agent2:0.4219910502433777
tensor([1.2889, 0.6023])
tensor(

Loss Agent2:0.3851623833179474
tensor([1.2866, 0.6032])
tensor([1.6229, 0.8156])
tensor([1.3333, 0.6743])
tensor([1.6511, 0.8333])

 Iteration: 4416
Loss Agent1:0.3897922933101654
Loss Agent2:0.3705143630504608
tensor([1.2866, 0.6032])
tensor([1.6229, 0.8156])
tensor([1.3333, 0.6743])
tensor([1.6511, 0.8333])

 Iteration: 4417
Loss Agent1:0.3943628668785095
Loss Agent2:0.4245028495788574
tensor([1.2865, 0.6031])
tensor([1.6228, 0.8156])
tensor([1.3333, 0.6743])
tensor([1.6512, 0.8333])

 Iteration: 4418
Loss Agent1:0.393658310174942
Loss Agent2:0.3807990550994873
tensor([1.2865, 0.6031])
tensor([1.6228, 0.8156])
tensor([1.3332, 0.6742])
tensor([1.6512, 0.8332])

 Iteration: 4419
Loss Agent1:0.37918922305107117
Loss Agent2:0.3827736973762512
tensor([1.2864, 0.6031])
tensor([1.6227, 0.8155])
tensor([1.3332, 0.6742])
tensor([1.6511, 0.8332])

 Iteration: 4420
Loss Agent1:0.4023036062717438
Loss Agent2:0.3512951731681824
tensor([1.2864, 0.6031])
tensor([1.6227, 0.8155])
tensor([1.3331, 0.6

Loss Agent2:0.43789827823638916
tensor([1.2845, 0.6016])
tensor([1.6221, 0.8139])
tensor([1.3316, 0.6741])
tensor([1.6508, 0.8328])

 Iteration: 4463
Loss Agent1:0.3771439492702484
Loss Agent2:0.3867817223072052
tensor([1.2844, 0.6016])
tensor([1.6221, 0.8138])
tensor([1.3316, 0.6741])
tensor([1.6509, 0.8328])

 Iteration: 4464
Loss Agent1:0.3810286521911621
Loss Agent2:0.3842723071575165
tensor([1.2843, 0.6015])
tensor([1.6220, 0.8137])
tensor([1.3316, 0.6741])
tensor([1.6509, 0.8328])

 Iteration: 4465
Loss Agent1:0.4117463529109955
Loss Agent2:0.3705522119998932
tensor([1.2843, 0.6014])
tensor([1.6220, 0.8136])
tensor([1.3315, 0.6741])
tensor([1.6509, 0.8328])

 Iteration: 4466
Loss Agent1:0.34937572479248047
Loss Agent2:0.37040185928344727
tensor([1.2842, 0.6014])
tensor([1.6219, 0.8136])
tensor([1.3315, 0.6740])
tensor([1.6509, 0.8328])

 Iteration: 4467
Loss Agent1:0.3669946491718292
Loss Agent2:0.4092755615711212
tensor([1.2841, 0.6013])
tensor([1.6218, 0.8135])
tensor([1.3315, 

Loss Agent1:0.37050050497055054
Loss Agent2:0.3866409957408905
tensor([1.2820, 0.6006])
tensor([1.6200, 0.8132])
tensor([1.3293, 0.6726])
tensor([1.6491, 0.8314])

 Iteration: 4510
Loss Agent1:0.4615762233734131
Loss Agent2:0.3556719422340393
tensor([1.2820, 0.6007])
tensor([1.6200, 0.8133])
tensor([1.3292, 0.6726])
tensor([1.6490, 0.8313])

 Iteration: 4511
Loss Agent1:0.44155341386795044
Loss Agent2:0.3588874340057373
tensor([1.2820, 0.6007])
tensor([1.6200, 0.8134])
tensor([1.3291, 0.6725])
tensor([1.6489, 0.8313])

 Iteration: 4512
Loss Agent1:0.42682814598083496
Loss Agent2:0.37051233649253845
tensor([1.2820, 0.6008])
tensor([1.6200, 0.8135])
tensor([1.3290, 0.6724])
tensor([1.6488, 0.8312])

 Iteration: 4513
Loss Agent1:0.40572044253349304
Loss Agent2:0.4004902243614197
tensor([1.2820, 0.6009])
tensor([1.6200, 0.8136])
tensor([1.3289, 0.6724])
tensor([1.6488, 0.8311])

 Iteration: 4514
Loss Agent1:0.38401085138320923
Loss Agent2:0.4015592336654663
tensor([1.2820, 0.6009])
tensor(

tensor([1.6475, 0.8314])

 Iteration: 4561
Loss Agent1:0.37135788798332214
Loss Agent2:0.3762631416320801
tensor([1.2808, 0.6016])
tensor([1.6197, 0.8145])
tensor([1.3267, 0.6717])
tensor([1.6474, 0.8314])

 Iteration: 4562
Loss Agent1:0.41361647844314575
Loss Agent2:0.43801751732826233
tensor([1.2808, 0.6016])
tensor([1.6197, 0.8145])
tensor([1.3266, 0.6717])
tensor([1.6474, 0.8314])

 Iteration: 4563
Loss Agent1:0.3567923605442047
Loss Agent2:0.38657504320144653
tensor([1.2807, 0.6016])
tensor([1.6197, 0.8145])
tensor([1.3265, 0.6717])
tensor([1.6474, 0.8315])

 Iteration: 4564
Loss Agent1:0.3950454890727997
Loss Agent2:0.42495983839035034
tensor([1.2807, 0.6016])
tensor([1.6197, 0.8146])
tensor([1.3265, 0.6717])
tensor([1.6473, 0.8315])

 Iteration: 4565
Loss Agent1:0.3610121011734009
Loss Agent2:0.36593374609947205
tensor([1.2807, 0.6016])
tensor([1.6197, 0.8146])
tensor([1.3265, 0.6717])
tensor([1.6473, 0.8316])

 Iteration: 4566
Loss Agent1:0.3868662118911743
Loss Agent2:0.363333

Loss Agent1:0.39891481399536133
Loss Agent2:0.45534008741378784
tensor([1.2781, 0.6022])
tensor([1.6178, 0.8158])
tensor([1.3245, 0.6709])
tensor([1.6471, 0.8325])

 Iteration: 4615
Loss Agent1:0.362981915473938
Loss Agent2:0.4043877422809601
tensor([1.2780, 0.6022])
tensor([1.6177, 0.8158])
tensor([1.3245, 0.6709])
tensor([1.6471, 0.8325])

 Iteration: 4616
Loss Agent1:0.43471479415893555
Loss Agent2:0.36375319957733154
tensor([1.2779, 0.6022])
tensor([1.6177, 0.8158])
tensor([1.3244, 0.6709])
tensor([1.6471, 0.8325])

 Iteration: 4617
Loss Agent1:0.3744523227214813
Loss Agent2:0.3900849223136902
tensor([1.2779, 0.6022])
tensor([1.6176, 0.8159])
tensor([1.3243, 0.6709])
tensor([1.6470, 0.8325])

 Iteration: 4618
Loss Agent1:0.39140528440475464
Loss Agent2:0.400767982006073
tensor([1.2779, 0.6022])
tensor([1.6176, 0.8159])
tensor([1.3243, 0.6710])
tensor([1.6470, 0.8326])

 Iteration: 4619
Loss Agent1:0.3420056104660034
Loss Agent2:0.4072815179824829
tensor([1.2778, 0.6021])
tensor([1.

Loss Agent2:0.38497406244277954
tensor([1.2763, 0.6031])
tensor([1.6169, 0.8172])
tensor([1.3226, 0.6725])
tensor([1.6466, 0.8352])

 Iteration: 4665
Loss Agent1:0.35235562920570374
Loss Agent2:0.34231802821159363
tensor([1.2762, 0.6032])
tensor([1.6169, 0.8173])
tensor([1.3225, 0.6725])
tensor([1.6466, 0.8352])

 Iteration: 4666
Loss Agent1:0.3230160176753998
Loss Agent2:0.4058268070220947
tensor([1.2762, 0.6032])
tensor([1.6168, 0.8173])
tensor([1.3225, 0.6725])
tensor([1.6466, 0.8352])

 Iteration: 4667
Loss Agent1:0.4042544662952423
Loss Agent2:0.35022157430648804
tensor([1.2761, 0.6032])
tensor([1.6168, 0.8174])
tensor([1.3224, 0.6724])
tensor([1.6465, 0.8352])

 Iteration: 4668
Loss Agent1:0.3758130371570587
Loss Agent2:0.3905721604824066
tensor([1.2760, 0.6033])
tensor([1.6168, 0.8174])
tensor([1.3224, 0.6724])
tensor([1.6465, 0.8352])

 Iteration: 4669
Loss Agent1:0.38835346698760986
Loss Agent2:0.3505100607872009
tensor([1.2760, 0.6033])
tensor([1.6167, 0.8174])
tensor([1.3223

Loss Agent2:0.4039827883243561
tensor([1.2735, 0.6034])
tensor([1.6149, 0.8175])
tensor([1.3213, 0.6721])
tensor([1.6475, 0.8359])

 Iteration: 4717
Loss Agent1:0.38571444153785706
Loss Agent2:0.3637414872646332
tensor([1.2735, 0.6034])
tensor([1.6148, 0.8175])
tensor([1.3213, 0.6721])
tensor([1.6475, 0.8360])

 Iteration: 4718
Loss Agent1:0.42291465401649475
Loss Agent2:0.42675015330314636
tensor([1.2734, 0.6034])
tensor([1.6148, 0.8175])
tensor([1.3212, 0.6721])
tensor([1.6475, 0.8360])

 Iteration: 4719
Loss Agent1:0.3724536597728729
Loss Agent2:0.40297025442123413
tensor([1.2734, 0.6034])
tensor([1.6148, 0.8176])
tensor([1.3212, 0.6722])
tensor([1.6475, 0.8361])

 Iteration: 4720
Loss Agent1:0.3898519277572632
Loss Agent2:0.42504116892814636
tensor([1.2734, 0.6034])
tensor([1.6148, 0.8176])
tensor([1.3212, 0.6723])
tensor([1.6475, 0.8362])

 Iteration: 4721
Loss Agent1:0.4343218505382538
Loss Agent2:0.38334381580352783
tensor([1.2733, 0.6034])
tensor([1.6148, 0.8176])
tensor([1.321

Loss Agent2:0.3792799413204193
tensor([1.2727, 0.6039])
tensor([1.6156, 0.8182])
tensor([1.3197, 0.6739])
tensor([1.6477, 0.8390])

 Iteration: 4767
Loss Agent1:0.3684224784374237
Loss Agent2:0.39926677942276
tensor([1.2727, 0.6039])
tensor([1.6156, 0.8181])
tensor([1.3197, 0.6740])
tensor([1.6477, 0.8390])

 Iteration: 4768
Loss Agent1:0.39441412687301636
Loss Agent2:0.4059239625930786
tensor([1.2726, 0.6038])
tensor([1.6156, 0.8181])
tensor([1.3197, 0.6741])
tensor([1.6477, 0.8391])

 Iteration: 4769
Loss Agent1:0.4100188612937927
Loss Agent2:0.37560853362083435
tensor([1.2726, 0.6038])
tensor([1.6156, 0.8180])
tensor([1.3196, 0.6742])
tensor([1.6476, 0.8392])

 Iteration: 4770
Loss Agent1:0.3934355080127716
Loss Agent2:0.3415147662162781
tensor([1.2726, 0.6038])
tensor([1.6156, 0.8180])
tensor([1.3195, 0.6742])
tensor([1.6475, 0.8393])

 Iteration: 4771
Loss Agent1:0.35544899106025696
Loss Agent2:0.34341198205947876
tensor([1.2726, 0.6038])
tensor([1.6156, 0.8180])
tensor([1.3195, 0

Loss Agent2:0.4293976128101349
tensor([1.2711, 0.6042])
tensor([1.6152, 0.8191])
tensor([1.3174, 0.6744])
tensor([1.6457, 0.8396])

 Iteration: 4821
Loss Agent1:0.38669490814208984
Loss Agent2:0.36576345562934875
tensor([1.2711, 0.6043])
tensor([1.6153, 0.8191])
tensor([1.3174, 0.6744])
tensor([1.6456, 0.8396])

 Iteration: 4822
Loss Agent1:0.3584509491920471
Loss Agent2:0.37050366401672363
tensor([1.2711, 0.6043])
tensor([1.6153, 0.8192])
tensor([1.3173, 0.6744])
tensor([1.6456, 0.8397])

 Iteration: 4823
Loss Agent1:0.40464523434638977
Loss Agent2:0.3562496304512024
tensor([1.2710, 0.6043])
tensor([1.6153, 0.8192])
tensor([1.3173, 0.6744])
tensor([1.6456, 0.8397])

 Iteration: 4824
Loss Agent1:0.3859327435493469
Loss Agent2:0.40042635798454285
tensor([1.2710, 0.6043])
tensor([1.6153, 0.8193])
tensor([1.3172, 0.6744])
tensor([1.6455, 0.8397])

 Iteration: 4825
Loss Agent1:0.3564215898513794
Loss Agent2:0.38994577527046204
tensor([1.2710, 0.6043])
tensor([1.6154, 0.8193])
tensor([1.317

tensor([1.2696, 0.6046])
tensor([1.6140, 0.8201])
tensor([1.3149, 0.6746])
tensor([1.6442, 0.8415])

 Iteration: 4870
Loss Agent1:0.4136744439601898
Loss Agent2:0.41060149669647217
tensor([1.2695, 0.6047])
tensor([1.6140, 0.8201])
tensor([1.3149, 0.6746])
tensor([1.6443, 0.8415])

 Iteration: 4871
Loss Agent1:0.4184468984603882
Loss Agent2:0.35302338004112244
tensor([1.2695, 0.6047])
tensor([1.6140, 0.8202])
tensor([1.3149, 0.6746])
tensor([1.6443, 0.8416])

 Iteration: 4872
Loss Agent1:0.34701305627822876
Loss Agent2:0.41285455226898193
tensor([1.2695, 0.6048])
tensor([1.6140, 0.8203])
tensor([1.3149, 0.6747])
tensor([1.6443, 0.8417])

 Iteration: 4873
Loss Agent1:0.37951111793518066
Loss Agent2:0.37929072976112366
tensor([1.2695, 0.6048])
tensor([1.6140, 0.8203])
tensor([1.3149, 0.6747])
tensor([1.6444, 0.8417])

 Iteration: 4874
Loss Agent1:0.3578844368457794
Loss Agent2:0.3232938349246979
tensor([1.2695, 0.6048])
tensor([1.6140, 0.8203])
tensor([1.3148, 0.6747])
tensor([1.6444, 0.8

Loss Agent2:0.3444279134273529
tensor([1.2699, 0.6059])
tensor([1.6158, 0.8221])
tensor([1.3134, 0.6743])
tensor([1.6439, 0.8423])

 Iteration: 4917
Loss Agent1:0.3946745991706848
Loss Agent2:0.33728736639022827
tensor([1.2699, 0.6059])
tensor([1.6158, 0.8222])
tensor([1.3133, 0.6743])
tensor([1.6439, 0.8424])

 Iteration: 4918
Loss Agent1:0.3814687430858612
Loss Agent2:0.37513306736946106
tensor([1.2699, 0.6060])
tensor([1.6159, 0.8222])
tensor([1.3133, 0.6743])
tensor([1.6438, 0.8424])

 Iteration: 4919
Loss Agent1:0.3603839576244354
Loss Agent2:0.3405877351760864
tensor([1.2699, 0.6060])
tensor([1.6159, 0.8223])
tensor([1.3132, 0.6743])
tensor([1.6438, 0.8425])

 Iteration: 4920
Loss Agent1:0.37560775876045227
Loss Agent2:0.35476234555244446
tensor([1.2699, 0.6060])
tensor([1.6160, 0.8223])
tensor([1.3132, 0.6742])
tensor([1.6437, 0.8425])

 Iteration: 4921
Loss Agent1:0.396999716758728
Loss Agent2:0.39199593663215637
tensor([1.2700, 0.6061])
tensor([1.6161, 0.8223])
tensor([1.3131,

tensor([1.2696, 0.6062])
tensor([1.6171, 0.8227])
tensor([1.3112, 0.6745])
tensor([1.6425, 0.8444])

 Iteration: 4966
Loss Agent1:0.3699546754360199
Loss Agent2:0.37205833196640015
tensor([1.2695, 0.6062])
tensor([1.6171, 0.8228])
tensor([1.3111, 0.6745])
tensor([1.6425, 0.8444])

 Iteration: 4967
Loss Agent1:0.39076101779937744
Loss Agent2:0.38937610387802124
tensor([1.2695, 0.6062])
tensor([1.6171, 0.8228])
tensor([1.3111, 0.6745])
tensor([1.6425, 0.8445])

 Iteration: 4968
Loss Agent1:0.37338846921920776
Loss Agent2:0.35283976793289185
tensor([1.2695, 0.6063])
tensor([1.6171, 0.8229])
tensor([1.3111, 0.6745])
tensor([1.6425, 0.8445])

 Iteration: 4969
Loss Agent1:0.35339459776878357
Loss Agent2:0.36347565054893494
tensor([1.2694, 0.6063])
tensor([1.6170, 0.8230])
tensor([1.3110, 0.6745])
tensor([1.6425, 0.8446])

 Iteration: 4970
Loss Agent1:0.3382294178009033
Loss Agent2:0.35990527272224426
tensor([1.2694, 0.6064])
tensor([1.6170, 0.8230])
tensor([1.3110, 0.6745])
tensor([1.6425, 0

Loss Agent2:0.3595927655696869
tensor([1.2679, 0.6070])
tensor([1.6166, 0.8247])
tensor([1.3092, 0.6727])
tensor([1.6420, 0.8427])

 Iteration: 5016
Loss Agent1:0.37996548414230347
Loss Agent2:0.36021214723587036
tensor([1.2678, 0.6070])
tensor([1.6166, 0.8247])
tensor([1.3092, 0.6727])
tensor([1.6421, 0.8428])

 Iteration: 5017
Loss Agent1:0.35342031717300415
Loss Agent2:0.3471311032772064
tensor([1.2678, 0.6070])
tensor([1.6166, 0.8247])
tensor([1.3092, 0.6727])
tensor([1.6421, 0.8427])

 Iteration: 5018
Loss Agent1:0.36678603291511536
Loss Agent2:0.3628152310848236
tensor([1.2678, 0.6070])
tensor([1.6165, 0.8248])
tensor([1.3092, 0.6726])
tensor([1.6422, 0.8427])

 Iteration: 5019
Loss Agent1:0.3515546917915344
Loss Agent2:0.34236404299736023
tensor([1.2677, 0.6070])
tensor([1.6165, 0.8248])
tensor([1.3092, 0.6726])
tensor([1.6422, 0.8427])

 Iteration: 5020
Loss Agent1:0.3498191833496094
Loss Agent2:0.3736599385738373
tensor([1.2677, 0.6071])
tensor([1.6165, 0.8248])
tensor([1.3092

Loss Agent1:0.35981443524360657
Loss Agent2:0.37712955474853516
tensor([1.2669, 0.6073])
tensor([1.6167, 0.8252])
tensor([1.3082, 0.6732])
tensor([1.6425, 0.8442])

 Iteration: 5064
Loss Agent1:0.3804515302181244
Loss Agent2:0.3700528144836426
tensor([1.2669, 0.6073])
tensor([1.6167, 0.8252])
tensor([1.3082, 0.6732])
tensor([1.6426, 0.8442])

 Iteration: 5065
Loss Agent1:0.339280903339386
Loss Agent2:0.3791455924510956
tensor([1.2669, 0.6073])
tensor([1.6166, 0.8252])
tensor([1.3082, 0.6732])
tensor([1.6427, 0.8443])

 Iteration: 5066
Loss Agent1:0.3377760648727417
Loss Agent2:0.3424493670463562
tensor([1.2668, 0.6073])
tensor([1.6166, 0.8252])
tensor([1.3082, 0.6732])
tensor([1.6427, 0.8443])

 Iteration: 5067
Loss Agent1:0.3519904911518097
Loss Agent2:0.3589648902416229
tensor([1.2668, 0.6073])
tensor([1.6166, 0.8252])
tensor([1.3082, 0.6733])
tensor([1.6428, 0.8444])

 Iteration: 5068
Loss Agent1:0.3572461009025574
Loss Agent2:0.4070981740951538
tensor([1.2667, 0.6073])
tensor([1.61

tensor([1.3073, 0.6723])
tensor([1.6435, 0.8437])

 Iteration: 5110
Loss Agent1:0.37759265303611755
Loss Agent2:0.29704418778419495
tensor([1.2659, 0.6065])
tensor([1.6163, 0.8241])
tensor([1.3072, 0.6723])
tensor([1.6436, 0.8437])

 Iteration: 5111
Loss Agent1:0.37352463603019714
Loss Agent2:0.37993884086608887
tensor([1.2659, 0.6065])
tensor([1.6163, 0.8241])
tensor([1.3072, 0.6723])
tensor([1.6436, 0.8438])

 Iteration: 5112
Loss Agent1:0.3523436486721039
Loss Agent2:0.31974777579307556
tensor([1.2659, 0.6064])
tensor([1.6163, 0.8240])
tensor([1.3072, 0.6723])
tensor([1.6436, 0.8437])

 Iteration: 5113
Loss Agent1:0.3510218560695648
Loss Agent2:0.34639886021614075
tensor([1.2659, 0.6064])
tensor([1.6163, 0.8240])
tensor([1.3072, 0.6722])
tensor([1.6436, 0.8437])

 Iteration: 5114
Loss Agent1:0.3693496882915497
Loss Agent2:0.32298001646995544
tensor([1.2659, 0.6064])
tensor([1.6164, 0.8240])
tensor([1.3071, 0.6722])
tensor([1.6436, 0.8437])

 Iteration: 5115
Loss Agent1:0.32187363505

Loss Agent1:0.3170749843120575
Loss Agent2:0.3222801387310028
tensor([1.2652, 0.6061])
tensor([1.6165, 0.8243])
tensor([1.3061, 0.6723])
tensor([1.6442, 0.8450])

 Iteration: 5157
Loss Agent1:0.34849876165390015
Loss Agent2:0.3768659234046936
tensor([1.2652, 0.6061])
tensor([1.6166, 0.8244])
tensor([1.3061, 0.6723])
tensor([1.6442, 0.8451])

 Iteration: 5158
Loss Agent1:0.33807530999183655
Loss Agent2:0.35582637786865234
tensor([1.2652, 0.6061])
tensor([1.6166, 0.8244])
tensor([1.3061, 0.6723])
tensor([1.6442, 0.8451])

 Iteration: 5159
Loss Agent1:0.33831676840782166
Loss Agent2:0.3931601643562317
tensor([1.2651, 0.6062])
tensor([1.6166, 0.8245])
tensor([1.3060, 0.6723])
tensor([1.6442, 0.8450])

 Iteration: 5160
Loss Agent1:0.4036707282066345
Loss Agent2:0.39424723386764526
tensor([1.2651, 0.6062])
tensor([1.6166, 0.8245])
tensor([1.3060, 0.6723])
tensor([1.6442, 0.8451])

 Iteration: 5161
Loss Agent1:0.376811683177948
Loss Agent2:0.35671934485435486
tensor([1.2651, 0.6062])
tensor([

Loss Agent1:0.3303486108779907
Loss Agent2:0.32948610186576843
tensor([1.2632, 0.6078])
tensor([1.6148, 0.8276])
tensor([1.3053, 0.6739])
tensor([1.6444, 0.8483])

 Iteration: 5210
Loss Agent1:0.3551280200481415
Loss Agent2:0.33508753776550293
tensor([1.2631, 0.6078])
tensor([1.6147, 0.8276])
tensor([1.3052, 0.6739])
tensor([1.6444, 0.8483])

 Iteration: 5211
Loss Agent1:0.32828861474990845
Loss Agent2:0.33505111932754517
tensor([1.2631, 0.6078])
tensor([1.6147, 0.8276])
tensor([1.3052, 0.6739])
tensor([1.6443, 0.8483])

 Iteration: 5212
Loss Agent1:0.3848274350166321
Loss Agent2:0.3311459720134735
tensor([1.2631, 0.6078])
tensor([1.6147, 0.8276])
tensor([1.3051, 0.6739])
tensor([1.6443, 0.8482])

 Iteration: 5213
Loss Agent1:0.3771477937698364
Loss Agent2:0.34058162569999695
tensor([1.2631, 0.6078])
tensor([1.6147, 0.8276])
tensor([1.3051, 0.6738])
tensor([1.6444, 0.8482])

 Iteration: 5214
Loss Agent1:0.2935013473033905
Loss Agent2:0.35944828391075134
tensor([1.2630, 0.6078])
tensor(

tensor([1.2618, 0.6084])
tensor([1.6141, 0.8287])
tensor([1.3033, 0.6722])
tensor([1.6441, 0.8467])

 Iteration: 5261
Loss Agent1:0.34045684337615967
Loss Agent2:0.3757465183734894
tensor([1.2618, 0.6083])
tensor([1.6141, 0.8287])
tensor([1.3033, 0.6722])
tensor([1.6441, 0.8466])

 Iteration: 5262
Loss Agent1:0.3582547605037689
Loss Agent2:0.34459546208381653
tensor([1.2618, 0.6084])
tensor([1.6140, 0.8287])
tensor([1.3032, 0.6722])
tensor([1.6441, 0.8466])

 Iteration: 5263
Loss Agent1:0.3260299563407898
Loss Agent2:0.32217392325401306
tensor([1.2617, 0.6084])
tensor([1.6140, 0.8287])
tensor([1.3032, 0.6721])
tensor([1.6441, 0.8465])

 Iteration: 5264
Loss Agent1:0.34625443816185
Loss Agent2:0.3700214922428131
tensor([1.2617, 0.6084])
tensor([1.6140, 0.8287])
tensor([1.3032, 0.6721])
tensor([1.6441, 0.8464])

 Iteration: 5265
Loss Agent1:0.36391302943229675
Loss Agent2:0.34519335627555847
tensor([1.2617, 0.6084])
tensor([1.6140, 0.8288])
tensor([1.3032, 0.6720])
tensor([1.6441, 0.8463

Loss Agent1:0.34856346249580383
Loss Agent2:0.3516259789466858
tensor([1.2615, 0.6087])
tensor([1.6152, 0.8294])
tensor([1.3023, 0.6718])
tensor([1.6442, 0.8469])

 Iteration: 5311
Loss Agent1:0.35676634311676025
Loss Agent2:0.30626818537712097
tensor([1.2615, 0.6087])
tensor([1.6152, 0.8294])
tensor([1.3023, 0.6718])
tensor([1.6442, 0.8470])

 Iteration: 5312
Loss Agent1:0.311790406703949
Loss Agent2:0.3618980944156647
tensor([1.2615, 0.6087])
tensor([1.6153, 0.8294])
tensor([1.3023, 0.6718])
tensor([1.6442, 0.8471])

 Iteration: 5313
Loss Agent1:0.33973070979118347
Loss Agent2:0.28074365854263306
tensor([1.2615, 0.6086])
tensor([1.6153, 0.8294])
tensor([1.3023, 0.6719])
tensor([1.6442, 0.8471])

 Iteration: 5314
Loss Agent1:0.3493085205554962
Loss Agent2:0.3772040903568268
tensor([1.2615, 0.6086])
tensor([1.6154, 0.8294])
tensor([1.3022, 0.6719])
tensor([1.6442, 0.8472])

 Iteration: 5315
Loss Agent1:0.3657482862472534
Loss Agent2:0.36431166529655457
tensor([1.2615, 0.6087])
tensor([

tensor([1.3022, 0.6727])
tensor([1.6461, 0.8489])

 Iteration: 5357
Loss Agent1:0.3688068985939026
Loss Agent2:0.34585362672805786
tensor([1.2611, 0.6093])
tensor([1.6156, 0.8309])
tensor([1.3022, 0.6727])
tensor([1.6461, 0.8490])

 Iteration: 5358
Loss Agent1:0.3108336925506592
Loss Agent2:0.3715896010398865
tensor([1.2610, 0.6094])
tensor([1.6156, 0.8309])
tensor([1.3022, 0.6728])
tensor([1.6461, 0.8490])

 Iteration: 5359
Loss Agent1:0.31540507078170776
Loss Agent2:0.35143429040908813
tensor([1.2610, 0.6094])
tensor([1.6156, 0.8310])
tensor([1.3021, 0.6728])
tensor([1.6461, 0.8490])

 Iteration: 5360
Loss Agent1:0.3230046033859253
Loss Agent2:0.34007808566093445
tensor([1.2610, 0.6095])
tensor([1.6156, 0.8310])
tensor([1.3021, 0.6728])
tensor([1.6462, 0.8491])

 Iteration: 5361
Loss Agent1:0.3542943000793457
Loss Agent2:0.33679696917533875
tensor([1.2610, 0.6095])
tensor([1.6156, 0.8311])
tensor([1.3021, 0.6728])
tensor([1.6462, 0.8491])

 Iteration: 5362
Loss Agent1:0.3194192647933

Loss Agent2:0.3588899075984955
tensor([1.2613, 0.6105])
tensor([1.6165, 0.8329])
tensor([1.3012, 0.6732])
tensor([1.6467, 0.8512])

 Iteration: 5403
Loss Agent1:0.34855738282203674
Loss Agent2:0.30982211232185364
tensor([1.2613, 0.6105])
tensor([1.6166, 0.8329])
tensor([1.3012, 0.6731])
tensor([1.6467, 0.8512])

 Iteration: 5404
Loss Agent1:0.3934493660926819
Loss Agent2:0.3542870581150055
tensor([1.2613, 0.6106])
tensor([1.6166, 0.8330])
tensor([1.3012, 0.6731])
tensor([1.6467, 0.8511])

 Iteration: 5405
Loss Agent1:0.33808884024620056
Loss Agent2:0.32666924595832825
tensor([1.2614, 0.6106])
tensor([1.6167, 0.8331])
tensor([1.3011, 0.6731])
tensor([1.6467, 0.8511])

 Iteration: 5406
Loss Agent1:0.3398239314556122
Loss Agent2:0.3087694048881531
tensor([1.2614, 0.6107])
tensor([1.6167, 0.8332])
tensor([1.3011, 0.6730])
tensor([1.6466, 0.8511])

 Iteration: 5407
Loss Agent1:0.34006065130233765
Loss Agent2:0.3050667941570282
tensor([1.2614, 0.6107])
tensor([1.6167, 0.8333])
tensor([1.3011

Loss Agent1:0.3395501375198364
Loss Agent2:0.3455800414085388
tensor([1.2609, 0.6110])
tensor([1.6168, 0.8346])
tensor([1.3007, 0.6725])
tensor([1.6475, 0.8522])

 Iteration: 5454
Loss Agent1:0.32466045022010803
Loss Agent2:0.3090111315250397
tensor([1.2609, 0.6110])
tensor([1.6168, 0.8347])
tensor([1.3007, 0.6725])
tensor([1.6475, 0.8522])

 Iteration: 5455
Loss Agent1:0.3634968400001526
Loss Agent2:0.3495226502418518
tensor([1.2609, 0.6111])
tensor([1.6168, 0.8347])
tensor([1.3007, 0.6724])
tensor([1.6475, 0.8522])

 Iteration: 5456
Loss Agent1:0.34645548462867737
Loss Agent2:0.3263261318206787
tensor([1.2609, 0.6111])
tensor([1.6168, 0.8348])
tensor([1.3007, 0.6724])
tensor([1.6475, 0.8522])

 Iteration: 5457
Loss Agent1:0.35231468081474304
Loss Agent2:0.30248525738716125
tensor([1.2609, 0.6111])
tensor([1.6169, 0.8348])
tensor([1.3006, 0.6724])
tensor([1.6475, 0.8521])

 Iteration: 5458
Loss Agent1:0.3086884319782257
Loss Agent2:0.3837597668170929
tensor([1.2610, 0.6111])
tensor([1

Loss Agent2:0.32917213439941406
tensor([1.2609, 0.6125])
tensor([1.6180, 0.8370])
tensor([1.3010, 0.6735])
tensor([1.6501, 0.8550])

 Iteration: 5504
Loss Agent1:0.317126601934433
Loss Agent2:0.30682307481765747
tensor([1.2609, 0.6126])
tensor([1.6180, 0.8371])
tensor([1.3010, 0.6736])
tensor([1.6502, 0.8550])

 Iteration: 5505
Loss Agent1:0.3243415355682373
Loss Agent2:0.29769206047058105
tensor([1.2609, 0.6126])
tensor([1.6180, 0.8372])
tensor([1.3010, 0.6735])
tensor([1.6502, 0.8551])

 Iteration: 5506
Loss Agent1:0.317167192697525
Loss Agent2:0.3043915331363678
tensor([1.2609, 0.6127])
tensor([1.6180, 0.8372])
tensor([1.3010, 0.6735])
tensor([1.6502, 0.8551])

 Iteration: 5507
Loss Agent1:0.32407140731811523
Loss Agent2:0.37296879291534424
tensor([1.2609, 0.6127])
tensor([1.6180, 0.8372])
tensor([1.3010, 0.6735])
tensor([1.6503, 0.8551])

 Iteration: 5508
Loss Agent1:0.32418015599250793
Loss Agent2:0.2950915992259979
tensor([1.2609, 0.6127])
tensor([1.6180, 0.8372])
tensor([1.3009,

Loss Agent2:0.3402386009693146
tensor([1.2604, 0.6135])
tensor([1.6181, 0.8387])
tensor([1.3005, 0.6730])
tensor([1.6513, 0.8549])

 Iteration: 5560
Loss Agent1:0.3541550636291504
Loss Agent2:0.3870282769203186
tensor([1.2604, 0.6136])
tensor([1.6181, 0.8388])
tensor([1.3005, 0.6731])
tensor([1.6514, 0.8550])

 Iteration: 5561
Loss Agent1:0.3221686780452728
Loss Agent2:0.34697645902633667
tensor([1.2604, 0.6137])
tensor([1.6182, 0.8389])
tensor([1.3006, 0.6731])
tensor([1.6515, 0.8551])

 Iteration: 5562
Loss Agent1:0.3265535831451416
Loss Agent2:0.3176097571849823
tensor([1.2604, 0.6138])
tensor([1.6182, 0.8390])
tensor([1.3006, 0.6732])
tensor([1.6516, 0.8552])

 Iteration: 5563
Loss Agent1:0.3279151916503906
Loss Agent2:0.32166588306427
tensor([1.2604, 0.6138])
tensor([1.6182, 0.8391])
tensor([1.3006, 0.6732])
tensor([1.6517, 0.8553])

 Iteration: 5564
Loss Agent1:0.32222577929496765
Loss Agent2:0.33070874214172363
tensor([1.2604, 0.6139])
tensor([1.6182, 0.8392])
tensor([1.3006, 0.

Loss Agent2:0.27978742122650146
tensor([1.2608, 0.6154])
tensor([1.6192, 0.8414])
tensor([1.3001, 0.6734])
tensor([1.6525, 0.8561])

 Iteration: 5606
Loss Agent1:0.33371540904045105
Loss Agent2:0.3640545606613159
tensor([1.2608, 0.6155])
tensor([1.6193, 0.8414])
tensor([1.3001, 0.6733])
tensor([1.6525, 0.8561])

 Iteration: 5607
Loss Agent1:0.36818185448646545
Loss Agent2:0.37520831823349
tensor([1.2609, 0.6155])
tensor([1.6194, 0.8415])
tensor([1.3000, 0.6733])
tensor([1.6525, 0.8561])

 Iteration: 5608
Loss Agent1:0.3518410325050354
Loss Agent2:0.30636149644851685
tensor([1.2609, 0.6156])
tensor([1.6195, 0.8415])
tensor([1.3000, 0.6733])
tensor([1.6524, 0.8561])

 Iteration: 5609
Loss Agent1:0.32237595319747925
Loss Agent2:0.34368160367012024
tensor([1.2610, 0.6156])
tensor([1.6196, 0.8416])
tensor([1.3000, 0.6733])
tensor([1.6524, 0.8562])

 Iteration: 5610
Loss Agent1:0.30386245250701904
Loss Agent2:0.3409017324447632
tensor([1.2610, 0.6156])
tensor([1.6197, 0.8416])
tensor([1.2999

RuntimeError: invalid multinomial distribution (encountering probability entry < 0)

### Evaluate Networks (Convergence and Value of states)

##### Agent 1

In [79]:
# When opponent played D/0
state = torch.tensor(int(0),dtype=torch.float32).unsqueeze(0)
policy_net1(state), target_net1(state)

(tensor([1.2611, 0.6157], grad_fn=<AddBackward0>),
 tensor([0.1263, 0.0633], grad_fn=<AddBackward0>))

In [80]:
BETA = 4

a = torch.tensor([0,1])
p = torch.exp(policy_net1(state)/BETA)/torch.sum(torch.exp(policy_net1(state)/BETA))
index = p.multinomial(num_samples=1, replacement=True)
b = a[index]
b

tensor([1])

In [81]:
# When opponent played C/1
state = torch.tensor(int(1),dtype=torch.float32).unsqueeze(0)
policy_net1(state), target_net1(state)

(tensor([1.6198, 0.8418], grad_fn=<AddBackward0>),
 tensor([0.1942, 0.1181], grad_fn=<AddBackward0>))

##### Agent 2

In [82]:
# When opponent played D/0
state = torch.tensor(int(0),dtype=torch.float32).unsqueeze(0)
policy_net2(state), target_net2(state)

(tensor([1.2998, 0.6734], grad_fn=<AddBackward0>),
 tensor([0.1641, 0.1045], grad_fn=<AddBackward0>))

In [83]:
# When opponent played C/1
state = torch.tensor(int(1),dtype=torch.float32).unsqueeze(0)
policy_net2(state), target_net2(state)

(tensor([1.6523, 0.8563], grad_fn=<AddBackward0>),
 tensor([0.2385, 0.0881], grad_fn=<AddBackward0>))

### Trace Path

In [1]:
def moving_average(x, w):
    return np.convolve(x, np.ones(w), 'valid') / w

plt.rcParams["figure.figsize"] = (20, 10)
plt.rcParams['font.size']=20
plt.plot(moving_average(np.array(actions)[:,0],100), label = 'Agent 1 Actions', marker = 'o', c = 'b')
plt.plot(moving_average(np.array(actions)[:,1],100), label = 'Agent 2 Actions', marker = 'x', c = 'r')
#plt.title('Fraction of Co-operations in 100 games') 
plt.show()

NameError: name 'plt' is not defined