In [42]:
import random
import numpy as np

In [43]:
##Simulation Data
def die_simulation(num, prob):
    """
    Returns a sequence of events of die tossed with probability of heads = prob
     
    Input: 
    num - number of trials
    prob - list of probabilites corresponding to probability of rolling index+1 value 
    
    Ouput: sequence x1, x2, ... xnum where xi in {1, 2, 3, 4, 5, 6} based on die's probability
        
    *Note prob = 1/6 for all indices is a fair die, otherwise is a unfair die
    """
    binomial_trials = []
    for i in range(num):
        rand_val = random.random()
        trial_val = 6
        
        if rand_val <= prob[0]:
            trial_val = 1
        elif rand_val <= prob[0] + prob[1]:
            trial_val = 2
        elif rand_val <= prob[0] + prob[1] + prob[2]:
            trial_val = 3
        elif rand_val <= prob[0] + prob[1] + prob[2] + prob[3]:
            trial_val = 4      
        elif rand_val <= prob[0] + prob[1] + prob[2] + prob[3] + prob[4]:
            trial_val = 5  
        
        binomial_trials.append(trial_val)

    return binomial_trials

trial = binomial_simulation(7, 0.5)
print(trial)

[0, 1, 1, 0, 1, 1, 0]


In [44]:
#Random Unfair Die
def unfair_die():
    """
    Generates a random unfair die
    
    Input: None
    Ouput: list of probabilites corresponding to probability of rolling index+1 value for an unfair die
    """
    prob = []
    max_prob = 1
    for i in range(5):
#         print("i", i)
        new_prob = random.uniform(0, max_prob)
        prob.append(new_prob)
        max_prob -= new_prob
    prob.append(max_prob)
    return prob
x = unfair_die()
print(x)

sum = 0
for i in x:
    sum += i
print(sum)

i 0
i 1
i 2
i 3
i 4
[0.773436751228663, 0.2009370836424203, 0.011113873976000997, 0.01030910957147693, 0.0007487811721708824, 0.0034544004092679353]
1.0


In [45]:
training_data = []
train_label = []
train_prob = []

test_data = []
test_label = []
test_prob = []

TRIALS = 10000
BATCH_SIZE = 1
NUM = 100
FAIR = [0.16666666666, 0.16666666666, 0.16666666666, 0.16666666666, 0.16666666666, 0.16666666666]

##TRAIN Data
for i in range(TRIALS):
    batch_data = []
    batch_label = []
    batch_prob = []
    for j in range(BATCH_SIZE):
        
        rand = random.randint(0,1)
        if rand == 0:
            #fair die
            trial_seq = die_simulation(NUM, FAIR)
            batch_data.append(trial_seq)
            batch_label.append(0)
            batch_prob.append(FAIR)
        else: #rand == 1
            #unfair die
            
            UNFAIR = unfair_die()

            trial_seq = die_simulation(NUM, UNFAIR)
            batch_data.append(trial_seq)
            batch_label.append(1)
            batch_prob.append(UNFAIR)
            
    batch_data = torch.tensor(batch_data, dtype = torch.float)
    batch_label = torch.tensor(batch_label, dtype = torch.long)
        
    training_data.append(batch_data)
    train_label.append(batch_label)
    train_prob.append(batch_prob)
    
##TEST Data
for i in range(TRIALS):
    batch_data = []
    batch_label = []
    batch_prob = []
    for j in range(BATCH_SIZE):
        
        rand = random.randint(0,1)
        if rand == 0:
            #fair die
            trial_seq = die_simulation(NUM, FAIR)
            batch_data.append(trial_seq)
            batch_label.append(0)
            batch_prob.append(FAIR)
        else:
            #unfair die
            
            UNFAIR = unfair_die()
                
            trial_seq = die_simulation(NUM, UNFAIR)
            batch_data.append(trial_seq)
            batch_label.append(1)
            batch_prob.append(UNFAIR)
            
    batch_data = torch.tensor(batch_data, dtype = torch.float)
    batch_label = torch.tensor(batch_label, dtype = torch.long)
            
    test_data.append(batch_data)
    test_label.append(batch_label)
    test_prob.append(batch_prob)

print(train_prob, "\n")

i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1


i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0


i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0


i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3


i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2


i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0


i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2


i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4


i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1
i 2
i 3
i 4
i 0
i 1


In [46]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    
    def __init__(self):
        super(Net, self).__init__()
        self.fc1 = nn.Linear(in_features=100, out_features=50)
        self.fc2 = nn.Linear(in_features=50, out_features=25)
        self.fc3 = nn.Linear(in_features=25, out_features=2)
        
    def forward(self, x):
        x = F.relu(self.fc1(x))
        #print("fc1: ", x)
        x = F.relu(self.fc2(x))
        #print("fc2: ", x)
        x = self.fc3(x)
        #print("fc2: ", x)
        return x
    
net = Net()
print(net)

Net(
  (fc1): Linear(in_features=100, out_features=50, bias=True)
  (fc2): Linear(in_features=50, out_features=25, bias=True)
  (fc3): Linear(in_features=25, out_features=2, bias=True)
)


In [47]:
import torch.optim as optim
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(net.parameters(), lr = 0.001, momentum = 0.9)

In [48]:
running_loss = 0.0
EPOCHS = 2
for j in range(EPOCHS):
    for i, data in enumerate(training_data, 0):
        inputs = data
        labels = train_label[i]

        optimizer.zero_grad()

        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        print(i, "loss ", running_loss, outputs, labels)
        running_loss = 0.0

0 loss  0.5538061857223511 tensor([[-0.2576,  0.0437]], grad_fn=<AddmmBackward>) tensor([1])
1 loss  0.8065657019615173 tensor([[-0.3939, -0.1786]], grad_fn=<AddmmBackward>) tensor([0])
2 loss  0.589892566204071 tensor([[-0.2075,  0.0109]], grad_fn=<AddmmBackward>) tensor([1])
3 loss  0.6473332643508911 tensor([[-0.2434, -0.3372]], grad_fn=<AddmmBackward>) tensor([0])
4 loss  0.8862212300300598 tensor([[-0.2692,  0.0857]], grad_fn=<AddmmBackward>) tensor([0])
5 loss  0.6673904657363892 tensor([[-0.3072, -0.3594]], grad_fn=<AddmmBackward>) tensor([0])
6 loss  0.735833466053009 tensor([[-0.2816, -0.1980]], grad_fn=<AddmmBackward>) tensor([0])
7 loss  0.5599386692047119 tensor([[-0.1446, -0.4315]], grad_fn=<AddmmBackward>) tensor([0])
8 loss  0.8273398876190186 tensor([[-0.0872, -0.3397]], grad_fn=<AddmmBackward>) tensor([1])
9 loss  0.7033823132514954 tensor([[-0.1701, -0.1904]], grad_fn=<AddmmBackward>) tensor([1])
10 loss  0.5353982448577881 tensor([[-0.1409, -0.4860]], grad_fn=<AddmmB

123 loss  0.5302280783653259 tensor([[-0.2262,  0.1315]], grad_fn=<AddmmBackward>) tensor([1])
124 loss  0.6438869833946228 tensor([[-0.4186, -0.5197]], grad_fn=<AddmmBackward>) tensor([0])
125 loss  0.578978419303894 tensor([[-0.2646, -0.0216]], grad_fn=<AddmmBackward>) tensor([1])
126 loss  0.5673339366912842 tensor([[-0.2016,  0.0682]], grad_fn=<AddmmBackward>) tensor([1])
127 loss  0.645496129989624 tensor([[-0.1098, -0.0121]], grad_fn=<AddmmBackward>) tensor([1])
128 loss  0.5719843506813049 tensor([[-0.0164, -0.2755]], grad_fn=<AddmmBackward>) tensor([0])
129 loss  0.6597751975059509 tensor([[-0.2325, -0.3004]], grad_fn=<AddmmBackward>) tensor([0])
130 loss  0.5090747475624084 tensor([[-0.2995,  0.1103]], grad_fn=<AddmmBackward>) tensor([1])
131 loss  0.7149320244789124 tensor([[-0.2471, -0.2040]], grad_fn=<AddmmBackward>) tensor([0])
132 loss  0.5508981943130493 tensor([[-0.2061,  0.1020]], grad_fn=<AddmmBackward>) tensor([1])
133 loss  0.6014495491981506 tensor([[-0.1807,  0.01

226 loss  0.5221084952354431 tensor([[-0.2344,  0.1431]], grad_fn=<AddmmBackward>) tensor([1])
227 loss  0.42234525084495544 tensor([[-0.3542,  0.2891]], grad_fn=<AddmmBackward>) tensor([1])
228 loss  0.8396859169006348 tensor([[-0.2247,  0.0497]], grad_fn=<AddmmBackward>) tensor([0])
229 loss  0.8401690721511841 tensor([[-0.2195,  0.0557]], grad_fn=<AddmmBackward>) tensor([0])
230 loss  0.47939297556877136 tensor([[-0.3703,  0.1156]], grad_fn=<AddmmBackward>) tensor([1])
231 loss  0.8824363946914673 tensor([[-0.3786, -0.0303]], grad_fn=<AddmmBackward>) tensor([0])
232 loss  0.8026444911956787 tensor([[-0.2055,  0.0027]], grad_fn=<AddmmBackward>) tensor([0])
233 loss  0.504963755607605 tensor([[-0.3656,  0.0546]], grad_fn=<AddmmBackward>) tensor([1])
234 loss  0.49238067865371704 tensor([[-0.3233,  0.1290]], grad_fn=<AddmmBackward>) tensor([1])
235 loss  0.6548445224761963 tensor([[-0.2095, -0.1314]], grad_fn=<AddmmBackward>) tensor([1])
236 loss  0.5782901048660278 tensor([[-0.2685, -

347 loss  0.3505963087081909 tensor([[ 0.0062, -0.8616]], grad_fn=<AddmmBackward>) tensor([0])
348 loss  0.7377951145172119 tensor([[-0.1024, -0.1898]], grad_fn=<AddmmBackward>) tensor([1])
349 loss  0.38445666432380676 tensor([[ 0.0491, -0.7084]], grad_fn=<AddmmBackward>) tensor([0])
350 loss  0.9730100631713867 tensor([[-0.0303, -0.5286]], grad_fn=<AddmmBackward>) tensor([1])
351 loss  0.4686152935028076 tensor([[-0.3446,  0.1700]], grad_fn=<AddmmBackward>) tensor([1])
352 loss  0.6706987619400024 tensor([[-0.1795, -0.1341]], grad_fn=<AddmmBackward>) tensor([1])
353 loss  0.5771406292915344 tensor([[-0.2852, -0.0380]], grad_fn=<AddmmBackward>) tensor([1])
354 loss  0.41385528445243835 tensor([[-0.0903, -0.7585]], grad_fn=<AddmmBackward>) tensor([0])
355 loss  0.4045286476612091 tensor([[ 0.0567, -0.6392]], grad_fn=<AddmmBackward>) tensor([0])
356 loss  0.3508453071117401 tensor([[ 0.1980, -0.6689]], grad_fn=<AddmmBackward>) tensor([0])
357 loss  0.37923091650009155 tensor([[ 0.1539, 

438 loss  1.0249767303466797 tensor([[ 0.1064, -0.4742]], grad_fn=<AddmmBackward>) tensor([1])
439 loss  0.10489653795957565 tensor([[ 0.7529, -1.4489]], grad_fn=<AddmmBackward>) tensor([0])
440 loss  0.17189261317253113 tensor([[ 0.4546, -1.2191]], grad_fn=<AddmmBackward>) tensor([0])
441 loss  0.8679900765419006 tensor([[ 0.0091, -0.3145]], grad_fn=<AddmmBackward>) tensor([1])
442 loss  0.16492615640163422 tensor([[ 0.5410, -1.1776]], grad_fn=<AddmmBackward>) tensor([0])
443 loss  0.5000243186950684 tensor([[-0.2835,  0.1492]], grad_fn=<AddmmBackward>) tensor([1])
444 loss  0.19832554459571838 tensor([[ 0.5570, -0.9601]], grad_fn=<AddmmBackward>) tensor([0])
445 loss  0.8616690635681152 tensor([[ 0.0215, -0.2912]], grad_fn=<AddmmBackward>) tensor([1])
446 loss  1.0111457109451294 tensor([[ 0.0560, -0.5029]], grad_fn=<AddmmBackward>) tensor([1])
447 loss  0.579190194606781 tensor([[-0.2645, -0.0219]], grad_fn=<AddmmBackward>) tensor([1])
448 loss  0.9811190366744995 tensor([[ 0.0242, 

525 loss  0.6925147771835327 tensor([[-0.2508, -0.2521]], grad_fn=<AddmmBackward>) tensor([0])
526 loss  0.7498677968978882 tensor([[-0.2513, -0.1409]], grad_fn=<AddmmBackward>) tensor([0])
527 loss  0.6729888916015625 tensor([[-0.1558, -0.1151]], grad_fn=<AddmmBackward>) tensor([1])
528 loss  0.414829283952713 tensor([[-0.4292,  0.2361]], grad_fn=<AddmmBackward>) tensor([1])
529 loss  0.7203184962272644 tensor([[-0.2629, -0.2093]], grad_fn=<AddmmBackward>) tensor([0])
530 loss  0.5093152523040771 tensor([[ 0.1729, -0.2364]], grad_fn=<AddmmBackward>) tensor([0])
531 loss  0.593997061252594 tensor([[-0.0816, -0.2908]], grad_fn=<AddmmBackward>) tensor([0])
532 loss  0.5241790413856506 tensor([[ 0.0522, -0.3202]], grad_fn=<AddmmBackward>) tensor([0])
533 loss  0.40910327434539795 tensor([[-0.4109,  0.2714]], grad_fn=<AddmmBackward>) tensor([1])
534 loss  0.6631341576576233 tensor([[-0.2308, -0.1699]], grad_fn=<AddmmBackward>) tensor([1])
535 loss  0.39631932973861694 tensor([[ 0.3653, -0.

625 loss  0.30566853284835815 tensor([[-0.5327,  0.4958]], grad_fn=<AddmmBackward>) tensor([1])
626 loss  0.37475839257240295 tensor([[ 0.4144, -0.3739]], grad_fn=<AddmmBackward>) tensor([0])
627 loss  0.5730603933334351 tensor([[-0.1974,  0.0592]], grad_fn=<AddmmBackward>) tensor([1])
628 loss  0.42138707637786865 tensor([[-0.4544,  0.1917]], grad_fn=<AddmmBackward>) tensor([1])
629 loss  0.6764724254608154 tensor([[-0.0425, -0.0762]], grad_fn=<AddmmBackward>) tensor([0])
630 loss  0.6380799412727356 tensor([[ 0.0952, -0.0181]], grad_fn=<AddmmBackward>) tensor([0])
631 loss  0.7140934467315674 tensor([[-0.1965, -0.1551]], grad_fn=<AddmmBackward>) tensor([0])
632 loss  0.31965404748916626 tensor([[-0.4852,  0.4913]], grad_fn=<AddmmBackward>) tensor([1])
633 loss  0.3518754839897156 tensor([[-0.4599,  0.4035]], grad_fn=<AddmmBackward>) tensor([1])
634 loss  0.3217215836048126 tensor([[-0.4935,  0.4754]], grad_fn=<AddmmBackward>) tensor([1])
635 loss  0.4692513048648834 tensor([[-0.3240,

755 loss  0.5164905786514282 tensor([[ 0.1073, -0.2840]], grad_fn=<AddmmBackward>) tensor([0])
756 loss  0.4572180509567261 tensor([[-0.3245,  0.2208]], grad_fn=<AddmmBackward>) tensor([1])
757 loss  0.5488851070404053 tensor([[-0.1480,  0.1649]], grad_fn=<AddmmBackward>) tensor([1])
758 loss  0.4506421387195587 tensor([[-0.3346,  0.2287]], grad_fn=<AddmmBackward>) tensor([1])
759 loss  0.8355332016944885 tensor([[ 0.0735, -0.1935]], grad_fn=<AddmmBackward>) tensor([1])
760 loss  0.3598397672176361 tensor([[-0.4769,  0.3598]], grad_fn=<AddmmBackward>) tensor([1])
761 loss  0.5817297101020813 tensor([[-0.1448, -0.3816]], grad_fn=<AddmmBackward>) tensor([0])
762 loss  0.7373070120811462 tensor([[-0.1154, -0.0289]], grad_fn=<AddmmBackward>) tensor([0])
763 loss  0.39698830246925354 tensor([[ 0.2472, -0.4716]], grad_fn=<AddmmBackward>) tensor([0])
764 loss  0.37453964352607727 tensor([[-0.5075,  0.2814]], grad_fn=<AddmmBackward>) tensor([1])
765 loss  0.6975752711296082 tensor([[-0.1079, -

844 loss  0.30753177404403687 tensor([[-0.5597,  0.4618]], grad_fn=<AddmmBackward>) tensor([1])
845 loss  0.8633105754852295 tensor([[-0.2591,  0.0565]], grad_fn=<AddmmBackward>) tensor([0])
846 loss  0.3497084379196167 tensor([[-0.5059,  0.3648]], grad_fn=<AddmmBackward>) tensor([1])
847 loss  0.3759618401527405 tensor([[-0.4904,  0.2940]], grad_fn=<AddmmBackward>) tensor([1])
848 loss  0.49172884225845337 tensor([[ 0.0788, -0.3751]], grad_fn=<AddmmBackward>) tensor([0])
849 loss  0.7652770280838013 tensor([[-0.1960, -0.0566]], grad_fn=<AddmmBackward>) tensor([0])
850 loss  0.5073006749153137 tensor([[-0.0240, -0.4383]], grad_fn=<AddmmBackward>) tensor([0])
851 loss  0.7102439403533936 tensor([[-0.0275, -0.0614]], grad_fn=<AddmmBackward>) tensor([1])
852 loss  0.657214879989624 tensor([[-0.0896, -0.1628]], grad_fn=<AddmmBackward>) tensor([0])
853 loss  0.4603566527366638 tensor([[ 0.0662, -0.4706]], grad_fn=<AddmmBackward>) tensor([0])
854 loss  0.3380710780620575 tensor([[ 0.2964, -0

982 loss  0.22525523602962494 tensor([[-0.7645,  0.6113]], grad_fn=<AddmmBackward>) tensor([1])
983 loss  0.8355008959770203 tensor([[-0.1935,  0.0734]], grad_fn=<AddmmBackward>) tensor([0])
984 loss  0.8099845051765442 tensor([[-0.2499, -0.0285]], grad_fn=<AddmmBackward>) tensor([0])
985 loss  0.6771148443222046 tensor([[-0.1421, -0.1744]], grad_fn=<AddmmBackward>) tensor([0])
986 loss  0.30406254529953003 tensor([[-0.6888,  0.3458]], grad_fn=<AddmmBackward>) tensor([1])
987 loss  0.41986283659935 tensor([[-0.3623,  0.2882]], grad_fn=<AddmmBackward>) tensor([1])
988 loss  0.30784526467323303 tensor([[ 0.4613, -0.5590]], grad_fn=<AddmmBackward>) tensor([0])
989 loss  0.6060229539871216 tensor([[-0.0289, -0.2115]], grad_fn=<AddmmBackward>) tensor([0])
990 loss  0.41380012035369873 tensor([[ 0.1719, -0.4964]], grad_fn=<AddmmBackward>) tensor([0])
991 loss  1.4089865684509277 tensor([[ 0.3798, -0.7490]], grad_fn=<AddmmBackward>) tensor([1])
992 loss  1.3301613330841064 tensor([[ 0.4064, -

1078 loss  0.24183358252048492 tensor([[ 0.4450, -0.8512]], grad_fn=<AddmmBackward>) tensor([0])
1079 loss  0.09815093129873276 tensor([[ 0.9837, -1.2881]], grad_fn=<AddmmBackward>) tensor([0])
1080 loss  0.39588379859924316 tensor([[-0.4661,  0.2560]], grad_fn=<AddmmBackward>) tensor([1])
1081 loss  0.1377091109752655 tensor([[ 0.6548, -1.2582]], grad_fn=<AddmmBackward>) tensor([0])
1082 loss  0.0968785211443901 tensor([[ 0.9883, -1.2972]], grad_fn=<AddmmBackward>) tensor([0])
1083 loss  0.39093291759490967 tensor([[-0.4172,  0.3202]], grad_fn=<AddmmBackward>) tensor([1])
1084 loss  0.09162603318691254 tensor([[ 0.9426, -1.4013]], grad_fn=<AddmmBackward>) tensor([0])
1085 loss  0.06617561727762222 tensor([[ 1.0714, -1.6108]], grad_fn=<AddmmBackward>) tensor([0])
1086 loss  0.0632437914609909 tensor([[ 1.1367, -1.5923]], grad_fn=<AddmmBackward>) tensor([0])
1087 loss  0.05019591376185417 tensor([[ 1.3275, -1.6391]], grad_fn=<AddmmBackward>) tensor([0])
1088 loss  0.1429842859506607 ten

1172 loss  0.5060797333717346 tensor([[ 0.0782, -0.3392]], grad_fn=<AddmmBackward>) tensor([0])
1173 loss  0.41258367896080017 tensor([[ 0.3526, -0.3193]], grad_fn=<AddmmBackward>) tensor([0])
1174 loss  0.48984289169311523 tensor([[-0.1774,  0.2814]], grad_fn=<AddmmBackward>) tensor([1])
1175 loss  0.22162242233753204 tensor([[ 0.6883, -0.7056]], grad_fn=<AddmmBackward>) tensor([0])
1176 loss  1.6255913972854614 tensor([[ 0.6519, -0.7546]], grad_fn=<AddmmBackward>) tensor([1])
1177 loss  0.3920222818851471 tensor([[-0.4791,  0.2550]], grad_fn=<AddmmBackward>) tensor([1])
1178 loss  0.34924355149269104 tensor([[ 0.3485, -0.5238]], grad_fn=<AddmmBackward>) tensor([0])
1179 loss  0.243791863322258 tensor([[ 0.5795, -0.7076]], grad_fn=<AddmmBackward>) tensor([0])
1180 loss  0.5674379467964172 tensor([[-0.1809,  0.0886]], grad_fn=<AddmmBackward>) tensor([1])
1181 loss  0.3386327624320984 tensor([[-0.4223,  0.4865]], grad_fn=<AddmmBackward>) tensor([1])
1182 loss  0.38572245836257935 tensor

1308 loss  0.1824663281440735 tensor([[ 0.7044, -0.9042]], grad_fn=<AddmmBackward>) tensor([0])
1309 loss  0.16284559667110443 tensor([[ 0.7876, -0.9448]], grad_fn=<AddmmBackward>) tensor([0])
1310 loss  1.5870635509490967 tensor([[ 0.6018, -0.7564]], grad_fn=<AddmmBackward>) tensor([1])
1311 loss  0.6065458655357361 tensor([[-0.1385,  0.0429]], grad_fn=<AddmmBackward>) tensor([1])
1312 loss  0.44165942072868347 tensor([[-0.3316,  0.2567]], grad_fn=<AddmmBackward>) tensor([1])
1313 loss  0.23948591947555542 tensor([[-0.6967,  0.6104]], grad_fn=<AddmmBackward>) tensor([1])
1314 loss  0.5576877593994141 tensor([[ 0.0645, -0.2277]], grad_fn=<AddmmBackward>) tensor([0])
1315 loss  0.3202284276485443 tensor([[-0.4529,  0.5214]], grad_fn=<AddmmBackward>) tensor([1])
1316 loss  0.8448752760887146 tensor([[-0.1812,  0.1023]], grad_fn=<AddmmBackward>) tensor([0])
1317 loss  0.3009240925312042 tensor([[-0.6092,  0.4375]], grad_fn=<AddmmBackward>) tensor([1])
1318 loss  0.18667703866958618 tensor

1435 loss  0.4434644281864166 tensor([[ 0.2022, -0.3811]], grad_fn=<AddmmBackward>) tensor([0])
1436 loss  0.26443377137184143 tensor([[-0.6108,  0.5843]], grad_fn=<AddmmBackward>) tensor([1])
1437 loss  0.4410112202167511 tensor([[ 0.3241, -0.2660]], grad_fn=<AddmmBackward>) tensor([0])
1438 loss  0.21190442144870758 tensor([[ 0.6932, -0.7506]], grad_fn=<AddmmBackward>) tensor([0])
1439 loss  0.14310652017593384 tensor([[ 0.8810, -0.9908]], grad_fn=<AddmmBackward>) tensor([0])
1440 loss  0.24638980627059937 tensor([[ 0.5321, -0.7430]], grad_fn=<AddmmBackward>) tensor([0])
1441 loss  1.1554925441741943 tensor([[ 0.4151, -0.3622]], grad_fn=<AddmmBackward>) tensor([1])
1442 loss  1.6229103803634644 tensor([[ 0.6429, -0.7602]], grad_fn=<AddmmBackward>) tensor([1])
1443 loss  0.18826140463352203 tensor([[ 0.6876, -0.8868]], grad_fn=<AddmmBackward>) tensor([0])
1444 loss  0.8025206327438354 tensor([[ 0.0734, -0.1346]], grad_fn=<AddmmBackward>) tensor([1])
1445 loss  0.6511291265487671 tenso

1559 loss  0.258548766374588 tensor([[ 0.4034, -0.8172]], grad_fn=<AddmmBackward>) tensor([0])
1560 loss  0.2723797559738159 tensor([[ 0.5061, -0.6552]], grad_fn=<AddmmBackward>) tensor([0])
1561 loss  0.33413460850715637 tensor([[ 0.2692, -0.6553]], grad_fn=<AddmmBackward>) tensor([0])
1562 loss  0.5184587836265564 tensor([[-0.2359,  0.1506]], grad_fn=<AddmmBackward>) tensor([1])
1563 loss  0.09930601716041565 tensor([[ 1.0350, -1.2244]], grad_fn=<AddmmBackward>) tensor([0])
1564 loss  0.1417996734380722 tensor([[ 0.8093, -1.0723]], grad_fn=<AddmmBackward>) tensor([0])
1565 loss  0.21890483796596527 tensor([[-0.7251,  0.6826]], grad_fn=<AddmmBackward>) tensor([1])
1566 loss  1.9940526485443115 tensor([[ 0.7408, -1.1069]], grad_fn=<AddmmBackward>) tensor([1])
1567 loss  1.6563622951507568 tensor([[ 0.5115, -0.9331]], grad_fn=<AddmmBackward>) tensor([1])
1568 loss  0.33336326479911804 tensor([[-0.5471,  0.3801]], grad_fn=<AddmmBackward>) tensor([1])
1569 loss  0.20359304547309875 tensor

1683 loss  0.10751934349536896 tensor([[-1.1694,  1.0064]], grad_fn=<AddmmBackward>) tensor([1])
1684 loss  0.10223466157913208 tensor([[-1.1511,  1.0778]], grad_fn=<AddmmBackward>) tensor([1])
1685 loss  0.09615915268659592 tensor([[-1.1633,  1.1300]], grad_fn=<AddmmBackward>) tensor([1])
1686 loss  1.3509935140609741 tensor([[-0.6760,  0.3752]], grad_fn=<AddmmBackward>) tensor([0])
1687 loss  1.2891736030578613 tensor([[-0.5282,  0.4387]], grad_fn=<AddmmBackward>) tensor([0])
1688 loss  0.6398569941520691 tensor([[-0.1484, -0.0388]], grad_fn=<AddmmBackward>) tensor([1])
1689 loss  0.16645893454551697 tensor([[-0.8736,  0.8350]], grad_fn=<AddmmBackward>) tensor([1])
1690 loss  1.3549983501434326 tensor([[-0.7126,  0.3440]], grad_fn=<AddmmBackward>) tensor([0])
1691 loss  1.2288103103637695 tensor([[-0.4670,  0.4156]], grad_fn=<AddmmBackward>) tensor([0])
1692 loss  0.15691639482975006 tensor([[-0.9304,  0.8422]], grad_fn=<AddmmBackward>) tensor([1])
1693 loss  0.12164156883955002 tens

1811 loss  0.3001447916030884 tensor([[ 0.2908, -0.7589]], grad_fn=<AddmmBackward>) tensor([0])
1812 loss  0.28486368060112 tensor([[-0.6291,  0.4809]], grad_fn=<AddmmBackward>) tensor([1])
1813 loss  0.15349014103412628 tensor([[ 0.6024, -1.1940]], grad_fn=<AddmmBackward>) tensor([0])
1814 loss  0.0992870181798935 tensor([[ 0.7820, -1.4777]], grad_fn=<AddmmBackward>) tensor([0])
1815 loss  0.49476346373558044 tensor([[-0.3416,  0.1045]], grad_fn=<AddmmBackward>) tensor([1])
1816 loss  0.18135692179203033 tensor([[ 0.4926, -1.1226]], grad_fn=<AddmmBackward>) tensor([0])
1817 loss  0.3876429498195648 tensor([[-0.4872,  0.2604]], grad_fn=<AddmmBackward>) tensor([1])
1818 loss  0.21552391350269318 tensor([[-0.8078,  0.6172]], grad_fn=<AddmmBackward>) tensor([1])
1819 loss  0.22955255210399628 tensor([[ 0.4425, -0.9121]], grad_fn=<AddmmBackward>) tensor([0])
1820 loss  0.19912463426589966 tensor([[ 0.3632, -1.1494]], grad_fn=<AddmmBackward>) tensor([0])
1821 loss  0.06513585895299911 tenso

1943 loss  0.014525854960083961 tensor([[ 1.7667, -2.4579]], grad_fn=<AddmmBackward>) tensor([0])
1944 loss  1.1513928174972534 tensor([[ 0.2115, -0.5598]], grad_fn=<AddmmBackward>) tensor([1])
1945 loss  0.33408474922180176 tensor([[-0.5250,  0.3996]], grad_fn=<AddmmBackward>) tensor([1])
1946 loss  0.05373900383710861 tensor([[ 1.2053, -1.6913]], grad_fn=<AddmmBackward>) tensor([0])
1947 loss  0.4612945318222046 tensor([[-0.2639,  0.2703]], grad_fn=<AddmmBackward>) tensor([1])
1948 loss  2.0230507850646973 tensor([[ 0.6853, -1.1959]], grad_fn=<AddmmBackward>) tensor([1])
1949 loss  0.18762582540512085 tensor([[ 0.7049, -0.8732]], grad_fn=<AddmmBackward>) tensor([0])
1950 loss  1.3099323511123657 tensor([[ 0.3403, -0.6552]], grad_fn=<AddmmBackward>) tensor([1])
1951 loss  0.2046179175376892 tensor([[ 0.4951, -0.9875]], grad_fn=<AddmmBackward>) tensor([0])
1952 loss  0.34905001521110535 tensor([[ 0.2402, -0.6328]], grad_fn=<AddmmBackward>) tensor([0])
1953 loss  0.6206439137458801 tens

2067 loss  0.25155776739120483 tensor([[ 0.3784, -0.8733]], grad_fn=<AddmmBackward>) tensor([0])
2068 loss  0.5481787323951721 tensor([[-0.2592,  0.0554]], grad_fn=<AddmmBackward>) tensor([1])
2069 loss  0.18944869935512543 tensor([[ 0.6677, -0.8997]], grad_fn=<AddmmBackward>) tensor([0])
2070 loss  0.2053997814655304 tensor([[-0.8132,  0.6651]], grad_fn=<AddmmBackward>) tensor([1])
2071 loss  0.3195740282535553 tensor([[-0.5494,  0.4274]], grad_fn=<AddmmBackward>) tensor([1])
2072 loss  0.1503230482339859 tensor([[ 0.6749, -1.1440]], grad_fn=<AddmmBackward>) tensor([0])
2073 loss  0.201231449842453 tensor([[-0.7654,  0.7356]], grad_fn=<AddmmBackward>) tensor([1])
2074 loss  0.2613738477230072 tensor([[ 0.6307, -0.5775]], grad_fn=<AddmmBackward>) tensor([0])
2075 loss  0.44138360023498535 tensor([[-0.4083,  0.1808]], grad_fn=<AddmmBackward>) tensor([1])
2076 loss  0.19304950535297394 tensor([[-0.8427,  0.7040]], grad_fn=<AddmmBackward>) tensor([1])
2077 loss  0.14512984454631805 tensor

2192 loss  0.230018749833107 tensor([[ 0.3811, -0.9713]], grad_fn=<AddmmBackward>) tensor([0])
2193 loss  0.15779945254325867 tensor([[ 0.7465, -1.0200]], grad_fn=<AddmmBackward>) tensor([0])
2194 loss  0.22779874503612518 tensor([[ 0.4597, -0.9035]], grad_fn=<AddmmBackward>) tensor([0])
2195 loss  0.19217491149902344 tensor([[ 0.7506, -0.8011]], grad_fn=<AddmmBackward>) tensor([0])
2196 loss  2.3452677726745605 tensor([[ 1.0159, -1.2287]], grad_fn=<AddmmBackward>) tensor([1])
2197 loss  0.3081989288330078 tensor([[-0.5554,  0.4635]], grad_fn=<AddmmBackward>) tensor([1])
2198 loss  0.23773573338985443 tensor([[ 0.4819, -0.8335]], grad_fn=<AddmmBackward>) tensor([0])
2199 loss  0.16750794649124146 tensor([[-0.8806,  0.8212]], grad_fn=<AddmmBackward>) tensor([1])
2200 loss  0.24028514325618744 tensor([[ 0.4326, -0.8708]], grad_fn=<AddmmBackward>) tensor([0])
2201 loss  0.3223470151424408 tensor([[-0.5730,  0.3936]], grad_fn=<AddmmBackward>) tensor([1])
2202 loss  0.11377594619989395 tens

2320 loss  0.10232356190681458 tensor([[ 0.9655, -1.2625]], grad_fn=<AddmmBackward>) tensor([0])
2321 loss  0.7592292428016663 tensor([[-0.1569, -0.2849]], grad_fn=<AddmmBackward>) tensor([1])
2322 loss  0.10695027559995651 tensor([[ 0.8391, -1.3423]], grad_fn=<AddmmBackward>) tensor([0])
2323 loss  0.18907202780246735 tensor([[ 0.6537, -0.9159]], grad_fn=<AddmmBackward>) tensor([0])
2324 loss  0.31783756613731384 tensor([[ 0.3919, -0.5911]], grad_fn=<AddmmBackward>) tensor([0])
2325 loss  0.125947505235672 tensor([[-1.0122,  0.9961]], grad_fn=<AddmmBackward>) tensor([1])
2326 loss  0.03985331580042839 tensor([[ 1.4031, -1.7994]], grad_fn=<AddmmBackward>) tensor([0])
2327 loss  0.05253139138221741 tensor([[ 1.4330, -1.4870]], grad_fn=<AddmmBackward>) tensor([0])
2328 loss  0.0340084433555603 tensor([[ 1.3974, -1.9667]], grad_fn=<AddmmBackward>) tensor([0])
2329 loss  0.9661626815795898 tensor([[ 0.1262, -0.3610]], grad_fn=<AddmmBackward>) tensor([1])
2330 loss  0.04572025686502457 tens

2449 loss  0.26353728771209717 tensor([[-0.6910,  0.5079]], grad_fn=<AddmmBackward>) tensor([1])
2450 loss  0.4185112416744232 tensor([[ 0.2727, -0.3818]], grad_fn=<AddmmBackward>) tensor([0])
2451 loss  0.4622608423233032 tensor([[-0.2721,  0.2595]], grad_fn=<AddmmBackward>) tensor([1])
2452 loss  0.26430490612983704 tensor([[ 0.5584, -0.6372]], grad_fn=<AddmmBackward>) tensor([0])
2453 loss  0.1301981508731842 tensor([[-1.0097,  0.9632]], grad_fn=<AddmmBackward>) tensor([1])
2454 loss  0.2382596731185913 tensor([[ 0.5856, -0.7273]], grad_fn=<AddmmBackward>) tensor([0])
2455 loss  0.3502570688724518 tensor([[ 0.3966, -0.4723]], grad_fn=<AddmmBackward>) tensor([0])
2456 loss  0.3946949243545532 tensor([[ 0.2398, -0.4860]], grad_fn=<AddmmBackward>) tensor([0])
2457 loss  1.3940061330795288 tensor([[ 0.3307, -0.7781]], grad_fn=<AddmmBackward>) tensor([1])
2458 loss  0.13712194561958313 tensor([[ 0.8403, -1.0772]], grad_fn=<AddmmBackward>) tensor([0])
2459 loss  0.24616923928260803 tensor

2575 loss  0.05211891233921051 tensor([[ 1.4169, -1.5112]], grad_fn=<AddmmBackward>) tensor([0])
2576 loss  0.7760599851608276 tensor([[-0.0262, -0.1857]], grad_fn=<AddmmBackward>) tensor([1])
2577 loss  0.0812368392944336 tensor([[-1.2336,  1.2359]], grad_fn=<AddmmBackward>) tensor([1])
2578 loss  0.8258460164070129 tensor([[-0.3314, -0.0815]], grad_fn=<AddmmBackward>) tensor([0])
2579 loss  0.07861319184303284 tensor([[-1.2511,  1.2526]], grad_fn=<AddmmBackward>) tensor([1])
2580 loss  0.4346885681152344 tensor([[ 0.2892, -0.3187]], grad_fn=<AddmmBackward>) tensor([0])
2581 loss  0.9833769798278809 tensor([[ 0.2989, -0.2160]], grad_fn=<AddmmBackward>) tensor([1])
2582 loss  0.5589593052864075 tensor([[ 0.0879, -0.2013]], grad_fn=<AddmmBackward>) tensor([0])
2583 loss  0.31730207800865173 tensor([[-0.5115,  0.4736]], grad_fn=<AddmmBackward>) tensor([1])
2584 loss  0.09449892491102219 tensor([[-1.1714,  1.1402]], grad_fn=<AddmmBackward>) tensor([1])
2585 loss  0.24033764004707336 tenso

2701 loss  0.5812867879867554 tensor([[ 0.0957, -0.1422]], grad_fn=<AddmmBackward>) tensor([0])
2702 loss  0.5404710173606873 tensor([[-0.1923,  0.1407]], grad_fn=<AddmmBackward>) tensor([1])
2703 loss  0.5519614815711975 tensor([[ 0.0559, -0.2498]], grad_fn=<AddmmBackward>) tensor([0])
2704 loss  0.23920215666294098 tensor([[ 0.6285, -0.6800]], grad_fn=<AddmmBackward>) tensor([0])
2705 loss  0.21432776749134064 tensor([[-0.7616,  0.6696]], grad_fn=<AddmmBackward>) tensor([1])
2706 loss  0.13609007000923157 tensor([[-0.9518,  0.9739]], grad_fn=<AddmmBackward>) tensor([1])
2707 loss  1.9414114952087402 tensor([[ 0.7386, -1.0479]], grad_fn=<AddmmBackward>) tensor([1])
2708 loss  0.2947325110435486 tensor([[ 0.3725, -0.6982]], grad_fn=<AddmmBackward>) tensor([0])
2709 loss  0.5435584187507629 tensor([[ 0.1175, -0.2080]], grad_fn=<AddmmBackward>) tensor([0])
2710 loss  0.11887196451425552 tensor([[-1.0350,  1.0347]], grad_fn=<AddmmBackward>) tensor([1])
2711 loss  0.3635214567184448 tensor

2820 loss  0.1366700381040573 tensor([[-1.0163,  0.9048]], grad_fn=<AddmmBackward>) tensor([1])
2821 loss  0.15072914958000183 tensor([[ 0.6225, -1.1934]], grad_fn=<AddmmBackward>) tensor([0])
2822 loss  0.4469774663448334 tensor([[-0.3183,  0.2551]], grad_fn=<AddmmBackward>) tensor([1])
2823 loss  0.07144105434417725 tensor([[-1.3318,  1.2711]], grad_fn=<AddmmBackward>) tensor([1])
2824 loss  0.9448391199111938 tensor([[ 0.0544, -0.3982]], grad_fn=<AddmmBackward>) tensor([1])
2825 loss  0.19000987708568573 tensor([[-0.8511,  0.7131]], grad_fn=<AddmmBackward>) tensor([1])
2826 loss  0.678741455078125 tensor([[-0.1643, -0.1934]], grad_fn=<AddmmBackward>) tensor([0])
2827 loss  0.517463743686676 tensor([[ 0.0734, -0.3155]], grad_fn=<AddmmBackward>) tensor([0])
2828 loss  0.13289380073547363 tensor([[-1.0298,  0.9212]], grad_fn=<AddmmBackward>) tensor([1])
2829 loss  0.8128345012664795 tensor([[-0.2212,  0.0053]], grad_fn=<AddmmBackward>) tensor([0])
2830 loss  0.5505543351173401 tensor([

2933 loss  0.21821600198745728 tensor([[-0.7799,  0.6313]], grad_fn=<AddmmBackward>) tensor([1])
2934 loss  0.19127759337425232 tensor([[ 0.5345, -1.0224]], grad_fn=<AddmmBackward>) tensor([0])
2935 loss  0.08001017570495605 tensor([[ 1.1267, -1.3586]], grad_fn=<AddmmBackward>) tensor([0])
2936 loss  0.3213827610015869 tensor([[-0.5807,  0.3895]], grad_fn=<AddmmBackward>) tensor([1])
2937 loss  0.08246804028749466 tensor([[ 1.1268, -1.3270]], grad_fn=<AddmmBackward>) tensor([0])
2938 loss  2.129176139831543 tensor([[ 0.7814, -1.2212]], grad_fn=<AddmmBackward>) tensor([1])
2939 loss  0.026536015793681145 tensor([[ 1.5660, -2.0500]], grad_fn=<AddmmBackward>) tensor([0])
2940 loss  0.5683070421218872 tensor([[-0.2282,  0.0393]], grad_fn=<AddmmBackward>) tensor([1])
2941 loss  0.6689088940620422 tensor([[-0.0826, -0.0335]], grad_fn=<AddmmBackward>) tensor([1])
2942 loss  0.04646783694624901 tensor([[ 1.1693, -1.8764]], grad_fn=<AddmmBackward>) tensor([0])
2943 loss  0.23105192184448242 ten

3065 loss  0.15228833258152008 tensor([[ 0.7188, -1.0861]], grad_fn=<AddmmBackward>) tensor([0])
3066 loss  1.3729488849639893 tensor([[ 0.4554, -0.6254]], grad_fn=<AddmmBackward>) tensor([1])
3067 loss  0.06690450012683868 tensor([[ 1.3677, -1.3031]], grad_fn=<AddmmBackward>) tensor([0])
3068 loss  0.2090855985879898 tensor([[ 0.5619, -0.8967]], grad_fn=<AddmmBackward>) tensor([0])
3069 loss  0.19791573286056519 tensor([[ 0.6984, -0.8209]], grad_fn=<AddmmBackward>) tensor([0])
3070 loss  0.19967132806777954 tensor([[ 0.6045, -0.9050]], grad_fn=<AddmmBackward>) tensor([0])
3071 loss  1.5928741693496704 tensor([[ 0.5810, -0.7845]], grad_fn=<AddmmBackward>) tensor([1])
3072 loss  0.26992008090019226 tensor([[-0.6989,  0.4727]], grad_fn=<AddmmBackward>) tensor([1])
3073 loss  0.22541110217571259 tensor([[-0.7316,  0.6435]], grad_fn=<AddmmBackward>) tensor([1])
3074 loss  0.44680044054985046 tensor([[ 0.1698, -0.4042]], grad_fn=<AddmmBackward>) tensor([0])
3075 loss  0.07592982798814774 te

3202 loss  0.5423294305801392 tensor([[-0.0420, -0.3705]], grad_fn=<AddmmBackward>) tensor([0])
3203 loss  0.4221261739730835 tensor([[ 0.2099, -0.4341]], grad_fn=<AddmmBackward>) tensor([0])
3204 loss  0.1358834207057953 tensor([[-1.0236,  0.9037]], grad_fn=<AddmmBackward>) tensor([1])
3205 loss  0.4019508361816406 tensor([[ 0.1255, -0.5782]], grad_fn=<AddmmBackward>) tensor([0])
3206 loss  0.5763704776763916 tensor([[-0.2024,  0.0466]], grad_fn=<AddmmBackward>) tensor([1])
3207 loss  0.17277579009532928 tensor([[-0.8822,  0.7859]], grad_fn=<AddmmBackward>) tensor([1])
3208 loss  0.3166973292827606 tensor([[ 0.3587, -0.6286]], grad_fn=<AddmmBackward>) tensor([0])
3209 loss  0.08753371238708496 tensor([[ 0.9455, -1.4461]], grad_fn=<AddmmBackward>) tensor([0])
3210 loss  1.1027424335479736 tensor([[ 0.2000, -0.4993]], grad_fn=<AddmmBackward>) tensor([1])
3211 loss  3.565349817276001 tensor([[ 1.5144, -2.0222]], grad_fn=<AddmmBackward>) tensor([1])
3212 loss  0.13553433120250702 tensor([

3339 loss  0.5236615538597107 tensor([[ 0.1588, -0.2148]], grad_fn=<AddmmBackward>) tensor([0])
3340 loss  0.21097107231616974 tensor([[ 0.5182, -0.9305]], grad_fn=<AddmmBackward>) tensor([0])
3341 loss  0.09580057114362717 tensor([[-1.1046,  1.1926]], grad_fn=<AddmmBackward>) tensor([1])
3342 loss  0.198494553565979 tensor([[ 0.6252, -0.8909]], grad_fn=<AddmmBackward>) tensor([0])
3343 loss  0.6536267995834351 tensor([[-0.1148, -0.1954]], grad_fn=<AddmmBackward>) tensor([0])
3344 loss  0.07524465024471283 tensor([[-1.3011,  1.2481]], grad_fn=<AddmmBackward>) tensor([1])
3345 loss  0.32920220494270325 tensor([[ 0.2562, -0.6858]], grad_fn=<AddmmBackward>) tensor([0])
3346 loss  1.516816258430481 tensor([[ 0.5478, -0.7213]], grad_fn=<AddmmBackward>) tensor([1])
3347 loss  0.8275378942489624 tensor([[-0.0632, -0.3160]], grad_fn=<AddmmBackward>) tensor([1])
3348 loss  0.24768468737602234 tensor([[-0.6919,  0.5773]], grad_fn=<AddmmBackward>) tensor([1])
3349 loss  0.34329837560653687 tensor

3472 loss  0.2616911232471466 tensor([[ 0.4239, -0.7830]], grad_fn=<AddmmBackward>) tensor([0])
3473 loss  0.13026565313339233 tensor([[ 0.8346, -1.1377]], grad_fn=<AddmmBackward>) tensor([0])
3474 loss  0.1656324863433838 tensor([[ 0.6633, -1.0507]], grad_fn=<AddmmBackward>) tensor([0])
3475 loss  0.24816054105758667 tensor([[-0.6771,  0.5899]], grad_fn=<AddmmBackward>) tensor([1])
3476 loss  0.31187257170677185 tensor([[ 0.3468, -0.6584]], grad_fn=<AddmmBackward>) tensor([0])
3477 loss  0.09672810882329941 tensor([[ 0.8590, -1.4281]], grad_fn=<AddmmBackward>) tensor([0])
3478 loss  0.1610516756772995 tensor([[-0.9314,  0.8130]], grad_fn=<AddmmBackward>) tensor([1])
3479 loss  0.06706838309764862 tensor([[-1.3590,  1.3094]], grad_fn=<AddmmBackward>) tensor([1])
3480 loss  0.06701921671628952 tensor([[ 1.1419, -1.5272]], grad_fn=<AddmmBackward>) tensor([0])
3481 loss  0.0969371646642685 tensor([[ 0.9733, -1.3115]], grad_fn=<AddmmBackward>) tensor([0])
3482 loss  0.8020898699760437 tens

3606 loss  0.17747513949871063 tensor([[-0.8870,  0.7519]], grad_fn=<AddmmBackward>) tensor([1])
3607 loss  0.24044309556484222 tensor([[ 0.4387, -0.8640]], grad_fn=<AddmmBackward>) tensor([0])
3608 loss  0.2017573118209839 tensor([[-0.7502,  0.7479]], grad_fn=<AddmmBackward>) tensor([1])
3609 loss  0.059780433773994446 tensor([[-1.4391,  1.3479]], grad_fn=<AddmmBackward>) tensor([1])
3610 loss  0.15967033803462982 tensor([[-0.9655,  0.7883]], grad_fn=<AddmmBackward>) tensor([1])
3611 loss  0.12496516108512878 tensor([[ 0.7915, -1.2251]], grad_fn=<AddmmBackward>) tensor([0])
3612 loss  2.3475089073181152 tensor([[ 0.7571, -1.4899]], grad_fn=<AddmmBackward>) tensor([1])
3613 loss  0.24906986951828003 tensor([[ 0.4304, -0.8325]], grad_fn=<AddmmBackward>) tensor([0])
3614 loss  0.4585803151130676 tensor([[-0.3527,  0.1889]], grad_fn=<AddmmBackward>) tensor([1])
3615 loss  0.36642739176750183 tensor([[ 0.0878, -0.7273]], grad_fn=<AddmmBackward>) tensor([0])
3616 loss  0.41741716861724854 t

3733 loss  0.197048619389534 tensor([[ 0.4521, -1.0720]], grad_fn=<AddmmBackward>) tensor([0])
3734 loss  0.06927518546581268 tensor([[ 1.0106, -1.6242]], grad_fn=<AddmmBackward>) tensor([0])
3735 loss  0.13622073829174042 tensor([[ 0.7925, -1.1321]], grad_fn=<AddmmBackward>) tensor([0])
3736 loss  2.5179014205932617 tensor([[ 0.7088, -1.7250]], grad_fn=<AddmmBackward>) tensor([1])
3737 loss  0.06875459849834442 tensor([[ 0.9391, -1.7035]], grad_fn=<AddmmBackward>) tensor([0])
3738 loss  3.570394992828369 tensor([[ 1.2493, -2.2925]], grad_fn=<AddmmBackward>) tensor([1])
3739 loss  0.2765791118144989 tensor([[ 0.2153, -0.9285]], grad_fn=<AddmmBackward>) tensor([0])
3740 loss  0.11172531545162201 tensor([[-1.1067,  1.0286]], grad_fn=<AddmmBackward>) tensor([1])
3741 loss  0.25982367992401123 tensor([[ 0.2228, -0.9922]], grad_fn=<AddmmBackward>) tensor([0])
3742 loss  0.2736825942993164 tensor([[-0.7226,  0.4332]], grad_fn=<AddmmBackward>) tensor([1])
3743 loss  0.28394344449043274 tensor

3837 loss  1.7232582569122314 tensor([[ 0.5519, -0.9748]], grad_fn=<AddmmBackward>) tensor([1])
3838 loss  0.007501173764467239 tensor([[ 2.1675, -2.7215]], grad_fn=<AddmmBackward>) tensor([0])
3839 loss  0.036613013595342636 tensor([[ 1.4091, -1.8799]], grad_fn=<AddmmBackward>) tensor([0])
3840 loss  2.0828754901885986 tensor([[ 0.8075, -1.1424]], grad_fn=<AddmmBackward>) tensor([1])
3841 loss  0.05434717237949371 tensor([[-1.4346,  1.4505]], grad_fn=<AddmmBackward>) tensor([1])
3842 loss  0.09294402599334717 tensor([[-1.2561,  1.0729]], grad_fn=<AddmmBackward>) tensor([1])
3843 loss  0.7877806425094604 tensor([[-0.2219, -0.0408]], grad_fn=<AddmmBackward>) tensor([0])
3844 loss  0.06184402108192444 tensor([[-1.4351,  1.3170]], grad_fn=<AddmmBackward>) tensor([1])
3845 loss  0.11207631230354309 tensor([[-1.1583,  0.9737]], grad_fn=<AddmmBackward>) tensor([1])
3846 loss  2.374934434890747 tensor([[-1.0877,  1.1896]], grad_fn=<AddmmBackward>) tensor([0])
3847 loss  0.032393552362918854 t

3971 loss  0.014978206716477871 tensor([[ 1.5737, -2.6200]], grad_fn=<AddmmBackward>) tensor([0])
3972 loss  0.08877158910036087 tensor([[-1.2177,  1.1593]], grad_fn=<AddmmBackward>) tensor([1])
3973 loss  1.760758876800537 tensor([[ 0.5780, -0.9942]], grad_fn=<AddmmBackward>) tensor([1])
3974 loss  0.01353209838271141 tensor([[ 1.6355, -2.6604]], grad_fn=<AddmmBackward>) tensor([0])
3975 loss  0.07888919860124588 tensor([[ 1.0154, -1.4846]], grad_fn=<AddmmBackward>) tensor([0])
3976 loss  0.06131047010421753 tensor([[ 1.0978, -1.6632]], grad_fn=<AddmmBackward>) tensor([0])
3977 loss  0.17045460641384125 tensor([[-0.9335,  0.7494]], grad_fn=<AddmmBackward>) tensor([1])
3978 loss  0.03389252349734306 tensor([[ 1.2808, -2.0868]], grad_fn=<AddmmBackward>) tensor([0])
3979 loss  0.030626920983195305 tensor([[ 1.2370, -2.2336]], grad_fn=<AddmmBackward>) tensor([0])
3980 loss  0.13077087700366974 tensor([[-1.1719,  0.7963]], grad_fn=<AddmmBackward>) tensor([1])
3981 loss  0.08635903894901276

4108 loss  0.10096722841262817 tensor([[ 0.7539, -1.4881]], grad_fn=<AddmmBackward>) tensor([0])
4109 loss  0.11156325787305832 tensor([[ 0.5917, -1.5451]], grad_fn=<AddmmBackward>) tensor([0])
4110 loss  0.0569164901971817 tensor([[-1.4071,  1.4305]], grad_fn=<AddmmBackward>) tensor([1])
4111 loss  0.09957161545753479 tensor([[ 0.6849, -1.5718]], grad_fn=<AddmmBackward>) tensor([0])
4112 loss  0.1596800982952118 tensor([[ 0.3494, -1.4043]], grad_fn=<AddmmBackward>) tensor([0])
4113 loss  0.1850007176399231 tensor([[ 0.6569, -0.9366]], grad_fn=<AddmmBackward>) tensor([0])
4114 loss  0.6987447738647461 tensor([[-0.1819, -0.1931]], grad_fn=<AddmmBackward>) tensor([1])
4115 loss  0.028238384053111076 tensor([[ 1.3135, -2.2394]], grad_fn=<AddmmBackward>) tensor([0])
4116 loss  0.13700324296951294 tensor([[-1.0234,  0.8950]], grad_fn=<AddmmBackward>) tensor([1])
4117 loss  0.7277754545211792 tensor([[-0.0926, -0.1607]], grad_fn=<AddmmBackward>) tensor([1])
4118 loss  0.039778854697942734 te

4231 loss  0.06591594219207764 tensor([[ 0.7457, -1.9405]], grad_fn=<AddmmBackward>) tensor([0])
4232 loss  0.08310561627149582 tensor([[ 1.1251, -1.3207]], grad_fn=<AddmmBackward>) tensor([0])
4233 loss  0.09499294310808182 tensor([[ 0.7474, -1.5587]], grad_fn=<AddmmBackward>) tensor([0])
4234 loss  0.17733028531074524 tensor([[-0.8713,  0.7685]], grad_fn=<AddmmBackward>) tensor([1])
4235 loss  0.4000120162963867 tensor([[-0.5257,  0.1839]], grad_fn=<AddmmBackward>) tensor([1])
4236 loss  0.03382188081741333 tensor([[ 1.3949, -1.9747]], grad_fn=<AddmmBackward>) tensor([0])
4237 loss  0.5163733959197998 tensor([[-0.3412,  0.0504]], grad_fn=<AddmmBackward>) tensor([1])
4238 loss  1.153998851776123 tensor([[ 0.1987, -0.5765]], grad_fn=<AddmmBackward>) tensor([1])
4239 loss  0.15680980682373047 tensor([[ 0.6915, -1.0817]], grad_fn=<AddmmBackward>) tensor([0])
4240 loss  0.052569057792425156 tensor([[-1.5731,  1.3462]], grad_fn=<AddmmBackward>) tensor([1])
4241 loss  1.1238408088684082 ten

4354 loss  0.05217469856142998 tensor([[-1.6091,  1.3179]], grad_fn=<AddmmBackward>) tensor([1])
4355 loss  0.0239428598433733 tensor([[-1.8999,  1.8202]], grad_fn=<AddmmBackward>) tensor([1])
4356 loss  0.046803370118141174 tensor([[-1.5061,  1.5322]], grad_fn=<AddmmBackward>) tensor([1])
4357 loss  0.2760755121707916 tensor([[-0.7398,  0.4061]], grad_fn=<AddmmBackward>) tensor([1])
4358 loss  0.03346974775195122 tensor([[-1.8685,  1.5119]], grad_fn=<AddmmBackward>) tensor([1])
4359 loss  1.845030426979065 tensor([[-0.9064,  0.7667]], grad_fn=<AddmmBackward>) tensor([0])
4360 loss  1.9053239822387695 tensor([[-1.0132,  0.7311]], grad_fn=<AddmmBackward>) tensor([0])
4361 loss  0.18028143048286438 tensor([[-0.8489,  0.7728]], grad_fn=<AddmmBackward>) tensor([1])
4362 loss  1.991168737411499 tensor([[-1.0203,  0.8241]], grad_fn=<AddmmBackward>) tensor([0])
4363 loss  1.0471712350845337 tensor([[-0.4167,  0.1983]], grad_fn=<AddmmBackward>) tensor([0])
4364 loss  0.1059970110654831 tensor(

4470 loss  0.4039096534252167 tensor([[-0.5168,  0.1810]], grad_fn=<AddmmBackward>) tensor([1])
4471 loss  0.15929163992404938 tensor([[ 0.5022, -1.2541]], grad_fn=<AddmmBackward>) tensor([0])
4472 loss  0.26425400376319885 tensor([[ 0.3212, -0.8746]], grad_fn=<AddmmBackward>) tensor([0])
4473 loss  0.64568030834198 tensor([[-0.3020, -0.2047]], grad_fn=<AddmmBackward>) tensor([1])
4474 loss  0.286747545003891 tensor([[ 0.2006, -0.9017]], grad_fn=<AddmmBackward>) tensor([0])
4475 loss  0.2850971519947052 tensor([[ 0.2785, -0.8305]], grad_fn=<AddmmBackward>) tensor([0])
4476 loss  0.47290605306625366 tensor([[-0.0918, -0.5949]], grad_fn=<AddmmBackward>) tensor([0])
4477 loss  0.20593442022800446 tensor([[ 0.3907, -1.0847]], grad_fn=<AddmmBackward>) tensor([0])
4478 loss  0.22143234312534332 tensor([[ 0.3988, -0.9960]], grad_fn=<AddmmBackward>) tensor([0])
4479 loss  0.3105457127094269 tensor([[ 0.2169, -0.7932]], grad_fn=<AddmmBackward>) tensor([0])
4480 loss  0.07604219764471054 tensor(

4587 loss  0.3791601061820984 tensor([[ 0.0279, -0.7463]], grad_fn=<AddmmBackward>) tensor([0])
4588 loss  0.48164400458335876 tensor([[-0.1309, -0.6109]], grad_fn=<AddmmBackward>) tensor([0])
4589 loss  0.1415793001651764 tensor([[-1.1707,  0.7126]], grad_fn=<AddmmBackward>) tensor([1])
4590 loss  0.2708992063999176 tensor([[-0.8650,  0.3025]], grad_fn=<AddmmBackward>) tensor([1])
4591 loss  0.7461526989936829 tensor([[-0.2058, -0.1025]], grad_fn=<AddmmBackward>) tensor([0])
4592 loss  0.38975760340690613 tensor([[ 0.0461, -0.6949]], grad_fn=<AddmmBackward>) tensor([0])
4593 loss  0.3219474256038666 tensor([[ 0.2075, -0.7606]], grad_fn=<AddmmBackward>) tensor([0])
4594 loss  0.07471676915884018 tensor([[ 0.9341, -1.6224]], grad_fn=<AddmmBackward>) tensor([0])
4595 loss  0.061752237379550934 tensor([[ 1.0765, -1.6771]], grad_fn=<AddmmBackward>) tensor([0])
4596 loss  0.025872355327010155 tensor([[ 1.5292, -2.1124]], grad_fn=<AddmmBackward>) tensor([0])
4597 loss  0.2727390229701996 ten

4712 loss  0.015109728090465069 tensor([[ 1.6676, -2.5172]], grad_fn=<AddmmBackward>) tensor([0])
4713 loss  0.014541598036885262 tensor([[ 1.6194, -2.6041]], grad_fn=<AddmmBackward>) tensor([0])
4714 loss  0.012750530615448952 tensor([[ 1.6687, -2.6871]], grad_fn=<AddmmBackward>) tensor([0])
4715 loss  0.01841145008802414 tensor([[ 1.5656, -2.4199]], grad_fn=<AddmmBackward>) tensor([0])
4716 loss  0.005750896409153938 tensor([[ 1.9396, -3.2159]], grad_fn=<AddmmBackward>) tensor([0])
4717 loss  2.2608633041381836 tensor([[ 0.8002, -1.3506]], grad_fn=<AddmmBackward>) tensor([1])
4718 loss  0.03295521065592766 tensor([[ 1.3455, -2.0506]], grad_fn=<AddmmBackward>) tensor([0])
4719 loss  0.005815609358251095 tensor([[ 2.0578, -3.0865]], grad_fn=<AddmmBackward>) tensor([0])
4720 loss  0.028715621680021286 tensor([[ 1.5648, -1.9711]], grad_fn=<AddmmBackward>) tensor([0])
4721 loss  0.5457874536514282 tensor([[-0.3687, -0.0485]], grad_fn=<AddmmBackward>) tensor([1])
4722 loss  0.0067603443749

4836 loss  0.15185540914535522 tensor([[ 0.4810, -1.3269]], grad_fn=<AddmmBackward>) tensor([0])
4837 loss  0.09096360951662064 tensor([[ 0.8753, -1.4762]], grad_fn=<AddmmBackward>) tensor([0])
4838 loss  0.035563211888074875 tensor([[-1.7061,  1.6125]], grad_fn=<AddmmBackward>) tensor([1])
4839 loss  0.14091074466705322 tensor([[ 0.6252, -1.2631]], grad_fn=<AddmmBackward>) tensor([0])
4840 loss  0.026644088327884674 tensor([[ 1.3334, -2.2784]], grad_fn=<AddmmBackward>) tensor([0])
4841 loss  0.06705968827009201 tensor([[-1.3616,  1.3068]], grad_fn=<AddmmBackward>) tensor([1])
4842 loss  0.4444138705730438 tensor([[-0.5923, -0.0118]], grad_fn=<AddmmBackward>) tensor([1])
4843 loss  0.09694982320070267 tensor([[-1.3034,  0.9813]], grad_fn=<AddmmBackward>) tensor([1])
4844 loss  0.06497713178396225 tensor([[ 1.0995, -1.6016]], grad_fn=<AddmmBackward>) tensor([0])
4845 loss  0.43153294920921326 tensor([[-0.4882,  0.1287]], grad_fn=<AddmmBackward>) tensor([1])
4846 loss  0.0199176035821437

4944 loss  0.045814886689186096 tensor([[ 1.1941, -1.8660]], grad_fn=<AddmmBackward>) tensor([0])
4945 loss  0.20301344990730286 tensor([[-0.8564,  0.6349]], grad_fn=<AddmmBackward>) tensor([1])
4946 loss  0.19512422382831573 tensor([[ 0.6997, -0.8353]], grad_fn=<AddmmBackward>) tensor([0])
4947 loss  0.06821247935295105 tensor([[-1.4202,  1.2306]], grad_fn=<AddmmBackward>) tensor([1])
4948 loss  0.706594705581665 tensor([[-0.1504, -0.1237]], grad_fn=<AddmmBackward>) tensor([0])
4949 loss  0.08194274455308914 tensor([[ 1.0119, -1.4486]], grad_fn=<AddmmBackward>) tensor([0])
4950 loss  0.043119918555021286 tensor([[-1.6063,  1.5158]], grad_fn=<AddmmBackward>) tensor([1])
4951 loss  0.05299842357635498 tensor([[ 1.0204, -1.8905]], grad_fn=<AddmmBackward>) tensor([0])
4952 loss  0.0992453470826149 tensor([[-1.1675,  1.0926]], grad_fn=<AddmmBackward>) tensor([1])
4953 loss  0.7424469590187073 tensor([[-0.1328, -0.2291]], grad_fn=<AddmmBackward>) tensor([1])
4954 loss  0.02371436171233654 t

5044 loss  0.10435111820697784 tensor([[ 0.7556, -1.4518]], grad_fn=<AddmmBackward>) tensor([0])
5045 loss  0.13340814411640167 tensor([[ 0.6960, -1.2508]], grad_fn=<AddmmBackward>) tensor([0])
5046 loss  0.41637611389160156 tensor([[-0.4177,  0.2430]], grad_fn=<AddmmBackward>) tensor([1])
5047 loss  0.3734501600265503 tensor([[ 0.1685, -0.6240]], grad_fn=<AddmmBackward>) tensor([0])
5048 loss  0.7348101139068604 tensor([[-0.1286, -0.2103]], grad_fn=<AddmmBackward>) tensor([1])
5049 loss  0.18175309896469116 tensor([[-0.9451,  0.6678]], grad_fn=<AddmmBackward>) tensor([1])
5050 loss  0.32716983556747437 tensor([[ 0.2470, -0.7022]], grad_fn=<AddmmBackward>) tensor([0])
5051 loss  0.032303184270858765 tensor([[-1.7270,  1.6894]], grad_fn=<AddmmBackward>) tensor([1])
5052 loss  0.4029094874858856 tensor([[ 0.0359, -0.6650]], grad_fn=<AddmmBackward>) tensor([0])
5053 loss  0.541347861289978 tensor([[-0.0905, -0.4213]], grad_fn=<AddmmBackward>) tensor([0])
5054 loss  0.2921712398529053 tens

5142 loss  0.313337117433548 tensor([[ 0.3433, -0.6564]], grad_fn=<AddmmBackward>) tensor([0])
5143 loss  0.25594091415405273 tensor([[ 0.3460, -0.8861]], grad_fn=<AddmmBackward>) tensor([0])
5144 loss  0.06122817099094391 tensor([[ 1.1960, -1.5663]], grad_fn=<AddmmBackward>) tensor([0])
5145 loss  0.10045189410448074 tensor([[ 0.8870, -1.3604]], grad_fn=<AddmmBackward>) tensor([0])
5146 loss  0.06212726980447769 tensor([[ 1.0636, -1.6838]], grad_fn=<AddmmBackward>) tensor([0])
5147 loss  1.5029008388519287 tensor([[ 0.3015, -0.9498]], grad_fn=<AddmmBackward>) tensor([1])
5148 loss  0.022972170263528824 tensor([[ 1.4526, -2.3094]], grad_fn=<AddmmBackward>) tensor([0])
5149 loss  2.0369515419006348 tensor([[ 0.7154, -1.1818]], grad_fn=<AddmmBackward>) tensor([1])
5150 loss  0.06484261900186539 tensor([[-1.5009,  1.2023]], grad_fn=<AddmmBackward>) tensor([1])
5151 loss  0.05129672586917877 tensor([[-1.5745,  1.3699]], grad_fn=<AddmmBackward>) tensor([1])
5152 loss  1.5219683647155762 ten

5257 loss  0.7462955117225647 tensor([[-0.0133, -0.1169]], grad_fn=<AddmmBackward>) tensor([1])
5258 loss  0.5539800524711609 tensor([[-0.2770,  0.0239]], grad_fn=<AddmmBackward>) tensor([1])
5259 loss  2.912559986114502 tensor([[ 1.0069, -1.8498]], grad_fn=<AddmmBackward>) tensor([1])
5260 loss  0.07621408998966217 tensor([[-1.3758,  1.1601]], grad_fn=<AddmmBackward>) tensor([1])
5261 loss  0.059175219386816025 tensor([[-1.4416,  1.3559]], grad_fn=<AddmmBackward>) tensor([1])
5262 loss  0.6975719332695007 tensor([[-0.1564, -0.1476]], grad_fn=<AddmmBackward>) tensor([0])
5263 loss  0.1473255306482315 tensor([[-1.0091,  0.8315]], grad_fn=<AddmmBackward>) tensor([1])
5264 loss  1.4747439622879028 tensor([[-0.7762,  0.4387]], grad_fn=<AddmmBackward>) tensor([0])
5265 loss  1.4141329526901245 tensor([[-0.8452,  0.2904]], grad_fn=<AddmmBackward>) tensor([0])
5266 loss  0.06322789937257767 tensor([[-1.3861,  1.3431]], grad_fn=<AddmmBackward>) tensor([1])
5267 loss  0.5395178198814392 tensor(

5350 loss  0.17163962125778198 tensor([[ 0.6738, -1.0015]], grad_fn=<AddmmBackward>) tensor([0])
5351 loss  0.11586929857730865 tensor([[-1.1160,  0.9808]], grad_fn=<AddmmBackward>) tensor([1])
5352 loss  0.36480093002319336 tensor([[-0.5909,  0.2296]], grad_fn=<AddmmBackward>) tensor([1])
5353 loss  0.007459406275302172 tensor([[ 2.0692, -2.8254]], grad_fn=<AddmmBackward>) tensor([0])
5354 loss  0.013065913692116737 tensor([[ 1.8092, -2.5220]], grad_fn=<AddmmBackward>) tensor([0])
5355 loss  0.10929298400878906 tensor([[-1.1323,  1.0262]], grad_fn=<AddmmBackward>) tensor([1])
5356 loss  0.017750689759850502 tensor([[ 1.7565, -2.2659]], grad_fn=<AddmmBackward>) tensor([0])
5357 loss  0.006717243697494268 tensor([[ 2.2642, -2.7355]], grad_fn=<AddmmBackward>) tensor([0])
5358 loss  0.4697554409503937 tensor([[-0.4522,  0.0593]], grad_fn=<AddmmBackward>) tensor([1])
5359 loss  0.0015983913326635957 tensor([[ 2.7526, -3.6853]], grad_fn=<AddmmBackward>) tensor([0])
5360 loss  0.001225792802

5447 loss  0.22612714767456055 tensor([[ 0.4323, -0.9391]], grad_fn=<AddmmBackward>) tensor([0])
5448 loss  0.43221113085746765 tensor([[-0.4359,  0.1791]], grad_fn=<AddmmBackward>) tensor([1])
5449 loss  0.3645162284374237 tensor([[-0.5174,  0.3040]], grad_fn=<AddmmBackward>) tensor([1])
5450 loss  0.03295105695724487 tensor([[-1.7554,  1.6408]], grad_fn=<AddmmBackward>) tensor([1])
5451 loss  0.1251414716243744 tensor([[ 0.6975, -1.3176]], grad_fn=<AddmmBackward>) tensor([0])
5452 loss  0.021668478846549988 tensor([[-1.9256,  1.8955]], grad_fn=<AddmmBackward>) tensor([1])
5453 loss  0.4131717085838318 tensor([[ 0.0388, -0.6314]], grad_fn=<AddmmBackward>) tensor([0])
5454 loss  0.21056988835334778 tensor([[ 0.4536, -0.9972]], grad_fn=<AddmmBackward>) tensor([0])
5455 loss  0.29892322421073914 tensor([[ 0.4546, -0.5998]], grad_fn=<AddmmBackward>) tensor([0])
5456 loss  0.048700764775276184 tensor([[-1.5320,  1.4656]], grad_fn=<AddmmBackward>) tensor([1])
5457 loss  2.2314138412475586 t

5567 loss  0.3876137435436249 tensor([[ 0.3071, -0.4406]], grad_fn=<AddmmBackward>) tensor([0])
5568 loss  0.04132720082998276 tensor([[-1.5803,  1.5852]], grad_fn=<AddmmBackward>) tensor([1])
5569 loss  0.5287294387817383 tensor([[ 0.0824, -0.2789]], grad_fn=<AddmmBackward>) tensor([0])
5570 loss  0.07005693763494492 tensor([[-1.3251,  1.2981]], grad_fn=<AddmmBackward>) tensor([1])
5571 loss  0.06229787319898605 tensor([[-1.4103,  1.3343]], grad_fn=<AddmmBackward>) tensor([1])
5572 loss  0.629601240158081 tensor([[-0.1328, -0.2642]], grad_fn=<AddmmBackward>) tensor([0])
5573 loss  0.5097759962081909 tensor([[-0.0151, -0.4232]], grad_fn=<AddmmBackward>) tensor([0])
5574 loss  0.2674044966697693 tensor([[ 0.3218, -0.8605]], grad_fn=<AddmmBackward>) tensor([0])
5575 loss  0.29775571823120117 tensor([[ 0.3250, -0.7339]], grad_fn=<AddmmBackward>) tensor([0])
5576 loss  0.1993607133626938 tensor([[ 0.6139, -0.8974]], grad_fn=<AddmmBackward>) tensor([0])
5577 loss  0.0485539436340332 tensor(

5682 loss  0.04785860329866409 tensor([[-1.5569,  1.4586]], grad_fn=<AddmmBackward>) tensor([1])
5683 loss  1.5760917663574219 tensor([[-0.7229,  0.6215]], grad_fn=<AddmmBackward>) tensor([0])
5684 loss  0.7120528817176819 tensor([[0.0013, 0.0388]], grad_fn=<AddmmBackward>) tensor([0])
5685 loss  0.45652714371681213 tensor([[ 0.0659, -0.4813]], grad_fn=<AddmmBackward>) tensor([0])
5686 loss  0.9938614368438721 tensor([[-0.2611,  0.2705]], grad_fn=<AddmmBackward>) tensor([0])
5687 loss  0.629589855670929 tensor([[ 0.1184, -0.0130]], grad_fn=<AddmmBackward>) tensor([0])
5688 loss  0.17322741448879242 tensor([[ 0.6300, -1.0353]], grad_fn=<AddmmBackward>) tensor([0])
5689 loss  0.19096170365810394 tensor([[ 0.4991, -1.0596]], grad_fn=<AddmmBackward>) tensor([0])
5690 loss  0.17598986625671387 tensor([[ 0.7308, -0.9172]], grad_fn=<AddmmBackward>) tensor([0])
5691 loss  0.03620472177863121 tensor([[ 1.6429, -1.6576]], grad_fn=<AddmmBackward>) tensor([0])
5692 loss  0.007482242304831743 tenso

5774 loss  4.065723896026611 tensor([[ 1.9911, -2.0573]], grad_fn=<AddmmBackward>) tensor([1])
5775 loss  0.14968228340148926 tensor([[-0.9817,  0.8418]], grad_fn=<AddmmBackward>) tensor([1])
5776 loss  0.027177657932043076 tensor([[-1.7970,  1.7947]], grad_fn=<AddmmBackward>) tensor([1])
5777 loss  0.021170591935515404 tensor([[-1.9175,  1.9270]], grad_fn=<AddmmBackward>) tensor([1])
5778 loss  1.561415433883667 tensor([[-0.7427,  0.5832]], grad_fn=<AddmmBackward>) tensor([0])
5779 loss  1.54839289188385 tensor([[-0.7638,  0.5456]], grad_fn=<AddmmBackward>) tensor([0])
5780 loss  2.129517078399658 tensor([[-0.9805,  1.0224]], grad_fn=<AddmmBackward>) tensor([0])
5781 loss  1.6366206407546997 tensor([[-0.8089,  0.6112]], grad_fn=<AddmmBackward>) tensor([0])
5782 loss  0.03564247488975525 tensor([[-1.7003,  1.6160]], grad_fn=<AddmmBackward>) tensor([1])
5783 loss  1.0096670389175415 tensor([[-0.3684,  0.1882]], grad_fn=<AddmmBackward>) tensor([0])
5784 loss  0.021075468510389328 tensor(

5900 loss  0.2915593087673187 tensor([[ 0.3971, -0.6860]], grad_fn=<AddmmBackward>) tensor([0])
5901 loss  0.07002726197242737 tensor([[-1.2975,  1.3262]], grad_fn=<AddmmBackward>) tensor([1])
5902 loss  0.5786483883857727 tensor([[ 0.1056, -0.1382]], grad_fn=<AddmmBackward>) tensor([0])
5903 loss  0.22431397438049316 tensor([[ 0.5646, -0.8159]], grad_fn=<AddmmBackward>) tensor([0])
5904 loss  0.14382626116275787 tensor([[ 0.8545, -1.0119]], grad_fn=<AddmmBackward>) tensor([0])
5905 loss  0.02563941851258278 tensor([[-1.8000,  1.8508]], grad_fn=<AddmmBackward>) tensor([1])
5906 loss  0.07289297133684158 tensor([[-1.2852,  1.2969]], grad_fn=<AddmmBackward>) tensor([1])
5907 loss  1.4137475490570068 tensor([[ 0.5186, -0.6165]], grad_fn=<AddmmBackward>) tensor([1])
5908 loss  0.031140921637415886 tensor([[ 1.5245, -1.9292]], grad_fn=<AddmmBackward>) tensor([0])
5909 loss  0.20760482549667358 tensor([[-0.7877,  0.6788]], grad_fn=<AddmmBackward>) tensor([1])
5910 loss  0.07507491111755371 t

6026 loss  0.17550498247146606 tensor([[-0.9010,  0.7501]], grad_fn=<AddmmBackward>) tensor([1])
6027 loss  2.041851758956909 tensor([[ 0.8081, -1.0948]], grad_fn=<AddmmBackward>) tensor([1])
6028 loss  0.037980932742357254 tensor([[-1.6063,  1.6453]], grad_fn=<AddmmBackward>) tensor([1])
6029 loss  0.061807937920093536 tensor([[-1.3968,  1.3558]], grad_fn=<AddmmBackward>) tensor([1])
6030 loss  0.7849664092063904 tensor([[ 0.0075, -0.1684]], grad_fn=<AddmmBackward>) tensor([1])
6031 loss  0.10996463894844055 tensor([[ 0.9330, -1.2192]], grad_fn=<AddmmBackward>) tensor([0])
6032 loss  0.08142706751823425 tensor([[-1.2509,  1.2162]], grad_fn=<AddmmBackward>) tensor([1])
6033 loss  0.25461456179618835 tensor([[ 0.5690, -0.6690]], grad_fn=<AddmmBackward>) tensor([0])
6034 loss  0.3011338710784912 tensor([[ 0.3584, -0.6874]], grad_fn=<AddmmBackward>) tensor([0])
6035 loss  0.2673519551753998 tensor([[-0.6584,  0.5241]], grad_fn=<AddmmBackward>) tensor([1])
6036 loss  0.028199443593621254 t

6149 loss  0.1025691032409668 tensor([[ 0.8862, -1.3393]], grad_fn=<AddmmBackward>) tensor([0])
6150 loss  0.7693767547607422 tensor([[-0.1025, -0.2495]], grad_fn=<AddmmBackward>) tensor([1])
6151 loss  0.5886550545692444 tensor([[-0.2740, -0.0528]], grad_fn=<AddmmBackward>) tensor([1])
6152 loss  0.03326047956943512 tensor([[-1.6930,  1.6937]], grad_fn=<AddmmBackward>) tensor([1])
6153 loss  0.9498626589775085 tensor([[ 0.0298, -0.4310]], grad_fn=<AddmmBackward>) tensor([1])
6154 loss  0.7842280268669128 tensor([[-0.2438, -0.0692]], grad_fn=<AddmmBackward>) tensor([0])
6155 loss  0.7846266031265259 tensor([[-0.2505, -0.0752]], grad_fn=<AddmmBackward>) tensor([0])
6156 loss  0.9493101835250854 tensor([[-0.3675,  0.0924]], grad_fn=<AddmmBackward>) tensor([0])
6157 loss  1.118607521057129 tensor([[-0.3666,  0.3564]], grad_fn=<AddmmBackward>) tensor([0])
6158 loss  0.7242821455001831 tensor([[-0.1842, -0.1228]], grad_fn=<AddmmBackward>) tensor([0])
6159 loss  0.018165413290262222 tensor([

6274 loss  1.3332295417785645 tensor([[-0.6923,  0.3349]], grad_fn=<AddmmBackward>) tensor([0])
6275 loss  0.05498950183391571 tensor([[-1.4944,  1.3786]], grad_fn=<AddmmBackward>) tensor([1])
6276 loss  1.6999084949493408 tensor([[-0.8973,  0.6008]], grad_fn=<AddmmBackward>) tensor([0])
6277 loss  0.07425767183303833 tensor([[-1.3280,  1.2348]], grad_fn=<AddmmBackward>) tensor([1])
6278 loss  0.02496667578816414 tensor([[-1.8507,  1.8270]], grad_fn=<AddmmBackward>) tensor([1])
6279 loss  1.612033486366272 tensor([[ 0.3859, -1.0037]], grad_fn=<AddmmBackward>) tensor([1])
6280 loss  0.027842655777931213 tensor([[-1.8099,  1.7573]], grad_fn=<AddmmBackward>) tensor([1])
6281 loss  0.7022852897644043 tensor([[-0.2189, -0.2371]], grad_fn=<AddmmBackward>) tensor([1])
6282 loss  0.08807910978794098 tensor([[-1.3483,  1.0369]], grad_fn=<AddmmBackward>) tensor([1])
6283 loss  0.3168632984161377 tensor([[ 0.1868, -0.7999]], grad_fn=<AddmmBackward>) tensor([0])
6284 loss  0.20032252371311188 tens

6404 loss  3.283604383468628 tensor([[ 1.3454, -1.9000]], grad_fn=<AddmmBackward>) tensor([1])
6405 loss  0.8862195014953613 tensor([[-0.0852, -0.4400]], grad_fn=<AddmmBackward>) tensor([1])
6406 loss  0.22439970076084137 tensor([[ 0.3853, -0.9948]], grad_fn=<AddmmBackward>) tensor([0])
6407 loss  0.5001341104507446 tensor([[ 0.0184, -0.4140]], grad_fn=<AddmmBackward>) tensor([0])
6408 loss  0.03056957572698593 tensor([[-1.7581,  1.7143]], grad_fn=<AddmmBackward>) tensor([1])
6409 loss  0.01963149383664131 tensor([[-1.9489,  1.9719]], grad_fn=<AddmmBackward>) tensor([1])
6410 loss  0.026753541082143784 tensor([[-1.8708,  1.7369]], grad_fn=<AddmmBackward>) tensor([1])
6411 loss  1.1687859296798706 tensor([[-0.5713,  0.2253]], grad_fn=<AddmmBackward>) tensor([0])
6412 loss  0.02305954322218895 tensor([[-1.8937,  1.8644]], grad_fn=<AddmmBackward>) tensor([1])
6413 loss  0.0642055869102478 tensor([[-1.3835,  1.3299]], grad_fn=<AddmmBackward>) tensor([1])
6414 loss  0.09263775497674942 tens

6535 loss  0.017813459038734436 tensor([[-2.0261,  1.9927]], grad_fn=<AddmmBackward>) tensor([1])
6536 loss  0.8385536074638367 tensor([[-0.3520, -0.0797]], grad_fn=<AddmmBackward>) tensor([0])
6537 loss  0.03466281294822693 tensor([[-1.7212,  1.6235]], grad_fn=<AddmmBackward>) tensor([1])
6538 loss  0.10970509052276611 tensor([[-1.1751,  0.9795]], grad_fn=<AddmmBackward>) tensor([1])
6539 loss  0.40797820687294006 tensor([[-0.4722,  0.2135]], grad_fn=<AddmmBackward>) tensor([1])
6540 loss  0.06465882062911987 tensor([[ 1.1157, -1.5905]], grad_fn=<AddmmBackward>) tensor([0])
6541 loss  0.21493996679782867 tensor([[-0.8003,  0.6277]], grad_fn=<AddmmBackward>) tensor([1])
6542 loss  0.22359149158000946 tensor([[-0.8057,  0.5783]], grad_fn=<AddmmBackward>) tensor([1])
6543 loss  0.2687810957431793 tensor([[-0.7415,  0.4350]], grad_fn=<AddmmBackward>) tensor([1])
6544 loss  0.046881403774023056 tensor([[ 1.3358, -1.7008]], grad_fn=<AddmmBackward>) tensor([0])
6545 loss  0.04667333513498306

6629 loss  1.4727251529693604 tensor([[-0.7971,  0.4152]], grad_fn=<AddmmBackward>) tensor([0])
6630 loss  1.1141706705093384 tensor([[-0.6166,  0.0998]], grad_fn=<AddmmBackward>) tensor([0])
6631 loss  0.07827703654766083 tensor([[-1.2949,  1.2132]], grad_fn=<AddmmBackward>) tensor([1])
6632 loss  0.10191841423511505 tensor([[-1.2232,  1.0090]], grad_fn=<AddmmBackward>) tensor([1])
6633 loss  0.01651873253285885 tensor([[-2.0708,  2.0242]], grad_fn=<AddmmBackward>) tensor([1])
6634 loss  1.0284366607666016 tensor([[-0.4530,  0.1329]], grad_fn=<AddmmBackward>) tensor([0])
6635 loss  0.33058446645736694 tensor([[ 0.2233, -0.7137]], grad_fn=<AddmmBackward>) tensor([0])
6636 loss  0.478650838136673 tensor([[-0.0148, -0.5028]], grad_fn=<AddmmBackward>) tensor([0])
6637 loss  0.2571883499622345 tensor([[ 0.5003, -0.7263]], grad_fn=<AddmmBackward>) tensor([0])
6638 loss  0.21153323352336884 tensor([[ 0.4125, -1.0333]], grad_fn=<AddmmBackward>) tensor([0])
6639 loss  4.853864669799805 tensor(

6753 loss  0.8628327250480652 tensor([[ 0.0084, -0.3063]], grad_fn=<AddmmBackward>) tensor([1])
6754 loss  0.20341281592845917 tensor([[ 0.4359, -1.0532]], grad_fn=<AddmmBackward>) tensor([0])
6755 loss  0.02101932279765606 tensor([[-1.9482,  1.9036]], grad_fn=<AddmmBackward>) tensor([1])
6756 loss  0.49422502517700195 tensor([[-0.4043,  0.0432]], grad_fn=<AddmmBackward>) tensor([1])
6757 loss  0.2221650779247284 tensor([[-0.7986,  0.5926]], grad_fn=<AddmmBackward>) tensor([1])
6758 loss  2.880791664123535 tensor([[ 1.0861, -1.7370]], grad_fn=<AddmmBackward>) tensor([1])
6759 loss  0.017929386347532272 tensor([[-2.0288,  1.9836]], grad_fn=<AddmmBackward>) tensor([1])
6760 loss  0.8421353101730347 tensor([[-0.2884, -0.0098]], grad_fn=<AddmmBackward>) tensor([0])
6761 loss  0.9960041046142578 tensor([[-0.5033,  0.0317]], grad_fn=<AddmmBackward>) tensor([0])
6762 loss  1.255584716796875 tensor([[-0.6378,  0.2825]], grad_fn=<AddmmBackward>) tensor([0])
6763 loss  0.1593465358018875 tensor(

6874 loss  0.34317347407341003 tensor([[-0.5175,  0.3755]], grad_fn=<AddmmBackward>) tensor([1])
6875 loss  0.0961555764079094 tensor([[ 0.9215, -1.3719]], grad_fn=<AddmmBackward>) tensor([0])
6876 loss  0.03883810341358185 tensor([[ 1.3190, -1.9099]], grad_fn=<AddmmBackward>) tensor([0])
6877 loss  0.030879389494657516 tensor([[ 1.4123, -2.0499]], grad_fn=<AddmmBackward>) tensor([0])
6878 loss  0.026317516341805458 tensor([[ 1.4535, -2.1708]], grad_fn=<AddmmBackward>) tensor([0])
6879 loss  3.75990629196167 tensor([[ 1.5756, -2.1607]], grad_fn=<AddmmBackward>) tensor([1])
6880 loss  0.030649002641439438 tensor([[ 1.4843, -1.9854]], grad_fn=<AddmmBackward>) tensor([0])
6881 loss  0.341849148273468 tensor([[-0.5721,  0.3255]], grad_fn=<AddmmBackward>) tensor([1])
6882 loss  0.06792938709259033 tensor([[-1.3773,  1.2778]], grad_fn=<AddmmBackward>) tensor([1])
6883 loss  0.03874555975198746 tensor([[-1.6277,  1.6036]], grad_fn=<AddmmBackward>) tensor([1])
6884 loss  0.23454433679580688 te

6999 loss  0.27110403776168823 tensor([[ 0.3586, -0.8080]], grad_fn=<AddmmBackward>) tensor([0])
7000 loss  0.8620882034301758 tensor([[-0.0480, -0.3614]], grad_fn=<AddmmBackward>) tensor([1])
7001 loss  0.5186011791229248 tensor([[-0.0293, -0.4154]], grad_fn=<AddmmBackward>) tensor([0])
7002 loss  0.5871897339820862 tensor([[-0.3576, -0.1331]], grad_fn=<AddmmBackward>) tensor([1])
7003 loss  0.09375406801700592 tensor([[-1.2261,  1.0938]], grad_fn=<AddmmBackward>) tensor([1])
7004 loss  0.9303212761878967 tensor([[-0.3532,  0.0756]], grad_fn=<AddmmBackward>) tensor([0])
7005 loss  0.5159701704978943 tensor([[-0.0220, -0.4147]], grad_fn=<AddmmBackward>) tensor([0])
7006 loss  0.02371785417199135 tensor([[-1.8670,  1.8627]], grad_fn=<AddmmBackward>) tensor([1])
7007 loss  0.528971791267395 tensor([[-0.0702, -0.4309]], grad_fn=<AddmmBackward>) tensor([0])
7008 loss  0.6382741928100586 tensor([[-0.2506, -0.3635]], grad_fn=<AddmmBackward>) tensor([0])
7009 loss  0.2563938498497009 tensor([

7124 loss  0.04473172873258591 tensor([[-1.6272,  1.4574]], grad_fn=<AddmmBackward>) tensor([1])
7125 loss  0.2716248631477356 tensor([[ 0.3382, -0.8263]], grad_fn=<AddmmBackward>) tensor([0])
7126 loss  0.18107716739177704 tensor([[-1.0413,  0.5757]], grad_fn=<AddmmBackward>) tensor([1])
7127 loss  0.24368059635162354 tensor([[ 0.4153, -0.8723]], grad_fn=<AddmmBackward>) tensor([0])
7128 loss  0.10790584981441498 tensor([[-1.2054,  0.9667]], grad_fn=<AddmmBackward>) tensor([1])
7129 loss  0.1747055947780609 tensor([[-0.9335,  0.7226]], grad_fn=<AddmmBackward>) tensor([1])
7130 loss  0.08391840010881424 tensor([[ 0.9042, -1.5315]], grad_fn=<AddmmBackward>) tensor([0])
7131 loss  0.08852219581604004 tensor([[ 0.9881, -1.3919]], grad_fn=<AddmmBackward>) tensor([0])
7132 loss  0.09769825637340546 tensor([[ 0.9822, -1.2945]], grad_fn=<AddmmBackward>) tensor([0])
7133 loss  0.13366015255451202 tensor([[ 0.7266, -1.2183]], grad_fn=<AddmmBackward>) tensor([0])
7134 loss  3.486912727355957 ten

7248 loss  0.01774260774254799 tensor([[ 1.7208, -2.3021]], grad_fn=<AddmmBackward>) tensor([0])
7249 loss  0.015536111779510975 tensor([[ 1.6928, -2.4640]], grad_fn=<AddmmBackward>) tensor([0])
7250 loss  1.3508684635162354 tensor([[ 0.2846, -0.7665]], grad_fn=<AddmmBackward>) tensor([1])
7251 loss  2.617662191390991 tensor([[ 1.0658, -1.4761]], grad_fn=<AddmmBackward>) tensor([1])
7252 loss  0.09050341695547104 tensor([[ 0.9947, -1.3621]], grad_fn=<AddmmBackward>) tensor([0])
7253 loss  0.21364307403564453 tensor([[ 0.5131, -0.9217]], grad_fn=<AddmmBackward>) tensor([0])
7254 loss  0.2132159024477005 tensor([[ 0.4693, -0.9677]], grad_fn=<AddmmBackward>) tensor([0])
7255 loss  0.02275778353214264 tensor([[-1.9160,  1.8554]], grad_fn=<AddmmBackward>) tensor([1])
7256 loss  0.28438228368759155 tensor([[ 0.3480, -0.7639]], grad_fn=<AddmmBackward>) tensor([0])
7257 loss  0.06477927416563034 tensor([[-1.4609,  1.2433]], grad_fn=<AddmmBackward>) tensor([1])
7258 loss  0.28679925203323364 te

7375 loss  0.8823229670524597 tensor([[-0.3512, -0.0030]], grad_fn=<AddmmBackward>) tensor([0])
7376 loss  1.147650957107544 tensor([[-0.6002,  0.1656]], grad_fn=<AddmmBackward>) tensor([0])
7377 loss  0.9423786401748657 tensor([[-0.4350,  0.0136]], grad_fn=<AddmmBackward>) tensor([0])
7378 loss  1.054061770439148 tensor([[-0.4804,  0.1452]], grad_fn=<AddmmBackward>) tensor([0])
7379 loss  0.27805209159851074 tensor([[-0.7462,  0.3915]], grad_fn=<AddmmBackward>) tensor([1])
7380 loss  0.020937373861670494 tensor([[-1.9406,  1.9152]], grad_fn=<AddmmBackward>) tensor([1])
7381 loss  0.031261321157217026 tensor([[-1.7888,  1.6609]], grad_fn=<AddmmBackward>) tensor([1])
7382 loss  0.06548304855823517 tensor([[-1.4234,  1.2697]], grad_fn=<AddmmBackward>) tensor([1])
7383 loss  0.13972429931163788 tensor([[ 0.6689, -1.2285]], grad_fn=<AddmmBackward>) tensor([0])
7384 loss  0.14805950224399567 tensor([[-1.0925,  0.7427]], grad_fn=<AddmmBackward>) tensor([1])
7385 loss  0.07919420301914215 ten

7501 loss  0.01234144251793623 tensor([[-2.2037,  2.1849]], grad_fn=<AddmmBackward>) tensor([1])
7502 loss  1.509504795074463 tensor([[-0.7866,  0.4732]], grad_fn=<AddmmBackward>) tensor([0])
7503 loss  0.0742826834321022 tensor([[-1.3914,  1.1711]], grad_fn=<AddmmBackward>) tensor([1])
7504 loss  0.022246915847063065 tensor([[-1.9525,  1.8419]], grad_fn=<AddmmBackward>) tensor([1])
7505 loss  0.07963628321886063 tensor([[-1.4208,  1.0694]], grad_fn=<AddmmBackward>) tensor([1])
7506 loss  0.814299464225769 tensor([[-0.3180, -0.0888]], grad_fn=<AddmmBackward>) tensor([0])
7507 loss  0.22722019255161285 tensor([[ 0.3478, -1.0183]], grad_fn=<AddmmBackward>) tensor([0])
7508 loss  0.05899453163146973 tensor([[-1.4479,  1.3527]], grad_fn=<AddmmBackward>) tensor([1])
7509 loss  2.342494487762451 tensor([[ 0.8415, -1.4000]], grad_fn=<AddmmBackward>) tensor([1])
7510 loss  0.023360280320048332 tensor([[-1.8951,  1.8499]], grad_fn=<AddmmBackward>) tensor([1])
7511 loss  0.015477776527404785 ten

7627 loss  0.01601416990160942 tensor([[ 1.7363, -2.3900]], grad_fn=<AddmmBackward>) tensor([0])
7628 loss  0.003026787657290697 tensor([[ 2.5130, -3.2857]], grad_fn=<AddmmBackward>) tensor([0])
7629 loss  0.004886707756668329 tensor([[ 2.3686, -2.9502]], grad_fn=<AddmmBackward>) tensor([0])
7630 loss  0.007992193102836609 tensor([[ 2.0278, -2.7975]], grad_fn=<AddmmBackward>) tensor([0])
7631 loss  0.0028067738749086857 tensor([[ 2.6622, -3.2121]], grad_fn=<AddmmBackward>) tensor([0])
7632 loss  4.771027565002441 tensor([[ 2.1090, -2.6536]], grad_fn=<AddmmBackward>) tensor([1])
7633 loss  3.6830554008483887 tensor([[ 1.5009, -2.1567]], grad_fn=<AddmmBackward>) tensor([1])
7634 loss  0.020739715546369553 tensor([[ 1.6797, -2.1856]], grad_fn=<AddmmBackward>) tensor([0])
7635 loss  0.06182418391108513 tensor([[ 1.1601, -1.5923]], grad_fn=<AddmmBackward>) tensor([0])
7636 loss  0.18767336010932922 tensor([[-0.8816,  0.6962]], grad_fn=<AddmmBackward>) tensor([1])
7637 loss  0.17555499076843

7761 loss  0.3061036467552185 tensor([[ 0.2683, -0.7586]], grad_fn=<AddmmBackward>) tensor([0])
7762 loss  0.015567215159535408 tensor([[-2.0965,  2.0583]], grad_fn=<AddmmBackward>) tensor([1])
7763 loss  0.06244168430566788 tensor([[-1.3639,  1.3783]], grad_fn=<AddmmBackward>) tensor([1])
7764 loss  0.038392286747694016 tensor([[-1.5685,  1.6721]], grad_fn=<AddmmBackward>) tensor([1])
7765 loss  0.3910549283027649 tensor([[-0.0149, -0.7519]], grad_fn=<AddmmBackward>) tensor([0])
7766 loss  0.02898339554667473 tensor([[-1.8291,  1.6974]], grad_fn=<AddmmBackward>) tensor([1])
7767 loss  0.25981613993644714 tensor([[ 0.3278, -0.8872]], grad_fn=<AddmmBackward>) tensor([0])
7768 loss  0.18238775432109833 tensor([[ 0.5510, -1.0580]], grad_fn=<AddmmBackward>) tensor([0])
7769 loss  0.08523629605770111 tensor([[-1.2487,  1.1707]], grad_fn=<AddmmBackward>) tensor([1])
7770 loss  0.12350478768348694 tensor([[-1.0724,  0.9567]], grad_fn=<AddmmBackward>) tensor([1])
7771 loss  0.09908007085323334

7882 loss  0.6546592712402344 tensor([[-0.2546, -0.1761]], grad_fn=<AddmmBackward>) tensor([1])
7883 loss  0.038612063974142075 tensor([[-1.7111,  1.5237]], grad_fn=<AddmmBackward>) tensor([1])
7884 loss  1.1821980476379395 tensor([[ 0.1023, -0.7138]], grad_fn=<AddmmBackward>) tensor([1])
7885 loss  0.020455358549952507 tensor([[-1.9824,  1.8969]], grad_fn=<AddmmBackward>) tensor([1])
7886 loss  0.07163027673959732 tensor([[-1.2795,  1.3207]], grad_fn=<AddmmBackward>) tensor([1])
7887 loss  0.1847659796476364 tensor([[ 0.5023, -1.0926]], grad_fn=<AddmmBackward>) tensor([0])
7888 loss  0.53703773021698 tensor([[-0.1194, -0.4606]], grad_fn=<AddmmBackward>) tensor([0])
7889 loss  0.013259566389024258 tensor([[-2.1781,  2.1383]], grad_fn=<AddmmBackward>) tensor([1])
7890 loss  0.016917560249567032 tensor([[-2.1713,  1.8996]], grad_fn=<AddmmBackward>) tensor([1])
7891 loss  0.3923894464969635 tensor([[ 0.1819, -0.5510]], grad_fn=<AddmmBackward>) tensor([0])
7892 loss  0.277780145406723 tens

8004 loss  0.018256952986121178 tensor([[-2.0149,  1.9791]], grad_fn=<AddmmBackward>) tensor([1])
8005 loss  0.049821484833955765 tensor([[ 1.1889, -1.7854]], grad_fn=<AddmmBackward>) tensor([0])
8006 loss  0.1546686589717865 tensor([[ 0.5951, -1.1930]], grad_fn=<AddmmBackward>) tensor([0])
8007 loss  1.6330058574676514 tensor([[ 0.4264, -0.9893]], grad_fn=<AddmmBackward>) tensor([1])
8008 loss  0.2560230493545532 tensor([[ 0.2483, -0.9835]], grad_fn=<AddmmBackward>) tensor([0])
8009 loss  0.07256253063678741 tensor([[-1.3922,  1.1946]], grad_fn=<AddmmBackward>) tensor([1])
8010 loss  0.09265176951885223 tensor([[ 0.9410, -1.3912]], grad_fn=<AddmmBackward>) tensor([0])
8011 loss  0.3945336937904358 tensor([[-0.4842,  0.2421]], grad_fn=<AddmmBackward>) tensor([1])
8012 loss  1.064046025276184 tensor([[ 0.0727, -0.5681]], grad_fn=<AddmmBackward>) tensor([1])
8013 loss  0.4247734546661377 tensor([[ 0.0267, -0.6096]], grad_fn=<AddmmBackward>) tensor([0])
8014 loss  0.8602400422096252 tenso

8096 loss  0.022117728367447853 tensor([[ 1.5622, -2.2381]], grad_fn=<AddmmBackward>) tensor([0])
8097 loss  0.02170300856232643 tensor([[ 1.5538, -2.2656]], grad_fn=<AddmmBackward>) tensor([0])
8098 loss  0.2581358551979065 tensor([[-0.7829,  0.4396]], grad_fn=<AddmmBackward>) tensor([1])
8099 loss  0.21604962646961212 tensor([[-0.8826,  0.5397]], grad_fn=<AddmmBackward>) tensor([1])
8100 loss  0.6704182028770447 tensor([[-0.1819, -0.1360]], grad_fn=<AddmmBackward>) tensor([1])
8101 loss  0.05209028348326683 tensor([[ 1.0286, -1.9000]], grad_fn=<AddmmBackward>) tensor([0])
8102 loss  0.010601511225104332 tensor([[ 1.9360, -2.6055]], grad_fn=<AddmmBackward>) tensor([0])
8103 loss  0.11069602519273758 tensor([[-1.1729,  0.9722]], grad_fn=<AddmmBackward>) tensor([1])
8104 loss  0.046203114092350006 tensor([[ 1.1980, -1.8535]], grad_fn=<AddmmBackward>) tensor([0])
8105 loss  0.022368624806404114 tensor([[ 1.5654, -2.2235]], grad_fn=<AddmmBackward>) tensor([0])
8106 loss  0.053018886595964

8227 loss  0.8290265798568726 tensor([[-0.2985, -0.0430]], grad_fn=<AddmmBackward>) tensor([0])
8228 loss  0.19838683307170868 tensor([[-0.8790,  0.6377]], grad_fn=<AddmmBackward>) tensor([1])
8229 loss  0.2545182704925537 tensor([[ 0.3375, -0.9009]], grad_fn=<AddmmBackward>) tensor([0])
8230 loss  0.027682188898324966 tensor([[-1.8320,  1.7411]], grad_fn=<AddmmBackward>) tensor([1])
8231 loss  0.30996599793434143 tensor([[-0.6093,  0.4030]], grad_fn=<AddmmBackward>) tensor([1])
8232 loss  0.437387615442276 tensor([[-0.4345,  0.1658]], grad_fn=<AddmmBackward>) tensor([1])
8233 loss  1.798669457435608 tensor([[ 0.5283, -1.0894]], grad_fn=<AddmmBackward>) tensor([1])
8234 loss  0.14685948193073273 tensor([[ 0.6351, -1.2089]], grad_fn=<AddmmBackward>) tensor([0])
8235 loss  0.17630532383918762 tensor([[ 0.6016, -1.0445]], grad_fn=<AddmmBackward>) tensor([0])
8236 loss  0.1156684160232544 tensor([[ 0.7565, -1.3421]], grad_fn=<AddmmBackward>) tensor([0])
8237 loss  0.20087772607803345 tenso

8355 loss  0.12363816052675247 tensor([[ 0.7548, -1.2732]], grad_fn=<AddmmBackward>) tensor([0])
8356 loss  0.06107510253787041 tensor([[ 1.0493, -1.7157]], grad_fn=<AddmmBackward>) tensor([0])
8357 loss  1.3254404067993164 tensor([[ 0.2364, -0.7802]], grad_fn=<AddmmBackward>) tensor([1])
8358 loss  0.05012981593608856 tensor([[ 1.1233, -1.8446]], grad_fn=<AddmmBackward>) tensor([0])
8359 loss  0.22829270362854004 tensor([[-0.8423,  0.5185]], grad_fn=<AddmmBackward>) tensor([1])
8360 loss  0.18767958879470825 tensor([[-0.9502,  0.6275]], grad_fn=<AddmmBackward>) tensor([1])
8361 loss  0.10441061109304428 tensor([[ 0.7820, -1.4248]], grad_fn=<AddmmBackward>) tensor([0])
8362 loss  0.21344856917858124 tensor([[ 0.4862, -0.9495]], grad_fn=<AddmmBackward>) tensor([0])
8363 loss  0.2677074670791626 tensor([[ 0.3933, -0.7877]], grad_fn=<AddmmBackward>) tensor([0])
8364 loss  0.09407735615968704 tensor([[-1.2747,  1.0415]], grad_fn=<AddmmBackward>) tensor([1])
8365 loss  1.2196729183197021 te

8481 loss  0.02160641737282276 tensor([[ 1.6117, -2.2123]], grad_fn=<AddmmBackward>) tensor([0])
8482 loss  0.016294514760375023 tensor([[ 1.7154, -2.3934]], grad_fn=<AddmmBackward>) tensor([0])
8483 loss  0.017483755946159363 tensor([[ 1.6542, -2.3835]], grad_fn=<AddmmBackward>) tensor([0])
8484 loss  0.03587309271097183 tensor([[-1.7179,  1.5919]], grad_fn=<AddmmBackward>) tensor([1])
8485 loss  0.008430015295743942 tensor([[ 2.0088, -2.7629]], grad_fn=<AddmmBackward>) tensor([0])
8486 loss  0.005702537018805742 tensor([[ 2.2524, -2.9116]], grad_fn=<AddmmBackward>) tensor([0])
8487 loss  0.006502777803689241 tensor([[ 2.0571, -2.9752]], grad_fn=<AddmmBackward>) tensor([0])
8488 loss  6.9712815284729 tensor([[ 3.1330, -3.8373]], grad_fn=<AddmmBackward>) tensor([1])
8489 loss  0.002497175009921193 tensor([[ 2.6252, -3.3662]], grad_fn=<AddmmBackward>) tensor([0])
8490 loss  0.1974109262228012 tensor([[-0.9047,  0.6175]], grad_fn=<AddmmBackward>) tensor([1])
8491 loss  0.1621818989515304

8617 loss  0.03677918016910553 tensor([[ 1.2653, -2.0191]], grad_fn=<AddmmBackward>) tensor([0])
8618 loss  2.057605028152466 tensor([[ 0.6449, -1.2760]], grad_fn=<AddmmBackward>) tensor([1])
8619 loss  0.060627762228250504 tensor([[-1.4572,  1.3154]], grad_fn=<AddmmBackward>) tensor([1])
8620 loss  0.02294607274234295 tensor([[-1.9591,  1.8040]], grad_fn=<AddmmBackward>) tensor([1])
8621 loss  0.07038886845111847 tensor([[ 0.9775, -1.6408]], grad_fn=<AddmmBackward>) tensor([0])
8622 loss  0.13718222081661224 tensor([[ 0.7816, -1.1354]], grad_fn=<AddmmBackward>) tensor([0])
8623 loss  0.07107249647378922 tensor([[-1.4042,  1.2042]], grad_fn=<AddmmBackward>) tensor([1])
8624 loss  0.010688556358218193 tensor([[-2.2832,  2.2500]], grad_fn=<AddmmBackward>) tensor([1])
8625 loss  0.018520168960094452 tensor([[-1.9898,  1.9898]], grad_fn=<AddmmBackward>) tensor([1])
8626 loss  0.01793617755174637 tensor([[-2.0351,  1.9768]], grad_fn=<AddmmBackward>) tensor([1])
8627 loss  0.1054817885160446

8745 loss  0.010866043157875538 tensor([[-2.2653,  2.2514]], grad_fn=<AddmmBackward>) tensor([1])
8746 loss  0.0696038156747818 tensor([[-1.4366,  1.1933]], grad_fn=<AddmmBackward>) tensor([1])
8747 loss  0.5138636231422424 tensor([[ 0.0080, -0.3899]], grad_fn=<AddmmBackward>) tensor([0])
8748 loss  0.4412478804588318 tensor([[ 0.0202, -0.5693]], grad_fn=<AddmmBackward>) tensor([0])
8749 loss  0.7639280557632446 tensor([[-0.2604, -0.1235]], grad_fn=<AddmmBackward>) tensor([0])
8750 loss  0.01807445101439953 tensor([[-2.0690,  1.9352]], grad_fn=<AddmmBackward>) tensor([1])
8751 loss  0.28980299830436707 tensor([[ 0.3033, -0.7869]], grad_fn=<AddmmBackward>) tensor([0])
8752 loss  0.23071114718914032 tensor([[ 0.3700, -0.9791]], grad_fn=<AddmmBackward>) tensor([0])
8753 loss  0.3473600745201111 tensor([[-0.6652,  0.2135]], grad_fn=<AddmmBackward>) tensor([1])
8754 loss  0.012305175885558128 tensor([[-2.2241,  2.1675]], grad_fn=<AddmmBackward>) tensor([1])
8755 loss  0.20899835228919983 te

8876 loss  0.03537256643176079 tensor([[ 1.3811, -1.9430]], grad_fn=<AddmmBackward>) tensor([0])
8877 loss  0.6805158257484436 tensor([[-0.1802, -0.1547]], grad_fn=<AddmmBackward>) tensor([1])
8878 loss  1.8125197887420654 tensor([[ 0.6153, -1.0190]], grad_fn=<AddmmBackward>) tensor([1])
8879 loss  0.007554533891379833 tensor([[ 2.1253, -2.7565]], grad_fn=<AddmmBackward>) tensor([0])
8880 loss  0.01163141056895256 tensor([[-2.2202,  2.2280]], grad_fn=<AddmmBackward>) tensor([1])
8881 loss  0.0316559299826622 tensor([[ 1.4862, -1.9508]], grad_fn=<AddmmBackward>) tensor([0])
8882 loss  0.48101797699928284 tensor([[ 0.0510, -0.4307]], grad_fn=<AddmmBackward>) tensor([0])
8883 loss  0.26710784435272217 tensor([[ 0.3480, -0.8355]], grad_fn=<AddmmBackward>) tensor([0])
8884 loss  0.18553675711154938 tensor([[ 0.5857, -1.0046]], grad_fn=<AddmmBackward>) tensor([0])
8885 loss  0.1164102852344513 tensor([[ 0.7114, -1.3805]], grad_fn=<AddmmBackward>) tensor([0])
8886 loss  0.02479085884988308 te

8984 loss  0.03877021744847298 tensor([[-1.6669,  1.5638]], grad_fn=<AddmmBackward>) tensor([1])
8985 loss  0.03448696807026863 tensor([[-1.7555,  1.5944]], grad_fn=<AddmmBackward>) tensor([1])
8986 loss  0.5005335807800293 tensor([[ 0.0141, -0.4173]], grad_fn=<AddmmBackward>) tensor([0])
8987 loss  0.03692038729786873 tensor([[-1.6501,  1.6304]], grad_fn=<AddmmBackward>) tensor([1])
8988 loss  0.018438836559653282 tensor([[-2.0196,  1.9645]], grad_fn=<AddmmBackward>) tensor([1])
8989 loss  1.0678949356079102 tensor([[-0.5127,  0.1340]], grad_fn=<AddmmBackward>) tensor([0])
8990 loss  0.08333870768547058 tensor([[-1.3282,  1.1147]], grad_fn=<AddmmBackward>) tensor([1])
8991 loss  0.013383079320192337 tensor([[-2.2247,  2.0823]], grad_fn=<AddmmBackward>) tensor([1])
8992 loss  1.1161408424377441 tensor([[-0.5810,  0.1383]], grad_fn=<AddmmBackward>) tensor([0])
8993 loss  1.1349810361862183 tensor([[-0.5542,  0.1930]], grad_fn=<AddmmBackward>) tensor([0])
8994 loss  0.020391235128045082 

9074 loss  0.8734832406044006 tensor([[-0.4509, -0.1179]], grad_fn=<AddmmBackward>) tensor([0])
9075 loss  0.7156462073326111 tensor([[-0.2244, -0.1799]], grad_fn=<AddmmBackward>) tensor([0])
9076 loss  0.08189991116523743 tensor([[-1.3649,  1.0962]], grad_fn=<AddmmBackward>) tensor([1])
9077 loss  0.009753784164786339 tensor([[-2.3603,  2.2649]], grad_fn=<AddmmBackward>) tensor([1])
9078 loss  0.058613602072000504 tensor([[-1.5450,  1.2624]], grad_fn=<AddmmBackward>) tensor([1])
9079 loss  0.3152751922607422 tensor([[-0.7142,  0.2783]], grad_fn=<AddmmBackward>) tensor([1])
9080 loss  1.2058030366897583 tensor([[-0.6331,  0.2168]], grad_fn=<AddmmBackward>) tensor([0])
9081 loss  1.5379353761672974 tensor([[-0.8543,  0.4418]], grad_fn=<AddmmBackward>) tensor([0])
9082 loss  0.7753517627716064 tensor([[-0.2720, -0.1138]], grad_fn=<AddmmBackward>) tensor([0])
9083 loss  0.3649889826774597 tensor([[ 0.1429, -0.6769]], grad_fn=<AddmmBackward>) tensor([0])
9084 loss  0.6667133569717407 tenso

9162 loss  1.076975703239441 tensor([[ 0.0650, -0.5955]], grad_fn=<AddmmBackward>) tensor([1])
9163 loss  0.2866881191730499 tensor([[ 0.1985, -0.9041]], grad_fn=<AddmmBackward>) tensor([0])
9164 loss  1.8781639337539673 tensor([[ 0.5173, -1.1950]], grad_fn=<AddmmBackward>) tensor([1])
9165 loss  0.44899827241897583 tensor([[-0.0149, -0.5827]], grad_fn=<AddmmBackward>) tensor([0])
9166 loss  0.03348081186413765 tensor([[-1.8036,  1.5764]], grad_fn=<AddmmBackward>) tensor([1])
9167 loss  1.0852925777435303 tensor([[-0.5547,  0.1184]], grad_fn=<AddmmBackward>) tensor([0])
9168 loss  0.007770075462758541 tensor([[-2.4303,  2.4233]], grad_fn=<AddmmBackward>) tensor([1])
9169 loss  0.03903840854763985 tensor([[-1.7797,  1.4439]], grad_fn=<AddmmBackward>) tensor([1])
9170 loss  0.11985333263874054 tensor([[-1.2236,  0.8373]], grad_fn=<AddmmBackward>) tensor([1])
9171 loss  0.0247011948376894 tensor([[-1.9055,  1.7830]], grad_fn=<AddmmBackward>) tensor([1])
9172 loss  0.1620413064956665 tenso

9255 loss  0.7241403460502625 tensor([[-0.2269, -0.2879]], grad_fn=<AddmmBackward>) tensor([1])
9256 loss  0.05641579627990723 tensor([[-1.5455,  1.3012]], grad_fn=<AddmmBackward>) tensor([1])
9257 loss  0.045212216675281525 tensor([[-1.6374,  1.4363]], grad_fn=<AddmmBackward>) tensor([1])
9258 loss  0.20753082633018494 tensor([[ 0.4189, -1.0480]], grad_fn=<AddmmBackward>) tensor([0])
9259 loss  0.29693400859832764 tensor([[-0.7316,  0.3305]], grad_fn=<AddmmBackward>) tensor([1])
9260 loss  0.3477175533771515 tensor([[ 0.1321, -0.7454]], grad_fn=<AddmmBackward>) tensor([0])
9261 loss  0.37567630410194397 tensor([[ 0.0759, -0.7094]], grad_fn=<AddmmBackward>) tensor([0])
9262 loss  0.43245893716812134 tensor([[-0.5245,  0.0898]], grad_fn=<AddmmBackward>) tensor([1])
9263 loss  0.4161180853843689 tensor([[-0.5202,  0.1414]], grad_fn=<AddmmBackward>) tensor([1])
9264 loss  1.1275081634521484 tensor([[-0.6400,  0.0962]], grad_fn=<AddmmBackward>) tensor([0])
9265 loss  0.049821484833955765 t

9353 loss  0.9568977355957031 tensor([[ 0.0364, -0.4358]], grad_fn=<AddmmBackward>) tensor([1])
9354 loss  0.021282397210597992 tensor([[ 1.5927, -2.2465]], grad_fn=<AddmmBackward>) tensor([0])
9355 loss  0.19838057458400726 tensor([[ 0.5400, -0.9767]], grad_fn=<AddmmBackward>) tensor([0])
9356 loss  1.713219165802002 tensor([[ 0.5768, -0.9376]], grad_fn=<AddmmBackward>) tensor([1])
9357 loss  0.5220987796783447 tensor([[-0.4008, -0.0233]], grad_fn=<AddmmBackward>) tensor([1])
9358 loss  0.032667383551597595 tensor([[-1.7091,  1.6959]], grad_fn=<AddmmBackward>) tensor([1])
9359 loss  0.033716194331645966 tensor([[-1.8114,  1.5614]], grad_fn=<AddmmBackward>) tensor([1])
9360 loss  0.027446888387203217 tensor([[-1.8158,  1.7660]], grad_fn=<AddmmBackward>) tensor([1])
9361 loss  0.01117907464504242 tensor([[-2.3158,  2.1723]], grad_fn=<AddmmBackward>) tensor([1])
9362 loss  0.5936577916145325 tensor([[-0.1136, -0.3236]], grad_fn=<AddmmBackward>) tensor([0])
9363 loss  0.047915875911712646

9443 loss  0.05981849879026413 tensor([[-1.4631,  1.3233]], grad_fn=<AddmmBackward>) tensor([1])
9444 loss  0.018041318282485008 tensor([[-2.0498,  1.9563]], grad_fn=<AddmmBackward>) tensor([1])
9445 loss  0.004404842853546143 tensor([[ 2.2824, -3.1404]], grad_fn=<AddmmBackward>) tensor([0])
9446 loss  3.3651750087738037 tensor([[ 1.3385, -1.9915]], grad_fn=<AddmmBackward>) tensor([1])
9447 loss  5.255473613739014 tensor([[ 2.2067, -3.0435]], grad_fn=<AddmmBackward>) tensor([1])
9448 loss  0.05120295286178589 tensor([[ 1.0719, -1.8744]], grad_fn=<AddmmBackward>) tensor([0])
9449 loss  0.1011134535074234 tensor([[ 0.7463, -1.4942]], grad_fn=<AddmmBackward>) tensor([0])
9450 loss  0.2110021561384201 tensor([[-0.8686,  0.5799]], grad_fn=<AddmmBackward>) tensor([1])
9451 loss  1.0009821653366089 tensor([[-0.4349,  0.1080]], grad_fn=<AddmmBackward>) tensor([0])
9452 loss  0.010858495719730854 tensor([[-2.3129,  2.2045]], grad_fn=<AddmmBackward>) tensor([1])
9453 loss  0.9596485495567322 ten

9532 loss  0.002615723293274641 tensor([[ 2.6565, -3.2884]], grad_fn=<AddmmBackward>) tensor([0])
9533 loss  0.008378120139241219 tensor([[ 2.1449, -2.6330]], grad_fn=<AddmmBackward>) tensor([0])
9534 loss  0.019918305799365044 tensor([[ 1.7298, -2.1763]], grad_fn=<AddmmBackward>) tensor([0])
9535 loss  0.0022802562452852726 tensor([[ 2.7180, -3.3643]], grad_fn=<AddmmBackward>) tensor([0])
9536 loss  2.3834116458892822 tensor([[ 0.8446, -1.4421]], grad_fn=<AddmmBackward>) tensor([1])
9537 loss  0.023914460092782974 tensor([[ 1.5044, -2.2168]], grad_fn=<AddmmBackward>) tensor([0])
9538 loss  0.019335830584168434 tensor([[ 1.7232, -2.2129]], grad_fn=<AddmmBackward>) tensor([0])
9539 loss  2.5045039653778076 tensor([[ 1.0153, -1.4039]], grad_fn=<AddmmBackward>) tensor([1])
9540 loss  0.057397909462451935 tensor([[-1.4733,  1.3556]], grad_fn=<AddmmBackward>) tensor([1])
9541 loss  0.1759277731180191 tensor([[ 0.5520, -1.0964]], grad_fn=<AddmmBackward>) tensor([0])
9542 loss  0.052626173943

9623 loss  0.03985125571489334 tensor([[-1.6546,  1.5480]], grad_fn=<AddmmBackward>) tensor([1])
9624 loss  0.21888568997383118 tensor([[ 0.4442, -0.9636]], grad_fn=<AddmmBackward>) tensor([0])
9625 loss  0.01102475170046091 tensor([[ 1.9618, -2.5403]], grad_fn=<AddmmBackward>) tensor([0])
9626 loss  0.542489230632782 tensor([[-0.2803,  0.0478]], grad_fn=<AddmmBackward>) tensor([1])
9627 loss  0.537433385848999 tensor([[-0.3578, -0.0176]], grad_fn=<AddmmBackward>) tensor([1])
9628 loss  0.08314225822687149 tensor([[ 0.9636, -1.4817]], grad_fn=<AddmmBackward>) tensor([0])
9629 loss  0.0736582800745964 tensor([[ 1.0957, -1.4756]], grad_fn=<AddmmBackward>) tensor([0])
9630 loss  0.7752506732940674 tensor([[-0.0181, -0.1761]], grad_fn=<AddmmBackward>) tensor([1])
9631 loss  0.09758646041154861 tensor([[ 0.8526, -1.4253]], grad_fn=<AddmmBackward>) tensor([0])
9632 loss  0.09703139960765839 tensor([[-1.1848,  1.0990]], grad_fn=<AddmmBackward>) tensor([1])
9633 loss  0.011607727035880089 tens

9719 loss  0.11886487156152725 tensor([[ 0.9520, -1.1178]], grad_fn=<AddmmBackward>) tensor([0])
9720 loss  0.3272527754306793 tensor([[-0.6236,  0.3253]], grad_fn=<AddmmBackward>) tensor([1])
9721 loss  0.05012108385562897 tensor([[ 1.2990, -1.6692]], grad_fn=<AddmmBackward>) tensor([0])
9722 loss  0.007594285998493433 tensor([[-2.4330,  2.4435]], grad_fn=<AddmmBackward>) tensor([1])
9723 loss  0.02260523848235607 tensor([[-1.9288,  1.8495]], grad_fn=<AddmmBackward>) tensor([1])
9724 loss  0.12910346686840057 tensor([[-1.0273,  0.9546]], grad_fn=<AddmmBackward>) tensor([1])
9725 loss  0.05324869602918625 tensor([[ 1.2458, -1.6602]], grad_fn=<AddmmBackward>) tensor([0])
9726 loss  0.13009673357009888 tensor([[ 0.8054, -1.1683]], grad_fn=<AddmmBackward>) tensor([0])
9727 loss  0.022097207605838776 tensor([[-1.9900,  1.8112]], grad_fn=<AddmmBackward>) tensor([1])
9728 loss  0.024673283100128174 tensor([[-1.8334,  1.8563]], grad_fn=<AddmmBackward>) tensor([1])
9729 loss  0.023325109854340

9808 loss  0.3662891983985901 tensor([[-0.4801,  0.3355]], grad_fn=<AddmmBackward>) tensor([1])
9809 loss  0.45152056217193604 tensor([[ 0.2309, -0.3300]], grad_fn=<AddmmBackward>) tensor([0])
9810 loss  1.270136833190918 tensor([[-0.6150,  0.3256]], grad_fn=<AddmmBackward>) tensor([0])
9811 loss  0.009498050436377525 tensor([[-2.3200,  2.3319]], grad_fn=<AddmmBackward>) tensor([1])
9812 loss  0.11676346510648727 tensor([[-1.1453,  0.9434]], grad_fn=<AddmmBackward>) tensor([1])
9813 loss  0.028841178864240646 tensor([[-1.7598,  1.7717]], grad_fn=<AddmmBackward>) tensor([1])
9814 loss  0.4040825068950653 tensor([[ 0.2256, -0.4717]], grad_fn=<AddmmBackward>) tensor([0])
9815 loss  0.1272842437028885 tensor([[ 0.8048, -1.1922]], grad_fn=<AddmmBackward>) tensor([0])
9816 loss  1.9042021036148071 tensor([[ 0.7378, -1.0051]], grad_fn=<AddmmBackward>) tensor([1])
9817 loss  0.20488378405570984 tensor([[ 0.5630, -0.9181]], grad_fn=<AddmmBackward>) tensor([0])
9818 loss  0.2545974552631378 tens

9894 loss  0.03280698135495186 tensor([[-1.7497,  1.6510]], grad_fn=<AddmmBackward>) tensor([1])
9895 loss  0.09180907905101776 tensor([[ 0.9317, -1.4101]], grad_fn=<AddmmBackward>) tensor([0])
9896 loss  0.03214146941900253 tensor([[-1.7642,  1.6573]], grad_fn=<AddmmBackward>) tensor([1])
9897 loss  0.009805017150938511 tensor([[ 2.0408, -2.5792]], grad_fn=<AddmmBackward>) tensor([0])
9898 loss  0.014072944410145283 tensor([[ 1.8023, -2.4542]], grad_fn=<AddmmBackward>) tensor([0])
9899 loss  0.10377809405326843 tensor([[-1.1897,  1.0234]], grad_fn=<AddmmBackward>) tensor([1])
9900 loss  0.040507882833480835 tensor([[ 1.3866, -1.7994]], grad_fn=<AddmmBackward>) tensor([0])
9901 loss  5.220941543579102 tensor([[ 2.3851, -2.8305]], grad_fn=<AddmmBackward>) tensor([1])
9902 loss  1.028282880783081 tensor([[ 0.1379, -0.4478]], grad_fn=<AddmmBackward>) tensor([1])
9903 loss  0.02050160802900791 tensor([[ 1.6729, -2.2040]], grad_fn=<AddmmBackward>) tensor([0])
9904 loss  0.03928875923156738 

9986 loss  1.7653779983520508 tensor([[ 0.6128, -0.9649]], grad_fn=<AddmmBackward>) tensor([1])
9987 loss  0.16302143037319183 tensor([[ 0.6248, -1.1064]], grad_fn=<AddmmBackward>) tensor([0])
9988 loss  0.12145640701055527 tensor([[ 0.8431, -1.2038]], grad_fn=<AddmmBackward>) tensor([0])
9989 loss  0.6705493927001953 tensor([[-0.1456, -0.1913]], grad_fn=<AddmmBackward>) tensor([0])
9990 loss  0.2824380695819855 tensor([[ 0.2510, -0.8688]], grad_fn=<AddmmBackward>) tensor([0])
9991 loss  0.1442709118127823 tensor([[-1.0629,  0.8001]], grad_fn=<AddmmBackward>) tensor([1])
9992 loss  0.10392175614833832 tensor([[-1.1930,  1.0187]], grad_fn=<AddmmBackward>) tensor([1])
9993 loss  0.09411924332380295 tensor([[ 0.8869, -1.4289]], grad_fn=<AddmmBackward>) tensor([0])
9994 loss  0.0471111498773098 tensor([[ 1.1991, -1.8325]], grad_fn=<AddmmBackward>) tensor([0])
9995 loss  0.39528000354766846 tensor([[-0.5422,  0.1818]], grad_fn=<AddmmBackward>) tensor([1])
9996 loss  0.08479449152946472 tens

77 loss  0.09257353097200394 tensor([[ 0.7937, -1.5394]], grad_fn=<AddmmBackward>) tensor([0])
78 loss  0.2679641842842102 tensor([[-0.7802,  0.3997]], grad_fn=<AddmmBackward>) tensor([1])
79 loss  0.04932618886232376 tensor([[ 1.1599, -1.8247]], grad_fn=<AddmmBackward>) tensor([0])
80 loss  0.0362926721572876 tensor([[ 1.2848, -2.0132]], grad_fn=<AddmmBackward>) tensor([0])
81 loss  0.02526695467531681 tensor([[-1.8539,  1.8117]], grad_fn=<AddmmBackward>) tensor([1])
82 loss  0.1787930428981781 tensor([[-0.9212,  0.7096]], grad_fn=<AddmmBackward>) tensor([1])
83 loss  0.12363942712545395 tensor([[-1.1211,  0.9068]], grad_fn=<AddmmBackward>) tensor([1])
84 loss  0.008083012886345387 tensor([[ 2.0066, -2.8073]], grad_fn=<AddmmBackward>) tensor([0])
85 loss  0.02347055822610855 tensor([[-1.8998,  1.8404]], grad_fn=<AddmmBackward>) tensor([1])
86 loss  0.030107680708169937 tensor([[-1.7950,  1.6929]], grad_fn=<AddmmBackward>) tensor([1])
87 loss  8.135758399963379 tensor([[ 3.5902, -4.545

172 loss  0.004817782435566187 tensor([[ 2.2860, -3.0470]], grad_fn=<AddmmBackward>) tensor([0])
173 loss  0.020371729508042336 tensor([[-2.0191,  1.8643]], grad_fn=<AddmmBackward>) tensor([1])
174 loss  1.2109087705612183 tensor([[ 0.1944, -0.6628]], grad_fn=<AddmmBackward>) tensor([1])
175 loss  0.9226582646369934 tensor([[ 0.0149, -0.4011]], grad_fn=<AddmmBackward>) tensor([1])
176 loss  0.04142327606678009 tensor([[ 1.2447, -1.9185]], grad_fn=<AddmmBackward>) tensor([0])
177 loss  0.5238821506500244 tensor([[-0.3443,  0.0289]], grad_fn=<AddmmBackward>) tensor([1])
178 loss  0.09165529161691666 tensor([[-1.2781,  1.0654]], grad_fn=<AddmmBackward>) tensor([1])
179 loss  0.013939053751528263 tensor([[-2.1494,  2.1167]], grad_fn=<AddmmBackward>) tensor([1])
180 loss  0.6530662178993225 tensor([[-0.1838, -0.2656]], grad_fn=<AddmmBackward>) tensor([0])
181 loss  0.01900428906083107 tensor([[-2.0734,  1.8802]], grad_fn=<AddmmBackward>) tensor([1])
182 loss  0.03238882124423981 tensor([[-1

302 loss  0.16413050889968872 tensor([[ 0.6548, -1.0691]], grad_fn=<AddmmBackward>) tensor([0])
303 loss  0.04862741380929947 tensor([[ 1.2565, -1.7426]], grad_fn=<AddmmBackward>) tensor([0])
304 loss  1.3985201120376587 tensor([[ 0.2152, -0.8996]], grad_fn=<AddmmBackward>) tensor([1])
305 loss  0.30601754784584045 tensor([[-0.6656,  0.3616]], grad_fn=<AddmmBackward>) tensor([1])
306 loss  0.07844094187021255 tensor([[ 0.9802, -1.5258]], grad_fn=<AddmmBackward>) tensor([0])
307 loss  0.018548021093010902 tensor([[ 1.7160, -2.2621]], grad_fn=<AddmmBackward>) tensor([0])
308 loss  0.012469190172851086 tensor([[-2.2210,  2.1573]], grad_fn=<AddmmBackward>) tensor([1])
309 loss  0.7226271629333496 tensor([[-0.1558, -0.2140]], grad_fn=<AddmmBackward>) tensor([1])
310 loss  0.188290536403656 tensor([[ 0.5809, -0.9932]], grad_fn=<AddmmBackward>) tensor([0])
311 loss  0.10533824563026428 tensor([[ 0.7453, -1.4522]], grad_fn=<AddmmBackward>) tensor([0])
312 loss  0.00917149893939495 tensor([[-2.

439 loss  0.06490295380353928 tensor([[ 1.0198, -1.6824]], grad_fn=<AddmmBackward>) tensor([0])
440 loss  0.057329922914505005 tensor([[ 1.0713, -1.7588]], grad_fn=<AddmmBackward>) tensor([0])
441 loss  0.1175425723195076 tensor([[-1.1421,  0.9395]], grad_fn=<AddmmBackward>) tensor([1])
442 loss  0.053177036345005035 tensor([[ 1.1645, -1.7429]], grad_fn=<AddmmBackward>) tensor([0])
443 loss  0.01682601496577263 tensor([[-2.0432,  2.0332]], grad_fn=<AddmmBackward>) tensor([1])
444 loss  0.05666690692305565 tensor([[ 1.1646, -1.6775]], grad_fn=<AddmmBackward>) tensor([0])
445 loss  0.10957924276590347 tensor([[-1.1929,  0.9630]], grad_fn=<AddmmBackward>) tensor([1])
446 loss  0.30960866808891296 tensor([[-0.6944,  0.3193]], grad_fn=<AddmmBackward>) tensor([1])
447 loss  0.04329882189631462 tensor([[-1.6316,  1.4863]], grad_fn=<AddmmBackward>) tensor([1])
448 loss  0.3929513692855835 tensor([[-0.5251,  0.2061]], grad_fn=<AddmmBackward>) tensor([1])
449 loss  0.36768639087677 tensor([[ 0.1

569 loss  1.168121099472046 tensor([[ 0.2770, -0.5187]], grad_fn=<AddmmBackward>) tensor([1])
570 loss  0.09538757801055908 tensor([[-1.1575,  1.1443]], grad_fn=<AddmmBackward>) tensor([1])
571 loss  0.031720951199531555 tensor([[ 1.4620, -1.9728]], grad_fn=<AddmmBackward>) tensor([0])
572 loss  0.010822293348610401 tensor([[ 2.0823, -2.4384]], grad_fn=<AddmmBackward>) tensor([0])
573 loss  0.0159562136977911 tensor([[-2.0618,  2.0682]], grad_fn=<AddmmBackward>) tensor([1])
574 loss  0.07453897595405579 tensor([[ 1.1016, -1.4574]], grad_fn=<AddmmBackward>) tensor([0])
575 loss  0.053507495671510696 tensor([[ 1.2616, -1.6395]], grad_fn=<AddmmBackward>) tensor([0])
576 loss  0.03448616340756416 tensor([[ 1.4471, -1.9028]], grad_fn=<AddmmBackward>) tensor([0])
577 loss  0.06479200720787048 tensor([[-1.3737,  1.3303]], grad_fn=<AddmmBackward>) tensor([1])
578 loss  0.042754482477903366 tensor([[ 1.4115, -1.7193]], grad_fn=<AddmmBackward>) tensor([0])
579 loss  0.37743911147117615 tensor([[

666 loss  0.038602203130722046 tensor([[ 1.4739, -1.7611]], grad_fn=<AddmmBackward>) tensor([0])
667 loss  0.02881697192788124 tensor([[ 1.5739, -1.9584]], grad_fn=<AddmmBackward>) tensor([0])
668 loss  0.09209636598825455 tensor([[-1.1815,  1.1571]], grad_fn=<AddmmBackward>) tensor([1])
669 loss  0.09481525421142578 tensor([[ 0.9697, -1.3384]], grad_fn=<AddmmBackward>) tensor([0])
670 loss  0.22180971503257751 tensor([[-0.7448,  0.6482]], grad_fn=<AddmmBackward>) tensor([1])
671 loss  0.08082889020442963 tensor([[-1.3061,  1.1686]], grad_fn=<AddmmBackward>) tensor([1])
672 loss  0.12938809394836426 tensor([[-1.0217,  0.9578]], grad_fn=<AddmmBackward>) tensor([1])
673 loss  0.5101914405822754 tensor([[ 0.1354, -0.2716]], grad_fn=<AddmmBackward>) tensor([0])
674 loss  0.21548865735530853 tensor([[ 0.6559, -0.7692]], grad_fn=<AddmmBackward>) tensor([0])
675 loss  0.33158284425735474 tensor([[ 0.3681, -0.5654]], grad_fn=<AddmmBackward>) tensor([0])
676 loss  0.10141627490520477 tensor([[ 

762 loss  0.3152991235256195 tensor([[ 0.3252, -0.6672]], grad_fn=<AddmmBackward>) tensor([0])
763 loss  0.4253368377685547 tensor([[ 0.1945, -0.4402]], grad_fn=<AddmmBackward>) tensor([0])
764 loss  0.05302906036376953 tensor([[-1.5197,  1.3906]], grad_fn=<AddmmBackward>) tensor([1])
765 loss  1.1598901748657227 tensor([[-0.4727,  0.3110]], grad_fn=<AddmmBackward>) tensor([0])
766 loss  0.006416552234441042 tensor([[-2.5131,  2.5325]], grad_fn=<AddmmBackward>) tensor([1])
767 loss  0.5606896877288818 tensor([[ 0.0019, -0.2833]], grad_fn=<AddmmBackward>) tensor([0])
768 loss  0.5328975319862366 tensor([[ 0.1349, -0.2163]], grad_fn=<AddmmBackward>) tensor([0])
769 loss  0.10341879725456238 tensor([[-1.1423,  1.0745]], grad_fn=<AddmmBackward>) tensor([1])
770 loss  3.7223005294799805 tensor([[ 1.6945, -2.0033]], grad_fn=<AddmmBackward>) tensor([1])
771 loss  0.2780131697654724 tensor([[-0.6525,  0.4853]], grad_fn=<AddmmBackward>) tensor([1])
772 loss  0.11262702196836472 tensor([[ 0.9357

856 loss  0.0979980081319809 tensor([[ 0.8995, -1.3739]], grad_fn=<AddmmBackward>) tensor([0])
857 loss  0.529943585395813 tensor([[-0.3153,  0.0430]], grad_fn=<AddmmBackward>) tensor([1])
858 loss  0.08993253856897354 tensor([[ 1.0291, -1.3343]], grad_fn=<AddmmBackward>) tensor([0])
859 loss  0.1163075640797615 tensor([[ 0.7557, -1.3371]], grad_fn=<AddmmBackward>) tensor([0])
860 loss  0.011843955144286156 tensor([[-2.2174,  2.2126]], grad_fn=<AddmmBackward>) tensor([1])
861 loss  0.05489234998822212 tensor([[ 1.3480, -1.5269]], grad_fn=<AddmmBackward>) tensor([0])
862 loss  0.04031176492571831 tensor([[-1.6202,  1.5707]], grad_fn=<AddmmBackward>) tensor([1])
863 loss  1.9457955360412598 tensor([[ 0.7065, -1.0852]], grad_fn=<AddmmBackward>) tensor([1])
864 loss  0.19872042536735535 tensor([[-0.8405,  0.6744]], grad_fn=<AddmmBackward>) tensor([1])
865 loss  0.16104842722415924 tensor([[-0.9735,  0.7710]], grad_fn=<AddmmBackward>) tensor([1])
866 loss  0.013533157296478748 tensor([[-2.2

943 loss  0.0618804395198822 tensor([[-1.4895,  1.2620]], grad_fn=<AddmmBackward>) tensor([1])
944 loss  0.021452412009239197 tensor([[-1.9543,  1.8769]], grad_fn=<AddmmBackward>) tensor([1])
945 loss  0.594697892665863 tensor([[-0.2357, -0.0280]], grad_fn=<AddmmBackward>) tensor([1])
946 loss  0.5914717316627502 tensor([[-0.2515, -0.0367]], grad_fn=<AddmmBackward>) tensor([1])
947 loss  1.000948429107666 tensor([[-0.4266,  0.1162]], grad_fn=<AddmmBackward>) tensor([0])
948 loss  0.1264011263847351 tensor([[-1.1486,  0.8558]], grad_fn=<AddmmBackward>) tensor([1])
949 loss  1.641451358795166 tensor([[-0.8618,  0.5643]], grad_fn=<AddmmBackward>) tensor([0])
950 loss  0.02490725927054882 tensor([[-1.8991,  1.7810]], grad_fn=<AddmmBackward>) tensor([1])
951 loss  1.0774186849594116 tensor([[-0.4348,  0.2264]], grad_fn=<AddmmBackward>) tensor([0])
952 loss  0.5765794515609741 tensor([[-0.0298, -0.2784]], grad_fn=<AddmmBackward>) tensor([0])
953 loss  0.46652376651763916 tensor([[ 0.0624, -0

1034 loss  1.3351820707321167 tensor([[-0.5383,  0.4915]], grad_fn=<AddmmBackward>) tensor([0])
1035 loss  0.9112687706947327 tensor([[-0.2521,  0.1450]], grad_fn=<AddmmBackward>) tensor([0])
1036 loss  0.05499187111854553 tensor([[-1.5168,  1.3561]], grad_fn=<AddmmBackward>) tensor([1])
1037 loss  0.17802761495113373 tensor([[-0.9653,  0.6702]], grad_fn=<AddmmBackward>) tensor([1])
1038 loss  0.46792128682136536 tensor([[-0.3536,  0.1628]], grad_fn=<AddmmBackward>) tensor([1])
1039 loss  0.25891074538230896 tensor([[ 0.5486, -0.6704]], grad_fn=<AddmmBackward>) tensor([0])
1040 loss  0.05643641576170921 tensor([[-1.4529,  1.3934]], grad_fn=<AddmmBackward>) tensor([1])
1041 loss  0.06620428711175919 tensor([[ 1.1174, -1.5643]], grad_fn=<AddmmBackward>) tensor([0])
1042 loss  0.2363446205854416 tensor([[ 0.4654, -0.8566]], grad_fn=<AddmmBackward>) tensor([0])
1043 loss  0.016767755150794983 tensor([[-2.0350,  2.0449]], grad_fn=<AddmmBackward>) tensor([1])
1044 loss  0.20380152761936188 t

1150 loss  0.133559912443161 tensor([[-1.0471,  0.8986]], grad_fn=<AddmmBackward>) tensor([1])
1151 loss  0.049475960433483124 tensor([[-1.5195,  1.4619]], grad_fn=<AddmmBackward>) tensor([1])
1152 loss  0.43913260102272034 tensor([[-0.4280,  0.1674]], grad_fn=<AddmmBackward>) tensor([1])
1153 loss  0.12877705693244934 tensor([[-1.0764,  0.9082]], grad_fn=<AddmmBackward>) tensor([1])
1154 loss  0.009352562017738819 tensor([[-2.3326,  2.3348]], grad_fn=<AddmmBackward>) tensor([1])
1155 loss  0.27303972840309143 tensor([[-0.7164,  0.4421]], grad_fn=<AddmmBackward>) tensor([1])
1156 loss  0.31544575095176697 tensor([[ 0.3688, -0.6231]], grad_fn=<AddmmBackward>) tensor([0])
1157 loss  0.6056046485900879 tensor([[ 0.0078, -0.1756]], grad_fn=<AddmmBackward>) tensor([0])
1158 loss  0.16389253735542297 tensor([[-0.9707,  0.7548]], grad_fn=<AddmmBackward>) tensor([1])
1159 loss  0.13508838415145874 tensor([[-1.0843,  0.8492]], grad_fn=<AddmmBackward>) tensor([1])
1160 loss  0.006887859664857387

1245 loss  0.5063568949699402 tensor([[-0.2499,  0.1668]], grad_fn=<AddmmBackward>) tensor([1])
1246 loss  0.12690137326717377 tensor([[ 0.7829, -1.2173]], grad_fn=<AddmmBackward>) tensor([0])
1247 loss  0.01707812398672104 tensor([[ 1.8146, -2.2468]], grad_fn=<AddmmBackward>) tensor([0])
1248 loss  0.0069951158948242664 tensor([[ 2.2863, -2.6728]], grad_fn=<AddmmBackward>) tensor([0])
1249 loss  0.01565699838101864 tensor([[ 1.9117, -2.2373]], grad_fn=<AddmmBackward>) tensor([0])
1250 loss  0.002687891712412238 tensor([[ 2.7367, -3.1809]], grad_fn=<AddmmBackward>) tensor([0])
1251 loss  0.005444816313683987 tensor([[ 2.3076, -2.9028]], grad_fn=<AddmmBackward>) tensor([0])
1252 loss  0.061092037707567215 tensor([[-1.3775,  1.3871]], grad_fn=<AddmmBackward>) tensor([1])
1253 loss  0.0017995130037888885 tensor([[ 3.0111, -3.3082]], grad_fn=<AddmmBackward>) tensor([0])
1254 loss  0.5747638940811157 tensor([[-0.1893,  0.0634]], grad_fn=<AddmmBackward>) tensor([1])
1255 loss  0.039634842425

1374 loss  3.415559768676758 tensor([[ 1.4766, -1.9055]], grad_fn=<AddmmBackward>) tensor([1])
1375 loss  0.04554178938269615 tensor([[-1.5488,  1.5175]], grad_fn=<AddmmBackward>) tensor([1])
1376 loss  0.3510892689228058 tensor([[-0.6074,  0.2587]], grad_fn=<AddmmBackward>) tensor([1])
1377 loss  0.0120439687743783 tensor([[-2.2107,  2.2025]], grad_fn=<AddmmBackward>) tensor([1])
1378 loss  1.4432146549224854 tensor([[-0.7693,  0.4045]], grad_fn=<AddmmBackward>) tensor([0])
1379 loss  0.0085295420140028 tensor([[-2.3758,  2.3842]], grad_fn=<AddmmBackward>) tensor([1])
1380 loss  2.086081027984619 tensor([[-1.2092,  0.7443]], grad_fn=<AddmmBackward>) tensor([0])
1381 loss  0.0059661162085831165 tensor([[-2.5733,  2.5454]], grad_fn=<AddmmBackward>) tensor([1])
1382 loss  2.1142418384552 tensor([[-1.1414,  0.8442]], grad_fn=<AddmmBackward>) tensor([0])
1383 loss  2.1466381549835205 tensor([[-1.1438,  0.8786]], grad_fn=<AddmmBackward>) tensor([0])
1384 loss  1.8087587356567383 tensor([[-0

1506 loss  0.05416730046272278 tensor([[-1.5048,  1.3837]], grad_fn=<AddmmBackward>) tensor([1])
1507 loss  0.11656688898801804 tensor([[-1.0904,  1.0000]], grad_fn=<AddmmBackward>) tensor([1])
1508 loss  0.02717742696404457 tensor([[-1.8364,  1.7554]], grad_fn=<AddmmBackward>) tensor([1])
1509 loss  0.05311022698879242 tensor([[ 1.2760, -1.6327]], grad_fn=<AddmmBackward>) tensor([0])
1510 loss  0.5414766669273376 tensor([[-0.2409,  0.0896]], grad_fn=<AddmmBackward>) tensor([1])
1511 loss  1.0125062465667725 tensor([[ 0.1346, -0.4265]], grad_fn=<AddmmBackward>) tensor([1])
1512 loss  0.22122816741466522 tensor([[ 0.5215, -0.8744]], grad_fn=<AddmmBackward>) tensor([0])
1513 loss  0.00709525914862752 tensor([[-2.4736,  2.4711]], grad_fn=<AddmmBackward>) tensor([1])
1514 loss  0.253434956073761 tensor([[ 0.4659, -0.7774]], grad_fn=<AddmmBackward>) tensor([0])
1515 loss  0.3618502616882324 tensor([[ 0.3461, -0.4840]], grad_fn=<AddmmBackward>) tensor([0])
1516 loss  0.6848030090332031 tenso

1636 loss  0.009599123150110245 tensor([[-2.3249,  2.3164]], grad_fn=<AddmmBackward>) tensor([1])
1637 loss  0.0183876920491457 tensor([[-1.9966,  1.9903]], grad_fn=<AddmmBackward>) tensor([1])
1638 loss  0.0151565782725811 tensor([[-2.0971,  2.0846]], grad_fn=<AddmmBackward>) tensor([1])
1639 loss  0.009547289460897446 tensor([[-2.3334,  2.3133]], grad_fn=<AddmmBackward>) tensor([1])
1640 loss  0.009590149857103825 tensor([[-2.3208,  2.3214]], grad_fn=<AddmmBackward>) tensor([1])
1641 loss  0.02982828952372074 tensor([[-1.7520,  1.7453]], grad_fn=<AddmmBackward>) tensor([1])
1642 loss  1.6229935884475708 tensor([[-0.7753,  0.6279]], grad_fn=<AddmmBackward>) tensor([0])
1643 loss  0.014565917663276196 tensor([[-2.1031,  2.1187]], grad_fn=<AddmmBackward>) tensor([1])
1644 loss  1.8080415725708008 tensor([[-0.9531,  0.6758]], grad_fn=<AddmmBackward>) tensor([0])
1645 loss  0.3428927958011627 tensor([[ 0.2631, -0.6309]], grad_fn=<AddmmBackward>) tensor([0])
1646 loss  0.47113147377967834 

1762 loss  2.288132429122925 tensor([[-1.1071,  1.0741]], grad_fn=<AddmmBackward>) tensor([0])
1763 loss  0.010182811878621578 tensor([[-2.2797,  2.3022]], grad_fn=<AddmmBackward>) tensor([1])
1764 loss  0.006838962901383638 tensor([[-2.4748,  2.5068]], grad_fn=<AddmmBackward>) tensor([1])
1765 loss  1.3967419862747192 tensor([[-0.6141,  0.4984]], grad_fn=<AddmmBackward>) tensor([0])
1766 loss  0.11512192338705063 tensor([[-1.1201,  0.9836]], grad_fn=<AddmmBackward>) tensor([1])
1767 loss  0.30175459384918213 tensor([[-0.5923,  0.4512]], grad_fn=<AddmmBackward>) tensor([1])
1768 loss  0.6582018136978149 tensor([[-0.1300, -0.2012]], grad_fn=<AddmmBackward>) tensor([0])
1769 loss  0.629553496837616 tensor([[-0.0038, -0.1353]], grad_fn=<AddmmBackward>) tensor([0])
1770 loss  0.4139794111251831 tensor([[-0.4767,  0.1911]], grad_fn=<AddmmBackward>) tensor([1])
1771 loss  0.16792628169059753 tensor([[ 0.6694, -1.0297]], grad_fn=<AddmmBackward>) tensor([0])
1772 loss  0.05067412182688713 tens

1897 loss  0.8587602376937866 tensor([[-0.2317,  0.0759]], grad_fn=<AddmmBackward>) tensor([0])
1898 loss  1.0162739753723145 tensor([[-0.3155,  0.2514]], grad_fn=<AddmmBackward>) tensor([0])
1899 loss  0.4693443477153778 tensor([[ 0.0993, -0.4133]], grad_fn=<AddmmBackward>) tensor([0])
1900 loss  0.1506757289171219 tensor([[ 0.7490, -1.0674]], grad_fn=<AddmmBackward>) tensor([0])
1901 loss  0.06877084821462631 tensor([[-1.4165,  1.2259]], grad_fn=<AddmmBackward>) tensor([1])
1902 loss  0.13230569660663605 tensor([[ 0.6348, -1.3210]], grad_fn=<AddmmBackward>) tensor([0])
1903 loss  0.01574419066309929 tensor([[-2.0871,  2.0563]], grad_fn=<AddmmBackward>) tensor([1])
1904 loss  0.0038666974287480116 tensor([[ 2.5088, -3.0446]], grad_fn=<AddmmBackward>) tensor([0])
1905 loss  0.42131760716438293 tensor([[-0.4053,  0.2410]], grad_fn=<AddmmBackward>) tensor([1])
1906 loss  0.10865970700979233 tensor([[-1.1720,  0.9927]], grad_fn=<AddmmBackward>) tensor([1])
1907 loss  0.0048832674510777 te

2023 loss  0.13639061152935028 tensor([[ 0.6667, -1.2566]], grad_fn=<AddmmBackward>) tensor([0])
2024 loss  0.0934443548321724 tensor([[ 1.0288, -1.2945]], grad_fn=<AddmmBackward>) tensor([0])
2025 loss  0.01422715000808239 tensor([[-2.1327,  2.1128]], grad_fn=<AddmmBackward>) tensor([1])
2026 loss  0.01996937021613121 tensor([[-1.9373,  1.9662]], grad_fn=<AddmmBackward>) tensor([1])
2027 loss  0.49201056361198425 tensor([[-0.3363,  0.1169]], grad_fn=<AddmmBackward>) tensor([1])
2028 loss  0.22696448862552643 tensor([[-0.7333,  0.6340]], grad_fn=<AddmmBackward>) tensor([1])
2029 loss  0.009889062494039536 tensor([[-2.2894,  2.3220]], grad_fn=<AddmmBackward>) tensor([1])
2030 loss  0.09061699360609055 tensor([[ 1.0203, -1.3352]], grad_fn=<AddmmBackward>) tensor([0])
2031 loss  0.1926843225955963 tensor([[ 0.5504, -0.9985]], grad_fn=<AddmmBackward>) tensor([0])
2032 loss  0.033632632344961166 tensor([[ 1.4436, -1.9318]], grad_fn=<AddmmBackward>) tensor([0])
2033 loss  0.17795248329639435

2115 loss  0.07342148572206497 tensor([[-1.3395,  1.2351]], grad_fn=<AddmmBackward>) tensor([1])
2116 loss  0.012281154282391071 tensor([[-2.2256,  2.1679]], grad_fn=<AddmmBackward>) tensor([1])
2117 loss  0.23556427657604218 tensor([[-0.7448,  0.5809]], grad_fn=<AddmmBackward>) tensor([1])
2118 loss  0.37613731622695923 tensor([[ 0.2248, -0.5590]], grad_fn=<AddmmBackward>) tensor([0])
2119 loss  0.056072767823934555 tensor([[ 1.1975, -1.6554]], grad_fn=<AddmmBackward>) tensor([0])
2120 loss  0.8327570557594299 tensor([[-0.0474, -0.3095]], grad_fn=<AddmmBackward>) tensor([1])
2121 loss  0.01384547259658575 tensor([[-2.1288,  2.1441]], grad_fn=<AddmmBackward>) tensor([1])
2122 loss  1.4245541095733643 tensor([[ 0.4543, -0.6950]], grad_fn=<AddmmBackward>) tensor([1])
2123 loss  0.5786526799201965 tensor([[-0.0459, -0.2897]], grad_fn=<AddmmBackward>) tensor([0])
2124 loss  0.08312459290027618 tensor([[ 0.9182, -1.5274]], grad_fn=<AddmmBackward>) tensor([0])
2125 loss  0.00929776206612587 

2225 loss  0.04736153781414032 tensor([[-1.5933,  1.4329]], grad_fn=<AddmmBackward>) tensor([1])
2226 loss  0.003412497229874134 tensor([[ 2.5248, -3.1538]], grad_fn=<AddmmBackward>) tensor([0])
2227 loss  0.09590206295251846 tensor([[ 0.8872, -1.4089]], grad_fn=<AddmmBackward>) tensor([0])
2228 loss  0.036671970039606094 tensor([[ 1.4350, -1.8524]], grad_fn=<AddmmBackward>) tensor([0])
2229 loss  6.726256370544434 tensor([[ 3.0520, -3.6731]], grad_fn=<AddmmBackward>) tensor([1])
2230 loss  0.04105149954557419 tensor([[ 1.3472, -1.8251]], grad_fn=<AddmmBackward>) tensor([0])
2231 loss  0.5277566909790039 tensor([[-0.3338,  0.0298]], grad_fn=<AddmmBackward>) tensor([1])
2232 loss  0.1777563989162445 tensor([[-0.9948,  0.6424]], grad_fn=<AddmmBackward>) tensor([1])
2233 loss  0.31326591968536377 tensor([[ 0.3893, -0.6107]], grad_fn=<AddmmBackward>) tensor([0])
2234 loss  0.493805468082428 tensor([[ 0.0430, -0.4056]], grad_fn=<AddmmBackward>) tensor([0])
2235 loss  0.026096295565366745 te

2359 loss  0.5439770221710205 tensor([[-0.0315, -0.3561]], grad_fn=<AddmmBackward>) tensor([0])
2360 loss  0.010212193243205547 tensor([[-2.2818,  2.2973]], grad_fn=<AddmmBackward>) tensor([1])
2361 loss  0.12048231810331345 tensor([[-1.1022,  0.9532]], grad_fn=<AddmmBackward>) tensor([1])
2362 loss  0.1579262912273407 tensor([[ 0.6713, -1.0943]], grad_fn=<AddmmBackward>) tensor([0])
2363 loss  0.1992916613817215 tensor([[ 0.6211, -0.8906]], grad_fn=<AddmmBackward>) tensor([0])
2364 loss  0.2346220314502716 tensor([[-0.7462,  0.5840]], grad_fn=<AddmmBackward>) tensor([1])
2365 loss  0.03610159084200859 tensor([[ 1.4807, -1.8226]], grad_fn=<AddmmBackward>) tensor([0])
2366 loss  0.02562164142727852 tensor([[ 1.5200, -2.1314]], grad_fn=<AddmmBackward>) tensor([0])
2367 loss  0.008349275216460228 tensor([[ 2.1391, -2.6423]], grad_fn=<AddmmBackward>) tensor([0])
2368 loss  0.22631044685840607 tensor([[-0.7679,  0.6026]], grad_fn=<AddmmBackward>) tensor([1])
2369 loss  0.22164849936962128 t

2478 loss  0.006285304669290781 tensor([[ 2.3794, -2.6869]], grad_fn=<AddmmBackward>) tensor([0])
2479 loss  0.03136472776532173 tensor([[-1.7188,  1.7276]], grad_fn=<AddmmBackward>) tensor([1])
2480 loss  0.04488424211740494 tensor([[ 1.3736, -1.7076]], grad_fn=<AddmmBackward>) tensor([0])
2481 loss  0.029581459239125252 tensor([[-1.7514,  1.7544]], grad_fn=<AddmmBackward>) tensor([1])
2482 loss  0.03128766641020775 tensor([[ 1.4946, -1.9542]], grad_fn=<AddmmBackward>) tensor([0])
2483 loss  3.027411460876465 tensor([[ 1.3034, -1.6744]], grad_fn=<AddmmBackward>) tensor([1])
2484 loss  0.07780997455120087 tensor([[-1.3142,  1.2001]], grad_fn=<AddmmBackward>) tensor([1])
2485 loss  0.19876137375831604 tensor([[ 0.5608, -0.9539]], grad_fn=<AddmmBackward>) tensor([0])
2486 loss  0.23857055604457855 tensor([[ 0.4586, -0.8529]], grad_fn=<AddmmBackward>) tensor([0])
2487 loss  0.17081041634082794 tensor([[ 0.6538, -1.0268]], grad_fn=<AddmmBackward>) tensor([0])
2488 loss  0.1739928275346756 

2570 loss  0.025287063792347908 tensor([[ 1.7789, -1.8858]], grad_fn=<AddmmBackward>) tensor([0])
2571 loss  0.02397591434419155 tensor([[-1.9097,  1.8089]], grad_fn=<AddmmBackward>) tensor([1])
2572 loss  2.6774654388427734 tensor([[ 1.1057, -1.5005]], grad_fn=<AddmmBackward>) tensor([1])
2573 loss  0.11186464130878448 tensor([[ 0.8965, -1.2375]], grad_fn=<AddmmBackward>) tensor([0])
2574 loss  0.34816595911979675 tensor([[ 0.3059, -0.5700]], grad_fn=<AddmmBackward>) tensor([0])
2575 loss  0.03022126667201519 tensor([[ 1.5492, -1.9349]], grad_fn=<AddmmBackward>) tensor([0])
2576 loss  0.22380976378917694 tensor([[-0.8381,  0.5449]], grad_fn=<AddmmBackward>) tensor([1])
2577 loss  0.006041479296982288 tensor([[-2.5856,  2.5205]], grad_fn=<AddmmBackward>) tensor([1])
2578 loss  2.292229413986206 tensor([[-1.3523,  0.8335]], grad_fn=<AddmmBackward>) tensor([0])
2579 loss  0.0060643479228019714 tensor([[-2.5899,  2.5124]], grad_fn=<AddmmBackward>) tensor([1])
2580 loss  0.7134028077125549

2687 loss  0.011519347317516804 tensor([[-2.2685,  2.1894]], grad_fn=<AddmmBackward>) tensor([1])
2688 loss  0.3951508700847626 tensor([[ 0.1628, -0.5616]], grad_fn=<AddmmBackward>) tensor([0])
2689 loss  0.24671587347984314 tensor([[ 0.4239, -0.8497]], grad_fn=<AddmmBackward>) tensor([0])
2690 loss  0.7382563948631287 tensor([[-0.2736, -0.1853]], grad_fn=<AddmmBackward>) tensor([0])
2691 loss  0.8098945021629333 tensor([[-0.3847, -0.1634]], grad_fn=<AddmmBackward>) tensor([0])
2692 loss  0.43060120940208435 tensor([[-0.4679,  0.1517]], grad_fn=<AddmmBackward>) tensor([1])
2693 loss  0.008213551715016365 tensor([[-2.4226,  2.3752]], grad_fn=<AddmmBackward>) tensor([1])
2694 loss  0.4988519847393036 tensor([[-0.3119,  0.1238]], grad_fn=<AddmmBackward>) tensor([1])
2695 loss  0.011236836202442646 tensor([[-2.3080,  2.1750]], grad_fn=<AddmmBackward>) tensor([1])
2696 loss  0.03253272920846939 tensor([[-1.7442,  1.6650]], grad_fn=<AddmmBackward>) tensor([1])
2697 loss  0.2005920708179474 t

2822 loss  0.422198623418808 tensor([[-0.4234,  0.2204]], grad_fn=<AddmmBackward>) tensor([1])
2823 loss  0.008171696215867996 tensor([[-2.4064,  2.3966]], grad_fn=<AddmmBackward>) tensor([1])
2824 loss  1.5665453672409058 tensor([[ 0.3807, -0.9517]], grad_fn=<AddmmBackward>) tensor([1])
2825 loss  0.061175357550382614 tensor([[-1.4657,  1.2976]], grad_fn=<AddmmBackward>) tensor([1])
2826 loss  0.7773837447166443 tensor([[-0.3054, -0.1435]], grad_fn=<AddmmBackward>) tensor([0])
2827 loss  0.42544761300086975 tensor([[ 0.1108, -0.5236]], grad_fn=<AddmmBackward>) tensor([0])
2828 loss  0.026365352794528008 tensor([[-1.8416,  1.7808]], grad_fn=<AddmmBackward>) tensor([1])
2829 loss  0.7197707891464233 tensor([[-0.2332, -0.1807]], grad_fn=<AddmmBackward>) tensor([0])
2830 loss  0.7344950437545776 tensor([[-0.2938, -0.2128]], grad_fn=<AddmmBackward>) tensor([0])
2831 loss  0.4358990788459778 tensor([[-0.0395, -0.6440]], grad_fn=<AddmmBackward>) tensor([0])
2832 loss  0.4795064926147461 tens

2953 loss  0.06091393530368805 tensor([[-1.4522,  1.3155]], grad_fn=<AddmmBackward>) tensor([1])
2954 loss  0.07979347556829453 tensor([[ 1.0618, -1.4264]], grad_fn=<AddmmBackward>) tensor([0])
2955 loss  0.06720180809497833 tensor([[ 1.0488, -1.6175]], grad_fn=<AddmmBackward>) tensor([0])
2956 loss  0.2095739096403122 tensor([[-0.8480,  0.6081]], grad_fn=<AddmmBackward>) tensor([1])
2957 loss  0.42279696464538574 tensor([[ 0.2347, -0.4073]], grad_fn=<AddmmBackward>) tensor([0])
2958 loss  0.04434065520763397 tensor([[-1.6218,  1.4718]], grad_fn=<AddmmBackward>) tensor([1])
2959 loss  0.21820220351219177 tensor([[ 0.5785, -0.8328]], grad_fn=<AddmmBackward>) tensor([0])
2960 loss  0.4197060167789459 tensor([[-0.4490,  0.2020]], grad_fn=<AddmmBackward>) tensor([1])
2961 loss  0.10318722575902939 tensor([[ 0.8816, -1.3375]], grad_fn=<AddmmBackward>) tensor([0])
2962 loss  0.10459938645362854 tensor([[ 0.8465, -1.3584]], grad_fn=<AddmmBackward>) tensor([0])
2963 loss  0.2434958666563034 te

3041 loss  0.42126917839050293 tensor([[-0.4259,  0.2205]], grad_fn=<AddmmBackward>) tensor([1])
3042 loss  0.25642338395118713 tensor([[ 0.4465, -0.7835]], grad_fn=<AddmmBackward>) tensor([0])
3043 loss  0.0370534211397171 tensor([[-1.7073,  1.5695]], grad_fn=<AddmmBackward>) tensor([1])
3044 loss  0.03510269895195961 tensor([[-1.6765,  1.6554]], grad_fn=<AddmmBackward>) tensor([1])
3045 loss  0.3414802551269531 tensor([[ 0.3582, -0.5407]], grad_fn=<AddmmBackward>) tensor([0])
3046 loss  0.008136342279613018 tensor([[-2.3913,  2.4160]], grad_fn=<AddmmBackward>) tensor([1])
3047 loss  0.14466142654418945 tensor([[-0.9863,  0.8738]], grad_fn=<AddmmBackward>) tensor([1])
3048 loss  0.10100085288286209 tensor([[-1.2086,  1.0331]], grad_fn=<AddmmBackward>) tensor([1])
3049 loss  0.07126644998788834 tensor([[-1.3666,  1.2389]], grad_fn=<AddmmBackward>) tensor([1])
3050 loss  0.5131717920303345 tensor([[-0.2449,  0.1547]], grad_fn=<AddmmBackward>) tensor([1])
3051 loss  0.09043176472187042 t

3129 loss  0.14449460804462433 tensor([[-1.0694,  0.7920]], grad_fn=<AddmmBackward>) tensor([1])
3130 loss  0.007627884857356548 tensor([[-2.4207,  2.4514]], grad_fn=<AddmmBackward>) tensor([1])
3131 loss  0.5469980835914612 tensor([[-0.3141,  0.0033]], grad_fn=<AddmmBackward>) tensor([1])
3132 loss  0.02556668035686016 tensor([[-1.8195,  1.8342]], grad_fn=<AddmmBackward>) tensor([1])
3133 loss  0.06864842772483826 tensor([[ 1.0660, -1.5783]], grad_fn=<AddmmBackward>) tensor([0])
3134 loss  0.1978386491537094 tensor([[-0.9180,  0.6018]], grad_fn=<AddmmBackward>) tensor([1])
3135 loss  0.019339922815561295 tensor([[-2.0216,  1.9143]], grad_fn=<AddmmBackward>) tensor([1])
3136 loss  0.03180433064699173 tensor([[ 1.4809, -1.9513]], grad_fn=<AddmmBackward>) tensor([0])
3137 loss  2.021326780319214 tensor([[ 0.8026, -1.0766]], grad_fn=<AddmmBackward>) tensor([1])
3138 loss  0.27303093671798706 tensor([[ 0.3734, -0.7852]], grad_fn=<AddmmBackward>) tensor([0])
3139 loss  0.14094844460487366 t

3250 loss  0.06832481920719147 tensor([[-1.4111,  1.2380]], grad_fn=<AddmmBackward>) tensor([1])
3251 loss  1.111947774887085 tensor([[ 0.1639, -0.5492]], grad_fn=<AddmmBackward>) tensor([1])
3252 loss  0.025730857625603676 tensor([[ 1.5715, -2.0756]], grad_fn=<AddmmBackward>) tensor([0])
3253 loss  0.08725003898143768 tensor([[ 0.9806, -1.4145]], grad_fn=<AddmmBackward>) tensor([0])
3254 loss  0.05418277159333229 tensor([[ 1.1953, -1.6929]], grad_fn=<AddmmBackward>) tensor([0])
3255 loss  0.17124363780021667 tensor([[ 0.6137, -1.0642]], grad_fn=<AddmmBackward>) tensor([0])
3256 loss  0.21965014934539795 tensor([[ 0.5764, -0.8274]], grad_fn=<AddmmBackward>) tensor([0])
3257 loss  0.020423121750354767 tensor([[-1.9651,  1.9158]], grad_fn=<AddmmBackward>) tensor([1])
3258 loss  0.07368862628936768 tensor([[-1.3003,  1.2705]], grad_fn=<AddmmBackward>) tensor([1])
3259 loss  0.05609993264079094 tensor([[ 1.2241, -1.6283]], grad_fn=<AddmmBackward>) tensor([0])
3260 loss  0.11950495094060898

3380 loss  0.1669621467590332 tensor([[ 0.5694, -1.1359]], grad_fn=<AddmmBackward>) tensor([0])
3381 loss  0.6887780427932739 tensor([[-0.2328, -0.2415]], grad_fn=<AddmmBackward>) tensor([0])
3382 loss  0.8567233681678772 tensor([[-0.0331, -0.3373]], grad_fn=<AddmmBackward>) tensor([1])
3383 loss  0.3645794093608856 tensor([[ 0.1940, -0.6272]], grad_fn=<AddmmBackward>) tensor([0])
3384 loss  0.20532318949699402 tensor([[-0.9605,  0.5183]], grad_fn=<AddmmBackward>) tensor([1])
3385 loss  0.008773352019488811 tensor([[-2.3778,  2.3538]], grad_fn=<AddmmBackward>) tensor([1])
3386 loss  0.21794474124908447 tensor([[-0.8129,  0.5997]], grad_fn=<AddmmBackward>) tensor([1])
3387 loss  1.224088191986084 tensor([[-0.7240,  0.1519]], grad_fn=<AddmmBackward>) tensor([0])
3388 loss  0.04698047414422035 tensor([[-1.5961,  1.4383]], grad_fn=<AddmmBackward>) tensor([1])
3389 loss  0.1563245952129364 tensor([[-0.9936,  0.7830]], grad_fn=<AddmmBackward>) tensor([1])
3390 loss  0.18686792254447937 tenso

3508 loss  2.851384162902832 tensor([[ 1.1990, -1.5928]], grad_fn=<AddmmBackward>) tensor([1])
3509 loss  0.1255403608083725 tensor([[ 0.7595, -1.2522]], grad_fn=<AddmmBackward>) tensor([0])
3510 loss  0.041030675172805786 tensor([[-1.6320,  1.5408]], grad_fn=<AddmmBackward>) tensor([1])
3511 loss  0.09473393112421036 tensor([[-1.2233,  1.0857]], grad_fn=<AddmmBackward>) tensor([1])
3512 loss  0.11756546795368195 tensor([[ 0.7977, -1.2837]], grad_fn=<AddmmBackward>) tensor([0])
3513 loss  0.20172184705734253 tensor([[ 0.5316, -0.9668]], grad_fn=<AddmmBackward>) tensor([0])
3514 loss  0.009068256244063377 tensor([[-2.3660,  2.3325]], grad_fn=<AddmmBackward>) tensor([1])
3515 loss  0.34771114587783813 tensor([[ 0.3298, -0.5477]], grad_fn=<AddmmBackward>) tensor([0])
3516 loss  0.9047444462776184 tensor([[-0.4184, -0.0323]], grad_fn=<AddmmBackward>) tensor([0])
3517 loss  0.16337664425373077 tensor([[ 0.7000, -1.0289]], grad_fn=<AddmmBackward>) tensor([0])
3518 loss  0.1853625625371933 te

3633 loss  1.102443814277649 tensor([[ 0.1653, -0.5336]], grad_fn=<AddmmBackward>) tensor([1])
3634 loss  0.11976376175880432 tensor([[-1.1913,  0.8705]], grad_fn=<AddmmBackward>) tensor([1])
3635 loss  0.0071891192346811295 tensor([[-2.4640,  2.4676]], grad_fn=<AddmmBackward>) tensor([1])
3636 loss  0.2313988208770752 tensor([[-0.8664,  0.4793]], grad_fn=<AddmmBackward>) tensor([1])
3637 loss  0.22249016165733337 tensor([[ 0.3718, -1.0178]], grad_fn=<AddmmBackward>) tensor([0])
3638 loss  1.2815371751785278 tensor([[-0.5660,  0.3904]], grad_fn=<AddmmBackward>) tensor([0])
3639 loss  0.0678117647767067 tensor([[-1.4634,  1.1936]], grad_fn=<AddmmBackward>) tensor([1])
3640 loss  1.9107059240341187 tensor([[-1.0552,  0.6953]], grad_fn=<AddmmBackward>) tensor([0])
3641 loss  0.5698582530021667 tensor([[-0.1599, -0.4238]], grad_fn=<AddmmBackward>) tensor([0])
3642 loss  0.7462586164474487 tensor([[-0.2930, -0.1895]], grad_fn=<AddmmBackward>) tensor([0])
3643 loss  0.07567688077688217 tenso

3765 loss  1.0888586044311523 tensor([[-0.5093,  0.1691]], grad_fn=<AddmmBackward>) tensor([0])
3766 loss  0.0250284131616354 tensor([[-1.8463,  1.8289]], grad_fn=<AddmmBackward>) tensor([1])
3767 loss  0.014800508506596088 tensor([[-2.1290,  2.0767]], grad_fn=<AddmmBackward>) tensor([1])
3768 loss  0.7723655104637146 tensor([[-0.2639, -0.1113]], grad_fn=<AddmmBackward>) tensor([0])
3769 loss  0.8004512786865234 tensor([[-0.3294, -0.1252]], grad_fn=<AddmmBackward>) tensor([0])
3770 loss  0.57281094789505 tensor([[-0.1119, -0.3691]], grad_fn=<AddmmBackward>) tensor([0])
3771 loss  0.006496618967503309 tensor([[-2.5495,  2.4837]], grad_fn=<AddmmBackward>) tensor([1])
3772 loss  0.02395484782755375 tensor([[-1.9109,  1.8087]], grad_fn=<AddmmBackward>) tensor([1])
3773 loss  0.016464676707983017 tensor([[-2.1304,  1.9679]], grad_fn=<AddmmBackward>) tensor([1])
3774 loss  0.07548455893993378 tensor([[ 1.0807, -1.4651]], grad_fn=<AddmmBackward>) tensor([0])
3775 loss  0.2842519283294678 tens

3897 loss  0.13278797268867493 tensor([[-1.0739,  0.8780]], grad_fn=<AddmmBackward>) tensor([1])
3898 loss  0.0006255338666960597 tensor([[ 3.2598, -4.1168]], grad_fn=<AddmmBackward>) tensor([0])
3899 loss  0.0005353448214009404 tensor([[ 3.3074, -4.2250]], grad_fn=<AddmmBackward>) tensor([0])
3900 loss  0.0001538873475510627 tensor([[ 3.9205, -4.8588]], grad_fn=<AddmmBackward>) tensor([0])
3901 loss  0.07402599602937698 tensor([[-1.4006,  1.1655]], grad_fn=<AddmmBackward>) tensor([1])
3902 loss  0.9441629648208618 tensor([[-0.0077, -0.4592]], grad_fn=<AddmmBackward>) tensor([1])
3903 loss  0.1872926652431488 tensor([[-0.9137,  0.6663]], grad_fn=<AddmmBackward>) tensor([1])
3904 loss  5.549499034881592 tensor([[ 2.2713, -3.2743]], grad_fn=<AddmmBackward>) tensor([1])
3905 loss  0.03905227780342102 tensor([[ 1.2798, -1.9434]], grad_fn=<AddmmBackward>) tensor([0])
3906 loss  0.026505252346396446 tensor([[-1.9361,  1.6811]], grad_fn=<AddmmBackward>) tensor([1])
3907 loss  0.17906257510185

4024 loss  0.005904138553887606 tensor([[-2.5593,  2.5699]], grad_fn=<AddmmBackward>) tensor([1])
4025 loss  0.2789427638053894 tensor([[ 0.2394, -0.8946]], grad_fn=<AddmmBackward>) tensor([0])
4026 loss  0.2800016701221466 tensor([[ 0.2464, -0.8833]], grad_fn=<AddmmBackward>) tensor([0])
4027 loss  0.08720185607671738 tensor([[ 0.8211, -1.5745]], grad_fn=<AddmmBackward>) tensor([0])
4028 loss  0.6496957540512085 tensor([[-0.2368, -0.1479]], grad_fn=<AddmmBackward>) tensor([1])
4029 loss  0.08958545327186584 tensor([[ 0.9040, -1.4635]], grad_fn=<AddmmBackward>) tensor([0])
4030 loss  0.016792841255664825 tensor([[-2.1658,  1.9126]], grad_fn=<AddmmBackward>) tensor([1])
4031 loss  0.2105538547039032 tensor([[-0.8113,  0.6396]], grad_fn=<AddmmBackward>) tensor([1])
4032 loss  0.556499183177948 tensor([[-0.3165, -0.0215]], grad_fn=<AddmmBackward>) tensor([1])
4033 loss  0.4281933307647705 tensor([[-0.5010,  0.1255]], grad_fn=<AddmmBackward>) tensor([1])
4034 loss  0.14719517529010773 tens

4119 loss  1.7500293254852295 tensor([[ 0.5789, -0.9802]], grad_fn=<AddmmBackward>) tensor([1])
4120 loss  0.02446880005300045 tensor([[ 1.6323, -2.0658]], grad_fn=<AddmmBackward>) tensor([0])
4121 loss  2.1820642948150635 tensor([[ 0.7511, -1.3112]], grad_fn=<AddmmBackward>) tensor([1])
4122 loss  0.12982162833213806 tensor([[ 0.6586, -1.3174]], grad_fn=<AddmmBackward>) tensor([0])
4123 loss  0.9325047135353088 tensor([[ 0.0086, -0.4237]], grad_fn=<AddmmBackward>) tensor([1])
4124 loss  0.0525105819106102 tensor([[-1.5656,  1.3548]], grad_fn=<AddmmBackward>) tensor([1])
4125 loss  1.1516401767730713 tensor([[-0.5958,  0.1759]], grad_fn=<AddmmBackward>) tensor([0])
4126 loss  0.005681556649506092 tensor([[-2.5782,  2.5895]], grad_fn=<AddmmBackward>) tensor([1])
4127 loss  0.03345014899969101 tensor([[-1.8266,  1.5543]], grad_fn=<AddmmBackward>) tensor([1])
4128 loss  0.008873199112713337 tensor([[-2.3817,  2.3386]], grad_fn=<AddmmBackward>) tensor([1])
4129 loss  0.07950802892446518 te

4205 loss  0.012422214262187481 tensor([[-2.2556,  2.1265]], grad_fn=<AddmmBackward>) tensor([1])
4206 loss  0.20030076801776886 tensor([[-0.8847,  0.6214]], grad_fn=<AddmmBackward>) tensor([1])
4207 loss  0.022303108125925064 tensor([[-1.9054,  1.8864]], grad_fn=<AddmmBackward>) tensor([1])
4208 loss  0.1689503937959671 tensor([[-0.9780,  0.7145]], grad_fn=<AddmmBackward>) tensor([1])
4209 loss  1.4038513898849487 tensor([[-0.7764,  0.3455]], grad_fn=<AddmmBackward>) tensor([0])
4210 loss  0.39236173033714294 tensor([[ 0.0954, -0.6376]], grad_fn=<AddmmBackward>) tensor([0])
4211 loss  0.20085705816745758 tensor([[-0.9304,  0.5727]], grad_fn=<AddmmBackward>) tensor([1])
4212 loss  0.4408062696456909 tensor([[ 0.0057, -0.5850]], grad_fn=<AddmmBackward>) tensor([0])
4213 loss  0.1907782107591629 tensor([[-0.9246,  0.6351]], grad_fn=<AddmmBackward>) tensor([1])
4214 loss  0.20141401886940002 tensor([[ 0.4339, -1.0661]], grad_fn=<AddmmBackward>) tensor([0])
4215 loss  0.11012118309736252 t

4297 loss  0.3983467221260071 tensor([[-0.5325,  0.1822]], grad_fn=<AddmmBackward>) tensor([1])
4298 loss  0.668667197227478 tensor([[-0.2226, -0.2721]], grad_fn=<AddmmBackward>) tensor([0])
4299 loss  0.17634180188179016 tensor([[-1.1154,  0.5305]], grad_fn=<AddmmBackward>) tensor([1])
4300 loss  0.02331148274242878 tensor([[-1.9526,  1.7945]], grad_fn=<AddmmBackward>) tensor([1])
4301 loss  0.005765000823885202 tensor([[-2.5700,  2.5831]], grad_fn=<AddmmBackward>) tensor([1])
4302 loss  1.364194393157959 tensor([[-0.6663,  0.4028]], grad_fn=<AddmmBackward>) tensor([0])
4303 loss  0.02401106059551239 tensor([[-1.8921,  1.8251]], grad_fn=<AddmmBackward>) tensor([1])
4304 loss  0.05252935737371445 tensor([[-1.5578,  1.3622]], grad_fn=<AddmmBackward>) tensor([1])
4305 loss  0.0066641937009990215 tensor([[-2.4811,  2.5265]], grad_fn=<AddmmBackward>) tensor([1])
4306 loss  0.008156088180840015 tensor([[-2.4805,  2.3244]], grad_fn=<AddmmBackward>) tensor([1])
4307 loss  0.018508000299334526

4387 loss  0.11863049119710922 tensor([[-1.1156,  0.9563]], grad_fn=<AddmmBackward>) tensor([1])
4388 loss  0.5395056009292603 tensor([[-0.3899, -0.0546]], grad_fn=<AddmmBackward>) tensor([1])
4389 loss  0.023309269919991493 tensor([[ 1.5308, -2.2165]], grad_fn=<AddmmBackward>) tensor([0])
4390 loss  0.10744739323854446 tensor([[ 0.7794, -1.3971]], grad_fn=<AddmmBackward>) tensor([0])
4391 loss  0.35056817531585693 tensor([[-0.6397,  0.2281]], grad_fn=<AddmmBackward>) tensor([1])
4392 loss  0.22730283439159393 tensor([[-0.8485,  0.5172]], grad_fn=<AddmmBackward>) tensor([1])
4393 loss  3.832698345184326 tensor([[ 1.4934, -2.3174]], grad_fn=<AddmmBackward>) tensor([1])
4394 loss  0.08959559351205826 tensor([[ 0.8815, -1.4858]], grad_fn=<AddmmBackward>) tensor([0])
4395 loss  0.020904220640659332 tensor([[-1.9744,  1.8830]], grad_fn=<AddmmBackward>) tensor([1])
4396 loss  0.5083343982696533 tensor([[-0.1549, -0.5666]], grad_fn=<AddmmBackward>) tensor([0])
4397 loss  0.9037941694259644 te

4515 loss  0.0036952088121324778 tensor([[ 2.4679, -3.1310]], grad_fn=<AddmmBackward>) tensor([0])
4516 loss  0.017136014997959137 tensor([[ 1.6983, -2.3597]], grad_fn=<AddmmBackward>) tensor([0])
4517 loss  0.0033135293051600456 tensor([[ 2.4315, -3.2766]], grad_fn=<AddmmBackward>) tensor([0])
4518 loss  0.0006046851049177349 tensor([[ 3.1990, -4.2116]], grad_fn=<AddmmBackward>) tensor([0])
4519 loss  0.013566204346716404 tensor([[ 1.8890, -2.4044]], grad_fn=<AddmmBackward>) tensor([0])
4520 loss  0.0034534833393990993 tensor([[ 2.4818, -3.1848]], grad_fn=<AddmmBackward>) tensor([0])
4521 loss  0.0013821106404066086 tensor([[ 2.8890, -3.6944]], grad_fn=<AddmmBackward>) tensor([0])
4522 loss  0.5187586545944214 tensor([[-0.4010, -0.0153]], grad_fn=<AddmmBackward>) tensor([1])
4523 loss  0.006857195869088173 tensor([[ 2.2624, -2.7166]], grad_fn=<AddmmBackward>) tensor([0])
4524 loss  0.028532933443784714 tensor([[ 1.3875, -2.1549]], grad_fn=<AddmmBackward>) tensor([0])
4525 loss  1.8003

4636 loss  1.7353689670562744 tensor([[ 0.4825, -1.0588]], grad_fn=<AddmmBackward>) tensor([1])
4637 loss  0.12154719233512878 tensor([[ 0.7676, -1.2785]], grad_fn=<AddmmBackward>) tensor([0])
4638 loss  0.007895687595009804 tensor([[-2.4269,  2.4106]], grad_fn=<AddmmBackward>) tensor([1])
4639 loss  0.40992581844329834 tensor([[-0.5317,  0.1481]], grad_fn=<AddmmBackward>) tensor([1])
4640 loss  0.2669634521007538 tensor([[ 0.2696, -0.9146]], grad_fn=<AddmmBackward>) tensor([0])
4641 loss  0.02690870501101017 tensor([[-1.9254,  1.6765]], grad_fn=<AddmmBackward>) tensor([1])
4642 loss  1.706150770187378 tensor([[-0.9234,  0.5824]], grad_fn=<AddmmBackward>) tensor([0])
4643 loss  0.0073255738243460655 tensor([[-2.4514,  2.4613]], grad_fn=<AddmmBackward>) tensor([1])
4644 loss  0.468505322933197 tensor([[ 0.0306, -0.4842]], grad_fn=<AddmmBackward>) tensor([0])
4645 loss  0.020244870334863663 tensor([[-2.0190,  1.8707]], grad_fn=<AddmmBackward>) tensor([1])
4646 loss  0.35072222352027893 t

4761 loss  0.4288673400878906 tensor([[ 0.0497, -0.5748]], grad_fn=<AddmmBackward>) tensor([0])
4762 loss  0.0067981150932610035 tensor([[-2.4990,  2.4887]], grad_fn=<AddmmBackward>) tensor([1])
4763 loss  0.009387046098709106 tensor([[-2.3597,  2.3040]], grad_fn=<AddmmBackward>) tensor([1])
4764 loss  0.703417181968689 tensor([[-0.2548, -0.2343]], grad_fn=<AddmmBackward>) tensor([0])
4765 loss  0.246615931391716 tensor([[ 0.3950, -0.8791]], grad_fn=<AddmmBackward>) tensor([0])
4766 loss  0.019706418737769127 tensor([[-1.9835,  1.9334]], grad_fn=<AddmmBackward>) tensor([1])
4767 loss  0.3193182647228241 tensor([[-0.6696,  0.3081]], grad_fn=<AddmmBackward>) tensor([1])
4768 loss  0.0756952241063118 tensor([[ 1.0648, -1.4782]], grad_fn=<AddmmBackward>) tensor([0])
4769 loss  1.0224320888519287 tensor([[ 0.0721, -0.5045]], grad_fn=<AddmmBackward>) tensor([1])
4770 loss  0.008433915674686432 tensor([[ 2.0772, -2.6941]], grad_fn=<AddmmBackward>) tensor([0])
4771 loss  0.24103876948356628 te

4869 loss  0.03997427597641945 tensor([[ 1.3496, -1.8498]], grad_fn=<AddmmBackward>) tensor([0])
4870 loss  1.6895662546157837 tensor([[ 0.5910, -0.8945]], grad_fn=<AddmmBackward>) tensor([1])
4871 loss  0.4341835081577301 tensor([[-0.4679,  0.1415]], grad_fn=<AddmmBackward>) tensor([1])
4872 loss  0.16721510887145996 tensor([[-0.9849,  0.7188]], grad_fn=<AddmmBackward>) tensor([1])
4873 loss  0.025046665221452713 tensor([[-1.9087,  1.7658]], grad_fn=<AddmmBackward>) tensor([1])
4874 loss  0.06737188249826431 tensor([[-1.4162,  1.2475]], grad_fn=<AddmmBackward>) tensor([1])
4875 loss  0.14234748482704163 tensor([[-1.0456,  0.8319]], grad_fn=<AddmmBackward>) tensor([1])
4876 loss  0.01327380072325468 tensor([[-2.1766,  2.1387]], grad_fn=<AddmmBackward>) tensor([1])
4877 loss  0.018038392066955566 tensor([[-2.0355,  1.9707]], grad_fn=<AddmmBackward>) tensor([1])
4878 loss  0.3941919207572937 tensor([[ 0.1532, -0.5741]], grad_fn=<AddmmBackward>) tensor([0])
4879 loss  0.1261814385652542 t

4980 loss  0.5895931720733643 tensor([[-0.0902, -0.3093]], grad_fn=<AddmmBackward>) tensor([0])
4981 loss  1.6240299940109253 tensor([[-0.8686,  0.5359]], grad_fn=<AddmmBackward>) tensor([0])
4982 loss  0.5330802798271179 tensor([[ 0.0226, -0.3281]], grad_fn=<AddmmBackward>) tensor([0])
4983 loss  0.5910481810569763 tensor([[-0.0662, -0.2821]], grad_fn=<AddmmBackward>) tensor([0])
4984 loss  0.011656273156404495 tensor([[-2.2568,  2.1893]], grad_fn=<AddmmBackward>) tensor([1])
4985 loss  0.010866397060453892 tensor([[ 2.0064, -2.5102]], grad_fn=<AddmmBackward>) tensor([0])
4986 loss  0.28852424025535583 tensor([[-0.5686,  0.5266]], grad_fn=<AddmmBackward>) tensor([1])
4987 loss  0.08476436883211136 tensor([[ 1.0988, -1.3264]], grad_fn=<AddmmBackward>) tensor([0])
4988 loss  0.018060987815260887 tensor([[ 1.6940, -2.3110]], grad_fn=<AddmmBackward>) tensor([0])
4989 loss  0.025636862963438034 tensor([[ 1.6520, -1.9988]], grad_fn=<AddmmBackward>) tensor([0])
4990 loss  0.00158291880507022

5106 loss  0.03137974441051483 tensor([[-1.7982,  1.6477]], grad_fn=<AddmmBackward>) tensor([1])
5107 loss  0.006205931771546602 tensor([[ 2.3517, -2.7275]], grad_fn=<AddmmBackward>) tensor([0])
5108 loss  0.13815368711948395 tensor([[-1.0157,  0.8938]], grad_fn=<AddmmBackward>) tensor([1])
5109 loss  0.002439501229673624 tensor([[ 2.7124, -3.3023]], grad_fn=<AddmmBackward>) tensor([0])
5110 loss  0.27203914523124695 tensor([[-0.6934,  0.4693]], grad_fn=<AddmmBackward>) tensor([1])
5111 loss  0.009827681817114353 tensor([[ 2.1041, -2.5135]], grad_fn=<AddmmBackward>) tensor([0])
5112 loss  1.973400354385376 tensor([[ 0.7349, -1.0888]], grad_fn=<AddmmBackward>) tensor([1])
5113 loss  0.005837653763592243 tensor([[ 2.3047, -2.8359]], grad_fn=<AddmmBackward>) tensor([0])
5114 loss  0.14328916370868683 tensor([[ 0.7075, -1.1629]], grad_fn=<AddmmBackward>) tensor([0])
5115 loss  0.11661442369222641 tensor([[ 0.8815, -1.2085]], grad_fn=<AddmmBackward>) tensor([0])
5116 loss  0.070238307118415

5237 loss  0.013009787537157536 tensor([[-2.1756,  2.1600]], grad_fn=<AddmmBackward>) tensor([1])
5238 loss  1.1106587648391724 tensor([[-0.4422,  0.2690]], grad_fn=<AddmmBackward>) tensor([0])
5239 loss  0.2523173987865448 tensor([[ 0.4716, -0.7766]], grad_fn=<AddmmBackward>) tensor([0])
5240 loss  0.11491429805755615 tensor([[-1.1034,  1.0021]], grad_fn=<AddmmBackward>) tensor([1])
5241 loss  1.3056238889694214 tensor([[-0.6288,  0.3607]], grad_fn=<AddmmBackward>) tensor([0])
5242 loss  0.29879075288772583 tensor([[ 0.3735, -0.6814]], grad_fn=<AddmmBackward>) tensor([0])
5243 loss  0.3679473102092743 tensor([[ 0.1614, -0.6488]], grad_fn=<AddmmBackward>) tensor([0])
5244 loss  0.23229724168777466 tensor([[ 0.5062, -0.8351]], grad_fn=<AddmmBackward>) tensor([0])
5245 loss  0.01108488067984581 tensor([[-2.2525,  2.2441]], grad_fn=<AddmmBackward>) tensor([1])
5246 loss  0.1985451877117157 tensor([[ 0.5645, -0.9514]], grad_fn=<AddmmBackward>) tensor([0])
5247 loss  0.009049000218510628 te

5370 loss  0.7591614723205566 tensor([[-0.1920, -0.0641]], grad_fn=<AddmmBackward>) tensor([0])
5371 loss  1.2473331689834595 tensor([[-0.5996,  0.3091]], grad_fn=<AddmmBackward>) tensor([0])
5372 loss  0.8318926095962524 tensor([[-0.2537,  0.0069]], grad_fn=<AddmmBackward>) tensor([0])
5373 loss  0.15880733728408813 tensor([[ 0.6602, -1.0994]], grad_fn=<AddmmBackward>) tensor([0])
5374 loss  0.012685793451964855 tensor([[-2.2144,  2.1466]], grad_fn=<AddmmBackward>) tensor([1])
5375 loss  0.005579493474215269 tensor([[-2.5876,  2.5983]], grad_fn=<AddmmBackward>) tensor([1])
5376 loss  0.04056202620267868 tensor([[-1.6458,  1.5387]], grad_fn=<AddmmBackward>) tensor([1])
5377 loss  0.3566276431083679 tensor([[ 0.2733, -0.5741]], grad_fn=<AddmmBackward>) tensor([0])
5378 loss  0.08174525201320648 tensor([[-1.2987,  1.1643]], grad_fn=<AddmmBackward>) tensor([1])
5379 loss  0.94997239112854 tensor([[ 0.0887, -0.3723]], grad_fn=<AddmmBackward>) tensor([1])
5380 loss  0.27994221448898315 tens

5499 loss  0.19692516326904297 tensor([[ 0.6680, -0.8568]], grad_fn=<AddmmBackward>) tensor([0])
5500 loss  0.024912724271416664 tensor([[-1.8810,  1.7989]], grad_fn=<AddmmBackward>) tensor([1])
5501 loss  0.37043318152427673 tensor([[-0.4910,  0.3111]], grad_fn=<AddmmBackward>) tensor([1])
5502 loss  0.020035039633512497 tensor([[-1.9825,  1.9177]], grad_fn=<AddmmBackward>) tensor([1])
5503 loss  0.1096675917506218 tensor([[ 0.8917, -1.2633]], grad_fn=<AddmmBackward>) tensor([0])
5504 loss  0.1763078272342682 tensor([[ 0.6319, -1.0142]], grad_fn=<AddmmBackward>) tensor([0])
5505 loss  0.15970519185066223 tensor([[ 0.5822, -1.1713]], grad_fn=<AddmmBackward>) tensor([0])
5506 loss  0.2403232902288437 tensor([[ 0.5210, -0.7822]], grad_fn=<AddmmBackward>) tensor([0])
5507 loss  0.05339110270142555 tensor([[-1.5007,  1.4026]], grad_fn=<AddmmBackward>) tensor([1])
5508 loss  0.11863239854574203 tensor([[-1.1631,  0.9087]], grad_fn=<AddmmBackward>) tensor([1])
5509 loss  0.009291856549680233

5632 loss  0.02278563380241394 tensor([[-1.9187,  1.8515]], grad_fn=<AddmmBackward>) tensor([1])
5633 loss  0.23909403383731842 tensor([[-0.7734,  0.5356]], grad_fn=<AddmmBackward>) tensor([1])
5634 loss  0.019870158284902573 tensor([[ 1.7255, -2.1831]], grad_fn=<AddmmBackward>) tensor([0])
5635 loss  0.014898112043738365 tensor([[ 1.8716, -2.3275]], grad_fn=<AddmmBackward>) tensor([0])
5636 loss  0.41781851649284363 tensor([[-0.4436,  0.2129]], grad_fn=<AddmmBackward>) tensor([1])
5637 loss  0.016444625332951546 tensor([[ 1.8025, -2.2970]], grad_fn=<AddmmBackward>) tensor([0])
5638 loss  0.01202418189495802 tensor([[-2.2254,  2.1894]], grad_fn=<AddmmBackward>) tensor([1])
5639 loss  1.7507431507110596 tensor([[ 0.6514, -0.9087]], grad_fn=<AddmmBackward>) tensor([1])
5640 loss  0.7069754600524902 tensor([[-0.1073, -0.1347]], grad_fn=<AddmmBackward>) tensor([1])
5641 loss  0.005326010286808014 tensor([[ 2.3493, -2.8832]], grad_fn=<AddmmBackward>) tensor([0])
5642 loss  0.127014040946960

5759 loss  6.971240520477295 tensor([[ 3.1565, -3.8138]], grad_fn=<AddmmBackward>) tensor([1])
5760 loss  0.09490567445755005 tensor([[ 1.0249, -1.2821]], grad_fn=<AddmmBackward>) tensor([0])
5761 loss  0.4752160906791687 tensor([[ 0.2049, -0.2921]], grad_fn=<AddmmBackward>) tensor([0])
5762 loss  0.03642532601952553 tensor([[-1.6632,  1.6310]], grad_fn=<AddmmBackward>) tensor([1])
5763 loss  0.13148730993270874 tensor([[ 0.8654, -1.0970]], grad_fn=<AddmmBackward>) tensor([0])
5764 loss  1.2497702836990356 tensor([[ 0.2311, -0.6810]], grad_fn=<AddmmBackward>) tensor([1])
5765 loss  0.04901102930307388 tensor([[ 1.2806, -1.7105]], grad_fn=<AddmmBackward>) tensor([0])
5766 loss  0.08812058717012405 tensor([[-1.3458,  1.0389]], grad_fn=<AddmmBackward>) tensor([1])
5767 loss  0.1761448085308075 tensor([[ 0.7210, -0.9261]], grad_fn=<AddmmBackward>) tensor([0])
5768 loss  0.28271082043647766 tensor([[ 0.3265, -0.7921]], grad_fn=<AddmmBackward>) tensor([0])
5769 loss  0.02527032606303692 tens

5887 loss  0.4151230752468109 tensor([[-0.4262,  0.2383]], grad_fn=<AddmmBackward>) tensor([1])
5888 loss  0.013646288774907589 tensor([[ 1.9633, -2.3242]], grad_fn=<AddmmBackward>) tensor([0])
5889 loss  0.009169372729957104 tensor([[ 2.0859, -2.6014]], grad_fn=<AddmmBackward>) tensor([0])
5890 loss  0.0037611236330121756 tensor([[ 2.5915, -2.9896]], grad_fn=<AddmmBackward>) tensor([0])
5891 loss  1.2586569786071777 tensor([[ 0.3734, -0.5511]], grad_fn=<AddmmBackward>) tensor([1])
5892 loss  0.028418460860848427 tensor([[ 1.5769, -1.9696]], grad_fn=<AddmmBackward>) tensor([0])
5893 loss  1.1153756380081177 tensor([[ 0.1732, -0.5449]], grad_fn=<AddmmBackward>) tensor([1])
5894 loss  0.010351421311497688 tensor([[ 1.9992, -2.5663]], grad_fn=<AddmmBackward>) tensor([0])
5895 loss  0.4064754843711853 tensor([[-0.4219,  0.2682]], grad_fn=<AddmmBackward>) tensor([1])
5896 loss  0.05951482802629471 tensor([[ 1.2285, -1.5632]], grad_fn=<AddmmBackward>) tensor([0])
5897 loss  0.011053637601435

6010 loss  0.7231015563011169 tensor([[-0.2018, -0.1427]], grad_fn=<AddmmBackward>) tensor([0])
6011 loss  0.03453890606760979 tensor([[-1.7621,  1.5863]], grad_fn=<AddmmBackward>) tensor([1])
6012 loss  0.6404143571853638 tensor([[-0.0781, -0.1865]], grad_fn=<AddmmBackward>) tensor([0])
6013 loss  0.018708324059844017 tensor([[-2.0262,  1.9432]], grad_fn=<AddmmBackward>) tensor([1])
6014 loss  0.8737615346908569 tensor([[-0.3641, -0.0306]], grad_fn=<AddmmBackward>) tensor([0])
6015 loss  0.9132677912712097 tensor([[-0.2612,  0.1392]], grad_fn=<AddmmBackward>) tensor([0])
6016 loss  0.5440055727958679 tensor([[-0.1108, -0.4353]], grad_fn=<AddmmBackward>) tensor([0])
6017 loss  0.1066686287522316 tensor([[-1.2116,  0.9726]], grad_fn=<AddmmBackward>) tensor([1])
6018 loss  0.035537902265787125 tensor([[-1.6498,  1.6696]], grad_fn=<AddmmBackward>) tensor([1])
6019 loss  0.26000991463661194 tensor([[ 0.4181, -0.7961]], grad_fn=<AddmmBackward>) tensor([0])
6020 loss  0.045322731137275696 te

6129 loss  0.02335410751402378 tensor([[ 1.5921, -2.1532]], grad_fn=<AddmmBackward>) tensor([0])
6130 loss  0.3408545255661011 tensor([[-0.5830,  0.3180]], grad_fn=<AddmmBackward>) tensor([1])
6131 loss  0.024223318323493004 tensor([[ 1.5128, -2.1955]], grad_fn=<AddmmBackward>) tensor([0])
6132 loss  0.4087923467159271 tensor([[-0.5178,  0.1654]], grad_fn=<AddmmBackward>) tensor([1])
6133 loss  0.1272072046995163 tensor([[ 0.7596, -1.2381]], grad_fn=<AddmmBackward>) tensor([0])
6134 loss  2.955504894256592 tensor([[ 1.1686, -1.7334]], grad_fn=<AddmmBackward>) tensor([1])
6135 loss  0.7060168385505676 tensor([[-0.2083, -0.1827]], grad_fn=<AddmmBackward>) tensor([0])
6136 loss  0.0057455627247691154 tensor([[-2.5657,  2.5908]], grad_fn=<AddmmBackward>) tensor([1])
6137 loss  0.5396132469177246 tensor([[-0.1038, -0.4388]], grad_fn=<AddmmBackward>) tensor([0])
6138 loss  0.007546370383352041 tensor([[-2.4751,  2.4078]], grad_fn=<AddmmBackward>) tensor([1])
6139 loss  1.4057015180587769 ten

6250 loss  0.755530834197998 tensor([[-0.1564, -0.2775]], grad_fn=<AddmmBackward>) tensor([1])
6251 loss  0.0007431129342876375 tensor([[ 3.0555, -4.1488]], grad_fn=<AddmmBackward>) tensor([0])
6252 loss  0.006433963775634766 tensor([[ 2.1369, -2.9061]], grad_fn=<AddmmBackward>) tensor([0])
6253 loss  0.0003591130953282118 tensor([[ 3.6149, -4.3169]], grad_fn=<AddmmBackward>) tensor([0])
6254 loss  0.19409525394439697 tensor([[-0.9621,  0.5787]], grad_fn=<AddmmBackward>) tensor([1])
6255 loss  7.652943895664066e-05 tensor([[ 4.1471, -5.3299]], grad_fn=<AddmmBackward>) tensor([0])
6256 loss  0.2196473777294159 tensor([[-0.8590,  0.5449]], grad_fn=<AddmmBackward>) tensor([1])
6257 loss  3.0674870014190674 tensor([[ 1.2082, -1.8116]], grad_fn=<AddmmBackward>) tensor([1])
6258 loss  6.980042457580566 tensor([[ 2.9288, -4.0503]], grad_fn=<AddmmBackward>) tensor([1])
6259 loss  0.002143468242138624 tensor([[ 2.6157, -3.5286]], grad_fn=<AddmmBackward>) tensor([0])
6260 loss  0.133940547704696

6374 loss  0.05348806083202362 tensor([[-1.5937,  1.3077]], grad_fn=<AddmmBackward>) tensor([1])
6375 loss  0.07818730175495148 tensor([[ 0.7026, -1.8067]], grad_fn=<AddmmBackward>) tensor([0])
6376 loss  0.007925491780042648 tensor([[-2.4760,  2.3577]], grad_fn=<AddmmBackward>) tensor([1])
6377 loss  0.0662158951163292 tensor([[ 0.8518, -1.8298]], grad_fn=<AddmmBackward>) tensor([0])
6378 loss  0.44401973485946655 tensor([[-0.5504,  0.0313]], grad_fn=<AddmmBackward>) tensor([1])
6379 loss  0.046747054904699326 tensor([[ 1.1581, -1.8814]], grad_fn=<AddmmBackward>) tensor([0])
6380 loss  0.04875674098730087 tensor([[ 1.0909, -1.9056]], grad_fn=<AddmmBackward>) tensor([0])
6381 loss  0.023915158584713936 tensor([[ 1.3833, -2.3380]], grad_fn=<AddmmBackward>) tensor([0])
6382 loss  0.02926637977361679 tensor([[ 1.2816, -2.2350]], grad_fn=<AddmmBackward>) tensor([0])
6383 loss  0.02942913956940174 tensor([[-1.8721,  1.6389]], grad_fn=<AddmmBackward>) tensor([1])
6384 loss  0.253596484661102

6494 loss  0.07247073203325272 tensor([[-1.5733,  1.0148]], grad_fn=<AddmmBackward>) tensor([1])
6495 loss  0.03269138187170029 tensor([[-1.7824,  1.6218]], grad_fn=<AddmmBackward>) tensor([1])
6496 loss  0.45466378331184387 tensor([[-0.0477, -0.6000]], grad_fn=<AddmmBackward>) tensor([0])
6497 loss  0.3376583456993103 tensor([[ 0.1298, -0.7824]], grad_fn=<AddmmBackward>) tensor([0])
6498 loss  0.4173084795475006 tensor([[-0.0073, -0.6654]], grad_fn=<AddmmBackward>) tensor([0])
6499 loss  0.08032737672328949 tensor([[-1.4311,  1.0501]], grad_fn=<AddmmBackward>) tensor([1])
6500 loss  0.060681503266096115 tensor([[-1.5489,  1.2227]], grad_fn=<AddmmBackward>) tensor([1])
6501 loss  0.3076138198375702 tensor([[ 0.1982, -0.8229]], grad_fn=<AddmmBackward>) tensor([0])
6502 loss  0.30414944887161255 tensor([[ 0.1521, -0.8822]], grad_fn=<AddmmBackward>) tensor([0])
6503 loss  0.13346050679683685 tensor([[ 0.5599, -1.3865]], grad_fn=<AddmmBackward>) tensor([0])
6504 loss  0.05147632211446762 t

6598 loss  0.9730439186096191 tensor([[-0.6485, -0.1502]], grad_fn=<AddmmBackward>) tensor([0])
6599 loss  0.0457368828356266 tensor([[-1.7086,  1.3533]], grad_fn=<AddmmBackward>) tensor([1])
6600 loss  0.018668074160814285 tensor([[-2.1334,  1.8382]], grad_fn=<AddmmBackward>) tensor([1])
6601 loss  0.8110518455505371 tensor([[-0.2706, -0.4940]], grad_fn=<AddmmBackward>) tensor([1])
6602 loss  0.0991082489490509 tensor([[ 0.7167, -1.5449]], grad_fn=<AddmmBackward>) tensor([0])
6603 loss  0.5869798064231873 tensor([[-0.1632, -0.3882]], grad_fn=<AddmmBackward>) tensor([0])
6604 loss  0.01687735505402088 tensor([[-2.1619,  1.9114]], grad_fn=<AddmmBackward>) tensor([1])
6605 loss  0.32334449887275696 tensor([[ 0.2105, -0.7525]], grad_fn=<AddmmBackward>) tensor([0])
6606 loss  0.028696391731500626 tensor([[-1.8540,  1.6826]], grad_fn=<AddmmBackward>) tensor([1])
6607 loss  0.18253713846206665 tensor([[-1.0170,  0.5912]], grad_fn=<AddmmBackward>) tensor([1])
6608 loss  0.1334274411201477 ten

6717 loss  0.027638588100671768 tensor([[ 1.2601, -2.3146]], grad_fn=<AddmmBackward>) tensor([0])
6718 loss  0.09586133807897568 tensor([[ 0.6656, -1.6309]], grad_fn=<AddmmBackward>) tensor([0])
6719 loss  0.11808498948812485 tensor([[-1.2051,  0.8716]], grad_fn=<AddmmBackward>) tensor([1])
6720 loss  0.2208385318517685 tensor([[ 0.3683, -1.0296]], grad_fn=<AddmmBackward>) tensor([0])
6721 loss  0.2429530769586563 tensor([[ 0.2473, -1.0437]], grad_fn=<AddmmBackward>) tensor([0])
6722 loss  0.12148110568523407 tensor([[-1.2658,  0.7808]], grad_fn=<AddmmBackward>) tensor([1])
6723 loss  0.5320255756378174 tensor([[-0.4676, -0.1143]], grad_fn=<AddmmBackward>) tensor([1])
6724 loss  0.009736783802509308 tensor([[-2.3853,  2.2416]], grad_fn=<AddmmBackward>) tensor([1])
6725 loss  0.06258759647607803 tensor([[-1.4989,  1.2409]], grad_fn=<AddmmBackward>) tensor([1])
6726 loss  0.692302942276001 tensor([[-0.2771, -0.2788]], grad_fn=<AddmmBackward>) tensor([0])
6727 loss  0.624116063117981 tens

6838 loss  0.07325354218482971 tensor([[-1.4560,  1.1210]], grad_fn=<AddmmBackward>) tensor([1])
6839 loss  0.11215218901634216 tensor([[ 0.8134, -1.3179]], grad_fn=<AddmmBackward>) tensor([0])
6840 loss  0.10620103031396866 tensor([[ 0.9369, -1.2520]], grad_fn=<AddmmBackward>) tensor([0])
6841 loss  0.0458112433552742 tensor([[ 1.0706, -1.9896]], grad_fn=<AddmmBackward>) tensor([0])
6842 loss  0.12380954623222351 tensor([[ 0.5783, -1.4482]], grad_fn=<AddmmBackward>) tensor([0])
6843 loss  0.027042610570788383 tensor([[-1.9032,  1.6935]], grad_fn=<AddmmBackward>) tensor([1])
6844 loss  0.8898735046386719 tensor([[-0.0655, -0.4265]], grad_fn=<AddmmBackward>) tensor([1])
6845 loss  0.033921562135219574 tensor([[ 1.4074, -1.9593]], grad_fn=<AddmmBackward>) tensor([0])
6846 loss  0.10514263808727264 tensor([[ 0.6490, -1.5504]], grad_fn=<AddmmBackward>) tensor([0])
6847 loss  0.010687966831028461 tensor([[-2.3175,  2.2158]], grad_fn=<AddmmBackward>) tensor([1])
6848 loss  0.0770553424954414

6959 loss  0.02101103402674198 tensor([[-2.0199,  1.8323]], grad_fn=<AddmmBackward>) tensor([1])
6960 loss  0.013029202818870544 tensor([[-2.1682,  2.1658]], grad_fn=<AddmmBackward>) tensor([1])
6961 loss  0.013184744864702225 tensor([[-2.1995,  2.1226]], grad_fn=<AddmmBackward>) tensor([1])
6962 loss  0.03495121747255325 tensor([[-1.7819,  1.5544]], grad_fn=<AddmmBackward>) tensor([1])
6963 loss  0.011549986898899078 tensor([[-2.2903,  2.1650]], grad_fn=<AddmmBackward>) tensor([1])
6964 loss  0.030289502814412117 tensor([[-1.9023,  1.5795]], grad_fn=<AddmmBackward>) tensor([1])
6965 loss  0.015227260068058968 tensor([[-2.1399,  2.0371]], grad_fn=<AddmmBackward>) tensor([1])
6966 loss  0.025367841124534607 tensor([[ 1.3841, -2.2775]], grad_fn=<AddmmBackward>) tensor([0])
6967 loss  0.058630578219890594 tensor([[ 1.0803, -1.7268]], grad_fn=<AddmmBackward>) tensor([0])
6968 loss  1.191627860069275 tensor([[ 0.0881, -0.7415]], grad_fn=<AddmmBackward>) tensor([1])
6969 loss  0.505145430564

7095 loss  0.04356294870376587 tensor([[-1.6037,  1.5079]], grad_fn=<AddmmBackward>) tensor([1])
7096 loss  0.1595211625099182 tensor([[-1.0117,  0.7431]], grad_fn=<AddmmBackward>) tensor([1])
7097 loss  0.14156387746334076 tensor([[-1.0763,  0.8071]], grad_fn=<AddmmBackward>) tensor([1])
7098 loss  0.0786147341132164 tensor([[-1.3910,  1.1127]], grad_fn=<AddmmBackward>) tensor([1])
7099 loss  0.04982216656208038 tensor([[-1.5980,  1.3763]], grad_fn=<AddmmBackward>) tensor([1])
7100 loss  0.04004070535302162 tensor([[-1.7758,  1.4220]], grad_fn=<AddmmBackward>) tensor([1])
7101 loss  0.006433490198105574 tensor([[-2.5259,  2.5171]], grad_fn=<AddmmBackward>) tensor([1])
7102 loss  0.01667936146259308 tensor([[-2.1162,  1.9690]], grad_fn=<AddmmBackward>) tensor([1])
7103 loss  0.4853130877017975 tensor([[ 0.0106, -0.4600]], grad_fn=<AddmmBackward>) tensor([0])
7104 loss  0.29697635769844055 tensor([[ 0.1476, -0.9144]], grad_fn=<AddmmBackward>) tensor([0])
7105 loss  0.27271413803100586 t

7229 loss  0.9168786406517029 tensor([[-0.3916,  0.0148]], grad_fn=<AddmmBackward>) tensor([0])
7230 loss  0.4994426965713501 tensor([[ 0.0385, -0.3957]], grad_fn=<AddmmBackward>) tensor([0])
7231 loss  0.031056910753250122 tensor([[-1.7960,  1.6603]], grad_fn=<AddmmBackward>) tensor([1])
7232 loss  0.4563445448875427 tensor([[-0.4604,  0.0872]], grad_fn=<AddmmBackward>) tensor([1])
7233 loss  0.09136081486940384 tensor([[-1.2966,  1.0503]], grad_fn=<AddmmBackward>) tensor([1])
7234 loss  0.5244925618171692 tensor([[-0.0386, -0.4102]], grad_fn=<AddmmBackward>) tensor([0])
7235 loss  0.25164860486984253 tensor([[-0.7824,  0.4689]], grad_fn=<AddmmBackward>) tensor([1])
7236 loss  0.34759125113487244 tensor([[ 0.3019, -0.5760]], grad_fn=<AddmmBackward>) tensor([0])
7237 loss  0.047904398292303085 tensor([[-1.5883,  1.4262]], grad_fn=<AddmmBackward>) tensor([1])
7238 loss  0.15002678334712982 tensor([[ 0.6683, -1.1526]], grad_fn=<AddmmBackward>) tensor([0])
7239 loss  0.05465986579656601 t

7353 loss  0.04174495488405228 tensor([[ 1.3544, -1.8008]], grad_fn=<AddmmBackward>) tensor([0])
7354 loss  0.007142723072320223 tensor([[-2.4560,  2.4821]], grad_fn=<AddmmBackward>) tensor([1])
7355 loss  0.014936750754714012 tensor([[ 1.7852, -2.4112]], grad_fn=<AddmmBackward>) tensor([0])
7356 loss  0.036431074142456055 tensor([[ 1.4552, -1.8389]], grad_fn=<AddmmBackward>) tensor([0])
7357 loss  0.013873218558728695 tensor([[ 1.8769, -2.3939]], grad_fn=<AddmmBackward>) tensor([0])
7358 loss  0.004615012556314468 tensor([[ 2.4064, -2.9697]], grad_fn=<AddmmBackward>) tensor([0])
7359 loss  0.007660772651433945 tensor([[ 2.2550, -2.6128]], grad_fn=<AddmmBackward>) tensor([0])
7360 loss  0.002904841210693121 tensor([[ 2.6076, -3.2324]], grad_fn=<AddmmBackward>) tensor([0])
7361 loss  0.318408727645874 tensor([[-0.5449,  0.4361]], grad_fn=<AddmmBackward>) tensor([1])
7362 loss  0.6671537756919861 tensor([[-0.1779, -0.1252]], grad_fn=<AddmmBackward>) tensor([1])
7363 loss  0.0202257111668

7483 loss  0.00385731621645391 tensor([[ 2.5627, -2.9931]], grad_fn=<AddmmBackward>) tensor([0])
7484 loss  0.0016269554616883397 tensor([[ 2.9937, -3.4265]], grad_fn=<AddmmBackward>) tensor([0])
7485 loss  0.0017889224691316485 tensor([[ 2.9298, -3.3954]], grad_fn=<AddmmBackward>) tensor([0])
7486 loss  1.209023118019104 tensor([[ 0.2786, -0.5759]], grad_fn=<AddmmBackward>) tensor([1])
7487 loss  0.004153079353272915 tensor([[ 2.4994, -2.9824]], grad_fn=<AddmmBackward>) tensor([0])
7488 loss  3.160857677459717 tensor([[ 1.4653, -1.6522]], grad_fn=<AddmmBackward>) tensor([1])
7489 loss  0.005332531873136759 tensor([[ 2.3413, -2.8900]], grad_fn=<AddmmBackward>) tensor([0])
7490 loss  0.056475285440683365 tensor([[ 1.2612, -1.5844]], grad_fn=<AddmmBackward>) tensor([0])
7491 loss  0.49378204345703125 tensor([[ 0.0588, -0.3899]], grad_fn=<AddmmBackward>) tensor([0])
7492 loss  0.17990976572036743 tensor([[-0.9103,  0.7137]], grad_fn=<AddmmBackward>) tensor([1])
7493 loss  0.47292366623878

7593 loss  0.005697676911950111 tensor([[ 2.3190, -2.8458]], grad_fn=<AddmmBackward>) tensor([0])
7594 loss  0.02166871167719364 tensor([[ 1.7277, -2.0933]], grad_fn=<AddmmBackward>) tensor([0])
7595 loss  0.011329364962875843 tensor([[-2.2364,  2.2382]], grad_fn=<AddmmBackward>) tensor([1])
7596 loss  0.004863338079303503 tensor([[ 2.4242, -2.8994]], grad_fn=<AddmmBackward>) tensor([0])
7597 loss  0.06413470953702927 tensor([[-1.4044,  1.3101]], grad_fn=<AddmmBackward>) tensor([1])
7598 loss  2.1343138217926025 tensor([[ 0.8209, -1.1875]], grad_fn=<AddmmBackward>) tensor([1])
7599 loss  0.806448757648468 tensor([[-0.0128, -0.2279]], grad_fn=<AddmmBackward>) tensor([1])
7600 loss  0.02633737027645111 tensor([[ 1.6113, -2.0122]], grad_fn=<AddmmBackward>) tensor([0])
7601 loss  0.5365527272224426 tensor([[-0.2773,  0.0650]], grad_fn=<AddmmBackward>) tensor([1])
7602 loss  0.006154039409011602 tensor([[-2.5418,  2.5458]], grad_fn=<AddmmBackward>) tensor([1])
7603 loss  0.6749066710472107 

7710 loss  0.0319962352514267 tensor([[-1.6881,  1.7380]], grad_fn=<AddmmBackward>) tensor([1])
7711 loss  0.33579394221305847 tensor([[-0.4872,  0.4315]], grad_fn=<AddmmBackward>) tensor([1])
7712 loss  0.18957652151584625 tensor([[ 0.5627, -1.0040]], grad_fn=<AddmmBackward>) tensor([0])
7713 loss  0.33938613533973694 tensor([[ 0.2642, -0.6419]], grad_fn=<AddmmBackward>) tensor([0])
7714 loss  0.01838909648358822 tensor([[-2.0439,  1.9428]], grad_fn=<AddmmBackward>) tensor([1])
7715 loss  2.604640483856201 tensor([[ 1.0572, -1.4707]], grad_fn=<AddmmBackward>) tensor([1])
7716 loss  0.04796382784843445 tensor([[-1.6038,  1.4094]], grad_fn=<AddmmBackward>) tensor([1])
7717 loss  0.3924289643764496 tensor([[ 0.1574, -0.5753]], grad_fn=<AddmmBackward>) tensor([0])
7718 loss  1.7581522464752197 tensor([[-0.8289,  0.7401]], grad_fn=<AddmmBackward>) tensor([0])
7719 loss  0.38468387722969055 tensor([[-0.4615,  0.2954]], grad_fn=<AddmmBackward>) tensor([1])
7720 loss  1.5174648761749268 tenso

7833 loss  1.1383781433105469 tensor([[ 0.1888, -0.5634]], grad_fn=<AddmmBackward>) tensor([1])
7834 loss  0.06543559581041336 tensor([[-1.3836,  1.3102]], grad_fn=<AddmmBackward>) tensor([1])
7835 loss  0.051918722689151764 tensor([[-1.5303,  1.4017]], grad_fn=<AddmmBackward>) tensor([1])
7836 loss  0.005989578552544117 tensor([[-2.5679,  2.5468]], grad_fn=<AddmmBackward>) tensor([1])
7837 loss  0.005976899527013302 tensor([[-2.5448,  2.5721]], grad_fn=<AddmmBackward>) tensor([1])
7838 loss  0.5988178849220276 tensor([[ 0.0305, -0.1680]], grad_fn=<AddmmBackward>) tensor([0])
7839 loss  1.740382432937622 tensor([[-0.8402,  0.7073]], grad_fn=<AddmmBackward>) tensor([0])
7840 loss  0.007039980497211218 tensor([[-2.5007,  2.4519]], grad_fn=<AddmmBackward>) tensor([1])
7841 loss  0.6869193911552429 tensor([[-0.1024, -0.1149]], grad_fn=<AddmmBackward>) tensor([0])
7842 loss  0.7683884501457214 tensor([[-0.2155, -0.0703]], grad_fn=<AddmmBackward>) tensor([0])
7843 loss  0.006864536087960005 

7959 loss  0.03185756504535675 tensor([[-1.7473,  1.6832]], grad_fn=<AddmmBackward>) tensor([1])
7960 loss  0.07054229825735092 tensor([[ 1.0843, -1.5318]], grad_fn=<AddmmBackward>) tensor([0])
7961 loss  0.08585198223590851 tensor([[ 0.9747, -1.4372]], grad_fn=<AddmmBackward>) tensor([0])
7962 loss  0.010190009139478207 tensor([[-2.2958,  2.2854]], grad_fn=<AddmmBackward>) tensor([1])
7963 loss  0.04993772879242897 tensor([[ 1.2730, -1.6990]], grad_fn=<AddmmBackward>) tensor([0])
7964 loss  0.04718984663486481 tensor([[-1.5516,  1.4783]], grad_fn=<AddmmBackward>) tensor([1])
7965 loss  0.19388464093208313 tensor([[ 0.5406, -1.0014]], grad_fn=<AddmmBackward>) tensor([0])
7966 loss  0.018234947696328163 tensor([[-2.0270,  1.9683]], grad_fn=<AddmmBackward>) tensor([1])
7967 loss  0.24378952383995056 tensor([[ 0.5277, -0.7594]], grad_fn=<AddmmBackward>) tensor([0])
7968 loss  0.17595887184143066 tensor([[ 0.5382, -1.1100]], grad_fn=<AddmmBackward>) tensor([0])
7969 loss  0.139633476734161

8077 loss  0.21118129789829254 tensor([[ 0.6177, -0.8298]], grad_fn=<AddmmBackward>) tensor([0])
8078 loss  0.23787294328212738 tensor([[ 0.4617, -0.8530]], grad_fn=<AddmmBackward>) tensor([0])
8079 loss  0.06430474668741226 tensor([[-1.4179,  1.2939]], grad_fn=<AddmmBackward>) tensor([1])
8080 loss  0.25150105357170105 tensor([[ 0.4606, -0.7913]], grad_fn=<AddmmBackward>) tensor([0])
8081 loss  0.03932085260748863 tensor([[-1.6482,  1.5680]], grad_fn=<AddmmBackward>) tensor([1])
8082 loss  0.6308552622795105 tensor([[-0.1934, -0.0647]], grad_fn=<AddmmBackward>) tensor([1])
8083 loss  0.006025601178407669 tensor([[-2.5528,  2.5559]], grad_fn=<AddmmBackward>) tensor([1])
8084 loss  0.15531155467033386 tensor([[ 0.7022, -1.0814]], grad_fn=<AddmmBackward>) tensor([0])
8085 loss  0.1304527372121811 tensor([[ 0.6714, -1.2994]], grad_fn=<AddmmBackward>) tensor([0])
8086 loss  0.20164546370506287 tensor([[ 0.5218, -0.9769]], grad_fn=<AddmmBackward>) tensor([0])
8087 loss  0.008901556022465229

8198 loss  0.16824066638946533 tensor([[ 0.6384, -1.0587]], grad_fn=<AddmmBackward>) tensor([0])
8199 loss  0.038325633853673935 tensor([[ 1.4015, -1.8409]], grad_fn=<AddmmBackward>) tensor([0])
8200 loss  0.032729338854551315 tensor([[-1.7492,  1.6539]], grad_fn=<AddmmBackward>) tensor([1])
8201 loss  0.09218475222587585 tensor([[-1.2207,  1.1168]], grad_fn=<AddmmBackward>) tensor([1])
8202 loss  0.0025769618805497885 tensor([[ 2.7430, -3.2169]], grad_fn=<AddmmBackward>) tensor([0])
8203 loss  0.004437124822288752 tensor([[ 2.4629, -2.9526]], grad_fn=<AddmmBackward>) tensor([0])
8204 loss  0.2973931133747101 tensor([[-0.5645,  0.4958]], grad_fn=<AddmmBackward>) tensor([1])
8205 loss  0.0005169962532818317 tensor([[ 3.5561, -4.0110]], grad_fn=<AddmmBackward>) tensor([0])
8206 loss  0.00013481661153491586 tensor([[ 4.0934, -4.8182]], grad_fn=<AddmmBackward>) tensor([0])
8207 loss  1.682629108428955 tensor([[ 0.5272, -0.9498]], grad_fn=<AddmmBackward>) tensor([1])
8208 loss  0.0030354636

8326 loss  0.17017853260040283 tensor([[ 0.5983, -1.0863]], grad_fn=<AddmmBackward>) tensor([0])
8327 loss  0.02401117794215679 tensor([[ 1.6026, -2.1146]], grad_fn=<AddmmBackward>) tensor([0])
8328 loss  0.037716593593358994 tensor([[ 1.2993, -1.9595]], grad_fn=<AddmmBackward>) tensor([0])
8329 loss  0.041652336716651917 tensor([[ 1.3283, -1.8292]], grad_fn=<AddmmBackward>) tensor([0])
8330 loss  0.16055727005004883 tensor([[-0.9249,  0.8229]], grad_fn=<AddmmBackward>) tensor([1])
8331 loss  0.11864881217479706 tensor([[-1.1203,  0.9514]], grad_fn=<AddmmBackward>) tensor([1])
8332 loss  0.009859907440841198 tensor([[-2.3325,  2.2818]], grad_fn=<AddmmBackward>) tensor([1])
8333 loss  0.6666094660758972 tensor([[-0.1540, -0.1002]], grad_fn=<AddmmBackward>) tensor([1])
8334 loss  0.030118901282548904 tensor([[ 1.4076, -2.0800]], grad_fn=<AddmmBackward>) tensor([0])
8335 loss  0.007630132604390383 tensor([[-2.4356,  2.4363]], grad_fn=<AddmmBackward>) tensor([1])
8336 loss  1.7131191492080

8462 loss  0.022671550512313843 tensor([[-1.9221,  1.8532]], grad_fn=<AddmmBackward>) tensor([1])
8463 loss  0.9399226903915405 tensor([[-0.3823,  0.0623]], grad_fn=<AddmmBackward>) tensor([0])
8464 loss  0.03337094187736511 tensor([[-1.6902,  1.6931]], grad_fn=<AddmmBackward>) tensor([1])
8465 loss  0.6092157959938049 tensor([[-0.0612, -0.2367]], grad_fn=<AddmmBackward>) tensor([0])
8466 loss  0.014008646830916405 tensor([[-2.1611,  2.1000]], grad_fn=<AddmmBackward>) tensor([1])
8467 loss  0.006363131105899811 tensor([[-2.5232,  2.5308]], grad_fn=<AddmmBackward>) tensor([1])
8468 loss  0.2238052785396576 tensor([[ 0.4410, -0.9420]], grad_fn=<AddmmBackward>) tensor([0])
8469 loss  0.14280231297016144 tensor([[-1.0445,  0.8295]], grad_fn=<AddmmBackward>) tensor([1])
8470 loss  0.2194635272026062 tensor([[ 0.4915, -0.9134]], grad_fn=<AddmmBackward>) tensor([0])
8471 loss  0.009718721732497215 tensor([[-2.3437,  2.2851]], grad_fn=<AddmmBackward>) tensor([1])
8472 loss  0.01927034929394722

8599 loss  0.006881347857415676 tensor([[-2.5190,  2.4565]], grad_fn=<AddmmBackward>) tensor([1])
8600 loss  0.0076646762900054455 tensor([[-2.4606,  2.4067]], grad_fn=<AddmmBackward>) tensor([1])
8601 loss  0.07147634774446487 tensor([[ 0.9941, -1.6083]], grad_fn=<AddmmBackward>) tensor([0])
8602 loss  0.017866037786006927 tensor([[-2.0229,  1.9930]], grad_fn=<AddmmBackward>) tensor([1])
8603 loss  0.3963320851325989 tensor([[ 0.1300, -0.5908]], grad_fn=<AddmmBackward>) tensor([0])
8604 loss  0.3092744052410126 tensor([[-0.6736,  0.3414]], grad_fn=<AddmmBackward>) tensor([1])
8605 loss  0.0453689843416214 tensor([[-1.5987,  1.4715]], grad_fn=<AddmmBackward>) tensor([1])
8606 loss  0.0570174977183342 tensor([[ 1.2019, -1.6338]], grad_fn=<AddmmBackward>) tensor([0])
8607 loss  0.030026821419596672 tensor([[-1.8124,  1.6782]], grad_fn=<AddmmBackward>) tensor([1])
8608 loss  0.1286620795726776 tensor([[ 0.7202, -1.2653]], grad_fn=<AddmmBackward>) tensor([0])
8609 loss  0.03506586700677872

8733 loss  0.04196822643280029 tensor([[-1.6869,  1.4629]], grad_fn=<AddmmBackward>) tensor([1])
8734 loss  0.07368762791156769 tensor([[ 0.9875, -1.5833]], grad_fn=<AddmmBackward>) tensor([0])
8735 loss  0.048706214874982834 tensor([[ 1.2965, -1.7010]], grad_fn=<AddmmBackward>) tensor([0])
8736 loss  0.14443765580654144 tensor([[ 0.8296, -1.0322]], grad_fn=<AddmmBackward>) tensor([0])
8737 loss  0.06818787008523941 tensor([[-1.4525,  1.1987]], grad_fn=<AddmmBackward>) tensor([1])
8738 loss  0.0149494344368577 tensor([[ 1.7942, -2.4014]], grad_fn=<AddmmBackward>) tensor([0])
8739 loss  0.5221966505050659 tensor([[-0.4166, -0.0394]], grad_fn=<AddmmBackward>) tensor([1])
8740 loss  0.016901619732379913 tensor([[ 1.7072, -2.3647]], grad_fn=<AddmmBackward>) tensor([0])
8741 loss  0.5170602202415466 tensor([[-0.3824,  0.0075]], grad_fn=<AddmmBackward>) tensor([1])
8742 loss  0.6733364462852478 tensor([[-0.1879, -0.1479]], grad_fn=<AddmmBackward>) tensor([1])
8743 loss  0.06205601617693901 t

8839 loss  0.04769199714064598 tensor([[-1.5917,  1.4274]], grad_fn=<AddmmBackward>) tensor([1])
8840 loss  0.21741187572479248 tensor([[ 0.5772, -0.8381]], grad_fn=<AddmmBackward>) tensor([0])
8841 loss  0.010057368315756321 tensor([[-2.2736,  2.3208]], grad_fn=<AddmmBackward>) tensor([1])
8842 loss  0.1482204794883728 tensor([[ 0.7644, -1.0696]], grad_fn=<AddmmBackward>) tensor([0])
8843 loss  0.015639981254935265 tensor([[-2.0536,  2.0965]], grad_fn=<AddmmBackward>) tensor([1])
8844 loss  0.3080405592918396 tensor([[ 0.2949, -0.7246]], grad_fn=<AddmmBackward>) tensor([0])
8845 loss  0.006821795366704464 tensor([[-2.5021,  2.4822]], grad_fn=<AddmmBackward>) tensor([1])
8846 loss  0.005474930629134178 tensor([[-2.6005,  2.6044]], grad_fn=<AddmmBackward>) tensor([1])
8847 loss  0.005469358526170254 tensor([[-2.5870,  2.6188]], grad_fn=<AddmmBackward>) tensor([1])
8848 loss  0.09565184265375137 tensor([[-1.2181,  1.0808]], grad_fn=<AddmmBackward>) tensor([1])
8849 loss  0.18500517308712

8936 loss  0.12362278252840042 tensor([[ 0.8499, -1.1782]], grad_fn=<AddmmBackward>) tensor([0])
8937 loss  0.10629160702228546 tensor([[ 0.8044, -1.3836]], grad_fn=<AddmmBackward>) tensor([0])
8938 loss  0.0023511406034231186 tensor([[ 2.7652, -3.2865]], grad_fn=<AddmmBackward>) tensor([0])
8939 loss  0.438375324010849 tensor([[-0.4700,  0.1275]], grad_fn=<AddmmBackward>) tensor([1])
8940 loss  0.013379550538957119 tensor([[ 1.7779, -2.5295]], grad_fn=<AddmmBackward>) tensor([0])
8941 loss  0.05532477796077728 tensor([[-1.4731,  1.3936]], grad_fn=<AddmmBackward>) tensor([1])
8942 loss  0.0061971647664904594 tensor([[-2.5259,  2.5546]], grad_fn=<AddmmBackward>) tensor([1])
8943 loss  0.9012100100517273 tensor([[ 0.0532, -0.3270]], grad_fn=<AddmmBackward>) tensor([1])
8944 loss  0.24226723611354828 tensor([[-0.7428,  0.5513]], grad_fn=<AddmmBackward>) tensor([1])
8945 loss  0.21339772641658783 tensor([[-0.7846,  0.6514]], grad_fn=<AddmmBackward>) tensor([1])
8946 loss  0.066656388342380

9035 loss  0.6622810363769531 tensor([[-0.2394, -0.1766]], grad_fn=<AddmmBackward>) tensor([1])
9036 loss  0.23818987607955933 tensor([[-0.8332,  0.4800]], grad_fn=<AddmmBackward>) tensor([1])
9037 loss  0.18969495594501495 tensor([[-0.9003,  0.6657]], grad_fn=<AddmmBackward>) tensor([1])
9038 loss  0.020808836445212364 tensor([[ 1.6438, -2.2181]], grad_fn=<AddmmBackward>) tensor([0])
9039 loss  3.5575881004333496 tensor([[ 1.4746, -2.0541]], grad_fn=<AddmmBackward>) tensor([1])
9040 loss  0.030789460986852646 tensor([[ 1.4160, -2.0492]], grad_fn=<AddmmBackward>) tensor([0])
9041 loss  0.03605421632528305 tensor([[-1.7762,  1.5284]], grad_fn=<AddmmBackward>) tensor([1])
9042 loss  0.005957346875220537 tensor([[-2.5581,  2.5621]], grad_fn=<AddmmBackward>) tensor([1])
9043 loss  0.8044595718383789 tensor([[ 0.0178, -0.1936]], grad_fn=<AddmmBackward>) tensor([1])
9044 loss  0.055228110402822495 tensor([[-1.5251,  1.3435]], grad_fn=<AddmmBackward>) tensor([1])
9045 loss  0.0061721666716039

9135 loss  0.7877381443977356 tensor([[-0.2052, -0.3862]], grad_fn=<AddmmBackward>) tensor([1])
9136 loss  0.0637296736240387 tensor([[-1.4767,  1.2444]], grad_fn=<AddmmBackward>) tensor([1])
9137 loss  0.02990604192018509 tensor([[ 1.3385, -2.1562]], grad_fn=<AddmmBackward>) tensor([0])
9138 loss  0.020151877775788307 tensor([[-2.0006,  1.8938]], grad_fn=<AddmmBackward>) tensor([1])
9139 loss  0.05080012232065201 tensor([[ 1.0908, -1.8636]], grad_fn=<AddmmBackward>) tensor([0])
9140 loss  0.11455951631069183 tensor([[ 0.7022, -1.4066]], grad_fn=<AddmmBackward>) tensor([0])
9141 loss  0.022331787273287773 tensor([[ 1.5225, -2.2681]], grad_fn=<AddmmBackward>) tensor([0])
9142 loss  0.06803575158119202 tensor([[ 0.9803, -1.6732]], grad_fn=<AddmmBackward>) tensor([0])
9143 loss  0.03988378867506981 tensor([[-1.7319,  1.4699]], grad_fn=<AddmmBackward>) tensor([1])
9144 loss  0.03304540738463402 tensor([[ 1.3577, -2.0356]], grad_fn=<AddmmBackward>) tensor([0])
9145 loss  0.20346418023109436

9236 loss  0.04158532992005348 tensor([[ 1.1387, -2.0205]], grad_fn=<AddmmBackward>) tensor([0])
9237 loss  0.08379726856946945 tensor([[-1.3708,  1.0664]], grad_fn=<AddmmBackward>) tensor([1])
9238 loss  0.06459076702594757 tensor([[ 0.9949, -1.7123]], grad_fn=<AddmmBackward>) tensor([0])
9239 loss  0.22651368379592896 tensor([[ 0.4122, -0.9574]], grad_fn=<AddmmBackward>) tensor([0])
9240 loss  0.033154621720314026 tensor([[ 1.3523, -2.0376]], grad_fn=<AddmmBackward>) tensor([0])
9241 loss  0.03484933823347092 tensor([[ 1.2918, -2.0474]], grad_fn=<AddmmBackward>) tensor([0])
9242 loss  0.06088028475642204 tensor([[ 1.0746, -1.6936]], grad_fn=<AddmmBackward>) tensor([0])
9243 loss  0.017358873039484024 tensor([[ 1.6493, -2.3957]], grad_fn=<AddmmBackward>) tensor([0])
9244 loss  0.24338577687740326 tensor([[-0.8553,  0.4336]], grad_fn=<AddmmBackward>) tensor([1])
9245 loss  0.10579176992177963 tensor([[-1.3604,  0.8325]], grad_fn=<AddmmBackward>) tensor([1])
9246 loss  0.014508466236293

9340 loss  0.060382358729839325 tensor([[ 1.0601, -1.7166]], grad_fn=<AddmmBackward>) tensor([0])
9341 loss  0.07500070333480835 tensor([[ 0.9348, -1.6177]], grad_fn=<AddmmBackward>) tensor([0])
9342 loss  0.039342742413282394 tensor([[-1.7608,  1.4549]], grad_fn=<AddmmBackward>) tensor([1])
9343 loss  0.11631287634372711 tensor([[ 0.7701, -1.3226]], grad_fn=<AddmmBackward>) tensor([0])
9344 loss  0.028998449444770813 tensor([[ 1.3944, -2.1315]], grad_fn=<AddmmBackward>) tensor([0])
9345 loss  0.09153411537408829 tensor([[ 0.9325, -1.4125]], grad_fn=<AddmmBackward>) tensor([0])
9346 loss  0.21677258610725403 tensor([[ 0.4971, -0.9215]], grad_fn=<AddmmBackward>) tensor([0])
9347 loss  0.22524000704288483 tensor([[-0.9044,  0.4714]], grad_fn=<AddmmBackward>) tensor([1])
9348 loss  0.15722376108169556 tensor([[ 0.5638, -1.2067]], grad_fn=<AddmmBackward>) tensor([0])
9349 loss  0.2324133664369583 tensor([[ 0.4286, -0.9122]], grad_fn=<AddmmBackward>) tensor([0])
9350 loss  0.059317793697118

9456 loss  0.018298154696822166 tensor([[-2.0134,  1.9784]], grad_fn=<AddmmBackward>) tensor([1])
9457 loss  0.16652221977710724 tensor([[-0.9915,  0.7167]], grad_fn=<AddmmBackward>) tensor([1])
9458 loss  0.05570707470178604 tensor([[-1.5127,  1.3470]], grad_fn=<AddmmBackward>) tensor([1])
9459 loss  0.5810920000076294 tensor([[-0.0372, -0.2754]], grad_fn=<AddmmBackward>) tensor([0])
9460 loss  0.2983998656272888 tensor([[ 0.2342, -0.8223]], grad_fn=<AddmmBackward>) tensor([0])
9461 loss  0.5676166415214539 tensor([[-0.1125, -0.3816]], grad_fn=<AddmmBackward>) tensor([0])
9462 loss  0.10252531617879868 tensor([[-1.2372,  0.9888]], grad_fn=<AddmmBackward>) tensor([1])
9463 loss  0.05413319915533066 tensor([[ 1.1400, -1.7491]], grad_fn=<AddmmBackward>) tensor([0])
9464 loss  2.4918105602264404 tensor([[ 0.9659, -1.4395]], grad_fn=<AddmmBackward>) tensor([1])
9465 loss  0.10339600592851639 tensor([[-1.2148,  1.0023]], grad_fn=<AddmmBackward>) tensor([1])
9466 loss  0.04451945051550865 te

9569 loss  0.007624453864991665 tensor([[-2.4449,  2.4277]], grad_fn=<AddmmBackward>) tensor([1])
9570 loss  0.08669424057006836 tensor([[ 1.0060, -1.3957]], grad_fn=<AddmmBackward>) tensor([0])
9571 loss  0.523706316947937 tensor([[-0.3134,  0.0602]], grad_fn=<AddmmBackward>) tensor([1])
9572 loss  0.030938798561692238 tensor([[ 1.5517, -1.9085]], grad_fn=<AddmmBackward>) tensor([0])
9573 loss  0.04683180898427963 tensor([[ 1.4164, -1.6213]], grad_fn=<AddmmBackward>) tensor([0])
9574 loss  0.12222142517566681 tensor([[ 0.7878, -1.2524]], grad_fn=<AddmmBackward>) tensor([0])
9575 loss  0.06483446806669235 tensor([[ 1.1069, -1.5964]], grad_fn=<AddmmBackward>) tensor([0])
9576 loss  1.8285422325134277 tensor([[ 0.5901, -1.0633]], grad_fn=<AddmmBackward>) tensor([1])
9577 loss  0.07566339522600174 tensor([[ 1.0848, -1.4586]], grad_fn=<AddmmBackward>) tensor([0])
9578 loss  0.04076370596885681 tensor([[ 1.4324, -1.7471]], grad_fn=<AddmmBackward>) tensor([0])
9579 loss  0.14078089594841003 

9684 loss  0.013368376530706882 tensor([[-2.1428,  2.1654]], grad_fn=<AddmmBackward>) tensor([1])
9685 loss  0.0072504254058003426 tensor([[-2.4600,  2.4630]], grad_fn=<AddmmBackward>) tensor([1])
9686 loss  0.05731720104813576 tensor([[ 1.1520, -1.6784]], grad_fn=<AddmmBackward>) tensor([0])
9687 loss  0.10868462175130844 tensor([[-1.1217,  1.0427]], grad_fn=<AddmmBackward>) tensor([1])
9688 loss  0.27119138836860657 tensor([[-0.6672,  0.4991]], grad_fn=<AddmmBackward>) tensor([1])
9689 loss  0.006166953593492508 tensor([[-2.5502,  2.5352]], grad_fn=<AddmmBackward>) tensor([1])
9690 loss  0.020062146708369255 tensor([[ 1.7267, -2.1722]], grad_fn=<AddmmBackward>) tensor([0])
9691 loss  1.2613445520401 tensor([[ 0.3147, -0.6136]], grad_fn=<AddmmBackward>) tensor([1])
9692 loss  0.065021812915802 tensor([[ 1.1765, -1.5238]], grad_fn=<AddmmBackward>) tensor([0])
9693 loss  0.05836005508899689 tensor([[ 1.2302, -1.5816]], grad_fn=<AddmmBackward>) tensor([0])
9694 loss  0.32735753059387207 

9793 loss  0.01732606813311577 tensor([[ 1.8414, -2.2054]], grad_fn=<AddmmBackward>) tensor([0])
9794 loss  0.1792520135641098 tensor([[ 0.6240, -1.0040]], grad_fn=<AddmmBackward>) tensor([0])
9795 loss  0.04134710133075714 tensor([[ 1.4365, -1.7285]], grad_fn=<AddmmBackward>) tensor([0])
9796 loss  0.10884425789117813 tensor([[-1.1317,  1.0313]], grad_fn=<AddmmBackward>) tensor([1])
9797 loss  0.07764707505702972 tensor([[ 1.0463, -1.4702]], grad_fn=<AddmmBackward>) tensor([0])
9798 loss  0.03696933016180992 tensor([[-1.6659,  1.6132]], grad_fn=<AddmmBackward>) tensor([1])
9799 loss  0.6917442083358765 tensor([[-0.0297, -0.0269]], grad_fn=<AddmmBackward>) tensor([1])
9800 loss  0.00545145571231842 tensor([[-2.6080,  2.6012]], grad_fn=<AddmmBackward>) tensor([1])
9801 loss  0.17378093302249908 tensor([[ 0.6352, -1.0266]], grad_fn=<AddmmBackward>) tensor([0])
9802 loss  0.6957932114601135 tensor([[-0.1023, -0.1076]], grad_fn=<AddmmBackward>) tensor([1])
9803 loss  0.6435998678207397 ten

9905 loss  0.023777689784765244 tensor([[-1.8811,  1.8460]], grad_fn=<AddmmBackward>) tensor([1])
9906 loss  0.05938250198960304 tensor([[-1.4444,  1.3495]], grad_fn=<AddmmBackward>) tensor([1])
9907 loss  0.10695809870958328 tensor([[ 0.9268, -1.2546]], grad_fn=<AddmmBackward>) tensor([0])
9908 loss  0.005152280908077955 tensor([[-2.6233,  2.6424]], grad_fn=<AddmmBackward>) tensor([1])
9909 loss  0.01581307128071785 tensor([[-2.0813,  2.0577]], grad_fn=<AddmmBackward>) tensor([1])
9910 loss  0.5164437890052795 tensor([[-0.0424, -0.4338]], grad_fn=<AddmmBackward>) tensor([0])
9911 loss  0.6730132102966309 tensor([[-0.1482, -0.1889]], grad_fn=<AddmmBackward>) tensor([0])
9912 loss  0.4423176050186157 tensor([[ 0.1208, -0.4657]], grad_fn=<AddmmBackward>) tensor([0])
9913 loss  0.6952205896377563 tensor([[-0.1845, -0.1803]], grad_fn=<AddmmBackward>) tensor([0])
9914 loss  0.0727742612361908 tensor([[ 1.0478, -1.5359]], grad_fn=<AddmmBackward>) tensor([0])
9915 loss  0.16042456030845642 te

In [49]:
PATH = './die_net.pth'
torch.save(net.state_dict(), PATH)

In [50]:
# #Check progress:

# #Create batch:
# batch_data = []
# batch_label = []
# for j in range(BATCH_SIZE):   
#     rand = random.randint(0,1)
#     if rand == 0:
#         #fair die
#         trial_seq = binomial_simulation(NUM, FAIR)
#         batch_data.append(trial_seq)
#         batch_label.append(0)
#     else:
#         #unfair die
#         trial_seq = binomial_simulation(NUM, UNFAIR)
#         batch_data.append(trial_seq)
#         batch_label.append(1)
            
# batch_data = torch.tensor(batch_data, dtype = torch.float)
# batch_label = torch.tensor(batch_label, dtype = torch.long)

# #Test batch:
# net = Net()
# net.load_state_dict(torch.load(PATH))

# #test on images above
# outputs = net(batch_data)
# _, predicted = torch.max(outputs, 1)

# print(predicted, batch_label)

In [55]:
#Test:

net = Net()
net.load_state_dict(torch.load(PATH))
correct = 0
total = 0
failed_probabilities = []
with torch.no_grad():
    for i, data in enumerate(test_data, 0):
        inputs = data
        label = test_label[i]
        outputs = net(inputs)
        _, predicted = torch.max(outputs.data, 1)
        if predicted != label:
            print(outputs, predicted, label, test_prob[i])
        total += labels.size(0)
        correct += (predicted == label).sum().item()
        #print(total, correct, "\n")
        
print('Accuracy of the network on dice roll sequences: %d %%' % (
    100 * correct / total))

tensor([[ 0.0949, -0.4139]]) tensor([0]) tensor([1]) [[0.570977914306552, 0.04334938678539161, 0.13532908921081785, 0.04607731792071007, 0.07538110585194925, 0.12888518592457926]]
tensor([[-0.1076, -0.2299]]) tensor([0]) tensor([1]) [[0.20303336715863807, 0.2133793312687587, 0.22891672329096638, 0.22942390320000275, 0.06603503326718346, 0.05921164181445063]]
tensor([[ 1.7966, -2.3831]]) tensor([0]) tensor([1]) [[0.05220429318310538, 0.06400652311934596, 0.31549788366767945, 0.40243442431271303, 0.04926470351183231, 0.11659217220532392]]
tensor([[ 0.2631, -0.5625]]) tensor([0]) tensor([1]) [[0.45567943181897674, 0.21479829865475342, 0.0461959631111431, 0.05969809826609993, 0.020834978496345365, 0.20279322965268146]]
tensor([[ 1.2270, -1.6231]]) tensor([0]) tensor([1]) [[0.12472027405732256, 0.1997457082009271, 0.264863166245481, 0.06309510734007201, 0.32629886864096386, 0.021276875515233495]]
tensor([[-0.0766, -0.1313]]) tensor([0]) tensor([1]) [[0.3125152764279957, 0.3703739819104542, 

tensor([[ 1.8122, -2.4055]]) tensor([0]) tensor([1]) [[0.17208147971297105, 0.2279104200238763, 0.06233214433744259, 0.474273619516225, 0.018317714723814688, 0.04508462168567037]]
tensor([[ 1.3096, -1.8218]]) tensor([0]) tensor([1]) [[0.10002113981165905, 0.324453980865322, 0.028794164668569808, 0.43004124866539406, 0.006170901619541216, 0.11051856436951395]]
tensor([[ 0.4679, -0.8763]]) tensor([0]) tensor([1]) [[0.12395034063868282, 0.21677174777813873, 0.4310540224073017, 0.10177247944544156, 0.08386194896464891, 0.04258946076578626]]
tensor([[ 0.6845, -1.0089]]) tensor([0]) tensor([1]) [[0.2353450120802666, 0.4308988228931183, 0.08673445034793649, 0.08180066641072, 0.11952089781714789, 0.045700150450810745]]
tensor([[ 0.2655, -0.8340]]) tensor([0]) tensor([1]) [[0.46547741777088225, 0.004276516022508916, 0.06625363255948903, 0.3230276715679789, 0.056433517185283055, 0.08453124489385788]]
tensor([[-0.1286, -0.1826]]) tensor([0]) tensor([1]) [[0.029071741626076952, 0.8174503891528857,

tensor([[ 1.2076, -1.7340]]) tensor([0]) tensor([1]) [[0.09840618356975239, 0.2206937218167441, 0.33132867346777833, 0.15943382997053618, 0.1489290251450679, 0.04120856603012113]]
tensor([[ 0.3506, -0.6379]]) tensor([0]) tensor([1]) [[0.40316569457743, 0.047061794175438924, 0.04313767459682969, 0.48527279699209797, 0.0051236146597255115, 0.016238424998477923]]
tensor([[ 1.3346, -1.7640]]) tensor([0]) tensor([1]) [[0.15303697367123126, 0.043085830778300215, 0.24077882516962004, 0.5280775358455155, 0.025071230621367898, 0.009949603913965103]]
tensor([[-0.0602, -0.3730]]) tensor([0]) tensor([1]) [[0.5280994052959651, 0.08188642329824583, 0.07352037565186045, 0.21135362784391157, 0.0006685946972705263, 0.10447157321274657]]
tensor([[ 0.1065, -0.4271]]) tensor([0]) tensor([1]) [[0.2644862402221222, 0.3513023838760169, 0.07785973176702538, 0.1316302156196402, 0.105152586615723, 0.06956884189947231]]
tensor([[ 0.5860, -0.9519]]) tensor([0]) tensor([1]) [[0.042663327158890985, 0.76736033538458

tensor([[ 1.3800, -1.8565]]) tensor([0]) tensor([1]) [[0.06291684778364226, 0.5034932319161436, 0.20183044124507288, 0.029938444632658906, 0.03810580454442541, 0.16371522987805695]]
tensor([[ 1.4357, -1.8659]]) tensor([0]) tensor([1]) [[0.05344609325045957, 0.42910843198848386, 0.007534821499158695, 0.2624798237375481, 0.043027506914305326, 0.20440332261004449]]
tensor([[ 1.6310, -2.2041]]) tensor([0]) tensor([1]) [[0.3258331748891632, 0.2145329602781043, 0.14537595962880648, 0.023705034817898037, 0.18943894379024756, 0.10111392659578045]]
tensor([[ 1.2800, -1.8642]]) tensor([0]) tensor([1]) [[0.04506303564659697, 0.41069337828880104, 0.3300144864501399, 0.021980850228260962, 0.002662415331244481, 0.18958583405495663]]
tensor([[ 1.0809, -1.5483]]) tensor([0]) tensor([1]) [[0.0015727292252505354, 0.3954812130080639, 0.441468725161366, 0.006676569320793508, 0.07993569874294082, 0.07486506454158534]]
tensor([[ 1.5634, -2.1895]]) tensor([0]) tensor([1]) [[0.13437636360498229, 0.19669372417

tensor([[ 0.0602, -0.2744]]) tensor([0]) tensor([1]) [[0.3998100542542373, 0.06249966790612283, 0.3274014471762937, 0.008740131649346861, 0.015284064446550845, 0.18626463456744843]]
tensor([[ 0.2455, -0.5536]]) tensor([0]) tensor([1]) [[0.10866654231689388, 0.322033468183867, 0.5645237103835787, 0.0013945202037038821, 0.0028253209105783285, 0.0005564380013781539]]
tensor([[ 0.4542, -0.8624]]) tensor([0]) tensor([1]) [[0.42792226883804374, 0.18022093817430437, 0.1219803124991632, 0.1748074239173249, 0.02367696961037782, 0.07139208696078594]]
tensor([[ 0.7813, -1.2494]]) tensor([0]) tensor([1]) [[0.07590213099037924, 0.48330664214542757, 0.037985345775708196, 0.29374487814082867, 0.030523698076492484, 0.07853730487116388]]
tensor([[-0.1060, -0.2388]]) tensor([0]) tensor([1]) [[0.16280202375215325, 0.21766020925347207, 0.5979362681794423, 0.020632790426098904, 0.00019123651498172326, 0.0007774718738517131]]
tensor([[ 2.1035, -2.6262]]) tensor([0]) tensor([1]) [[0.04915837905746201, 0.0783

tensor([[ 0.0810, -0.4772]]) tensor([0]) tensor([1]) [[0.13460296371502967, 0.5328751498348103, 0.18928044962419852, 0.10569143396024691, 0.011214820781021505, 0.026335182084693078]]
tensor([[-0.1357, -0.2075]]) tensor([0]) tensor([1]) [[0.15144442826705162, 0.587703373382203, 0.10112002241191921, 0.051180258388847294, 0.03785661377869331, 0.07069530377128555]]
tensor([[ 0.6374, -1.0826]]) tensor([0]) tensor([1]) [[0.020244812003063717, 0.420601196177938, 0.4099417681780324, 0.046888267346143056, 0.044541741264603, 0.05778221503021981]]
tensor([[ 0.0033, -0.2964]]) tensor([0]) tensor([1]) [[0.26634663580013507, 0.4775030755721468, 0.05384264000091598, 0.019514358319973384, 0.10497315513845124, 0.07782013516837755]]
tensor([[ 0.7468, -1.0933]]) tensor([0]) tensor([1]) [[0.054471503376484876, 0.16122092359583917, 0.5965575012051002, 0.15646947004698705, 0.013821884834799496, 0.0174587169407892]]
tensor([[ 0.7609, -1.0897]]) tensor([0]) tensor([1]) [[0.26798823442715225, 0.228666876256376

tensor([[ 0.4294, -0.7934]]) tensor([0]) tensor([1]) [[0.37357819529231195, 0.30243114798322046, 0.18674611633278934, 0.036273144426500925, 0.018896996767298082, 0.08207439919787923]]
tensor([[ 0.0702, -0.3756]]) tensor([0]) tensor([1]) [[0.20053091672890677, 0.10609067725413455, 0.641937741970366, 0.005231084120009224, 0.010485744391237034, 0.03572383553534639]]
tensor([[ 0.6564, -1.0377]]) tensor([0]) tensor([1]) [[0.01622558815449182, 0.2565637844415692, 0.4594916228282717, 0.13362562701976166, 0.07676686893282672, 0.05732650862307884]]
tensor([[ 0.4939, -0.8487]]) tensor([0]) tensor([1]) [[0.19871919700846186, 0.19384237789717212, 0.08078902655843083, 0.18305468564252178, 0.28848782812743945, 0.05510688476597403]]
tensor([[-0.1330, -0.1962]]) tensor([0]) tensor([1]) [[0.4982953612710539, 0.09903945117306495, 0.17943792828599164, 0.047709196560112244, 0.06708554091901486, 0.1084325217907624]]
tensor([[ 1.3179, -1.9120]]) tensor([0]) tensor([1]) [[0.06787398356703722, 0.5471446722873

tensor([[ 0.0723, -0.3916]]) tensor([0]) tensor([1]) [[0.15446599406824413, 0.2621940338850728, 0.4567775305881056, 0.09649002914543199, 0.029996307891805025, 7.610442134046297e-05]]
tensor([[ 1.5849, -2.1526]]) tensor([0]) tensor([1]) [[0.2031176184775645, 0.0960730768867156, 0.02866502064720405, 0.3195096772282122, 0.34917804505592337, 0.003456561704380301]]
tensor([[ 1.2842, -1.7818]]) tensor([0]) tensor([1]) [[0.015889489266784573, 0.14565530437415222, 0.46603510536450926, 0.33488375638432377, 0.024838084715755143, 0.012698259894475056]]
tensor([[ 0.1691, -0.5328]]) tensor([0]) tensor([1]) [[0.05994930797469289, 0.4642265262981939, 0.3449484337778898, 0.06994121798267064, 0.022689976802447505, 0.03824453716410527]]
tensor([[-0.1263, -0.1994]]) tensor([0]) tensor([1]) [[0.37627455202151505, 0.22146779269433478, 0.051336586026670135, 0.31754407720781147, 0.0175300969298347, 0.01584689511983384]]
tensor([[-0.1545, -0.1692]]) tensor([0]) tensor([1]) [[0.2953933019738971, 0.283171402630

tensor([[ 0.5582, -0.9754]]) tensor([0]) tensor([1]) [[0.08901225629580856, 0.2592064855650041, 0.4950220314246964, 0.090684613707844, 0.031299201102820744, 0.03477541190382614]]
tensor([[ 0.2341, -0.6234]]) tensor([0]) tensor([1]) [[0.48029553061312935, 0.12574403032282092, 0.17902415020963885, 0.014643467912350282, 0.06575357755629087, 0.13453924338576978]]
tensor([[-0.0880, -0.1595]]) tensor([0]) tensor([1]) [[0.36575459073915706, 0.4379407685267414, 0.021353211031156255, 0.005897421606862698, 0.034706227333981036, 0.13434778076210155]]
tensor([[ 1.4206, -1.8503]]) tensor([0]) tensor([1]) [[0.0653451829582905, 0.1607990841978831, 0.055050825837034974, 0.6681033139392745, 0.04373802198891815, 0.0069635710785987195]]
tensor([[-0.0924, -0.2215]]) tensor([0]) tensor([1]) [[0.06433847840847806, 0.47720311003854426, 0.2153549788677167, 0.24286123986171404, 0.00018618780272013045, 5.6005020826796833e-05]]
tensor([[-0.0660, -0.2375]]) tensor([0]) tensor([1]) [[0.21094648915824954, 0.2700955

tensor([[ 0.1091, -0.5235]]) tensor([0]) tensor([1]) [[0.15627711111130727, 0.584076699780001, 0.09789937428442604, 0.0065540884480078205, 0.014247146583432093, 0.14094557979282582]]
tensor([[ 0.4759, -0.9831]]) tensor([0]) tensor([1]) [[0.38048676972063555, 0.17869812326597645, 0.07570925931677198, 0.2959049885667418, 0.049443314865464376, 0.01975754426440985]]
tensor([[ 1.7241, -2.2530]]) tensor([0]) tensor([1]) [[0.10137646318812266, 0.1110053640305446, 0.15261922830368171, 0.5706666301959445, 0.03641655575165686, 0.027915758530049635]]
tensor([[-0.0493, -0.2233]]) tensor([0]) tensor([1]) [[0.11967482227045301, 0.500509020583045, 0.2856847461024815, 0.02216778977994979, 0.03310902310668948, 0.03885459815738118]]
tensor([[ 0.2796, -0.6850]]) tensor([0]) tensor([1]) [[0.34466272163073775, 0.18680627250229115, 0.11802736402986576, 0.026419856810413058, 0.2233471465665591, 0.10073663846013323]]
tensor([[ 0.5589, -0.9177]]) tensor([0]) tensor([1]) [[0.24604487461442792, 0.155821928295853

tensor([[ 0.0530, -0.4531]]) tensor([0]) tensor([1]) [[0.12332200677617222, 0.35389184862283374, 0.19213762739444715, 0.22564055998835514, 0.07816650856556194, 0.026841448652629787]]
tensor([[ 1.0023, -1.4168]]) tensor([0]) tensor([1]) [[0.2900502368766191, 0.37076233127817715, 0.05062890147636617, 0.010274212576552063, 0.2690566755976486, 0.009227642194636843]]
Accuracy of the network on dice roll sequences: 88 %
