In [12]:
import numpy as np
import os 
import tensorflow as tf
from tensorflow import keras as kr
import torch
import torch.utils.data as Data #将数据分批次需要用到
from torch import nn
import torch.nn.functional as F
import torch.optim as optim
from torch.nn import init

In [13]:
#读取词汇表
def read_vocab(vocab_dir):
    with open(vocab_dir,'r',encoding = 'utf-8',errors = 'ignore') as fp:
        words = [_.strip() for _ in fp.readlines()]
    word_to_id = dict(zip(words,range(len(words)))) 
    return words, word_to_id 

words, word_to_id = read_vocab('../input/cnews.vocab.txt')
print(word_to_id)

{'<PAD>': 0, '，': 1, '的': 2, '。': 3, '一': 4, '是': 5, '在': 6, '0': 7, '有': 8, '不': 9, '了': 10, '中': 11, '1': 12, '人': 13, '大': 14, '、': 15, '国': 16, '': 3903, '2': 18, '这': 19, '上': 20, '为': 21, '个': 22, '“': 23, '”': 24, '年': 25, '学': 26, '时': 27, '我': 28, '地': 29, '和': 30, '以': 31, '到': 32, '出': 33, '来': 34, '会': 35, '行': 36, '发': 37, '：': 38, '对': 39, '们': 40, '要': 41, '生': 42, '家': 43, '他': 44, '能': 45, '也': 46, '业': 47, '金': 48, '3': 49, '成': 50, '可': 51, '分': 52, '多': 53, '现': 54, '5': 55, '就': 56, '场': 57, '新': 58, '后': 59, '于': 60, '下': 61, '日': 62, '经': 63, '市': 64, '前': 65, '过': 66, '方': 67, '得': 68, '作': 69, '月': 70, '最': 71, '开': 72, '房': 73, '》': 74, '《': 75, '高': 76, '9': 77, '8': 78, '.': 79, '而': 80, '比': 81, '公': 82, '4': 83, '说': 84, ')': 85, '将': 86, '(': 87, '都': 88, '资': 89, 'e': 90, '6': 91, '基': 92, '用': 93, '面': 94, '产': 95, '还': 96, '自': 97, '者': 98, '本': 99, '之': 100, '美': 101, '很': 102, '同': 103, '7': 105, '部': 106, '进': 107, '但': 108, '主': 109, '外': 110, '动':

In [14]:
#读取分类目录，固定
def read_category():
    categories = ['体育', '财经', '房产', '家居', '教育', '科技', '时尚', '时政', '游戏', '娱乐']
    categories = [x for x in categories]
    cat_to_id = dict(zip(categories,range(len(categories))))
    return categories, cat_to_id

# 获取文本的类别及其对应id的字典
categories, cat_to_id = read_category()
print(cat_to_id)

{'体育': 0, '财经': 1, '房产': 2, '家居': 3, '教育': 4, '科技': 5, '时尚': 6, '时政': 7, '游戏': 8, '娱乐': 9}


In [15]:
# 将文件转换为id表示
def process_file(filename,word_to_id,cat_to_id,max_length=600):
    contents, labels = [],[]
    with open(filename,'r',encoding = 'utf-8',errors = 'ignore') as f:
        for line in f:
            try:
                label,content = line.strip().split('\t')
                if content:
                    contents.append(list(content))
                    labels.append(label)
            except:
                pass
    data_id, label_id = [],[]
    for i in range(len(contents)):
        data_id.append([word_to_id[x] for x in contents[i] if x in word_to_id])
        label_id.append(cat_to_id[labels[i]]) #每句话对应的类别的id
    ## 使用keras提供的pad_sequences 来将文本pad为固定长度
    x_pad = kr.preprocessing.sequence.pad_sequences(data_id,max_length)
    # 将标签转化为one-hot表示
    y_pad = kr.utils.to_categorical(label_id, num_classes = len(cat_to_id))
    return x_pad, y_pad

# 数据加载及分批
# 获取训练数据每个字的id和对应标签的one-hot形式
x_train, y_train = process_file('../input/cnews.train.txt', word_to_id, cat_to_id, 600)
print('x_train=', x_train)
x_val, y_val = process_file('../input/cnews.val.txt', word_to_id, cat_to_id, 600)

x_train= [[1609  659   56 ...    9  311    3]
 [   2  101   16 ... 1168    3   24]
 [ 465  855  521 ...  116  136   85]
 ...
 [  49   18   79 ...  836 1928 1072]
 [ 166  110  714 ...  836 1928 1072]
 [   1   80  551 ...   78  192    3]]


# data batch

In [16]:
# TextRNN Model
# 文本分类，RNN模型
class TextRNN(nn.Module):   
    def __init__(self):
        super(TextRNN, self).__init__()
        # 进行词嵌入
        self.embedding = nn.Embedding(5000, 64)  
        #self.rnn = nn.LSTM(input_size=64, hidden_size=128, num_layers=2, bidirectional=True)
        self.rnn = nn.GRU(input_size=64, hidden_size=128, bidirectional=True, batch_first = True,dropout = 0.5)
        self.fc = nn.Sequential(nn.Linear(256,10),
                                #nn.Dropout(0.8),
                                nn.Softmax())
        #self.f2 = nn.Sequential(nn.Linear(128,10),
                                #nn.Softmax())
 
    def forward(self, x):
        x = self.embedding(x)
        x,_ = self.rnn(x)
        x = F.dropout(x,p=0.8)
        x = self.fc(x[:,-1,:])
        return x

In [17]:
train_dataset = Data.TensorDataset(torch.LongTensor(x_train),torch.LongTensor(y_train))
train_loader = Data.DataLoader(dataset = train_dataset,batch_size = 100,
                              shuffle = True)
val_dataset = Data.TensorDataset(torch.LongTensor(x_val),torch.LongTensor(y_val))
val_loader = Data.DataLoader(dataset = val_dataset,batch_size = 100,
                              shuffle = True)

In [18]:
# 设置初始化方式
def weights_init_kaiming(m):
    classname = m.__class__.__name__
    if classname.find('Linear') != -1:
        init.kaiming_normal_(m.weight.data, a=0, mode='fan_out')
        init.constant_(m.bias.data, 0.0)

In [23]:
def train():
    #使用TextRNN
    model = TextRNN().cuda()
    model.apply(weights_init_kaiming)
    #损失函数
    Loss = nn.MultiLabelSoftMarginLoss()
    optimizer = optim.Adam(model.parameters(),lr = 0.001)
    best_val_acc = 0
    
    for epoch in range(30):
        print('epoch = ', epoch)
        for step, (x_batch,y_batch) in enumerate(train_loader):
            x = x_batch.cuda()
            y = y_batch.cuda()
            out =  model(x)
            loss = Loss(out,y)
            print('loss = ',loss)
            #反向传播
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            #计算准确率
            accuracy = np.mean((torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
            print(accuracy)
            
            #对模型进行验证
            if (epoch+1) % 25 == 0:
                #accuracy.append(accuracy_)
                for step,(x_batch,y_batch) in enumerate(val_loader):
                    x = x_batch.cuda()
                    y = y_batch.cuda()
                    out = model(x)
                    #计算准确率
                    val_accuracy = np.mean((torch.argmax(out, 1) == torch.argmax(y, 1)).cpu().numpy())
                    if val_accuracy > best_val_acc:
                        torch.save(model.state_dict(),'model_params.pkl')
                        best_val_acc = val_accuracy
    print(val_accuracy)

In [24]:
# 设置随机数种子
torch.manual_seed(33)
train()

  "num_layers={}".format(dropout, num_layers))


epoch =  0
loss =  tensor(0.7351, device='cuda:0', grad_fn=<MeanBackward0>)


  input = module(input)


0.13
loss =  tensor(0.7296, device='cuda:0', grad_fn=<MeanBackward0>)
0.19
loss =  tensor(0.7271, device='cuda:0', grad_fn=<MeanBackward0>)
0.26
loss =  tensor(0.7296, device='cuda:0', grad_fn=<MeanBackward0>)
0.18
loss =  tensor(0.7263, device='cuda:0', grad_fn=<MeanBackward0>)
0.28
loss =  tensor(0.7199, device='cuda:0', grad_fn=<MeanBackward0>)
0.34
loss =  tensor(0.7213, device='cuda:0', grad_fn=<MeanBackward0>)
0.31
loss =  tensor(0.7224, device='cuda:0', grad_fn=<MeanBackward0>)
0.28
loss =  tensor(0.7252, device='cuda:0', grad_fn=<MeanBackward0>)
0.24
loss =  tensor(0.7179, device='cuda:0', grad_fn=<MeanBackward0>)
0.32
loss =  tensor(0.7181, device='cuda:0', grad_fn=<MeanBackward0>)
0.31
loss =  tensor(0.7150, device='cuda:0', grad_fn=<MeanBackward0>)
0.33
loss =  tensor(0.7111, device='cuda:0', grad_fn=<MeanBackward0>)
0.42
loss =  tensor(0.7177, device='cuda:0', grad_fn=<MeanBackward0>)
0.35
loss =  tensor(0.7148, device='cuda:0', grad_fn=<MeanBackward0>)
0.38
loss =  tensor(

0.46
loss =  tensor(0.7073, device='cuda:0', grad_fn=<MeanBackward0>)
0.46
loss =  tensor(0.7074, device='cuda:0', grad_fn=<MeanBackward0>)
0.44
loss =  tensor(0.7012, device='cuda:0', grad_fn=<MeanBackward0>)
0.52
loss =  tensor(0.7070, device='cuda:0', grad_fn=<MeanBackward0>)
0.47
loss =  tensor(0.7068, device='cuda:0', grad_fn=<MeanBackward0>)
0.45
loss =  tensor(0.7010, device='cuda:0', grad_fn=<MeanBackward0>)
0.55
loss =  tensor(0.7097, device='cuda:0', grad_fn=<MeanBackward0>)
0.41
loss =  tensor(0.7014, device='cuda:0', grad_fn=<MeanBackward0>)
0.52
loss =  tensor(0.7006, device='cuda:0', grad_fn=<MeanBackward0>)
0.56
loss =  tensor(0.7072, device='cuda:0', grad_fn=<MeanBackward0>)
0.47
loss =  tensor(0.7026, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(0.7064, device='cuda:0', grad_fn=<MeanBackward0>)
0.49
loss =  tensor(0.6981, device='cuda:0', grad_fn=<MeanBackward0>)
0.58
loss =  tensor(0.7023, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(

loss =  tensor(0.7029, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(0.7088, device='cuda:0', grad_fn=<MeanBackward0>)
0.44
loss =  tensor(0.7149, device='cuda:0', grad_fn=<MeanBackward0>)
0.38
loss =  tensor(0.7082, device='cuda:0', grad_fn=<MeanBackward0>)
0.45
loss =  tensor(0.7161, device='cuda:0', grad_fn=<MeanBackward0>)
0.38
loss =  tensor(0.7093, device='cuda:0', grad_fn=<MeanBackward0>)
0.43
loss =  tensor(0.7025, device='cuda:0', grad_fn=<MeanBackward0>)
0.54
loss =  tensor(0.7027, device='cuda:0', grad_fn=<MeanBackward0>)
0.53
loss =  tensor(0.7003, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(0.7107, device='cuda:0', grad_fn=<MeanBackward0>)
0.43
loss =  tensor(0.7117, device='cuda:0', grad_fn=<MeanBackward0>)
0.42
loss =  tensor(0.7156, device='cuda:0', grad_fn=<MeanBackward0>)
0.37
loss =  tensor(0.7048, device='cuda:0', grad_fn=<MeanBackward0>)
0.48
loss =  tensor(0.6982, device='cuda:0', grad_fn=<MeanBackward0>)
0.58
loss =  tensor(0.710

loss =  tensor(0.7020, device='cuda:0', grad_fn=<MeanBackward0>)
0.53
loss =  tensor(0.7015, device='cuda:0', grad_fn=<MeanBackward0>)
0.55
loss =  tensor(0.7069, device='cuda:0', grad_fn=<MeanBackward0>)
0.45
loss =  tensor(0.7040, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(0.7012, device='cuda:0', grad_fn=<MeanBackward0>)
0.55
loss =  tensor(0.7101, device='cuda:0', grad_fn=<MeanBackward0>)
0.41
loss =  tensor(0.6998, device='cuda:0', grad_fn=<MeanBackward0>)
0.53
loss =  tensor(0.7043, device='cuda:0', grad_fn=<MeanBackward0>)
0.5
loss =  tensor(0.7007, device='cuda:0', grad_fn=<MeanBackward0>)
0.52
loss =  tensor(0.7055, device='cuda:0', grad_fn=<MeanBackward0>)
0.48
loss =  tensor(0.7059, device='cuda:0', grad_fn=<MeanBackward0>)
0.47
loss =  tensor(0.6999, device='cuda:0', grad_fn=<MeanBackward0>)
0.55
loss =  tensor(0.7023, device='cuda:0', grad_fn=<MeanBackward0>)
0.5
loss =  tensor(0.6950, device='cuda:0', grad_fn=<MeanBackward0>)
0.59
loss =  tensor(0.6981,

0.57
loss =  tensor(0.6954, device='cuda:0', grad_fn=<MeanBackward0>)
0.6
loss =  tensor(0.6898, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6988, device='cuda:0', grad_fn=<MeanBackward0>)
0.57
loss =  tensor(0.6893, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6907, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6911, device='cuda:0', grad_fn=<MeanBackward0>)
0.63
loss =  tensor(0.6876, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6963, device='cuda:0', grad_fn=<MeanBackward0>)
0.58
loss =  tensor(0.6898, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6895, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6902, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6933, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0.6871, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6906, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0

0.5
loss =  tensor(0.6981, device='cuda:0', grad_fn=<MeanBackward0>)
0.57
loss =  tensor(0.6994, device='cuda:0', grad_fn=<MeanBackward0>)
0.54
loss =  tensor(0.7040, device='cuda:0', grad_fn=<MeanBackward0>)
0.48
loss =  tensor(0.7084, device='cuda:0', grad_fn=<MeanBackward0>)
0.44
loss =  tensor(0.6992, device='cuda:0', grad_fn=<MeanBackward0>)
0.55
loss =  tensor(0.7045, device='cuda:0', grad_fn=<MeanBackward0>)
0.49
loss =  tensor(0.7031, device='cuda:0', grad_fn=<MeanBackward0>)
0.53
loss =  tensor(0.7010, device='cuda:0', grad_fn=<MeanBackward0>)
0.53
loss =  tensor(0.7051, device='cuda:0', grad_fn=<MeanBackward0>)
0.5
loss =  tensor(0.7052, device='cuda:0', grad_fn=<MeanBackward0>)
0.49
loss =  tensor(0.7018, device='cuda:0', grad_fn=<MeanBackward0>)
0.54
loss =  tensor(0.7040, device='cuda:0', grad_fn=<MeanBackward0>)
0.51
loss =  tensor(0.7050, device='cuda:0', grad_fn=<MeanBackward0>)
0.49
loss =  tensor(0.7070, device='cuda:0', grad_fn=<MeanBackward0>)
0.47
loss =  tensor(0.

0.63
loss =  tensor(0.6842, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6792, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6902, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6885, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6932, device='cuda:0', grad_fn=<MeanBackward0>)
0.6
loss =  tensor(0.6925, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0.6985, device='cuda:0', grad_fn=<MeanBackward0>)
0.54
loss =  tensor(0.6840, device='cuda:0', grad_fn=<MeanBackward0>)
0.7
loss =  tensor(0.6896, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6885, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6936, device='cuda:0', grad_fn=<MeanBackward0>)
0.59
loss =  tensor(0.6886, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6966, device='cuda:0', grad_fn=<MeanBackward0>)
0.61
loss =  tensor(0.6932, device='cuda:0', grad_fn=<MeanBackward0>)
0.6
loss =  tensor(0.6

loss =  tensor(0.6903, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6882, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6814, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6855, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6905, device='cuda:0', grad_fn=<MeanBackward0>)
0.63
loss =  tensor(0.6871, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6839, device='cuda:0', grad_fn=<MeanBackward0>)
0.7
loss =  tensor(0.6836, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6873, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6817, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6840, device='cuda:0', grad_fn=<MeanBackward0>)
0.7
loss =  tensor(0.6835, device='cuda:0', grad_fn=<MeanBackward0>)
0.71
loss =  tensor(0.6845, device='cuda:0', grad_fn=<MeanBackward0>)
0.7
loss =  tensor(0.6894, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6813, 

0.62
loss =  tensor(0.6926, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0.6865, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6813, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6809, device='cuda:0', grad_fn=<MeanBackward0>)
0.74
loss =  tensor(0.6878, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6873, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6896, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6950, device='cuda:0', grad_fn=<MeanBackward0>)
0.6
loss =  tensor(0.6859, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6908, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0.6904, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6912, device='cuda:0', grad_fn=<MeanBackward0>)
0.63
loss =  tensor(0.6951, device='cuda:0', grad_fn=<MeanBackward0>)
0.59
loss =  tensor(0.6906, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0

loss =  tensor(0.6888, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6792, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6874, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6883, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6882, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6836, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6920, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6921, device='cuda:0', grad_fn=<MeanBackward0>)
0.62
loss =  tensor(0.6864, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6895, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6888, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6865, device='cuda:0', grad_fn=<MeanBackward0>)
0.68
loss =  tensor(0.6827, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6825, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.692

0.65
loss =  tensor(0.6781, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6881, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6879, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6989, device='cuda:0', grad_fn=<MeanBackward0>)
0.54
loss =  tensor(0.6834, device='cuda:0', grad_fn=<MeanBackward0>)
0.71
loss =  tensor(0.6754, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6777, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6843, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6817, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6800, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6926, device='cuda:0', grad_fn=<MeanBackward0>)
0.63
loss =  tensor(0.6800, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6814, device='cuda:0', grad_fn=<MeanBackward0>)
0.74
loss =  tensor(0.6869, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(

loss =  tensor(0.6788, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6836, device='cuda:0', grad_fn=<MeanBackward0>)
0.71
loss =  tensor(0.6737, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6758, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6839, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6820, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6840, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6857, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6759, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6821, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6876, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6798, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6737, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6752, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6800,

0.87
loss =  tensor(0.6786, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6804, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6729, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6813, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0.6702, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6712, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6734, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6721, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6766, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6817, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6752, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6788, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6743, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6690, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0

loss =  tensor(0.6762, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6716, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6796, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6655, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6756, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6702, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6771, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6771, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6679, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6762, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6697, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6754, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6710, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6724, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6696, d

loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6707, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6722, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6702, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6745, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6776, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6777, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6752, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6647, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6804, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6715, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6692, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6740, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6670, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6719

0.74
loss =  tensor(0.6889, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6849, device='cuda:0', grad_fn=<MeanBackward0>)
0.71
loss =  tensor(0.6827, device='cuda:0', grad_fn=<MeanBackward0>)
0.7
loss =  tensor(0.6887, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6965, device='cuda:0', grad_fn=<MeanBackward0>)
0.59
loss =  tensor(0.6842, device='cuda:0', grad_fn=<MeanBackward0>)
0.72
loss =  tensor(0.6904, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6878, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6902, device='cuda:0', grad_fn=<MeanBackward0>)
0.66
loss =  tensor(0.6878, device='cuda:0', grad_fn=<MeanBackward0>)
0.69
loss =  tensor(0.6911, device='cuda:0', grad_fn=<MeanBackward0>)
0.65
loss =  tensor(0.6909, device='cuda:0', grad_fn=<MeanBackward0>)
0.64
loss =  tensor(0.6871, device='cuda:0', grad_fn=<MeanBackward0>)
0.67
loss =  tensor(0.6828, device='cuda:0', grad_fn=<MeanBackward0>)
0.73
loss =  tensor(0

loss =  tensor(0.6755, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6759, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6729, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6750, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6773, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6767, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6796, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6742, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6799, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6798, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6779, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6753, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6766, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6763, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6812

0.77
loss =  tensor(0.6802, device='cuda:0', grad_fn=<MeanBackward0>)
0.74
loss =  tensor(0.6740, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6743, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6804, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6730, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6773, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6721, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6751, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6782, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6787, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6776, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6741, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6782, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6799, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0

loss =  tensor(0.6748, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6725, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6711, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6779, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6727, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6785, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6761, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6714, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6830, device='cuda:0', grad_fn=<MeanBackward0>)
0.71
loss =  tensor(0.6693, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6762, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6762, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6780, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6773,

loss =  tensor(0.6732, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6705, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6741, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6754, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6731, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6739, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6814, device='cuda:0', grad_fn=<MeanBackward0>)
0.74
loss =  tensor(0.6733, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6669, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6751, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6730, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6768, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6738, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6763, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6763, d

loss =  tensor(0.6721, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6748, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6787, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6721, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6713, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6754, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6751, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6664, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6759, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6744, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6756, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6738, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6763,

loss =  tensor(0.6683, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6720, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6728, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6744, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6676, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6740, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6710, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6769, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6720, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6682, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6703, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6749, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6744, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6669, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6787

loss =  tensor(0.6694, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6763, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6652, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6735, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6761, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6732, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6794, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6713, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6798, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6692, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6764, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6765, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6806, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6737, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6711,

0.86
loss =  tensor(0.6671, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6788, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6683, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6730, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6695, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6739, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6705, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6728, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6781, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6708, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6659, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6723, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6677, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.

0.83
loss =  tensor(0.6655, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6730, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6724, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6777, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6741, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6733, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6774, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6728, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6651, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6683, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6727, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6702, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6763, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6792, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.

0.84
loss =  tensor(0.6771, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6669, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6716, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
epoch =  6
loss =  tensor(0.6790, device='cuda:0', grad_fn=<MeanBackward0>)
0.75
loss =  tensor(0.6717, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6731, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6760, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6664, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6723, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6695, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6738, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6686, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6736, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6664, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss 

0.81
loss =  tensor(0.6668, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6699, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6736, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6630, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6688, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6697, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6645, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6732, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6705, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6640, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6691, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6706, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6766, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6686, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0

loss =  tensor(0.6795, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6681, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6742, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6777, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6768, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6774, device='cuda:0', grad_fn=<MeanBackward0>)
0.78
loss =  tensor(0.6741, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6725, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6702, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6747, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6658, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6729, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6717, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6749, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6765

loss =  tensor(0.6737, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6684, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6760, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6651, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6690, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6656, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6718, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6694, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6682, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6663, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6707, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6757, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6675, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6711, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6704

loss =  tensor(0.6752, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6711, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6699, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6696, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6718, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6684, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6738, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6719, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6726, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6680, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6655, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6717, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6668, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6758, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6721

loss =  tensor(0.6691, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6714, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6736, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6704, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6641, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6744, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6664, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6740, device='cuda:0', grad_fn=<MeanBackward0>)
0.8
loss =  tensor(0.6694, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6659, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6696, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6715, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6679, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6712, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6628, 

0.88
loss =  tensor(0.6714, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6784, device='cuda:0', grad_fn=<MeanBackward0>)
0.76
loss =  tensor(0.6726, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6696, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6716, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6764, device='cuda:0', grad_fn=<MeanBackward0>)
0.79
loss =  tensor(0.6672, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6668, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6720, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6743, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6721, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6667, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6704, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6737, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(

0.82
loss =  tensor(0.6707, device='cuda:0', grad_fn=<MeanBackward0>)
0.84
loss =  tensor(0.6662, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6677, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6727, device='cuda:0', grad_fn=<MeanBackward0>)
0.81
loss =  tensor(0.6685, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6655, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6715, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6666, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6693, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6729, device='cuda:0', grad_fn=<MeanBackward0>)
0.82
loss =  tensor(0.6767, device='cuda:0', grad_fn=<MeanBackward0>)
0.77
loss =  tensor(0.6717, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6723, device='cuda:0', grad_fn=<MeanBackward0>)
0.83
loss =  tensor(0.6693, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0

loss =  tensor(0.6636, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6678, device='cuda:0', grad_fn=<MeanBackward0>)
0.86
loss =  tensor(0.6618, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6641, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6639, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6648, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6637, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6698, device='cuda:0', grad_fn=<MeanBackward0>)
0.85
loss =  tensor(0.6631, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6639, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6660, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6648, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.666

0.92
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6641, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6638, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6665, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6635, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6609, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6618, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6660, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0

0.93
loss =  tensor(0.6622, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6613, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6621, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6671, device='cuda:0', grad_fn=<MeanBackward0>)
0.87
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6627, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6641, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0

0.91
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6643, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6640, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6655, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6646, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6634, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6660, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6664, device='cuda:0', grad_fn=<MeanBackward0>)
0.9
loss =  tensor(0.

0.92
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6662, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6609, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6618, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6638, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6636, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6626, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(

0.93
loss =  tensor(0.6640, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6636, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6619, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6624, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6604, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6659, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6638, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6652, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6639, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(

loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6634, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6637, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6604, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6636, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.657

0.9
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6609, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6634, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0

0.97
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6610, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6627, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6662, device='cuda:0', grad_fn=<MeanBackward0>)
0.88
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6618, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(

0.93
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6631, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(

0.92
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6610, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6610, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(

loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6618, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6631, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6622, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6621, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.660

0.92
loss =  tensor(0.6657, device='cuda:0', grad_fn=<MeanBackward0>)
0.89
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6624, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6626, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(

loss =  tensor(0.6623, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6644, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.660

0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6625, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6621, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6634, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6621, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(

loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6613, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6622, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6604

loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6628, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6604, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6609, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.655

loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6557, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
epoch =  12
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6622, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  t

0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(

0.96
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6644, device='cuda:0', grad_fn=<MeanBackward0>)
0.91
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(

0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6629, device='cuda:0', grad_fn=<MeanBackward0>)
0.92
loss =  tensor(

loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.657

loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6609, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572

0.95
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6610, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6616, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(

0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(

0.95
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(

0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0

loss =  tensor(0.6613, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6620, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.660

loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6604, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6599

loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6603, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6604, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.659

loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6611, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.660

loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6560,

loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.659

loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6585

0.96
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(

0.98
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(

0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6557, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6557, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6610, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.

0.98
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0

0.99
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6617, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.

loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6601,

0.97
loss =  tensor(0.6608, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6614, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6605, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6600, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0

loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6621

0.98
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6557, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.

loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6591, d

loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552

0.98
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.

loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6592, 

0.95
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0

0.97
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6

0.99
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0

0.98
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6556, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0

0.97
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.

0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0

1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.

0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6622, device='cuda:0', grad_fn=<MeanBackward0>)
0.93
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6612, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.

0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6595, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6

0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6

0.99
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0

0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6

loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, 

0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.65

loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6596, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.658

0.98
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.

loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, d

1.0
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6586, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6591,

0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6556, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.

1.0
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6

0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6607, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6

loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6562,

loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6577, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6579, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6589, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6557, 

loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6587, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6597, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6552, d

loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6598, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6558, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552

loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, d

0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.655

loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6575

0.99
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.

loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6588, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.657

loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6556, 

0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6559, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6599, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6560, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0

loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6575, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582,

loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6615, device='cuda:0', grad_fn=<MeanBackward0>)
0.94
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6585, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592,

0.99
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6578, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0

loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6561, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6606, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, 

1.0
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6576, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6584, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.

0.98
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6583, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.

loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6556, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6572,

loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6601, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6590, devi

loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6602, device='cuda:0', grad_fn=<MeanBackward0>)
0.95
loss =  tensor(0.6564, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6569, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, de

loss =  tensor(0.6571, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
epoch =  29
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6593, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6594, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6554, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tens

0.98
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6574, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6565, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6570, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6568, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6566, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0

loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6567, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6571

loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6591, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6573, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6563, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6553, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6580, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, d

loss =  tensor(0.6572, device='cuda:0', grad_fn=<MeanBackward0>)
0.98
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6582, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6592, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6581, device='cuda:0', grad_fn=<MeanBackward0>)
0.97
loss =  tensor(0.6552, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6590, device='cuda:0', grad_fn=<MeanBackward0>)
0.96
loss =  tensor(0.6555, device='cuda:0', grad_fn=<MeanBackward0>)
1.0
loss =  tensor(0.6562, device='cuda:0', grad_fn=<MeanBackward0>)
0.99
loss =  tensor(0.6559, 