## <font color=#6B49F5> A Simple Implementation of FedAvg with PyTorch on IID Data </font> 
Please see https://towardsdatascience.com/federated-learning-a-simple-implementation-of-fedavg-federated-averaging-with-pytorch-90187c9c9577 for more details.

In [9]:
import numpy as np
import pandas as pd
from sklearn import tree
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.ensemble import RandomForestClassifier
import random
import math
from torch.utils.tensorboard import SummaryWriter
from matplotlib import pyplot

from pathlib import Path
import requests
import pickle
import gzip

import torch
import math
import torch.nn.functional as F
from torch import nn
from torch import optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader
from imblearn.over_sampling import SMOTE
import copy
from sklearn.metrics import confusion_matrix

pd.options.display.float_format = "{:,.4f}".format
sm = SMOTE(random_state=42)

In [10]:
THREAT_TYPE = 'threat_type'
THREAT_HL = 'threat_hl'

learning_rate = 0.01
numEpoch = 20
batch_size = 32
momentum = 0.9
print_amount=3
number_of_slices = 2
isSmote = False
runtime = 21

file_name = "federated_" + str(isSmote) + "_" + str(number_of_slices)  + "_" + str(runtime) + ".txt"
file = open(file_name, "w")

data_path = "D:\\learning\\PyTorch\\NSL_KDD-master\\"

colnames = ['duration', 'protocol_type', 'service', 'flag', 'src_bytes', 'dst_bytes', 'land',
            'wrong_fragment', 'urgent', 'hot', 'num_failed_logins', 'logged_in', 'num_compromised',
            'root_shell', 'su_attempted', 'num_root', 'num_file_creations', 'num_shells', 'num_access_files',
            'num_outbound_cmds', 'is_host_login', 'is_guest_login', 'count', 'srv_count', 'serror_rate',
            'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate', 'same_srv_rate', 'diff_srv_rate',
            'srv_diff_host_rate', 'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
            'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate',
            'dst_host_serror_rate', 'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
            'dst_host_srv_rerror_rate', 'threat_type']

In [11]:
df_train = pd.read_csv(data_path + "KDDTrain+.csv", header = None)
df_train = df_train.iloc[:, :-1]

df_test = pd.read_csv(data_path + "KDDTest+.csv", header = None)
df_test = df_test.iloc[:, :-1]

df_train.columns = colnames
df_test.columns = colnames

df_train.loc[(df_train['threat_type'] == 'back'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'buffer_overflow'), 'threat_type'] = 2
df_train.loc[(df_train['threat_type'] == 'ftp_write'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'guess_passwd'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'imap'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'ipsweep'), 'threat_type'] = 4
df_train.loc[(df_train['threat_type'] == 'land'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'loadmodule'), 'threat_type'] = 2
df_train.loc[(df_train['threat_type'] == 'multihop'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'neptune'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'nmap'), 'threat_type'] = 4
df_train.loc[(df_train['threat_type'] == 'perl'), 'threat_type'] = 2
df_train.loc[(df_train['threat_type'] == 'phf'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'pod'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'portsweep'), 'threat_type'] = 4
df_train.loc[(df_train['threat_type'] == 'rootkit'), 'threat_type'] = 2
df_train.loc[(df_train['threat_type'] == 'satan'), 'threat_type'] = 4
df_train.loc[(df_train['threat_type'] == 'smurf'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'spy'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'teardrop'), 'threat_type'] = 1
df_train.loc[(df_train['threat_type'] == 'warezclient'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'warezmaster'), 'threat_type'] = 3
df_train.loc[(df_train['threat_type'] == 'normal'), 'threat_type'] = 0
df_train.loc[(df_train['threat_type'] == 'unknown'), 'threat_type'] = 6

df_test.loc[(df_test['threat_type'] == 'back'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'buffer_overflow'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'ftp_write'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'guess_passwd'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'imap'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'ipsweep'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'land'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'loadmodule'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'multihop'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'neptune'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'nmap'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'perl'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'phf'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'pod'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'portsweep'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'rootkit'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'satan'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'smurf'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'spy'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'teardrop'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'warezclient'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'warezmaster'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'normal'), 'threat_type'] = 0
df_test.loc[(df_test['threat_type'] == 'unknown'), 'threat_type'] = 6
df_test.loc[(df_test['threat_type'] == 'mscan'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'apache2'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'snmpgetattack'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'processtable'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'httptunnel'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'ps'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'snmpguess'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'mailbomb'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'named'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'sendmail'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'xterm'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'xlock'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'xsnoop'), 'threat_type'] = 3
df_test.loc[(df_test['threat_type'] == 'sqlattack'), 'threat_type'] = 2
df_test.loc[(df_test['threat_type'] == 'udpstorm'), 'threat_type'] = 1
df_test.loc[(df_test['threat_type'] == 'saint'), 'threat_type'] = 4
df_test.loc[(df_test['threat_type'] == 'worm'), 'threat_type'] = 1

df_full = pd.concat([df_train, df_test])

print('Attack types in full set: \n', df_full[THREAT_TYPE].value_counts())

Attack types in full set: 
 0    77053
1    53387
4    14077
3     3880
2      119
Name: threat_type, dtype: int64


In [12]:

print('Before normalization shape of data set : ', df_full.shape)
threat_type_df = df_full['threat_type'].copy()
# Considering numerical columns
# 34 numerical columns are considered for training
numerical_colmanes = ['duration', 'src_bytes', 'dst_bytes', 'wrong_fragment', 'urgent', 'hot',
                      'num_failed_logins', 'num_compromised', 'root_shell', 'su_attempted', 'num_root',
                      'num_file_creations', 'num_shells', 'num_access_files', 'num_outbound_cmds', 'count',
                      'srv_count', 'serror_rate', 'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
                      'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate', 'dst_host_count',
                      'dst_host_srv_count', 'dst_host_same_srv_rate', 'dst_host_diff_srv_rate',
                      'dst_host_same_src_port_rate', 'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
                      'dst_host_srv_serror_rate', 'dst_host_rerror_rate', 'dst_host_srv_rerror_rate']

numerical_df_full = df_full[numerical_colmanes].copy()
print(numerical_df_full.shape)
#
# # Lets remove the numerical columns with constant value
numerical_df_full = numerical_df_full.loc[:, (numerical_df_full != numerical_df_full.iloc[0]).any()]
#
# # lets scale the values for each column from [0,1]
# # N.B. we dont have any negative values]
final_df_full = numerical_df_full / numerical_df_full.max()
print(final_df_full.shape)

df_normalized = pd.concat([final_df_full, threat_type_df], axis=1)
print('After normalization shape of data set: ', df_normalized.shape)
print(df_normalized[THREAT_TYPE].value_counts())

Before normalization shape of data set :  (148516, 42)
(148516, 34)
(148516, 33)
After normalization shape of data set:  (148516, 34)
0    77053
1    53387
4    14077
3     3880
2      119
Name: threat_type, dtype: int64


In [13]:
def divide_train_test(df, propotion=0.1):
    
    df_train = []
    df_test = []
    for key,val in df[THREAT_TYPE].value_counts().iteritems():
        df_part = df[df['threat_type'] == key]
        df_test.append(df_part[0: int(df_part.shape[0]*propotion)])
        df_train.append(df_part[int(df_part.shape[0]*propotion):df_part.shape[0]])
        
    return df_train,df_test
    

In [14]:
def get_data_for_slices(df_train, number_of_slices, isSmote=False, x_name="x_train", y_name="y_train"):
    
    x_data_dict= dict()
    y_data_dict= dict()    
    
    for i in range(number_of_slices):
        xname= x_name+str(i)
        yname= y_name+str(i)
        df_types = []
        
        for df in df_train:
            df_type = df[int(df.shape[0]*i/number_of_slices):int(df.shape[0]*(i+1)/number_of_slices)]
            df_types.append(df_type)
        
        slice_df = pd.concat(df_types)
        y_info = slice_df.pop('threat_type').values
        x_info = slice_df.values
        y_info = y_info.astype('int')
        
        if isSmote:
            sm = SMOTE(random_state=42)
            x_info, y_info = sm.fit_resample(x_info, y_info)
        
        print('========================================================================================')
        print('\tX part size for slice ' + str(i) + ' is ' + str(x_info.shape))
        print('\tY part size for slice ' + str(i) + ' is ' + str(y_info.shape))
        print('Value types of each class in slice : ' + str(i))
        print(np.unique(y_info,return_counts=True))
        
        x_info = torch.tensor(x_info).float()
        y_info = torch.tensor(y_info).type(torch.LongTensor)
            
        x_data_dict.update({xname : x_info})
        y_data_dict.update({yname : y_info})
        
    return x_data_dict, y_data_dict     

In [15]:
df_train, df_test = divide_train_test(df_normalized,propotion=0.1)
# print('Value counts in train set : ')
# df_train[THREAT_TYPE].value_counts()
# print('Value counts in test set : ')
# print(df_test[THREAT_TYPE].value_counts())

x_train_dict, y_train_dict = get_data_for_slices(df_train, number_of_slices, isSmote)

df_test = pd.concat(df_test)
y_test = df_test.pop(THREAT_TYPE).values
x_test = df_test.values

print('Test set size is : x => ' + str(x_test.shape) + ' y => ' + str(y_test.shape))
x_test = torch.tensor(x_test).float()
y_test = torch.tensor(y_test.astype('int')).type(torch.LongTensor)

inputs = x_test.shape[1]
outputs = 5
print(inputs,outputs)

	X part size for slice 0 is (66833, 33)
	Y part size for slice 0 is (66833,)
Value types of each class in slice : 0
(array([0, 1, 2, 3, 4]), array([34674, 24024,    54,  1746,  6335], dtype=int64))
	X part size for slice 1 is (66834, 33)
	Y part size for slice 1 is (66834,)
Value types of each class in slice : 1
(array([0, 1, 2, 3, 4]), array([34674, 24025,    54,  1746,  6335], dtype=int64))
Test set size is : x => (14849, 33) y => (14849,)
33 5


--------------------------
### <span style="background-color:#F087F9"> Classification Model </span> 

In [16]:
class Net2nn(nn.Module):
    def __init__(self, inputs, outputs):
        super(Net2nn, self).__init__()
        self.fc1=nn.Linear(inputs,200)
        self.fc2=nn.Linear(200,200)
        self.fc3=nn.Linear(200,outputs)
        
    def forward(self,x):
        x=F.relu(self.fc1(x))
        x=F.relu(self.fc2(x))
        x=self.fc3(x)
        return x

In [17]:
class WrappedDataLoader:
    def __init__(self, dl, func):
        self.dl = dl
        self.func = func

    def __len__(self):
        return len(self.dl)

    def __iter__(self):
        batches = iter(self.dl)
        for b in batches:
            yield (self.func(*b))

In [18]:
def train(model, train_loader, criterion, optimizer):
    model.train()
    train_loss = 0.0
    correct = 0

    for data, target in train_loader:
        output = model(data)
        loss = criterion(output, target)
        print(loss)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        prediction = output.argmax(dim=1, keepdim=True)
        correct += prediction.eq(target.view_as(prediction)).sum().item()
        

    return train_loss / len(train_loader), correct/len(train_loader.dataset)

In [19]:
def validation(model, test_loader, criterion):
    model.eval()
    test_loss = 0.0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            output = model(data)
            
            test_loss += criterion(output, target).item()
            prediction = output.argmax(dim=1, keepdim=True)
            correct += prediction.eq(target.view_as(prediction)).sum().item()

    test_loss /= len(test_loader)
    correct /= len(test_loader.dataset)

    return (test_loss, correct)

In [20]:
def confusion_mat(model, test_loader):
    y_pred = []
    y_true = []

    # iterate over test data
    for inputs, labels in test_loader:
        output = model(inputs)  # Feed Network

        output = (torch.max(torch.exp(output), 1)[1]).data.cpu().numpy()
        y_pred.extend(output)  # Save Prediction

        labels = labels.data.cpu().numpy()
        y_true.extend(labels)  # Save Truth

    cf_matrix = confusion_matrix(y_true, y_pred)
    precisionv = precision_score(y_true,y_pred,average='macro')
    recallv = recall_score(y_true,y_pred,average='macro')
    print('precision value: '+str(precisionv))
    print('recall value: '+ str(recallv))
#     df_cm = pd.DataFrame(cf_matrix, index=[i for i in Counter(y_test)],
#                          columns=[i for i in Counter(y_test)])
#     plt.figure(1)
#     plt.ylabel('True label')
#     plt.xlabel('Predicted label')
#     plt.figure(figsize=(12, 7))

#     sn.heatmap(df_cm, annot=True).set(xlabel='Predicted label', ylabel='True label')
#     plt.savefig('D:\\learning\\PyTorch\\experiment\\cf\\cf_fl_'+str(self.number_of_slices)+'.png')
    print('confusion matrix for normal scenario for slices : ' + str(number_of_slices))
    print(cf_matrix)
    file.write('\ncf matrix for slice :' + str(number_of_slices))
    file.write('\n'+str(cf_matrix))

---------------------------------
### <span style="background-color:#F087F9"> Functions for Federated Averaging </span> 

In [21]:
def create_model_optimizer_criterion_dict(number_of_slices):
    model_dict = dict()
    optimizer_dict= dict()
    criterion_dict = dict()
    
    for i in range(number_of_slices):
        model_name="model"+str(i)
        model_info=Net2nn(inputs, outputs)
        model_dict.update({model_name : model_info })
        
        optimizer_name="optimizer"+str(i)
        optimizer_info = torch.optim.SGD(model_info.parameters(), lr=learning_rate, momentum=momentum)
        optimizer_dict.update({optimizer_name : optimizer_info })
        
        criterion_name = "criterion"+str(i)
        criterion_info = nn.CrossEntropyLoss()
        criterion_dict.update({criterion_name : criterion_info})
        
    return model_dict, optimizer_dict, criterion_dict 

In [22]:
def get_averaged_weights(model_dict, number_of_slices):
   
    fc1_mean_weight = torch.zeros(size=model_dict[name_of_models[0]].fc1.weight.shape)
    fc1_mean_bias = torch.zeros(size=model_dict[name_of_models[0]].fc1.bias.shape)
    
    fc2_mean_weight = torch.zeros(size=model_dict[name_of_models[0]].fc2.weight.shape)
    fc2_mean_bias = torch.zeros(size=model_dict[name_of_models[0]].fc2.bias.shape)
    
    fc3_mean_weight = torch.zeros(size=model_dict[name_of_models[0]].fc3.weight.shape)
    fc3_mean_bias = torch.zeros(size=model_dict[name_of_models[0]].fc3.bias.shape)
    
    with torch.no_grad():
    
    
        for i in range(number_of_slices):
            fc1_mean_weight += model_dict[name_of_models[i]].fc1.weight.data.clone()
            fc1_mean_bias += model_dict[name_of_models[i]].fc1.bias.data.clone()
        
            fc2_mean_weight += model_dict[name_of_models[i]].fc2.weight.data.clone()
            fc2_mean_bias += model_dict[name_of_models[i]].fc2.bias.data.clone()
        
            fc3_mean_weight += model_dict[name_of_models[i]].fc3.weight.data.clone()
            fc3_mean_bias += model_dict[name_of_models[i]].fc3.bias.data.clone()

        
        fc1_mean_weight =fc1_mean_weight/number_of_slices
        fc1_mean_bias = fc1_mean_bias/ number_of_slices
    
        fc2_mean_weight =fc2_mean_weight/number_of_slices
        fc2_mean_bias = fc2_mean_bias/ number_of_slices
    
        fc3_mean_weight =fc3_mean_weight/number_of_slices
        fc3_mean_bias = fc3_mean_bias/ number_of_slices
    
    return fc1_mean_weight, fc1_mean_bias, fc2_mean_weight, fc2_mean_bias, fc3_mean_weight, fc3_mean_bias

In [23]:
def set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,model_dict, number_of_slices):
    fc1_mean_weight, fc1_mean_bias, fc2_mean_weight, fc2_mean_bias, fc3_mean_weight, fc3_mean_bias = get_averaged_weights(model_dict, number_of_slices=number_of_slices)
    with torch.no_grad():
        main_model.fc1.weight.data = fc1_mean_weight.data.clone()
        main_model.fc2.weight.data = fc2_mean_weight.data.clone()
        main_model.fc3.weight.data = fc3_mean_weight.data.clone()

        main_model.fc1.bias.data = fc1_mean_bias.data.clone()
        main_model.fc2.bias.data = fc2_mean_bias.data.clone()
        main_model.fc3.bias.data = fc3_mean_bias.data.clone() 
    return main_model

In [24]:
def compare_local_and_merged_model_performance(number_of_slices):
    accuracy_table=pd.DataFrame(data=np.zeros((number_of_slices,3)), columns=["sample", "local_ind_model", "merged_main_model"])
    for i in range (number_of_slices):
    
        test_ds = TensorDataset(x_test, y_test)
        test_dl = DataLoader(test_ds, batch_size=batch_size * 2)
    
        model=model_dict[name_of_models[i]]
        criterion=criterion_dict[name_of_criterions[i]]
        optimizer=optimizer_dict[name_of_optimizers[i]]
    
        individual_loss, individual_accuracy = validation(model, test_dl, criterion)
        main_loss, main_accuracy =validation(main_model, test_dl, main_criterion )
    
        accuracy_table.loc[i, "sample"]="sample "+str(i)
        accuracy_table.loc[i, "local_ind_model"] = individual_accuracy
        accuracy_table.loc[i, "merged_main_model"] = main_accuracy

    return accuracy_table

In [25]:
def send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_slices):
    with torch.no_grad():
        for i in range(number_of_slices):
            print('Updating model :' + name_of_models[i] )
            model_dict[name_of_models[i]].fc1.weight.data =main_model.fc1.weight.data.clone()
            model_dict[name_of_models[i]].fc2.weight.data =main_model.fc2.weight.data.clone()
            model_dict[name_of_models[i]].fc3.weight.data =main_model.fc3.weight.data.clone() 
            
            model_dict[name_of_models[i]].fc1.bias.data =main_model.fc1.bias.data.clone()
            model_dict[name_of_models[i]].fc2.bias.data =main_model.fc2.bias.data.clone()
            model_dict[name_of_models[i]].fc3.bias.data =main_model.fc3.bias.data.clone() 
    
    return model_dict

In [26]:
def start_train_end_node_process(number_of_slices):
    for i in range (number_of_slices): 

        print('Federated learning for slice '+ str(i+1))
        train_ds = TensorDataset(x_train_dict[name_of_x_train_sets[i]], y_train_dict[name_of_y_train_sets[i]])
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

#         valid_ds = TensorDataset(x_valid_dict[name_of_x_valid_sets[i]], y_valid_dict[name_of_y_valid_sets[i]])
#         valid_dl = DataLoader(valid_ds, batch_size=batch_size * 2)
        
        test_ds = TensorDataset(x_test_dict[name_of_x_test_sets[i]], y_test_dict[name_of_y_test_sets[i]])
        test_dl = DataLoader(test_ds, batch_size= batch_size * 2)
    
        model=model_dict[name_of_models[i]]
        criterion=criterion_dict[name_of_criterions[i]]
        optimizer=optimizer_dict[name_of_optimizers[i]]
    
        print("Subset" ,i)
        for epoch in range(numEpoch):        
            train_loss, train_accuracy = train(model, train_dl, criterion, optimizer)
#             valid_loss, valid_accuracy = validation(model, valid_dl, criterion)
            test_loss, test_accuracy = validation(model, test_dl, criterion)
    
            print("epoch: {:3.0f}".format(epoch+1) + " | train accuracy: {:7.5f}".format(train_accuracy) + " | test accuracy: {:7.5f}".format(test_accuracy))

In [27]:

def start_train_end_node_process_without_print(number_of_slices):
    for i in range (number_of_slices): 

        train_ds = TensorDataset(x_train_dict[name_of_x_train_sets[i]], y_train_dict[name_of_y_train_sets[i]])
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

        test_ds = TensorDataset(x_test, y_test)
        test_dl = DataLoader(test_ds, batch_size= batch_size * 2)
    
        model=model_dict[name_of_models[i]]
        criterion=criterion_dict[name_of_criterions[i]]
        optimizer=optimizer_dict[name_of_optimizers[i]]
    
        for epoch in range(numEpoch):        
            train_loss, train_accuracy = train(model, train_dl, criterion, optimizer)
            test_loss, test_accuracy = validation(model, test_dl, criterion)

In [28]:
def start_train_end_node_process_print_some(number_of_slices, print_amount):
    for i in range (number_of_slices): 
        
        print('Federated learning for slice '+ str(i+1))
        train_ds = TensorDataset(x_train_dict[name_of_x_train_sets[i]], 
                                 y_train_dict[name_of_y_train_sets[i]])
        train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

        test_ds = TensorDataset(x_test, y_test)
        test_dl = DataLoader(test_ds, batch_size= batch_size * 2)
    
        model=model_dict[name_of_models[i]]
        criterion=criterion_dict[name_of_criterions[i]]
        optimizer=optimizer_dict[name_of_optimizers[i]]
    
        if i<print_amount:
            print("Subset" ,i)
            
        for epoch in range(numEpoch):
        
            train_loss, train_accuracy = train(model, train_dl, criterion, optimizer)
            test_loss, test_accuracy = validation(model, test_dl, criterion)
            
            if i<print_amount:        
                print("epoch: {:3.0f}".format(epoch+1) + " | train accuracy: {:7.5f}".format(train_accuracy) + " | test accuracy: {:7.5f}".format(test_accuracy))    

In [29]:
# x_train, y_train, x_valid, y_valid,x_test, y_test = map(torch.tensor, (x_train, y_train, x_valid, y_valid, x_test, y_test))


----------------

### <span style="background-color:#F087F9"> Let's examine what would the performance of the centralized model be if the data were not distributed to nodes at all? </span>   

The model used in this example is very simple, different things can be done to improve model performance, such as using more complex models, increasing epoch or hyperparameter tuning. However, the purpose here is to compare the performance of the main model that is formed by combining the parameters of the local models trained on their own data with a centralized model that trained on all training data. In this way, we can gain insight into the capacity of federated learning.


In [30]:
# initial_model = Net2nn()
# initial_optimizer = torch.optim.SGD(initial_model.parameters(), lr=0.01, momentum=0.9)
# initial_criterion = nn.CrossEntropyLoss()

centralized_model = Net2nn(inputs, outputs)
centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=0.01, momentum=0.9)
centralized_criterion = nn.CrossEntropyLoss()

In [31]:
print("------ Centralized Model ------")

train_acc = []
test_acc = []
train_loss = []
test_loss = []

test_ds = TensorDataset(x_test, y_test)
test_dl = DataLoader(test_ds, batch_size=batch_size * 2)

for i in range(number_of_slices):
    centralized_model = Net2nn(inputs,outputs)
    centralized_optimizer = torch.optim.SGD(centralized_model.parameters(), lr=0.01, momentum=0.9)
    centralized_criterion = nn.CrossEntropyLoss()
#     centralized_model = copy.deepcopy(initial_model)
#     centralized_optimizer = copy.deepcopy(initial_optimizer)
#     centralized_criterion = copy.deepcopy(initial_criterion)
    print('Training with slice ' + str(i+1) + ' data' )
    x_name = 'x_train' + str(i)
    y_name = 'y_train' + str(i)
    train_ds = TensorDataset(x_train_dict[x_name], y_train_dict[y_name])
    train_dl = DataLoader(train_ds, batch_size=batch_size, shuffle=True)

    for epoch in range(numEpoch):
        central_train_loss, central_train_accuracy = train(centralized_model, train_dl, centralized_criterion, centralized_optimizer)
        central_test_loss, central_test_accuracy = validation(centralized_model, test_dl, centralized_criterion)
        
        train_acc.append(central_train_accuracy)
        train_loss.append(central_train_loss)
        test_acc.append(central_test_accuracy)
        test_loss.append(central_test_loss)
        
#         print("epoch: {:3.0f}".format(epoch+1) + " | train accuracy: {:7.4f}".format(central_train_accuracy) + " | test accuracy: {:7.4f}".format(central_test_accuracy))
    print(" | train accuracy: {:7.4f}".format(central_train_accuracy) + " | test accuracy: {:7.4f}".format(central_test_accuracy))
    confusion_mat(centralized_model, test_dl)
    
print("------ Training finished ------")
print('Mean train accuracy: ' + str(sum(train_acc)/len(train_acc)))
print('Mean test accuracy: ' + str(sum(test_acc)/len(test_acc)))


------ Centralized Model ------
Training with slice 1 data
tensor(1.6048, grad_fn=<NllLossBackward>)
tensor(1.6014, grad_fn=<NllLossBackward>)
tensor(1.5779, grad_fn=<NllLossBackward>)
tensor(1.5611, grad_fn=<NllLossBackward>)
tensor(1.5615, grad_fn=<NllLossBackward>)
tensor(1.5318, grad_fn=<NllLossBackward>)
tensor(1.5053, grad_fn=<NllLossBackward>)
tensor(1.4798, grad_fn=<NllLossBackward>)
tensor(1.4675, grad_fn=<NllLossBackward>)
tensor(1.4507, grad_fn=<NllLossBackward>)
tensor(1.4406, grad_fn=<NllLossBackward>)
tensor(1.3796, grad_fn=<NllLossBackward>)
tensor(1.3094, grad_fn=<NllLossBackward>)
tensor(1.2939, grad_fn=<NllLossBackward>)
tensor(1.3872, grad_fn=<NllLossBackward>)
tensor(1.2952, grad_fn=<NllLossBackward>)
tensor(1.2273, grad_fn=<NllLossBackward>)
tensor(1.2533, grad_fn=<NllLossBackward>)
tensor(1.2167, grad_fn=<NllLossBackward>)
tensor(1.0480, grad_fn=<NllLossBackward>)
tensor(1.0943, grad_fn=<NllLossBackward>)
tensor(1.1765, grad_fn=<NllLossBackward>)
tensor(0.9869, gr

tensor(0.2057, grad_fn=<NllLossBackward>)
tensor(0.9388, grad_fn=<NllLossBackward>)
tensor(0.3383, grad_fn=<NllLossBackward>)
tensor(0.2934, grad_fn=<NllLossBackward>)
tensor(0.2921, grad_fn=<NllLossBackward>)
tensor(0.3169, grad_fn=<NllLossBackward>)
tensor(0.5942, grad_fn=<NllLossBackward>)
tensor(0.1202, grad_fn=<NllLossBackward>)
tensor(0.3742, grad_fn=<NllLossBackward>)
tensor(0.2491, grad_fn=<NllLossBackward>)
tensor(0.4108, grad_fn=<NllLossBackward>)
tensor(0.3682, grad_fn=<NllLossBackward>)
tensor(0.3481, grad_fn=<NllLossBackward>)
tensor(0.2780, grad_fn=<NllLossBackward>)
tensor(0.2259, grad_fn=<NllLossBackward>)
tensor(0.4608, grad_fn=<NllLossBackward>)
tensor(0.2652, grad_fn=<NllLossBackward>)
tensor(0.2573, grad_fn=<NllLossBackward>)
tensor(0.3293, grad_fn=<NllLossBackward>)
tensor(0.2398, grad_fn=<NllLossBackward>)
tensor(0.4205, grad_fn=<NllLossBackward>)
tensor(0.3845, grad_fn=<NllLossBackward>)
tensor(0.3950, grad_fn=<NllLossBackward>)
tensor(0.2654, grad_fn=<NllLossBac

tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.1929, grad_fn=<NllLossBackward>)
tensor(0.3680, grad_fn=<NllLossBackward>)
tensor(0.3051, grad_fn=<NllLossBackward>)
tensor(0.2027, grad_fn=<NllLossBackward>)
tensor(0.2661, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.2785, grad_fn=<NllLossBackward>)
tensor(0.4767, grad_fn=<NllLossBackward>)
tensor(0.1965, grad_fn=<NllLossBackward>)
tensor(0.2113, grad_fn=<NllLossBackward>)
tensor(0.4121, grad_fn=<NllLossBackward>)
tensor(0.0595, grad_fn=<NllLossBackward>)
tensor(0.3959, grad_fn=<NllLossBackward>)
tensor(0.2917, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.1880, grad_fn=<NllLossBackward>)
tensor(0.2387, grad_fn=<NllLossBackward>)
tensor(0.1526, grad_fn=<NllLossBackward>)
tensor(0.1776, grad_fn=<NllLossBackward>)
tensor(0.5814, grad_fn=<NllLossBackward>)
tensor(0.3287, grad_fn=<NllLossBackward>)
tensor(0.3655, grad_fn=<NllLossBackward>)
tensor(0.3046, grad_fn=<NllLossBac

tensor(0.1991, grad_fn=<NllLossBackward>)
tensor(0.3295, grad_fn=<NllLossBackward>)
tensor(0.3228, grad_fn=<NllLossBackward>)
tensor(0.9873, grad_fn=<NllLossBackward>)
tensor(0.2989, grad_fn=<NllLossBackward>)
tensor(0.3399, grad_fn=<NllLossBackward>)
tensor(0.2738, grad_fn=<NllLossBackward>)
tensor(0.2891, grad_fn=<NllLossBackward>)
tensor(0.5051, grad_fn=<NllLossBackward>)
tensor(0.3038, grad_fn=<NllLossBackward>)
tensor(0.2993, grad_fn=<NllLossBackward>)
tensor(0.3836, grad_fn=<NllLossBackward>)
tensor(0.0559, grad_fn=<NllLossBackward>)
tensor(0.0791, grad_fn=<NllLossBackward>)
tensor(0.2210, grad_fn=<NllLossBackward>)
tensor(0.2656, grad_fn=<NllLossBackward>)
tensor(0.1609, grad_fn=<NllLossBackward>)
tensor(0.3338, grad_fn=<NllLossBackward>)
tensor(0.1744, grad_fn=<NllLossBackward>)
tensor(0.2846, grad_fn=<NllLossBackward>)
tensor(0.2499, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.1848, grad_fn=<NllLossBackward>)
tensor(0.2185, grad_fn=<NllLossBac

tensor(0.1202, grad_fn=<NllLossBackward>)
tensor(0.2705, grad_fn=<NllLossBackward>)
tensor(0.2316, grad_fn=<NllLossBackward>)
tensor(0.2872, grad_fn=<NllLossBackward>)
tensor(0.4238, grad_fn=<NllLossBackward>)
tensor(0.2521, grad_fn=<NllLossBackward>)
tensor(0.1716, grad_fn=<NllLossBackward>)
tensor(0.1074, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBackward>)
tensor(0.1308, grad_fn=<NllLossBackward>)
tensor(0.1591, grad_fn=<NllLossBackward>)
tensor(0.1517, grad_fn=<NllLossBackward>)
tensor(0.4871, grad_fn=<NllLossBackward>)
tensor(0.3145, grad_fn=<NllLossBackward>)
tensor(0.2888, grad_fn=<NllLossBackward>)
tensor(0.1957, grad_fn=<NllLossBackward>)
tensor(0.4099, grad_fn=<NllLossBackward>)
tensor(0.2561, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.1568, grad_fn=<NllLossBackward>)
tensor(0.2952, grad_fn=<NllLossBackward>)
tensor(0.2083, grad_fn=<NllLossBackward>)
tensor(0.2579, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBac

tensor(0.4743, grad_fn=<NllLossBackward>)
tensor(0.2945, grad_fn=<NllLossBackward>)
tensor(0.5861, grad_fn=<NllLossBackward>)
tensor(0.4629, grad_fn=<NllLossBackward>)
tensor(0.4272, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.1506, grad_fn=<NllLossBackward>)
tensor(0.4920, grad_fn=<NllLossBackward>)
tensor(0.1697, grad_fn=<NllLossBackward>)
tensor(0.1992, grad_fn=<NllLossBackward>)
tensor(0.2480, grad_fn=<NllLossBackward>)
tensor(0.1802, grad_fn=<NllLossBackward>)
tensor(0.1272, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.2913, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.0811, grad_fn=<NllLossBackward>)
tensor(0.0558, grad_fn=<NllLossBackward>)
tensor(0.0712, grad_fn=<NllLossBackward>)
tensor(0.3259, grad_fn=<NllLossBackward>)
tensor(0.2587, grad_fn=<NllLossBackward>)
tensor(0.1848, grad_fn=<NllLossBackward>)
tensor(0.2595, grad_fn=<NllLossBackward>)
tensor(0.2090, grad_fn=<NllLossBac

tensor(0.2060, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.2060, grad_fn=<NllLossBackward>)
tensor(0.2185, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.2584, grad_fn=<NllLossBackward>)
tensor(0.1523, grad_fn=<NllLossBackward>)
tensor(0.0991, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.2232, grad_fn=<NllLossBackward>)
tensor(0.3717, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.1305, grad_fn=<NllLossBackward>)
tensor(0.3648, grad_fn=<NllLossBackward>)
tensor(0.0297, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.2779, grad_fn=<NllLossBackward>)
tensor(0.2758, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.3347, grad_fn=<NllLossBackward>)
tensor(0.1397, grad_fn=<NllLossBackward>)
tensor(0.2702, grad_fn=<NllLossBackward>)
tensor(0.1794, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBac

tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.2969, grad_fn=<NllLossBackward>)
tensor(0.1344, grad_fn=<NllLossBackward>)
tensor(0.3458, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.1940, grad_fn=<NllLossBackward>)
tensor(0.3408, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.2599, grad_fn=<NllLossBackward>)
tensor(0.1976, grad_fn=<NllLossBackward>)
tensor(0.2281, grad_fn=<NllLossBackward>)
tensor(0.2663, grad_fn=<NllLossBackward>)
tensor(0.2340, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.1714, grad_fn=<NllLossBackward>)
tensor(0.1231, grad_fn=<NllLossBackward>)
tensor(0.1862, grad_fn=<NllLossBackward>)
tensor(0.1691, grad_fn=<NllLossBackward>)
tensor(0.1378, grad_fn=<NllLossBackward>)
tensor(0.1022, grad_fn=<NllLossBackward>)
tensor(0.1434, grad_fn=<NllLossBackward>)
tensor(0.1322, grad_fn=<NllLossBackward>)
tensor(0.2757, grad_fn=<NllLossBac

tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0207, grad_fn=<NllLossBackward>)
tensor(0.0925, grad_fn=<NllLossBackward>)
tensor(0.1023, grad_fn=<NllLossBackward>)
tensor(0.2957, grad_fn=<NllLossBackward>)
tensor(0.3047, grad_fn=<NllLossBackward>)
tensor(0.1518, grad_fn=<NllLossBackward>)
tensor(0.0919, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.2170, grad_fn=<NllLossBackward>)
tensor(0.2117, grad_fn=<NllLossBackward>)
tensor(0.1110, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.1739, grad_fn=<NllLossBackward>)
tensor(0.2066, grad_fn=<NllLossBackward>)
tensor(0.1321, grad_fn=<NllLossBackward>)
tensor(0.1758, grad_fn=<NllLossBackward>)
tensor(0.1615, grad_fn=<NllLossBackward>)
tensor(0.3146, grad_fn=<NllLossBackward>)
tensor(0.0902, grad_fn=<NllLossBackward>)
tensor(0.2082, grad_fn=<NllLossBackward>)
tensor(0.1636, grad_fn=<NllLossBackward>)
tensor(0.0918, grad_fn=<NllLossBackward>)
tensor(0.1331, grad_fn=<NllLossBac

tensor(0.1532, grad_fn=<NllLossBackward>)
tensor(0.1815, grad_fn=<NllLossBackward>)
tensor(0.1005, grad_fn=<NllLossBackward>)
tensor(0.2195, grad_fn=<NllLossBackward>)
tensor(0.1136, grad_fn=<NllLossBackward>)
tensor(0.2814, grad_fn=<NllLossBackward>)
tensor(0.2026, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.2107, grad_fn=<NllLossBackward>)
tensor(0.2402, grad_fn=<NllLossBackward>)
tensor(0.0945, grad_fn=<NllLossBackward>)
tensor(0.0784, grad_fn=<NllLossBackward>)
tensor(0.1501, grad_fn=<NllLossBackward>)
tensor(0.3589, grad_fn=<NllLossBackward>)
tensor(0.2728, grad_fn=<NllLossBackward>)
tensor(0.3257, grad_fn=<NllLossBackward>)
tensor(0.2107, grad_fn=<NllLossBackward>)
tensor(0.1980, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.1607, grad_fn=<NllLossBackward>)
tensor(0.1442, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.1292, grad_fn=<NllLossBackward>)
tensor(0.0557, grad_fn=<NllLossBac

tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.2703, grad_fn=<NllLossBackward>)
tensor(0.1814, grad_fn=<NllLossBackward>)
tensor(0.0405, grad_fn=<NllLossBackward>)
tensor(0.0843, grad_fn=<NllLossBackward>)
tensor(0.2269, grad_fn=<NllLossBackward>)
tensor(0.1391, grad_fn=<NllLossBackward>)
tensor(0.2423, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.1016, grad_fn=<NllLossBackward>)
tensor(0.1728, grad_fn=<NllLossBackward>)
tensor(0.0939, grad_fn=<NllLossBackward>)
tensor(0.1454, grad_fn=<NllLossBackward>)
tensor(0.1017, grad_fn=<NllLossBackward>)
tensor(0.1257, grad_fn=<NllLossBackward>)
tensor(0.3251, grad_fn=<NllLossBackward>)
tensor(0.1484, grad_fn=<NllLossBackward>)
tensor(0.1927, grad_fn=<NllLossBackward>)
tensor(0.0687, grad_fn=<NllLossBackward>)
tensor(0.1834, grad_fn=<NllLossBackward>)
tensor(0.1861, grad_fn=<NllLossBackward>)
tensor(0.1177, grad_fn=<NllLossBac

tensor(0.1409, grad_fn=<NllLossBackward>)
tensor(0.2473, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.3844, grad_fn=<NllLossBackward>)
tensor(0.0978, grad_fn=<NllLossBackward>)
tensor(0.2578, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBackward>)
tensor(0.0493, grad_fn=<NllLossBackward>)
tensor(0.1561, grad_fn=<NllLossBackward>)
tensor(0.2620, grad_fn=<NllLossBackward>)
tensor(0.1632, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.2892, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.2275, grad_fn=<NllLossBackward>)
tensor(0.1939, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
tensor(0.2510, grad_fn=<NllLossBackward>)
tensor(0.2336, grad_fn=<NllLossBackward>)
tensor(0.2436, grad_fn=<NllLossBackward>)
tensor(0.2190, grad_fn=<NllLossBackward>)
tensor(0.1094, grad_fn=<NllLossBackward>)
tensor(0.0302, grad_fn=<NllLossBac

tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.2210, grad_fn=<NllLossBackward>)
tensor(0.2087, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.1150, grad_fn=<NllLossBackward>)
tensor(0.2427, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBackward>)
tensor(0.0269, grad_fn=<NllLossBackward>)
tensor(0.1306, grad_fn=<NllLossBackward>)
tensor(0.2209, grad_fn=<NllLossBackward>)
tensor(0.2735, grad_fn=<NllLossBackward>)
tensor(0.2480, grad_fn=<NllLossBackward>)
tensor(0.1730, grad_fn=<NllLossBackward>)
tensor(0.1718, grad_fn=<NllLossBackward>)
tensor(0.1095, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.1478, grad_fn=<NllLossBackward>)
tensor(0.1921, grad_fn=<NllLossBackward>)
tensor(0.2774, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1893, grad_fn=<NllLossBac

tensor(0.2769, grad_fn=<NllLossBackward>)
tensor(0.3052, grad_fn=<NllLossBackward>)
tensor(0.1874, grad_fn=<NllLossBackward>)
tensor(0.3202, grad_fn=<NllLossBackward>)
tensor(0.2532, grad_fn=<NllLossBackward>)
tensor(0.0399, grad_fn=<NllLossBackward>)
tensor(0.7188, grad_fn=<NllLossBackward>)
tensor(0.0701, grad_fn=<NllLossBackward>)
tensor(0.1326, grad_fn=<NllLossBackward>)
tensor(0.2023, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.1401, grad_fn=<NllLossBackward>)
tensor(0.3489, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.2116, grad_fn=<NllLossBackward>)
tensor(0.1145, grad_fn=<NllLossBackward>)
tensor(0.1799, grad_fn=<NllLossBackward>)
tensor(0.1771, grad_fn=<NllLossBackward>)
tensor(0.1255, grad_fn=<NllLossBackward>)
tensor(0.1335, grad_fn=<NllLossBackward>)
tensor(0.1127, grad_fn=<NllLossBackward>)
tensor(0.2678, grad_fn=<NllLossBackward>)
tensor(0.0998, grad_fn=<NllLossBackward>)
tensor(0.1280, grad_fn=<NllLossBac

tensor(0.2473, grad_fn=<NllLossBackward>)
tensor(0.2417, grad_fn=<NllLossBackward>)
tensor(0.1326, grad_fn=<NllLossBackward>)
tensor(0.0899, grad_fn=<NllLossBackward>)
tensor(0.0438, grad_fn=<NllLossBackward>)
tensor(0.1415, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBackward>)
tensor(0.5509, grad_fn=<NllLossBackward>)
tensor(0.2395, grad_fn=<NllLossBackward>)
tensor(0.1890, grad_fn=<NllLossBackward>)
tensor(0.2381, grad_fn=<NllLossBackward>)
tensor(0.2447, grad_fn=<NllLossBackward>)
tensor(0.1287, grad_fn=<NllLossBackward>)
tensor(0.1344, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.1875, grad_fn=<NllLossBackward>)
tensor(0.0853, grad_fn=<NllLossBackward>)
tensor(0.1130, grad_fn=<NllLossBackward>)
tensor(0.2050, grad_fn=<NllLossBackward>)
tensor(0.1367, grad_fn=<NllLossBackward>)
tensor(0.0663, grad_fn=<NllLossBackward>)
tensor(0.1917, grad_fn=<NllLossBac

tensor(0.2385, grad_fn=<NllLossBackward>)
tensor(0.2530, grad_fn=<NllLossBackward>)
tensor(0.1605, grad_fn=<NllLossBackward>)
tensor(0.1526, grad_fn=<NllLossBackward>)
tensor(0.2982, grad_fn=<NllLossBackward>)
tensor(0.2199, grad_fn=<NllLossBackward>)
tensor(0.1453, grad_fn=<NllLossBackward>)
tensor(0.2062, grad_fn=<NllLossBackward>)
tensor(0.0898, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.1828, grad_fn=<NllLossBackward>)
tensor(0.1683, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.1340, grad_fn=<NllLossBackward>)
tensor(0.2849, grad_fn=<NllLossBackward>)
tensor(0.0791, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.3864, grad_fn=<NllLossBackward>)
tensor(0.2569, grad_fn=<NllLossBackward>)
tensor(0.3074, grad_fn=<NllLossBackward>)
tensor(0.1803, grad_fn=<NllLossBackward>)
tensor(0.0278, grad_fn=<NllLossBac

tensor(0.1709, grad_fn=<NllLossBackward>)
tensor(0.1166, grad_fn=<NllLossBackward>)
tensor(0.1925, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.1253, grad_fn=<NllLossBackward>)
tensor(0.1615, grad_fn=<NllLossBackward>)
tensor(0.1862, grad_fn=<NllLossBackward>)
tensor(0.0516, grad_fn=<NllLossBackward>)
tensor(0.1195, grad_fn=<NllLossBackward>)
tensor(0.0470, grad_fn=<NllLossBackward>)
tensor(0.0446, grad_fn=<NllLossBackward>)
tensor(0.0801, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.0559, grad_fn=<NllLossBackward>)
tensor(0.0951, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBackward>)
tensor(0.4561, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0998, grad_fn=<NllLossBackward>)
tensor(0.1839, grad_fn=<NllLossBackward>)
tensor(0.2883, grad_fn=<NllLossBackward>)
tensor(0.0242, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBac

tensor(0.0846, grad_fn=<NllLossBackward>)
tensor(0.1189, grad_fn=<NllLossBackward>)
tensor(0.2258, grad_fn=<NllLossBackward>)
tensor(0.2502, grad_fn=<NllLossBackward>)
tensor(0.0952, grad_fn=<NllLossBackward>)
tensor(0.0823, grad_fn=<NllLossBackward>)
tensor(0.2408, grad_fn=<NllLossBackward>)
tensor(0.1337, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.0670, grad_fn=<NllLossBackward>)
tensor(0.1286, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.0791, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.0820, grad_fn=<NllLossBackward>)
tensor(0.1571, grad_fn=<NllLossBackward>)
tensor(0.1707, grad_fn=<NllLossBackward>)
tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.1673, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.2263, grad_fn=<NllLossBac

tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.1001, grad_fn=<NllLossBackward>)
tensor(0.3071, grad_fn=<NllLossBackward>)
tensor(0.1838, grad_fn=<NllLossBackward>)
tensor(0.4456, grad_fn=<NllLossBackward>)
tensor(0.2301, grad_fn=<NllLossBackward>)
tensor(0.0320, grad_fn=<NllLossBackward>)
tensor(0.0916, grad_fn=<NllLossBackward>)
tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.0542, grad_fn=<NllLossBackward>)
tensor(0.1140, grad_fn=<NllLossBackward>)
tensor(0.1377, grad_fn=<NllLossBackward>)
tensor(0.2534, grad_fn=<NllLossBackward>)
tensor(0.1809, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.0352, grad_fn=<NllLossBackward>)
tensor(0.1316, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.1294, grad_fn=<NllLossBackward>)
tensor(0.0719, grad_fn=<NllLossBackward>)
tensor(0.0995, grad_fn=<NllLossBackward>)
tensor(0.1582, grad_fn=<NllLossBac

tensor(0.2093, grad_fn=<NllLossBackward>)
tensor(0.2386, grad_fn=<NllLossBackward>)
tensor(0.0899, grad_fn=<NllLossBackward>)
tensor(0.2230, grad_fn=<NllLossBackward>)
tensor(0.2591, grad_fn=<NllLossBackward>)
tensor(0.0998, grad_fn=<NllLossBackward>)
tensor(0.2194, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.1871, grad_fn=<NllLossBackward>)
tensor(0.3150, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.1757, grad_fn=<NllLossBackward>)
tensor(0.2314, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.0511, grad_fn=<NllLossBackward>)
tensor(0.1765, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.1979, grad_fn=<NllLossBackward>)
tensor(0.2719, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.3440, grad_fn=<NllLossBackward>)
tensor(0.1168, grad_fn=<NllLossBac

tensor(0.2459, grad_fn=<NllLossBackward>)
tensor(0.2629, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.1700, grad_fn=<NllLossBackward>)
tensor(0.1197, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBackward>)
tensor(0.1066, grad_fn=<NllLossBackward>)
tensor(0.1963, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.1184, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.2681, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.1623, grad_fn=<NllLossBackward>)
tensor(0.0482, grad_fn=<NllLossBackward>)
tensor(0.0084, grad_fn=<NllLossBackward>)
tensor(0.1807, grad_fn=<NllLossBackward>)
tensor(0.3635, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.3595, grad_fn=<NllLossBackward>)
tensor(0.1503, grad_fn=<NllLossBac

tensor(0.2303, grad_fn=<NllLossBackward>)
tensor(0.2174, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.1601, grad_fn=<NllLossBackward>)
tensor(0.0599, grad_fn=<NllLossBackward>)
tensor(0.2242, grad_fn=<NllLossBackward>)
tensor(0.0546, grad_fn=<NllLossBackward>)
tensor(0.0812, grad_fn=<NllLossBackward>)
tensor(0.0663, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.0559, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.0936, grad_fn=<NllLossBackward>)
tensor(0.1022, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.1493, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.1759, grad_fn=<NllLossBackward>)
tensor(0.3964, grad_fn=<NllLossBac

tensor(0.2265, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.3378, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.0352, grad_fn=<NllLossBackward>)
tensor(0.1002, grad_fn=<NllLossBackward>)
tensor(0.3118, grad_fn=<NllLossBackward>)
tensor(0.1030, grad_fn=<NllLossBackward>)
tensor(0.3059, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.1574, grad_fn=<NllLossBackward>)
tensor(0.1013, grad_fn=<NllLossBackward>)
tensor(0.1979, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBackward>)
tensor(0.2042, grad_fn=<NllLossBackward>)
tensor(0.1639, grad_fn=<NllLossBackward>)
tensor(0.1673, grad_fn=<NllLossBackward>)
tensor(0.1290, grad_fn=<NllLossBackward>)
tensor(0.1783, grad_fn=<NllLossBackward>)
tensor(0.1672, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.3053, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBackward>)
tensor(0.0976, grad_fn=<NllLossBac

tensor(0.3733, grad_fn=<NllLossBackward>)
tensor(0.0374, grad_fn=<NllLossBackward>)
tensor(0.0227, grad_fn=<NllLossBackward>)
tensor(0.1245, grad_fn=<NllLossBackward>)
tensor(0.3468, grad_fn=<NllLossBackward>)
tensor(0.1638, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.2425, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.2428, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBackward>)
tensor(0.0785, grad_fn=<NllLossBackward>)
tensor(0.0930, grad_fn=<NllLossBackward>)
tensor(0.1943, grad_fn=<NllLossBackward>)
tensor(0.1925, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.4299, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.0756, grad_fn=<NllLossBac

tensor(0.0810, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.1346, grad_fn=<NllLossBackward>)
tensor(0.1502, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0248, grad_fn=<NllLossBackward>)
tensor(0.2791, grad_fn=<NllLossBackward>)
tensor(0.0578, grad_fn=<NllLossBackward>)
tensor(0.2429, grad_fn=<NllLossBackward>)
tensor(0.1751, grad_fn=<NllLossBackward>)
tensor(0.3814, grad_fn=<NllLossBackward>)
tensor(0.1076, grad_fn=<NllLossBackward>)
tensor(0.2692, grad_fn=<NllLossBackward>)
tensor(0.1713, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.1775, grad_fn=<NllLossBackward>)
tensor(0.1620, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.1178, grad_fn=<NllLossBackward>)
tensor(0.2889, grad_fn=<NllLossBackward>)
tensor(0.1492, grad_fn=<NllLossBackward>)
tensor(0.1630, grad_fn=<NllLossBackward>)
tensor(0.2545, grad_fn=<NllLossBac

tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.1157, grad_fn=<NllLossBackward>)
tensor(0.1685, grad_fn=<NllLossBackward>)
tensor(0.0225, grad_fn=<NllLossBackward>)
tensor(0.5187, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1231, grad_fn=<NllLossBackward>)
tensor(0.2592, grad_fn=<NllLossBackward>)
tensor(0.0221, grad_fn=<NllLossBackward>)
tensor(0.0643, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.1986, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.1997, grad_fn=<NllLossBackward>)
tensor(0.3548, grad_fn=<NllLossBackward>)
tensor(0.0217, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.0952, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.1392, grad_fn=<NllLossBackward>)
tensor(0.2090, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBackward>)
tensor(0.3184, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBac

tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.2145, grad_fn=<NllLossBackward>)
tensor(0.0629, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.0754, grad_fn=<NllLossBackward>)
tensor(0.0477, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.1074, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.1454, grad_fn=<NllLossBackward>)
tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.1681, grad_fn=<NllLossBackward>)
tensor(0.0438, grad_fn=<NllLossBackward>)
tensor(0.0239, grad_fn=<NllLossBackward>)
tensor(0.0649, grad_fn=<NllLossBackward>)
tensor(0.1094, grad_fn=<NllLossBackward>)
tensor(0.0823, grad_fn=<NllLossBackward>)
tensor(0.1425, grad_fn=<NllLossBackward>)
tensor(0.0912, grad_fn=<NllLossBackward>)
tensor(0.1223, grad_fn=<NllLossBackward>)
tensor(0.1253, grad_fn=<NllLossBackward>)
tensor(0.2279, grad_fn=<NllLossBac

tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.0909, grad_fn=<NllLossBackward>)
tensor(0.1291, grad_fn=<NllLossBackward>)
tensor(0.2602, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBackward>)
tensor(0.1808, grad_fn=<NllLossBackward>)
tensor(0.0523, grad_fn=<NllLossBackward>)
tensor(0.4373, grad_fn=<NllLossBackward>)
tensor(0.1289, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.1061, grad_fn=<NllLossBackward>)
tensor(0.1633, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.1147, grad_fn=<NllLossBackward>)
tensor(0.1333, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.3849, grad_fn=<NllLossBackward>)
tensor(0.0612, grad_fn=<NllLossBackward>)
tensor(0.1387, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBac

tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.1236, grad_fn=<NllLossBackward>)
tensor(0.0831, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.0584, grad_fn=<NllLossBackward>)
tensor(0.1540, grad_fn=<NllLossBackward>)
tensor(0.0154, grad_fn=<NllLossBackward>)
tensor(0.1024, grad_fn=<NllLossBackward>)
tensor(0.0507, grad_fn=<NllLossBackward>)
tensor(0.2282, grad_fn=<NllLossBackward>)
tensor(0.1052, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.3436, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.0162, grad_fn=<NllLossBackward>)
tensor(0.0075, grad_fn=<NllLossBackward>)
tensor(0.2314, grad_fn=<NllLossBackward>)
tensor(0.1078, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBac

tensor(0.2041, grad_fn=<NllLossBackward>)
tensor(0.0338, grad_fn=<NllLossBackward>)
tensor(0.0557, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.1156, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.1371, grad_fn=<NllLossBackward>)
tensor(0.2519, grad_fn=<NllLossBackward>)
tensor(0.2825, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.1052, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.1483, grad_fn=<NllLossBackward>)
tensor(0.1939, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.1749, grad_fn=<NllLossBackward>)
tensor(0.2490, grad_fn=<NllLossBackward>)
tensor(0.4009, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.1282, grad_fn=<NllLossBackward>)
tensor(0.2233, grad_fn=<NllLossBackward>)
tensor(0.2177, grad_fn=<NllLossBac

tensor(0.0484, grad_fn=<NllLossBackward>)
tensor(0.2554, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.0480, grad_fn=<NllLossBackward>)
tensor(0.0104, grad_fn=<NllLossBackward>)
tensor(0.1382, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.1410, grad_fn=<NllLossBackward>)
tensor(0.2228, grad_fn=<NllLossBackward>)
tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.0906, grad_fn=<NllLossBackward>)
tensor(0.4306, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.1519, grad_fn=<NllLossBackward>)
tensor(0.1173, grad_fn=<NllLossBackward>)
tensor(0.1702, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.1660, grad_fn=<NllLossBackward>)
tensor(0.1595, grad_fn=<NllLossBackward>)
tensor(0.1817, grad_fn=<NllLossBac

tensor(0.0989, grad_fn=<NllLossBackward>)
tensor(0.1871, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.2340, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0572, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBackward>)
tensor(0.0794, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.1297, grad_fn=<NllLossBackward>)
tensor(0.0281, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.1894, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.1060, grad_fn=<NllLossBackward>)
tensor(0.3019, grad_fn=<NllLossBackward>)
tensor(0.0995, grad_fn=<NllLossBackward>)
tensor(0.0668, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBac

tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.2406, grad_fn=<NllLossBackward>)
tensor(0.0741, grad_fn=<NllLossBackward>)
tensor(0.1416, grad_fn=<NllLossBackward>)
tensor(0.1683, grad_fn=<NllLossBackward>)
tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.1621, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.2241, grad_fn=<NllLossBackward>)
tensor(0.4146, grad_fn=<NllLossBackward>)
tensor(0.1062, grad_fn=<NllLossBackward>)
tensor(0.2241, grad_fn=<NllLossBackward>)
tensor(0.1410, grad_fn=<NllLossBackward>)
tensor(0.0936, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.1315, grad_fn=<NllLossBackward>)
tensor(0.1114, grad_fn=<NllLossBackward>)
tensor(0.4139, grad_fn=<NllLossBackward>)
tensor(0.2187, grad_fn=<NllLossBac

tensor(0.2545, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0380, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.1237, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.3019, grad_fn=<NllLossBackward>)
tensor(0.3060, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.1808, grad_fn=<NllLossBackward>)
tensor(0.1082, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.1096, grad_fn=<NllLossBackward>)
tensor(0.1338, grad_fn=<NllLossBackward>)
tensor(0.0785, grad_fn=<NllLossBackward>)
tensor(0.1469, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.1138, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.1466, grad_fn=<NllLossBackward>)
tensor(0.1486, grad_fn=<NllLossBac

tensor(0.0642, grad_fn=<NllLossBackward>)
tensor(0.1479, grad_fn=<NllLossBackward>)
tensor(0.1293, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.1825, grad_fn=<NllLossBackward>)
tensor(0.0954, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.1940, grad_fn=<NllLossBackward>)
tensor(0.1910, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.1758, grad_fn=<NllLossBackward>)
tensor(0.1341, grad_fn=<NllLossBackward>)
tensor(0.2552, grad_fn=<NllLossBackward>)
tensor(0.1760, grad_fn=<NllLossBackward>)
tensor(0.2945, grad_fn=<NllLossBackward>)
tensor(0.1690, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.1824, grad_fn=<NllLossBackward>)
tensor(0.1230, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.2942, grad_fn=<NllLossBackward>)
tensor(0.1936, grad_fn=<NllLossBac

tensor(0.2158, grad_fn=<NllLossBackward>)
tensor(0.1634, grad_fn=<NllLossBackward>)
tensor(0.3467, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.0617, grad_fn=<NllLossBackward>)
tensor(0.3039, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.2256, grad_fn=<NllLossBackward>)
tensor(0.1988, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBackward>)
tensor(0.4684, grad_fn=<NllLossBackward>)
tensor(0.1422, grad_fn=<NllLossBackward>)
tensor(0.1696, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.3312, grad_fn=<NllLossBackward>)
tensor(0.1267, grad_fn=<NllLossBackward>)
tensor(0.0867, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.2026, grad_fn=<NllLossBackward>)
tensor(0.0755, grad_fn=<NllLossBackward>)
tensor(0.1504, grad_fn=<NllLossBac

tensor(0.0418, grad_fn=<NllLossBackward>)
tensor(0.1743, grad_fn=<NllLossBackward>)
tensor(0.0412, grad_fn=<NllLossBackward>)
tensor(0.1530, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.1397, grad_fn=<NllLossBackward>)
tensor(0.2490, grad_fn=<NllLossBackward>)
tensor(0.1021, grad_fn=<NllLossBackward>)
tensor(0.0387, grad_fn=<NllLossBackward>)
tensor(0.0867, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0778, grad_fn=<NllLossBackward>)
tensor(0.1407, grad_fn=<NllLossBackward>)
tensor(0.0178, grad_fn=<NllLossBackward>)
tensor(0.2414, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.2600, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBackward>)
tensor(0.0073, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.1818, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0117, grad_fn=<NllLossBac

tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1414, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.1734, grad_fn=<NllLossBackward>)
tensor(0.3643, grad_fn=<NllLossBackward>)
tensor(0.1839, grad_fn=<NllLossBackward>)
tensor(0.2580, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.0713, grad_fn=<NllLossBackward>)
tensor(0.0778, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.0898, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.0450, grad_fn=<NllLossBackward>)
tensor(0.0734, grad_fn=<NllLossBackward>)
tensor(0.1987, grad_fn=<NllLossBackward>)
tensor(0.1553, grad_fn=<NllLossBackward>)
tensor(0.0532, grad_fn=<NllLossBackward>)
tensor(0.2376, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1077, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBac

tensor(0.2440, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.2088, grad_fn=<NllLossBackward>)
tensor(0.1612, grad_fn=<NllLossBackward>)
tensor(0.1485, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.1091, grad_fn=<NllLossBackward>)
tensor(0.1003, grad_fn=<NllLossBackward>)
tensor(0.0916, grad_fn=<NllLossBackward>)
tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.1165, grad_fn=<NllLossBackward>)
tensor(0.1982, grad_fn=<NllLossBackward>)
tensor(0.2729, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.1524, grad_fn=<NllLossBackward>)
tensor(0.2230, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.0238, grad_fn=<NllLossBackward>)
tensor(0.1609, grad_fn=<NllLossBac

tensor(0.1729, grad_fn=<NllLossBackward>)
tensor(0.1633, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.0719, grad_fn=<NllLossBackward>)
tensor(0.2307, grad_fn=<NllLossBackward>)
tensor(0.2204, grad_fn=<NllLossBackward>)
tensor(0.1113, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.1788, grad_fn=<NllLossBackward>)
tensor(0.0395, grad_fn=<NllLossBackward>)
tensor(0.1627, grad_fn=<NllLossBackward>)
tensor(0.0459, grad_fn=<NllLossBackward>)
tensor(0.1284, grad_fn=<NllLossBackward>)
tensor(0.1183, grad_fn=<NllLossBackward>)
tensor(0.0155, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.1689, grad_fn=<NllLossBackward>)
tensor(0.1141, grad_fn=<NllLossBackward>)
tensor(0.2916, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.0397, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.1192, grad_fn=<NllLossBac

tensor(0.1158, grad_fn=<NllLossBackward>)
tensor(0.0585, grad_fn=<NllLossBackward>)
tensor(0.0921, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.1354, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.1254, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.0612, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.0502, grad_fn=<NllLossBackward>)
tensor(0.0225, grad_fn=<NllLossBackward>)
tensor(0.1493, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.2585, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.3681, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.3348, grad_fn=<NllLossBackward>)
tensor(0.2277, grad_fn=<NllLossBackward>)
tensor(0.0491, grad_fn=<NllLossBac

tensor(0.0396, grad_fn=<NllLossBackward>)
tensor(0.0913, grad_fn=<NllLossBackward>)
tensor(0.1043, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.0172, grad_fn=<NllLossBackward>)
tensor(0.0122, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1297, grad_fn=<NllLossBackward>)
tensor(0.1308, grad_fn=<NllLossBackward>)
tensor(0.3796, grad_fn=<NllLossBackward>)
tensor(0.4532, grad_fn=<NllLossBackward>)
tensor(0.1717, grad_fn=<NllLossBackward>)
tensor(0.0186, grad_fn=<NllLossBackward>)
tensor(0.1896, grad_fn=<NllLossBackward>)
tensor(0.0790, grad_fn=<NllLossBackward>)
tensor(0.2132, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.2083, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBackward>)
tensor(0.2860, grad_fn=<NllLossBackward>)
tensor(0.1005, grad_fn=<NllLossBackward>)
tensor(0.1923, grad_fn=<NllLossBac

tensor(0.0477, grad_fn=<NllLossBackward>)
tensor(0.0880, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.0545, grad_fn=<NllLossBackward>)
tensor(0.2338, grad_fn=<NllLossBackward>)
tensor(0.1387, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBackward>)
tensor(0.1138, grad_fn=<NllLossBackward>)
tensor(0.0493, grad_fn=<NllLossBackward>)
tensor(0.1621, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0653, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.0457, grad_fn=<NllLossBackward>)
tensor(0.1830, grad_fn=<NllLossBackward>)
tensor(0.1261, grad_fn=<NllLossBackward>)
tensor(0.1547, grad_fn=<NllLossBackward>)
tensor(0.1473, grad_fn=<NllLossBackward>)
tensor(0.0098, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.1087, grad_fn=<NllLossBac

tensor(0.0084, grad_fn=<NllLossBackward>)
tensor(0.1268, grad_fn=<NllLossBackward>)
tensor(0.1612, grad_fn=<NllLossBackward>)
tensor(0.1753, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.0663, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.0284, grad_fn=<NllLossBackward>)
tensor(0.1134, grad_fn=<NllLossBackward>)
tensor(0.0232, grad_fn=<NllLossBackward>)
tensor(0.4093, grad_fn=<NllLossBackward>)
tensor(0.2037, grad_fn=<NllLossBackward>)
tensor(0.0654, grad_fn=<NllLossBackward>)
tensor(0.1054, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.0712, grad_fn=<NllLossBackward>)
tensor(0.1381, grad_fn=<NllLossBackward>)
tensor(0.0423, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.1932, grad_fn=<NllLossBackward>)
tensor(0.0345, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.0799, grad_fn=<NllLossBac

tensor(0.1571, grad_fn=<NllLossBackward>)
tensor(0.1926, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.5903, grad_fn=<NllLossBackward>)
tensor(0.0545, grad_fn=<NllLossBackward>)
tensor(0.0917, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBackward>)
tensor(0.1261, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.0415, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.0030, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.2016, grad_fn=<NllLossBackward>)
tensor(0.0552, grad_fn=<NllLossBac

tensor(0.1271, grad_fn=<NllLossBackward>)
tensor(0.1508, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.1503, grad_fn=<NllLossBackward>)
tensor(0.3459, grad_fn=<NllLossBackward>)
tensor(0.1187, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.1893, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.1012, grad_fn=<NllLossBackward>)
tensor(0.0240, grad_fn=<NllLossBackward>)
tensor(0.4432, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.2834, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.1152, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.1312, grad_fn=<NllLossBackward>)
tensor(0.1730, grad_fn=<NllLossBackward>)
tensor(0.1292, grad_fn=<NllLossBackward>)
tensor(0.0874, grad_fn=<NllLossBac

tensor(0.2522, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.2503, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0437, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBackward>)
tensor(0.0300, grad_fn=<NllLossBackward>)
tensor(0.2879, grad_fn=<NllLossBackward>)
tensor(0.0933, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.1458, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.0482, grad_fn=<NllLossBackward>)
tensor(0.3189, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.0030, grad_fn=<NllLossBackward>)
tensor(0.1441, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.3592, grad_fn=<NllLossBackward>)
tensor(0.1174, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.3862, grad_fn=<NllLossBac

tensor(0.2021, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.1921, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.1589, grad_fn=<NllLossBackward>)
tensor(0.1190, grad_fn=<NllLossBackward>)
tensor(0.0070, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.1726, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.1207, grad_fn=<NllLossBackward>)
tensor(0.1118, grad_fn=<NllLossBackward>)
tensor(0.1771, grad_fn=<NllLossBackward>)
tensor(0.1489, grad_fn=<NllLossBackward>)
tensor(0.0338, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.1777, grad_fn=<NllLossBackward>)
tensor(0.1147, grad_fn=<NllLossBackward>)
tensor(0.0693, grad_fn=<NllLossBackward>)
tensor(0.1879, grad_fn=<NllLossBackward>)
tensor(0.1116, grad_fn=<NllLossBackward>)
tensor(0.1764, grad_fn=<NllLossBac

tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.1595, grad_fn=<NllLossBackward>)
tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.2638, grad_fn=<NllLossBackward>)
tensor(0.0573, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.0387, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.2063, grad_fn=<NllLossBackward>)
tensor(0.0803, grad_fn=<NllLossBackward>)
tensor(0.0613, grad_fn=<NllLossBackward>)
tensor(0.2918, grad_fn=<NllLossBackward>)
tensor(0.0771, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.1702, grad_fn=<NllLossBackward>)
tensor(0.1161, grad_fn=<NllLossBackward>)
tensor(0.0689, grad_fn=<NllLossBackward>)
tensor(0.0357, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0928, grad_fn=<NllLossBackward>)
tensor(0.1334, grad_fn=<NllLossBac

tensor(0.0446, grad_fn=<NllLossBackward>)
tensor(0.0538, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.0181, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0187, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.0617, grad_fn=<NllLossBackward>)
tensor(0.1775, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.2253, grad_fn=<NllLossBackward>)
tensor(0.3050, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.1826, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.1017, grad_fn=<NllLossBackward>)
tensor(0.2854, grad_fn=<NllLossBackward>)
tensor(0.1403, grad_fn=<NllLossBac

tensor(0.1962, grad_fn=<NllLossBackward>)
tensor(0.1679, grad_fn=<NllLossBackward>)
tensor(0.1449, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.1358, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.1833, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.1517, grad_fn=<NllLossBackward>)
tensor(0.0665, grad_fn=<NllLossBackward>)
tensor(0.1409, grad_fn=<NllLossBackward>)
tensor(0.1048, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.0807, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.0412, grad_fn=<NllLossBac

tensor(0.1175, grad_fn=<NllLossBackward>)
tensor(0.0418, grad_fn=<NllLossBackward>)
tensor(0.1370, grad_fn=<NllLossBackward>)
tensor(0.2475, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.1336, grad_fn=<NllLossBackward>)
tensor(0.0507, grad_fn=<NllLossBackward>)
tensor(0.0947, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.1270, grad_fn=<NllLossBackward>)
tensor(0.1927, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.3232, grad_fn=<NllLossBackward>)
tensor(0.0178, grad_fn=<NllLossBackward>)
tensor(0.0293, grad_fn=<NllLossBackward>)
tensor(0.0711, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.2176, grad_fn=<NllLossBackward>)
tensor(0.1120, grad_fn=<NllLossBackward>)
tensor(0.0447, grad_fn=<NllLossBackward>)
tensor(0.1282, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.1112, grad_fn=<NllLossBac

tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.2455, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.2605, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.0431, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.1240, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.0079, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.1161, grad_fn=<NllLossBackward>)
tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBac

tensor(0.1945, grad_fn=<NllLossBackward>)
tensor(0.1979, grad_fn=<NllLossBackward>)
tensor(0.0790, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.1931, grad_fn=<NllLossBackward>)
tensor(0.1948, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0484, grad_fn=<NllLossBackward>)
tensor(0.1193, grad_fn=<NllLossBackward>)
tensor(0.1846, grad_fn=<NllLossBackward>)
tensor(0.2520, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0557, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.2322, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.1448, grad_fn=<NllLossBackward>)
tensor(0.1777, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.0857, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBackward>)
tensor(0.1554, grad_fn=<NllLossBackward>)
tensor(0.2095, grad_fn=<NllLossBac

tensor(0.1423, grad_fn=<NllLossBackward>)
tensor(0.1430, grad_fn=<NllLossBackward>)
tensor(0.0491, grad_fn=<NllLossBackward>)
tensor(0.1778, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.0846, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.1297, grad_fn=<NllLossBackward>)
tensor(0.2215, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0704, grad_fn=<NllLossBackward>)
tensor(0.0267, grad_fn=<NllLossBackward>)
tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.1830, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.0347, grad_fn=<NllLossBac

tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.2143, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.0270, grad_fn=<NllLossBackward>)
tensor(0.0449, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.1215, grad_fn=<NllLossBackward>)
tensor(0.0499, grad_fn=<NllLossBackward>)
tensor(0.1261, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBackward>)
tensor(0.1786, grad_fn=<NllLossBackward>)
tensor(0.0461, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.3951, grad_fn=<NllLossBackward>)
tensor(0.2636, grad_fn=<NllLossBackward>)
tensor(0.3355, grad_fn=<NllLossBackward>)
tensor(0.1701, grad_fn=<NllLossBackward>)
tensor(0.0246, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.0979, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBac

tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.3539, grad_fn=<NllLossBackward>)
tensor(0.0252, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.0094, grad_fn=<NllLossBackward>)
tensor(0.0889, grad_fn=<NllLossBackward>)
tensor(0.1968, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.1744, grad_fn=<NllLossBackward>)
tensor(0.2406, grad_fn=<NllLossBackward>)
tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.1534, grad_fn=<NllLossBackward>)
tensor(0.0895, grad_fn=<NllLossBackward>)
tensor(0.0239, grad_fn=<NllLossBackward>)
tensor(0.2543, grad_fn=<NllLossBackward>)
tensor(0.1084, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBackward>)
tensor(0.0781, grad_fn=<NllLossBac

tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.0232, grad_fn=<NllLossBackward>)
tensor(0.0339, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.1553, grad_fn=<NllLossBackward>)
tensor(0.0087, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBackward>)
tensor(0.2187, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.0481, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.0081, grad_fn=<NllLossBackward>)
tensor(0.0590, grad_fn=<NllLossBackward>)
tensor(0.1978, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.0304, grad_fn=<NllLossBackward>)
tensor(0.1564, grad_fn=<NllLossBackward>)
tensor(0.0594, grad_fn=<NllLossBackward>)
tensor(0.1568, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.0439, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBac

tensor(0.2447, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.2284, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.1729, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.1132, grad_fn=<NllLossBackward>)
tensor(0.2194, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.1305, grad_fn=<NllLossBackward>)
tensor(0.0880, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.0843, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.0748, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.1099, grad_fn=<NllLossBackward>)
tensor(0.0461, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.1915, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBac

tensor(0.0078, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.2130, grad_fn=<NllLossBackward>)
tensor(0.0332, grad_fn=<NllLossBackward>)
tensor(0.1613, grad_fn=<NllLossBackward>)
tensor(0.1106, grad_fn=<NllLossBackward>)
tensor(0.0088, grad_fn=<NllLossBackward>)
tensor(0.2241, grad_fn=<NllLossBackward>)
tensor(0.0689, grad_fn=<NllLossBackward>)
tensor(0.0766, grad_fn=<NllLossBackward>)
tensor(0.1700, grad_fn=<NllLossBackward>)
tensor(0.2616, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.1283, grad_fn=<NllLossBackward>)
tensor(0.2457, grad_fn=<NllLossBackward>)
tensor(0.1330, grad_fn=<NllLossBackward>)
tensor(0.0777, grad_fn=<NllLossBackward>)
tensor(0.0649, grad_fn=<NllLossBackward>)
tensor(0.0594, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.1792, grad_fn=<NllLossBackward>)
tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.0568, grad_fn=<NllLossBac

tensor(0.1884, grad_fn=<NllLossBackward>)
tensor(0.3024, grad_fn=<NllLossBackward>)
tensor(0.0831, grad_fn=<NllLossBackward>)
tensor(0.3813, grad_fn=<NllLossBackward>)
tensor(0.2671, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.0227, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.0790, grad_fn=<NllLossBackward>)
tensor(0.1132, grad_fn=<NllLossBackward>)
tensor(0.1489, grad_fn=<NllLossBackward>)
tensor(0.1060, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.0912, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.0992, grad_fn=<NllLossBackward>)
tensor(0.0585, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.2151, grad_fn=<NllLossBackward>)
tensor(0.4429, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBac

tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.1169, grad_fn=<NllLossBackward>)
tensor(0.1247, grad_fn=<NllLossBackward>)
tensor(0.0738, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0630, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.1853, grad_fn=<NllLossBackward>)
tensor(0.2916, grad_fn=<NllLossBackward>)
tensor(0.0418, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.0539, grad_fn=<NllLossBackward>)
tensor(0.0562, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.1507, grad_fn=<NllLossBackward>)
tensor(0.0853, grad_fn=<NllLossBackward>)
tensor(0.1689, grad_fn=<NllLossBackward>)
tensor(0.0533, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.1837, grad_fn=<NllLossBac

tensor(0.0781, grad_fn=<NllLossBackward>)
tensor(0.0042, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.5121, grad_fn=<NllLossBackward>)
tensor(0.2370, grad_fn=<NllLossBackward>)
tensor(0.1659, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.0849, grad_fn=<NllLossBackward>)
tensor(0.1683, grad_fn=<NllLossBackward>)
tensor(0.1716, grad_fn=<NllLossBackward>)
tensor(0.2968, grad_fn=<NllLossBackward>)
tensor(0.1262, grad_fn=<NllLossBackward>)
tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.2037, grad_fn=<NllLossBackward>)
tensor(0.1137, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.1975, grad_fn=<NllLossBackward>)
tensor(0.1292, grad_fn=<NllLossBackward>)
tensor(0.1056, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBac

tensor(0.0386, grad_fn=<NllLossBackward>)
tensor(0.2269, grad_fn=<NllLossBackward>)
tensor(0.1652, grad_fn=<NllLossBackward>)
tensor(0.2468, grad_fn=<NllLossBackward>)
tensor(0.0818, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.1422, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.1107, grad_fn=<NllLossBackward>)
tensor(0.0480, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0243, grad_fn=<NllLossBackward>)
tensor(0.1065, grad_fn=<NllLossBackward>)
tensor(0.1145, grad_fn=<NllLossBackward>)
tensor(0.1245, grad_fn=<NllLossBackward>)
tensor(0.1853, grad_fn=<NllLossBackward>)
tensor(0.2332, grad_fn=<NllLossBackward>)
tensor(0.3640, grad_fn=<NllLossBackward>)
tensor(0.3342, grad_fn=<NllLossBackward>)
tensor(0.1579, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.2010, grad_fn=<NllLossBac

tensor(0.1257, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.0584, grad_fn=<NllLossBackward>)
tensor(0.0219, grad_fn=<NllLossBackward>)
tensor(0.1673, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.0295, grad_fn=<NllLossBackward>)
tensor(0.0701, grad_fn=<NllLossBackward>)
tensor(0.2009, grad_fn=<NllLossBackward>)
tensor(0.1006, grad_fn=<NllLossBackward>)
tensor(0.1124, grad_fn=<NllLossBackward>)
tensor(0.0570, grad_fn=<NllLossBackward>)
tensor(0.1367, grad_fn=<NllLossBackward>)
tensor(0.0646, grad_fn=<NllLossBackward>)
tensor(0.2093, grad_fn=<NllLossBackward>)
tensor(0.0900, grad_fn=<NllLossBackward>)
tensor(0.0590, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.3489, grad_fn=<NllLossBackward>)
tensor(0.0180, grad_fn=<NllLossBackward>)
tensor(0.0536, grad_fn=<NllLossBackward>)
tensor(0.0164, grad_fn=<NllLossBac

tensor(0.3556, grad_fn=<NllLossBackward>)
tensor(0.0380, grad_fn=<NllLossBackward>)
tensor(0.0454, grad_fn=<NllLossBackward>)
tensor(0.2457, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.1099, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.1447, grad_fn=<NllLossBackward>)
tensor(0.0187, grad_fn=<NllLossBackward>)
tensor(0.3504, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.0576, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.1061, grad_fn=<NllLossBackward>)
tensor(0.0113, grad_fn=<NllLossBackward>)
tensor(0.0331, grad_fn=<NllLossBackward>)
tensor(0.1826, grad_fn=<NllLossBackward>)
tensor(0.1161, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.2131, grad_fn=<NllLossBackward>)
tensor(0.2567, grad_fn=<NllLossBackward>)
tensor(0.0750, grad_fn=<NllLossBac

tensor(0.1811, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.0403, grad_fn=<NllLossBackward>)
tensor(0.1286, grad_fn=<NllLossBackward>)
tensor(0.0296, grad_fn=<NllLossBackward>)
tensor(0.1075, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.1024, grad_fn=<NllLossBackward>)
tensor(0.1991, grad_fn=<NllLossBackward>)
tensor(0.2068, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0390, grad_fn=<NllLossBackward>)
tensor(0.0423, grad_fn=<NllLossBackward>)
tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0593, grad_fn=<NllLossBackward>)
tensor(0.0267, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.2246, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.3138, grad_fn=<NllLossBackward>)
tensor(0.1605, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBac

tensor(0.1093, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.0885, grad_fn=<NllLossBackward>)
tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.0054, grad_fn=<NllLossBackward>)
tensor(0.1233, grad_fn=<NllLossBackward>)
tensor(0.0408, grad_fn=<NllLossBackward>)
tensor(0.0959, grad_fn=<NllLossBackward>)
tensor(0.0660, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.1222, grad_fn=<NllLossBackward>)
tensor(0.0325, grad_fn=<NllLossBackward>)
tensor(0.0439, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
tensor(0.0255, grad_fn=<NllLossBackward>)
tensor(0.1054, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.1506, grad_fn=<NllLossBackward>)
tensor(0.0625, grad_fn=<NllLossBackward>)
tensor(0.0219, grad_fn=<NllLossBackward>)
tensor(0.1995, grad_fn=<NllLossBackward>)
tensor(0.1205, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBac

tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.0246, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.0111, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.2457, grad_fn=<NllLossBackward>)
tensor(0.0788, grad_fn=<NllLossBackward>)
tensor(0.1493, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.1883, grad_fn=<NllLossBackward>)
tensor(0.0409, grad_fn=<NllLossBackward>)
tensor(0.2261, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.2033, grad_fn=<NllLossBackward>)
tensor(0.3248, grad_fn=<NllLossBackward>)
tensor(0.0272, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBac

tensor(0.0028, grad_fn=<NllLossBackward>)
tensor(0.0689, grad_fn=<NllLossBackward>)
tensor(0.0486, grad_fn=<NllLossBackward>)
tensor(0.1640, grad_fn=<NllLossBackward>)
tensor(0.0190, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.0944, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.1406, grad_fn=<NllLossBackward>)
tensor(0.0242, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0950, grad_fn=<NllLossBackward>)
tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.0336, grad_fn=<NllLossBackward>)
tensor(0.2200, grad_fn=<NllLossBackward>)
tensor(0.0570, grad_fn=<NllLossBackward>)
tensor(0.0197, grad_fn=<NllLossBackward>)
tensor(0.0409, grad_fn=<NllLossBackward>)
tensor(0.0459, grad_fn=<NllLossBackward>)
tensor(0.0240, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBac

tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.0916, grad_fn=<NllLossBackward>)
tensor(0.0583, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBackward>)
tensor(0.1028, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.1354, grad_fn=<NllLossBackward>)
tensor(0.4744, grad_fn=<NllLossBackward>)
tensor(0.2126, grad_fn=<NllLossBackward>)
tensor(0.0386, grad_fn=<NllLossBackward>)
tensor(0.1469, grad_fn=<NllLossBackward>)
tensor(0.0331, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.0719, grad_fn=<NllLossBackward>)
tensor(0.0502, grad_fn=<NllLossBackward>)
tensor(0.1387, grad_fn=<NllLossBackward>)
tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.1783, grad_fn=<NllLossBackward>)
tensor(0.0284, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBac

tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.2111, grad_fn=<NllLossBackward>)
tensor(0.0239, grad_fn=<NllLossBackward>)
tensor(0.0675, grad_fn=<NllLossBackward>)
tensor(0.1995, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBackward>)
tensor(0.0206, grad_fn=<NllLossBackward>)
tensor(0.1103, grad_fn=<NllLossBackward>)
tensor(0.1559, grad_fn=<NllLossBackward>)
tensor(0.0247, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.0632, grad_fn=<NllLossBackward>)
tensor(0.1154, grad_fn=<NllLossBackward>)
tensor(0.2615, grad_fn=<NllLossBackward>)
tensor(0.0993, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.3119, grad_fn=<NllLossBackward>)
tensor(0.3708, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.0217, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.1416, grad_fn=<NllLossBackward>)
tensor(0.1390, grad_fn=<NllLossBac

tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.0808, grad_fn=<NllLossBackward>)
tensor(0.2163, grad_fn=<NllLossBackward>)
tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.1964, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
tensor(0.0467, grad_fn=<NllLossBackward>)
tensor(0.0734, grad_fn=<NllLossBackward>)
tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.3160, grad_fn=<NllLossBackward>)
tensor(0.1451, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.2251, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0592, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.1304, grad_fn=<NllLossBac

tensor(0.2212, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.0812, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.0612, grad_fn=<NllLossBackward>)
tensor(0.0936, grad_fn=<NllLossBackward>)
tensor(0.0105, grad_fn=<NllLossBackward>)
tensor(0.0652, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.1712, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.0915, grad_fn=<NllLossBackward>)
tensor(0.0540, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.1286, grad_fn=<NllLossBackward>)
tensor(0.1657, grad_fn=<NllLossBackward>)
tensor(0.1841, grad_fn=<NllLossBackward>)
tensor(0.1479, grad_fn=<NllLossBackward>)
tensor(0.2259, grad_fn=<NllLossBackward>)
tensor(0.0145, grad_fn=<NllLossBackward>)
tensor(0.2760, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBac

tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.0791, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.0225, grad_fn=<NllLossBackward>)
tensor(0.0088, grad_fn=<NllLossBackward>)
tensor(0.1998, grad_fn=<NllLossBackward>)
tensor(0.1359, grad_fn=<NllLossBackward>)
tensor(0.0120, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0755, grad_fn=<NllLossBackward>)
tensor(0.0190, grad_fn=<NllLossBackward>)
tensor(0.0395, grad_fn=<NllLossBackward>)
tensor(0.1640, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0653, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.2697, grad_fn=<NllLossBackward>)
tensor(0.1237, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.0415, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBac

tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.0122, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.0242, grad_fn=<NllLossBackward>)
tensor(0.0295, grad_fn=<NllLossBackward>)
tensor(0.0932, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.2198, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.2642, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.1224, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.2145, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBackward>)
tensor(0.2134, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.0197, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.0499, grad_fn=<NllLossBac

tensor(0.1093, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0406, grad_fn=<NllLossBackward>)
tensor(0.1464, grad_fn=<NllLossBackward>)
tensor(0.2785, grad_fn=<NllLossBackward>)
tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.1664, grad_fn=<NllLossBackward>)
tensor(0.2113, grad_fn=<NllLossBackward>)
tensor(0.0688, grad_fn=<NllLossBackward>)
tensor(0.2338, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.2419, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.3549, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1491, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.1846, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.0226, grad_fn=<NllLossBac

tensor(0.1121, grad_fn=<NllLossBackward>)
tensor(0.1221, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.1624, grad_fn=<NllLossBackward>)
tensor(0.1347, grad_fn=<NllLossBackward>)
tensor(0.1602, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.0846, grad_fn=<NllLossBackward>)
tensor(0.0158, grad_fn=<NllLossBackward>)
tensor(0.0941, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.1504, grad_fn=<NllLossBackward>)
tensor(0.0187, grad_fn=<NllLossBackward>)
tensor(0.1721, grad_fn=<NllLossBackward>)
tensor(0.0065, grad_fn=<NllLossBackward>)
tensor(0.0221, grad_fn=<NllLossBackward>)
tensor(0.2173, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.1539, grad_fn=<NllLossBackward>)
tensor(0.1895, grad_fn=<NllLossBackward>)
tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.1110, grad_fn=<NllLossBac

tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.2722, grad_fn=<NllLossBackward>)
tensor(0.0782, grad_fn=<NllLossBackward>)
tensor(0.0854, grad_fn=<NllLossBackward>)
tensor(0.1104, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.1751, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBackward>)
tensor(0.0899, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.0754, grad_fn=<NllLossBackward>)
tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBackward>)
tensor(0.3229, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.1688, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.2128, grad_fn=<NllLossBackward>)
tensor(0.0419, grad_fn=<NllLossBackward>)
tensor(0.1436, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.0357, grad_fn=<NllLossBac

tensor(0.0373, grad_fn=<NllLossBackward>)
tensor(0.1644, grad_fn=<NllLossBackward>)
tensor(0.2436, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.2293, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.1233, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.0618, grad_fn=<NllLossBackward>)
tensor(0.0994, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.2479, grad_fn=<NllLossBackward>)
tensor(0.2122, grad_fn=<NllLossBackward>)
tensor(0.1425, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0111, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.1615, grad_fn=<NllLossBackward>)
tensor(0.2189, grad_fn=<NllLossBac

tensor(0.0269, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.0891, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.0205, grad_fn=<NllLossBackward>)
tensor(0.1507, grad_fn=<NllLossBackward>)
tensor(0.0154, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.1946, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0361, grad_fn=<NllLossBackward>)
tensor(0.0853, grad_fn=<NllLossBackward>)
tensor(0.2078, grad_fn=<NllLossBackward>)
tensor(0.0163, grad_fn=<NllLossBackward>)
tensor(0.0756, grad_fn=<NllLossBackward>)
tensor(0.0152, grad_fn=<NllLossBackward>)
tensor(0.0954, grad_fn=<NllLossBackward>)
tensor(0.0941, grad_fn=<NllLossBackward>)
tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.0867, grad_fn=<NllLossBackward>)
tensor(0.0307, grad_fn=<NllLossBac

tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.2874, grad_fn=<NllLossBackward>)
tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.0924, grad_fn=<NllLossBackward>)
tensor(0.1157, grad_fn=<NllLossBackward>)
tensor(0.0418, grad_fn=<NllLossBackward>)
tensor(0.2096, grad_fn=<NllLossBackward>)
tensor(0.0417, grad_fn=<NllLossBackward>)
tensor(0.0851, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0679, grad_fn=<NllLossBackward>)
tensor(0.1192, grad_fn=<NllLossBackward>)
tensor(0.1795, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.0111, grad_fn=<NllLossBackward>)
tensor(0.1371, grad_fn=<NllLossBackward>)
tensor(0.1553, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.2115, grad_fn=<NllLossBackward>)
tensor(0.2129, grad_fn=<NllLossBackward>)
tensor(0.1567, grad_fn=<NllLossBac

tensor(0.0691, grad_fn=<NllLossBackward>)
tensor(0.1372, grad_fn=<NllLossBackward>)
tensor(0.2584, grad_fn=<NllLossBackward>)
tensor(0.0849, grad_fn=<NllLossBackward>)
tensor(0.0665, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.1855, grad_fn=<NllLossBackward>)
tensor(0.1467, grad_fn=<NllLossBackward>)
tensor(0.0568, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.1584, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.1621, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.0138, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBac

tensor(0.0535, grad_fn=<NllLossBackward>)
tensor(0.2543, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBackward>)
tensor(0.0398, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBackward>)
tensor(0.1477, grad_fn=<NllLossBackward>)
tensor(0.2151, grad_fn=<NllLossBackward>)
tensor(0.0241, grad_fn=<NllLossBackward>)
tensor(0.1472, grad_fn=<NllLossBackward>)
tensor(0.0686, grad_fn=<NllLossBackward>)
tensor(0.0910, grad_fn=<NllLossBackward>)
tensor(0.0872, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.1348, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBackward>)
tensor(0.1867, grad_fn=<NllLossBac

tensor(0.0521, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBackward>)
tensor(0.1887, grad_fn=<NllLossBackward>)
tensor(0.2670, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.1170, grad_fn=<NllLossBackward>)
tensor(0.0732, grad_fn=<NllLossBackward>)
tensor(0.1332, grad_fn=<NllLossBackward>)
tensor(0.0950, grad_fn=<NllLossBackward>)
tensor(0.1374, grad_fn=<NllLossBackward>)
tensor(0.0713, grad_fn=<NllLossBackward>)
tensor(0.2263, grad_fn=<NllLossBackward>)
tensor(0.2788, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.0191, grad_fn=<NllLossBackward>)
tensor(0.1289, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.0641, grad_fn=<NllLossBackward>)
tensor(0.0137, grad_fn=<NllLossBackward>)
tensor(0.0484, grad_fn=<NllLossBackward>)
tensor(0.0047, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.0731, grad_fn=<NllLossBackward>)
tensor(0.0032, grad_fn=<NllLossBac

tensor(0.0007, grad_fn=<NllLossBackward>)
tensor(0.1828, grad_fn=<NllLossBackward>)
tensor(0.1727, grad_fn=<NllLossBackward>)
tensor(0.1608, grad_fn=<NllLossBackward>)
tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.0026, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBackward>)
tensor(0.0146, grad_fn=<NllLossBackward>)
tensor(0.1701, grad_fn=<NllLossBackward>)
tensor(0.1034, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.0498, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0649, grad_fn=<NllLossBackward>)
tensor(0.2554, grad_fn=<NllLossBackward>)
tensor(0.3087, grad_fn=<NllLossBackward>)
tensor(0.0559, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.1245, grad_fn=<NllLossBac

tensor(0.1428, grad_fn=<NllLossBackward>)
tensor(0.0716, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBackward>)
tensor(0.0349, grad_fn=<NllLossBackward>)
tensor(0.1607, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.0204, grad_fn=<NllLossBackward>)
tensor(0.1466, grad_fn=<NllLossBackward>)
tensor(0.0470, grad_fn=<NllLossBackward>)
tensor(0.1677, grad_fn=<NllLossBackward>)
tensor(0.0585, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.1601, grad_fn=<NllLossBackward>)
tensor(0.0398, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.1612, grad_fn=<NllLossBackward>)
tensor(0.1258, grad_fn=<NllLossBackward>)
tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.1238, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.0040, grad_fn=<NllLossBackward>)
tensor(0.2236, grad_fn=<NllLossBac

tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.1353, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.1138, grad_fn=<NllLossBackward>)
tensor(0.1599, grad_fn=<NllLossBackward>)
tensor(0.0183, grad_fn=<NllLossBackward>)
tensor(0.0523, grad_fn=<NllLossBackward>)
tensor(0.2208, grad_fn=<NllLossBackward>)
tensor(0.1399, grad_fn=<NllLossBackward>)
tensor(0.1819, grad_fn=<NllLossBackward>)
tensor(0.0539, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBackward>)
tensor(0.0122, grad_fn=<NllLossBackward>)
tensor(0.1342, grad_fn=<NllLossBackward>)
tensor(0.0298, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0171, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBac

tensor(0.1496, grad_fn=<NllLossBackward>)
tensor(0.0226, grad_fn=<NllLossBackward>)
tensor(0.1031, grad_fn=<NllLossBackward>)
tensor(0.0568, grad_fn=<NllLossBackward>)
tensor(0.2028, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.1503, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.0181, grad_fn=<NllLossBackward>)
tensor(0.0042, grad_fn=<NllLossBackward>)
tensor(0.1723, grad_fn=<NllLossBackward>)
tensor(0.1168, grad_fn=<NllLossBackward>)
tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.1387, grad_fn=<NllLossBackward>)
tensor(0.0012, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBackward>)
tensor(0.0970, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.1369, grad_fn=<NllLossBackward>)
tensor(0.0344, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0183, grad_fn=<NllLossBac

tensor(0.1897, grad_fn=<NllLossBackward>)
tensor(0.0560, grad_fn=<NllLossBackward>)
tensor(0.0667, grad_fn=<NllLossBackward>)
tensor(0.0108, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.1251, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.1066, grad_fn=<NllLossBackward>)
tensor(0.1516, grad_fn=<NllLossBackward>)
tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.1713, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0684, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.1656, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0898, grad_fn=<NllLossBackward>)
tensor(0.0088, grad_fn=<NllLossBackward>)
tensor(0.1227, grad_fn=<NllLossBackward>)
tensor(0.0450, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.1387, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBac

tensor(0.0568, grad_fn=<NllLossBackward>)
tensor(0.1485, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.0598, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.1724, grad_fn=<NllLossBackward>)
tensor(0.2537, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBackward>)
tensor(0.2059, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0531, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.0307, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0641, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.1528, grad_fn=<NllLossBackward>)
tensor(0.2971, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0870, grad_fn=<NllLossBackward>)
tensor(0.0051, grad_fn=<NllLossBac

tensor(0.0912, grad_fn=<NllLossBackward>)
tensor(0.0795, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.2105, grad_fn=<NllLossBackward>)
tensor(0.0634, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.0475, grad_fn=<NllLossBackward>)
tensor(0.2099, grad_fn=<NllLossBackward>)
tensor(0.1201, grad_fn=<NllLossBackward>)
tensor(0.1186, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.2754, grad_fn=<NllLossBackward>)
tensor(0.1836, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBackward>)
tensor(0.1040, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.0798, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.0724, grad_fn=<NllLossBackward>)
tensor(0.0544, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBackward>)
tensor(0.1451, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBac

tensor(0.2449, grad_fn=<NllLossBackward>)
tensor(0.1681, grad_fn=<NllLossBackward>)
tensor(0.2163, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.1116, grad_fn=<NllLossBackward>)
tensor(0.1965, grad_fn=<NllLossBackward>)
tensor(0.1246, grad_fn=<NllLossBackward>)
tensor(0.1736, grad_fn=<NllLossBackward>)
tensor(0.2161, grad_fn=<NllLossBackward>)
tensor(0.0539, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBackward>)
tensor(0.1259, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.0140, grad_fn=<NllLossBackward>)
tensor(0.1220, grad_fn=<NllLossBackward>)
tensor(0.0785, grad_fn=<NllLossBackward>)
tensor(0.2014, grad_fn=<NllLossBackward>)
tensor(0.1524, grad_fn=<NllLossBackward>)
tensor(0.0595, grad_fn=<NllLossBackward>)
tensor(0.1046, grad_fn=<NllLossBac

tensor(0.1823, grad_fn=<NllLossBackward>)
tensor(0.1198, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.1638, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.1330, grad_fn=<NllLossBackward>)
tensor(0.0453, grad_fn=<NllLossBackward>)
tensor(0.1650, grad_fn=<NllLossBackward>)
tensor(0.1212, grad_fn=<NllLossBackward>)
tensor(0.0128, grad_fn=<NllLossBackward>)
tensor(0.1232, grad_fn=<NllLossBackward>)
tensor(0.0605, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.0038, grad_fn=<NllLossBackward>)
tensor(0.0830, grad_fn=<NllLossBackward>)
tensor(0.2202, grad_fn=<NllLossBackward>)
tensor(0.1169, grad_fn=<NllLossBackward>)
tensor(0.0077, grad_fn=<NllLossBackward>)
tensor(0.0102, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBac

tensor(0.1001, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.2724, grad_fn=<NllLossBackward>)
tensor(0.2126, grad_fn=<NllLossBackward>)
tensor(0.1583, grad_fn=<NllLossBackward>)
tensor(0.1307, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.0326, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.0519, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.0587, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.1092, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.0578, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBackward>)
tensor(0.0074, grad_fn=<NllLossBackward>)
tensor(0.0420, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBac

tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.0065, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.0094, grad_fn=<NllLossBackward>)
tensor(0.2082, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.2161, grad_fn=<NllLossBackward>)
tensor(0.0371, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.0475, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0514, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.2191, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.0946, grad_fn=<NllLossBackward>)
tensor(0.0565, grad_fn=<NllLossBackward>)
tensor(0.0517, grad_fn=<NllLossBackward>)
tensor(0.0914, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.1735, grad_fn=<NllLossBac

tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.1577, grad_fn=<NllLossBackward>)
tensor(0.2120, grad_fn=<NllLossBackward>)
tensor(0.0911, grad_fn=<NllLossBackward>)
tensor(0.0744, grad_fn=<NllLossBackward>)
tensor(0.1055, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.1757, grad_fn=<NllLossBackward>)
tensor(0.0612, grad_fn=<NllLossBackward>)
tensor(0.1308, grad_fn=<NllLossBackward>)
tensor(0.0391, grad_fn=<NllLossBackward>)
tensor(0.1942, grad_fn=<NllLossBackward>)
tensor(0.1027, grad_fn=<NllLossBackward>)
tensor(0.0097, grad_fn=<NllLossBackward>)
tensor(0.0898, grad_fn=<NllLossBackward>)
tensor(0.0065, grad_fn=<NllLossBackward>)
tensor(0.1799, grad_fn=<NllLossBackward>)
tensor(0.1257, grad_fn=<NllLossBackward>)
tensor(0.1614, grad_fn=<NllLossBackward>)
tensor(0.1763, grad_fn=<NllLossBackward>)
tensor(0.0924, grad_fn=<NllLossBackward>)
tensor(0.1258, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBac

tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0641, grad_fn=<NllLossBackward>)
tensor(0.0068, grad_fn=<NllLossBackward>)
tensor(0.0993, grad_fn=<NllLossBackward>)
tensor(0.1795, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.1244, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.1077, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0867, grad_fn=<NllLossBackward>)
tensor(0.1145, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.1721, grad_fn=<NllLossBackward>)
tensor(0.0269, grad_fn=<NllLossBackward>)
tensor(0.0643, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBac

tensor(0.0073, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.1164, grad_fn=<NllLossBackward>)
tensor(0.1368, grad_fn=<NllLossBackward>)
tensor(0.1583, grad_fn=<NllLossBackward>)
tensor(0.1358, grad_fn=<NllLossBackward>)
tensor(0.0816, grad_fn=<NllLossBackward>)
tensor(0.0390, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBackward>)
tensor(0.0785, grad_fn=<NllLossBackward>)
tensor(0.0672, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.2147, grad_fn=<NllLossBackward>)
tensor(0.1296, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.1951, grad_fn=<NllLossBackward>)
tensor(0.0608, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.0293, grad_fn=<NllLossBac

tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0397, grad_fn=<NllLossBackward>)
tensor(0.2053, grad_fn=<NllLossBackward>)
tensor(0.0538, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.0480, grad_fn=<NllLossBackward>)
tensor(0.0520, grad_fn=<NllLossBackward>)
tensor(0.3097, grad_fn=<NllLossBackward>)
tensor(0.0658, grad_fn=<NllLossBackward>)
tensor(0.0261, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.0989, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.1893, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.2190, grad_fn=<NllLossBackward>)
tensor(0.1358, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.1292, grad_fn=<NllLossBackward>)
tensor(0.0672, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBac

tensor(0.0310, grad_fn=<NllLossBackward>)
tensor(0.1705, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.0572, grad_fn=<NllLossBackward>)
tensor(0.1169, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.1896, grad_fn=<NllLossBackward>)
tensor(0.0791, grad_fn=<NllLossBackward>)
tensor(0.2219, grad_fn=<NllLossBackward>)
tensor(0.0129, grad_fn=<NllLossBackward>)
tensor(0.0696, grad_fn=<NllLossBackward>)
tensor(0.0132, grad_fn=<NllLossBackward>)
tensor(0.0138, grad_fn=<NllLossBackward>)
tensor(0.2009, grad_fn=<NllLossBackward>)
tensor(0.2516, grad_fn=<NllLossBackward>)
tensor(0.2229, grad_fn=<NllLossBackward>)
tensor(0.1860, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0810, grad_fn=<NllLossBackward>)
tensor(0.1027, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0691, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBac

tensor(0.2374, grad_fn=<NllLossBackward>)
tensor(0.0214, grad_fn=<NllLossBackward>)
tensor(0.0163, grad_fn=<NllLossBackward>)
tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.0540, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.0153, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.1106, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.0750, grad_fn=<NllLossBackward>)
tensor(0.0016, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.1237, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.0338, grad_fn=<NllLossBackward>)
tensor(0.0426, grad_fn=<NllLossBackward>)
tensor(0.1474, grad_fn=<NllLossBackward>)
tensor(0.2433, grad_fn=<NllLossBackward>)
tensor(0.0389, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.0088, grad_fn=<NllLossBackward>)
tensor(0.0043, grad_fn=<NllLossBackward>)
tensor(0.1404, grad_fn=<NllLossBac

tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.0575, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0510, grad_fn=<NllLossBackward>)
tensor(0.1613, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.0611, grad_fn=<NllLossBackward>)
tensor(0.1433, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0351, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.1298, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.1836, grad_fn=<NllLossBackward>)
tensor(0.0242, grad_fn=<NllLossBackward>)
tensor(0.1053, grad_fn=<NllLossBackward>)
tensor(0.1711, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1896, grad_fn=<NllLossBackward>)
tensor(0.1031, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBac

tensor(0.0754, grad_fn=<NllLossBackward>)
tensor(0.0307, grad_fn=<NllLossBackward>)
tensor(0.1745, grad_fn=<NllLossBackward>)
tensor(0.1040, grad_fn=<NllLossBackward>)
tensor(0.1436, grad_fn=<NllLossBackward>)
tensor(0.0808, grad_fn=<NllLossBackward>)
tensor(0.0111, grad_fn=<NllLossBackward>)
tensor(0.1204, grad_fn=<NllLossBackward>)
tensor(0.0439, grad_fn=<NllLossBackward>)
tensor(0.0387, grad_fn=<NllLossBackward>)
tensor(0.0070, grad_fn=<NllLossBackward>)
tensor(0.0611, grad_fn=<NllLossBackward>)
tensor(0.0672, grad_fn=<NllLossBackward>)
tensor(0.2184, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.2115, grad_fn=<NllLossBackward>)
tensor(0.0021, grad_fn=<NllLossBackward>)
tensor(0.0546, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBackward>)
tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.1208, grad_fn=<NllLossBac

tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0281, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.1694, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.0833, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBackward>)
tensor(0.0700, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.1346, grad_fn=<NllLossBackward>)
tensor(0.2690, grad_fn=<NllLossBackward>)
tensor(0.0310, grad_fn=<NllLossBackward>)
tensor(0.0247, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.0854, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.0870, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.1373, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBac

tensor(0.0201, grad_fn=<NllLossBackward>)
tensor(0.0584, grad_fn=<NllLossBackward>)
tensor(0.1114, grad_fn=<NllLossBackward>)
tensor(0.0822, grad_fn=<NllLossBackward>)
tensor(0.3517, grad_fn=<NllLossBackward>)
tensor(0.0182, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.1623, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1305, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.1605, grad_fn=<NllLossBackward>)
tensor(0.0572, grad_fn=<NllLossBackward>)
tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0163, grad_fn=<NllLossBackward>)
tensor(0.0826, grad_fn=<NllLossBackward>)
tensor(0.0030, grad_fn=<NllLossBackward>)
tensor(0.1203, grad_fn=<NllLossBackward>)
tensor(0.1925, grad_fn=<NllLossBac

tensor(0.0801, grad_fn=<NllLossBackward>)
tensor(0.2240, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.1185, grad_fn=<NllLossBackward>)
tensor(0.2359, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.0075, grad_fn=<NllLossBackward>)
tensor(0.1028, grad_fn=<NllLossBackward>)
tensor(0.1343, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.1856, grad_fn=<NllLossBackward>)
tensor(0.1201, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBackward>)
tensor(0.2200, grad_fn=<NllLossBackward>)
tensor(0.0911, grad_fn=<NllLossBackward>)
tensor(0.0352, grad_fn=<NllLossBackward>)
tensor(0.0204, grad_fn=<NllLossBackward>)
tensor(0.0437, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBac

tensor(0.0557, grad_fn=<NllLossBackward>)
tensor(0.0431, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.4032, grad_fn=<NllLossBackward>)
tensor(0.0661, grad_fn=<NllLossBackward>)
tensor(0.0467, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.0599, grad_fn=<NllLossBackward>)
tensor(0.0584, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.1039, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.0830, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0970, grad_fn=<NllLossBackward>)
tensor(0.1414, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1473, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.1604, grad_fn=<NllLossBac

tensor(0.0084, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.1449, grad_fn=<NllLossBackward>)
tensor(0.0511, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0959, grad_fn=<NllLossBackward>)
tensor(0.1216, grad_fn=<NllLossBackward>)
tensor(0.1777, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0107, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.0682, grad_fn=<NllLossBackward>)
tensor(0.0780, grad_fn=<NllLossBackward>)
tensor(0.0545, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.1013, grad_fn=<NllLossBackward>)
tensor(0.0278, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBackward>)
tensor(0.2700, grad_fn=<NllLossBackward>)
tensor(0.1017, grad_fn=<NllLossBackward>)
tensor(0.0064, grad_fn=<NllLossBac

tensor(0.0135, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.0351, grad_fn=<NllLossBackward>)
tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.1623, grad_fn=<NllLossBackward>)
tensor(0.1783, grad_fn=<NllLossBackward>)
tensor(0.0317, grad_fn=<NllLossBackward>)
tensor(0.1346, grad_fn=<NllLossBackward>)
tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.0067, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.1469, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.1832, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0027, grad_fn=<NllLossBackward>)
tensor(0.1301, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.0994, grad_fn=<NllLossBac

tensor(0.0979, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBackward>)
tensor(0.1353, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.2228, grad_fn=<NllLossBackward>)
tensor(0.1265, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.0531, grad_fn=<NllLossBackward>)
tensor(0.1174, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.1562, grad_fn=<NllLossBackward>)
tensor(0.0167, grad_fn=<NllLossBackward>)
tensor(0.1964, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0278, grad_fn=<NllLossBackward>)
tensor(0.2130, grad_fn=<NllLossBackward>)
tensor(0.0008, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBac

tensor(0.0050, grad_fn=<NllLossBackward>)
tensor(0.0667, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.0050, grad_fn=<NllLossBackward>)
tensor(0.0580, grad_fn=<NllLossBackward>)
tensor(0.0759, grad_fn=<NllLossBackward>)
tensor(0.0586, grad_fn=<NllLossBackward>)
tensor(0.1911, grad_fn=<NllLossBackward>)
tensor(0.2645, grad_fn=<NllLossBackward>)
tensor(0.2578, grad_fn=<NllLossBackward>)
tensor(0.1773, grad_fn=<NllLossBackward>)
tensor(0.0450, grad_fn=<NllLossBackward>)
tensor(0.0790, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.0162, grad_fn=<NllLossBackward>)
tensor(0.1573, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.0860, grad_fn=<NllLossBackward>)
tensor(0.1047, grad_fn=<NllLossBackward>)
tensor(0.1159, grad_fn=<NllLossBackward>)
tensor(0.1485, grad_fn=<NllLossBackward>)
tensor(0.0249, grad_fn=<NllLossBackward>)
tensor(0.0050, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBac

tensor(0.0342, grad_fn=<NllLossBackward>)
tensor(0.0794, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.1474, grad_fn=<NllLossBackward>)
tensor(0.1129, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.0759, grad_fn=<NllLossBackward>)
tensor(0.1061, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.0963, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.0130, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBackward>)
tensor(0.0035, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.1972, grad_fn=<NllLossBackward>)
tensor(0.0994, grad_fn=<NllLossBackward>)
tensor(0.0256, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.0081, grad_fn=<NllLossBac

tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.0339, grad_fn=<NllLossBackward>)
tensor(0.0820, grad_fn=<NllLossBackward>)
tensor(0.1391, grad_fn=<NllLossBackward>)
tensor(0.0400, grad_fn=<NllLossBackward>)
tensor(0.2191, grad_fn=<NllLossBackward>)
tensor(0.0121, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.0063, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBackward>)
tensor(0.1399, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.1549, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.1242, grad_fn=<NllLossBackward>)
tensor(0.1466, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.1903, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.0482, grad_fn=<NllLossBackward>)
tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.3046, grad_fn=<NllLossBackward>)
tensor(0.1267, grad_fn=<NllLossBac

tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.3463, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0778, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.1678, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.1018, grad_fn=<NllLossBackward>)
tensor(0.1009, grad_fn=<NllLossBackward>)
tensor(0.1125, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBackward>)
tensor(0.2190, grad_fn=<NllLossBackward>)
tensor(0.1537, grad_fn=<NllLossBackward>)
tensor(0.1723, grad_fn=<NllLossBackward>)
tensor(0.1584, grad_fn=<NllLossBackward>)
tensor(0.0026, grad_fn=<NllLossBackward>)
tensor(0.0707, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.2113, grad_fn=<NllLossBackward>)
tensor(0.1580, grad_fn=<NllLossBac

tensor(0.0781, grad_fn=<NllLossBackward>)
tensor(0.0724, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBackward>)
tensor(0.0391, grad_fn=<NllLossBackward>)
tensor(0.0702, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.0740, grad_fn=<NllLossBackward>)
tensor(0.3266, grad_fn=<NllLossBackward>)
tensor(0.0083, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.0241, grad_fn=<NllLossBackward>)
tensor(0.0304, grad_fn=<NllLossBackward>)
tensor(0.0617, grad_fn=<NllLossBackward>)
tensor(0.1430, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.1525, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.1735, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.2171, grad_fn=<NllLossBackward>)
tensor(0.2032, grad_fn=<NllLossBac

tensor(0.1093, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.0255, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.1613, grad_fn=<NllLossBackward>)
tensor(0.0050, grad_fn=<NllLossBackward>)
tensor(0.0867, grad_fn=<NllLossBackward>)
tensor(0.0391, grad_fn=<NllLossBackward>)
tensor(0.0031, grad_fn=<NllLossBackward>)
tensor(0.1874, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0491, grad_fn=<NllLossBackward>)
tensor(0.0726, grad_fn=<NllLossBackward>)
tensor(0.3151, grad_fn=<NllLossBackward>)
tensor(0.0102, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0254, grad_fn=<NllLossBackward>)
tensor(0.1991, grad_fn=<NllLossBackward>)
tensor(0.1620, grad_fn=<NllLossBac

tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.3424, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.1329, grad_fn=<NllLossBackward>)
tensor(0.1511, grad_fn=<NllLossBackward>)
tensor(0.1882, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.0204, grad_fn=<NllLossBackward>)
tensor(0.1966, grad_fn=<NllLossBackward>)
tensor(0.2498, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.1618, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.2830, grad_fn=<NllLossBac

tensor(0.1403, grad_fn=<NllLossBackward>)
tensor(0.2310, grad_fn=<NllLossBackward>)
tensor(0.1611, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0829, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.2648, grad_fn=<NllLossBackward>)
tensor(0.0180, grad_fn=<NllLossBackward>)
tensor(0.1312, grad_fn=<NllLossBackward>)
tensor(0.0368, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.1917, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.3105, grad_fn=<NllLossBackward>)
tensor(0.1480, grad_fn=<NllLossBackward>)
tensor(0.0750, grad_fn=<NllLossBackward>)
tensor(0.0238, grad_fn=<NllLossBackward>)
tensor(0.1713, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.0833, grad_fn=<NllLossBac

tensor(0.0043, grad_fn=<NllLossBackward>)
tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.0051, grad_fn=<NllLossBackward>)
tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.0954, grad_fn=<NllLossBackward>)
tensor(0.1862, grad_fn=<NllLossBackward>)
tensor(0.3202, grad_fn=<NllLossBackward>)
tensor(0.0629, grad_fn=<NllLossBackward>)
tensor(0.1371, grad_fn=<NllLossBackward>)
tensor(0.0053, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.1038, grad_fn=<NllLossBackward>)
tensor(0.0073, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.1728, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBackward>)
tensor(0.0986, grad_fn=<NllLossBackward>)
tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.0895, grad_fn=<NllLossBac

tensor(0.1602, grad_fn=<NllLossBackward>)
tensor(0.0123, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.0756, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.0704, grad_fn=<NllLossBackward>)
tensor(0.0406, grad_fn=<NllLossBackward>)
tensor(0.1300, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.1013, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.0399, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBackward>)
tensor(0.1473, grad_fn=<NllLossBackward>)
tensor(0.0870, grad_fn=<NllLossBackward>)
tensor(0.0293, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0345, grad_fn=<NllLossBackward>)
tensor(0.0161, grad_fn=<NllLossBackward>)
tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.1677, grad_fn=<NllLossBac

tensor(0.0172, grad_fn=<NllLossBackward>)
tensor(0.1230, grad_fn=<NllLossBackward>)
tensor(0.2933, grad_fn=<NllLossBackward>)
tensor(0.1053, grad_fn=<NllLossBackward>)
tensor(0.1557, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0647, grad_fn=<NllLossBackward>)
tensor(0.0137, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0703, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.1255, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBackward>)
tensor(0.1839, grad_fn=<NllLossBackward>)
tensor(0.1613, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.1129, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBac

tensor(0.1555, grad_fn=<NllLossBackward>)
tensor(0.1698, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.1947, grad_fn=<NllLossBackward>)
tensor(0.0284, grad_fn=<NllLossBackward>)
tensor(0.1256, grad_fn=<NllLossBackward>)
tensor(0.1567, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.3284, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.1635, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.0025, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.0356, grad_fn=<NllLossBackward>)
tensor(0.0920, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBackward>)
tensor(0.0403, grad_fn=<NllLossBackward>)
tensor(0.1147, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.0054, grad_fn=<NllLossBac

tensor(0.2127, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.0419, grad_fn=<NllLossBackward>)
tensor(0.1850, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.1430, grad_fn=<NllLossBackward>)
tensor(0.0820, grad_fn=<NllLossBackward>)
tensor(0.0269, grad_fn=<NllLossBackward>)
tensor(0.1716, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.1492, grad_fn=<NllLossBackward>)
tensor(0.0068, grad_fn=<NllLossBackward>)
tensor(0.0832, grad_fn=<NllLossBackward>)
tensor(0.1389, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.1478, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.2420, grad_fn=<NllLossBackward>)
tensor(0.1940, grad_fn=<NllLossBackward>)
tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBac

tensor(0.2004, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.0401, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.1476, grad_fn=<NllLossBackward>)
tensor(0.0947, grad_fn=<NllLossBackward>)
tensor(0.0455, grad_fn=<NllLossBackward>)
tensor(0.2148, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.2000, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.1132, grad_fn=<NllLossBackward>)
tensor(0.0389, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0255, grad_fn=<NllLossBackward>)
tensor(0.1367, grad_fn=<NllLossBac

tensor(0.0846, grad_fn=<NllLossBackward>)
tensor(0.0248, grad_fn=<NllLossBackward>)
tensor(0.1352, grad_fn=<NllLossBackward>)
tensor(0.0990, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.0031, grad_fn=<NllLossBackward>)
tensor(0.2235, grad_fn=<NllLossBackward>)
tensor(0.2165, grad_fn=<NllLossBackward>)
tensor(0.0641, grad_fn=<NllLossBackward>)
tensor(0.1467, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.0438, grad_fn=<NllLossBackward>)
tensor(0.0235, grad_fn=<NllLossBackward>)
tensor(0.0042, grad_fn=<NllLossBackward>)
tensor(0.1604, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0808, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBac

tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0918, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.3634, grad_fn=<NllLossBackward>)
tensor(0.0412, grad_fn=<NllLossBackward>)
tensor(0.3097, grad_fn=<NllLossBackward>)
tensor(0.1540, grad_fn=<NllLossBackward>)
tensor(0.0074, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.0972, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.1973, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.0252, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBackward>)
tensor(0.1447, grad_fn=<NllLossBackward>)
tensor(0.0544, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBac

tensor(0.0534, grad_fn=<NllLossBackward>)
tensor(0.0298, grad_fn=<NllLossBackward>)
tensor(0.1649, grad_fn=<NllLossBackward>)
tensor(0.1474, grad_fn=<NllLossBackward>)
tensor(0.3832, grad_fn=<NllLossBackward>)
tensor(0.2036, grad_fn=<NllLossBackward>)
tensor(0.2051, grad_fn=<NllLossBackward>)
tensor(0.0929, grad_fn=<NllLossBackward>)
tensor(0.2427, grad_fn=<NllLossBackward>)
tensor(0.0306, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0798, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.0632, grad_fn=<NllLossBackward>)
tensor(0.1575, grad_fn=<NllLossBackward>)
tensor(0.1626, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0763, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.0944, grad_fn=<NllLossBackward>)
tensor(0.1192, grad_fn=<NllLossBackward>)
tensor(0.1206, grad_fn=<NllLossBac

tensor(0.0974, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0131, grad_fn=<NllLossBackward>)
tensor(0.1944, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.0170, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.0091, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0891, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.0097, grad_fn=<NllLossBackward>)
tensor(0.2581, grad_fn=<NllLossBackward>)
tensor(0.0066, grad_fn=<NllLossBackward>)
tensor(0.0339, grad_fn=<NllLossBackward>)
tensor(0.0256, grad_fn=<NllLossBackward>)
tensor(0.2144, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.1078, grad_fn=<NllLossBackward>)
tensor(0.0195, grad_fn=<NllLossBackward>)
tensor(0.0508, grad_fn=<NllLossBac

tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.1399, grad_fn=<NllLossBackward>)
tensor(0.2224, grad_fn=<NllLossBackward>)
tensor(0.0261, grad_fn=<NllLossBackward>)
tensor(0.3627, grad_fn=<NllLossBackward>)
tensor(0.0799, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBackward>)
tensor(0.0326, grad_fn=<NllLossBackward>)
tensor(0.1330, grad_fn=<NllLossBackward>)
tensor(0.0126, grad_fn=<NllLossBackward>)
tensor(0.0673, grad_fn=<NllLossBackward>)
tensor(0.1704, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.2135, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.3714, grad_fn=<NllLossBackward>)
tensor(0.1775, grad_fn=<NllLossBackward>)
tensor(0.0825, grad_fn=<NllLossBackward>)
tensor(0.0587, grad_fn=<NllLossBackward>)
tensor(0.2655, grad_fn=<NllLossBackward>)
tensor(0.0961, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBac

tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.1403, grad_fn=<NllLossBackward>)
tensor(0.1506, grad_fn=<NllLossBackward>)
tensor(0.2292, grad_fn=<NllLossBackward>)
tensor(0.2219, grad_fn=<NllLossBackward>)
tensor(0.0719, grad_fn=<NllLossBackward>)
tensor(0.0498, grad_fn=<NllLossBackward>)
tensor(0.1359, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.1502, grad_fn=<NllLossBackward>)
tensor(0.0712, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.0312, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1841, grad_fn=<NllLossBackward>)
tensor(0.0681, grad_fn=<NllLossBackward>)
tensor(0.0546, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.0180, grad_fn=<NllLossBackward>)
tensor(0.0206, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.3104, grad_fn=<NllLossBac

tensor(0.1093, grad_fn=<NllLossBackward>)
tensor(0.0985, grad_fn=<NllLossBackward>)
tensor(0.1761, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.1261, grad_fn=<NllLossBackward>)
tensor(0.0654, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.0083, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.1798, grad_fn=<NllLossBackward>)
tensor(0.0156, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBackward>)
tensor(0.0376, grad_fn=<NllLossBackward>)
tensor(0.1015, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.0521, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.2302, grad_fn=<NllLossBac

tensor(0.0301, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.1258, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0616, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.0431, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.0357, grad_fn=<NllLossBackward>)
tensor(0.1686, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.0971, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.0486, grad_fn=<NllLossBac

tensor(0.0085, grad_fn=<NllLossBackward>)
tensor(0.0949, grad_fn=<NllLossBackward>)
tensor(0.2093, grad_fn=<NllLossBackward>)
tensor(0.1230, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0079, grad_fn=<NllLossBackward>)
tensor(0.0054, grad_fn=<NllLossBackward>)
tensor(0.1630, grad_fn=<NllLossBackward>)
tensor(0.0356, grad_fn=<NllLossBackward>)
tensor(0.0712, grad_fn=<NllLossBackward>)
tensor(0.0880, grad_fn=<NllLossBackward>)
tensor(0.0249, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.2011, grad_fn=<NllLossBackward>)
tensor(0.0345, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.0097, grad_fn=<NllLossBackward>)
tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.0342, grad_fn=<NllLossBackward>)
tensor(0.0047, grad_fn=<NllLossBac

tensor(0.0728, grad_fn=<NllLossBackward>)
tensor(0.1219, grad_fn=<NllLossBackward>)
tensor(0.1462, grad_fn=<NllLossBackward>)
tensor(0.1159, grad_fn=<NllLossBackward>)
tensor(0.0087, grad_fn=<NllLossBackward>)
tensor(0.0117, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.3465, grad_fn=<NllLossBackward>)
tensor(0.0048, grad_fn=<NllLossBackward>)
tensor(0.0399, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.0085, grad_fn=<NllLossBackward>)
tensor(0.0729, grad_fn=<NllLossBackward>)
tensor(0.0032, grad_fn=<NllLossBackward>)
tensor(0.5405, grad_fn=<NllLossBackward>)
tensor(0.3520, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0007, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.0403, grad_fn=<NllLossBac

tensor(0.0514, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.1677, grad_fn=<NllLossBackward>)
tensor(0.3126, grad_fn=<NllLossBackward>)
tensor(0.0046, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1101, grad_fn=<NllLossBackward>)
tensor(0.2334, grad_fn=<NllLossBackward>)
tensor(0.2003, grad_fn=<NllLossBackward>)
tensor(0.0053, grad_fn=<NllLossBackward>)
tensor(0.0327, grad_fn=<NllLossBackward>)
tensor(0.0067, grad_fn=<NllLossBackward>)
tensor(0.1453, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0102, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.0201, grad_fn=<NllLossBac

tensor(0.1336, grad_fn=<NllLossBackward>)
tensor(0.1443, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.0580, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.1312, grad_fn=<NllLossBackward>)
tensor(0.3213, grad_fn=<NllLossBackward>)
tensor(0.1845, grad_fn=<NllLossBackward>)
tensor(0.0258, grad_fn=<NllLossBackward>)
tensor(0.0603, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.0415, grad_fn=<NllLossBackward>)
tensor(0.1575, grad_fn=<NllLossBackward>)
tensor(0.2222, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.2222, grad_fn=<NllLossBackward>)
tensor(0.1033, grad_fn=<NllLossBackward>)
tensor(0.2420, grad_fn=<NllLossBackward>)
tensor(0.0105, grad_fn=<NllLossBac

tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.1940, grad_fn=<NllLossBackward>)
tensor(0.1351, grad_fn=<NllLossBackward>)
tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.1232, grad_fn=<NllLossBackward>)
tensor(0.0990, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.0339, grad_fn=<NllLossBackward>)
tensor(0.2053, grad_fn=<NllLossBackward>)
tensor(0.1417, grad_fn=<NllLossBackward>)
tensor(0.1666, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.1157, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0846, grad_fn=<NllLossBackward>)
tensor(0.0799, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.0728, grad_fn=<NllLossBac

tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.1350, grad_fn=<NllLossBackward>)
tensor(0.0107, grad_fn=<NllLossBackward>)
tensor(0.1716, grad_fn=<NllLossBackward>)
tensor(0.0553, grad_fn=<NllLossBackward>)
tensor(0.1632, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.0171, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.2195, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.0530, grad_fn=<NllLossBackward>)
tensor(0.0707, grad_fn=<NllLossBackward>)
tensor(0.1574, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0576, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.0171, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.3115, grad_fn=<NllLossBackward>)
tensor(0.0722, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBac

tensor(0.1241, grad_fn=<NllLossBackward>)
tensor(0.0860, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.1023, grad_fn=<NllLossBackward>)
tensor(0.0797, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.1485, grad_fn=<NllLossBackward>)
tensor(0.1142, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.1385, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.1217, grad_fn=<NllLossBackward>)
tensor(0.0181, grad_fn=<NllLossBackward>)
tensor(0.0748, grad_fn=<NllLossBackward>)
tensor(0.0106, grad_fn=<NllLossBackward>)
tensor(0.1421, grad_fn=<NllLossBackward>)
tensor(0.0250, grad_fn=<NllLossBackward>)
tensor(0.1091, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBackward>)
tensor(0.0930, grad_fn=<NllLossBackward>)
tensor(0.1438, grad_fn=<NllLossBac

tensor(0.1525, grad_fn=<NllLossBackward>)
tensor(0.0054, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.2115, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.1486, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.0618, grad_fn=<NllLossBackward>)
tensor(0.0319, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.0298, grad_fn=<NllLossBackward>)
tensor(0.0357, grad_fn=<NllLossBackward>)
tensor(0.1326, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0325, grad_fn=<NllLossBackward>)
tensor(0.1293, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0166, grad_fn=<NllLossBackward>)
tensor(0.0128, grad_fn=<NllLossBackward>)
tensor(0.0048, grad_fn=<NllLossBackward>)
tensor(0.1096, grad_fn=<NllLossBac

tensor(0.2147, grad_fn=<NllLossBackward>)
tensor(0.0914, grad_fn=<NllLossBackward>)
tensor(0.0539, grad_fn=<NllLossBackward>)
tensor(0.2293, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.0092, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.1852, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0345, grad_fn=<NllLossBackward>)
tensor(0.0906, grad_fn=<NllLossBackward>)
tensor(0.2052, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.2427, grad_fn=<NllLossBackward>)
tensor(0.0084, grad_fn=<NllLossBackward>)
tensor(0.0096, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0044, grad_fn=<NllLossBackward>)
tensor(0.1391, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBackward>)
tensor(0.1460, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBac

tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.0225, grad_fn=<NllLossBackward>)
tensor(0.1712, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.0692, grad_fn=<NllLossBackward>)
tensor(0.1820, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.2239, grad_fn=<NllLossBackward>)
tensor(0.3540, grad_fn=<NllLossBackward>)
tensor(0.0067, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBackward>)
tensor(0.1311, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.1381, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.0647, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.0281, grad_fn=<NllLossBac

tensor(0.1336, grad_fn=<NllLossBackward>)
tensor(0.1530, grad_fn=<NllLossBackward>)
tensor(0.0911, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0988, grad_fn=<NllLossBackward>)
tensor(0.0052, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.1840, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.2188, grad_fn=<NllLossBackward>)
tensor(0.1447, grad_fn=<NllLossBackward>)
tensor(0.1333, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.1542, grad_fn=<NllLossBackward>)
tensor(0.8383, grad_fn=<NllLossBackward>)
tensor(0.2174, grad_fn=<NllLossBackward>)
tensor(0.1106, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBac

tensor(0.0252, grad_fn=<NllLossBackward>)
tensor(0.1968, grad_fn=<NllLossBackward>)
tensor(0.2809, grad_fn=<NllLossBackward>)
tensor(0.0581, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.2778, grad_fn=<NllLossBackward>)
tensor(0.1840, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.1241, grad_fn=<NllLossBackward>)
tensor(0.0063, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0153, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0197, grad_fn=<NllLossBackward>)
tensor(0.1580, grad_fn=<NllLossBackward>)
tensor(0.0553, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0684, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBac

tensor(0.0510, grad_fn=<NllLossBackward>)
tensor(0.0135, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.0740, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.0571, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.0647, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0108, grad_fn=<NllLossBackward>)
tensor(0.1211, grad_fn=<NllLossBackward>)
tensor(0.1569, grad_fn=<NllLossBackward>)
tensor(0.2673, grad_fn=<NllLossBackward>)
tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.0916, grad_fn=<NllLossBackward>)
tensor(0.2207, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBackward>)
tensor(0.0439, grad_fn=<NllLossBackward>)
tensor(0.1794, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBackward>)
tensor(0.1610, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBac

tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.1042, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.0161, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0756, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.1101, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.1411, grad_fn=<NllLossBackward>)
tensor(0.2971, grad_fn=<NllLossBackward>)
tensor(0.0400, grad_fn=<NllLossBackward>)
tensor(0.1993, grad_fn=<NllLossBackward>)
tensor(0.0283, grad_fn=<NllLossBackward>)
tensor(0.0300, grad_fn=<NllLossBackward>)
tensor(0.1396, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBac

tensor(0.1028, grad_fn=<NllLossBackward>)
tensor(0.1468, grad_fn=<NllLossBackward>)
tensor(0.1054, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.0064, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.0036, grad_fn=<NllLossBackward>)
tensor(0.1292, grad_fn=<NllLossBackward>)
tensor(0.0035, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.0098, grad_fn=<NllLossBackward>)
tensor(0.2471, grad_fn=<NllLossBackward>)
tensor(0.0024, grad_fn=<NllLossBackward>)
tensor(0.0717, grad_fn=<NllLossBackward>)
tensor(0.1167, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.1838, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.3862, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.0130, grad_fn=<NllLossBac

tensor(0.0129, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.1477, grad_fn=<NllLossBackward>)
tensor(0.2696, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.0569, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.0105, grad_fn=<NllLossBackward>)
tensor(0.1126, grad_fn=<NllLossBackward>)
tensor(0.0283, grad_fn=<NllLossBackward>)
tensor(0.0441, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.2848, grad_fn=<NllLossBackward>)
tensor(0.0047, grad_fn=<NllLossBackward>)
tensor(0.1711, grad_fn=<NllLossBackward>)
tensor(0.1256, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0605, grad_fn=<NllLossBackward>)
tensor(0.1185, grad_fn=<NllLossBackward>)
tensor(0.2865, grad_fn=<NllLossBac

tensor(0.0054, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBackward>)
tensor(0.0318, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.1038, grad_fn=<NllLossBackward>)
tensor(0.0335, grad_fn=<NllLossBackward>)
tensor(0.0449, grad_fn=<NllLossBackward>)
tensor(0.1807, grad_fn=<NllLossBackward>)
tensor(0.0724, grad_fn=<NllLossBackward>)
tensor(0.0201, grad_fn=<NllLossBackward>)
tensor(0.0213, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
tensor(0.0253, grad_fn=<NllLossBackward>)
tensor(0.0155, grad_fn=<NllLossBackward>)
tensor(0.1568, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.0631, grad_fn=<NllLossBac

tensor(0.2580, grad_fn=<NllLossBackward>)
tensor(0.0278, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.0406, grad_fn=<NllLossBackward>)
tensor(0.4581, grad_fn=<NllLossBackward>)
tensor(0.0851, grad_fn=<NllLossBackward>)
tensor(0.0681, grad_fn=<NllLossBackward>)
tensor(0.1404, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.0163, grad_fn=<NllLossBackward>)
tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0851, grad_fn=<NllLossBackward>)
tensor(0.2146, grad_fn=<NllLossBackward>)
tensor(0.0016, grad_fn=<NllLossBackward>)
tensor(0.1124, grad_fn=<NllLossBackward>)
tensor(0.2169, grad_fn=<NllLossBackward>)
tensor(0.1542, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBac

tensor(0.0643, grad_fn=<NllLossBackward>)
tensor(0.0480, grad_fn=<NllLossBackward>)
tensor(0.0544, grad_fn=<NllLossBackward>)
tensor(0.0395, grad_fn=<NllLossBackward>)
tensor(0.2510, grad_fn=<NllLossBackward>)
tensor(0.1751, grad_fn=<NllLossBackward>)
tensor(0.0240, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.1489, grad_fn=<NllLossBackward>)
tensor(0.2957, grad_fn=<NllLossBackward>)
tensor(0.1551, grad_fn=<NllLossBackward>)
tensor(0.1620, grad_fn=<NllLossBackward>)
tensor(0.1159, grad_fn=<NllLossBackward>)
tensor(0.2098, grad_fn=<NllLossBackward>)
tensor(0.1078, grad_fn=<NllLossBackward>)
tensor(0.1290, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.1211, grad_fn=<NllLossBackward>)
tensor(0.1370, grad_fn=<NllLossBackward>)
tensor(0.0621, grad_fn=<NllLossBackward>)
tensor(0.1200, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBac

tensor(0.1414, grad_fn=<NllLossBackward>)
tensor(0.1018, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0992, grad_fn=<NllLossBackward>)
tensor(0.1113, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0455, grad_fn=<NllLossBackward>)
tensor(0.0469, grad_fn=<NllLossBackward>)
tensor(0.1427, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.1732, grad_fn=<NllLossBackward>)
tensor(0.0038, grad_fn=<NllLossBackward>)
tensor(0.3244, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.1023, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBac

tensor(0.1007, grad_fn=<NllLossBackward>)
tensor(0.0063, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.0113, grad_fn=<NllLossBackward>)
tensor(0.1289, grad_fn=<NllLossBackward>)
tensor(0.1806, grad_fn=<NllLossBackward>)
tensor(0.0284, grad_fn=<NllLossBackward>)
tensor(0.2180, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.2071, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.1406, grad_fn=<NllLossBackward>)
tensor(0.1346, grad_fn=<NllLossBackward>)
tensor(0.0933, grad_fn=<NllLossBackward>)
tensor(0.0709, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.0525, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBac

tensor(0.1470, grad_fn=<NllLossBackward>)
tensor(0.1282, grad_fn=<NllLossBackward>)
tensor(0.0528, grad_fn=<NllLossBackward>)
tensor(0.1198, grad_fn=<NllLossBackward>)
tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.2758, grad_fn=<NllLossBackward>)
tensor(0.0335, grad_fn=<NllLossBackward>)
tensor(0.1275, grad_fn=<NllLossBackward>)
tensor(0.1000, grad_fn=<NllLossBackward>)
tensor(0.0902, grad_fn=<NllLossBackward>)
tensor(0.0307, grad_fn=<NllLossBackward>)
tensor(0.0191, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.1550, grad_fn=<NllLossBackward>)
tensor(0.1829, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0313, grad_fn=<NllLossBackward>)
tensor(0.1031, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.0129, grad_fn=<NllLossBac

tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.1631, grad_fn=<NllLossBackward>)
tensor(0.0711, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBackward>)
tensor(0.1770, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.1198, grad_fn=<NllLossBackward>)
tensor(0.1510, grad_fn=<NllLossBackward>)
tensor(0.0946, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBackward>)
tensor(0.0331, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.3631, grad_fn=<NllLossBackward>)
tensor(0.0469, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.3423, grad_fn=<NllLossBackward>)
tensor(0.0545, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBac

tensor(0.0364, grad_fn=<NllLossBackward>)
tensor(0.1209, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.0325, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1953, grad_fn=<NllLossBackward>)
tensor(0.1516, grad_fn=<NllLossBackward>)
tensor(0.0264, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.2567, grad_fn=<NllLossBackward>)
tensor(0.0241, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.1636, grad_fn=<NllLossBackward>)
tensor(0.1372, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBackward>)
tensor(0.0952, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.0332, grad_fn=<NllLossBackward>)
tensor(0.0055, grad_fn=<NllLossBackward>)
tensor(0.0870, grad_fn=<NllLossBackward>)
tensor(0.1290, grad_fn=<NllLossBackward>)
tensor(0.0719, grad_fn=<NllLossBackward>)
tensor(0.0796, grad_fn=<NllLossBac

tensor(0.1438, grad_fn=<NllLossBackward>)
tensor(0.1109, grad_fn=<NllLossBackward>)
tensor(0.1285, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.1486, grad_fn=<NllLossBackward>)
tensor(0.1864, grad_fn=<NllLossBackward>)
tensor(0.0535, grad_fn=<NllLossBackward>)
tensor(0.1656, grad_fn=<NllLossBackward>)
tensor(0.1550, grad_fn=<NllLossBackward>)
tensor(0.0909, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.1117, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.0364, grad_fn=<NllLossBackward>)
tensor(0.2645, grad_fn=<NllLossBackward>)
tensor(0.1651, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBac

tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBackward>)
tensor(0.1169, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBackward>)
tensor(0.0138, grad_fn=<NllLossBackward>)
tensor(0.0560, grad_fn=<NllLossBackward>)
tensor(0.1198, grad_fn=<NllLossBackward>)
tensor(0.0045, grad_fn=<NllLossBackward>)
tensor(0.0946, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0046, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.0974, grad_fn=<NllLossBackward>)
tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.0046, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0502, grad_fn=<NllLossBackward>)
tensor(0.0037, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.0094, grad_fn=<NllLossBac

tensor(0.0395, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.0755, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0920, grad_fn=<NllLossBackward>)
tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.0570, grad_fn=<NllLossBackward>)
tensor(0.0112, grad_fn=<NllLossBackward>)
tensor(0.0532, grad_fn=<NllLossBackward>)
tensor(0.1282, grad_fn=<NllLossBackward>)
tensor(0.0156, grad_fn=<NllLossBackward>)
tensor(0.2336, grad_fn=<NllLossBackward>)
tensor(0.0960, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.2008, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.0132, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.1069, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBac

tensor(0.1295, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.1342, grad_fn=<NllLossBackward>)
tensor(0.0649, grad_fn=<NllLossBackward>)
tensor(0.2529, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.1077, grad_fn=<NllLossBackward>)
tensor(0.0301, grad_fn=<NllLossBackward>)
tensor(0.0825, grad_fn=<NllLossBackward>)
tensor(0.1282, grad_fn=<NllLossBackward>)
tensor(0.1459, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.0083, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.1098, grad_fn=<NllLossBackward>)
tensor(0.3379, grad_fn=<NllLossBackward>)
tensor(0.0183, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.0809, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.0084, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBac

tensor(0.1255, grad_fn=<NllLossBackward>)
tensor(0.1821, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBackward>)
tensor(0.1008, grad_fn=<NllLossBackward>)
tensor(0.0498, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.0066, grad_fn=<NllLossBackward>)
tensor(0.1782, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.0186, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.2007, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.0906, grad_fn=<NllLossBackward>)
tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.0062, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.1708, grad_fn=<NllLossBackward>)
tensor(0.1608, grad_fn=<NllLossBackward>)
tensor(0.0558, grad_fn=<NllLossBac

tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.0728, grad_fn=<NllLossBackward>)
tensor(0.0711, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.1766, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.1082, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.0423, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBackward>)
tensor(0.1381, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0301, grad_fn=<NllLossBackward>)
tensor(0.2272, grad_fn=<NllLossBackward>)
tensor(0.0342, grad_fn=<NllLossBackward>)
tensor(0.0048, grad_fn=<NllLossBackward>)
tensor(0.0836, grad_fn=<NllLossBackward>)
tensor(0.1009, grad_fn=<NllLossBackward>)
tensor(0.1307, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBac

tensor(0.0543, grad_fn=<NllLossBackward>)
tensor(0.0297, grad_fn=<NllLossBackward>)
tensor(0.0003, grad_fn=<NllLossBackward>)
tensor(0.1504, grad_fn=<NllLossBackward>)
tensor(0.1832, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBackward>)
tensor(0.0152, grad_fn=<NllLossBackward>)
tensor(0.0120, grad_fn=<NllLossBackward>)
tensor(0.0616, grad_fn=<NllLossBackward>)
tensor(0.2395, grad_fn=<NllLossBackward>)
tensor(0.0089, grad_fn=<NllLossBackward>)
tensor(0.4186, grad_fn=<NllLossBackward>)
tensor(0.0985, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.2209, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0222, grad_fn=<NllLossBackward>)
tensor(0.0338, grad_fn=<NllLossBackward>)
tensor(0.0481, grad_fn=<NllLossBackward>)
tensor(0.1133, grad_fn=<NllLossBac

tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.1461, grad_fn=<NllLossBackward>)
tensor(0.0370, grad_fn=<NllLossBackward>)
tensor(0.1406, grad_fn=<NllLossBackward>)
tensor(0.0007, grad_fn=<NllLossBackward>)
tensor(0.2764, grad_fn=<NllLossBackward>)
tensor(0.1178, grad_fn=<NllLossBackward>)
tensor(0.0128, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBackward>)
tensor(0.3673, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.1819, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.1440, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0274, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.2314, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBac

tensor(0.0052, grad_fn=<NllLossBackward>)
tensor(0.0827, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.0874, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.0606, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.0116, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0258, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.0420, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBackward>)
tensor(0.1205, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.2179, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBac

tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.1900, grad_fn=<NllLossBackward>)
tensor(0.2109, grad_fn=<NllLossBackward>)
tensor(0.1126, grad_fn=<NllLossBackward>)
tensor(0.0335, grad_fn=<NllLossBackward>)
tensor(0.2170, grad_fn=<NllLossBackward>)
tensor(0.3594, grad_fn=<NllLossBackward>)
tensor(0.2288, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.0652, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.0412, grad_fn=<NllLossBackward>)
tensor(0.1343, grad_fn=<NllLossBackward>)
tensor(0.0125, grad_fn=<NllLossBackward>)
tensor(0.0798, grad_fn=<NllLossBackward>)
tensor(0.0182, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.2434, grad_fn=<NllLossBac

tensor(0.0420, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.1860, grad_fn=<NllLossBackward>)
tensor(0.1140, grad_fn=<NllLossBackward>)
tensor(0.1012, grad_fn=<NllLossBackward>)
tensor(0.1681, grad_fn=<NllLossBackward>)
tensor(0.1378, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.0127, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.0878, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.1208, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.0210, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.2484, grad_fn=<NllLossBackward>)
tensor(0.0419, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBac

tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.0932, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.1442, grad_fn=<NllLossBackward>)
tensor(0.1831, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.4085, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.2292, grad_fn=<NllLossBackward>)
tensor(0.1445, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBackward>)
tensor(0.0552, grad_fn=<NllLossBackward>)
tensor(0.2003, grad_fn=<NllLossBackward>)
tensor(0.0200, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.0303, grad_fn=<NllLossBackward>)
tensor(0.1056, grad_fn=<NllLossBackward>)
tensor(0.0123, grad_fn=<NllLossBackward>)
tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.0449, grad_fn=<NllLossBac

tensor(0.1288, grad_fn=<NllLossBackward>)
tensor(0.0236, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.0162, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.0069, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.0171, grad_fn=<NllLossBackward>)
tensor(0.0081, grad_fn=<NllLossBackward>)
tensor(0.3588, grad_fn=<NllLossBackward>)
tensor(0.0047, grad_fn=<NllLossBackward>)
tensor(0.2493, grad_fn=<NllLossBackward>)
tensor(0.4304, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.0438, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.0374, grad_fn=<NllLossBackward>)
tensor(0.0493, grad_fn=<NllLossBackward>)
tensor(0.0108, grad_fn=<NllLossBackward>)
tensor(0.1756, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0746, grad_fn=<NllLossBackward>)
tensor(0.2015, grad_fn=<NllLossBackward>)
tensor(0.1644, grad_fn=<NllLossBac

tensor(0.0532, grad_fn=<NllLossBackward>)
tensor(0.0901, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.0181, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.2577, grad_fn=<NllLossBackward>)
tensor(0.1027, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.1911, grad_fn=<NllLossBackward>)
tensor(0.0533, grad_fn=<NllLossBackward>)
tensor(0.0304, grad_fn=<NllLossBackward>)
tensor(0.2067, grad_fn=<NllLossBackward>)
tensor(0.1321, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.0152, grad_fn=<NllLossBackward>)
tensor(0.1067, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.0510, grad_fn=<NllLossBackward>)
tensor(0.1383, grad_fn=<NllLossBackward>)
tensor(0.0188, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBac

tensor(0.1992, grad_fn=<NllLossBackward>)
tensor(0.0035, grad_fn=<NllLossBackward>)
tensor(0.0181, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.0573, grad_fn=<NllLossBackward>)
tensor(0.0055, grad_fn=<NllLossBackward>)
tensor(0.1025, grad_fn=<NllLossBackward>)
tensor(0.0978, grad_fn=<NllLossBackward>)
tensor(0.1076, grad_fn=<NllLossBackward>)
tensor(0.0096, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBackward>)
tensor(0.0029, grad_fn=<NllLossBackward>)
tensor(0.0552, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0970, grad_fn=<NllLossBackward>)
tensor(0.0407, grad_fn=<NllLossBackward>)
tensor(0.0572, grad_fn=<NllLossBackward>)
tensor(0.2336, grad_fn=<NllLossBackward>)
tensor(0.0702, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.1809, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0583, grad_fn=<NllLossBackward>)
tensor(0.0959, grad_fn=<NllLossBac

tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.2103, grad_fn=<NllLossBackward>)
tensor(0.2949, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.0079, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.2255, grad_fn=<NllLossBackward>)
tensor(0.1655, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0207, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1001, grad_fn=<NllLossBackward>)
tensor(0.0746, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1028, grad_fn=<NllLossBac

tensor(0.0061, grad_fn=<NllLossBackward>)
tensor(0.1760, grad_fn=<NllLossBackward>)
tensor(0.0217, grad_fn=<NllLossBackward>)
tensor(0.0668, grad_fn=<NllLossBackward>)
tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.0843, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.2051, grad_fn=<NllLossBackward>)
tensor(0.2171, grad_fn=<NllLossBackward>)
tensor(0.0026, grad_fn=<NllLossBackward>)
tensor(0.0431, grad_fn=<NllLossBackward>)
tensor(0.2711, grad_fn=<NllLossBackward>)
tensor(0.0368, grad_fn=<NllLossBackward>)
tensor(0.0056, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.1177, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.1090, grad_fn=<NllLossBackward>)
tensor(0.0683, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBac

tensor(0.0419, grad_fn=<NllLossBackward>)
tensor(0.0778, grad_fn=<NllLossBackward>)
tensor(0.1526, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0463, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.0832, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.0254, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.0919, grad_fn=<NllLossBackward>)
tensor(0.0272, grad_fn=<NllLossBackward>)
tensor(0.0293, grad_fn=<NllLossBackward>)
tensor(0.0373, grad_fn=<NllLossBackward>)
tensor(0.1065, grad_fn=<NllLossBackward>)
tensor(0.2274, grad_fn=<NllLossBackward>)
tensor(0.2115, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBac

tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBackward>)
tensor(0.0296, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.0036, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.1473, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.0089, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.2472, grad_fn=<NllLossBackward>)
tensor(0.1833, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBac

tensor(0.1894, grad_fn=<NllLossBackward>)
tensor(0.2362, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.0132, grad_fn=<NllLossBackward>)
tensor(0.1164, grad_fn=<NllLossBackward>)
tensor(0.0925, grad_fn=<NllLossBackward>)
tensor(0.0063, grad_fn=<NllLossBackward>)
tensor(0.1183, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBackward>)
tensor(0.2335, grad_fn=<NllLossBackward>)
tensor(0.1048, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.0216, grad_fn=<NllLossBackward>)
tensor(0.1380, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.0066, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.1202, grad_fn=<NllLossBackward>)
tensor(0.1403, grad_fn=<NllLossBackward>)
tensor(0.0319, grad_fn=<NllLossBackward>)
tensor(0.2749, grad_fn=<NllLossBac

tensor(0.0356, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.0594, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.0910, grad_fn=<NllLossBackward>)
tensor(0.2359, grad_fn=<NllLossBackward>)
tensor(0.1165, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.0321, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.1366, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.0447, grad_fn=<NllLossBackward>)
tensor(0.0616, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0408, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0019, grad_fn=<NllLossBac

tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.1766, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.0832, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.2251, grad_fn=<NllLossBackward>)
tensor(0.1362, grad_fn=<NllLossBackward>)
tensor(0.1807, grad_fn=<NllLossBackward>)
tensor(0.1813, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.0126, grad_fn=<NllLossBackward>)
tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.1104, grad_fn=<NllLossBackward>)
tensor(0.1603, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.2765, grad_fn=<NllLossBac

tensor(0.1804, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.0308, grad_fn=<NllLossBackward>)
tensor(0.0112, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBackward>)
tensor(0.1369, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.0962, grad_fn=<NllLossBackward>)
tensor(0.0065, grad_fn=<NllLossBackward>)
tensor(0.0676, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1723, grad_fn=<NllLossBackward>)
tensor(0.0525, grad_fn=<NllLossBackward>)
tensor(0.0518, grad_fn=<NllLossBackward>)
tensor(0.0510, grad_fn=<NllLossBackward>)
tensor(0.0166, grad_fn=<NllLossBackward>)
tensor(0.0116, grad_fn=<NllLossBackward>)
tensor(0.0488, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBackward>)
tensor(0.0301, grad_fn=<NllLossBackward>)
tensor(0.1189, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBac

tensor(0.1174, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.0495, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.1347, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.2183, grad_fn=<NllLossBackward>)
tensor(0.0441, grad_fn=<NllLossBackward>)
tensor(0.0808, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.0180, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.1241, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.0258, grad_fn=<NllLossBackward>)
tensor(0.0172, grad_fn=<NllLossBackward>)
tensor(0.0673, grad_fn=<NllLossBackward>)
tensor(0.0928, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.0658, grad_fn=<NllLossBackward>)
tensor(0.1075, grad_fn=<NllLossBac

tensor(0.1166, grad_fn=<NllLossBackward>)
tensor(0.1221, grad_fn=<NllLossBackward>)
tensor(0.0023, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.0502, grad_fn=<NllLossBackward>)
tensor(0.0144, grad_fn=<NllLossBackward>)
tensor(0.0103, grad_fn=<NllLossBackward>)
tensor(0.2125, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.0241, grad_fn=<NllLossBackward>)
tensor(0.0267, grad_fn=<NllLossBackward>)
tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.0267, grad_fn=<NllLossBackward>)
tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0103, grad_fn=<NllLossBackward>)
tensor(0.0087, grad_fn=<NllLossBackward>)
tensor(0.0944, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.0978, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.2160, grad_fn=<NllLossBackward>)
tensor(0.0423, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBac

tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.0802, grad_fn=<NllLossBackward>)
tensor(0.0327, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.0106, grad_fn=<NllLossBackward>)
tensor(0.0089, grad_fn=<NllLossBackward>)
tensor(0.0083, grad_fn=<NllLossBackward>)
tensor(0.1654, grad_fn=<NllLossBackward>)
tensor(0.1989, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.0064, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.0585, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0525, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBac

tensor(0.0332, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0043, grad_fn=<NllLossBackward>)
tensor(0.1370, grad_fn=<NllLossBackward>)
tensor(0.0364, grad_fn=<NllLossBackward>)
tensor(0.0146, grad_fn=<NllLossBackward>)
tensor(0.0354, grad_fn=<NllLossBackward>)
tensor(0.0227, grad_fn=<NllLossBackward>)
tensor(0.1694, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.0041, grad_fn=<NllLossBackward>)
tensor(0.1328, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBackward>)
tensor(0.0317, grad_fn=<NllLossBackward>)
tensor(0.2585, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.1671, grad_fn=<NllLossBackward>)
tensor(0.0802, grad_fn=<NllLossBackward>)
tensor(0.0595, grad_fn=<NllLossBac

tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0944, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0562, grad_fn=<NllLossBackward>)
tensor(0.0024, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.1909, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.0154, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.1080, grad_fn=<NllLossBackward>)
tensor(0.0059, grad_fn=<NllLossBackward>)
tensor(0.1606, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.0493, grad_fn=<NllLossBac

tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.2313, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.1512, grad_fn=<NllLossBackward>)
tensor(0.0319, grad_fn=<NllLossBackward>)
tensor(0.0696, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0832, grad_fn=<NllLossBackward>)
tensor(0.0015, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBackward>)
tensor(0.0073, grad_fn=<NllLossBackward>)
tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.0390, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.0430, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBac

tensor(0.2564, grad_fn=<NllLossBackward>)
tensor(0.0499, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.2199, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.1254, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.1806, grad_fn=<NllLossBackward>)
tensor(0.0682, grad_fn=<NllLossBackward>)
tensor(0.1239, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.1322, grad_fn=<NllLossBackward>)
tensor(0.1043, grad_fn=<NllLossBackward>)
tensor(0.0054, grad_fn=<NllLossBackward>)
tensor(0.0105, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.1892, grad_fn=<NllLossBackward>)
tensor(0.3164, grad_fn=<NllLossBackward>)
tensor(0.1909, grad_fn=<NllLossBac

tensor(0.1319, grad_fn=<NllLossBackward>)
tensor(0.0147, grad_fn=<NllLossBackward>)
tensor(0.0990, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.0235, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.1091, grad_fn=<NllLossBackward>)
tensor(0.1490, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.1185, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBackward>)
tensor(0.1444, grad_fn=<NllLossBackward>)
tensor(0.0586, grad_fn=<NllLossBackward>)
tensor(0.0857, grad_fn=<NllLossBackward>)
tensor(0.1248, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.2116, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBac

tensor(0.0064, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBackward>)
tensor(0.0061, grad_fn=<NllLossBackward>)
tensor(0.2551, grad_fn=<NllLossBackward>)
tensor(0.1817, grad_fn=<NllLossBackward>)
tensor(0.0238, grad_fn=<NllLossBackward>)
tensor(0.0752, grad_fn=<NllLossBackward>)
tensor(0.1087, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.0061, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.0605, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.3500, grad_fn=<NllLossBackward>)
tensor(0.0210, grad_fn=<NllLossBackward>)
tensor(0.0170, grad_fn=<NllLossBackward>)
tensor(0.0026, grad_fn=<NllLossBackward>)
tensor(0.0327, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.0205, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBac

tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.1781, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0734, grad_fn=<NllLossBackward>)
tensor(0.0961, grad_fn=<NllLossBackward>)
tensor(0.0144, grad_fn=<NllLossBackward>)
tensor(0.0248, grad_fn=<NllLossBackward>)
tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.0778, grad_fn=<NllLossBackward>)
tensor(0.1554, grad_fn=<NllLossBackward>)
tensor(0.0448, grad_fn=<NllLossBackward>)
tensor(0.0178, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.0161, grad_fn=<NllLossBackward>)
tensor(0.0122, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.1314, grad_fn=<NllLossBackward>)
tensor(0.0189, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBac

tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.1644, grad_fn=<NllLossBackward>)
tensor(0.1257, grad_fn=<NllLossBackward>)
tensor(0.1139, grad_fn=<NllLossBackward>)
tensor(0.0530, grad_fn=<NllLossBackward>)
tensor(0.1245, grad_fn=<NllLossBackward>)
tensor(0.2298, grad_fn=<NllLossBackward>)
tensor(0.0366, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBackward>)
tensor(0.2582, grad_fn=<NllLossBackward>)
tensor(0.0676, grad_fn=<NllLossBackward>)
tensor(0.1171, grad_fn=<NllLossBackward>)
tensor(0.1167, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.2104, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.0046, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBac

tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.0104, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.1032, grad_fn=<NllLossBackward>)
tensor(0.0041, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.0518, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1008, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.2852, grad_fn=<NllLossBackward>)
tensor(0.1072, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.1010, grad_fn=<NllLossBackward>)
tensor(0.0417, grad_fn=<NllLossBackward>)
tensor(0.0155, grad_fn=<NllLossBackward>)
tensor(0.0930, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.0905, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.1117, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBac

tensor(0.1392, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.1680, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.2386, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.1490, grad_fn=<NllLossBackward>)
tensor(0.1189, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0409, grad_fn=<NllLossBackward>)
tensor(0.2228, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.0351, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBac

tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.0121, grad_fn=<NllLossBackward>)
tensor(0.1080, grad_fn=<NllLossBackward>)
tensor(0.0391, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.1360, grad_fn=<NllLossBackward>)
tensor(0.0014, grad_fn=<NllLossBackward>)
tensor(0.2201, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.1130, grad_fn=<NllLossBackward>)
tensor(0.0624, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.0990, grad_fn=<NllLossBackward>)
tensor(0.0201, grad_fn=<NllLossBackward>)
tensor(0.2282, grad_fn=<NllLossBackward>)
tensor(0.0454, grad_fn=<NllLossBackward>)
tensor(0.0599, grad_fn=<NllLossBackward>)
tensor(0.0312, grad_fn=<NllLossBackward>)
tensor(0.0516, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.0257, grad_fn=<NllLossBac

tensor(0.2845, grad_fn=<NllLossBackward>)
tensor(0.0210, grad_fn=<NllLossBackward>)
tensor(0.2123, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0935, grad_fn=<NllLossBackward>)
tensor(0.1023, grad_fn=<NllLossBackward>)
tensor(0.0803, grad_fn=<NllLossBackward>)
tensor(0.0215, grad_fn=<NllLossBackward>)
tensor(0.1580, grad_fn=<NllLossBackward>)
tensor(0.1632, grad_fn=<NllLossBackward>)
tensor(0.0060, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.1213, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.1105, grad_fn=<NllLossBackward>)
tensor(0.0091, grad_fn=<NllLossBackward>)
tensor(0.0589, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0266, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.0399, grad_fn=<NllLossBackward>)
tensor(0.0235, grad_fn=<NllLossBac

tensor(0.0454, grad_fn=<NllLossBackward>)
tensor(0.0287, grad_fn=<NllLossBackward>)
tensor(0.1672, grad_fn=<NllLossBackward>)
tensor(0.0026, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.0308, grad_fn=<NllLossBackward>)
tensor(0.1775, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.0469, grad_fn=<NllLossBackward>)
tensor(0.0177, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.2327, grad_fn=<NllLossBackward>)
tensor(0.1734, grad_fn=<NllLossBackward>)
tensor(0.0253, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0858, grad_fn=<NllLossBackward>)
tensor(0.0137, grad_fn=<NllLossBackward>)
tensor(0.0066, grad_fn=<NllLossBackward>)
tensor(0.1798, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBac

tensor(0.1449, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.0686, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.0780, grad_fn=<NllLossBackward>)
tensor(0.2151, grad_fn=<NllLossBackward>)
tensor(0.0015, grad_fn=<NllLossBackward>)
tensor(0.0941, grad_fn=<NllLossBackward>)
tensor(0.2067, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.0370, grad_fn=<NllLossBackward>)
tensor(0.0441, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.1977, grad_fn=<NllLossBackward>)
tensor(0.1342, grad_fn=<NllLossBackward>)
tensor(0.0814, grad_fn=<NllLossBackward>)
tensor(0.1938, grad_fn=<NllLossBackward>)
tensor(0.0240, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBac

tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.0300, grad_fn=<NllLossBackward>)
tensor(0.1337, grad_fn=<NllLossBackward>)
 | train accuracy:  0.9743 | test accuracy:  0.9725
precision value: 0.8793170258508803
recall value: 0.8024279108274441
confusion matrix for normal scenario for slices : 2
[[7600   61    2   42    0]
 [  37 5301    0    0    0]
 [   6    0    5    0    0]
 [ 106   20    1  261    0]
 [ 115    3    0   15 1274]]
Training with slice 2 data
tensor(1.6009, grad_fn=<NllLossBackward>)
tensor(1.5931, grad_fn=<NllLossBackward>)
tensor(1.5859, grad_fn=<NllLossBackward>)
tensor(1.5747, grad_fn=<NllLossBackward>)
tensor(1.5594, grad_fn=<NllLossBackward>)
tensor(1.5277, grad_fn=<NllLossBackward>)
tensor(1.5014, grad_fn=<NllLossBackward>)
tensor(1.4802, grad_fn=<NllLossBackward>)
tensor(1.4530, grad_fn=<NllLossBackward>)
tensor(1.4170, grad_fn=<NllLossBackward>)
tensor(1.4012, grad_fn=<NllLo

tensor(0.4920, grad_fn=<NllLossBackward>)
tensor(0.2215, grad_fn=<NllLossBackward>)
tensor(0.6236, grad_fn=<NllLossBackward>)
tensor(0.4116, grad_fn=<NllLossBackward>)
tensor(0.5252, grad_fn=<NllLossBackward>)
tensor(0.2128, grad_fn=<NllLossBackward>)
tensor(0.7366, grad_fn=<NllLossBackward>)
tensor(0.1405, grad_fn=<NllLossBackward>)
tensor(0.3386, grad_fn=<NllLossBackward>)
tensor(0.2324, grad_fn=<NllLossBackward>)
tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.2936, grad_fn=<NllLossBackward>)
tensor(0.3570, grad_fn=<NllLossBackward>)
tensor(0.5642, grad_fn=<NllLossBackward>)
tensor(0.2693, grad_fn=<NllLossBackward>)
tensor(0.3564, grad_fn=<NllLossBackward>)
tensor(0.4827, grad_fn=<NllLossBackward>)
tensor(0.6330, grad_fn=<NllLossBackward>)
tensor(0.5634, grad_fn=<NllLossBackward>)
tensor(0.3997, grad_fn=<NllLossBackward>)
tensor(0.5330, grad_fn=<NllLossBackward>)
tensor(0.2420, grad_fn=<NllLossBackward>)
tensor(0.2124, grad_fn=<NllLossBackward>)
tensor(0.3669, grad_fn=<NllLossBac

tensor(0.2991, grad_fn=<NllLossBackward>)
tensor(0.2852, grad_fn=<NllLossBackward>)
tensor(0.2137, grad_fn=<NllLossBackward>)
tensor(0.2790, grad_fn=<NllLossBackward>)
tensor(0.1586, grad_fn=<NllLossBackward>)
tensor(0.3637, grad_fn=<NllLossBackward>)
tensor(0.2450, grad_fn=<NllLossBackward>)
tensor(0.2045, grad_fn=<NllLossBackward>)
tensor(0.4056, grad_fn=<NllLossBackward>)
tensor(0.3884, grad_fn=<NllLossBackward>)
tensor(0.4501, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.2010, grad_fn=<NllLossBackward>)
tensor(0.4194, grad_fn=<NllLossBackward>)
tensor(0.3540, grad_fn=<NllLossBackward>)
tensor(0.1766, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.2872, grad_fn=<NllLossBackward>)
tensor(0.1047, grad_fn=<NllLossBackward>)
tensor(0.3182, grad_fn=<NllLossBackward>)
tensor(0.3802, grad_fn=<NllLossBackward>)
tensor(0.2633, grad_fn=<NllLossBackward>)
tensor(0.3782, grad_fn=<NllLossBackward>)
tensor(0.3456, grad_fn=<NllLossBac

tensor(0.4244, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.4141, grad_fn=<NllLossBackward>)
tensor(0.5197, grad_fn=<NllLossBackward>)
tensor(0.1847, grad_fn=<NllLossBackward>)
tensor(0.1205, grad_fn=<NllLossBackward>)
tensor(0.3139, grad_fn=<NllLossBackward>)
tensor(0.1726, grad_fn=<NllLossBackward>)
tensor(0.2324, grad_fn=<NllLossBackward>)
tensor(0.3353, grad_fn=<NllLossBackward>)
tensor(0.5929, grad_fn=<NllLossBackward>)
tensor(0.2243, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBackward>)
tensor(0.1238, grad_fn=<NllLossBackward>)
tensor(0.4130, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.3633, grad_fn=<NllLossBackward>)
tensor(0.2598, grad_fn=<NllLossBackward>)
tensor(0.2065, grad_fn=<NllLossBackward>)
tensor(0.2722, grad_fn=<NllLossBackward>)
tensor(0.2836, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.2533, grad_fn=<NllLossBac

tensor(0.1344, grad_fn=<NllLossBackward>)
tensor(0.2517, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBackward>)
tensor(0.3111, grad_fn=<NllLossBackward>)
tensor(0.1870, grad_fn=<NllLossBackward>)
tensor(0.3094, grad_fn=<NllLossBackward>)
tensor(0.5471, grad_fn=<NllLossBackward>)
tensor(0.0810, grad_fn=<NllLossBackward>)
tensor(0.4517, grad_fn=<NllLossBackward>)
tensor(0.3654, grad_fn=<NllLossBackward>)
tensor(0.4625, grad_fn=<NllLossBackward>)
tensor(0.3964, grad_fn=<NllLossBackward>)
tensor(0.2389, grad_fn=<NllLossBackward>)
tensor(0.1625, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.2113, grad_fn=<NllLossBackward>)
tensor(0.2942, grad_fn=<NllLossBackward>)
tensor(0.1347, grad_fn=<NllLossBackward>)
tensor(0.3473, grad_fn=<NllLossBackward>)
tensor(0.2451, grad_fn=<NllLossBackward>)
tensor(0.4432, grad_fn=<NllLossBackward>)
tensor(0.2843, grad_fn=<NllLossBackward>)
tensor(0.2482, grad_fn=<NllLossBackward>)
tensor(0.2167, grad_fn=<NllLossBac

tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.2017, grad_fn=<NllLossBackward>)
tensor(0.2853, grad_fn=<NllLossBackward>)
tensor(0.1815, grad_fn=<NllLossBackward>)
tensor(0.2397, grad_fn=<NllLossBackward>)
tensor(0.2006, grad_fn=<NllLossBackward>)
tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.3297, grad_fn=<NllLossBackward>)
tensor(0.0642, grad_fn=<NllLossBackward>)
tensor(0.3619, grad_fn=<NllLossBackward>)
tensor(0.2698, grad_fn=<NllLossBackward>)
tensor(0.3489, grad_fn=<NllLossBackward>)
tensor(0.4456, grad_fn=<NllLossBackward>)
tensor(0.0629, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.3721, grad_fn=<NllLossBackward>)
tensor(0.4053, grad_fn=<NllLossBackward>)
tensor(0.2330, grad_fn=<NllLossBackward>)
tensor(0.2989, grad_fn=<NllLossBackward>)
tensor(0.2673, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.2530, grad_fn=<NllLossBackward>)
tensor(0.4389, grad_fn=<NllLossBac

tensor(0.1773, grad_fn=<NllLossBackward>)
tensor(0.2722, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.2860, grad_fn=<NllLossBackward>)
tensor(0.3381, grad_fn=<NllLossBackward>)
tensor(0.1844, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.3533, grad_fn=<NllLossBackward>)
tensor(0.0606, grad_fn=<NllLossBackward>)
tensor(0.2387, grad_fn=<NllLossBackward>)
tensor(0.3026, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBackward>)
tensor(0.1747, grad_fn=<NllLossBackward>)
tensor(0.1613, grad_fn=<NllLossBackward>)
tensor(0.0514, grad_fn=<NllLossBackward>)
tensor(0.1594, grad_fn=<NllLossBackward>)
tensor(0.1698, grad_fn=<NllLossBackward>)
tensor(0.2351, grad_fn=<NllLossBackward>)
tensor(0.2103, grad_fn=<NllLossBackward>)
tensor(0.5586, grad_fn=<NllLossBackward>)
tensor(0.1550, grad_fn=<NllLossBackward>)
tensor(0.1173, grad_fn=<NllLossBackward>)
tensor(0.2600, grad_fn=<NllLossBackward>)
tensor(0.2015, grad_fn=<NllLossBac

tensor(0.2992, grad_fn=<NllLossBackward>)
tensor(0.3589, grad_fn=<NllLossBackward>)
tensor(0.3400, grad_fn=<NllLossBackward>)
tensor(0.1155, grad_fn=<NllLossBackward>)
tensor(0.3747, grad_fn=<NllLossBackward>)
tensor(0.2811, grad_fn=<NllLossBackward>)
tensor(0.1224, grad_fn=<NllLossBackward>)
tensor(0.2529, grad_fn=<NllLossBackward>)
tensor(0.3350, grad_fn=<NllLossBackward>)
tensor(0.2562, grad_fn=<NllLossBackward>)
tensor(0.3026, grad_fn=<NllLossBackward>)
tensor(0.2141, grad_fn=<NllLossBackward>)
tensor(0.2308, grad_fn=<NllLossBackward>)
tensor(0.1435, grad_fn=<NllLossBackward>)
tensor(0.5330, grad_fn=<NllLossBackward>)
tensor(0.4058, grad_fn=<NllLossBackward>)
tensor(0.4313, grad_fn=<NllLossBackward>)
tensor(0.4216, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.1755, grad_fn=<NllLossBackward>)
tensor(0.1222, grad_fn=<NllLossBackward>)
tensor(0.1828, grad_fn=<NllLossBackward>)
tensor(0.1384, grad_fn=<NllLossBackward>)
tensor(0.1906, grad_fn=<NllLossBac

tensor(0.2670, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.3608, grad_fn=<NllLossBackward>)
tensor(0.2467, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.3274, grad_fn=<NllLossBackward>)
tensor(0.2874, grad_fn=<NllLossBackward>)
tensor(0.1601, grad_fn=<NllLossBackward>)
tensor(0.1484, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.1241, grad_fn=<NllLossBackward>)
tensor(0.2753, grad_fn=<NllLossBackward>)
tensor(0.4342, grad_fn=<NllLossBackward>)
tensor(0.4002, grad_fn=<NllLossBackward>)
tensor(0.2595, grad_fn=<NllLossBackward>)
tensor(0.4261, grad_fn=<NllLossBackward>)
tensor(0.2461, grad_fn=<NllLossBackward>)
tensor(0.4366, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBackward>)
tensor(0.1366, grad_fn=<NllLossBackward>)
tensor(0.4256, grad_fn=<NllLossBackward>)
tensor(0.2020, grad_fn=<NllLossBackward>)
tensor(0.2431, grad_fn=<NllLossBackward>)
tensor(0.2958, grad_fn=<NllLossBac

tensor(0.1651, grad_fn=<NllLossBackward>)
tensor(0.2795, grad_fn=<NllLossBackward>)
tensor(0.1848, grad_fn=<NllLossBackward>)
tensor(0.3030, grad_fn=<NllLossBackward>)
tensor(0.2401, grad_fn=<NllLossBackward>)
tensor(0.2215, grad_fn=<NllLossBackward>)
tensor(0.2977, grad_fn=<NllLossBackward>)
tensor(0.1980, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.2514, grad_fn=<NllLossBackward>)
tensor(0.4225, grad_fn=<NllLossBackward>)
tensor(0.1434, grad_fn=<NllLossBackward>)
tensor(0.1833, grad_fn=<NllLossBackward>)
tensor(0.1403, grad_fn=<NllLossBackward>)
tensor(0.1729, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.2616, grad_fn=<NllLossBackward>)
tensor(0.1965, grad_fn=<NllLossBackward>)
tensor(0.1669, grad_fn=<NllLossBackward>)
tensor(0.3936, grad_fn=<NllLossBackward>)
tensor(0.2298, grad_fn=<NllLossBackward>)
tensor(0.3555, grad_fn=<NllLossBackward>)
tensor(0.1413, grad_fn=<NllLossBackward>)
tensor(0.1330, grad_fn=<NllLossBac

tensor(0.0845, grad_fn=<NllLossBackward>)
tensor(0.3741, grad_fn=<NllLossBackward>)
tensor(0.0883, grad_fn=<NllLossBackward>)
tensor(0.3052, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.1556, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.2132, grad_fn=<NllLossBackward>)
tensor(0.2119, grad_fn=<NllLossBackward>)
tensor(0.2475, grad_fn=<NllLossBackward>)
tensor(0.5443, grad_fn=<NllLossBackward>)
tensor(0.0681, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.0303, grad_fn=<NllLossBackward>)
tensor(0.2383, grad_fn=<NllLossBackward>)
tensor(0.3835, grad_fn=<NllLossBackward>)
tensor(0.1675, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.1268, grad_fn=<NllLossBackward>)
tensor(0.1502, grad_fn=<NllLossBackward>)
tensor(0.1179, grad_fn=<NllLossBackward>)
tensor(0.1818, grad_fn=<NllLossBackward>)
tensor(0.3811, grad_fn=<NllLossBac

tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.1230, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.1086, grad_fn=<NllLossBackward>)
tensor(0.2774, grad_fn=<NllLossBackward>)
tensor(0.2298, grad_fn=<NllLossBackward>)
tensor(0.2067, grad_fn=<NllLossBackward>)
tensor(0.3402, grad_fn=<NllLossBackward>)
tensor(0.2710, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.2320, grad_fn=<NllLossBackward>)
tensor(0.1478, grad_fn=<NllLossBackward>)
tensor(0.2008, grad_fn=<NllLossBackward>)
tensor(0.2451, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.1388, grad_fn=<NllLossBackward>)
tensor(0.2157, grad_fn=<NllLossBackward>)
tensor(0.2330, grad_fn=<NllLossBackward>)
tensor(0.2656, grad_fn=<NllLossBackward>)
tensor(0.1226, grad_fn=<NllLossBackward>)
tensor(0.2486, grad_fn=<NllLossBackward>)
tensor(0.1365, grad_fn=<NllLossBackward>)
tensor(0.2925, grad_fn=<NllLossBackward>)
tensor(0.2285, grad_fn=<NllLossBac

tensor(0.0795, grad_fn=<NllLossBackward>)
tensor(0.1183, grad_fn=<NllLossBackward>)
tensor(0.2387, grad_fn=<NllLossBackward>)
tensor(0.0647, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.2511, grad_fn=<NllLossBackward>)
tensor(0.3521, grad_fn=<NllLossBackward>)
tensor(0.3360, grad_fn=<NllLossBackward>)
tensor(0.2183, grad_fn=<NllLossBackward>)
tensor(0.1881, grad_fn=<NllLossBackward>)
tensor(0.2495, grad_fn=<NllLossBackward>)
tensor(0.3308, grad_fn=<NllLossBackward>)
tensor(0.0335, grad_fn=<NllLossBackward>)
tensor(0.2020, grad_fn=<NllLossBackward>)
tensor(0.2923, grad_fn=<NllLossBackward>)
tensor(0.1717, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.1549, grad_fn=<NllLossBackward>)
tensor(0.3440, grad_fn=<NllLossBackward>)
tensor(0.1675, grad_fn=<NllLossBackward>)
tensor(0.1906, grad_fn=<NllLossBackward>)
tensor(0.1894, grad_fn=<NllLossBackward>)
tensor(0.2183, grad_fn=<NllLossBackward>)
tensor(0.2192, grad_fn=<NllLossBac

tensor(0.1373, grad_fn=<NllLossBackward>)
tensor(0.1897, grad_fn=<NllLossBackward>)
tensor(0.3173, grad_fn=<NllLossBackward>)
tensor(0.2161, grad_fn=<NllLossBackward>)
tensor(0.1591, grad_fn=<NllLossBackward>)
tensor(0.1063, grad_fn=<NllLossBackward>)
tensor(0.2435, grad_fn=<NllLossBackward>)
tensor(0.2814, grad_fn=<NllLossBackward>)
tensor(0.2454, grad_fn=<NllLossBackward>)
tensor(0.3101, grad_fn=<NllLossBackward>)
tensor(0.1802, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.0646, grad_fn=<NllLossBackward>)
tensor(0.4962, grad_fn=<NllLossBackward>)
tensor(0.1389, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.1342, grad_fn=<NllLossBackward>)
tensor(0.2030, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.3428, grad_fn=<NllLossBackward>)
tensor(0.1447, grad_fn=<NllLossBackward>)
tensor(0.3364, grad_fn=<NllLossBackward>)
tensor(0.2410, grad_fn=<NllLossBackward>)
tensor(0.1522, grad_fn=<NllLossBac

tensor(0.1592, grad_fn=<NllLossBackward>)
tensor(0.1712, grad_fn=<NllLossBackward>)
tensor(0.1735, grad_fn=<NllLossBackward>)
tensor(0.1042, grad_fn=<NllLossBackward>)
tensor(0.0929, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.2079, grad_fn=<NllLossBackward>)
tensor(0.2096, grad_fn=<NllLossBackward>)
tensor(0.0991, grad_fn=<NllLossBackward>)
tensor(0.1572, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.2098, grad_fn=<NllLossBackward>)
tensor(0.2166, grad_fn=<NllLossBackward>)
tensor(0.2231, grad_fn=<NllLossBackward>)
tensor(0.3736, grad_fn=<NllLossBackward>)
tensor(0.2248, grad_fn=<NllLossBackward>)
tensor(0.2369, grad_fn=<NllLossBackward>)
tensor(0.3664, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.3399, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBac

tensor(0.2485, grad_fn=<NllLossBackward>)
tensor(0.1072, grad_fn=<NllLossBackward>)
tensor(0.2594, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.1919, grad_fn=<NllLossBackward>)
tensor(0.2479, grad_fn=<NllLossBackward>)
tensor(0.2327, grad_fn=<NllLossBackward>)
tensor(0.2715, grad_fn=<NllLossBackward>)
tensor(0.2676, grad_fn=<NllLossBackward>)
tensor(0.0610, grad_fn=<NllLossBackward>)
tensor(0.1787, grad_fn=<NllLossBackward>)
tensor(0.2283, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.2150, grad_fn=<NllLossBackward>)
tensor(0.1115, grad_fn=<NllLossBackward>)
tensor(0.3314, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.2443, grad_fn=<NllLossBackward>)
tensor(0.4342, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.3694, grad_fn=<NllLossBackward>)
tensor(0.2408, grad_fn=<NllLossBac

tensor(0.2278, grad_fn=<NllLossBackward>)
tensor(0.2767, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.2127, grad_fn=<NllLossBackward>)
tensor(0.1060, grad_fn=<NllLossBackward>)
tensor(0.2008, grad_fn=<NllLossBackward>)
tensor(0.1063, grad_fn=<NllLossBackward>)
tensor(0.2739, grad_fn=<NllLossBackward>)
tensor(0.4159, grad_fn=<NllLossBackward>)
tensor(0.0519, grad_fn=<NllLossBackward>)
tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBackward>)
tensor(0.2122, grad_fn=<NllLossBackward>)
tensor(0.2010, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.5244, grad_fn=<NllLossBackward>)
tensor(0.2085, grad_fn=<NllLossBackward>)
tensor(0.2846, grad_fn=<NllLossBackward>)
tensor(0.2978, grad_fn=<NllLossBackward>)
tensor(0.1668, grad_fn=<NllLossBackward>)
tensor(0.1139, grad_fn=<NllLossBackward>)
tensor(0.0985, grad_fn=<NllLossBackward>)
tensor(0.2574, grad_fn=<NllLossBac

tensor(0.1862, grad_fn=<NllLossBackward>)
tensor(0.1418, grad_fn=<NllLossBackward>)
tensor(0.3298, grad_fn=<NllLossBackward>)
tensor(0.1698, grad_fn=<NllLossBackward>)
tensor(0.1295, grad_fn=<NllLossBackward>)
tensor(0.1757, grad_fn=<NllLossBackward>)
tensor(0.1682, grad_fn=<NllLossBackward>)
tensor(0.1830, grad_fn=<NllLossBackward>)
tensor(0.2654, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.1045, grad_fn=<NllLossBackward>)
tensor(0.3067, grad_fn=<NllLossBackward>)
tensor(0.4049, grad_fn=<NllLossBackward>)
tensor(0.2387, grad_fn=<NllLossBackward>)
tensor(0.1999, grad_fn=<NllLossBackward>)
tensor(0.1415, grad_fn=<NllLossBackward>)
tensor(0.1752, grad_fn=<NllLossBackward>)
tensor(0.2997, grad_fn=<NllLossBackward>)
tensor(0.1233, grad_fn=<NllLossBackward>)
tensor(0.3820, grad_fn=<NllLossBackward>)
tensor(0.1370, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.2630, grad_fn=<NllLossBac

tensor(0.2119, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.1019, grad_fn=<NllLossBackward>)
tensor(0.3904, grad_fn=<NllLossBackward>)
tensor(0.4216, grad_fn=<NllLossBackward>)
tensor(0.3488, grad_fn=<NllLossBackward>)
tensor(0.4316, grad_fn=<NllLossBackward>)
tensor(0.2309, grad_fn=<NllLossBackward>)
tensor(0.1384, grad_fn=<NllLossBackward>)
tensor(0.1908, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1770, grad_fn=<NllLossBackward>)
tensor(0.1865, grad_fn=<NllLossBackward>)
tensor(0.2110, grad_fn=<NllLossBackward>)
tensor(0.2176, grad_fn=<NllLossBackward>)
tensor(0.3906, grad_fn=<NllLossBackward>)
tensor(0.2548, grad_fn=<NllLossBackward>)
tensor(0.1877, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.1006, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.3562, grad_fn=<NllLossBackward>)
tensor(0.1565, grad_fn=<NllLossBac

tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.1748, grad_fn=<NllLossBackward>)
tensor(0.2325, grad_fn=<NllLossBackward>)
tensor(0.3906, grad_fn=<NllLossBackward>)
tensor(0.0781, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.1325, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.1580, grad_fn=<NllLossBackward>)
tensor(0.2234, grad_fn=<NllLossBackward>)
tensor(0.1882, grad_fn=<NllLossBackward>)
tensor(0.1188, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.2672, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.2785, grad_fn=<NllLossBackward>)
tensor(0.0979, grad_fn=<NllLossBackward>)
tensor(0.0829, grad_fn=<NllLossBackward>)
tensor(0.2216, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBackward>)
tensor(0.0660, grad_fn=<NllLossBackward>)
tensor(0.2089, grad_fn=<NllLossBackward>)
tensor(0.3719, grad_fn=<NllLossBackward>)
tensor(0.1811, grad_fn=<NllLossBac

tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.1878, grad_fn=<NllLossBackward>)
tensor(0.3107, grad_fn=<NllLossBackward>)
tensor(0.2388, grad_fn=<NllLossBackward>)
tensor(0.1446, grad_fn=<NllLossBackward>)
tensor(0.0771, grad_fn=<NllLossBackward>)
tensor(0.0123, grad_fn=<NllLossBackward>)
tensor(0.3615, grad_fn=<NllLossBackward>)
tensor(0.2391, grad_fn=<NllLossBackward>)
tensor(0.2560, grad_fn=<NllLossBackward>)
tensor(0.2219, grad_fn=<NllLossBackward>)
tensor(0.0169, grad_fn=<NllLossBackward>)
tensor(0.4514, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.1235, grad_fn=<NllLossBackward>)
tensor(0.1338, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1875, grad_fn=<NllLossBackward>)
tensor(0.4055, grad_fn=<NllLossBackward>)
tensor(0.2393, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.0776, grad_fn=<NllLossBac

tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.1336, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.2943, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.2522, grad_fn=<NllLossBackward>)
tensor(0.2769, grad_fn=<NllLossBackward>)
tensor(0.4025, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.3303, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.2405, grad_fn=<NllLossBackward>)
tensor(0.0861, grad_fn=<NllLossBackward>)
tensor(0.1566, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.1332, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.0939, grad_fn=<NllLossBackward>)
tensor(0.2303, grad_fn=<NllLossBac

tensor(0.0356, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.2114, grad_fn=<NllLossBackward>)
tensor(0.4582, grad_fn=<NllLossBackward>)
tensor(0.2370, grad_fn=<NllLossBackward>)
tensor(0.1096, grad_fn=<NllLossBackward>)
tensor(0.2529, grad_fn=<NllLossBackward>)
tensor(0.1062, grad_fn=<NllLossBackward>)
tensor(0.2304, grad_fn=<NllLossBackward>)
tensor(0.3704, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.3222, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0950, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.2762, grad_fn=<NllLossBackward>)
tensor(0.2568, grad_fn=<NllLossBackward>)
tensor(0.1799, grad_fn=<NllLossBackward>)
tensor(0.1819, grad_fn=<NllLossBackward>)
tensor(0.0925, grad_fn=<NllLossBackward>)
tensor(0.2055, grad_fn=<NllLossBackward>)
tensor(0.1122, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBac

tensor(0.1280, grad_fn=<NllLossBackward>)
tensor(0.1433, grad_fn=<NllLossBackward>)
tensor(0.1374, grad_fn=<NllLossBackward>)
tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.2289, grad_fn=<NllLossBackward>)
tensor(0.0495, grad_fn=<NllLossBackward>)
tensor(0.1204, grad_fn=<NllLossBackward>)
tensor(0.1560, grad_fn=<NllLossBackward>)
tensor(0.2107, grad_fn=<NllLossBackward>)
tensor(0.2053, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.4196, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.1861, grad_fn=<NllLossBackward>)
tensor(0.2352, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.1607, grad_fn=<NllLossBackward>)
tensor(0.1625, grad_fn=<NllLossBackward>)
tensor(0.0486, grad_fn=<NllLossBackward>)
tensor(0.0747, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.2739, grad_fn=<NllLossBackward>)
tensor(0.1799, grad_fn=<NllLossBac

tensor(0.1682, grad_fn=<NllLossBackward>)
tensor(0.2205, grad_fn=<NllLossBackward>)
tensor(0.2341, grad_fn=<NllLossBackward>)
tensor(0.3433, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.3634, grad_fn=<NllLossBackward>)
tensor(0.1065, grad_fn=<NllLossBackward>)
tensor(0.1367, grad_fn=<NllLossBackward>)
tensor(0.2512, grad_fn=<NllLossBackward>)
tensor(0.0598, grad_fn=<NllLossBackward>)
tensor(0.2338, grad_fn=<NllLossBackward>)
tensor(0.1555, grad_fn=<NllLossBackward>)
tensor(0.1901, grad_fn=<NllLossBackward>)
tensor(0.0681, grad_fn=<NllLossBackward>)
tensor(0.0892, grad_fn=<NllLossBackward>)
tensor(0.0632, grad_fn=<NllLossBackward>)
tensor(0.2430, grad_fn=<NllLossBackward>)
tensor(0.3859, grad_fn=<NllLossBackward>)
tensor(0.2520, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.2835, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.1451, grad_fn=<NllLossBac

tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.1642, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.5915, grad_fn=<NllLossBackward>)
tensor(0.4697, grad_fn=<NllLossBackward>)
tensor(0.1936, grad_fn=<NllLossBackward>)
tensor(0.3856, grad_fn=<NllLossBackward>)
tensor(0.1739, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.1247, grad_fn=<NllLossBackward>)
tensor(0.1187, grad_fn=<NllLossBackward>)
tensor(0.1298, grad_fn=<NllLossBackward>)
tensor(0.0738, grad_fn=<NllLossBackward>)
tensor(0.1133, grad_fn=<NllLossBackward>)
tensor(0.1637, grad_fn=<NllLossBackward>)
tensor(0.1331, grad_fn=<NllLossBackward>)
tensor(0.1646, grad_fn=<NllLossBackward>)
tensor(0.1411, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.1882, grad_fn=<NllLossBackward>)
tensor(0.4972, grad_fn=<NllLossBackward>)
tensor(0.3272, grad_fn=<NllLossBac

tensor(0.0857, grad_fn=<NllLossBackward>)
tensor(0.2103, grad_fn=<NllLossBackward>)
tensor(0.1456, grad_fn=<NllLossBackward>)
tensor(0.2500, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.2653, grad_fn=<NllLossBackward>)
tensor(0.1334, grad_fn=<NllLossBackward>)
tensor(0.2169, grad_fn=<NllLossBackward>)
tensor(0.2006, grad_fn=<NllLossBackward>)
tensor(0.2299, grad_fn=<NllLossBackward>)
tensor(0.3685, grad_fn=<NllLossBackward>)
tensor(0.3728, grad_fn=<NllLossBackward>)
tensor(0.1876, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.2547, grad_fn=<NllLossBackward>)
tensor(0.0722, grad_fn=<NllLossBackward>)
tensor(0.1886, grad_fn=<NllLossBackward>)
tensor(0.1580, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.1401, grad_fn=<NllLossBackward>)
tensor(0.2169, grad_fn=<NllLossBackward>)
tensor(0.0879, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.2360, grad_fn=<NllLossBac

tensor(0.1459, grad_fn=<NllLossBackward>)
tensor(0.0774, grad_fn=<NllLossBackward>)
tensor(0.1468, grad_fn=<NllLossBackward>)
tensor(0.0665, grad_fn=<NllLossBackward>)
tensor(0.0995, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.1202, grad_fn=<NllLossBackward>)
tensor(0.0963, grad_fn=<NllLossBackward>)
tensor(0.3232, grad_fn=<NllLossBackward>)
tensor(0.0854, grad_fn=<NllLossBackward>)
tensor(0.1928, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.1851, grad_fn=<NllLossBackward>)
tensor(0.1619, grad_fn=<NllLossBackward>)
tensor(0.1038, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.2355, grad_fn=<NllLossBackward>)
tensor(0.1862, grad_fn=<NllLossBackward>)
tensor(0.1953, grad_fn=<NllLossBackward>)
tensor(0.2894, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.0691, grad_fn=<NllLossBackward>)
tensor(0.1994, grad_fn=<NllLossBac

tensor(0.3153, grad_fn=<NllLossBackward>)
tensor(0.1424, grad_fn=<NllLossBackward>)
tensor(0.1837, grad_fn=<NllLossBackward>)
tensor(0.1979, grad_fn=<NllLossBackward>)
tensor(0.2742, grad_fn=<NllLossBackward>)
tensor(0.0816, grad_fn=<NllLossBackward>)
tensor(0.1734, grad_fn=<NllLossBackward>)
tensor(0.2382, grad_fn=<NllLossBackward>)
tensor(0.1640, grad_fn=<NllLossBackward>)
tensor(0.1773, grad_fn=<NllLossBackward>)
tensor(0.0594, grad_fn=<NllLossBackward>)
tensor(0.1943, grad_fn=<NllLossBackward>)
tensor(0.1470, grad_fn=<NllLossBackward>)
tensor(0.1416, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.1864, grad_fn=<NllLossBackward>)
tensor(0.2302, grad_fn=<NllLossBackward>)
tensor(0.1921, grad_fn=<NllLossBackward>)
tensor(0.4236, grad_fn=<NllLossBackward>)
tensor(0.0601, grad_fn=<NllLossBackward>)
tensor(0.2521, grad_fn=<NllLossBackward>)
tensor(0.0653, grad_fn=<NllLossBackward>)
tensor(0.1824, grad_fn=<NllLossBackward>)
tensor(0.2918, grad_fn=<NllLossBac

tensor(0.4716, grad_fn=<NllLossBackward>)
tensor(0.3511, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.3744, grad_fn=<NllLossBackward>)
tensor(0.1983, grad_fn=<NllLossBackward>)
tensor(0.1343, grad_fn=<NllLossBackward>)
tensor(0.1401, grad_fn=<NllLossBackward>)
tensor(0.2520, grad_fn=<NllLossBackward>)
tensor(0.2434, grad_fn=<NllLossBackward>)
tensor(0.0961, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBackward>)
tensor(0.1640, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.2275, grad_fn=<NllLossBackward>)
tensor(0.0547, grad_fn=<NllLossBackward>)
tensor(0.2038, grad_fn=<NllLossBackward>)
tensor(0.3024, grad_fn=<NllLossBackward>)
tensor(0.2472, grad_fn=<NllLossBackward>)
tensor(0.0747, grad_fn=<NllLossBackward>)
tensor(0.1813, grad_fn=<NllLossBackward>)
tensor(0.2465, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.1812, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBac

tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.2825, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.4258, grad_fn=<NllLossBackward>)
tensor(0.2205, grad_fn=<NllLossBackward>)
tensor(0.2858, grad_fn=<NllLossBackward>)
tensor(0.3355, grad_fn=<NllLossBackward>)
tensor(0.1484, grad_fn=<NllLossBackward>)
tensor(0.2408, grad_fn=<NllLossBackward>)
tensor(0.0144, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.1628, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.1401, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.2770, grad_fn=<NllLossBackward>)
tensor(0.1602, grad_fn=<NllLossBackward>)
tensor(0.0765, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBac

tensor(0.0972, grad_fn=<NllLossBackward>)
tensor(0.1288, grad_fn=<NllLossBackward>)
tensor(0.1032, grad_fn=<NllLossBackward>)
tensor(0.0373, grad_fn=<NllLossBackward>)
tensor(0.1554, grad_fn=<NllLossBackward>)
tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.1879, grad_fn=<NllLossBackward>)
tensor(0.1020, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.2439, grad_fn=<NllLossBackward>)
tensor(0.1088, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.2085, grad_fn=<NllLossBackward>)
tensor(0.1635, grad_fn=<NllLossBackward>)
tensor(0.2205, grad_fn=<NllLossBackward>)
tensor(0.2992, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.1560, grad_fn=<NllLossBackward>)
tensor(0.2133, grad_fn=<NllLossBackward>)
tensor(0.1885, grad_fn=<NllLossBackward>)
tensor(0.2240, grad_fn=<NllLossBackward>)
tensor(0.1892, grad_fn=<NllLossBackward>)
tensor(0.1456, grad_fn=<NllLossBac

tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.0376, grad_fn=<NllLossBackward>)
tensor(0.2816, grad_fn=<NllLossBackward>)
tensor(0.1551, grad_fn=<NllLossBackward>)
tensor(0.2363, grad_fn=<NllLossBackward>)
tensor(0.1291, grad_fn=<NllLossBackward>)
tensor(0.1402, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.1132, grad_fn=<NllLossBackward>)
tensor(0.1526, grad_fn=<NllLossBackward>)
tensor(0.0249, grad_fn=<NllLossBackward>)
tensor(0.3048, grad_fn=<NllLossBackward>)
tensor(0.1498, grad_fn=<NllLossBackward>)
tensor(0.1270, grad_fn=<NllLossBackward>)
tensor(0.2298, grad_fn=<NllLossBackward>)
tensor(0.3295, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.2379, grad_fn=<NllLossBackward>)
tensor(0.0111, grad_fn=<NllLossBackward>)
tensor(0.4750, grad_fn=<NllLossBackward>)
tensor(0.2399, grad_fn=<NllLossBackward>)
tensor(0.4075, grad_fn=<NllLossBac

tensor(0.1701, grad_fn=<NllLossBackward>)
tensor(0.2351, grad_fn=<NllLossBackward>)
tensor(0.0763, grad_fn=<NllLossBackward>)
tensor(0.0978, grad_fn=<NllLossBackward>)
tensor(0.3367, grad_fn=<NllLossBackward>)
tensor(0.2556, grad_fn=<NllLossBackward>)
tensor(0.3058, grad_fn=<NllLossBackward>)
tensor(0.0493, grad_fn=<NllLossBackward>)
tensor(0.1328, grad_fn=<NllLossBackward>)
tensor(0.2593, grad_fn=<NllLossBackward>)
tensor(0.0947, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.1331, grad_fn=<NllLossBackward>)
tensor(0.0408, grad_fn=<NllLossBackward>)
tensor(0.3508, grad_fn=<NllLossBackward>)
tensor(0.1750, grad_fn=<NllLossBackward>)
tensor(0.3910, grad_fn=<NllLossBackward>)
tensor(0.0397, grad_fn=<NllLossBackward>)
tensor(0.1155, grad_fn=<NllLossBackward>)
tensor(0.2933, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.0385, grad_fn=<NllLossBac

tensor(0.3777, grad_fn=<NllLossBackward>)
tensor(0.0896, grad_fn=<NllLossBackward>)
tensor(0.1443, grad_fn=<NllLossBackward>)
tensor(0.2568, grad_fn=<NllLossBackward>)
tensor(0.0526, grad_fn=<NllLossBackward>)
tensor(0.1175, grad_fn=<NllLossBackward>)
tensor(0.1704, grad_fn=<NllLossBackward>)
tensor(0.3121, grad_fn=<NllLossBackward>)
tensor(0.1468, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1124, grad_fn=<NllLossBackward>)
tensor(0.1350, grad_fn=<NllLossBackward>)
tensor(0.1064, grad_fn=<NllLossBackward>)
tensor(0.2117, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.2329, grad_fn=<NllLossBackward>)
tensor(0.3377, grad_fn=<NllLossBackward>)
tensor(0.0765, grad_fn=<NllLossBackward>)
tensor(0.1737, grad_fn=<NllLossBackward>)
tensor(0.0804, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.1873, grad_fn=<NllLossBackward>)
tensor(0.0741, grad_fn=<NllLossBackward>)
tensor(0.2762, grad_fn=<NllLossBac

tensor(0.0447, grad_fn=<NllLossBackward>)
tensor(0.2258, grad_fn=<NllLossBackward>)
tensor(0.3764, grad_fn=<NllLossBackward>)
tensor(0.2185, grad_fn=<NllLossBackward>)
tensor(0.0726, grad_fn=<NllLossBackward>)
tensor(0.1540, grad_fn=<NllLossBackward>)
tensor(0.2165, grad_fn=<NllLossBackward>)
tensor(0.0477, grad_fn=<NllLossBackward>)
tensor(0.2135, grad_fn=<NllLossBackward>)
tensor(0.1200, grad_fn=<NllLossBackward>)
tensor(0.2214, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBackward>)
tensor(0.2054, grad_fn=<NllLossBackward>)
tensor(0.1219, grad_fn=<NllLossBackward>)
tensor(0.0682, grad_fn=<NllLossBackward>)
tensor(0.1232, grad_fn=<NllLossBackward>)
tensor(0.2279, grad_fn=<NllLossBackward>)
tensor(0.0892, grad_fn=<NllLossBackward>)
tensor(0.1667, grad_fn=<NllLossBackward>)
tensor(0.0178, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.3119, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBac

tensor(0.0524, grad_fn=<NllLossBackward>)
tensor(0.0837, grad_fn=<NllLossBackward>)
tensor(0.2146, grad_fn=<NllLossBackward>)
tensor(0.1007, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.0153, grad_fn=<NllLossBackward>)
tensor(0.0686, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.0450, grad_fn=<NllLossBackward>)
tensor(0.2448, grad_fn=<NllLossBackward>)
tensor(0.3274, grad_fn=<NllLossBackward>)
tensor(0.2397, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0560, grad_fn=<NllLossBackward>)
tensor(0.1380, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.1275, grad_fn=<NllLossBackward>)
tensor(0.0256, grad_fn=<NllLossBackward>)
tensor(0.2081, grad_fn=<NllLossBackward>)
tensor(0.1230, grad_fn=<NllLossBackward>)
tensor(0.0991, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBackward>)
tensor(0.2377, grad_fn=<NllLossBackward>)
tensor(0.1405, grad_fn=<NllLossBac

tensor(0.3369, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.3097, grad_fn=<NllLossBackward>)
tensor(0.1838, grad_fn=<NllLossBackward>)
tensor(0.0972, grad_fn=<NllLossBackward>)
tensor(0.3043, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.1622, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.2415, grad_fn=<NllLossBackward>)
tensor(0.2463, grad_fn=<NllLossBackward>)
tensor(0.1805, grad_fn=<NllLossBackward>)
tensor(0.1036, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.0571, grad_fn=<NllLossBackward>)
tensor(0.2387, grad_fn=<NllLossBackward>)
tensor(0.1537, grad_fn=<NllLossBackward>)
tensor(0.1062, grad_fn=<NllLossBackward>)
tensor(0.1106, grad_fn=<NllLossBackward>)
tensor(0.1181, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.1362, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.1779, grad_fn=<NllLossBac

tensor(0.3117, grad_fn=<NllLossBackward>)
tensor(0.2693, grad_fn=<NllLossBackward>)
tensor(0.2881, grad_fn=<NllLossBackward>)
tensor(0.2814, grad_fn=<NllLossBackward>)
tensor(0.1583, grad_fn=<NllLossBackward>)
tensor(0.1107, grad_fn=<NllLossBackward>)
tensor(0.0536, grad_fn=<NllLossBackward>)
tensor(0.1152, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.2758, grad_fn=<NllLossBackward>)
tensor(0.1120, grad_fn=<NllLossBackward>)
tensor(0.1359, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.1747, grad_fn=<NllLossBackward>)
tensor(0.1389, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.1236, grad_fn=<NllLossBackward>)
tensor(0.2832, grad_fn=<NllLossBackward>)
tensor(0.3928, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.0972, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.2445, grad_fn=<NllLossBac

tensor(0.0390, grad_fn=<NllLossBackward>)
tensor(0.1206, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.2621, grad_fn=<NllLossBackward>)
tensor(0.1204, grad_fn=<NllLossBackward>)
tensor(0.2030, grad_fn=<NllLossBackward>)
tensor(0.2295, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.3772, grad_fn=<NllLossBackward>)
tensor(0.1814, grad_fn=<NllLossBackward>)
tensor(0.1045, grad_fn=<NllLossBackward>)
tensor(0.1521, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.3163, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.1697, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBackward>)
tensor(0.2990, grad_fn=<NllLossBackward>)
tensor(0.1669, grad_fn=<NllLossBackward>)
tensor(0.0983, grad_fn=<NllLossBackward>)
tensor(0.0338, grad_fn=<NllLossBackward>)
tensor(0.1959, grad_fn=<NllLossBackward>)
tensor(0.0419, grad_fn=<NllLossBac

tensor(0.1342, grad_fn=<NllLossBackward>)
tensor(0.2558, grad_fn=<NllLossBackward>)
tensor(0.0994, grad_fn=<NllLossBackward>)
tensor(0.0519, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.1920, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.2069, grad_fn=<NllLossBackward>)
tensor(0.4352, grad_fn=<NllLossBackward>)
tensor(0.5274, grad_fn=<NllLossBackward>)
tensor(0.2163, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.2127, grad_fn=<NllLossBackward>)
tensor(0.1246, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.2369, grad_fn=<NllLossBackward>)
tensor(0.0667, grad_fn=<NllLossBackward>)
tensor(0.1875, grad_fn=<NllLossBackward>)
tensor(0.1270, grad_fn=<NllLossBackward>)
tensor(0.3773, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.0984, grad_fn=<NllLossBac

tensor(0.2121, grad_fn=<NllLossBackward>)
tensor(0.0417, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.1446, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.0788, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.2441, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.2642, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBackward>)
tensor(0.0600, grad_fn=<NllLossBackward>)
tensor(0.3478, grad_fn=<NllLossBackward>)
tensor(0.0621, grad_fn=<NllLossBackward>)
tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.3721, grad_fn=<NllLossBackward>)
tensor(0.0380, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.1709, grad_fn=<NllLossBackward>)
tensor(0.2142, grad_fn=<NllLossBac

tensor(0.0618, grad_fn=<NllLossBackward>)
tensor(0.3165, grad_fn=<NllLossBackward>)
tensor(0.3183, grad_fn=<NllLossBackward>)
tensor(0.1165, grad_fn=<NllLossBackward>)
tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.1104, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.3582, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.3475, grad_fn=<NllLossBackward>)
tensor(0.1831, grad_fn=<NllLossBackward>)
tensor(0.0936, grad_fn=<NllLossBackward>)
tensor(0.2957, grad_fn=<NllLossBackward>)
tensor(0.2150, grad_fn=<NllLossBackward>)
tensor(0.2000, grad_fn=<NllLossBackward>)
tensor(0.0688, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBackward>)
tensor(0.1530, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.0538, grad_fn=<NllLossBackward>)
tensor(0.1894, grad_fn=<NllLossBackward>)
tensor(0.1237, grad_fn=<NllLossBac

tensor(0.2790, grad_fn=<NllLossBackward>)
tensor(0.2937, grad_fn=<NllLossBackward>)
tensor(0.0321, grad_fn=<NllLossBackward>)
tensor(0.2222, grad_fn=<NllLossBackward>)
tensor(0.2667, grad_fn=<NllLossBackward>)
tensor(0.1271, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBackward>)
tensor(0.3153, grad_fn=<NllLossBackward>)
tensor(0.3107, grad_fn=<NllLossBackward>)
tensor(0.1692, grad_fn=<NllLossBackward>)
tensor(0.1457, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.4847, grad_fn=<NllLossBackward>)
tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.1158, grad_fn=<NllLossBackward>)
tensor(0.2372, grad_fn=<NllLossBackward>)
tensor(0.0970, grad_fn=<NllLossBackward>)
tensor(0.3283, grad_fn=<NllLossBackward>)
tensor(0.2385, grad_fn=<NllLossBackward>)
tensor(0.2052, grad_fn=<NllLossBackward>)
tensor(0.0772, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBac

tensor(0.0947, grad_fn=<NllLossBackward>)
tensor(0.2563, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.1059, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.1898, grad_fn=<NllLossBackward>)
tensor(0.1153, grad_fn=<NllLossBackward>)
tensor(0.1146, grad_fn=<NllLossBackward>)
tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.1442, grad_fn=<NllLossBackward>)
tensor(0.0417, grad_fn=<NllLossBackward>)
tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBackward>)
tensor(0.1781, grad_fn=<NllLossBackward>)
tensor(0.1090, grad_fn=<NllLossBackward>)
tensor(0.1150, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.1614, grad_fn=<NllLossBackward>)
tensor(0.1998, grad_fn=<NllLossBackward>)
tensor(0.0097, grad_fn=<NllLossBac

tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.1979, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.2168, grad_fn=<NllLossBackward>)
tensor(0.1702, grad_fn=<NllLossBackward>)
tensor(0.1717, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.1685, grad_fn=<NllLossBackward>)
tensor(0.0274, grad_fn=<NllLossBackward>)
tensor(0.1506, grad_fn=<NllLossBackward>)
tensor(0.1352, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.2293, grad_fn=<NllLossBackward>)
tensor(0.0883, grad_fn=<NllLossBackward>)
tensor(0.1935, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.1573, grad_fn=<NllLossBackward>)
tensor(0.2332, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.1781, grad_fn=<NllLossBackward>)
tensor(0.1525, grad_fn=<NllLossBackward>)
tensor(0.0520, grad_fn=<NllLossBackward>)
tensor(0.0319, grad_fn=<NllLossBac

tensor(0.1959, grad_fn=<NllLossBackward>)
tensor(0.2128, grad_fn=<NllLossBackward>)
tensor(0.0942, grad_fn=<NllLossBackward>)
tensor(0.0858, grad_fn=<NllLossBackward>)
tensor(0.0928, grad_fn=<NllLossBackward>)
tensor(0.2529, grad_fn=<NllLossBackward>)
tensor(0.1821, grad_fn=<NllLossBackward>)
tensor(0.2433, grad_fn=<NllLossBackward>)
tensor(0.2928, grad_fn=<NllLossBackward>)
tensor(0.2849, grad_fn=<NllLossBackward>)
tensor(0.1336, grad_fn=<NllLossBackward>)
tensor(0.2159, grad_fn=<NllLossBackward>)
tensor(0.1968, grad_fn=<NllLossBackward>)
tensor(0.1946, grad_fn=<NllLossBackward>)
tensor(0.1072, grad_fn=<NllLossBackward>)
tensor(0.0830, grad_fn=<NllLossBackward>)
tensor(0.1327, grad_fn=<NllLossBackward>)
tensor(0.0531, grad_fn=<NllLossBackward>)
tensor(0.2000, grad_fn=<NllLossBackward>)
tensor(0.1920, grad_fn=<NllLossBackward>)
tensor(0.1449, grad_fn=<NllLossBackward>)
tensor(0.2559, grad_fn=<NllLossBackward>)
tensor(0.2399, grad_fn=<NllLossBackward>)
tensor(0.1293, grad_fn=<NllLossBac

tensor(0.1150, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.1004, grad_fn=<NllLossBackward>)
tensor(0.0200, grad_fn=<NllLossBackward>)
tensor(0.1876, grad_fn=<NllLossBackward>)
tensor(0.3360, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.2373, grad_fn=<NllLossBackward>)
tensor(0.1049, grad_fn=<NllLossBackward>)
tensor(0.0709, grad_fn=<NllLossBackward>)
tensor(0.2934, grad_fn=<NllLossBackward>)
tensor(0.3960, grad_fn=<NllLossBackward>)
tensor(0.0993, grad_fn=<NllLossBackward>)
tensor(0.1315, grad_fn=<NllLossBackward>)
tensor(0.0939, grad_fn=<NllLossBackward>)
tensor(0.3710, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.1295, grad_fn=<NllLossBackward>)
tensor(0.4288, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.0059, grad_fn=<NllLossBac

tensor(0.2350, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.4450, grad_fn=<NllLossBackward>)
tensor(0.2282, grad_fn=<NllLossBackward>)
tensor(0.1531, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.2826, grad_fn=<NllLossBackward>)
tensor(0.3118, grad_fn=<NllLossBackward>)
tensor(0.0668, grad_fn=<NllLossBackward>)
tensor(0.3092, grad_fn=<NllLossBackward>)
tensor(0.1304, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.2039, grad_fn=<NllLossBackward>)
tensor(0.1629, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBackward>)
tensor(0.1671, grad_fn=<NllLossBackward>)
tensor(0.0206, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBackward>)
tensor(0.1102, grad_fn=<NllLossBackward>)
tensor(0.1553, grad_fn=<NllLossBackward>)
tensor(0.2000, grad_fn=<NllLossBac

tensor(0.1499, grad_fn=<NllLossBackward>)
tensor(0.0638, grad_fn=<NllLossBackward>)
tensor(0.2040, grad_fn=<NllLossBackward>)
tensor(0.1236, grad_fn=<NllLossBackward>)
tensor(0.0818, grad_fn=<NllLossBackward>)
tensor(0.2264, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.4270, grad_fn=<NllLossBackward>)
tensor(0.1209, grad_fn=<NllLossBackward>)
tensor(0.0729, grad_fn=<NllLossBackward>)
tensor(0.0726, grad_fn=<NllLossBackward>)
tensor(0.1660, grad_fn=<NllLossBackward>)
tensor(0.1010, grad_fn=<NllLossBackward>)
tensor(0.0995, grad_fn=<NllLossBackward>)
tensor(0.2966, grad_fn=<NllLossBackward>)
tensor(0.0792, grad_fn=<NllLossBackward>)
tensor(0.1366, grad_fn=<NllLossBackward>)
tensor(0.2602, grad_fn=<NllLossBackward>)
tensor(0.2058, grad_fn=<NllLossBackward>)
tensor(0.1714, grad_fn=<NllLossBackward>)
tensor(0.1766, grad_fn=<NllLossBackward>)
tensor(0.1664, grad_fn=<NllLossBackward>)
tensor(0.0952, grad_fn=<NllLossBackward>)
tensor(0.2345, grad_fn=<NllLossBac

tensor(0.2356, grad_fn=<NllLossBackward>)
tensor(0.1640, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0818, grad_fn=<NllLossBackward>)
tensor(0.1924, grad_fn=<NllLossBackward>)
tensor(0.0687, grad_fn=<NllLossBackward>)
tensor(0.3918, grad_fn=<NllLossBackward>)
tensor(0.2630, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBackward>)
tensor(0.2941, grad_fn=<NllLossBackward>)
tensor(0.1042, grad_fn=<NllLossBackward>)
tensor(0.2533, grad_fn=<NllLossBackward>)
tensor(0.1296, grad_fn=<NllLossBackward>)
tensor(0.3033, grad_fn=<NllLossBackward>)
tensor(0.2023, grad_fn=<NllLossBackward>)
tensor(0.1446, grad_fn=<NllLossBackward>)
tensor(0.1559, grad_fn=<NllLossBackward>)
tensor(0.0683, grad_fn=<NllLossBackward>)
tensor(0.4118, grad_fn=<NllLossBackward>)
tensor(0.1520, grad_fn=<NllLossBackward>)
tensor(0.0664, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.1164, grad_fn=<NllLossBac

tensor(0.1069, grad_fn=<NllLossBackward>)
tensor(0.0343, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.2431, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBackward>)
tensor(0.1165, grad_fn=<NllLossBackward>)
tensor(0.1027, grad_fn=<NllLossBackward>)
tensor(0.0955, grad_fn=<NllLossBackward>)
tensor(0.2297, grad_fn=<NllLossBackward>)
tensor(0.1725, grad_fn=<NllLossBackward>)
tensor(0.0822, grad_fn=<NllLossBackward>)
tensor(0.2097, grad_fn=<NllLossBackward>)
tensor(0.0765, grad_fn=<NllLossBackward>)
tensor(0.0664, grad_fn=<NllLossBackward>)
tensor(0.0900, grad_fn=<NllLossBackward>)
tensor(0.1067, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.3382, grad_fn=<NllLossBackward>)
tensor(0.1561, grad_fn=<NllLossBackward>)
tensor(0.1924, grad_fn=<NllLossBackward>)
tensor(0.1079, grad_fn=<NllLossBackward>)
tensor(0.2153, grad_fn=<NllLossBackward>)
tensor(0.2819, grad_fn=<NllLossBac

tensor(0.1508, grad_fn=<NllLossBackward>)
tensor(0.1409, grad_fn=<NllLossBackward>)
tensor(0.0354, grad_fn=<NllLossBackward>)
tensor(0.1521, grad_fn=<NllLossBackward>)
tensor(0.1759, grad_fn=<NllLossBackward>)
tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.1039, grad_fn=<NllLossBackward>)
tensor(0.1858, grad_fn=<NllLossBackward>)
tensor(0.1914, grad_fn=<NllLossBackward>)
tensor(0.0969, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.0797, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.2899, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.1481, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.2157, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.1118, grad_fn=<NllLossBackward>)
tensor(0.4236, grad_fn=<NllLossBackward>)
tensor(0.1096, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBac

tensor(0.1873, grad_fn=<NllLossBackward>)
tensor(0.2472, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.1503, grad_fn=<NllLossBackward>)
tensor(0.1516, grad_fn=<NllLossBackward>)
tensor(0.1167, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.2192, grad_fn=<NllLossBackward>)
tensor(0.0767, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.2133, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.1844, grad_fn=<NllLossBackward>)
tensor(0.1747, grad_fn=<NllLossBackward>)
tensor(0.1018, grad_fn=<NllLossBackward>)
tensor(0.2079, grad_fn=<NllLossBackward>)
tensor(0.0587, grad_fn=<NllLossBackward>)
tensor(0.0747, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.4121, grad_fn=<NllLossBackward>)
tensor(0.1172, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBac

tensor(0.2586, grad_fn=<NllLossBackward>)
tensor(0.0663, grad_fn=<NllLossBackward>)
tensor(0.2007, grad_fn=<NllLossBackward>)
tensor(0.1288, grad_fn=<NllLossBackward>)
tensor(0.3092, grad_fn=<NllLossBackward>)
tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.0588, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.1197, grad_fn=<NllLossBackward>)
tensor(0.1265, grad_fn=<NllLossBackward>)
tensor(0.3216, grad_fn=<NllLossBackward>)
tensor(0.4386, grad_fn=<NllLossBackward>)
tensor(0.2171, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.1293, grad_fn=<NllLossBackward>)
tensor(0.0814, grad_fn=<NllLossBackward>)
tensor(0.1749, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.0538, grad_fn=<NllLossBackward>)
tensor(0.0673, grad_fn=<NllLossBackward>)
tensor(0.1796, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBac

tensor(0.2541, grad_fn=<NllLossBackward>)
tensor(0.1828, grad_fn=<NllLossBackward>)
tensor(0.1484, grad_fn=<NllLossBackward>)
tensor(0.2588, grad_fn=<NllLossBackward>)
tensor(0.1548, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.2901, grad_fn=<NllLossBackward>)
tensor(0.1241, grad_fn=<NllLossBackward>)
tensor(0.1746, grad_fn=<NllLossBackward>)
tensor(0.1333, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.2331, grad_fn=<NllLossBackward>)
tensor(0.1371, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0107, grad_fn=<NllLossBackward>)
tensor(0.0253, grad_fn=<NllLossBackward>)
tensor(0.2888, grad_fn=<NllLossBackward>)
tensor(0.0752, grad_fn=<NllLossBackward>)
tensor(0.1727, grad_fn=<NllLossBackward>)
tensor(0.1021, grad_fn=<NllLossBackward>)
tensor(0.3696, grad_fn=<NllLossBackward>)
tensor(0.1684, grad_fn=<NllLossBackward>)
tensor(0.2510, grad_fn=<NllLossBackward>)
tensor(0.3087, grad_fn=<NllLossBac

tensor(0.1226, grad_fn=<NllLossBackward>)
tensor(0.0041, grad_fn=<NllLossBackward>)
tensor(0.3965, grad_fn=<NllLossBackward>)
tensor(0.0126, grad_fn=<NllLossBackward>)
tensor(0.0233, grad_fn=<NllLossBackward>)
tensor(0.2385, grad_fn=<NllLossBackward>)
tensor(0.2555, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.2094, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.1991, grad_fn=<NllLossBackward>)
tensor(0.1118, grad_fn=<NllLossBackward>)
tensor(0.2575, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.2080, grad_fn=<NllLossBackward>)
tensor(0.1713, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.1384, grad_fn=<NllLossBackward>)
tensor(0.2686, grad_fn=<NllLossBackward>)
tensor(0.1464, grad_fn=<NllLossBackward>)
tensor(0.2915, grad_fn=<NllLossBackward>)
tensor(0.0587, grad_fn=<NllLossBac

tensor(0.0956, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.0511, grad_fn=<NllLossBackward>)
tensor(0.2244, grad_fn=<NllLossBackward>)
tensor(0.2718, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.0508, grad_fn=<NllLossBackward>)
tensor(0.2372, grad_fn=<NllLossBackward>)
tensor(0.1961, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.1698, grad_fn=<NllLossBackward>)
tensor(0.0993, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.1351, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.1318, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.3579, grad_fn=<NllLossBackward>)
tensor(0.0997, grad_fn=<NllLossBackward>)
tensor(0.1634, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.2748, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBac

tensor(0.0828, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.1352, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBackward>)
tensor(0.2084, grad_fn=<NllLossBackward>)
tensor(0.1876, grad_fn=<NllLossBackward>)
tensor(0.3486, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBackward>)
tensor(0.0545, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.1725, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBackward>)
tensor(0.2106, grad_fn=<NllLossBackward>)
tensor(0.1772, grad_fn=<NllLossBackward>)
tensor(0.1926, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.1348, grad_fn=<NllLossBackward>)
tensor(0.0784, grad_fn=<NllLossBackward>)
tensor(0.2102, grad_fn=<NllLossBackward>)
tensor(0.1669, grad_fn=<NllLossBackward>)
tensor(0.1886, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBac

tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.0499, grad_fn=<NllLossBackward>)
tensor(0.2948, grad_fn=<NllLossBackward>)
tensor(0.0133, grad_fn=<NllLossBackward>)
tensor(0.0386, grad_fn=<NllLossBackward>)
tensor(0.1922, grad_fn=<NllLossBackward>)
tensor(0.1061, grad_fn=<NllLossBackward>)
tensor(0.0376, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.1524, grad_fn=<NllLossBackward>)
tensor(0.0643, grad_fn=<NllLossBackward>)
tensor(0.1811, grad_fn=<NllLossBackward>)
tensor(0.2456, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.0504, grad_fn=<NllLossBackward>)
tensor(0.3919, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.2000, grad_fn=<NllLossBackward>)
tensor(0.2629, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.0408, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBac

tensor(0.2302, grad_fn=<NllLossBackward>)
tensor(0.1840, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.2405, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.3439, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.1646, grad_fn=<NllLossBackward>)
tensor(0.0727, grad_fn=<NllLossBackward>)
tensor(0.2692, grad_fn=<NllLossBackward>)
tensor(0.1041, grad_fn=<NllLossBackward>)
tensor(0.2586, grad_fn=<NllLossBackward>)
tensor(0.1246, grad_fn=<NllLossBackward>)
tensor(0.1316, grad_fn=<NllLossBackward>)
tensor(0.0632, grad_fn=<NllLossBackward>)
tensor(0.1050, grad_fn=<NllLossBackward>)
tensor(0.0198, grad_fn=<NllLossBackward>)
tensor(0.1857, grad_fn=<NllLossBackward>)
tensor(0.1333, grad_fn=<NllLossBackward>)
tensor(0.1905, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.1970, grad_fn=<NllLossBackward>)
tensor(0.2058, grad_fn=<NllLossBac

tensor(0.1350, grad_fn=<NllLossBackward>)
tensor(0.1605, grad_fn=<NllLossBackward>)
tensor(0.2540, grad_fn=<NllLossBackward>)
tensor(0.2655, grad_fn=<NllLossBackward>)
tensor(0.1250, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.2168, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.1484, grad_fn=<NllLossBackward>)
tensor(0.2247, grad_fn=<NllLossBackward>)
tensor(0.2786, grad_fn=<NllLossBackward>)
tensor(0.1448, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.3310, grad_fn=<NllLossBackward>)
tensor(0.1508, grad_fn=<NllLossBackward>)
tensor(0.0932, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.2098, grad_fn=<NllLossBackward>)
tensor(0.1434, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBackward>)
tensor(0.2372, grad_fn=<NllLossBackward>)
tensor(0.0829, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBac

tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.2073, grad_fn=<NllLossBackward>)
tensor(0.4247, grad_fn=<NllLossBackward>)
tensor(0.2028, grad_fn=<NllLossBackward>)
tensor(0.1747, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.1808, grad_fn=<NllLossBackward>)
tensor(0.2257, grad_fn=<NllLossBackward>)
tensor(0.0782, grad_fn=<NllLossBackward>)
tensor(0.0650, grad_fn=<NllLossBackward>)
tensor(0.0823, grad_fn=<NllLossBackward>)
tensor(0.2217, grad_fn=<NllLossBackward>)
tensor(0.0746, grad_fn=<NllLossBackward>)
tensor(0.2653, grad_fn=<NllLossBackward>)
tensor(0.1584, grad_fn=<NllLossBackward>)
tensor(0.0961, grad_fn=<NllLossBackward>)
tensor(0.1390, grad_fn=<NllLossBackward>)
tensor(0.1742, grad_fn=<NllLossBackward>)
tensor(0.1940, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.2948, grad_fn=<NllLossBac

tensor(0.1939, grad_fn=<NllLossBackward>)
tensor(0.0946, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.2827, grad_fn=<NllLossBackward>)
tensor(0.1841, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.1769, grad_fn=<NllLossBackward>)
tensor(0.0400, grad_fn=<NllLossBackward>)
tensor(0.0765, grad_fn=<NllLossBackward>)
tensor(0.2477, grad_fn=<NllLossBackward>)
tensor(0.1981, grad_fn=<NllLossBackward>)
tensor(0.1561, grad_fn=<NllLossBackward>)
tensor(0.2194, grad_fn=<NllLossBackward>)
tensor(0.1761, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBackward>)
tensor(0.2718, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.0789, grad_fn=<NllLossBackward>)
tensor(0.2273, grad_fn=<NllLossBackward>)
tensor(0.1456, grad_fn=<NllLossBac

tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.1778, grad_fn=<NllLossBackward>)
tensor(0.0576, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.0390, grad_fn=<NllLossBackward>)
tensor(0.1745, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBackward>)
tensor(0.1048, grad_fn=<NllLossBackward>)
tensor(0.1860, grad_fn=<NllLossBackward>)
tensor(0.1103, grad_fn=<NllLossBackward>)
tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.3424, grad_fn=<NllLossBackward>)
tensor(0.0986, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.1893, grad_fn=<NllLossBackward>)
tensor(0.1054, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.1249, grad_fn=<NllLossBackward>)
tensor(0.2782, grad_fn=<NllLossBackward>)
tensor(0.0775, grad_fn=<NllLossBackward>)
tensor(0.2492, grad_fn=<NllLossBac

tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.1666, grad_fn=<NllLossBackward>)
tensor(0.0853, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.1970, grad_fn=<NllLossBackward>)
tensor(0.2908, grad_fn=<NllLossBackward>)
tensor(0.2171, grad_fn=<NllLossBackward>)
tensor(0.3066, grad_fn=<NllLossBackward>)
tensor(0.0763, grad_fn=<NllLossBackward>)
tensor(0.1838, grad_fn=<NllLossBackward>)
tensor(0.1635, grad_fn=<NllLossBackward>)
tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.1649, grad_fn=<NllLossBackward>)
tensor(0.0968, grad_fn=<NllLossBackward>)
tensor(0.3461, grad_fn=<NllLossBackward>)
tensor(0.1539, grad_fn=<NllLossBackward>)
tensor(0.1070, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.2280, grad_fn=<NllLossBackward>)
tensor(0.1302, grad_fn=<NllLossBackward>)
tensor(0.2104, grad_fn=<NllLossBac

tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.1151, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.0707, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0248, grad_fn=<NllLossBackward>)
tensor(0.0994, grad_fn=<NllLossBackward>)
tensor(0.1423, grad_fn=<NllLossBackward>)
tensor(0.3141, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.3560, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0182, grad_fn=<NllLossBackward>)
tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.2256, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.1400, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.0241, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBac

tensor(0.2730, grad_fn=<NllLossBackward>)
tensor(0.0784, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBackward>)
tensor(0.1452, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1507, grad_fn=<NllLossBackward>)
tensor(0.0732, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.1017, grad_fn=<NllLossBackward>)
tensor(0.2107, grad_fn=<NllLossBackward>)
tensor(0.1766, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.0858, grad_fn=<NllLossBackward>)
tensor(0.3547, grad_fn=<NllLossBackward>)
tensor(0.1311, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.3781, grad_fn=<NllLossBackward>)
tensor(0.1574, grad_fn=<NllLossBackward>)
tensor(0.1622, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.0625, grad_fn=<NllLossBackward>)
tensor(0.1354, grad_fn=<NllLossBac

tensor(0.1795, grad_fn=<NllLossBackward>)
tensor(0.1304, grad_fn=<NllLossBackward>)
tensor(0.2422, grad_fn=<NllLossBackward>)
tensor(0.2057, grad_fn=<NllLossBackward>)
tensor(0.0818, grad_fn=<NllLossBackward>)
tensor(0.1374, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.1084, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1248, grad_fn=<NllLossBackward>)
tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.2352, grad_fn=<NllLossBackward>)
tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.0963, grad_fn=<NllLossBackward>)
tensor(0.2509, grad_fn=<NllLossBackward>)
tensor(0.1027, grad_fn=<NllLossBackward>)
tensor(0.1102, grad_fn=<NllLossBackward>)
tensor(0.1859, grad_fn=<NllLossBackward>)
tensor(0.0278, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.1672, grad_fn=<NllLossBackward>)
tensor(0.0520, grad_fn=<NllLossBac

tensor(0.0769, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.2134, grad_fn=<NllLossBackward>)
tensor(0.0374, grad_fn=<NllLossBackward>)
tensor(0.1773, grad_fn=<NllLossBackward>)
tensor(0.2390, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.0959, grad_fn=<NllLossBackward>)
tensor(0.0341, grad_fn=<NllLossBackward>)
tensor(0.2262, grad_fn=<NllLossBackward>)
tensor(0.1685, grad_fn=<NllLossBackward>)
tensor(0.2461, grad_fn=<NllLossBackward>)
tensor(0.0711, grad_fn=<NllLossBackward>)
tensor(0.1906, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.1107, grad_fn=<NllLossBackward>)
tensor(0.4717, grad_fn=<NllLossBackward>)
tensor(0.2086, grad_fn=<NllLossBackward>)
tensor(0.1693, grad_fn=<NllLossBackward>)
tensor(0.1265, grad_fn=<NllLossBackward>)
tensor(0.0968, grad_fn=<NllLossBackward>)
tensor(0.2509, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBac

tensor(0.2027, grad_fn=<NllLossBackward>)
tensor(0.1108, grad_fn=<NllLossBackward>)
tensor(0.0931, grad_fn=<NllLossBackward>)
tensor(0.1742, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0420, grad_fn=<NllLossBackward>)
tensor(0.2448, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.1603, grad_fn=<NllLossBackward>)
tensor(0.1496, grad_fn=<NllLossBackward>)
tensor(0.1841, grad_fn=<NllLossBackward>)
tensor(0.1946, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.3071, grad_fn=<NllLossBackward>)
tensor(0.1604, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.2179, grad_fn=<NllLossBackward>)
tensor(0.1375, grad_fn=<NllLossBac

tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.1974, grad_fn=<NllLossBackward>)
tensor(0.1614, grad_fn=<NllLossBackward>)
tensor(0.0096, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBackward>)
tensor(0.1112, grad_fn=<NllLossBackward>)
tensor(0.0823, grad_fn=<NllLossBackward>)
tensor(0.1740, grad_fn=<NllLossBackward>)
tensor(0.0457, grad_fn=<NllLossBackward>)
tensor(0.1706, grad_fn=<NllLossBackward>)
tensor(0.0074, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.3521, grad_fn=<NllLossBackward>)
tensor(0.1207, grad_fn=<NllLossBackward>)
tensor(0.4230, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.3507, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBac

tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0407, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.2214, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.1340, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0154, grad_fn=<NllLossBackward>)
tensor(0.0269, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.3145, grad_fn=<NllLossBackward>)
tensor(0.1507, grad_fn=<NllLossBackward>)
tensor(0.2209, grad_fn=<NllLossBackward>)
tensor(0.2182, grad_fn=<NllLossBackward>)
tensor(0.1877, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.1599, grad_fn=<NllLossBackward>)
tensor(0.0562, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.0731, grad_fn=<NllLossBackward>)
tensor(0.1185, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.1444, grad_fn=<NllLossBackward>)
tensor(0.1476, grad_fn=<NllLossBac

tensor(0.0230, grad_fn=<NllLossBackward>)
tensor(0.1870, grad_fn=<NllLossBackward>)
tensor(0.2635, grad_fn=<NllLossBackward>)
tensor(0.1981, grad_fn=<NllLossBackward>)
tensor(0.2541, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.1442, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.2951, grad_fn=<NllLossBackward>)
tensor(0.1063, grad_fn=<NllLossBackward>)
tensor(0.3999, grad_fn=<NllLossBackward>)
tensor(0.1758, grad_fn=<NllLossBackward>)
tensor(0.0808, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.0877, grad_fn=<NllLossBackward>)
tensor(0.0113, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.1031, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.1628, grad_fn=<NllLossBackward>)
tensor(0.2785, grad_fn=<NllLossBac

tensor(0.3131, grad_fn=<NllLossBackward>)
tensor(0.1206, grad_fn=<NllLossBackward>)
tensor(0.1474, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.0117, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.1488, grad_fn=<NllLossBackward>)
tensor(0.3626, grad_fn=<NllLossBackward>)
tensor(0.2144, grad_fn=<NllLossBackward>)
tensor(0.1710, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.2337, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.0728, grad_fn=<NllLossBackward>)
tensor(0.1084, grad_fn=<NllLossBackward>)
tensor(0.1802, grad_fn=<NllLossBackward>)
tensor(0.1691, grad_fn=<NllLossBackward>)
tensor(0.1114, grad_fn=<NllLossBackward>)
tensor(0.2860, grad_fn=<NllLossBackward>)
tensor(0.0490, grad_fn=<NllLossBackward>)
tensor(0.1845, grad_fn=<NllLossBackward>)
tensor(0.1150, grad_fn=<NllLossBac

tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.1176, grad_fn=<NllLossBackward>)
tensor(0.1380, grad_fn=<NllLossBackward>)
tensor(0.0296, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.2127, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.1789, grad_fn=<NllLossBackward>)
tensor(0.2382, grad_fn=<NllLossBackward>)
tensor(0.1947, grad_fn=<NllLossBackward>)
tensor(0.1695, grad_fn=<NllLossBackward>)
tensor(0.2345, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.0481, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.1889, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.0683, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.2962, grad_fn=<NllLossBackward>)
tensor(0.2459, grad_fn=<NllLossBac

tensor(0.0729, grad_fn=<NllLossBackward>)
tensor(0.0332, grad_fn=<NllLossBackward>)
tensor(0.0827, grad_fn=<NllLossBackward>)
tensor(0.2418, grad_fn=<NllLossBackward>)
tensor(0.2717, grad_fn=<NllLossBackward>)
tensor(0.2675, grad_fn=<NllLossBackward>)
tensor(0.0016, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.2979, grad_fn=<NllLossBackward>)
tensor(0.1749, grad_fn=<NllLossBackward>)
tensor(0.1548, grad_fn=<NllLossBackward>)
tensor(0.2595, grad_fn=<NllLossBackward>)
tensor(0.1826, grad_fn=<NllLossBackward>)
tensor(0.1138, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBackward>)
tensor(0.1244, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.1757, grad_fn=<NllLossBackward>)
tensor(0.1854, grad_fn=<NllLossBackward>)
tensor(0.0484, grad_fn=<NllLossBackward>)
tensor(0.1365, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.1818, grad_fn=<NllLossBac

tensor(0.1647, grad_fn=<NllLossBackward>)
tensor(0.0350, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.0153, grad_fn=<NllLossBackward>)
tensor(0.1406, grad_fn=<NllLossBackward>)
tensor(0.1894, grad_fn=<NllLossBackward>)
tensor(0.1358, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBackward>)
tensor(0.0134, grad_fn=<NllLossBackward>)
tensor(0.3442, grad_fn=<NllLossBackward>)
tensor(0.2564, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.0426, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.1665, grad_fn=<NllLossBackward>)
tensor(0.0581, grad_fn=<NllLossBackward>)
tensor(0.2070, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.1666, grad_fn=<NllLossBackward>)
tensor(0.1038, grad_fn=<NllLossBackward>)
tensor(0.0991, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.1404, grad_fn=<NllLossBackward>)
tensor(0.1188, grad_fn=<NllLossBac

tensor(0.1977, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0880, grad_fn=<NllLossBackward>)
tensor(0.0516, grad_fn=<NllLossBackward>)
tensor(0.2875, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.2223, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.1094, grad_fn=<NllLossBackward>)
tensor(0.2021, grad_fn=<NllLossBackward>)
tensor(0.2884, grad_fn=<NllLossBackward>)
tensor(0.1095, grad_fn=<NllLossBackward>)
tensor(0.1078, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.1274, grad_fn=<NllLossBackward>)
tensor(0.3065, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.1327, grad_fn=<NllLossBackward>)
tensor(0.1631, grad_fn=<NllLossBackward>)
tensor(0.1283, grad_fn=<NllLossBackward>)
tensor(0.1141, grad_fn=<NllLossBac

tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.1955, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.0803, grad_fn=<NllLossBackward>)
tensor(0.1818, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.1056, grad_fn=<NllLossBackward>)
tensor(0.1752, grad_fn=<NllLossBackward>)
tensor(0.1690, grad_fn=<NllLossBackward>)
tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.1338, grad_fn=<NllLossBackward>)
tensor(0.0963, grad_fn=<NllLossBackward>)
tensor(0.2038, grad_fn=<NllLossBackward>)
tensor(0.1519, grad_fn=<NllLossBackward>)
tensor(0.1122, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.1361, grad_fn=<NllLossBackward>)
tensor(0.1046, grad_fn=<NllLossBackward>)
tensor(0.0695, grad_fn=<NllLossBackward>)
tensor(0.1588, grad_fn=<NllLossBackward>)
tensor(0.0811, grad_fn=<NllLossBackward>)
tensor(0.0455, grad_fn=<NllLossBac

tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.1879, grad_fn=<NllLossBackward>)
tensor(0.0917, grad_fn=<NllLossBackward>)
tensor(0.3296, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.3336, grad_fn=<NllLossBackward>)
tensor(0.1223, grad_fn=<NllLossBackward>)
tensor(0.2062, grad_fn=<NllLossBackward>)
tensor(0.2480, grad_fn=<NllLossBackward>)
tensor(0.1462, grad_fn=<NllLossBackward>)
tensor(0.2287, grad_fn=<NllLossBackward>)
tensor(0.1919, grad_fn=<NllLossBackward>)
tensor(0.2233, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.2229, grad_fn=<NllLossBackward>)
tensor(0.3282, grad_fn=<NllLossBackward>)
tensor(0.1200, grad_fn=<NllLossBackward>)
tensor(0.0910, grad_fn=<NllLossBackward>)
tensor(0.0523, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.2785, grad_fn=<NllLossBackward>)
tensor(0.0542, grad_fn=<NllLossBac

tensor(0.0308, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.1298, grad_fn=<NllLossBackward>)
tensor(0.0273, grad_fn=<NllLossBackward>)
tensor(0.0944, grad_fn=<NllLossBackward>)
tensor(0.2565, grad_fn=<NllLossBackward>)
tensor(0.0646, grad_fn=<NllLossBackward>)
tensor(0.0854, grad_fn=<NllLossBackward>)
tensor(0.1144, grad_fn=<NllLossBackward>)
tensor(0.0308, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.1435, grad_fn=<NllLossBackward>)
tensor(0.1810, grad_fn=<NllLossBackward>)
tensor(0.1352, grad_fn=<NllLossBackward>)
tensor(0.1066, grad_fn=<NllLossBackward>)
tensor(0.1213, grad_fn=<NllLossBackward>)
tensor(0.0138, grad_fn=<NllLossBackward>)
tensor(0.2703, grad_fn=<NllLossBackward>)
tensor(0.1402, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.1507, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.3031, grad_fn=<NllLossBackward>)
tensor(0.1898, grad_fn=<NllLossBac

tensor(0.2060, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.2182, grad_fn=<NllLossBackward>)
tensor(0.0457, grad_fn=<NllLossBackward>)
tensor(0.2882, grad_fn=<NllLossBackward>)
tensor(0.1821, grad_fn=<NllLossBackward>)
tensor(0.2357, grad_fn=<NllLossBackward>)
tensor(0.1000, grad_fn=<NllLossBackward>)
tensor(0.1304, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.0496, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.1220, grad_fn=<NllLossBackward>)
tensor(0.1904, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0167, grad_fn=<NllLossBackward>)
tensor(0.3193, grad_fn=<NllLossBackward>)
tensor(0.0368, grad_fn=<NllLossBackward>)
tensor(0.4745, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.0484, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.1574, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBac

tensor(0.2426, grad_fn=<NllLossBackward>)
tensor(0.0565, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.3537, grad_fn=<NllLossBackward>)
tensor(0.1385, grad_fn=<NllLossBackward>)
tensor(0.0949, grad_fn=<NllLossBackward>)
tensor(0.2174, grad_fn=<NllLossBackward>)
tensor(0.0598, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.2662, grad_fn=<NllLossBackward>)
tensor(0.1361, grad_fn=<NllLossBackward>)
tensor(0.2226, grad_fn=<NllLossBackward>)
tensor(0.2916, grad_fn=<NllLossBackward>)
tensor(0.1275, grad_fn=<NllLossBackward>)
tensor(0.1300, grad_fn=<NllLossBackward>)
tensor(0.1225, grad_fn=<NllLossBackward>)
tensor(0.4155, grad_fn=<NllLossBackward>)
tensor(0.1941, grad_fn=<NllLossBackward>)
tensor(0.2144, grad_fn=<NllLossBackward>)
tensor(0.3752, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.1782, grad_fn=<NllLossBac

tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.0552, grad_fn=<NllLossBackward>)
tensor(0.1659, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.2888, grad_fn=<NllLossBackward>)
tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.2679, grad_fn=<NllLossBackward>)
tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.3483, grad_fn=<NllLossBackward>)
tensor(0.1299, grad_fn=<NllLossBackward>)
tensor(0.0900, grad_fn=<NllLossBackward>)
tensor(0.1465, grad_fn=<NllLossBackward>)
tensor(0.1447, grad_fn=<NllLossBackward>)
tensor(0.1209, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBackward>)
tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.1623, grad_fn=<NllLossBackward>)
tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.1173, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.1259, grad_fn=<NllLossBackward>)
tensor(0.2191, grad_fn=<NllLossBac

tensor(0.0641, grad_fn=<NllLossBackward>)
tensor(0.0560, grad_fn=<NllLossBackward>)
tensor(0.0653, grad_fn=<NllLossBackward>)
tensor(0.1688, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0750, grad_fn=<NllLossBackward>)
tensor(0.0284, grad_fn=<NllLossBackward>)
tensor(0.1784, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBackward>)
tensor(0.0901, grad_fn=<NllLossBackward>)
tensor(0.1032, grad_fn=<NllLossBackward>)
tensor(0.1058, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.0782, grad_fn=<NllLossBackward>)
tensor(0.2087, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.3165, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.0981, grad_fn=<NllLossBackward>)
tensor(0.1180, grad_fn=<NllLossBackward>)
tensor(0.1700, grad_fn=<NllLossBackward>)
tensor(0.1874, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBac

tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.1043, grad_fn=<NllLossBackward>)
tensor(0.0519, grad_fn=<NllLossBackward>)
tensor(0.1820, grad_fn=<NllLossBackward>)
tensor(0.2174, grad_fn=<NllLossBackward>)
tensor(0.1774, grad_fn=<NllLossBackward>)
tensor(0.5596, grad_fn=<NllLossBackward>)
tensor(0.2621, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.1636, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.1601, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.0700, grad_fn=<NllLossBackward>)
tensor(0.2676, grad_fn=<NllLossBackward>)
tensor(0.0726, grad_fn=<NllLossBackward>)
tensor(0.1741, grad_fn=<NllLossBackward>)
tensor(0.1119, grad_fn=<NllLossBackward>)
tensor(0.1765, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.0491, grad_fn=<NllLossBackward>)
tensor(0.0929, grad_fn=<NllLossBackward>)
tensor(0.1307, grad_fn=<NllLossBackward>)
tensor(0.2059, grad_fn=<NllLossBac

tensor(0.2030, grad_fn=<NllLossBackward>)
tensor(0.1978, grad_fn=<NllLossBackward>)
tensor(0.0281, grad_fn=<NllLossBackward>)
tensor(0.1477, grad_fn=<NllLossBackward>)
tensor(0.1866, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.1213, grad_fn=<NllLossBackward>)
tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.2193, grad_fn=<NllLossBackward>)
tensor(0.2249, grad_fn=<NllLossBackward>)
tensor(0.1991, grad_fn=<NllLossBackward>)
tensor(0.1283, grad_fn=<NllLossBackward>)
tensor(0.0401, grad_fn=<NllLossBackward>)
tensor(0.1373, grad_fn=<NllLossBackward>)
tensor(0.2418, grad_fn=<NllLossBackward>)
tensor(0.0918, grad_fn=<NllLossBackward>)
tensor(0.0248, grad_fn=<NllLossBackward>)
tensor(0.3027, grad_fn=<NllLossBackward>)
tensor(0.3984, grad_fn=<NllLossBackward>)
tensor(0.0521, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.0759, grad_fn=<NllLossBackward>)
tensor(0.1132, grad_fn=<NllLossBackward>)
tensor(0.0934, grad_fn=<NllLossBac

tensor(0.1242, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0379, grad_fn=<NllLossBackward>)
tensor(0.0302, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.1378, grad_fn=<NllLossBackward>)
tensor(0.1157, grad_fn=<NllLossBackward>)
tensor(0.0878, grad_fn=<NllLossBackward>)
tensor(0.0660, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.2188, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0113, grad_fn=<NllLossBackward>)
tensor(0.2093, grad_fn=<NllLossBackward>)
tensor(0.0923, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.1768, grad_fn=<NllLossBackward>)
tensor(0.3512, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.0369, grad_fn=<NllLossBackward>)
tensor(0.0304, grad_fn=<NllLossBackward>)
tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.1116, grad_fn=<NllLossBac

tensor(0.1831, grad_fn=<NllLossBackward>)
tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.1985, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.2476, grad_fn=<NllLossBackward>)
tensor(0.4113, grad_fn=<NllLossBackward>)
tensor(0.0482, grad_fn=<NllLossBackward>)
tensor(0.1437, grad_fn=<NllLossBackward>)
tensor(0.1683, grad_fn=<NllLossBackward>)
tensor(0.2891, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.2643, grad_fn=<NllLossBackward>)
tensor(0.0526, grad_fn=<NllLossBackward>)
tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.4017, grad_fn=<NllLossBackward>)
tensor(0.0792, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBackward>)
tensor(0.2401, grad_fn=<NllLossBackward>)
tensor(0.0606, grad_fn=<NllLossBac

tensor(0.0341, grad_fn=<NllLossBackward>)
tensor(0.1649, grad_fn=<NllLossBackward>)
tensor(0.2655, grad_fn=<NllLossBackward>)
tensor(0.1392, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.1039, grad_fn=<NllLossBackward>)
tensor(0.0788, grad_fn=<NllLossBackward>)
tensor(0.0824, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.1682, grad_fn=<NllLossBackward>)
tensor(0.1927, grad_fn=<NllLossBackward>)
tensor(0.0843, grad_fn=<NllLossBackward>)
tensor(0.3011, grad_fn=<NllLossBackward>)
tensor(0.1895, grad_fn=<NllLossBackward>)
tensor(0.1963, grad_fn=<NllLossBackward>)
tensor(0.1307, grad_fn=<NllLossBackward>)
tensor(0.0756, grad_fn=<NllLossBackward>)
tensor(0.0978, grad_fn=<NllLossBackward>)
tensor(0.1544, grad_fn=<NllLossBackward>)
tensor(0.0634, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBackward>)
tensor(0.0771, grad_fn=<NllLossBac

tensor(0.0932, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.1074, grad_fn=<NllLossBackward>)
tensor(0.2285, grad_fn=<NllLossBackward>)
tensor(0.0727, grad_fn=<NllLossBackward>)
tensor(0.0175, grad_fn=<NllLossBackward>)
tensor(0.2159, grad_fn=<NllLossBackward>)
tensor(0.1082, grad_fn=<NllLossBackward>)
tensor(0.1436, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.2023, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBackward>)
tensor(0.1498, grad_fn=<NllLossBackward>)
tensor(0.1287, grad_fn=<NllLossBackward>)
tensor(0.2195, grad_fn=<NllLossBackward>)
tensor(0.0770, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.1457, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0991, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.0593, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBac

tensor(0.0994, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.1328, grad_fn=<NllLossBackward>)
tensor(0.1202, grad_fn=<NllLossBackward>)
tensor(0.1332, grad_fn=<NllLossBackward>)
tensor(0.1339, grad_fn=<NllLossBackward>)
tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.2838, grad_fn=<NllLossBackward>)
tensor(0.0840, grad_fn=<NllLossBackward>)
tensor(0.1744, grad_fn=<NllLossBackward>)
tensor(0.1253, grad_fn=<NllLossBackward>)
tensor(0.0243, grad_fn=<NllLossBackward>)
tensor(0.0301, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.1134, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
tensor(0.1756, grad_fn=<NllLossBackward>)
tensor(0.2471, grad_fn=<NllLossBackward>)
tensor(0.2416, grad_fn=<NllLossBackward>)
tensor(0.3065, grad_fn=<NllLossBackward>)
tensor(0.1562, grad_fn=<NllLossBackward>)
tensor(0.1109, grad_fn=<NllLossBackward>)
tensor(0.2829, grad_fn=<NllLossBackward>)
tensor(0.2832, grad_fn=<NllLossBac

tensor(0.2084, grad_fn=<NllLossBackward>)
tensor(0.0842, grad_fn=<NllLossBackward>)
tensor(0.2097, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBackward>)
tensor(0.2418, grad_fn=<NllLossBackward>)
tensor(0.1180, grad_fn=<NllLossBackward>)
tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.2531, grad_fn=<NllLossBackward>)
tensor(0.0911, grad_fn=<NllLossBackward>)
tensor(0.2348, grad_fn=<NllLossBackward>)
tensor(0.2123, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.1476, grad_fn=<NllLossBackward>)
tensor(0.0634, grad_fn=<NllLossBackward>)
tensor(0.1510, grad_fn=<NllLossBackward>)
tensor(0.0562, grad_fn=<NllLossBackward>)
tensor(0.2358, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.1133, grad_fn=<NllLossBackward>)
tensor(0.2354, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.2033, grad_fn=<NllLossBackward>)
tensor(0.1767, grad_fn=<NllLossBac

tensor(0.1330, grad_fn=<NllLossBackward>)
tensor(0.0210, grad_fn=<NllLossBackward>)
tensor(0.1763, grad_fn=<NllLossBackward>)
tensor(0.0534, grad_fn=<NllLossBackward>)
tensor(0.0350, grad_fn=<NllLossBackward>)
tensor(0.1123, grad_fn=<NllLossBackward>)
tensor(0.2551, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.1999, grad_fn=<NllLossBackward>)
tensor(0.1231, grad_fn=<NllLossBackward>)
tensor(0.0998, grad_fn=<NllLossBackward>)
tensor(0.0828, grad_fn=<NllLossBackward>)
tensor(0.2738, grad_fn=<NllLossBackward>)
tensor(0.1328, grad_fn=<NllLossBackward>)
tensor(0.0194, grad_fn=<NllLossBackward>)
tensor(0.1391, grad_fn=<NllLossBackward>)
tensor(0.1104, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.0611, grad_fn=<NllLossBackward>)
tensor(0.0399, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.0775, grad_fn=<NllLossBackward>)
tensor(0.6371, grad_fn=<NllLossBac

tensor(0.1340, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.1778, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.2528, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1160, grad_fn=<NllLossBackward>)
tensor(0.1327, grad_fn=<NllLossBackward>)
tensor(0.0250, grad_fn=<NllLossBackward>)
tensor(0.1920, grad_fn=<NllLossBackward>)
tensor(0.0728, grad_fn=<NllLossBackward>)
tensor(0.0647, grad_fn=<NllLossBackward>)
tensor(0.1052, grad_fn=<NllLossBackward>)
tensor(0.0074, grad_fn=<NllLossBackward>)
tensor(0.2581, grad_fn=<NllLossBackward>)
tensor(0.1053, grad_fn=<NllLossBackward>)
tensor(0.1347, grad_fn=<NllLossBackward>)
tensor(0.1874, grad_fn=<NllLossBackward>)
tensor(0.2359, grad_fn=<NllLossBackward>)
tensor(0.0221, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.0829, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBac

tensor(0.0115, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.2911, grad_fn=<NllLossBackward>)
tensor(0.3232, grad_fn=<NllLossBackward>)
tensor(0.0866, grad_fn=<NllLossBackward>)
tensor(0.0207, grad_fn=<NllLossBackward>)
tensor(0.0292, grad_fn=<NllLossBackward>)
tensor(0.0581, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.0124, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.1135, grad_fn=<NllLossBackward>)
tensor(0.2257, grad_fn=<NllLossBackward>)
tensor(0.2014, grad_fn=<NllLossBackward>)
tensor(0.1525, grad_fn=<NllLossBackward>)
tensor(0.1492, grad_fn=<NllLossBackward>)
tensor(0.1632, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.0457, grad_fn=<NllLossBackward>)
tensor(0.2439, grad_fn=<NllLossBackward>)
tensor(0.2388, grad_fn=<NllLossBackward>)
tensor(0.1184, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBac

tensor(0.1547, grad_fn=<NllLossBackward>)
tensor(0.1858, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.1259, grad_fn=<NllLossBackward>)
tensor(0.1715, grad_fn=<NllLossBackward>)
tensor(0.1056, grad_fn=<NllLossBackward>)
tensor(0.1817, grad_fn=<NllLossBackward>)
tensor(0.2568, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.1550, grad_fn=<NllLossBackward>)
tensor(0.1983, grad_fn=<NllLossBackward>)
tensor(0.1332, grad_fn=<NllLossBackward>)
tensor(0.0941, grad_fn=<NllLossBackward>)
tensor(0.0892, grad_fn=<NllLossBackward>)
tensor(0.0191, grad_fn=<NllLossBackward>)
tensor(0.0797, grad_fn=<NllLossBackward>)
tensor(0.0479, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.1634, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1365, grad_fn=<NllLossBackward>)
tensor(0.0918, grad_fn=<NllLossBackward>)
tensor(0.1279, grad_fn=<NllLossBac

tensor(0.0690, grad_fn=<NllLossBackward>)
tensor(0.6284, grad_fn=<NllLossBackward>)
tensor(0.3370, grad_fn=<NllLossBackward>)
tensor(0.0446, grad_fn=<NllLossBackward>)
tensor(0.3293, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.2592, grad_fn=<NllLossBackward>)
tensor(0.1946, grad_fn=<NllLossBackward>)
tensor(0.2332, grad_fn=<NllLossBackward>)
tensor(0.1013, grad_fn=<NllLossBackward>)
tensor(0.1247, grad_fn=<NllLossBackward>)
tensor(0.0678, grad_fn=<NllLossBackward>)
tensor(0.1560, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBackward>)
tensor(0.1499, grad_fn=<NllLossBackward>)
tensor(0.1080, grad_fn=<NllLossBackward>)
tensor(0.3357, grad_fn=<NllLossBackward>)
tensor(0.2494, grad_fn=<NllLossBackward>)
tensor(0.2704, grad_fn=<NllLossBackward>)
tensor(0.0179, grad_fn=<NllLossBackward>)
tensor(0.1034, grad_fn=<NllLossBackward>)
tensor(0.0462, grad_fn=<NllLossBackward>)
tensor(0.5134, grad_fn=<NllLossBackward>)
tensor(0.1740, grad_fn=<NllLossBac

tensor(0.1590, grad_fn=<NllLossBackward>)
tensor(0.1007, grad_fn=<NllLossBackward>)
tensor(0.2809, grad_fn=<NllLossBackward>)
tensor(0.0625, grad_fn=<NllLossBackward>)
tensor(0.3431, grad_fn=<NllLossBackward>)
tensor(0.2421, grad_fn=<NllLossBackward>)
tensor(0.2479, grad_fn=<NllLossBackward>)
tensor(0.0453, grad_fn=<NllLossBackward>)
tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.1794, grad_fn=<NllLossBackward>)
tensor(0.1263, grad_fn=<NllLossBackward>)
tensor(0.1377, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.1721, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0839, grad_fn=<NllLossBackward>)
tensor(0.1631, grad_fn=<NllLossBackward>)
tensor(0.0814, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.2234, grad_fn=<NllLossBackward>)
tensor(0.1228, grad_fn=<NllLossBackward>)
tensor(0.1523, grad_fn=<NllLossBac

tensor(0.2122, grad_fn=<NllLossBackward>)
tensor(0.0562, grad_fn=<NllLossBackward>)
tensor(0.1586, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.0509, grad_fn=<NllLossBackward>)
tensor(0.4702, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0376, grad_fn=<NllLossBackward>)
tensor(0.1465, grad_fn=<NllLossBackward>)
tensor(0.1790, grad_fn=<NllLossBackward>)
tensor(0.0800, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.1510, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.1975, grad_fn=<NllLossBackward>)
tensor(0.1158, grad_fn=<NllLossBackward>)
tensor(0.1686, grad_fn=<NllLossBackward>)
tensor(0.2931, grad_fn=<NllLossBackward>)
tensor(0.0972, grad_fn=<NllLossBac

tensor(0.5729, grad_fn=<NllLossBackward>)
tensor(0.1000, grad_fn=<NllLossBackward>)
tensor(0.0951, grad_fn=<NllLossBackward>)
tensor(0.1471, grad_fn=<NllLossBackward>)
tensor(0.2906, grad_fn=<NllLossBackward>)
tensor(0.1610, grad_fn=<NllLossBackward>)
tensor(0.1009, grad_fn=<NllLossBackward>)
tensor(0.0939, grad_fn=<NllLossBackward>)
tensor(0.1319, grad_fn=<NllLossBackward>)
tensor(0.1190, grad_fn=<NllLossBackward>)
tensor(0.1857, grad_fn=<NllLossBackward>)
tensor(0.1744, grad_fn=<NllLossBackward>)
tensor(0.2331, grad_fn=<NllLossBackward>)
tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.0515, grad_fn=<NllLossBackward>)
tensor(0.0164, grad_fn=<NllLossBackward>)
tensor(0.2547, grad_fn=<NllLossBackward>)
tensor(0.0209, grad_fn=<NllLossBackward>)
tensor(0.3262, grad_fn=<NllLossBackward>)
tensor(0.0946, grad_fn=<NllLossBackward>)
tensor(0.2327, grad_fn=<NllLossBackward>)
tensor(0.2106, grad_fn=<NllLossBackward>)
tensor(0.1569, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBac

tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.0119, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.2738, grad_fn=<NllLossBackward>)
tensor(0.0357, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1762, grad_fn=<NllLossBackward>)
tensor(0.0766, grad_fn=<NllLossBackward>)
tensor(0.0055, grad_fn=<NllLossBackward>)
tensor(0.2088, grad_fn=<NllLossBackward>)
tensor(0.0085, grad_fn=<NllLossBackward>)
tensor(0.0989, grad_fn=<NllLossBackward>)
tensor(0.2061, grad_fn=<NllLossBackward>)
tensor(0.1005, grad_fn=<NllLossBackward>)
tensor(0.0047, grad_fn=<NllLossBackward>)
tensor(0.1799, grad_fn=<NllLossBackward>)
tensor(0.3071, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.1294, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.1380, grad_fn=<NllLossBackward>)
tensor(0.1454, grad_fn=<NllLossBac

tensor(0.0302, grad_fn=<NllLossBackward>)
tensor(0.0986, grad_fn=<NllLossBackward>)
tensor(0.1657, grad_fn=<NllLossBackward>)
tensor(0.0646, grad_fn=<NllLossBackward>)
tensor(0.0232, grad_fn=<NllLossBackward>)
tensor(0.2790, grad_fn=<NllLossBackward>)
tensor(0.1380, grad_fn=<NllLossBackward>)
tensor(0.0162, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.0083, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.1429, grad_fn=<NllLossBackward>)
tensor(0.0691, grad_fn=<NllLossBackward>)
tensor(0.0856, grad_fn=<NllLossBackward>)
tensor(0.0606, grad_fn=<NllLossBackward>)
tensor(0.0875, grad_fn=<NllLossBackward>)
tensor(0.2139, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.0830, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.1941, grad_fn=<NllLossBackward>)
tensor(0.2095, grad_fn=<NllLossBac

tensor(0.0170, grad_fn=<NllLossBackward>)
tensor(0.0682, grad_fn=<NllLossBackward>)
tensor(0.2201, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.3150, grad_fn=<NllLossBackward>)
tensor(0.0826, grad_fn=<NllLossBackward>)
tensor(0.2807, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.1618, grad_fn=<NllLossBackward>)
tensor(0.1477, grad_fn=<NllLossBackward>)
tensor(0.1270, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.2781, grad_fn=<NllLossBackward>)
tensor(0.0106, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.0211, grad_fn=<NllLossBackward>)
tensor(0.0608, grad_fn=<NllLossBackward>)
tensor(0.0792, grad_fn=<NllLossBac

tensor(0.1884, grad_fn=<NllLossBackward>)
tensor(0.3035, grad_fn=<NllLossBackward>)
tensor(0.2204, grad_fn=<NllLossBackward>)
tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.0853, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.1705, grad_fn=<NllLossBackward>)
tensor(0.1235, grad_fn=<NllLossBackward>)
tensor(0.1382, grad_fn=<NllLossBackward>)
tensor(0.0603, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.3607, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.0662, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.2225, grad_fn=<NllLossBackward>)
tensor(0.1975, grad_fn=<NllLossBackward>)
tensor(0.0388, grad_fn=<NllLossBackward>)
tensor(0.0553, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.1642, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBac

tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.1356, grad_fn=<NllLossBackward>)
tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.2845, grad_fn=<NllLossBackward>)
tensor(0.1671, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.0370, grad_fn=<NllLossBackward>)
tensor(0.1033, grad_fn=<NllLossBackward>)
tensor(0.0558, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.1116, grad_fn=<NllLossBackward>)
tensor(0.1182, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0384, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.0403, grad_fn=<NllLossBackward>)
tensor(0.0512, grad_fn=<NllLossBackward>)
tensor(0.0581, grad_fn=<NllLossBackward>)
tensor(0.0178, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBackward>)
tensor(0.1551, grad_fn=<NllLossBackward>)
tensor(0.1287, grad_fn=<NllLossBac

tensor(0.0717, grad_fn=<NllLossBackward>)
tensor(0.1685, grad_fn=<NllLossBackward>)
tensor(0.2103, grad_fn=<NllLossBackward>)
tensor(0.0398, grad_fn=<NllLossBackward>)
tensor(0.1259, grad_fn=<NllLossBackward>)
tensor(0.0658, grad_fn=<NllLossBackward>)
tensor(0.0992, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.1211, grad_fn=<NllLossBackward>)
tensor(0.1719, grad_fn=<NllLossBackward>)
tensor(0.1072, grad_fn=<NllLossBackward>)
tensor(0.2803, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.0193, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.1586, grad_fn=<NllLossBackward>)
tensor(0.1433, grad_fn=<NllLossBackward>)
tensor(0.2413, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBac

tensor(0.2158, grad_fn=<NllLossBackward>)
tensor(0.1506, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.2099, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.1413, grad_fn=<NllLossBackward>)
tensor(0.1124, grad_fn=<NllLossBackward>)
tensor(0.0986, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.1764, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.2843, grad_fn=<NllLossBackward>)
tensor(0.2338, grad_fn=<NllLossBackward>)
tensor(0.1614, grad_fn=<NllLossBackward>)
tensor(0.1879, grad_fn=<NllLossBackward>)
tensor(0.0213, grad_fn=<NllLossBackward>)
tensor(0.4244, grad_fn=<NllLossBackward>)
tensor(0.0670, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0951, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.2057, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBac

tensor(0.1441, grad_fn=<NllLossBackward>)
tensor(0.0620, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0960, grad_fn=<NllLossBackward>)
tensor(0.1000, grad_fn=<NllLossBackward>)
tensor(0.1895, grad_fn=<NllLossBackward>)
tensor(0.1656, grad_fn=<NllLossBackward>)
tensor(0.1026, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBackward>)
tensor(0.2389, grad_fn=<NllLossBackward>)
tensor(0.0043, grad_fn=<NllLossBackward>)
tensor(0.1885, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
tensor(0.1411, grad_fn=<NllLossBackward>)
tensor(0.0687, grad_fn=<NllLossBackward>)
tensor(0.1487, grad_fn=<NllLossBackward>)
tensor(0.0261, grad_fn=<NllLossBackward>)
tensor(0.1059, grad_fn=<NllLossBackward>)
tensor(0.1002, grad_fn=<NllLossBackward>)
tensor(0.0420, grad_fn=<NllLossBackward>)
tensor(0.0729, grad_fn=<NllLossBackward>)
tensor(0.0319, grad_fn=<NllLossBackward>)
tensor(0.0092, grad_fn=<NllLossBackward>)
tensor(0.1555, grad_fn=<NllLossBac

tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.1223, grad_fn=<NllLossBackward>)
tensor(0.0896, grad_fn=<NllLossBackward>)
tensor(0.1146, grad_fn=<NllLossBackward>)
tensor(0.0127, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.3398, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.0565, grad_fn=<NllLossBackward>)
tensor(0.1063, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0300, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.0788, grad_fn=<NllLossBackward>)
tensor(0.1194, grad_fn=<NllLossBackward>)
tensor(0.1154, grad_fn=<NllLossBackward>)
tensor(0.1301, grad_fn=<NllLossBackward>)
tensor(0.0674, grad_fn=<NllLossBackward>)
tensor(0.0925, grad_fn=<NllLossBackward>)
tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.1737, grad_fn=<NllLossBackward>)
tensor(0.1963, grad_fn=<NllLossBackward>)
tensor(0.0455, grad_fn=<NllLossBackward>)
tensor(0.0247, grad_fn=<NllLossBac

tensor(0.0460, grad_fn=<NllLossBackward>)
tensor(0.1015, grad_fn=<NllLossBackward>)
tensor(0.1147, grad_fn=<NllLossBackward>)
tensor(0.4560, grad_fn=<NllLossBackward>)
tensor(0.1236, grad_fn=<NllLossBackward>)
tensor(0.2007, grad_fn=<NllLossBackward>)
tensor(0.1126, grad_fn=<NllLossBackward>)
tensor(0.1306, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0396, grad_fn=<NllLossBackward>)
tensor(0.2291, grad_fn=<NllLossBackward>)
tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.1983, grad_fn=<NllLossBackward>)
tensor(0.1443, grad_fn=<NllLossBackward>)
tensor(0.1093, grad_fn=<NllLossBackward>)
tensor(0.0971, grad_fn=<NllLossBackward>)
tensor(0.0204, grad_fn=<NllLossBackward>)
tensor(0.0689, grad_fn=<NllLossBackward>)
tensor(0.1384, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.1063, grad_fn=<NllLossBackward>)
tensor(0.1609, grad_fn=<NllLossBac

tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.0163, grad_fn=<NllLossBackward>)
tensor(0.1977, grad_fn=<NllLossBackward>)
tensor(0.3421, grad_fn=<NllLossBackward>)
tensor(0.1385, grad_fn=<NllLossBackward>)
tensor(0.0629, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBackward>)
tensor(0.1910, grad_fn=<NllLossBackward>)
tensor(0.0985, grad_fn=<NllLossBackward>)
tensor(0.2678, grad_fn=<NllLossBackward>)
tensor(0.0893, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
tensor(0.1010, grad_fn=<NllLossBackward>)
tensor(0.1222, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.2511, grad_fn=<NllLossBackward>)
tensor(0.0205, grad_fn=<NllLossBackward>)
tensor(0.1221, grad_fn=<NllLossBackward>)
tensor(0.0339, grad_fn=<NllLossBackward>)
tensor(0.0967, grad_fn=<NllLossBackward>)
tensor(0.1696, grad_fn=<NllLossBac

tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.2184, grad_fn=<NllLossBackward>)
tensor(0.0827, grad_fn=<NllLossBackward>)
tensor(0.2463, grad_fn=<NllLossBackward>)
tensor(0.0288, grad_fn=<NllLossBackward>)
tensor(0.0761, grad_fn=<NllLossBackward>)
tensor(0.0112, grad_fn=<NllLossBackward>)
tensor(0.0891, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.2047, grad_fn=<NllLossBackward>)
tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.2611, grad_fn=<NllLossBackward>)
tensor(0.1233, grad_fn=<NllLossBackward>)
tensor(0.2360, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.2257, grad_fn=<NllLossBackward>)
tensor(0.1869, grad_fn=<NllLossBackward>)
tensor(0.0523, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.0246, grad_fn=<NllLossBac

tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0872, grad_fn=<NllLossBackward>)
tensor(0.0889, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.1871, grad_fn=<NllLossBackward>)
tensor(0.0803, grad_fn=<NllLossBackward>)
tensor(0.0598, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.2602, grad_fn=<NllLossBackward>)
tensor(0.1192, grad_fn=<NllLossBackward>)
tensor(0.2262, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.1460, grad_fn=<NllLossBackward>)
tensor(0.1196, grad_fn=<NllLossBackward>)
tensor(0.1343, grad_fn=<NllLossBackward>)
tensor(0.0078, grad_fn=<NllLossBackward>)
tensor(0.0405, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.0603, grad_fn=<NllLossBackward>)
tensor(0.2531, grad_fn=<NllLossBackward>)
tensor(0.1208, grad_fn=<NllLossBac

tensor(0.1287, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.1951, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.0334, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.0267, grad_fn=<NllLossBackward>)
tensor(0.1135, grad_fn=<NllLossBackward>)
tensor(0.2100, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.0368, grad_fn=<NllLossBackward>)
tensor(0.0597, grad_fn=<NllLossBackward>)
tensor(0.1693, grad_fn=<NllLossBackward>)
tensor(0.1074, grad_fn=<NllLossBackward>)
tensor(0.1331, grad_fn=<NllLossBackward>)
tensor(0.0213, grad_fn=<NllLossBackward>)
tensor(0.1031, grad_fn=<NllLossBackward>)
tensor(0.1146, grad_fn=<NllLossBackward>)
tensor(0.2571, grad_fn=<NllLossBackward>)
tensor(0.2182, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.0828, grad_fn=<NllLossBac

tensor(0.3574, grad_fn=<NllLossBackward>)
tensor(0.1098, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.0424, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.2204, grad_fn=<NllLossBackward>)
tensor(0.1650, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.0077, grad_fn=<NllLossBackward>)
tensor(0.0071, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.3259, grad_fn=<NllLossBackward>)
tensor(0.0461, grad_fn=<NllLossBackward>)
tensor(0.2104, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.0296, grad_fn=<NllLossBackward>)
tensor(0.1817, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.1306, grad_fn=<NllLossBackward>)
tensor(0.2847, grad_fn=<NllLossBackward>)
tensor(0.1010, grad_fn=<NllLossBackward>)
tensor(0.0648, grad_fn=<NllLossBackward>)
tensor(0.2405, grad_fn=<NllLossBac

tensor(0.1226, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.2335, grad_fn=<NllLossBackward>)
tensor(0.1826, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.0768, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.1730, grad_fn=<NllLossBackward>)
tensor(0.3140, grad_fn=<NllLossBackward>)
tensor(0.2577, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.0298, grad_fn=<NllLossBackward>)
tensor(0.1069, grad_fn=<NllLossBackward>)
tensor(0.0877, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.0447, grad_fn=<NllLossBackward>)
tensor(0.0670, grad_fn=<NllLossBackward>)
tensor(0.2034, grad_fn=<NllLossBackward>)
tensor(0.0605, grad_fn=<NllLossBackward>)
tensor(0.0941, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.3088, grad_fn=<NllLossBac

tensor(0.1641, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBackward>)
tensor(0.0771, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.0611, grad_fn=<NllLossBackward>)
tensor(0.1111, grad_fn=<NllLossBackward>)
tensor(0.1592, grad_fn=<NllLossBackward>)
tensor(0.0218, grad_fn=<NllLossBackward>)
tensor(0.1235, grad_fn=<NllLossBackward>)
tensor(0.0633, grad_fn=<NllLossBackward>)
tensor(0.1980, grad_fn=<NllLossBackward>)
tensor(0.1986, grad_fn=<NllLossBackward>)
tensor(0.0393, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.1749, grad_fn=<NllLossBackward>)
tensor(0.1847, grad_fn=<NllLossBackward>)
tensor(0.2152, grad_fn=<NllLossBackward>)
tensor(0.0921, grad_fn=<NllLossBackward>)
tensor(0.0303, grad_fn=<NllLossBackward>)
tensor(0.1315, grad_fn=<NllLossBac

tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.2671, grad_fn=<NllLossBackward>)
tensor(0.0652, grad_fn=<NllLossBackward>)
tensor(0.3484, grad_fn=<NllLossBackward>)
tensor(0.1469, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.1312, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.1761, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.2096, grad_fn=<NllLossBackward>)
tensor(0.0228, grad_fn=<NllLossBackward>)
tensor(0.1660, grad_fn=<NllLossBackward>)
tensor(0.0588, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.0765, grad_fn=<NllLossBackward>)
tensor(0.1140, grad_fn=<NllLossBackward>)
tensor(0.1285, grad_fn=<NllLossBackward>)
tensor(0.0523, grad_fn=<NllLossBackward>)
tensor(0.0666, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.1100, grad_fn=<NllLossBac

tensor(0.0788, grad_fn=<NllLossBackward>)
tensor(0.1355, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.0479, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.1096, grad_fn=<NllLossBackward>)
tensor(0.1326, grad_fn=<NllLossBackward>)
tensor(0.2554, grad_fn=<NllLossBackward>)
tensor(0.0063, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.1349, grad_fn=<NllLossBackward>)
tensor(0.1189, grad_fn=<NllLossBackward>)
tensor(0.0630, grad_fn=<NllLossBackward>)
tensor(0.0536, grad_fn=<NllLossBackward>)
tensor(0.0157, grad_fn=<NllLossBackward>)
tensor(0.1395, grad_fn=<NllLossBackward>)
tensor(0.3805, grad_fn=<NllLossBackward>)
tensor(0.0913, grad_fn=<NllLossBackward>)
tensor(0.0588, grad_fn=<NllLossBackward>)
tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.1036, grad_fn=<NllLossBackward>)
tensor(0.0475, grad_fn=<NllLossBac

tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.1830, grad_fn=<NllLossBackward>)
tensor(0.1210, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.2035, grad_fn=<NllLossBackward>)
tensor(0.0538, grad_fn=<NllLossBackward>)
tensor(0.1480, grad_fn=<NllLossBackward>)
tensor(0.1330, grad_fn=<NllLossBackward>)
tensor(0.1197, grad_fn=<NllLossBackward>)
tensor(0.0508, grad_fn=<NllLossBackward>)
tensor(0.1462, grad_fn=<NllLossBackward>)
tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.1323, grad_fn=<NllLossBackward>)
tensor(0.2879, grad_fn=<NllLossBackward>)
tensor(0.2211, grad_fn=<NllLossBackward>)
tensor(0.0590, grad_fn=<NllLossBackward>)
tensor(0.0886, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.1785, grad_fn=<NllLossBackward>)
tensor(0.0795, grad_fn=<NllLossBackward>)
tensor(0.1205, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.2705, grad_fn=<NllLossBac

tensor(0.1718, grad_fn=<NllLossBackward>)
tensor(0.0243, grad_fn=<NllLossBackward>)
tensor(0.0954, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.2586, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.0870, grad_fn=<NllLossBackward>)
tensor(0.0407, grad_fn=<NllLossBackward>)
tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.0995, grad_fn=<NllLossBackward>)
tensor(0.1627, grad_fn=<NllLossBackward>)
tensor(0.1026, grad_fn=<NllLossBackward>)
tensor(0.1047, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.1690, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.2687, grad_fn=<NllLossBackward>)
tensor(0.1233, grad_fn=<NllLossBackward>)
tensor(0.0626, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.3473, grad_fn=<NllLossBac

tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.0919, grad_fn=<NllLossBackward>)
tensor(0.1608, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.0383, grad_fn=<NllLossBackward>)
tensor(0.0307, grad_fn=<NllLossBackward>)
tensor(0.0236, grad_fn=<NllLossBackward>)
tensor(0.2650, grad_fn=<NllLossBackward>)
tensor(0.2673, grad_fn=<NllLossBackward>)
tensor(0.1573, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0514, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.1254, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.1435, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.0664, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.2604, grad_fn=<NllLossBackward>)
tensor(0.0969, grad_fn=<NllLossBackward>)
tensor(0.1806, grad_fn=<NllLossBackward>)
tensor(0.1133, grad_fn=<NllLossBac

tensor(0.0477, grad_fn=<NllLossBackward>)
tensor(0.1431, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.0576, grad_fn=<NllLossBackward>)
tensor(0.3271, grad_fn=<NllLossBackward>)
tensor(0.0450, grad_fn=<NllLossBackward>)
tensor(0.1805, grad_fn=<NllLossBackward>)
tensor(0.0472, grad_fn=<NllLossBackward>)
tensor(0.2439, grad_fn=<NllLossBackward>)
tensor(0.1993, grad_fn=<NllLossBackward>)
tensor(0.1669, grad_fn=<NllLossBackward>)
tensor(0.0833, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.2601, grad_fn=<NllLossBackward>)
tensor(0.0386, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBackward>)
tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.2606, grad_fn=<NllLossBackward>)
tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.0264, grad_fn=<NllLossBackward>)
tensor(0.1283, grad_fn=<NllLossBackward>)
tensor(0.0440, grad_fn=<NllLossBac

tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBackward>)
tensor(0.0486, grad_fn=<NllLossBackward>)
tensor(0.1549, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.1958, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBackward>)
tensor(0.0733, grad_fn=<NllLossBackward>)
tensor(0.1611, grad_fn=<NllLossBackward>)
tensor(0.0842, grad_fn=<NllLossBackward>)
tensor(0.2658, grad_fn=<NllLossBackward>)
tensor(0.2706, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.1629, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.1497, grad_fn=<NllLossBackward>)
tensor(0.1030, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.2416, grad_fn=<NllLossBackward>)
tensor(0.0335, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.0905, grad_fn=<NllLossBackward>)
tensor(0.1116, grad_fn=<NllLossBac

tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.0635, grad_fn=<NllLossBackward>)
tensor(0.0427, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.2078, grad_fn=<NllLossBackward>)
tensor(0.0065, grad_fn=<NllLossBackward>)
tensor(0.0172, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.3352, grad_fn=<NllLossBackward>)
tensor(0.1119, grad_fn=<NllLossBackward>)
tensor(0.1113, grad_fn=<NllLossBackward>)
tensor(0.2250, grad_fn=<NllLossBackward>)
tensor(0.0087, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.1135, grad_fn=<NllLossBackward>)
tensor(0.1788, grad_fn=<NllLossBackward>)
tensor(0.2153, grad_fn=<NllLossBackward>)
tensor(0.1650, grad_fn=<NllLossBackward>)
tensor(0.1852, grad_fn=<NllLossBackward>)
tensor(0.1204, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.2499, grad_fn=<NllLossBac

tensor(0.1635, grad_fn=<NllLossBackward>)
tensor(0.0903, grad_fn=<NllLossBackward>)
tensor(0.2004, grad_fn=<NllLossBackward>)
tensor(0.1996, grad_fn=<NllLossBackward>)
tensor(0.0229, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.2874, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0714, grad_fn=<NllLossBackward>)
tensor(0.0899, grad_fn=<NllLossBackward>)
tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.0336, grad_fn=<NllLossBackward>)
tensor(0.1328, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.1515, grad_fn=<NllLossBackward>)
tensor(0.3339, grad_fn=<NllLossBackward>)
tensor(0.1083, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBac

tensor(0.1316, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.1375, grad_fn=<NllLossBackward>)
tensor(0.0348, grad_fn=<NllLossBackward>)
tensor(0.1073, grad_fn=<NllLossBackward>)
tensor(0.0468, grad_fn=<NllLossBackward>)
tensor(0.0805, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.2821, grad_fn=<NllLossBackward>)
tensor(0.0167, grad_fn=<NllLossBackward>)
tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.1535, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.0970, grad_fn=<NllLossBackward>)
tensor(0.2036, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.2709, grad_fn=<NllLossBackward>)
tensor(0.5722, grad_fn=<NllLossBackward>)
tensor(0.0889, grad_fn=<NllLossBackward>)
tensor(0.0634, grad_fn=<NllLossBackward>)
tensor(0.0774, grad_fn=<NllLossBackward>)
tensor(0.1333, grad_fn=<NllLossBackward>)
tensor(0.1485, grad_fn=<NllLossBackward>)
tensor(0.1793, grad_fn=<NllLossBac

tensor(0.1601, grad_fn=<NllLossBackward>)
tensor(0.1258, grad_fn=<NllLossBackward>)
tensor(0.2871, grad_fn=<NllLossBackward>)
tensor(0.1901, grad_fn=<NllLossBackward>)
tensor(0.1231, grad_fn=<NllLossBackward>)
tensor(0.1145, grad_fn=<NllLossBackward>)
tensor(0.2766, grad_fn=<NllLossBackward>)
tensor(0.1932, grad_fn=<NllLossBackward>)
tensor(0.2160, grad_fn=<NllLossBackward>)
tensor(0.1091, grad_fn=<NllLossBackward>)
tensor(0.1794, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.0610, grad_fn=<NllLossBackward>)
tensor(0.1142, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.1854, grad_fn=<NllLossBackward>)
tensor(0.1359, grad_fn=<NllLossBackward>)
tensor(0.4022, grad_fn=<NllLossBackward>)
tensor(0.2526, grad_fn=<NllLossBackward>)
tensor(0.1897, grad_fn=<NllLossBackward>)
tensor(0.3463, grad_fn=<NllLossBackward>)
tensor(0.0887, grad_fn=<NllLossBackward>)
tensor(0.4191, grad_fn=<NllLossBac

tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.0913, grad_fn=<NllLossBackward>)
tensor(0.0296, grad_fn=<NllLossBackward>)
tensor(0.1296, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.0120, grad_fn=<NllLossBackward>)
tensor(0.1562, grad_fn=<NllLossBackward>)
tensor(0.0092, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.1501, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.0911, grad_fn=<NllLossBackward>)
tensor(0.1993, grad_fn=<NllLossBackward>)
tensor(0.1140, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.1294, grad_fn=<NllLossBackward>)
tensor(0.2042, grad_fn=<NllLossBackward>)
tensor(0.0732, grad_fn=<NllLossBackward>)
tensor(0.3145, grad_fn=<NllLossBackward>)
tensor(0.1193, grad_fn=<NllLossBac

tensor(0.1864, grad_fn=<NllLossBackward>)
tensor(0.0306, grad_fn=<NllLossBackward>)
tensor(0.0474, grad_fn=<NllLossBackward>)
tensor(0.1260, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.0945, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0897, grad_fn=<NllLossBackward>)
tensor(0.0290, grad_fn=<NllLossBackward>)
tensor(0.2750, grad_fn=<NllLossBackward>)
tensor(0.1175, grad_fn=<NllLossBackward>)
tensor(0.0220, grad_fn=<NllLossBackward>)
tensor(0.0477, grad_fn=<NllLossBackward>)
tensor(0.0917, grad_fn=<NllLossBackward>)
tensor(0.2812, grad_fn=<NllLossBackward>)
tensor(0.1393, grad_fn=<NllLossBackward>)
tensor(0.0729, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.0508, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.1885, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.1231, grad_fn=<NllLossBac

tensor(0.1223, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.2129, grad_fn=<NllLossBackward>)
tensor(0.3267, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.1074, grad_fn=<NllLossBackward>)
tensor(0.0811, grad_fn=<NllLossBackward>)
tensor(0.3696, grad_fn=<NllLossBackward>)
tensor(0.3024, grad_fn=<NllLossBackward>)
tensor(0.0894, grad_fn=<NllLossBackward>)
tensor(0.3570, grad_fn=<NllLossBackward>)
tensor(0.2326, grad_fn=<NllLossBackward>)
tensor(0.0249, grad_fn=<NllLossBackward>)
tensor(0.1455, grad_fn=<NllLossBackward>)
tensor(0.1109, grad_fn=<NllLossBackward>)
tensor(0.0330, grad_fn=<NllLossBackward>)
tensor(0.0256, grad_fn=<NllLossBackward>)
tensor(0.0847, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.1186, grad_fn=<NllLossBackward>)
tensor(0.1092, grad_fn=<NllLossBackward>)
tensor(0.1236, grad_fn=<NllLossBackward>)
tensor(0.0731, grad_fn=<NllLossBac

tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.2855, grad_fn=<NllLossBackward>)
tensor(0.3111, grad_fn=<NllLossBackward>)
tensor(0.1049, grad_fn=<NllLossBackward>)
tensor(0.1852, grad_fn=<NllLossBackward>)
tensor(0.4539, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.0255, grad_fn=<NllLossBackward>)
tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.0676, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBackward>)
tensor(0.0888, grad_fn=<NllLossBackward>)
tensor(0.1816, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBackward>)
tensor(0.1957, grad_fn=<NllLossBackward>)
tensor(0.0744, grad_fn=<NllLossBackward>)
tensor(0.2116, grad_fn=<NllLossBackward>)
tensor(0.4958, grad_fn=<NllLossBackward>)
tensor(0.1043, grad_fn=<NllLossBackward>)
tensor(0.0374, grad_fn=<NllLossBackward>)
tensor(0.0412, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBac

tensor(0.0590, grad_fn=<NllLossBackward>)
tensor(0.0665, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBackward>)
tensor(0.0883, grad_fn=<NllLossBackward>)
tensor(0.0920, grad_fn=<NllLossBackward>)
tensor(0.0753, grad_fn=<NllLossBackward>)
tensor(0.1761, grad_fn=<NllLossBackward>)
tensor(0.1929, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.0340, grad_fn=<NllLossBackward>)
tensor(0.1563, grad_fn=<NllLossBackward>)
tensor(0.0790, grad_fn=<NllLossBackward>)
tensor(0.1996, grad_fn=<NllLossBackward>)
tensor(0.2493, grad_fn=<NllLossBackward>)
tensor(0.0757, grad_fn=<NllLossBackward>)
tensor(0.1911, grad_fn=<NllLossBackward>)
tensor(0.1399, grad_fn=<NllLossBackward>)
tensor(0.0172, grad_fn=<NllLossBackward>)
tensor(0.1180, grad_fn=<NllLossBackward>)
tensor(0.1006, grad_fn=<NllLossBackward>)
tensor(0.1150, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.0860, grad_fn=<NllLossBackward>)
tensor(0.0190, grad_fn=<NllLossBac

tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.0696, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.1583, grad_fn=<NllLossBackward>)
tensor(0.0742, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.1412, grad_fn=<NllLossBackward>)
tensor(0.1140, grad_fn=<NllLossBackward>)
tensor(0.0575, grad_fn=<NllLossBackward>)
tensor(0.0184, grad_fn=<NllLossBackward>)
tensor(0.0948, grad_fn=<NllLossBackward>)
tensor(0.2446, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.1139, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.1650, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.0263, grad_fn=<NllLossBackward>)
tensor(0.0252, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.1410, grad_fn=<NllLossBac

tensor(0.0872, grad_fn=<NllLossBackward>)
tensor(0.2838, grad_fn=<NllLossBackward>)
tensor(0.0573, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.1553, grad_fn=<NllLossBackward>)
tensor(0.1318, grad_fn=<NllLossBackward>)
tensor(0.1243, grad_fn=<NllLossBackward>)
tensor(0.1288, grad_fn=<NllLossBackward>)
tensor(0.1600, grad_fn=<NllLossBackward>)
tensor(0.2369, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0402, grad_fn=<NllLossBackward>)
tensor(0.0371, grad_fn=<NllLossBackward>)
tensor(0.1788, grad_fn=<NllLossBackward>)
tensor(0.1591, grad_fn=<NllLossBackward>)
tensor(0.2821, grad_fn=<NllLossBackward>)
tensor(0.2138, grad_fn=<NllLossBackward>)
tensor(0.2560, grad_fn=<NllLossBackward>)
tensor(0.1306, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.0321, grad_fn=<NllLossBackward>)
tensor(0.1705, grad_fn=<NllLossBac

tensor(0.0670, grad_fn=<NllLossBackward>)
tensor(0.1353, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.0712, grad_fn=<NllLossBackward>)
tensor(0.0997, grad_fn=<NllLossBackward>)
tensor(0.1565, grad_fn=<NllLossBackward>)
tensor(0.1483, grad_fn=<NllLossBackward>)
tensor(0.0320, grad_fn=<NllLossBackward>)
tensor(0.1703, grad_fn=<NllLossBackward>)
tensor(0.0918, grad_fn=<NllLossBackward>)
tensor(0.2304, grad_fn=<NllLossBackward>)
tensor(0.0988, grad_fn=<NllLossBackward>)
tensor(0.1069, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.1399, grad_fn=<NllLossBackward>)
tensor(0.1325, grad_fn=<NllLossBackward>)
tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.1503, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.1045, grad_fn=<NllLossBackward>)
tensor(0.0544, grad_fn=<NllLossBackward>)
tensor(0.0272, grad_fn=<NllLossBac

tensor(0.1057, grad_fn=<NllLossBackward>)
tensor(0.0141, grad_fn=<NllLossBackward>)
tensor(0.0920, grad_fn=<NllLossBackward>)
tensor(0.0422, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.1213, grad_fn=<NllLossBackward>)
tensor(0.3330, grad_fn=<NllLossBackward>)
tensor(0.0411, grad_fn=<NllLossBackward>)
tensor(0.0876, grad_fn=<NllLossBackward>)
tensor(0.1000, grad_fn=<NllLossBackward>)
tensor(0.0219, grad_fn=<NllLossBackward>)
tensor(0.0140, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.0251, grad_fn=<NllLossBackward>)
tensor(0.1409, grad_fn=<NllLossBackward>)
tensor(0.0228, grad_fn=<NllLossBackward>)
tensor(0.1946, grad_fn=<NllLossBackward>)
tensor(0.0748, grad_fn=<NllLossBackward>)
tensor(0.2907, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1247, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.1028, grad_fn=<NllLossBackward>)
tensor(0.0956, grad_fn=<NllLossBac

tensor(0.0217, grad_fn=<NllLossBackward>)
tensor(0.0721, grad_fn=<NllLossBackward>)
tensor(0.1824, grad_fn=<NllLossBackward>)
tensor(0.1930, grad_fn=<NllLossBackward>)
tensor(0.1721, grad_fn=<NllLossBackward>)
tensor(0.1993, grad_fn=<NllLossBackward>)
tensor(0.0658, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.1517, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.0436, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.3436, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.1924, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.2365, grad_fn=<NllLossBackward>)
tensor(0.0644, grad_fn=<NllLossBackward>)
tensor(0.1415, grad_fn=<NllLossBackward>)
tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.1847, grad_fn=<NllLossBackward>)
tensor(0.0889, grad_fn=<NllLossBac

tensor(0.1816, grad_fn=<NllLossBackward>)
tensor(0.1044, grad_fn=<NllLossBackward>)
tensor(0.2994, grad_fn=<NllLossBackward>)
tensor(0.1828, grad_fn=<NllLossBackward>)
tensor(0.0638, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.0556, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0361, grad_fn=<NllLossBackward>)
tensor(0.2618, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.1834, grad_fn=<NllLossBackward>)
tensor(0.1974, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.0975, grad_fn=<NllLossBackward>)
tensor(0.1365, grad_fn=<NllLossBackward>)
tensor(0.0228, grad_fn=<NllLossBackward>)
tensor(0.0091, grad_fn=<NllLossBackward>)
tensor(0.2465, grad_fn=<NllLossBackward>)
tensor(0.0730, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.1989, grad_fn=<NllLossBackward>)
tensor(0.0950, grad_fn=<NllLossBac

tensor(0.1581, grad_fn=<NllLossBackward>)
tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.0945, grad_fn=<NllLossBackward>)
tensor(0.3908, grad_fn=<NllLossBackward>)
tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.0933, grad_fn=<NllLossBackward>)
tensor(0.0368, grad_fn=<NllLossBackward>)
tensor(0.1819, grad_fn=<NllLossBackward>)
tensor(0.0223, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.1359, grad_fn=<NllLossBackward>)
tensor(0.1891, grad_fn=<NllLossBackward>)
tensor(0.1569, grad_fn=<NllLossBackward>)
tensor(0.0675, grad_fn=<NllLossBackward>)
tensor(0.1426, grad_fn=<NllLossBackward>)
tensor(0.0318, grad_fn=<NllLossBackward>)
tensor(0.0401, grad_fn=<NllLossBackward>)
tensor(0.0041, grad_fn=<NllLossBackward>)
tensor(0.0801, grad_fn=<NllLossBackward>)
tensor(0.1701, grad_fn=<NllLossBackward>)
tensor(0.0570, grad_fn=<NllLossBackward>)
tensor(0.0328, grad_fn=<NllLossBackward>)
tensor(0.2780, grad_fn=<NllLossBackward>)
tensor(0.2270, grad_fn=<NllLossBac

tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0815, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0100, grad_fn=<NllLossBackward>)
tensor(0.2126, grad_fn=<NllLossBackward>)
tensor(0.2533, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBackward>)
tensor(0.1714, grad_fn=<NllLossBackward>)
tensor(0.0772, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.0279, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBackward>)
tensor(0.0849, grad_fn=<NllLossBackward>)
tensor(0.1143, grad_fn=<NllLossBackward>)
tensor(0.0096, grad_fn=<NllLossBackward>)
tensor(0.1061, grad_fn=<NllLossBackward>)
tensor(0.3259, grad_fn=<NllLossBackward>)
tensor(0.0684, grad_fn=<NllLossBackward>)
tensor(0.0792, grad_fn=<NllLossBackward>)
tensor(0.2068, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBac

tensor(0.0861, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0589, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.1112, grad_fn=<NllLossBackward>)
tensor(0.0751, grad_fn=<NllLossBackward>)
tensor(0.1621, grad_fn=<NllLossBackward>)
tensor(0.1188, grad_fn=<NllLossBackward>)
tensor(0.2807, grad_fn=<NllLossBackward>)
tensor(0.0306, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.1164, grad_fn=<NllLossBackward>)
tensor(0.1332, grad_fn=<NllLossBackward>)
tensor(0.1579, grad_fn=<NllLossBackward>)
tensor(0.1032, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.1736, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.0289, grad_fn=<NllLossBackward>)
tensor(0.0318, grad_fn=<NllLossBackward>)
tensor(0.1052, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBac

tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.0796, grad_fn=<NllLossBackward>)
tensor(0.0533, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBackward>)
tensor(0.3089, grad_fn=<NllLossBackward>)
tensor(0.0265, grad_fn=<NllLossBackward>)
tensor(0.0574, grad_fn=<NllLossBackward>)
tensor(0.2164, grad_fn=<NllLossBackward>)
tensor(0.4060, grad_fn=<NllLossBackward>)
tensor(0.1479, grad_fn=<NllLossBackward>)
tensor(0.1579, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0089, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.1099, grad_fn=<NllLossBackward>)
tensor(0.4214, grad_fn=<NllLossBackward>)
tensor(0.1085, grad_fn=<NllLossBackward>)
tensor(0.0873, grad_fn=<NllLossBackward>)
tensor(0.0566, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.1634, grad_fn=<NllLossBackward>)
tensor(0.2164, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBac

tensor(0.1014, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0921, grad_fn=<NllLossBackward>)
tensor(0.2233, grad_fn=<NllLossBackward>)
tensor(0.0646, grad_fn=<NllLossBackward>)
tensor(0.0275, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.1508, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.1668, grad_fn=<NllLossBackward>)
tensor(0.0406, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.0122, grad_fn=<NllLossBackward>)
tensor(0.0439, grad_fn=<NllLossBackward>)
tensor(0.1316, grad_fn=<NllLossBackward>)
tensor(0.1322, grad_fn=<NllLossBackward>)
tensor(0.2068, grad_fn=<NllLossBackward>)
tensor(0.1434, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.1753, grad_fn=<NllLossBackward>)
tensor(0.0166, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.1064, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBac

tensor(0.2352, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.0197, grad_fn=<NllLossBackward>)
tensor(0.2220, grad_fn=<NllLossBackward>)
tensor(0.0121, grad_fn=<NllLossBackward>)
tensor(0.0136, grad_fn=<NllLossBackward>)
tensor(0.1193, grad_fn=<NllLossBackward>)
tensor(0.0582, grad_fn=<NllLossBackward>)
tensor(0.2216, grad_fn=<NllLossBackward>)
tensor(0.0286, grad_fn=<NllLossBackward>)
tensor(0.0902, grad_fn=<NllLossBackward>)
tensor(0.1004, grad_fn=<NllLossBackward>)
tensor(0.0530, grad_fn=<NllLossBackward>)
tensor(0.1802, grad_fn=<NllLossBackward>)
tensor(0.2902, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBackward>)
tensor(0.0077, grad_fn=<NllLossBackward>)
tensor(0.0940, grad_fn=<NllLossBackward>)
tensor(0.0208, grad_fn=<NllLossBackward>)
tensor(0.1263, grad_fn=<NllLossBackward>)
tensor(0.0677, grad_fn=<NllLossBackward>)
tensor(0.0795, grad_fn=<NllLossBackward>)
tensor(0.1136, grad_fn=<NllLossBackward>)
tensor(0.0246, grad_fn=<NllLossBac

tensor(0.2378, grad_fn=<NllLossBackward>)
tensor(0.0308, grad_fn=<NllLossBackward>)
tensor(0.0996, grad_fn=<NllLossBackward>)
tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.1435, grad_fn=<NllLossBackward>)
tensor(0.1011, grad_fn=<NllLossBackward>)
tensor(0.0687, grad_fn=<NllLossBackward>)
tensor(0.0695, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.2321, grad_fn=<NllLossBackward>)
tensor(0.3134, grad_fn=<NllLossBackward>)
tensor(0.1697, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.0316, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.2472, grad_fn=<NllLossBackward>)
tensor(0.1595, grad_fn=<NllLossBackward>)
tensor(0.0027, grad_fn=<NllLossBackward>)
tensor(0.0108, grad_fn=<NllLossBackward>)
tensor(0.0664, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBac

tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.2126, grad_fn=<NllLossBackward>)
tensor(0.0744, grad_fn=<NllLossBackward>)
tensor(0.2250, grad_fn=<NllLossBackward>)
tensor(0.1917, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.0784, grad_fn=<NllLossBackward>)
tensor(0.0951, grad_fn=<NllLossBackward>)
tensor(0.2336, grad_fn=<NllLossBackward>)
tensor(0.1310, grad_fn=<NllLossBackward>)
tensor(0.2386, grad_fn=<NllLossBackward>)
tensor(0.1125, grad_fn=<NllLossBackward>)
tensor(0.0780, grad_fn=<NllLossBackward>)
tensor(0.1719, grad_fn=<NllLossBackward>)
tensor(0.1190, grad_fn=<NllLossBackward>)
tensor(0.0453, grad_fn=<NllLossBackward>)
tensor(0.2780, grad_fn=<NllLossBackward>)
tensor(0.1697, grad_fn=<NllLossBackward>)
tensor(0.2022, grad_fn=<NllLossBackward>)
tensor(0.1309, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.0709, grad_fn=<NllLossBackward>)
tensor(0.0605, grad_fn=<NllLossBackward>)
tensor(0.0301, grad_fn=<NllLossBac

tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.1354, grad_fn=<NllLossBackward>)
tensor(0.0651, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.1068, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0905, grad_fn=<NllLossBackward>)
tensor(0.1054, grad_fn=<NllLossBackward>)
tensor(0.1929, grad_fn=<NllLossBackward>)
tensor(0.0428, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.1050, grad_fn=<NllLossBackward>)
tensor(0.0713, grad_fn=<NllLossBackward>)
tensor(0.0522, grad_fn=<NllLossBackward>)
tensor(0.0930, grad_fn=<NllLossBackward>)
tensor(0.1500, grad_fn=<NllLossBackward>)
tensor(0.1066, grad_fn=<NllLossBackward>)
tensor(0.2523, grad_fn=<NllLossBackward>)
tensor(0.3189, grad_fn=<NllLossBackward>)
tensor(0.0052, grad_fn=<NllLossBackward>)
tensor(0.0397, grad_fn=<NllLossBackward>)
tensor(0.2102, grad_fn=<NllLossBackward>)
tensor(0.0057, grad_fn=<NllLossBackward>)
tensor(0.1930, grad_fn=<NllLossBac

tensor(0.0144, grad_fn=<NllLossBackward>)
tensor(0.0285, grad_fn=<NllLossBackward>)
tensor(0.1857, grad_fn=<NllLossBackward>)
tensor(0.3009, grad_fn=<NllLossBackward>)
tensor(0.1595, grad_fn=<NllLossBackward>)
tensor(0.1161, grad_fn=<NllLossBackward>)
tensor(0.0236, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.1514, grad_fn=<NllLossBackward>)
tensor(0.0913, grad_fn=<NllLossBackward>)
tensor(0.0491, grad_fn=<NllLossBackward>)
tensor(0.0945, grad_fn=<NllLossBackward>)
tensor(0.0395, grad_fn=<NllLossBackward>)
tensor(0.1823, grad_fn=<NllLossBackward>)
tensor(0.0214, grad_fn=<NllLossBackward>)
tensor(0.0661, grad_fn=<NllLossBackward>)
tensor(0.0386, grad_fn=<NllLossBackward>)
tensor(0.1186, grad_fn=<NllLossBackward>)
tensor(0.0473, grad_fn=<NllLossBackward>)
tensor(0.0098, grad_fn=<NllLossBackward>)
tensor(0.1542, grad_fn=<NllLossBackward>)
tensor(0.0245, grad_fn=<NllLossBackward>)
tensor(0.1496, grad_fn=<NllLossBackward>)
tensor(0.1381, grad_fn=<NllLossBac

tensor(0.5594, grad_fn=<NllLossBackward>)
tensor(0.1200, grad_fn=<NllLossBackward>)
tensor(0.1711, grad_fn=<NllLossBackward>)
tensor(0.1337, grad_fn=<NllLossBackward>)
tensor(0.1541, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.2188, grad_fn=<NllLossBackward>)
tensor(0.0588, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.0707, grad_fn=<NllLossBackward>)
tensor(0.0151, grad_fn=<NllLossBackward>)
tensor(0.1429, grad_fn=<NllLossBackward>)
tensor(0.0406, grad_fn=<NllLossBackward>)
tensor(0.1702, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.1203, grad_fn=<NllLossBackward>)
tensor(0.1510, grad_fn=<NllLossBackward>)
tensor(0.0852, grad_fn=<NllLossBackward>)
tensor(0.0780, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.0247, grad_fn=<NllLossBackward>)
tensor(0.2471, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.1208, grad_fn=<NllLossBac

tensor(0.0799, grad_fn=<NllLossBackward>)
tensor(0.0462, grad_fn=<NllLossBackward>)
tensor(0.0474, grad_fn=<NllLossBackward>)
tensor(0.0533, grad_fn=<NllLossBackward>)
tensor(0.4464, grad_fn=<NllLossBackward>)
tensor(0.2965, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBackward>)
tensor(0.0159, grad_fn=<NllLossBackward>)
tensor(0.2772, grad_fn=<NllLossBackward>)
tensor(0.0583, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.0217, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.0743, grad_fn=<NllLossBackward>)
tensor(0.0414, grad_fn=<NllLossBackward>)
tensor(0.0444, grad_fn=<NllLossBackward>)
tensor(0.1410, grad_fn=<NllLossBackward>)
tensor(0.1191, grad_fn=<NllLossBackward>)
tensor(0.0431, grad_fn=<NllLossBackward>)
tensor(0.1445, grad_fn=<NllLossBackward>)
tensor(0.1378, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBac

tensor(0.0467, grad_fn=<NllLossBackward>)
tensor(0.0974, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.0438, grad_fn=<NllLossBackward>)
tensor(0.0478, grad_fn=<NllLossBackward>)
tensor(0.0654, grad_fn=<NllLossBackward>)
tensor(0.3036, grad_fn=<NllLossBackward>)
tensor(0.1120, grad_fn=<NllLossBackward>)
tensor(0.1012, grad_fn=<NllLossBackward>)
tensor(0.0737, grad_fn=<NllLossBackward>)
tensor(0.1229, grad_fn=<NllLossBackward>)
tensor(0.0114, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.1411, grad_fn=<NllLossBackward>)
tensor(0.1727, grad_fn=<NllLossBackward>)
tensor(0.2173, grad_fn=<NllLossBackward>)
tensor(0.1139, grad_fn=<NllLossBackward>)
tensor(0.2751, grad_fn=<NllLossBackward>)
tensor(0.0957, grad_fn=<NllLossBackward>)
tensor(0.0080, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBac

tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.1596, grad_fn=<NllLossBackward>)
tensor(0.1029, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.0174, grad_fn=<NllLossBackward>)
tensor(0.0844, grad_fn=<NllLossBackward>)
tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.0797, grad_fn=<NllLossBackward>)
tensor(0.1765, grad_fn=<NllLossBackward>)
tensor(0.1098, grad_fn=<NllLossBackward>)
tensor(0.0246, grad_fn=<NllLossBackward>)
tensor(0.1737, grad_fn=<NllLossBackward>)
tensor(0.2399, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.0956, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.0464, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.0906, grad_fn=<NllLossBackward>)
tensor(0.0359, grad_fn=<NllLossBackward>)
tensor(0.1091, grad_fn=<NllLossBackward>)
tensor(0.2026, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBackward>)
tensor(0.1360, grad_fn=<NllLossBac

tensor(0.0517, grad_fn=<NllLossBackward>)
tensor(0.0432, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.1850, grad_fn=<NllLossBackward>)
tensor(0.1067, grad_fn=<NllLossBackward>)
tensor(0.0088, grad_fn=<NllLossBackward>)
tensor(0.1314, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBackward>)
tensor(0.1764, grad_fn=<NllLossBackward>)
tensor(0.2128, grad_fn=<NllLossBackward>)
tensor(0.0833, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.0264, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.3063, grad_fn=<NllLossBackward>)
tensor(0.0281, grad_fn=<NllLossBackward>)
tensor(0.0752, grad_fn=<NllLossBackward>)
tensor(0.0373, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.0270, grad_fn=<NllLossBackward>)
tensor(0.0656, grad_fn=<NllLossBackward>)
tensor(0.1884, grad_fn=<NllLossBac

tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.1598, grad_fn=<NllLossBackward>)
tensor(0.0780, grad_fn=<NllLossBackward>)
tensor(0.1620, grad_fn=<NllLossBackward>)
tensor(0.1585, grad_fn=<NllLossBackward>)
tensor(0.0982, grad_fn=<NllLossBackward>)
tensor(0.0708, grad_fn=<NllLossBackward>)
tensor(0.1386, grad_fn=<NllLossBackward>)
tensor(0.1136, grad_fn=<NllLossBackward>)
tensor(0.0510, grad_fn=<NllLossBackward>)
tensor(0.0631, grad_fn=<NllLossBackward>)
tensor(0.0680, grad_fn=<NllLossBackward>)
tensor(0.0702, grad_fn=<NllLossBackward>)
tensor(0.0090, grad_fn=<NllLossBackward>)
tensor(0.1235, grad_fn=<NllLossBackward>)
tensor(0.1075, grad_fn=<NllLossBackward>)
tensor(0.1109, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.2259, grad_fn=<NllLossBackward>)
tensor(0.3160, grad_fn=<NllLossBackward>)
tensor(0.0165, grad_fn=<NllLossBackward>)
tensor(0.1776, grad_fn=<NllLossBackward>)
tensor(0.2333, grad_fn=<NllLossBackward>)
tensor(0.0304, grad_fn=<NllLossBac

tensor(0.0320, grad_fn=<NllLossBackward>)
tensor(0.3924, grad_fn=<NllLossBackward>)
tensor(0.0127, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.0837, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.1948, grad_fn=<NllLossBackward>)
tensor(0.2255, grad_fn=<NllLossBackward>)
tensor(0.1757, grad_fn=<NllLossBackward>)
tensor(0.1125, grad_fn=<NllLossBackward>)
tensor(0.1425, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0593, grad_fn=<NllLossBackward>)
tensor(0.1679, grad_fn=<NllLossBackward>)
tensor(0.1009, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.0764, grad_fn=<NllLossBackward>)
tensor(0.2096, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.1643, grad_fn=<NllLossBac

tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.0993, grad_fn=<NllLossBackward>)
tensor(0.0683, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.1699, grad_fn=<NllLossBackward>)
tensor(0.2750, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0760, grad_fn=<NllLossBackward>)
tensor(0.1178, grad_fn=<NllLossBackward>)
tensor(0.0362, grad_fn=<NllLossBackward>)
tensor(0.1095, grad_fn=<NllLossBackward>)
tensor(0.0205, grad_fn=<NllLossBackward>)
tensor(0.0834, grad_fn=<NllLossBackward>)
tensor(0.0321, grad_fn=<NllLossBackward>)
tensor(0.0736, grad_fn=<NllLossBackward>)
tensor(0.1674, grad_fn=<NllLossBackward>)
tensor(0.0259, grad_fn=<NllLossBackward>)
tensor(0.3021, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.1094, grad_fn=<NllLossBackward>)
tensor(0.1423, grad_fn=<NllLossBackward>)
tensor(0.0904, grad_fn=<NllLossBackward>)
tensor(0.2342, grad_fn=<NllLossBackward>)
tensor(0.0739, grad_fn=<NllLossBac

tensor(0.1857, grad_fn=<NllLossBackward>)
tensor(0.1594, grad_fn=<NllLossBackward>)
tensor(0.1790, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.0256, grad_fn=<NllLossBackward>)
tensor(0.1954, grad_fn=<NllLossBackward>)
tensor(0.1637, grad_fn=<NllLossBackward>)
tensor(0.0093, grad_fn=<NllLossBackward>)
tensor(0.0276, grad_fn=<NllLossBackward>)
tensor(0.0576, grad_fn=<NllLossBackward>)
tensor(0.0533, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.1296, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.0900, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBackward>)
tensor(0.0681, grad_fn=<NllLossBackward>)
tensor(0.1180, grad_fn=<NllLossBackward>)
tensor(0.0104, grad_fn=<NllLossBackward>)
tensor(0.1706, grad_fn=<NllLossBackward>)
tensor(0.0798, grad_fn=<NllLossBac

tensor(0.2526, grad_fn=<NllLossBackward>)
tensor(0.1434, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.0915, grad_fn=<NllLossBackward>)
tensor(0.1426, grad_fn=<NllLossBackward>)
tensor(0.0405, grad_fn=<NllLossBackward>)
tensor(0.1859, grad_fn=<NllLossBackward>)
tensor(0.1782, grad_fn=<NllLossBackward>)
tensor(0.0320, grad_fn=<NllLossBackward>)
tensor(0.0638, grad_fn=<NllLossBackward>)
tensor(0.1486, grad_fn=<NllLossBackward>)
tensor(0.0793, grad_fn=<NllLossBackward>)
tensor(0.1849, grad_fn=<NllLossBackward>)
tensor(0.0755, grad_fn=<NllLossBackward>)
tensor(0.1270, grad_fn=<NllLossBackward>)
tensor(0.0594, grad_fn=<NllLossBackward>)
tensor(0.0907, grad_fn=<NllLossBackward>)
tensor(0.1563, grad_fn=<NllLossBackward>)
tensor(0.0567, grad_fn=<NllLossBackward>)
tensor(0.0202, grad_fn=<NllLossBackward>)
tensor(0.0060, grad_fn=<NllLossBackward>)
tensor(0.1364, grad_fn=<NllLossBackward>)
tensor(0.2197, grad_fn=<NllLossBackward>)
tensor(0.1411, grad_fn=<NllLossBac

tensor(0.0937, grad_fn=<NllLossBackward>)
tensor(0.1427, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.0171, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBackward>)
tensor(0.1562, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.0199, grad_fn=<NllLossBackward>)
tensor(0.0168, grad_fn=<NllLossBackward>)
tensor(0.2134, grad_fn=<NllLossBackward>)
tensor(0.1324, grad_fn=<NllLossBackward>)
tensor(0.0324, grad_fn=<NllLossBackward>)
tensor(0.0073, grad_fn=<NllLossBackward>)
tensor(0.0724, grad_fn=<NllLossBackward>)
tensor(0.2175, grad_fn=<NllLossBackward>)
tensor(0.0529, grad_fn=<NllLossBackward>)
tensor(0.0965, grad_fn=<NllLossBackward>)
tensor(0.0086, grad_fn=<NllLossBackward>)
tensor(0.1972, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.1857, grad_fn=<NllLossBackward>)
tensor(0.0254, grad_fn=<NllLossBackward>)
tensor(0.0234, grad_fn=<NllLossBac

tensor(0.0890, grad_fn=<NllLossBackward>)
tensor(0.0058, grad_fn=<NllLossBackward>)
tensor(0.0149, grad_fn=<NllLossBackward>)
tensor(0.0603, grad_fn=<NllLossBackward>)
tensor(0.0854, grad_fn=<NllLossBackward>)
tensor(0.1689, grad_fn=<NllLossBackward>)
tensor(0.0418, grad_fn=<NllLossBackward>)
tensor(0.2353, grad_fn=<NllLossBackward>)
tensor(0.1645, grad_fn=<NllLossBackward>)
tensor(0.2255, grad_fn=<NllLossBackward>)
tensor(0.0435, grad_fn=<NllLossBackward>)
tensor(0.0405, grad_fn=<NllLossBackward>)
tensor(0.0110, grad_fn=<NllLossBackward>)
tensor(0.0225, grad_fn=<NllLossBackward>)
tensor(0.0976, grad_fn=<NllLossBackward>)
tensor(0.1815, grad_fn=<NllLossBackward>)
tensor(0.1239, grad_fn=<NllLossBackward>)
tensor(0.0614, grad_fn=<NllLossBackward>)
tensor(0.0928, grad_fn=<NllLossBackward>)
tensor(0.0133, grad_fn=<NllLossBackward>)
tensor(0.2749, grad_fn=<NllLossBackward>)
tensor(0.1259, grad_fn=<NllLossBackward>)
tensor(0.0302, grad_fn=<NllLossBackward>)
tensor(0.0758, grad_fn=<NllLossBac

tensor(0.0595, grad_fn=<NllLossBackward>)
tensor(0.0293, grad_fn=<NllLossBackward>)
tensor(0.1571, grad_fn=<NllLossBackward>)
tensor(0.0528, grad_fn=<NllLossBackward>)
tensor(0.0561, grad_fn=<NllLossBackward>)
tensor(0.1180, grad_fn=<NllLossBackward>)
tensor(0.0838, grad_fn=<NllLossBackward>)
tensor(0.1833, grad_fn=<NllLossBackward>)
tensor(0.2056, grad_fn=<NllLossBackward>)
tensor(0.0387, grad_fn=<NllLossBackward>)
tensor(0.0645, grad_fn=<NllLossBackward>)
tensor(0.2746, grad_fn=<NllLossBackward>)
tensor(0.1748, grad_fn=<NllLossBackward>)
tensor(0.0196, grad_fn=<NllLossBackward>)
tensor(0.1632, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.0920, grad_fn=<NllLossBackward>)
tensor(0.1648, grad_fn=<NllLossBackward>)
tensor(0.1009, grad_fn=<NllLossBackward>)
tensor(0.1872, grad_fn=<NllLossBackward>)
tensor(0.1064, grad_fn=<NllLossBackward>)
tensor(0.1298, grad_fn=<NllLossBackward>)
tensor(0.0727, grad_fn=<NllLossBackward>)
tensor(0.1264, grad_fn=<NllLossBac

tensor(0.2086, grad_fn=<NllLossBackward>)
tensor(0.0120, grad_fn=<NllLossBackward>)
tensor(0.3240, grad_fn=<NllLossBackward>)
tensor(0.1357, grad_fn=<NllLossBackward>)
tensor(0.0421, grad_fn=<NllLossBackward>)
tensor(0.0489, grad_fn=<NllLossBackward>)
tensor(0.2263, grad_fn=<NllLossBackward>)
tensor(0.1133, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.1762, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.0964, grad_fn=<NllLossBackward>)
tensor(0.0999, grad_fn=<NllLossBackward>)
tensor(0.0980, grad_fn=<NllLossBackward>)
tensor(0.1254, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.0176, grad_fn=<NllLossBackward>)
tensor(0.0579, grad_fn=<NllLossBackward>)
tensor(0.1888, grad_fn=<NllLossBackward>)
tensor(0.1955, grad_fn=<NllLossBackward>)
tensor(0.0987, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.0486, grad_fn=<NllLossBackward>)
tensor(0.0367, grad_fn=<NllLossBac

tensor(0.0187, grad_fn=<NllLossBackward>)
tensor(0.0321, grad_fn=<NllLossBackward>)
tensor(0.0566, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.0446, grad_fn=<NllLossBackward>)
tensor(0.0226, grad_fn=<NllLossBackward>)
tensor(0.0802, grad_fn=<NllLossBackward>)
tensor(0.0865, grad_fn=<NllLossBackward>)
tensor(0.1790, grad_fn=<NllLossBackward>)
tensor(0.1658, grad_fn=<NllLossBackward>)
tensor(0.0720, grad_fn=<NllLossBackward>)
tensor(0.0696, grad_fn=<NllLossBackward>)
tensor(0.0389, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.1114, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.1242, grad_fn=<NllLossBackward>)
tensor(0.0305, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0988, grad_fn=<NllLossBackward>)
tensor(0.0745, grad_fn=<NllLossBackward>)
tensor(0.0783, grad_fn=<NllLossBac

tensor(0.0352, grad_fn=<NllLossBackward>)
tensor(0.0520, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.1492, grad_fn=<NllLossBackward>)
tensor(0.0817, grad_fn=<NllLossBackward>)
tensor(0.0457, grad_fn=<NllLossBackward>)
tensor(0.1405, grad_fn=<NllLossBackward>)
tensor(0.1015, grad_fn=<NllLossBackward>)
tensor(0.0976, grad_fn=<NllLossBackward>)
tensor(0.0566, grad_fn=<NllLossBackward>)
tensor(0.2398, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.0862, grad_fn=<NllLossBackward>)
tensor(0.1611, grad_fn=<NllLossBackward>)
tensor(0.1081, grad_fn=<NllLossBackward>)
tensor(0.0444, grad_fn=<NllLossBackward>)
tensor(0.1157, grad_fn=<NllLossBackward>)
tensor(0.1148, grad_fn=<NllLossBackward>)
tensor(0.1906, grad_fn=<NllLossBackward>)
tensor(0.1097, grad_fn=<NllLossBackward>)
tensor(0.1895, grad_fn=<NllLossBackward>)
tensor(0.1960, grad_fn=<NllLossBackward>)
tensor(0.0723, grad_fn=<NllLossBackward>)
tensor(0.0073, grad_fn=<NllLossBac

tensor(0.1437, grad_fn=<NllLossBackward>)
tensor(0.1456, grad_fn=<NllLossBackward>)
tensor(0.0160, grad_fn=<NllLossBackward>)
tensor(0.1657, grad_fn=<NllLossBackward>)
tensor(0.1714, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.1176, grad_fn=<NllLossBackward>)
tensor(0.0203, grad_fn=<NllLossBackward>)
tensor(0.1575, grad_fn=<NllLossBackward>)
tensor(0.0804, grad_fn=<NllLossBackward>)
tensor(0.1039, grad_fn=<NllLossBackward>)
tensor(0.0627, grad_fn=<NllLossBackward>)
tensor(0.1685, grad_fn=<NllLossBackward>)
tensor(0.1095, grad_fn=<NllLossBackward>)
tensor(0.2596, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBackward>)
tensor(0.1360, grad_fn=<NllLossBackward>)
tensor(0.1174, grad_fn=<NllLossBackward>)
tensor(0.0271, grad_fn=<NllLossBackward>)
tensor(0.2131, grad_fn=<NllLossBackward>)
tensor(0.0112, grad_fn=<NllLossBackward>)
tensor(0.0972, grad_fn=<NllLossBackward>)
tensor(0.0676, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBac

tensor(0.1273, grad_fn=<NllLossBackward>)
tensor(0.2111, grad_fn=<NllLossBackward>)
tensor(0.2511, grad_fn=<NllLossBackward>)
tensor(0.1750, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.0930, grad_fn=<NllLossBackward>)
tensor(0.0861, grad_fn=<NllLossBackward>)
tensor(0.0425, grad_fn=<NllLossBackward>)
tensor(0.0429, grad_fn=<NllLossBackward>)
tensor(0.0619, grad_fn=<NllLossBackward>)
tensor(0.1388, grad_fn=<NllLossBackward>)
tensor(0.1294, grad_fn=<NllLossBackward>)
tensor(0.2325, grad_fn=<NllLossBackward>)
tensor(0.0819, grad_fn=<NllLossBackward>)
tensor(0.0337, grad_fn=<NllLossBackward>)
tensor(0.1092, grad_fn=<NllLossBackward>)
tensor(0.1691, grad_fn=<NllLossBackward>)
tensor(0.0401, grad_fn=<NllLossBackward>)
tensor(0.0036, grad_fn=<NllLossBackward>)
tensor(0.1035, grad_fn=<NllLossBackward>)
tensor(0.2699, grad_fn=<NllLossBackward>)
tensor(0.1145, grad_fn=<NllLossBackward>)
tensor(0.1162, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBac

tensor(0.1496, grad_fn=<NllLossBackward>)
tensor(0.0247, grad_fn=<NllLossBackward>)
tensor(0.1429, grad_fn=<NllLossBackward>)
tensor(0.0164, grad_fn=<NllLossBackward>)
tensor(0.1957, grad_fn=<NllLossBackward>)
tensor(0.0331, grad_fn=<NllLossBackward>)
tensor(0.1620, grad_fn=<NllLossBackward>)
tensor(0.0877, grad_fn=<NllLossBackward>)
tensor(0.2106, grad_fn=<NllLossBackward>)
tensor(0.0312, grad_fn=<NllLossBackward>)
tensor(0.1315, grad_fn=<NllLossBackward>)
tensor(0.0355, grad_fn=<NllLossBackward>)
tensor(0.0139, grad_fn=<NllLossBackward>)
tensor(0.1573, grad_fn=<NllLossBackward>)
tensor(0.2395, grad_fn=<NllLossBackward>)
tensor(0.0812, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBackward>)
tensor(0.0886, grad_fn=<NllLossBackward>)
tensor(0.0951, grad_fn=<NllLossBackward>)
tensor(0.0775, grad_fn=<NllLossBackward>)
tensor(0.1021, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.0663, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBac

tensor(0.1046, grad_fn=<NllLossBackward>)
tensor(0.0532, grad_fn=<NllLossBackward>)
tensor(0.1692, grad_fn=<NllLossBackward>)
tensor(0.1345, grad_fn=<NllLossBackward>)
tensor(0.2014, grad_fn=<NllLossBackward>)
tensor(0.1745, grad_fn=<NllLossBackward>)
tensor(0.0260, grad_fn=<NllLossBackward>)
tensor(0.1142, grad_fn=<NllLossBackward>)
tensor(0.1311, grad_fn=<NllLossBackward>)
tensor(0.0485, grad_fn=<NllLossBackward>)
tensor(0.2038, grad_fn=<NllLossBackward>)
tensor(0.0416, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.0779, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.0555, grad_fn=<NllLossBackward>)
tensor(0.1577, grad_fn=<NllLossBackward>)
tensor(0.0974, grad_fn=<NllLossBackward>)
tensor(0.1474, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.1297, grad_fn=<NllLossBackward>)
tensor(0.0795, grad_fn=<NllLossBackward>)
tensor(0.2564, grad_fn=<NllLossBackward>)
tensor(0.3066, grad_fn=<NllLossBac

tensor(0.2969, grad_fn=<NllLossBackward>)
tensor(0.0671, grad_fn=<NllLossBackward>)
tensor(0.2072, grad_fn=<NllLossBackward>)
tensor(0.0038, grad_fn=<NllLossBackward>)
tensor(0.3202, grad_fn=<NllLossBackward>)
tensor(0.3203, grad_fn=<NllLossBackward>)
tensor(0.0861, grad_fn=<NllLossBackward>)
tensor(0.1634, grad_fn=<NllLossBackward>)
tensor(0.0353, grad_fn=<NllLossBackward>)
tensor(0.1001, grad_fn=<NllLossBackward>)
tensor(0.1700, grad_fn=<NllLossBackward>)
tensor(0.1501, grad_fn=<NllLossBackward>)
tensor(0.0591, grad_fn=<NllLossBackward>)
tensor(0.1082, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.0128, grad_fn=<NllLossBackward>)
tensor(0.1655, grad_fn=<NllLossBackward>)
tensor(0.1457, grad_fn=<NllLossBackward>)
tensor(0.1190, grad_fn=<NllLossBackward>)
tensor(0.1286, grad_fn=<NllLossBackward>)
tensor(0.1563, grad_fn=<NllLossBackward>)
tensor(0.2724, grad_fn=<NllLossBackward>)
tensor(0.1562, grad_fn=<NllLossBackward>)
tensor(0.2664, grad_fn=<NllLossBac

tensor(0.0669, grad_fn=<NllLossBackward>)
tensor(0.2310, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0871, grad_fn=<NllLossBackward>)
tensor(0.0637, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBackward>)
tensor(0.1981, grad_fn=<NllLossBackward>)
tensor(0.1219, grad_fn=<NllLossBackward>)
tensor(0.1482, grad_fn=<NllLossBackward>)
tensor(0.0253, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.0748, grad_fn=<NllLossBackward>)
tensor(0.0850, grad_fn=<NllLossBackward>)
tensor(0.0919, grad_fn=<NllLossBackward>)
tensor(0.0481, grad_fn=<NllLossBackward>)
tensor(0.0575, grad_fn=<NllLossBackward>)
tensor(0.2021, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.2185, grad_fn=<NllLossBackward>)
tensor(0.1418, grad_fn=<NllLossBackward>)
tensor(0.0596, grad_fn=<NllLossBackward>)
tensor(0.2239, grad_fn=<NllLossBackward>)
tensor(0.1735, grad_fn=<NllLossBac

tensor(0.2011, grad_fn=<NllLossBackward>)
tensor(0.0277, grad_fn=<NllLossBackward>)
tensor(0.1693, grad_fn=<NllLossBackward>)
tensor(0.2452, grad_fn=<NllLossBackward>)
tensor(0.0781, grad_fn=<NllLossBackward>)
tensor(0.2634, grad_fn=<NllLossBackward>)
tensor(0.1363, grad_fn=<NllLossBackward>)
tensor(0.3056, grad_fn=<NllLossBackward>)
tensor(0.1235, grad_fn=<NllLossBackward>)
tensor(0.1040, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.1648, grad_fn=<NllLossBackward>)
tensor(0.1149, grad_fn=<NllLossBackward>)
tensor(0.0691, grad_fn=<NllLossBackward>)
tensor(0.0087, grad_fn=<NllLossBackward>)
tensor(0.0341, grad_fn=<NllLossBackward>)
tensor(0.2005, grad_fn=<NllLossBackward>)
tensor(0.0258, grad_fn=<NllLossBackward>)
tensor(0.1316, grad_fn=<NllLossBackward>)
tensor(0.2646, grad_fn=<NllLossBackward>)
tensor(0.1793, grad_fn=<NllLossBackward>)
tensor(0.0640, grad_fn=<NllLossBackward>)
tensor(0.0410, grad_fn=<NllLossBac

tensor(0.0782, grad_fn=<NllLossBackward>)
tensor(0.0236, grad_fn=<NllLossBackward>)
tensor(0.1509, grad_fn=<NllLossBackward>)
tensor(0.1533, grad_fn=<NllLossBackward>)
tensor(0.1286, grad_fn=<NllLossBackward>)
tensor(0.0629, grad_fn=<NllLossBackward>)
tensor(0.1078, grad_fn=<NllLossBackward>)
tensor(0.0183, grad_fn=<NllLossBackward>)
tensor(0.0773, grad_fn=<NllLossBackward>)
tensor(0.0329, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.0581, grad_fn=<NllLossBackward>)
tensor(0.1232, grad_fn=<NllLossBackward>)
tensor(0.0806, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.1937, grad_fn=<NllLossBackward>)
tensor(0.0497, grad_fn=<NllLossBackward>)
tensor(0.2188, grad_fn=<NllLossBackward>)
tensor(0.0041, grad_fn=<NllLossBackward>)
tensor(0.1954, grad_fn=<NllLossBackward>)
tensor(0.0471, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.0324, grad_fn=<NllLossBackward>)
tensor(0.0774, grad_fn=<NllLossBac

tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0811, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0360, grad_fn=<NllLossBackward>)
tensor(0.0419, grad_fn=<NllLossBackward>)
tensor(0.1159, grad_fn=<NllLossBackward>)
tensor(0.0541, grad_fn=<NllLossBackward>)
tensor(0.1861, grad_fn=<NllLossBackward>)
tensor(0.0046, grad_fn=<NllLossBackward>)
tensor(0.0295, grad_fn=<NllLossBackward>)
tensor(0.2366, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.1196, grad_fn=<NllLossBackward>)
tensor(0.1468, grad_fn=<NllLossBackward>)
tensor(0.0659, grad_fn=<NllLossBackward>)
tensor(0.0864, grad_fn=<NllLossBackward>)
tensor(0.3024, grad_fn=<NllLossBackward>)
tensor(0.1648, grad_fn=<NllLossBackward>)
tensor(0.0382, grad_fn=<NllLossBackward>)
tensor(0.1933, grad_fn=<NllLossBackward>)
tensor(0.0622, grad_fn=<NllLossBackward>)
tensor(0.1764, grad_fn=<NllLossBackward>)
tensor(0.0310, grad_fn=<NllLossBackward>)
tensor(0.1589, grad_fn=<NllLossBac

tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.1037, grad_fn=<NllLossBackward>)
tensor(0.1159, grad_fn=<NllLossBackward>)
tensor(0.1952, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.3267, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.0715, grad_fn=<NllLossBackward>)
tensor(0.1695, grad_fn=<NllLossBackward>)
tensor(0.0717, grad_fn=<NllLossBackward>)
tensor(0.1355, grad_fn=<NllLossBackward>)
tensor(0.2213, grad_fn=<NllLossBackward>)
tensor(0.0415, grad_fn=<NllLossBackward>)
tensor(0.2728, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.0166, grad_fn=<NllLossBackward>)
tensor(0.0227, grad_fn=<NllLossBackward>)
tensor(0.0654, grad_fn=<NllLossBackward>)
tensor(0.1034, grad_fn=<NllLossBackward>)
tensor(0.0868, grad_fn=<NllLossBackward>)
tensor(0.1301, grad_fn=<NllLossBackward>)
tensor(0.0749, grad_fn=<NllLossBackward>)
tensor(0.1364, grad_fn=<NllLossBackward>)
tensor(0.1144, grad_fn=<NllLossBac

tensor(0.1675, grad_fn=<NllLossBackward>)
tensor(0.0526, grad_fn=<NllLossBackward>)
tensor(0.1722, grad_fn=<NllLossBackward>)
tensor(0.2075, grad_fn=<NllLossBackward>)
tensor(0.1456, grad_fn=<NllLossBackward>)
tensor(0.0389, grad_fn=<NllLossBackward>)
tensor(0.0521, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.1655, grad_fn=<NllLossBackward>)
tensor(0.0294, grad_fn=<NllLossBackward>)
tensor(0.1099, grad_fn=<NllLossBackward>)
tensor(0.0255, grad_fn=<NllLossBackward>)
tensor(0.1908, grad_fn=<NllLossBackward>)
tensor(0.2295, grad_fn=<NllLossBackward>)
tensor(0.0164, grad_fn=<NllLossBackward>)
tensor(0.0112, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.0557, grad_fn=<NllLossBackward>)
tensor(0.1886, grad_fn=<NllLossBackward>)
tensor(0.2328, grad_fn=<NllLossBackward>)
tensor(0.2058, grad_fn=<NllLossBackward>)
tensor(0.0325, grad_fn=<NllLossBackward>)
tensor(0.0487, grad_fn=<NllLossBackward>)
tensor(0.0082, grad_fn=<NllLossBac

tensor(0.2703, grad_fn=<NllLossBackward>)
tensor(0.1510, grad_fn=<NllLossBackward>)
tensor(0.0377, grad_fn=<NllLossBackward>)
tensor(0.0513, grad_fn=<NllLossBackward>)
tensor(0.0807, grad_fn=<NllLossBackward>)
tensor(0.1271, grad_fn=<NllLossBackward>)
tensor(0.0973, grad_fn=<NllLossBackward>)
tensor(0.2922, grad_fn=<NllLossBackward>)
tensor(0.0244, grad_fn=<NllLossBackward>)
tensor(0.1941, grad_fn=<NllLossBackward>)
tensor(0.2872, grad_fn=<NllLossBackward>)
tensor(0.0616, grad_fn=<NllLossBackward>)
tensor(0.1313, grad_fn=<NllLossBackward>)
tensor(0.1217, grad_fn=<NllLossBackward>)
tensor(0.1669, grad_fn=<NllLossBackward>)
tensor(0.0642, grad_fn=<NllLossBackward>)
tensor(0.1038, grad_fn=<NllLossBackward>)
tensor(0.1450, grad_fn=<NllLossBackward>)
tensor(0.0809, grad_fn=<NllLossBackward>)
tensor(0.0881, grad_fn=<NllLossBackward>)
tensor(0.0498, grad_fn=<NllLossBackward>)
tensor(0.2819, grad_fn=<NllLossBackward>)
tensor(0.0943, grad_fn=<NllLossBackward>)
tensor(0.0239, grad_fn=<NllLossBac

tensor(0.0639, grad_fn=<NllLossBackward>)
tensor(0.0443, grad_fn=<NllLossBackward>)
tensor(0.0224, grad_fn=<NllLossBackward>)
tensor(0.0869, grad_fn=<NllLossBackward>)
tensor(0.1933, grad_fn=<NllLossBackward>)
tensor(0.2252, grad_fn=<NllLossBackward>)
tensor(0.0826, grad_fn=<NllLossBackward>)
tensor(0.1449, grad_fn=<NllLossBackward>)
tensor(0.0309, grad_fn=<NllLossBackward>)
tensor(0.0606, grad_fn=<NllLossBackward>)
tensor(0.1819, grad_fn=<NllLossBackward>)
tensor(0.0628, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.0655, grad_fn=<NllLossBackward>)
tensor(0.2053, grad_fn=<NllLossBackward>)
tensor(0.2036, grad_fn=<NllLossBackward>)
tensor(0.2488, grad_fn=<NllLossBackward>)
tensor(0.1025, grad_fn=<NllLossBackward>)
tensor(0.0896, grad_fn=<NllLossBackward>)
tensor(0.2591, grad_fn=<NllLossBackward>)
tensor(0.0553, grad_fn=<NllLossBackward>)
tensor(0.1907, grad_fn=<NllLossBackward>)
tensor(0.1064, grad_fn=<NllLossBac

tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.0476, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.0697, grad_fn=<NllLossBackward>)
tensor(0.1068, grad_fn=<NllLossBackward>)
tensor(0.0689, grad_fn=<NllLossBackward>)
tensor(0.1844, grad_fn=<NllLossBackward>)
tensor(0.1539, grad_fn=<NllLossBackward>)
tensor(0.1271, grad_fn=<NllLossBackward>)
tensor(0.1337, grad_fn=<NllLossBackward>)
tensor(0.1706, grad_fn=<NllLossBackward>)
tensor(0.1416, grad_fn=<NllLossBackward>)
tensor(0.1317, grad_fn=<NllLossBackward>)
tensor(0.1552, grad_fn=<NllLossBackward>)
tensor(0.0548, grad_fn=<NllLossBackward>)
tensor(0.0657, grad_fn=<NllLossBackward>)
tensor(0.0274, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.0501, grad_fn=<NllLossBackward>)
tensor(0.1008, grad_fn=<NllLossBackward>)
tensor(0.0154, grad_fn=<NllLossBackward>)
tensor(0.0537, grad_fn=<NllLossBackward>)
tensor(0.0299, grad_fn=<NllLossBackward>)
tensor(0.0161, grad_fn=<NllLossBac

tensor(0.0593, grad_fn=<NllLossBackward>)
tensor(0.0207, grad_fn=<NllLossBackward>)
tensor(0.1266, grad_fn=<NllLossBackward>)
tensor(0.1122, grad_fn=<NllLossBackward>)
tensor(0.0906, grad_fn=<NllLossBackward>)
tensor(0.1089, grad_fn=<NllLossBackward>)
tensor(0.1607, grad_fn=<NllLossBackward>)
tensor(0.1747, grad_fn=<NllLossBackward>)
tensor(0.0732, grad_fn=<NllLossBackward>)
tensor(0.0182, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBackward>)
tensor(0.0564, grad_fn=<NllLossBackward>)
tensor(0.0520, grad_fn=<NllLossBackward>)
tensor(0.0953, grad_fn=<NllLossBackward>)
tensor(0.0711, grad_fn=<NllLossBackward>)
tensor(0.0434, grad_fn=<NllLossBackward>)
tensor(0.0969, grad_fn=<NllLossBackward>)
tensor(0.1664, grad_fn=<NllLossBackward>)
tensor(0.0492, grad_fn=<NllLossBackward>)
tensor(0.0733, grad_fn=<NllLossBackward>)
tensor(0.0688, grad_fn=<NllLossBackward>)
tensor(0.0922, grad_fn=<NllLossBackward>)
tensor(0.1951, grad_fn=<NllLossBackward>)
tensor(0.0394, grad_fn=<NllLossBac

tensor(0.1250, grad_fn=<NllLossBackward>)
tensor(0.0237, grad_fn=<NllLossBackward>)
tensor(0.0916, grad_fn=<NllLossBackward>)
tensor(0.0610, grad_fn=<NllLossBackward>)
tensor(0.0494, grad_fn=<NllLossBackward>)
tensor(0.1442, grad_fn=<NllLossBackward>)
tensor(0.2124, grad_fn=<NllLossBackward>)
tensor(0.0725, grad_fn=<NllLossBackward>)
tensor(0.0786, grad_fn=<NllLossBackward>)
tensor(0.0577, grad_fn=<NllLossBackward>)
tensor(0.0792, grad_fn=<NllLossBackward>)
tensor(0.1677, grad_fn=<NllLossBackward>)
tensor(0.0903, grad_fn=<NllLossBackward>)
tensor(0.0392, grad_fn=<NllLossBackward>)
tensor(0.0378, grad_fn=<NllLossBackward>)
tensor(0.0835, grad_fn=<NllLossBackward>)
tensor(0.1445, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.0326, grad_fn=<NllLossBackward>)
tensor(0.1993, grad_fn=<NllLossBackward>)
tensor(0.0525, grad_fn=<NllLossBackward>)
tensor(0.0554, grad_fn=<NllLossBackward>)
tensor(0.0342, grad_fn=<NllLossBackward>)
tensor(0.0126, grad_fn=<NllLossBac

tensor(0.0322, grad_fn=<NllLossBackward>)
tensor(0.1218, grad_fn=<NllLossBackward>)
tensor(0.1535, grad_fn=<NllLossBackward>)
tensor(0.2862, grad_fn=<NllLossBackward>)
tensor(0.0586, grad_fn=<NllLossBackward>)
tensor(0.0759, grad_fn=<NllLossBackward>)
tensor(0.0173, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.2979, grad_fn=<NllLossBackward>)
tensor(0.2363, grad_fn=<NllLossBackward>)
tensor(0.1199, grad_fn=<NllLossBackward>)
tensor(0.1670, grad_fn=<NllLossBackward>)
tensor(0.2111, grad_fn=<NllLossBackward>)
tensor(0.0332, grad_fn=<NllLossBackward>)
tensor(0.0099, grad_fn=<NllLossBackward>)
tensor(0.1695, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.0958, grad_fn=<NllLossBackward>)
tensor(0.0552, grad_fn=<NllLossBackward>)
tensor(0.0929, grad_fn=<NllLossBackward>)
tensor(0.1814, grad_fn=<NllLossBackward>)
tensor(0.0482, grad_fn=<NllLossBackward>)
tensor(0.1853, grad_fn=<NllLossBackward>)
tensor(0.1963, grad_fn=<NllLossBac

tensor(0.1528, grad_fn=<NllLossBackward>)
tensor(0.1619, grad_fn=<NllLossBackward>)
tensor(0.0116, grad_fn=<NllLossBackward>)
tensor(0.1558, grad_fn=<NllLossBackward>)
tensor(0.1495, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBackward>)
tensor(0.0961, grad_fn=<NllLossBackward>)
tensor(0.0706, grad_fn=<NllLossBackward>)
tensor(0.1672, grad_fn=<NllLossBackward>)
tensor(0.1487, grad_fn=<NllLossBackward>)
tensor(0.0398, grad_fn=<NllLossBackward>)
tensor(0.3275, grad_fn=<NllLossBackward>)
tensor(0.0863, grad_fn=<NllLossBackward>)
tensor(0.1394, grad_fn=<NllLossBackward>)
tensor(0.1551, grad_fn=<NllLossBackward>)
tensor(0.1984, grad_fn=<NllLossBackward>)
tensor(0.2274, grad_fn=<NllLossBackward>)
tensor(0.0607, grad_fn=<NllLossBackward>)
tensor(0.1214, grad_fn=<NllLossBackward>)
tensor(0.1403, grad_fn=<NllLossBackward>)
tensor(0.1348, grad_fn=<NllLossBackward>)
tensor(0.0451, grad_fn=<NllLossBackward>)
tensor(0.0592, grad_fn=<NllLossBackward>)
tensor(0.1962, grad_fn=<NllLossBac

tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.2219, grad_fn=<NllLossBackward>)
tensor(0.1600, grad_fn=<NllLossBackward>)
tensor(0.1144, grad_fn=<NllLossBackward>)
tensor(0.1246, grad_fn=<NllLossBackward>)
tensor(0.0346, grad_fn=<NllLossBackward>)
tensor(0.1119, grad_fn=<NllLossBackward>)
tensor(0.1557, grad_fn=<NllLossBackward>)
tensor(0.1114, grad_fn=<NllLossBackward>)
tensor(0.2354, grad_fn=<NllLossBackward>)
tensor(0.0282, grad_fn=<NllLossBackward>)
tensor(0.1866, grad_fn=<NllLossBackward>)
tensor(0.2659, grad_fn=<NllLossBackward>)
tensor(0.2405, grad_fn=<NllLossBackward>)
tensor(0.2276, grad_fn=<NllLossBackward>)
tensor(0.0966, grad_fn=<NllLossBackward>)
tensor(0.3186, grad_fn=<NllLossBackward>)
tensor(0.0927, grad_fn=<NllLossBackward>)
tensor(0.0660, grad_fn=<NllLossBackward>)
tensor(0.0755, grad_fn=<NllLossBackward>)
tensor(0.2108, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.1959, grad_fn=<NllLossBackward>)
tensor(0.1617, grad_fn=<NllLossBac

tensor(0.0192, grad_fn=<NllLossBackward>)
tensor(0.1646, grad_fn=<NllLossBackward>)
tensor(0.4326, grad_fn=<NllLossBackward>)
tensor(0.1611, grad_fn=<NllLossBackward>)
tensor(0.2155, grad_fn=<NllLossBackward>)
tensor(0.3473, grad_fn=<NllLossBackward>)
tensor(0.2791, grad_fn=<NllLossBackward>)
tensor(0.0878, grad_fn=<NllLossBackward>)
tensor(0.0452, grad_fn=<NllLossBackward>)
tensor(0.1530, grad_fn=<NllLossBackward>)
tensor(0.1910, grad_fn=<NllLossBackward>)
tensor(0.4080, grad_fn=<NllLossBackward>)
tensor(0.1789, grad_fn=<NllLossBackward>)
tensor(0.0505, grad_fn=<NllLossBackward>)
tensor(0.1576, grad_fn=<NllLossBackward>)
tensor(0.1770, grad_fn=<NllLossBackward>)
tensor(0.0694, grad_fn=<NllLossBackward>)
tensor(0.0933, grad_fn=<NllLossBackward>)
tensor(0.2238, grad_fn=<NllLossBackward>)
tensor(0.1120, grad_fn=<NllLossBackward>)
tensor(0.0612, grad_fn=<NllLossBackward>)
tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.0138, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBac

tensor(0.0480, grad_fn=<NllLossBackward>)
tensor(0.1003, grad_fn=<NllLossBackward>)
tensor(0.1378, grad_fn=<NllLossBackward>)
tensor(0.0699, grad_fn=<NllLossBackward>)
tensor(0.1677, grad_fn=<NllLossBackward>)
tensor(0.0506, grad_fn=<NllLossBackward>)
tensor(0.1267, grad_fn=<NllLossBackward>)
tensor(0.1797, grad_fn=<NllLossBackward>)
tensor(0.0469, grad_fn=<NllLossBackward>)
tensor(0.0787, grad_fn=<NllLossBackward>)
tensor(0.2006, grad_fn=<NllLossBackward>)
tensor(0.1408, grad_fn=<NllLossBackward>)
tensor(0.0081, grad_fn=<NllLossBackward>)
tensor(0.3160, grad_fn=<NllLossBackward>)
tensor(0.1041, grad_fn=<NllLossBackward>)
tensor(0.1732, grad_fn=<NllLossBackward>)
tensor(0.1973, grad_fn=<NllLossBackward>)
tensor(0.0417, grad_fn=<NllLossBackward>)
tensor(0.2340, grad_fn=<NllLossBackward>)
tensor(0.0604, grad_fn=<NllLossBackward>)
tensor(0.0372, grad_fn=<NllLossBackward>)
tensor(0.0710, grad_fn=<NllLossBackward>)
tensor(0.0624, grad_fn=<NllLossBackward>)
tensor(0.2797, grad_fn=<NllLossBac

tensor(0.2206, grad_fn=<NllLossBackward>)
tensor(0.1300, grad_fn=<NllLossBackward>)
tensor(0.1570, grad_fn=<NllLossBackward>)
tensor(0.1398, grad_fn=<NllLossBackward>)
tensor(0.1717, grad_fn=<NllLossBackward>)
tensor(0.0315, grad_fn=<NllLossBackward>)
tensor(0.1659, grad_fn=<NllLossBackward>)
tensor(0.0358, grad_fn=<NllLossBackward>)
tensor(0.0960, grad_fn=<NllLossBackward>)
tensor(0.1207, grad_fn=<NllLossBackward>)
tensor(0.0603, grad_fn=<NllLossBackward>)
tensor(0.1791, grad_fn=<NllLossBackward>)
tensor(0.0636, grad_fn=<NllLossBackward>)
tensor(0.0333, grad_fn=<NllLossBackward>)
tensor(0.0455, grad_fn=<NllLossBackward>)
tensor(0.1923, grad_fn=<NllLossBackward>)
tensor(0.0563, grad_fn=<NllLossBackward>)
tensor(0.0912, grad_fn=<NllLossBackward>)
tensor(0.0143, grad_fn=<NllLossBackward>)
tensor(0.1476, grad_fn=<NllLossBackward>)
tensor(0.3031, grad_fn=<NllLossBackward>)
tensor(0.1141, grad_fn=<NllLossBackward>)
tensor(0.0540, grad_fn=<NllLossBackward>)
tensor(0.0052, grad_fn=<NllLossBac

tensor(0.0461, grad_fn=<NllLossBackward>)
tensor(0.0466, grad_fn=<NllLossBackward>)
tensor(0.0213, grad_fn=<NllLossBackward>)
tensor(0.1975, grad_fn=<NllLossBackward>)
tensor(0.0841, grad_fn=<NllLossBackward>)
tensor(0.0445, grad_fn=<NllLossBackward>)
tensor(0.0615, grad_fn=<NllLossBackward>)
tensor(0.0413, grad_fn=<NllLossBackward>)
tensor(0.1463, grad_fn=<NllLossBackward>)
tensor(0.0498, grad_fn=<NllLossBackward>)
tensor(0.0859, grad_fn=<NllLossBackward>)
tensor(0.0320, grad_fn=<NllLossBackward>)
tensor(0.0465, grad_fn=<NllLossBackward>)
tensor(0.3274, grad_fn=<NllLossBackward>)
tensor(0.0291, grad_fn=<NllLossBackward>)
tensor(0.1283, grad_fn=<NllLossBackward>)
tensor(0.0695, grad_fn=<NllLossBackward>)
tensor(0.0370, grad_fn=<NllLossBackward>)
tensor(0.1128, grad_fn=<NllLossBackward>)
tensor(0.2390, grad_fn=<NllLossBackward>)
tensor(0.0926, grad_fn=<NllLossBackward>)
tensor(0.0821, grad_fn=<NllLossBackward>)
tensor(0.0268, grad_fn=<NllLossBackward>)
tensor(0.0848, grad_fn=<NllLossBac

tensor(0.0623, grad_fn=<NllLossBackward>)
tensor(0.1476, grad_fn=<NllLossBackward>)
tensor(0.1068, grad_fn=<NllLossBackward>)
tensor(0.0609, grad_fn=<NllLossBackward>)
tensor(0.0442, grad_fn=<NllLossBackward>)
tensor(0.0519, grad_fn=<NllLossBackward>)
tensor(0.1904, grad_fn=<NllLossBackward>)
tensor(0.2300, grad_fn=<NllLossBackward>)
tensor(0.1337, grad_fn=<NllLossBackward>)
tensor(0.1101, grad_fn=<NllLossBackward>)
tensor(0.1638, grad_fn=<NllLossBackward>)
tensor(0.0551, grad_fn=<NllLossBackward>)
tensor(0.0810, grad_fn=<NllLossBackward>)
tensor(0.0101, grad_fn=<NllLossBackward>)
tensor(0.2192, grad_fn=<NllLossBackward>)
tensor(0.4954, grad_fn=<NllLossBackward>)
tensor(0.0280, grad_fn=<NllLossBackward>)
tensor(0.2322, grad_fn=<NllLossBackward>)
tensor(0.0036, grad_fn=<NllLossBackward>)
tensor(0.0152, grad_fn=<NllLossBackward>)
tensor(0.1276, grad_fn=<NllLossBackward>)
tensor(0.0036, grad_fn=<NllLossBackward>)
tensor(0.1176, grad_fn=<NllLossBackward>)
tensor(0.0855, grad_fn=<NllLossBac

tensor(0.1591, grad_fn=<NllLossBackward>)
tensor(0.2336, grad_fn=<NllLossBackward>)
tensor(0.1607, grad_fn=<NllLossBackward>)
tensor(0.0314, grad_fn=<NllLossBackward>)
tensor(0.0144, grad_fn=<NllLossBackward>)
tensor(0.1624, grad_fn=<NllLossBackward>)
tensor(0.0709, grad_fn=<NllLossBackward>)
tensor(0.3572, grad_fn=<NllLossBackward>)
tensor(0.0547, grad_fn=<NllLossBackward>)
tensor(0.0365, grad_fn=<NllLossBackward>)
tensor(0.2340, grad_fn=<NllLossBackward>)
tensor(0.0977, grad_fn=<NllLossBackward>)
tensor(0.1144, grad_fn=<NllLossBackward>)
tensor(0.0185, grad_fn=<NllLossBackward>)
tensor(0.1136, grad_fn=<NllLossBackward>)
tensor(0.1675, grad_fn=<NllLossBackward>)
tensor(0.0933, grad_fn=<NllLossBackward>)
tensor(0.0705, grad_fn=<NllLossBackward>)
tensor(0.0727, grad_fn=<NllLossBackward>)
tensor(0.0381, grad_fn=<NllLossBackward>)
tensor(0.1534, grad_fn=<NllLossBackward>)
tensor(0.0500, grad_fn=<NllLossBackward>)
tensor(0.1478, grad_fn=<NllLossBackward>)
tensor(0.0441, grad_fn=<NllLossBac

tensor(0.0521, grad_fn=<NllLossBackward>)
tensor(0.1118, grad_fn=<NllLossBackward>)
tensor(0.0532, grad_fn=<NllLossBackward>)
tensor(0.0503, grad_fn=<NllLossBackward>)
tensor(0.0502, grad_fn=<NllLossBackward>)
tensor(0.0698, grad_fn=<NllLossBackward>)
tensor(0.1213, grad_fn=<NllLossBackward>)
tensor(0.0433, grad_fn=<NllLossBackward>)
tensor(0.2158, grad_fn=<NllLossBackward>)
tensor(0.0206, grad_fn=<NllLossBackward>)
tensor(0.1112, grad_fn=<NllLossBackward>)
tensor(0.0300, grad_fn=<NllLossBackward>)
tensor(0.1370, grad_fn=<NllLossBackward>)
tensor(0.1494, grad_fn=<NllLossBackward>)
tensor(0.0142, grad_fn=<NllLossBackward>)
tensor(0.0813, grad_fn=<NllLossBackward>)
tensor(0.1124, grad_fn=<NllLossBackward>)
tensor(0.2091, grad_fn=<NllLossBackward>)
tensor(0.0685, grad_fn=<NllLossBackward>)
tensor(0.1179, grad_fn=<NllLossBackward>)
tensor(0.1563, grad_fn=<NllLossBackward>)
tensor(0.0754, grad_fn=<NllLossBackward>)
tensor(0.1003, grad_fn=<NllLossBackward>)
tensor(0.0602, grad_fn=<NllLossBac

tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.0456, grad_fn=<NllLossBackward>)
tensor(0.0458, grad_fn=<NllLossBackward>)
tensor(0.1593, grad_fn=<NllLossBackward>)
tensor(0.1229, grad_fn=<NllLossBackward>)
tensor(0.0483, grad_fn=<NllLossBackward>)
tensor(0.2260, grad_fn=<NllLossBackward>)
tensor(0.1911, grad_fn=<NllLossBackward>)
tensor(0.0076, grad_fn=<NllLossBackward>)
tensor(0.0118, grad_fn=<NllLossBackward>)
tensor(0.1999, grad_fn=<NllLossBackward>)
tensor(0.0150, grad_fn=<NllLossBackward>)
tensor(0.0675, grad_fn=<NllLossBackward>)
tensor(0.1003, grad_fn=<NllLossBackward>)
tensor(0.0405, grad_fn=<NllLossBackward>)
tensor(0.0148, grad_fn=<NllLossBackward>)
tensor(0.0718, grad_fn=<NllLossBackward>)
tensor(0.4135, grad_fn=<NllLossBackward>)
tensor(0.1381, grad_fn=<NllLossBackward>)
tensor(0.1887, grad_fn=<NllLossBackward>)
tensor(0.1303, grad_fn=<NllLossBackward>)
tensor(0.1198, grad_fn=<NllLossBackward>)
tensor(0.0938, grad_fn=<NllLossBackward>)
tensor(0.0527, grad_fn=<NllLossBac

tensor(0.2137, grad_fn=<NllLossBackward>)
tensor(0.2188, grad_fn=<NllLossBackward>)
tensor(0.0231, grad_fn=<NllLossBackward>)
tensor(0.0404, grad_fn=<NllLossBackward>)
tensor(0.0311, grad_fn=<NllLossBackward>)
tensor(0.1204, grad_fn=<NllLossBackward>)
tensor(0.2777, grad_fn=<NllLossBackward>)
tensor(0.1249, grad_fn=<NllLossBackward>)
tensor(0.0735, grad_fn=<NllLossBackward>)
tensor(0.0140, grad_fn=<NllLossBackward>)
tensor(0.3055, grad_fn=<NllLossBackward>)
tensor(0.1288, grad_fn=<NllLossBackward>)
tensor(0.0617, grad_fn=<NllLossBackward>)
tensor(0.0109, grad_fn=<NllLossBackward>)
tensor(0.1071, grad_fn=<NllLossBackward>)
tensor(0.1843, grad_fn=<NllLossBackward>)
tensor(0.0544, grad_fn=<NllLossBackward>)
tensor(0.0549, grad_fn=<NllLossBackward>)
tensor(0.0772, grad_fn=<NllLossBackward>)
tensor(0.1277, grad_fn=<NllLossBackward>)
tensor(0.1227, grad_fn=<NllLossBackward>)
tensor(0.1146, grad_fn=<NllLossBackward>)
tensor(0.1719, grad_fn=<NllLossBackward>)
tensor(0.0762, grad_fn=<NllLossBac

tensor(0.0884, grad_fn=<NllLossBackward>)
tensor(0.0262, grad_fn=<NllLossBackward>)
tensor(0.3736, grad_fn=<NllLossBackward>)
tensor(0.0375, grad_fn=<NllLossBackward>)
tensor(0.1845, grad_fn=<NllLossBackward>)
tensor(0.0373, grad_fn=<NllLossBackward>)
tensor(0.2172, grad_fn=<NllLossBackward>)
tensor(0.0363, grad_fn=<NllLossBackward>)
tensor(0.2009, grad_fn=<NllLossBackward>)
tensor(0.0895, grad_fn=<NllLossBackward>)
tensor(0.0454, grad_fn=<NllLossBackward>)
tensor(0.0665, grad_fn=<NllLossBackward>)
tensor(0.1629, grad_fn=<NllLossBackward>)
tensor(0.1281, grad_fn=<NllLossBackward>)
tensor(0.0550, grad_fn=<NllLossBackward>)
tensor(0.0590, grad_fn=<NllLossBackward>)
tensor(0.1188, grad_fn=<NllLossBackward>)
tensor(0.1053, grad_fn=<NllLossBackward>)
tensor(0.0649, grad_fn=<NllLossBackward>)
tensor(0.4144, grad_fn=<NllLossBackward>)
tensor(0.1298, grad_fn=<NllLossBackward>)
tensor(0.0212, grad_fn=<NllLossBackward>)
tensor(0.1451, grad_fn=<NllLossBackward>)
tensor(0.0323, grad_fn=<NllLossBac

In [24]:
file.write('\nCentralized Mean train accuracy: ' + str(sum(train_acc)/len(train_acc)))
file.write('\nCentralized Mean test accuracy: ' + str(sum(test_acc)/len(test_acc)))

51


----------------
-----------------
**Data is distributed to nodes**

<!-- ### <span style="background-color:#F087F9"> Datanın nodelara dağıtılması </span>    -->

In [25]:
print(x_train_dict["x_train1"].shape, y_train_dict["y_train1"].shape)
print(x_test.shape, y_test.shape)

torch.Size([33417, 33]) torch.Size([33417])
torch.Size([14849, 33]) torch.Size([14849])


**Main model is created**

In [26]:
main_model = Net2nn(inputs,outputs)
main_optimizer = torch.optim.SGD(main_model.parameters(), lr=learning_rate, momentum=0.9)
main_criterion = nn.CrossEntropyLoss()

**Models,optimizers and loss functions in nodes are defined**

In [27]:
model_dict, optimizer_dict, criterion_dict = create_model_optimizer_criterion_dict(number_of_slices)

**Keys of dicts are being made iterable**

In [28]:
name_of_x_train_sets=list(x_train_dict.keys())
name_of_y_train_sets=list(y_train_dict.keys())

name_of_models=list(model_dict.keys())
name_of_optimizers=list(optimizer_dict.keys())
name_of_criterions=list(criterion_dict.keys())

print(name_of_x_train_sets)
print(name_of_y_train_sets)
print("\n ------------")
print(name_of_models)
print(name_of_optimizers)
print(name_of_criterions)

['x_train0', 'x_train1', 'x_train2', 'x_train3']
['y_train0', 'y_train1', 'y_train2', 'y_train3']

 ------------
['model0', 'model1', 'model2', 'model3']
['optimizer0', 'optimizer1', 'optimizer2', 'optimizer3']
['criterion0', 'criterion1', 'criterion2', 'criterion3']


In [29]:
print(main_model.fc2.weight[0:1,0:5])
print(model_dict["model1"].fc2.weight[0:1,0:5])

tensor([[ 0.0459,  0.0528,  0.0031,  0.0325, -0.0223]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0564,  0.0337,  0.0494, -0.0080, -0.0444]],
       grad_fn=<SliceBackward>)


**Parameters of main model are sent to nodes**  
Since the parameters of the main model and parameters of all local models in the nodes are randomly initialized, all these parameters will be different from each other. For this reason, the main model sends its parameters to the nodes before the training of local models in the nodes begins. You can check the weights below.

In [30]:
model_dict=send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_slices)

Updating model :model0
Updating model :model1
Updating model :model2
Updating model :model3


In [31]:
print(main_model.fc2.weight[0:1,0:5])
print(model_dict["model1"].fc2.weight[0:1,0:5])

tensor([[ 0.0459,  0.0528,  0.0031,  0.0325, -0.0223]],
       grad_fn=<SliceBackward>)
tensor([[ 0.0459,  0.0528,  0.0031,  0.0325, -0.0223]],
       grad_fn=<SliceBackward>)


**Models in the nodes are trained**

In [32]:
# start_train_end_node_process()
start_train_end_node_process_print_some(number_of_slices, print_amount)

Federated learning for slice 1
Subset 0
epoch:   1 | train accuracy: 0.90938 | test accuracy: 0.94222
epoch:   2 | train accuracy: 0.94712 | test accuracy: 0.95373
epoch:   3 | train accuracy: 0.95302 | test accuracy: 0.96114
epoch:   4 | train accuracy: 0.95715 | test accuracy: 0.96128
epoch:   5 | train accuracy: 0.96047 | test accuracy: 0.97030
epoch:   6 | train accuracy: 0.96247 | test accuracy: 0.96943
epoch:   7 | train accuracy: 0.96490 | test accuracy: 0.97010
epoch:   8 | train accuracy: 0.96553 | test accuracy: 0.97151
epoch:   9 | train accuracy: 0.96693 | test accuracy: 0.97205
epoch:  10 | train accuracy: 0.96741 | test accuracy: 0.96963
epoch:  11 | train accuracy: 0.96843 | test accuracy: 0.97380
epoch:  12 | train accuracy: 0.96882 | test accuracy: 0.97266
epoch:  13 | train accuracy: 0.96867 | test accuracy: 0.97757
epoch:  14 | train accuracy: 0.96986 | test accuracy: 0.97367
epoch:  15 | train accuracy: 0.97097 | test accuracy: 0.97690
epoch:  16 | train accuracy: 0

In [33]:
## As you can see, wieghts of local models are updated after training process
print(main_model.fc2.weight[0,0:5])
print(model_dict["model1"].fc2.weight[0,0:5])

tensor([ 0.0459,  0.0528,  0.0031,  0.0325, -0.0223], grad_fn=<SliceBackward>)
tensor([ 0.0767, -0.0119,  0.0087,  0.0556, -0.0069], grad_fn=<SliceBackward>)


### Let's compare the performance of federated main model, individual local models and centralized model  

**Federated main model vs individual local models before 1st iteration (on distributed test set)**  
Since main model is randomly initialized and no action taken on it yet, its performance is very poor. Please before_acc_table

In [34]:
before_acc_table=compare_local_and_merged_model_performance(number_of_slices=number_of_slices)
before_test_loss, before_test_accuracy = validation(main_model, test_dl, main_criterion)
file.write('\nbefore training main model')
confusion_mat(main_model, test_dl)

main_model= set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,model_dict, number_of_slices) 

after_acc_table=compare_local_and_merged_model_performance(number_of_slices=number_of_slices)
after_test_loss, after_test_accuracy = validation(main_model, test_dl, main_criterion)
file.write('\nafter training main model')
confusion_mat(main_model, test_dl)

confusion matrix for normal scenario for slices : 4
[[4222   41 3024  414    4]
 [1107    2  363 3866    0]
 [   3    0    8    0    0]
 [ 148    0  238    2    0]
 [ 345   42  572  448    0]]
confusion matrix for normal scenario for slices : 4
[[7586   52    0    4   63]
 [  95 5242    0    0    1]
 [   8    0    0    3    0]
 [ 307   20    0   59    2]
 [  76    1    0    0 1330]]


In [35]:
print("Federated main model vs individual local models before FedAvg first iteration")
file.write('\nBefore training federated')
file.write('\n'+str(before_acc_table))
before_acc_table

Federated main model vs individual local models before FedAvg first iteration


Unnamed: 0,sample,local_ind_model,merged_main_model
0,sample 0,0.9763,0.2851
1,sample 1,0.9577,0.2851
2,sample 2,0.963,0.2851
3,sample 3,0.9608,0.2851


In [36]:
print("Federated main model vs individual local models after FedAvg first iteration")
file.write('\nAfter training federated')
file.write('\n'+str(after_acc_table))
after_acc_table

Federated main model vs individual local models after FedAvg first iteration


Unnamed: 0,sample,local_ind_model,merged_main_model
0,sample 0,0.9763,0.9574
1,sample 1,0.9577,0.9574
2,sample 2,0.963,0.9574
3,sample 3,0.9608,0.9574


**Federated main model vs centralized model before 1st iteration (on all test data)**  
Please be aware that the centralized model gets approximately %98 accuracy on all test data.

In [37]:
print("Before 1st iteration main model accuracy on all test data: {:7.4f}".format(before_test_accuracy))
print("After 1st iteration main model accuracy on all test data: {:7.4f}".format(after_test_accuracy))
print("Centralized model accuracy on all test data: {:7.4f}".format(central_test_accuracy))

Before 1st iteration main model accuracy on all test data:  0.0661
After 1st iteration main model accuracy on all test data:  0.9608
Centralized model accuracy on all test data:  0.9536


This is a single iteration, we can send the weights of the main model back to the nodes and repeat the above steps.
Now let's check how the performance of the main model improves when we repeat the iteration 10 more times.

In [38]:
for i in range(2):
    model_dict=send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_slices)
    start_train_end_node_process_without_print(number_of_slices)
    main_model= set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,model_dict, number_of_slices) 
    test_loss, test_accuracy = validation(main_model, test_dl, main_criterion)
    print("Iteration", str(i+2), ": main_model accuracy on all test data: {:7.4f}".format(test_accuracy))   

Updating model :model0
Updating model :model1
Updating model :model2
Updating model :model3
Iteration 2 : main_model accuracy on all test data:  0.9733
Updating model :model0
Updating model :model1
Updating model :model2
Updating model :model3
Iteration 3 : main_model accuracy on all test data:  0.9766


In [39]:
confusion_mat(main_model, test_dl)

confusion matrix for normal scenario for slices : 4
[[7629   53    0   16    7]
 [   9 5329    0    0    0]
 [   5    0    6    0    0]
 [ 183    3    1  199    2]
 [  59    0    0   10 1338]]


In [40]:
for i in range(2):
    model_dict=send_main_model_to_nodes_and_update_model_dict(main_model, model_dict, number_of_slices)
    start_train_end_node_process_without_print(number_of_slices)
    main_model= set_averaged_weights_as_main_model_weights_and_update_main_model(main_model,model_dict, number_of_slices) 
    test_loss, test_accuracy = validation(main_model, test_dl, main_criterion)
    print("Iteration", str(i+2), ": main_model accuracy on all test data: {:7.4f}".format(test_accuracy))   

Updating model :model0
Updating model :model1
Updating model :model2
Updating model :model3
Iteration 2 : main_model accuracy on all test data:  0.9754
Updating model :model0
Updating model :model1
Updating model :model2
Updating model :model3
Iteration 3 : main_model accuracy on all test data:  0.9778


In [41]:
file = open(file_name, "a")
confusion_mat(main_model, test_dl)

confusion matrix for normal scenario for slices : 4
[[7635   45    0   21    4]
 [   8 5330    0    0    0]
 [   5    0    6    0    0]
 [ 183    1    0  202    2]
 [  54    0    0    6 1347]]


The accuracy of the centralized model was calculated as approximately 98%. The accuracy of the main model obtained by FedAvg method started from 85% and improved to 94%. In this case, we can say that although the main model obtained by FedAvg method was trained without seeing the data, its performance cannot be underestimated.

In [42]:
file.close()