In [22]:
# Copyright 2016, Yarin Gal, All rights reserved.
# This code is based on the code by Jose Miguel Hernandez-Lobato used for his 
# paper "Probabilistic Backpropagation for Scalable Learning of Bayesian Neural Networks".

import warnings
warnings.filterwarnings("ignore")

import math
from scipy.special import logsumexp
import numpy as np

from keras.regularizers import l2
# from keras import Input
from keras.layers import Input
from keras.layers import Dropout
from keras.layers import Dense
# from keras import Model

import time

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import torch.utils.data as Data
from scipy.stats import norm

import time


class netWBB:
    """
        Ensemble of small feed-forward regression networks, each trained on
        the same data with its own random Exponential(1) weight per training
        example (plus one random weight on the L2 penalty).

        Every instance owns its own list of trained networks in `self.NNs`.
    """

    n_bootstrap_samples = 100     # number of networks trained per instance
    # Class-level default kept only for backward compatibility. It is never
    # mutated: __init__ always binds a fresh per-instance list, so separate
    # instances (e.g. one per train/test split) no longer share networks.
    NNs = []

    def __init__(self, X_train, y_train, n_hidden, n_epochs = 40,
        normalize = False):

        """
            Trains `n_bootstrap_samples` networks and stores them in
            `self.NNs`.

            @param X_train      Matrix with the features for the training data
                                (one row per example).
            @param y_train      Vector with the target variables for the
                                training data.
            @param n_hidden     Sequence with the number of neurons for each
                                hidden layer. One Linear+ReLU pair is built
                                per entry.
            @param n_epochs     Number of full passes over the training data
                                (one optimizer step per pass).
            @param normalize    Whether to standardize the input features
                                with the training mean / standard deviation.
                                The targets are always standardized.
        """

        # Fresh per-instance ensemble (see the note on the class attribute).
        self.NNs = []

        hidden_sizes = [ int(h) for h in n_hidden ]
        if not hidden_sizes:
            raise ValueError("n_hidden must contain at least one layer size")

        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # We normalize the training data to have zero mean and unit standard
        # deviation in the training set if necessary
        print("running")
        if normalize:
            self.std_X_train = np.std(X_train, 0)
            # Constant features would otherwise divide by zero.
            self.std_X_train[ self.std_X_train == 0 ] = 1
            self.mean_X_train = np.mean(X_train, 0)
        else:
            self.std_X_train = np.ones(X_train.shape[ 1 ])
            self.mean_X_train = np.zeros(X_train.shape[ 1 ])

        X_train = (X_train - self.mean_X_train) / self.std_X_train

        self.mean_y_train = np.mean(y_train)
        self.std_y_train = np.std(y_train)
        if self.std_y_train == 0:
            # Same guard as for the features: a constant target must not
            # turn every normalized target into NaN.
            self.std_y_train = 1.0

        y_train_normalized = (y_train - self.mean_y_train) / self.std_y_train
        y_train_normalized = np.array(y_train_normalized, ndmin = 2).T

        input_shape = X_train.shape[1]
        output_shape = y_train_normalized.shape[1]
        max_index = X_train.shape[0]
        print("shape of input, output and max_index", input_shape, output_shape, max_index)

        # The data never changes between networks or epochs, so convert it to
        # float tensors once instead of on every batch.
        X_tensor = torch.from_numpy(X_train).float()
        y_tensor = torch.from_numpy(y_train_normalized).float()

        lamda = 0.001          # base strength of the L2 penalty
        BATCH_SIZE = 64
        Exp = torch.distributions.exponential.Exponential(torch.tensor([1.0]))
        for m in range (self.n_bootstrap_samples):
            print("Trainning model: ", m)
            # One weight per training example plus a final one for the
            # regularizer; shape (max_index + 1, 1).
            weights = Exp.sample((max_index + 1,))
            print(weights.shape[0])
            data_weights = weights[:-1]
            prior_weight = weights[-1]

            # Fully connected network: one Linear+ReLU pair per hidden layer
            # followed by a linear output layer.
            layers = []
            in_features = input_shape
            for width in hidden_sizes:
                layers.append(torch.nn.Linear(in_features, width))
                layers.append(torch.nn.ReLU())
                in_features = width
            layers.append(torch.nn.Linear(in_features, output_shape))
            net = torch.nn.Sequential(*layers)

            optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

            # Per-example squared error (no reduction) so that every example
            # can be scaled by its own weight before summing.
            loss_func = torch.nn.MSELoss(reduction = 'none')

            start_time = time.time()
            for epoch in range(n_epochs):
                optimizer.zero_grad()
                loss = 0

                # The loss is accumulated over contiguous, unshuffled chunks
                # and back-propagated once per epoch, so plain tensor slicing
                # replaces the DataLoader (and its worker processes).
                for start in range(0, max_index, BATCH_SIZE):
                    stop = start + BATCH_SIZE
                    prediction = net(X_tensor[start:stop])
                    loss = loss + (data_weights[start:stop] *
                        loss_func(prediction, y_tensor[start:stop])).sum()

                # Randomly weighted L2 regularization over all parameters.
                l2_penalty = 0
                for p in net.parameters():
                    l2_penalty = l2_penalty + 0.5 * (p ** 2).sum()
                loss = loss + (prior_weight * lamda * l2_penalty).sum()

                loss.backward()
                optimizer.step()
                print("EPOCH: ", epoch, " LOSS: ", loss)
            print("BATCH_SIZE: ", BATCH_SIZE, "time execution for a neural net: ", time.time() - start_time )
            self.NNs.append(net)


    def predict(self, X_test, y_test):

        """
            Evaluates the trained ensemble on a test set.

            @param X_test   The matrix of features for the test data.
            @param y_test   The vector of targets for the test data.

            @return rmse_standard_pred  RMSE of the first network alone.
            @return rmse                RMSE of the mean prediction over all
                                        networks of this instance.
            @return test_ll             Mean test log-likelihood of the
                                        ensemble under Gaussian noise with
                                        fixed variance 0.1.
        """

        if not self.NNs:
            raise RuntimeError("no trained networks available for prediction")

        X_test = np.array(X_test, ndmin = 2)
        y_test = np.array(y_test, ndmin = 2).T

        # We normalize the test set with the training statistics
        X_test = (X_test - self.mean_X_train) / self.std_X_train
        X_tensor = torch.from_numpy(X_test).float()

        # Predictions of every network of THIS instance, stacked along a new
        # leading axis, then mapped back to the original target scale.
        with torch.no_grad():
            NN_pred = np.array([ net(X_tensor).numpy() for net in self.NNs ])
        NN_pred = NN_pred * self.std_y_train + self.mean_y_train

        standard_pred = NN_pred[0]
        rmse_standard_pred = np.mean((y_test.squeeze() - standard_pred.squeeze())**2.)**0.5

        # Ensemble (averaged) prediction error
        MC_pred = np.mean(NN_pred, axis = 0)
        rmse = np.mean((y_test.squeeze() - MC_pred.squeeze())**2.)**0.5

        variance_ll = 0.1
        tau = 1. / variance_ll
        T = NN_pred.shape[0]      # number of networks being averaged
        # We compute the test log-likelihood
        ll = (logsumexp(-0.5 * tau * (y_test[None] - NN_pred)**2., 0) - np.log(T)
            - 0.5*np.log(2*np.pi) + 0.5*np.log(tau))
        test_ll = np.mean(ll)

        # We are done!
        return rmse_standard_pred, rmse, test_ll


In [23]:
# Copyright 2016, Yarin Gal, All rights reserved.
# This code is based on the code by Jose Miguel Hernandez-Lobato used for his 
# paper "Probabilistic Backpropagation for Scalable Learning of Bayesian Neural Networks".

# This file trains weighted-bootstrap network ensembles (netWBB) on a UCI dataset as follows:
# 1. Load the pre-generated training/test split index files (only the first 5 splits are run for now).
# 2. For each split:
# 3.   Set aside the last 20% of the training rows as a validation set (currently computed but unused).
# 4.   Train a netWBB ensemble on the entire training set; no hyperparameter search is performed.
# 5.   Get the performance (single-network RMSE, ensemble RMSE and log-likelihood) on the test set.
# 6. Append the per-split results, and a summary over the evaluated splits, to the result files.

import math
import numpy as np
import argparse
import sys

# parser=argparse.ArgumentParser()

# parser.add_argument('--dir', '-d', required=True, help='Name of the UCI Dataset directory. Eg: bostonHousing')
# parser.add_argument('--epochx','-e', default=500, type=int, help='Multiplier for the number of epochs for training.')
# parser.add_argument('--hidden', '-nh', default=2, type=int, help='Number of hidden layers for the neural net')

# args=parser.parse_args()

# data_directory = args.dir
# epochs_multiplier = args.epochx
# num_hidden_layers = args.hidden

# Experiment configuration (hard-coded here instead of coming from argparse).
data_directory = "yacht"
epochs_multiplier = 30
num_hidden_layers = 1

# sys.path.append('net/')

# import netWBB
# from net import netWBB

# Used further down to delete previous results.
from subprocess import call


# Every result file name ends with the same "<epochs>_xepochs_<layers>_hidden_layers.txt"
# tail and lives in the same directory, so both pieces are built once.
_DATASET_ROOT = "./UCI_Datasets/" + data_directory
_RESULTS_PREFIX = _DATASET_ROOT + "/results/"
_RESULTS_TAIL = "_" + str(epochs_multiplier) + "_xepochs_" + str(num_hidden_layers) + "_hidden_layers.txt"

_RESULTS_VALIDATION_LL = _RESULTS_PREFIX + "validation_ll" + _RESULTS_TAIL
_RESULTS_VALIDATION_RMSE = _RESULTS_PREFIX + "validation_rmse" + _RESULTS_TAIL
_RESULTS_VALIDATION_MC_RMSE = _RESULTS_PREFIX + "validation_MC_rmse" + _RESULTS_TAIL

_RESULTS_TEST_LL = _RESULTS_PREFIX + "test_ll" + _RESULTS_TAIL
_RESULTS_TEST_TAU = _RESULTS_PREFIX + "test_tau" + _RESULTS_TAIL
_RESULTS_TEST_RMSE = _RESULTS_PREFIX + "test_rmse" + _RESULTS_TAIL
_RESULTS_TEST_MC_RMSE = _RESULTS_PREFIX + "test_MC_rmse" + _RESULTS_TAIL
_RESULTS_TEST_LOG = _RESULTS_PREFIX + "log" + _RESULTS_TAIL

# Input files shipped with the dataset.
_DATA_DIRECTORY_PATH = _DATASET_ROOT + "/data/"
_DROPOUT_RATES_FILE = _DATA_DIRECTORY_PATH + "dropout_rates.txt"
_TAU_VALUES_FILE = _DATA_DIRECTORY_PATH + "tau_values.txt"
_DATA_FILE = _DATA_DIRECTORY_PATH + "data.txt"
_HIDDEN_UNITS_FILE = _DATA_DIRECTORY_PATH + "n_hidden.txt"
_EPOCHS_FILE = _DATA_DIRECTORY_PATH + "n_epochs.txt"
_INDEX_FEATURES_FILE = _DATA_DIRECTORY_PATH + "index_features.txt"
_INDEX_TARGET_FILE = _DATA_DIRECTORY_PATH + "index_target.txt"
_N_SPLITS_FILE = _DATA_DIRECTORY_PATH + "n_splits.txt"

def _get_index_train_test_path(split_num, train = True):
    """
       Builds the path of the index file describing one train/test split.
       @param split_num      Number of the split whose index file is wanted
       @param train          Truthy for the training indexes, falsy for the
                             test indexes.
       @return path          Path of the matching "index_train_<n>.txt" or
                             "index_test_<n>.txt" file inside the data directory
    """
    subset = "train" if train else "test"
    return _DATA_DIRECTORY_PATH + "index_" + subset + "_" + str(split_num) + ".txt"


# Delete result files left over from a previous run so that the append-mode
# writes further down start from empty files. os.remove is used instead of
# shelling out to `rm`: it works on every platform and a file that does not
# exist yet (first run) is simply skipped instead of producing an error.
import os

print ("Removing existing result files...")
for _stale_result_file in (
        _RESULTS_VALIDATION_LL,
        _RESULTS_VALIDATION_RMSE,
        _RESULTS_VALIDATION_MC_RMSE,
        _RESULTS_TEST_LL,
        _RESULTS_TEST_TAU,
        _RESULTS_TEST_RMSE,
        _RESULTS_TEST_MC_RMSE,
        _RESULTS_TEST_LOG):
    try:
        os.remove(_stale_result_file)
    except FileNotFoundError:
        # Nothing to clean up for this file.
        pass
    except OSError as exc:
        # Cleanup stays best-effort (as with `rm`): report and carry on.
        print ("Could not remove " + _stale_result_file + ": " + str(exc))
print ("Result files removed.")

# Seed NumPy's global random generator.
# NOTE(review): no torch seed is set anywhere in the visible code, so the
# torch-side sampling/initialisation is presumably not reproducible - confirm.
np.random.seed(1)

print ("Loading data and other hyperparameters...")

# Full data matrix (features and target together).
data = np.loadtxt(_DATA_FILE)

# Hidden-layer width and base number of training epochs, as plain Python values.
n_hidden = np.loadtxt(_HIDDEN_UNITS_FILE).tolist()
n_epochs = np.loadtxt(_EPOCHS_FILE).tolist()

# Column indexes of the features and of the target inside `data`.
index_features = np.loadtxt(_INDEX_FEATURES_FILE)
index_target = np.loadtxt(_INDEX_TARGET_FILE)

# loadtxt yields floats, so the indexes are cast to int before slicing.
X = data[ : , list(map(int, index_features.tolist())) ]
y = data[ : , int(index_target.tolist()) ]

# Number of train/test splits declared for this dataset.
n_splits = np.loadtxt(_N_SPLITS_FILE)
print ("Done.")

# Per-split test metrics: first-network RMSE, ensemble RMSE, log-likelihood.
errors, MC_errors, lls = [], [], []
# for split in range(int(n_splits)):
for split in range(5):  # first, test in 5 splits dataset
    print ("Trainning on split: ", split)

    # We load the indexes of the training and test sets
    train_index_path = _get_index_train_test_path(split, train=True)
    test_index_path = _get_index_train_test_path(split, train=False)
    print ('Loading file: ' + train_index_path)
    print ('Loading file: ' + test_index_path)
    index_train = np.loadtxt(train_index_path)
    index_test = np.loadtxt(test_index_path)

    # loadtxt yields floats; row selection needs integer indexes.
    train_rows = [int(i) for i in index_train.tolist()]
    test_rows = [int(i) for i in index_test.tolist()]

    X_train = X[ train_rows ]
    y_train = y[ train_rows ]

    X_test = X[ test_rows ]
    y_test = y[ test_rows ]

    # Keep the full training set, then carve its last 20% off as a validation
    # set. Only the full set is passed to the network below.
    X_train_original = X_train
    y_train_original = y_train
    num_training_examples = int(0.8 * X_train.shape[0])
    X_validation = X_train[num_training_examples:, :]
    y_validation = y_train[num_training_examples:]
    X_train = X_train[0:num_training_examples, :]
    y_train = y_train[0:num_training_examples]

    # Printing the size of the training, validation and test sets
    print ('Number of training examples: ' + str(X_train.shape[0]))
    print ('Number of validation examples: ' + str(X_validation.shape[0]))
    print ('Number of test examples: ' + str(X_test.shape[0]))
    print ('Number of train_original examples: ' + str(X_train_original.shape[0]))

    # Train on the entire training set (no hyperparameter grid-search here).
    network = netWBB(X_train_original, y_train_original, ([ int(n_hidden) ] * num_hidden_layers),
            normalize = True, n_epochs = int(n_epochs * epochs_multiplier))

    error, MC_error, ll = network.predict(X_test, y_test)

    # Storing test results: one line per split in each result file.
    for _results_path, _metric in (
            (_RESULTS_TEST_RMSE, error),
            (_RESULTS_TEST_MC_RMSE, MC_error),
            (_RESULTS_TEST_LL, ll)):
        with open(_results_path, "a") as myfile:
            myfile.write(repr(_metric) + '\n')

    print ("Tests on split " + str(split) + " complete.")
    errors.append(error)
    MC_errors.append(MC_error)
    lls.append(ll)

# Append one summary line per metric to the log file: mean, standard deviation,
# standard error of the mean and the 50th/25th/75th percentiles.
with open(_RESULTS_TEST_LOG, "a") as myfile:
    for _label, _values in (('errors', errors), ('MC errors', MC_errors), ('lls', lls)):
        # The standard error must use the number of splits that were actually
        # evaluated (the length of the result list), not the total number of
        # splits declared for the dataset; otherwise it is understated
        # whenever only a subset of the splits is run.
        _n_evaluated = len(_values)
        myfile.write('%s %f +- %f (stddev) +- %f (std error), median %f 25p %f 75p %f \n' % (
            _label,
            np.mean(_values), np.std(_values), np.std(_values)/math.sqrt(_n_evaluated),
            np.percentile(_values, 50), np.percentile(_values, 25), np.percentile(_values, 75)))


Removing existing result files...
Result files removed.
Loading data and other hyperparameters...
Done.
Trainning on split:  0
Loading file: ./UCI_Datasets/yacht/data/index_train_0.txt
Loading file: ./UCI_Datasets/yacht/data/index_test_0.txt
Number of training examples: 221
Number of validation examples: 56
Number of test examples: 31
Number of train_original examples: 277
running
shape 6 1 277
Trainning model:  0
278
EPOCH:  0  LOSS:  tensor(356.8891, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(307.4855, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(270.0096, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(239.9880, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(214.6577, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(192.3997, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(172.4898, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(154.5892, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(138.7633, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(125.2663, grad_fn=<Add

EPOCH:  133  LOSS:  tensor(1.2032, grad_fn=<AddBackward0>)
EPOCH:  134  LOSS:  tensor(1.1892, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(1.1752, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(1.1610, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(1.1481, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(1.1356, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(1.1234, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(1.1120, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(1.1007, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(1.0896, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(1.0787, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(1.0679, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(1.0577, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(1.0479, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(1.0384, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(1.0291, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(1.0198, grad_fn=<AddBackward0

EPOCH:  272  LOSS:  tensor(0.4277, grad_fn=<AddBackward0>)
EPOCH:  273  LOSS:  tensor(0.4255, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.4233, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.4212, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.4193, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.4173, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.4154, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.4136, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.4118, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.4101, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.4083, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.4067, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.4050, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.4032, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.4013, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.3996, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.3979, grad_fn=<AddBackward0

EPOCH:  411  LOSS:  tensor(0.2394, grad_fn=<AddBackward0>)
EPOCH:  412  LOSS:  tensor(0.2385, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.2377, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.2368, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.2360, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.2352, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.2344, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.2336, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.2328, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.2320, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.2313, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.2304, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.2297, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.2288, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.2280, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.2273, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.2265, grad_fn=<AddBackward0

EPOCH:  550  LOSS:  tensor(0.1619, grad_fn=<AddBackward0>)
EPOCH:  551  LOSS:  tensor(0.1615, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.1612, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.1607, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.1603, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.1599, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.1595, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.1591, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1587, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1583, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1579, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1575, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1570, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1567, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1563, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1559, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.1555, grad_fn=<AddBackward0

Exception ignored in: <function WeakValueDictionary.__init__.<locals>.remove at 0x102ba5a60>
Traceback (most recent call last):
  File "/Users/mac/anaconda3/envs/SSL/lib/python3.6/weakref.py", line 109, in remove
    def remove(wr, selfref=ref(self), _atomic_removal=_remove_dead_weakref):
KeyboardInterrupt


EPOCH:  626  LOSS:  tensor(0.1370, grad_fn=<AddBackward0>)
EPOCH:  627  LOSS:  tensor(0.1367, grad_fn=<AddBackward0>)
EPOCH:  628  LOSS:  tensor(0.1365, grad_fn=<AddBackward0>)
EPOCH:  629  LOSS:  tensor(0.1362, grad_fn=<AddBackward0>)
EPOCH:  630  LOSS:  tensor(0.1359, grad_fn=<AddBackward0>)
EPOCH:  631  LOSS:  tensor(0.1357, grad_fn=<AddBackward0>)
EPOCH:  632  LOSS:  tensor(0.1354, grad_fn=<AddBackward0>)
EPOCH:  633  LOSS:  tensor(0.1352, grad_fn=<AddBackward0>)
EPOCH:  634  LOSS:  tensor(0.1349, grad_fn=<AddBackward0>)
EPOCH:  635  LOSS:  tensor(0.1347, grad_fn=<AddBackward0>)
EPOCH:  636  LOSS:  tensor(0.1344, grad_fn=<AddBackward0>)
EPOCH:  637  LOSS:  tensor(0.1342, grad_fn=<AddBackward0>)
EPOCH:  638  LOSS:  tensor(0.1339, grad_fn=<AddBackward0>)
EPOCH:  639  LOSS:  tensor(0.1337, grad_fn=<AddBackward0>)
EPOCH:  640  LOSS:  tensor(0.1334, grad_fn=<AddBackward0>)
EPOCH:  641  LOSS:  tensor(0.1332, grad_fn=<AddBackward0>)
EPOCH:  642  LOSS:  tensor(0.1329, grad_fn=<AddBackward0

KeyboardInterrupt: 

In [13]:
# Copyright 2016, Yarin Gal, All rights reserved.
# This code is based on the code by Jose Miguel Hernandez-Lobato used for his 
# paper "Probabilistic Backpropagation for Scalable Learning of Bayesian Neural Networks".

import warnings
warnings.filterwarnings("ignore")

import math
from scipy.special import logsumexp
import numpy as np

from keras.regularizers import l2
# from keras import Input
from keras.layers import Input
from keras.layers import Dropout
from keras.layers import Dense
# from keras import Model

import time

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch.autograd import Variable
import torch.nn.functional as F
import torch.nn as nn
import torch.utils.data as Data
from scipy.stats import norm

import time

In [17]:
class netWBB:
    """
        Ensemble of small feed-forward regression networks, each trained on
        the same data with its own random Exponential(1) weight per training
        example (plus one random weight on the L2 penalty).

        Every instance owns its own list of trained networks in `self.NNs`.
    """

    n_bootstrap_samples = 100     # number of networks trained per instance
    # Class-level default kept only for backward compatibility. It is never
    # mutated: __init__ always binds a fresh per-instance list, so separate
    # instances (e.g. one per train/test split) no longer share networks.
    NNs = []

    def __init__(self, X_train, y_train, n_hidden, n_epochs = 40,
        normalize = False):

        """
            Trains `n_bootstrap_samples` networks and stores them in
            `self.NNs`.

            @param X_train      Matrix with the features for the training data
                                (one row per example).
            @param y_train      Vector with the target variables for the
                                training data.
            @param n_hidden     Sequence with the number of neurons for each
                                hidden layer. One Linear+ReLU pair is built
                                per entry.
            @param n_epochs     Number of full passes over the training data
                                (one optimizer step per pass).
            @param normalize    Whether to standardize the input features
                                with the training mean / standard deviation.
                                The targets are always standardized.
        """

        # Fresh per-instance ensemble (see the note on the class attribute).
        self.NNs = []

        hidden_sizes = [ int(h) for h in n_hidden ]
        if not hidden_sizes:
            raise ValueError("n_hidden must contain at least one layer size")

        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)

        # We normalize the training data to have zero mean and unit standard
        # deviation in the training set if necessary
        print("running")
        if normalize:
            self.std_X_train = np.std(X_train, 0)
            # Constant features would otherwise divide by zero.
            self.std_X_train[ self.std_X_train == 0 ] = 1
            self.mean_X_train = np.mean(X_train, 0)
        else:
            self.std_X_train = np.ones(X_train.shape[ 1 ])
            self.mean_X_train = np.zeros(X_train.shape[ 1 ])

        X_train = (X_train - self.mean_X_train) / self.std_X_train

        self.mean_y_train = np.mean(y_train)
        self.std_y_train = np.std(y_train)
        if self.std_y_train == 0:
            # Same guard as for the features: a constant target must not
            # turn every normalized target into NaN.
            self.std_y_train = 1.0

        y_train_normalized = (y_train - self.mean_y_train) / self.std_y_train
        y_train_normalized = np.array(y_train_normalized, ndmin = 2).T

        input_shape = X_train.shape[1]
        output_shape = y_train_normalized.shape[1]
        max_index = X_train.shape[0]
        print("shape of input, output and max_index", input_shape, output_shape, max_index)

        # The data never changes between networks or epochs, so convert it to
        # float tensors once instead of on every batch.
        X_tensor = torch.from_numpy(X_train).float()
        y_tensor = torch.from_numpy(y_train_normalized).float()

        lamda = 0.001          # base strength of the L2 penalty
        BATCH_SIZE = 64
        Exp = torch.distributions.exponential.Exponential(torch.tensor([1.0]))
        for m in range (self.n_bootstrap_samples):
            print("Trainning model: ", m)
            # One weight per training example plus a final one for the
            # regularizer; shape (max_index + 1, 1).
            weights = Exp.sample((max_index + 1,))
            print("....", weights.shape[0])
            data_weights = weights[:-1]
            prior_weight = weights[-1]

            # Fully connected network: one Linear+ReLU pair per hidden layer
            # followed by a linear output layer.
            layers = []
            in_features = input_shape
            for width in hidden_sizes:
                layers.append(torch.nn.Linear(in_features, width))
                layers.append(torch.nn.ReLU())
                in_features = width
            layers.append(torch.nn.Linear(in_features, output_shape))
            net = torch.nn.Sequential(*layers)

            optimizer = torch.optim.Adam(net.parameters(), lr=0.01)

            # Per-example squared error (no reduction) so that every example
            # can be scaled by its own weight before summing.
            loss_func = torch.nn.MSELoss(reduction = 'none')

            start_time = time.time()
            for epoch in range(n_epochs):
                optimizer.zero_grad()
                loss = 0

                # The loss is accumulated over contiguous, unshuffled chunks
                # and back-propagated once per epoch, so plain tensor slicing
                # replaces the DataLoader (and its worker processes).
                for start in range(0, max_index, BATCH_SIZE):
                    stop = start + BATCH_SIZE
                    prediction = net(X_tensor[start:stop])
                    loss = loss + (data_weights[start:stop] *
                        loss_func(prediction, y_tensor[start:stop])).sum()

                # Randomly weighted L2 regularization over all parameters.
                l2_penalty = 0
                for p in net.parameters():
                    l2_penalty = l2_penalty + 0.5 * (p ** 2).sum()
                loss = loss + (prior_weight * lamda * l2_penalty).sum()

                loss.backward()
                optimizer.step()
                print("EPOCH: ", epoch, " LOSS: ", loss)
            print("BATCH_SIZE: ", BATCH_SIZE, "time execution for a neural net: ", time.time() - start_time )
            self.NNs.append(net)


    def predict(self, X_test, y_test):

        """
            Evaluates the trained ensemble on a test set.

            @param X_test   The matrix of features for the test data.
            @param y_test   The vector of targets for the test data.

            @return rmse_standard_pred  RMSE of the first network alone.
            @return rmse                RMSE of the mean prediction over all
                                        networks of this instance.
            @return test_ll             Mean test log-likelihood of the
                                        ensemble under Gaussian noise with
                                        fixed variance 0.1.
        """

        if not self.NNs:
            raise RuntimeError("no trained networks available for prediction")

        X_test = np.array(X_test, ndmin = 2)
        y_test = np.array(y_test, ndmin = 2).T

        # We normalize the test set with the training statistics
        X_test = (X_test - self.mean_X_train) / self.std_X_train
        X_tensor = torch.from_numpy(X_test).float()

        # Predictions of every network of THIS instance, stacked along a new
        # leading axis, then mapped back to the original target scale.
        with torch.no_grad():
            NN_pred = np.array([ net(X_tensor).numpy() for net in self.NNs ])
        NN_pred = NN_pred * self.std_y_train + self.mean_y_train

        standard_pred = NN_pred[0]
        rmse_standard_pred = np.mean((y_test.squeeze() - standard_pred.squeeze())**2.)**0.5

        # Ensemble (averaged) prediction error
        MC_pred = np.mean(NN_pred, axis = 0)
        rmse = np.mean((y_test.squeeze() - MC_pred.squeeze())**2.)**0.5

        variance_ll = 0.1
        tau = 1. / variance_ll
        T = NN_pred.shape[0]      # number of networks being averaged
        # We compute the test log-likelihood
        ll = (logsumexp(-0.5 * tau * (y_test[None] - NN_pred)**2., 0) - np.log(T)
            - 0.5*np.log(2*np.pi) + 0.5*np.log(tau))
        test_ll = np.mean(ll)

        # We are done!
        return rmse_standard_pred, rmse, test_ll

In [18]:
# Copyright 2016, Yarin Gal, All rights reserved.
# This code is based on the code by Jose Miguel Hernandez-Lobato used for his 
# paper "Probabilistic Backpropagation for Scalable Learning of Bayesian Neural Networks".

# This file contains code to train dropout networks on the UCI datasets using the following algorithm:
# 1. Create 20 random splits of the training-test dataset.
# 2. For each split:
# 3.   Create a validation (val) set taking 20% of the training set.
# 4.   Get best hyperparameters: dropout_rate and tau by training on (train-val) set and testing on val set.
# 5.   Train a network on the entire training set with the best pair of hyperparameters.
# 6.   Get the performance (MC RMSE and log-likelihood) on the test set.
# 7. Report the averaged performance (Monte Carlo RMSE and log-likelihood) on all 20 splits.

import math
import numpy as np
import argparse
import sys

# parser=argparse.ArgumentParser()

# parser.add_argument('--dir', '-d', required=True, help='Name of the UCI Dataset directory. Eg: bostonHousing')
# parser.add_argument('--epochx','-e', default=500, type=int, help='Multiplier for the number of epochs for training.')
# parser.add_argument('--hidden', '-nh', default=2, type=int, help='Number of hidden layers for the neural net')

# args=parser.parse_args()

# data_directory = args.dir
# epochs_multiplier = args.epochx
# num_hidden_layers = args.hidden

# Experiment configuration, hard-coded here in place of the command-line
# arguments that are commented out above.
data_directory = "yacht"
epochs_multiplier = 30
num_hidden_layers = 1

# sys.path.append('net/')

# import netWBB
# from net import netWBB

# We delete previous results

from subprocess import call


# Every result file lives in the dataset's results/ directory and ends with the
# same suffix, which encodes the epoch multiplier and the hidden-layer count.
_RESULTS_PREFIX = "./UCI_Datasets/%s/results/" % data_directory
_RESULTS_SUFFIX = "_%s_xepochs_%s_hidden_layers.txt" % (epochs_multiplier, num_hidden_layers)

# Validation-set result files.
_RESULTS_VALIDATION_LL = _RESULTS_PREFIX + "validation_ll" + _RESULTS_SUFFIX
_RESULTS_VALIDATION_RMSE = _RESULTS_PREFIX + "validation_rmse" + _RESULTS_SUFFIX
_RESULTS_VALIDATION_MC_RMSE = _RESULTS_PREFIX + "validation_MC_rmse" + _RESULTS_SUFFIX

# Test-set result files and the summary log.
_RESULTS_TEST_LL = _RESULTS_PREFIX + "test_ll" + _RESULTS_SUFFIX
_RESULTS_TEST_TAU = _RESULTS_PREFIX + "test_tau" + _RESULTS_SUFFIX
_RESULTS_TEST_RMSE = _RESULTS_PREFIX + "test_rmse" + _RESULTS_SUFFIX
_RESULTS_TEST_MC_RMSE = _RESULTS_PREFIX + "test_MC_rmse" + _RESULTS_SUFFIX
_RESULTS_TEST_LOG = _RESULTS_PREFIX + "log" + _RESULTS_SUFFIX

# Input files: the data matrix, hyperparameter files and split descriptions.
_DATA_DIRECTORY_PATH = "./UCI_Datasets/%s/data/" % data_directory
_DROPOUT_RATES_FILE = _DATA_DIRECTORY_PATH + "dropout_rates.txt"
_TAU_VALUES_FILE = _DATA_DIRECTORY_PATH + "tau_values.txt"
_DATA_FILE = _DATA_DIRECTORY_PATH + "data.txt"
_HIDDEN_UNITS_FILE = _DATA_DIRECTORY_PATH + "n_hidden.txt"
_EPOCHS_FILE = _DATA_DIRECTORY_PATH + "n_epochs.txt"
_INDEX_FEATURES_FILE = _DATA_DIRECTORY_PATH + "index_features.txt"
_INDEX_TARGET_FILE = _DATA_DIRECTORY_PATH + "index_target.txt"
_N_SPLITS_FILE = _DATA_DIRECTORY_PATH + "n_splits.txt"


In [19]:
def _get_index_train_test_path(split_num, train = True):
    """
       Build the path of the index file that describes one train/test split.

       The file name is "index_train_<split_num>.txt" or
       "index_test_<split_num>.txt" inside the dataset's data directory.

       @param split_num      Number of the split whose index file is wanted.
       @param train          True selects the training indices, False the
                             test indices.
       @return path          Path of the file containing the requested indices.
    """
    subset = "train" if train else "test"
    return _DATA_DIRECTORY_PATH + "index_" + subset + "_" + str(split_num) + ".txt"


import os

# Remove result files left over from a previous run.  The per-split results
# below are written in append mode, so stale contents would otherwise be mixed
# into this run's output.  os.remove is used instead of spawning `rm`: a file
# that does not exist yet is skipped quietly and no external binary is needed.
# Any other failure is reported but, as before, does not abort the run.
print ("Removing existing result files...")
for _stale_path in (_RESULTS_VALIDATION_LL, _RESULTS_VALIDATION_RMSE,
                    _RESULTS_VALIDATION_MC_RMSE, _RESULTS_TEST_LL,
                    _RESULTS_TEST_TAU, _RESULTS_TEST_RMSE,
                    _RESULTS_TEST_MC_RMSE, _RESULTS_TEST_LOG):
    try:
        os.remove(_stale_path)
    except FileNotFoundError:
        pass
    except OSError as _err:
        print ("Could not remove " + _stale_path + ": " + str(_err))
print ("Result files removed.")

# Make sure the results directory exists before any training starts, so a
# missing directory is not discovered only after a full training run.
os.makedirs(os.path.dirname(_RESULTS_TEST_LOG), exist_ok=True)

# We fix the random seed
# NOTE(review): only NumPy's generator is seeded here; if netWBB draws
# randomness from another library, runs are not fully reproducible -- confirm.

np.random.seed(1)

print ("Loading data and other hyperparameters...")
# We load the data

data = np.loadtxt(_DATA_FILE)

# We load the number of hidden units

n_hidden = np.loadtxt(_HIDDEN_UNITS_FILE).tolist()

# We load the number of training epochs

n_epochs = np.loadtxt(_EPOCHS_FILE).tolist()

# We load the indexes for the features and for the target

index_features = np.loadtxt(_INDEX_FEATURES_FILE)
index_target = np.loadtxt(_INDEX_TARGET_FILE)

X = data[ : , [int(i) for i in index_features.tolist()] ]
y = data[ : , int(index_target.tolist()) ]

# Number of train/test splits available for this dataset

n_splits = np.loadtxt(_N_SPLITS_FILE)
print ("Done.")

# We iterate over the training test splits.  Only the first few splits are
# evaluated for now; use int(n_splits) here to run all of them.
n_eval_splits = 5

errors, MC_errors, lls = [], [], []
for split in range(n_eval_splits):
    print ("Trainning on split: ", split)
    # We load the indexes of the training and test sets
    train_index_path = _get_index_train_test_path(split, train=True)
    test_index_path = _get_index_train_test_path(split, train=False)
    print ('Loading file: ' + train_index_path)
    print ('Loading file: ' + test_index_path)
    index_train = np.loadtxt(train_index_path)
    index_test = np.loadtxt(test_index_path)

    # np.loadtxt returns floats, so the row indices are converted to int once
    # and reused for both the features and the targets.
    train_rows = [int(i) for i in index_train.tolist()]
    test_rows = [int(i) for i in index_test.tolist()]

    X_train = X[train_rows]
    y_train = y[train_rows]

    X_test = X[test_rows]
    y_test = y[test_rows]

    # The last 20% of the training rows are set aside as a validation split.
    # The split is only reported below: the network is trained on the full
    # training set and no hyperparameter search is run in this script.
    X_train_original = X_train
    y_train_original = y_train
    num_training_examples = int(0.8 * X_train.shape[0])
    X_validation = X_train[num_training_examples:, :]
    y_validation = y_train[num_training_examples:]
    X_train = X_train[0:num_training_examples, :]
    y_train = y_train[0:num_training_examples]

    # Printing the size of the training, validation and test sets
    print ('Number of training examples: ' + str(X_train.shape[0]))
    print ('Number of validation examples: ' + str(X_validation.shape[0]))
    print ('Number of test examples: ' + str(X_test.shape[0]))
    print ('Number of train_original examples: ' + str(X_train_original.shape[0]))

    # Train on the complete training set of this split
    network = netWBB(X_train_original, y_train_original, ([ int(n_hidden) ] * num_hidden_layers),
            normalize = True, n_epochs = int(n_epochs * epochs_multiplier))

    # Evaluate on the held-out test set and store the per-split results
    error, MC_error, ll = network.predict(X_test, y_test)

    with open(_RESULTS_TEST_RMSE, "a") as myfile:
        myfile.write(repr(error) + '\n')

    with open(_RESULTS_TEST_MC_RMSE, "a") as myfile:
        myfile.write(repr(MC_error) + '\n')

    with open(_RESULTS_TEST_LL, "a") as myfile:
        myfile.write(repr(ll) + '\n')

    print ("Tests on split " + str(split) + " complete.")
    errors += [error]
    MC_errors += [MC_error]
    lls += [ll]

# Summary over the evaluated splits.  The standard error divides by the square
# root of the number of splits actually evaluated, not by n_splits from the
# dataset file, which is larger whenever only a subset of the splits is run.
n_evaluated = len(errors)
with open(_RESULTS_TEST_LOG, "a") as myfile:
    for label, values in (('errors', errors), ('MC errors', MC_errors), ('lls', lls)):
        myfile.write('%s %f +- %f (stddev) +- %f (std error), median %f 25p %f 75p %f \n' % (
            label, np.mean(values), np.std(values), np.std(values)/math.sqrt(n_evaluated),
            np.percentile(values, 50), np.percentile(values, 25), np.percentile(values, 75)))


Removing existing result files...
Result files removed.
Loading data and other hyperparameters...
Done.
Trainning on split:  0
Loading file: ./UCI_Datasets/yacht/data/index_train_0.txt
Loading file: ./UCI_Datasets/yacht/data/index_test_0.txt
Number of training examples: 221
Number of validation examples: 56
Number of test examples: 31
Number of train_original examples: 277
running
shape of input, output and max_index 6 1 277
Trainning model:  0
.... 278
EPOCH:  0  LOSS:  tensor(373.3935, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(332.3792, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(297.7012, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(267.3447, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(240.2691, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(216.2865, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(195.3037, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(177.3007, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(162.1440, grad_fn=<AddBackward0>)
EPOCH:  9  

EPOCH:  132  LOSS:  tensor(1.5608, grad_fn=<AddBackward0>)
EPOCH:  133  LOSS:  tensor(1.5247, grad_fn=<AddBackward0>)
EPOCH:  134  LOSS:  tensor(1.4897, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(1.4564, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(1.4240, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(1.3924, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(1.3620, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(1.3327, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(1.3049, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(1.2781, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(1.2521, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(1.2262, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(1.2014, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(1.1781, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(1.1553, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(1.1329, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(1.1109, grad_fn=<AddBackward0

EPOCH:  271  LOSS:  tensor(0.2967, grad_fn=<AddBackward0>)
EPOCH:  272  LOSS:  tensor(0.2945, grad_fn=<AddBackward0>)
EPOCH:  273  LOSS:  tensor(0.2926, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.2906, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.2886, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.2866, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.2847, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.2826, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.2808, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.2789, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.2771, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.2753, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.2736, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2718, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2702, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.2685, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.2668, grad_fn=<AddBackward0

EPOCH:  410  LOSS:  tensor(0.1629, grad_fn=<AddBackward0>)
EPOCH:  411  LOSS:  tensor(0.1625, grad_fn=<AddBackward0>)
EPOCH:  412  LOSS:  tensor(0.1621, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.1617, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.1613, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.1610, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.1606, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.1602, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.1599, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1595, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1591, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1587, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1584, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1581, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1577, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1573, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1571, grad_fn=<AddBackward0

EPOCH:  549  LOSS:  tensor(0.1284, grad_fn=<AddBackward0>)
EPOCH:  550  LOSS:  tensor(0.1282, grad_fn=<AddBackward0>)
EPOCH:  551  LOSS:  tensor(0.1280, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.1279, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.1277, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.1276, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.1274, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.1273, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.1271, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1268, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1267, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1265, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1264, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1262, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1260, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1259, grad_fn=<AddBackward0

EPOCH:  688  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  689  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  690  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  691  LOSS:  tensor(0.1049, grad_fn=<AddBackward0>)
EPOCH:  692  LOSS:  tensor(0.1048, grad_fn=<AddBackward0>)
EPOCH:  693  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  694  LOSS:  tensor(0.1046, grad_fn=<AddBackward0>)
EPOCH:  695  LOSS:  tensor(0.1045, grad_fn=<AddBackward0>)
EPOCH:  696  LOSS:  tensor(0.1044, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.1044, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.1043, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.1042, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.1043, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.1042, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.1042, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.1042, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.1043, grad_fn=<AddBackward0

EPOCH:  827  LOSS:  tensor(0.0964, grad_fn=<AddBackward0>)
EPOCH:  828  LOSS:  tensor(0.0960, grad_fn=<AddBackward0>)
EPOCH:  829  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  830  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  831  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  832  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  833  LOSS:  tensor(0.0954, grad_fn=<AddBackward0>)
EPOCH:  834  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  835  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0951, grad_fn=<AddBackward0

EPOCH:  966  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  967  LOSS:  tensor(0.1005, grad_fn=<AddBackward0>)
EPOCH:  968  LOSS:  tensor(0.1032, grad_fn=<AddBackward0>)
EPOCH:  969  LOSS:  tensor(0.1014, grad_fn=<AddBackward0>)
EPOCH:  970  LOSS:  tensor(0.0964, grad_fn=<AddBackward0>)
EPOCH:  971  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  972  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  973  LOSS:  tensor(0.0920, grad_fn=<AddBackward0>)
EPOCH:  974  LOSS:  tensor(0.0947, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0963, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0892, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0897, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0923, grad_fn=<AddBackward0

EPOCH:  1104  LOSS:  tensor(0.1059, grad_fn=<AddBackward0>)
EPOCH:  1105  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  1106  LOSS:  tensor(0.1157, grad_fn=<AddBackward0>)
EPOCH:  1107  LOSS:  tensor(0.1197, grad_fn=<AddBackward0>)
EPOCH:  1108  LOSS:  tensor(0.1221, grad_fn=<AddBackward0>)
EPOCH:  1109  LOSS:  tensor(0.1230, grad_fn=<AddBackward0>)
EPOCH:  1110  LOSS:  tensor(0.1215, grad_fn=<AddBackward0>)
EPOCH:  1111  LOSS:  tensor(0.1174, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.1043, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0974, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0864, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0868, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0886, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0912, grad

EPOCH:  40  LOSS:  tensor(29.3218, grad_fn=<AddBackward0>)
EPOCH:  41  LOSS:  tensor(27.5367, grad_fn=<AddBackward0>)
EPOCH:  42  LOSS:  tensor(25.8062, grad_fn=<AddBackward0>)
EPOCH:  43  LOSS:  tensor(24.1541, grad_fn=<AddBackward0>)
EPOCH:  44  LOSS:  tensor(22.6211, grad_fn=<AddBackward0>)
EPOCH:  45  LOSS:  tensor(21.2027, grad_fn=<AddBackward0>)
EPOCH:  46  LOSS:  tensor(19.8942, grad_fn=<AddBackward0>)
EPOCH:  47  LOSS:  tensor(18.6745, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(17.5405, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(16.4473, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(15.4276, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(14.5032, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(13.6572, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(12.9060, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(12.2386, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(11.6364, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(11.0761, grad_fn=<AddBackward0

EPOCH:  180  LOSS:  tensor(0.4815, grad_fn=<AddBackward0>)
EPOCH:  181  LOSS:  tensor(0.4755, grad_fn=<AddBackward0>)
EPOCH:  182  LOSS:  tensor(0.4695, grad_fn=<AddBackward0>)
EPOCH:  183  LOSS:  tensor(0.4636, grad_fn=<AddBackward0>)
EPOCH:  184  LOSS:  tensor(0.4579, grad_fn=<AddBackward0>)
EPOCH:  185  LOSS:  tensor(0.4523, grad_fn=<AddBackward0>)
EPOCH:  186  LOSS:  tensor(0.4470, grad_fn=<AddBackward0>)
EPOCH:  187  LOSS:  tensor(0.4417, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.4366, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.4316, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.4267, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.4219, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.4172, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.4125, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.4078, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.4033, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.3988, grad_fn=<AddBackward0

EPOCH:  319  LOSS:  tensor(0.1544, grad_fn=<AddBackward0>)
EPOCH:  320  LOSS:  tensor(0.1535, grad_fn=<AddBackward0>)
EPOCH:  321  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  322  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  323  LOSS:  tensor(0.1510, grad_fn=<AddBackward0>)
EPOCH:  324  LOSS:  tensor(0.1503, grad_fn=<AddBackward0>)
EPOCH:  325  LOSS:  tensor(0.1495, grad_fn=<AddBackward0>)
EPOCH:  326  LOSS:  tensor(0.1487, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.1480, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.1473, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.1465, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.1458, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.1451, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.1444, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.1437, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.1431, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.1424, grad_fn=<AddBackward0

EPOCH:  458  LOSS:  tensor(0.0942, grad_fn=<AddBackward0>)
EPOCH:  459  LOSS:  tensor(0.0940, grad_fn=<AddBackward0>)
EPOCH:  460  LOSS:  tensor(0.0938, grad_fn=<AddBackward0>)
EPOCH:  461  LOSS:  tensor(0.0936, grad_fn=<AddBackward0>)
EPOCH:  462  LOSS:  tensor(0.0934, grad_fn=<AddBackward0>)
EPOCH:  463  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  464  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  465  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.0924, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.0911, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.0909, grad_fn=<AddBackward0

EPOCH:  597  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  598  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  599  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  600  LOSS:  tensor(0.0745, grad_fn=<AddBackward0>)
EPOCH:  601  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  602  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  603  LOSS:  tensor(0.0743, grad_fn=<AddBackward0>)
EPOCH:  604  LOSS:  tensor(0.0743, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.0741, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.0739, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0737, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.0737, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.0736, grad_fn=<AddBackward0

EPOCH:  736  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  737  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  738  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  739  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  740  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  741  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  742  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  743  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0661, grad_fn=<AddBackward0

EPOCH:  875  LOSS:  tensor(0.0590, grad_fn=<AddBackward0>)
EPOCH:  876  LOSS:  tensor(0.0589, grad_fn=<AddBackward0>)
EPOCH:  877  LOSS:  tensor(0.0589, grad_fn=<AddBackward0>)
EPOCH:  878  LOSS:  tensor(0.0589, grad_fn=<AddBackward0>)
EPOCH:  879  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  880  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  881  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  882  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0583, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0583, grad_fn=<AddBackward0

EPOCH:  1014  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  1015  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1016  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1017  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1018  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1019  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1020  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1021  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0538, grad

EPOCH:  1151  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  1152  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  1153  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  1154  LOSS:  tensor(0.0523, grad_fn=<AddBackward0>)
EPOCH:  1155  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  1156  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  1157  LOSS:  tensor(0.0506, grad_fn=<AddBackward0>)
EPOCH:  1158  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0486, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0493, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0494, grad

EPOCH:  88  LOSS:  tensor(3.2818, grad_fn=<AddBackward0>)
EPOCH:  89  LOSS:  tensor(3.1762, grad_fn=<AddBackward0>)
EPOCH:  90  LOSS:  tensor(3.0738, grad_fn=<AddBackward0>)
EPOCH:  91  LOSS:  tensor(2.9740, grad_fn=<AddBackward0>)
EPOCH:  92  LOSS:  tensor(2.8802, grad_fn=<AddBackward0>)
EPOCH:  93  LOSS:  tensor(2.7902, grad_fn=<AddBackward0>)
EPOCH:  94  LOSS:  tensor(2.7046, grad_fn=<AddBackward0>)
EPOCH:  95  LOSS:  tensor(2.6235, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(2.5449, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(2.4674, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(2.3933, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(2.3223, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(2.2540, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(2.1892, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(2.1284, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(2.0690, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(2.0100, grad_fn=<AddBackward0>)
EPOCH:  1

EPOCH:  228  LOSS:  tensor(0.3806, grad_fn=<AddBackward0>)
EPOCH:  229  LOSS:  tensor(0.3781, grad_fn=<AddBackward0>)
EPOCH:  230  LOSS:  tensor(0.3756, grad_fn=<AddBackward0>)
EPOCH:  231  LOSS:  tensor(0.3732, grad_fn=<AddBackward0>)
EPOCH:  232  LOSS:  tensor(0.3707, grad_fn=<AddBackward0>)
EPOCH:  233  LOSS:  tensor(0.3683, grad_fn=<AddBackward0>)
EPOCH:  234  LOSS:  tensor(0.3658, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.3635, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.3611, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.3587, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.3565, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.3542, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.3519, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.3496, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.3474, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.3452, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.3431, grad_fn=<AddBackward0

EPOCH:  367  LOSS:  tensor(0.2060, grad_fn=<AddBackward0>)
EPOCH:  368  LOSS:  tensor(0.2054, grad_fn=<AddBackward0>)
EPOCH:  369  LOSS:  tensor(0.2049, grad_fn=<AddBackward0>)
EPOCH:  370  LOSS:  tensor(0.2044, grad_fn=<AddBackward0>)
EPOCH:  371  LOSS:  tensor(0.2038, grad_fn=<AddBackward0>)
EPOCH:  372  LOSS:  tensor(0.2032, grad_fn=<AddBackward0>)
EPOCH:  373  LOSS:  tensor(0.2027, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.2021, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.2017, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.2011, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.2004, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.1999, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.1992, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.1986, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.1980, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.1974, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1966, grad_fn=<AddBackward0

EPOCH:  506  LOSS:  tensor(0.1528, grad_fn=<AddBackward0>)
EPOCH:  507  LOSS:  tensor(0.1526, grad_fn=<AddBackward0>)
EPOCH:  508  LOSS:  tensor(0.1523, grad_fn=<AddBackward0>)
EPOCH:  509  LOSS:  tensor(0.1521, grad_fn=<AddBackward0>)
EPOCH:  510  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  511  LOSS:  tensor(0.1517, grad_fn=<AddBackward0>)
EPOCH:  512  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1513, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1511, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1509, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1507, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1505, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1503, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1501, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1499, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1497, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1495, grad_fn=<AddBackward0

EPOCH:  645  LOSS:  tensor(0.1281, grad_fn=<AddBackward0>)
EPOCH:  646  LOSS:  tensor(0.1279, grad_fn=<AddBackward0>)
EPOCH:  647  LOSS:  tensor(0.1278, grad_fn=<AddBackward0>)
EPOCH:  648  LOSS:  tensor(0.1276, grad_fn=<AddBackward0>)
EPOCH:  649  LOSS:  tensor(0.1276, grad_fn=<AddBackward0>)
EPOCH:  650  LOSS:  tensor(0.1273, grad_fn=<AddBackward0>)
EPOCH:  651  LOSS:  tensor(0.1273, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.1271, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1267, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.1265, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.1264, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.1263, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.1261, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.1260, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.1258, grad_fn=<AddBackward0

EPOCH:  784  LOSS:  tensor(0.1093, grad_fn=<AddBackward0>)
EPOCH:  785  LOSS:  tensor(0.1091, grad_fn=<AddBackward0>)
EPOCH:  786  LOSS:  tensor(0.1091, grad_fn=<AddBackward0>)
EPOCH:  787  LOSS:  tensor(0.1090, grad_fn=<AddBackward0>)
EPOCH:  788  LOSS:  tensor(0.1089, grad_fn=<AddBackward0>)
EPOCH:  789  LOSS:  tensor(0.1088, grad_fn=<AddBackward0>)
EPOCH:  790  LOSS:  tensor(0.1087, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.1086, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.1085, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.1084, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.1082, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.1080, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.1079, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.1078, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.1079, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.1077, grad_fn=<AddBackward0

EPOCH:  923  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  924  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  925  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  926  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  927  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  928  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  929  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0947, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0946, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0946, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0945, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0945, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0943, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0943, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0942, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0941, grad_fn=<AddBackward0

EPOCH:  1061  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  1062  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  1063  LOSS:  tensor(0.0924, grad_fn=<AddBackward0>)
EPOCH:  1064  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  1065  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  1066  LOSS:  tensor(0.0907, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0899, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0896, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0892, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0888, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0884, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0883, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0880, grad

EPOCH:  1198  LOSS:  tensor(0.0844, grad_fn=<AddBackward0>)
EPOCH:  1199  LOSS:  tensor(0.0844, grad_fn=<AddBackward0>)
BATCH_SIZE:  64 time execution for a neural net:  74.92734479904175
Trainning model:  3
.... 278
EPOCH:  0  LOSS:  tensor(233.1235, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(199.7652, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(174.3139, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(153.7845, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(136.3099, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(121.0284, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(107.9103, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(97.2772, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(89.4819, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(84.4418, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(81.6854, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(80.2447, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(79.2130, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  

EPOCH:  137  LOSS:  tensor(0.8674, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(0.8571, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(0.8471, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(0.8373, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(0.8277, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(0.8181, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(0.8087, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(0.7995, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(0.7905, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.7816, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(0.7731, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(0.7647, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.7559, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.7470, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.7382, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.7293, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.7207, grad_fn=<AddBackward0

EPOCH:  276  LOSS:  tensor(0.2713, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.2698, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.2684, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.2669, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.2655, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.2641, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.2626, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.2612, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2598, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2584, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.2571, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.2558, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.2545, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.2532, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.2519, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.2506, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2494, grad_fn=<AddBackward0

EPOCH:  415  LOSS:  tensor(0.1625, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.1619, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.1614, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.1608, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1603, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1598, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1592, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1587, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1582, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1577, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1572, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1566, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1560, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1555, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1549, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1544, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1539, grad_fn=<AddBackward0

EPOCH:  554  LOSS:  tensor(0.1173, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.1171, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.1169, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.1167, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1165, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1163, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1161, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1157, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1156, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1154, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1152, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.1150, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1148, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.1146, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.1144, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.1142, grad_fn=<AddBackward0

EPOCH:  693  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  694  LOSS:  tensor(0.0982, grad_fn=<AddBackward0>)
EPOCH:  695  LOSS:  tensor(0.0981, grad_fn=<AddBackward0>)
EPOCH:  696  LOSS:  tensor(0.0980, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.0979, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0978, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0978, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0977, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0976, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0975, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0974, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0973, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0969, grad_fn=<AddBackward0

EPOCH:  832  LOSS:  tensor(0.0885, grad_fn=<AddBackward0>)
EPOCH:  833  LOSS:  tensor(0.0884, grad_fn=<AddBackward0>)
EPOCH:  834  LOSS:  tensor(0.0884, grad_fn=<AddBackward0>)
EPOCH:  835  LOSS:  tensor(0.0883, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0883, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0882, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0879, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0879, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0879, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0878, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0877, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0877, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0876, grad_fn=<AddBackward0

EPOCH:  971  LOSS:  tensor(0.0830, grad_fn=<AddBackward0>)
EPOCH:  972  LOSS:  tensor(0.0830, grad_fn=<AddBackward0>)
EPOCH:  973  LOSS:  tensor(0.0830, grad_fn=<AddBackward0>)
EPOCH:  974  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0827, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0827, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0827, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0825, grad_fn=<AddBackward0

EPOCH:  1109  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  1110  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  1111  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0786, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0786, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0786, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0783, grad

EPOCH:  45  LOSS:  tensor(15.5623, grad_fn=<AddBackward0>)
EPOCH:  46  LOSS:  tensor(14.6062, grad_fn=<AddBackward0>)
EPOCH:  47  LOSS:  tensor(13.6621, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(12.7689, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(11.9570, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(11.2288, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(10.5858, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(10.0057, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(9.4876, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(9.0088, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(8.5610, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(8.1316, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(7.7124, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(7.3043, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(6.9186, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(6.5458, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(6.1900, grad_fn=<AddBackward0>)
EPOCH:

EPOCH:  185  LOSS:  tensor(0.2934, grad_fn=<AddBackward0>)
EPOCH:  186  LOSS:  tensor(0.2908, grad_fn=<AddBackward0>)
EPOCH:  187  LOSS:  tensor(0.2883, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.2858, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.2834, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.2810, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.2787, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.2764, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.2743, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.2721, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.2699, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.2677, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.2655, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.2634, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.2612, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.2591, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.2570, grad_fn=<AddBackward0

EPOCH:  324  LOSS:  tensor(0.1251, grad_fn=<AddBackward0>)
EPOCH:  325  LOSS:  tensor(0.1245, grad_fn=<AddBackward0>)
EPOCH:  326  LOSS:  tensor(0.1240, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.1234, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.1228, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.1223, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.1218, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.1213, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.1208, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.1204, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.1199, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.1195, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.1190, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.1186, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.1181, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.1177, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.1173, grad_fn=<AddBackward0

EPOCH:  463  LOSS:  tensor(0.0779, grad_fn=<AddBackward0>)
EPOCH:  464  LOSS:  tensor(0.0776, grad_fn=<AddBackward0>)
EPOCH:  465  LOSS:  tensor(0.0775, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.0774, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.0771, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.0769, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.0766, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.0764, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.0762, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.0760, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.0757, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.0745, grad_fn=<AddBackward0

EPOCH:  602  LOSS:  tensor(0.0549, grad_fn=<AddBackward0>)
EPOCH:  603  LOSS:  tensor(0.0550, grad_fn=<AddBackward0>)
EPOCH:  604  LOSS:  tensor(0.0548, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.0546, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.0546, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.0543, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.0533, grad_fn=<AddBackward0

EPOCH:  741  LOSS:  tensor(0.0457, grad_fn=<AddBackward0>)
EPOCH:  742  LOSS:  tensor(0.0455, grad_fn=<AddBackward0>)
EPOCH:  743  LOSS:  tensor(0.0455, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0451, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0447, grad_fn=<AddBackward0

EPOCH:  880  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  881  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  882  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0398, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0396, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0398, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0397, grad_fn=<AddBackward0

EPOCH:  1019  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1020  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1021  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0392, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0395, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0398, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0391, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.0389, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0390, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0393, grad

EPOCH:  1156  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  1157  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  1158  LOSS:  tensor(0.0351, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0364, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0379, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0367, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0351, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0333, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0338, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0340, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0346, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0347, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0346, grad

EPOCH:  94  LOSS:  tensor(1.7233, grad_fn=<AddBackward0>)
EPOCH:  95  LOSS:  tensor(1.6765, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(1.6314, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(1.5889, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(1.5483, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(1.5083, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(1.4699, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(1.4337, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(1.3989, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(1.3663, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(1.3354, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(1.3054, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(1.2762, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(1.2480, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(1.2203, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(1.1943, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(1.1693, grad_fn=<AddBackward0>)
EPO

EPOCH:  233  LOSS:  tensor(0.3004, grad_fn=<AddBackward0>)
EPOCH:  234  LOSS:  tensor(0.2982, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.2961, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.2940, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.2921, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.2901, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.2882, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.2864, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.2845, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.2827, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.2809, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.2792, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.2776, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.2761, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.2746, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.2731, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.2716, grad_fn=<AddBackward0

EPOCH:  372  LOSS:  tensor(0.1551, grad_fn=<AddBackward0>)
EPOCH:  373  LOSS:  tensor(0.1547, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.1543, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.1539, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.1535, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.1531, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.1523, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.1512, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1508, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.1503, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.1499, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.1495, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.1491, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.1486, grad_fn=<AddBackward0

EPOCH:  511  LOSS:  tensor(0.1098, grad_fn=<AddBackward0>)
EPOCH:  512  LOSS:  tensor(0.1096, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1094, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1091, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1089, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1086, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1084, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1082, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1079, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1074, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1072, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.1070, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.1067, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.1065, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.1062, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.1060, grad_fn=<AddBackward0

EPOCH:  650  LOSS:  tensor(0.0842, grad_fn=<AddBackward0>)
EPOCH:  651  LOSS:  tensor(0.0840, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.0839, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.0837, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.0836, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.0835, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.0833, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.0832, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.0830, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.0825, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.0823, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.0822, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.0819, grad_fn=<AddBackward0

EPOCH:  789  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  790  LOSS:  tensor(0.0653, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.0650, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0649, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0648, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.0647, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0646, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0645, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0644, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0635, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0634, grad_fn=<AddBackward0

EPOCH:  928  LOSS:  tensor(0.0540, grad_fn=<AddBackward0>)
EPOCH:  929  LOSS:  tensor(0.0540, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0534, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0534, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0532, grad_fn=<AddBackward0

EPOCH:  1066  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0477, grad

EPOCH:  1  LOSS:  tensor(205.5556, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(181.6730, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(161.1103, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(143.4890, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(128.4857, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(115.6672, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(104.8232, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(96.2806, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(90.3084, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(86.8277, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(85.3704, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(85.0126, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(84.7798, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(84.1221, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(82.7724, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(80.7641, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(78.1256, grad_fn=<AddBackward0>)

EPOCH:  141  LOSS:  tensor(0.4267, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(0.4199, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(0.4132, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(0.4067, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(0.4005, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.3950, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(0.3886, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(0.3830, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.3772, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.3715, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.3662, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.3610, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.3558, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.3507, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.3456, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.3405, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.3356, grad_fn=<AddBackward0

EPOCH:  280  LOSS:  tensor(0.1316, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.1314, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.1309, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.1304, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.1300, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.1298, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.1294, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.1289, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.1287, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.1282, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.1280, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.1278, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.1274, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.1263, grad_fn=<AddBackward0

EPOCH:  419  LOSS:  tensor(0.0937, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.0922, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.0922, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.0920, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.0918, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.0910, grad_fn=<AddBackward0

EPOCH:  558  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.0745, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.0745, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.0739, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.0737, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.0734, grad_fn=<AddBackward0

EPOCH:  697  LOSS:  tensor(0.0656, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0653, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0652, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0650, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0650, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0649, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0649, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0649, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0648, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0648, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0647, grad_fn=<AddBackward0

EPOCH:  836  LOSS:  tensor(0.0609, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0621, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0630, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0658, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0705, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0762, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0771, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0627, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0595, grad_fn=<AddBackward0

EPOCH:  975  LOSS:  tensor(0.0549, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0557, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0555, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0544, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0529, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0540, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0531, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0526, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0524, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0526, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0529, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0528, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0527, grad_fn=<AddBackward0

EPOCH:  1112  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0482, grad

EPOCH:  48  LOSS:  tensor(24.6725, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(23.5642, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(22.4925, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(21.4493, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(20.4289, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(19.4494, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(18.5335, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(17.6864, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(16.8925, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(16.1439, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(15.4329, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(14.7701, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(14.1579, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(13.5852, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(13.0373, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(12.5045, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(11.9848, grad_fn=<AddBackward0

EPOCH:  188  LOSS:  tensor(0.7992, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.7892, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.7794, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.7697, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.7604, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.7515, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.7430, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.7348, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.7264, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.7182, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.7101, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.7050, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.6975, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.6881, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.6819, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.6757, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.6701, grad_fn=<AddBackward0

EPOCH:  327  LOSS:  tensor(0.2759, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.2745, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.2733, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.2720, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.2707, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.2695, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.2683, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.2670, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.2659, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.2646, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.2634, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.2623, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.2611, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.2598, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.2587, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.2577, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.2565, grad_fn=<AddBackward0

EPOCH:  466  LOSS:  tensor(0.1800, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.1795, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.1791, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.1787, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.1783, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.1778, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.1774, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.1769, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1765, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1761, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1757, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.1751, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1748, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1744, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1739, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1734, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.1729, grad_fn=<AddBackward0

EPOCH:  605  LOSS:  tensor(0.1250, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.1248, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.1245, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.1242, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.1240, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.1238, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.1235, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.1233, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.1230, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.1228, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.1225, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.1223, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.1221, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.1219, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.1216, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.1214, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.1211, grad_fn=<AddBackward0

EPOCH:  744  LOSS:  tensor(0.0942, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0940, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0937, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0923, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0914, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0909, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0907, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.0905, grad_fn=<AddBackward0

EPOCH:  883  LOSS:  tensor(0.0717, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0715, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0714, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0713, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0712, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0711, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0705, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0703, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0702, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0700, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0699, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0697, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0695, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.0694, grad_fn=<AddBackward0

EPOCH:  1022  LOSS:  tensor(0.0579, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0578, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0577, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0577, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0575, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0573, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.0567, grad

EPOCH:  1159  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0493, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0492, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0492, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0489, grad

EPOCH:  97  LOSS:  tensor(0.9270, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(0.8983, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(0.8711, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(0.8457, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(0.8221, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(0.8001, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(0.7795, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(0.7604, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(0.7424, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(0.7253, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(0.7089, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(0.6930, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(0.6780, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(0.6639, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(0.6508, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(0.6380, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(0.6257, grad_fn=<AddBackward0>)


EPOCH:  236  LOSS:  tensor(0.1733, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.1723, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.1712, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.1702, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.1692, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.1682, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.1672, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.1662, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.1653, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.1642, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.1632, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.1623, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.1615, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.1607, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.1599, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.1591, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.1583, grad_fn=<AddBackward0

EPOCH:  375  LOSS:  tensor(0.1065, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.1063, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.1060, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.1055, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1044, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.1041, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.1038, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.1036, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.1030, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.1028, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.1025, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.1022, grad_fn=<AddBackward0

EPOCH:  514  LOSS:  tensor(0.0793, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.0792, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.0790, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.0783, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.0782, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.0780, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.0779, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.0778, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.0777, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.0776, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.0775, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.0774, grad_fn=<AddBackward0

EPOCH:  653  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.0661, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.0659, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.0659, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.0658, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.0657, grad_fn=<AddBackward0

EPOCH:  792  LOSS:  tensor(0.0567, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0560, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0559, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0559, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0558, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.0557, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.0557, grad_fn=<AddBackward0

EPOCH:  931  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0506, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0502, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0499, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0493, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0488, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0486, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0486, grad_fn=<AddBackward0

EPOCH:  1069  LOSS:  tensor(0.0550, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0444, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0461, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0529, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0504, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0472, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0446, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0434, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0464, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0473, grad

EPOCH:  4  LOSS:  tensor(126.0576, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(106.4749, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(90.4896, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(78.0193, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(68.9771, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(63.2679, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(60.4352, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(59.8055, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(60.5985, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(61.7820, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(62.4828, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(62.1942, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(60.7610, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(58.3843, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(55.4285, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(52.2315, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(49.1544, grad_fn=<AddBackward0>)
E

EPOCH:  144  LOSS:  tensor(1.0803, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(1.0640, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(1.0480, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(1.0325, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(1.0172, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(1.0023, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.9880, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.9739, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.9602, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.9471, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.9342, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.9214, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.9088, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.8965, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.8842, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.8722, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.8604, grad_fn=<AddBackward0

EPOCH:  283  LOSS:  tensor(0.2694, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2676, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2660, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.2646, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.2630, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.2616, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.2601, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.2586, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.2572, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2558, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.2544, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.2530, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2517, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.2503, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.2490, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.2477, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.2464, grad_fn=<AddBackward0

EPOCH:  422  LOSS:  tensor(0.1457, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1451, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1445, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1441, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1434, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1428, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1416, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1410, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1404, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1398, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1391, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1387, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1380, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1374, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.1369, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1363, grad_fn=<AddBackward0

EPOCH:  561  LOSS:  tensor(0.0840, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.0838, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.0836, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.0833, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.0831, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.0824, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.0823, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.0818, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.0811, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.0807, grad_fn=<AddBackward0

EPOCH:  700  LOSS:  tensor(0.0616, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0610, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0609, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0605, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0605, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0603, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0601, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0601, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.0599, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0599, grad_fn=<AddBackward0

EPOCH:  839  LOSS:  tensor(0.0440, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0439, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0438, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0435, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0435, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0434, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0433, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0432, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0432, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0431, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0430, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0430, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0427, grad_fn=<AddBackward0

EPOCH:  978  LOSS:  tensor(0.0346, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0345, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0345, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0344, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0344, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0343, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0343, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0343, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0342, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0342, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0342, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0343, grad_fn=<AddBackward0>)
EPOCH:  993  LOSS:  tensor(0.0345, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0347, grad_fn=<AddBackward0

EPOCH:  1115  LOSS:  tensor(0.0307, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0303, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0298, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0294, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0291, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0284, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0284, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0288, grad_fn=<AddBackward0>)
EPOCH:  1130  LOSS:  tensor(0.0290, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0294, grad

EPOCH:  51  LOSS:  tensor(19.3697, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(18.4625, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(17.6289, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(16.8494, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(16.1410, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(15.4734, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(14.8332, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(14.2163, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(13.6246, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(13.0556, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(12.4983, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(11.9622, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(11.4516, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(10.9647, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(10.5013, grad_fn=<AddBackward0>)
EPOCH:  66  LOSS:  tensor(10.0630, grad_fn=<AddBackward0>)
EPOCH:  67  LOSS:  tensor(9.6458, grad_fn=<AddBackward0>

EPOCH:  191  LOSS:  tensor(0.5551, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.5497, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.5443, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.5391, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.5340, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.5289, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.5239, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.5189, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.5140, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.5092, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.5045, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.4999, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.4954, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.4910, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.4867, grad_fn=<AddBackward0>)
EPOCH:  206  LOSS:  tensor(0.4825, grad_fn=<AddBackward0>)
EPOCH:  207  LOSS:  tensor(0.4783, grad_fn=<AddBackward0

EPOCH:  330  LOSS:  tensor(0.2407, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.2399, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.2391, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.2383, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.2376, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.2368, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.2360, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.2353, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.2346, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.2338, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.2331, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.2324, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.2317, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.2310, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.2302, grad_fn=<AddBackward0>)
EPOCH:  345  LOSS:  tensor(0.2293, grad_fn=<AddBackward0>)
EPOCH:  346  LOSS:  tensor(0.2282, grad_fn=<AddBackward0

EPOCH:  469  LOSS:  tensor(0.1393, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.1389, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.1386, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.1382, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.1379, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1375, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1372, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1368, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.1364, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1361, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1357, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1354, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1350, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.1347, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.1344, grad_fn=<AddBackward0>)
EPOCH:  484  LOSS:  tensor(0.1341, grad_fn=<AddBackward0>)
EPOCH:  485  LOSS:  tensor(0.1337, grad_fn=<AddBackward0

EPOCH:  608  LOSS:  tensor(0.1025, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.1022, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.1020, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.1017, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.1015, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.1012, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.1009, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.1007, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.1005, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.1003, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.1000, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.0998, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.0997, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.0995, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.0993, grad_fn=<AddBackward0>)
EPOCH:  623  LOSS:  tensor(0.0991, grad_fn=<AddBackward0>)
EPOCH:  624  LOSS:  tensor(0.0988, grad_fn=<AddBackward0

EPOCH:  747  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0812, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0807, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0806, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0805, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0804, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0803, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.0802, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  762  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  763  LOSS:  tensor(0.0799, grad_fn=<AddBackward0

EPOCH:  886  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0681, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0679, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0677, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0675, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0675, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0674, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0673, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.0671, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.0670, grad_fn=<AddBackward0>)
EPOCH:  901  LOSS:  tensor(0.0669, grad_fn=<AddBackward0>)
EPOCH:  902  LOSS:  tensor(0.0668, grad_fn=<AddBackward0

EPOCH:  1025  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0567, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  1040  LOSS:  tensor(0.0560, grad_fn=<AddBackward0>)
EPOCH:  1041  LOSS:  tensor(0.0560, grad

EPOCH:  1162  LOSS:  tensor(0.0500, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0500, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0500, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0499, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0499, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  1177  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  1178  LOSS:  tensor(0.0494, grad

EPOCH:  100  LOSS:  tensor(1.1219, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(1.0962, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(1.0718, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(1.0488, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(1.0272, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(1.0068, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(0.9871, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(0.9682, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(0.9501, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(0.9328, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(0.9163, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(0.9004, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(0.8856, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(0.8713, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(0.8571, grad_fn=<AddBackward0>)
EPOCH:  115  LOSS:  tensor(0.8432, grad_fn=<AddBackward0>)
EPOCH:  116  LOSS:  tensor(0.8304, grad_fn=<AddBackward0

EPOCH:  239  LOSS:  tensor(0.1920, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.1907, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.1893, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.1879, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.1865, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.1851, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.1839, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.1825, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.1812, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.1800, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.1787, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.1776, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.1763, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.1751, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.1739, grad_fn=<AddBackward0>)
EPOCH:  254  LOSS:  tensor(0.1727, grad_fn=<AddBackward0>)
EPOCH:  255  LOSS:  tensor(0.1716, grad_fn=<AddBackward0

EPOCH:  378  LOSS:  tensor(0.0963, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.0960, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.0947, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.0941, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.0938, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.0922, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  393  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  394  LOSS:  tensor(0.0913, grad_fn=<AddBackward0

EPOCH:  517  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.0608, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.0603, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.0601, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.0599, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.0596, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.0594, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.0592, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.0590, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.0582, grad_fn=<AddBackward0>)
EPOCH:  532  LOSS:  tensor(0.0580, grad_fn=<AddBackward0>)
EPOCH:  533  LOSS:  tensor(0.0578, grad_fn=<AddBackward0

EPOCH:  656  LOSS:  tensor(0.0445, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.0445, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.0444, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.0442, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.0442, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.0441, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.0440, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.0440, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.0439, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.0439, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.0438, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.0438, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  671  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  672  LOSS:  tensor(0.0436, grad_fn=<AddBackward0

EPOCH:  795  LOSS:  tensor(0.0390, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0390, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0389, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0389, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0389, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0387, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0387, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0387, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.0387, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.0386, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.0386, grad_fn=<AddBackward0>)
EPOCH:  810  LOSS:  tensor(0.0386, grad_fn=<AddBackward0>)
EPOCH:  811  LOSS:  tensor(0.0386, grad_fn=<AddBackward0

EPOCH:  934  LOSS:  tensor(0.0336, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0335, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0333, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0331, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0331, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0329, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.0327, grad_fn=<AddBackward0>)
EPOCH:  949  LOSS:  tensor(0.0326, grad_fn=<AddBackward0>)
EPOCH:  950  LOSS:  tensor(0.0326, grad_fn=<AddBackward0

EPOCH:  1072  LOSS:  tensor(0.0267, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0267, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0266, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0266, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0266, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0266, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0266, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0265, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0264, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0264, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0263, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0263, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0263, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0262, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0262, grad_fn=<AddBackward0>)
EPOCH:  1087  LOSS:  tensor(0.0261, grad_fn=<AddBackward0>)
EPOCH:  1088  LOSS:  tensor(0.0261, grad

EPOCH:  8  LOSS:  tensor(105.1752, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(93.6883, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(85.5745, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(80.4662, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(77.8432, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(76.9111, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(76.9100, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(77.0344, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(76.4224, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(74.6013, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(71.5244, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(67.4196, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(62.7209, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(57.8915, grad_fn=<AddBackward0>)
EPOCH:  22  LOSS:  tensor(53.3612, grad_fn=<AddBackward0>)
EPOCH:  23  LOSS:  tensor(49.3295, grad_fn=<AddBackward0>)
EPOCH:  24  LOSS:  tensor(45.8497, grad_fn=<AddBackward0>

EPOCH:  148  LOSS:  tensor(0.6332, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.6246, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.6161, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.6078, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.5997, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.5917, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.5841, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.5768, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.5695, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.5624, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.5555, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.5487, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.5423, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.5360, grad_fn=<AddBackward0>)
EPOCH:  162  LOSS:  tensor(0.5300, grad_fn=<AddBackward0>)
EPOCH:  163  LOSS:  tensor(0.5239, grad_fn=<AddBackward0>)
EPOCH:  164  LOSS:  tensor(0.5179, grad_fn=<AddBackward0

EPOCH:  287  LOSS:  tensor(0.1791, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.1782, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.1774, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.1766, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.1758, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.1751, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.1743, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.1735, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.1728, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.1721, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.1714, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.1707, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.1700, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.1693, grad_fn=<AddBackward0>)
EPOCH:  301  LOSS:  tensor(0.1687, grad_fn=<AddBackward0>)
EPOCH:  302  LOSS:  tensor(0.1680, grad_fn=<AddBackward0>)
EPOCH:  303  LOSS:  tensor(0.1673, grad_fn=<AddBackward0

EPOCH:  426  LOSS:  tensor(0.1184, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1181, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1173, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1170, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1168, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1165, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1162, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1157, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.1154, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1151, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.1149, grad_fn=<AddBackward0>)
EPOCH:  440  LOSS:  tensor(0.1146, grad_fn=<AddBackward0>)
EPOCH:  441  LOSS:  tensor(0.1144, grad_fn=<AddBackward0>)
EPOCH:  442  LOSS:  tensor(0.1141, grad_fn=<AddBackward0

EPOCH:  565  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.0911, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.0909, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.0906, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.0905, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  579  LOSS:  tensor(0.0901, grad_fn=<AddBackward0>)
EPOCH:  580  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  581  LOSS:  tensor(0.0899, grad_fn=<AddBackward0

EPOCH:  704  LOSS:  tensor(0.0799, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0799, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0797, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0796, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0795, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0795, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0794, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0793, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0792, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0792, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.0791, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0790, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  718  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  719  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  720  LOSS:  tensor(0.0787, grad_fn=<AddBackward0

EPOCH:  843  LOSS:  tensor(0.0701, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0701, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0700, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0699, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0699, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0698, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0697, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0696, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0696, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0695, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0695, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0694, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  857  LOSS:  tensor(0.0692, grad_fn=<AddBackward0>)
EPOCH:  858  LOSS:  tensor(0.0692, grad_fn=<AddBackward0>)
EPOCH:  859  LOSS:  tensor(0.0691, grad_fn=<AddBackward0

EPOCH:  982  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0639, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0639, grad_fn=<AddBackward0>)
EPOCH:  993  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  995  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  996  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  997  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  998  LOSS:  tensor(0.0637, grad_fn=<AddBackward0

EPOCH:  1119  LOSS:  tensor(0.0589, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1130  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  1132  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  1133  LOSS:  tensor(0.0583, grad_fn=<AddBackward0>)
EPOCH:  1134  LOSS:  tensor(0.0583, grad_fn=<AddBackward0>)
EPOCH:  1135  LOSS:  tensor(0.0582, grad

EPOCH:  55  LOSS:  tensor(22.2127, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(20.7641, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(19.3557, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(18.0164, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(16.7515, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(15.5664, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(14.4897, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(13.5090, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(12.6213, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(11.8132, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(11.0767, grad_fn=<AddBackward0>)
EPOCH:  66  LOSS:  tensor(10.3939, grad_fn=<AddBackward0>)
EPOCH:  67  LOSS:  tensor(9.7575, grad_fn=<AddBackward0>)
EPOCH:  68  LOSS:  tensor(9.1561, grad_fn=<AddBackward0>)
EPOCH:  69  LOSS:  tensor(8.5971, grad_fn=<AddBackward0>)
EPOCH:  70  LOSS:  tensor(8.0724, grad_fn=<AddBackward0>)
EPOCH:  71  LOSS:  tensor(7.5848, grad_fn=<AddBackward0>)
EP

EPOCH:  195  LOSS:  tensor(0.5624, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.5562, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.5501, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.5443, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.5387, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.5333, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.5284, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.5237, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.5192, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.5149, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.5105, grad_fn=<AddBackward0>)
EPOCH:  206  LOSS:  tensor(0.5061, grad_fn=<AddBackward0>)
EPOCH:  207  LOSS:  tensor(0.5018, grad_fn=<AddBackward0>)
EPOCH:  208  LOSS:  tensor(0.4976, grad_fn=<AddBackward0>)
EPOCH:  209  LOSS:  tensor(0.4934, grad_fn=<AddBackward0>)
EPOCH:  210  LOSS:  tensor(0.4892, grad_fn=<AddBackward0>)
EPOCH:  211  LOSS:  tensor(0.4852, grad_fn=<AddBackward0

EPOCH:  334  LOSS:  tensor(0.2230, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.2223, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.2216, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.2211, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.2203, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.2197, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.2191, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.2184, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.2178, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.2171, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.2165, grad_fn=<AddBackward0>)
EPOCH:  345  LOSS:  tensor(0.2159, grad_fn=<AddBackward0>)
EPOCH:  346  LOSS:  tensor(0.2152, grad_fn=<AddBackward0>)
EPOCH:  347  LOSS:  tensor(0.2147, grad_fn=<AddBackward0>)
EPOCH:  348  LOSS:  tensor(0.2140, grad_fn=<AddBackward0>)
EPOCH:  349  LOSS:  tensor(0.2135, grad_fn=<AddBackward0>)
EPOCH:  350  LOSS:  tensor(0.2129, grad_fn=<AddBackward0

EPOCH:  473  LOSS:  tensor(0.1678, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1675, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1672, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1671, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.1668, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1665, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1665, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1662, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1658, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.1659, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.1656, grad_fn=<AddBackward0>)
EPOCH:  484  LOSS:  tensor(0.1653, grad_fn=<AddBackward0>)
EPOCH:  485  LOSS:  tensor(0.1650, grad_fn=<AddBackward0>)
EPOCH:  486  LOSS:  tensor(0.1647, grad_fn=<AddBackward0>)
EPOCH:  487  LOSS:  tensor(0.1645, grad_fn=<AddBackward0>)
EPOCH:  488  LOSS:  tensor(0.1643, grad_fn=<AddBackward0>)
EPOCH:  489  LOSS:  tensor(0.1642, grad_fn=<AddBackward0

EPOCH:  612  LOSS:  tensor(0.1416, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.1417, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.1415, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.1414, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.1412, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.1413, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.1410, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.1410, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.1408, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.1408, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.1406, grad_fn=<AddBackward0>)
EPOCH:  623  LOSS:  tensor(0.1406, grad_fn=<AddBackward0>)
EPOCH:  624  LOSS:  tensor(0.1403, grad_fn=<AddBackward0>)
EPOCH:  625  LOSS:  tensor(0.1404, grad_fn=<AddBackward0>)
EPOCH:  626  LOSS:  tensor(0.1401, grad_fn=<AddBackward0>)
EPOCH:  627  LOSS:  tensor(0.1401, grad_fn=<AddBackward0>)
EPOCH:  628  LOSS:  tensor(0.1399, grad_fn=<AddBackward0

EPOCH:  751  LOSS:  tensor(0.1275, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.1275, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.1273, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.1273, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.1271, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.1267, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.1266, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.1266, grad_fn=<AddBackward0>)
EPOCH:  762  LOSS:  tensor(0.1264, grad_fn=<AddBackward0>)
EPOCH:  763  LOSS:  tensor(0.1262, grad_fn=<AddBackward0>)
EPOCH:  764  LOSS:  tensor(0.1262, grad_fn=<AddBackward0>)
EPOCH:  765  LOSS:  tensor(0.1260, grad_fn=<AddBackward0>)
EPOCH:  766  LOSS:  tensor(0.1260, grad_fn=<AddBackward0>)
EPOCH:  767  LOSS:  tensor(0.1258, grad_fn=<AddBackward0

EPOCH:  890  LOSS:  tensor(0.1128, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.1125, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.1125, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.1123, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.1124, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.1121, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.1121, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.1118, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.1117, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.1118, grad_fn=<AddBackward0>)
EPOCH:  901  LOSS:  tensor(0.1117, grad_fn=<AddBackward0>)
EPOCH:  902  LOSS:  tensor(0.1116, grad_fn=<AddBackward0>)
EPOCH:  903  LOSS:  tensor(0.1117, grad_fn=<AddBackward0>)
EPOCH:  904  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  905  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  906  LOSS:  tensor(0.1112, grad_fn=<AddBackward0

EPOCH:  1029  LOSS:  tensor(0.1009, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.1008, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.1007, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.1006, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.1006, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.1004, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.1003, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.1004, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.1002, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.1001, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.1001, grad_fn=<AddBackward0>)
EPOCH:  1040  LOSS:  tensor(0.1000, grad_fn=<AddBackward0>)
EPOCH:  1041  LOSS:  tensor(0.1000, grad_fn=<AddBackward0>)
EPOCH:  1042  LOSS:  tensor(0.1000, grad_fn=<AddBackward0>)
EPOCH:  1043  LOSS:  tensor(0.0999, grad_fn=<AddBackward0>)
EPOCH:  1044  LOSS:  tensor(0.0998, grad_fn=<AddBackward0>)
EPOCH:  1045  LOSS:  tensor(0.0996, grad

EPOCH:  1166  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0927, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0936, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0960, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0973, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0986, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0993, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0991, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  1177  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  1178  LOSS:  tensor(0.0945, grad_fn=<AddBackward0>)
EPOCH:  1179  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  1180  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  1181  LOSS:  tensor(0.0920, grad_fn=<AddBackward0>)
EPOCH:  1182  LOSS:  tensor(0.0927, grad

EPOCH:  104  LOSS:  tensor(1.4933, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(1.4572, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(1.4219, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(1.3874, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(1.3542, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(1.3230, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(1.2928, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(1.2632, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(1.2343, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(1.2067, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(1.1795, grad_fn=<AddBackward0>)
EPOCH:  115  LOSS:  tensor(1.1532, grad_fn=<AddBackward0>)
EPOCH:  116  LOSS:  tensor(1.1281, grad_fn=<AddBackward0>)
EPOCH:  117  LOSS:  tensor(1.1039, grad_fn=<AddBackward0>)
EPOCH:  118  LOSS:  tensor(1.0817, grad_fn=<AddBackward0>)
EPOCH:  119  LOSS:  tensor(1.0606, grad_fn=<AddBackward0>)
EPOCH:  120  LOSS:  tensor(1.0399, grad_fn=<AddBackward0

EPOCH:  243  LOSS:  tensor(0.3099, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.3084, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.3069, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.3055, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.3040, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.3027, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.3013, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.2999, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.2986, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.2973, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.2960, grad_fn=<AddBackward0>)
EPOCH:  254  LOSS:  tensor(0.2947, grad_fn=<AddBackward0>)
EPOCH:  255  LOSS:  tensor(0.2936, grad_fn=<AddBackward0>)
EPOCH:  256  LOSS:  tensor(0.2924, grad_fn=<AddBackward0>)
EPOCH:  257  LOSS:  tensor(0.2911, grad_fn=<AddBackward0>)
EPOCH:  258  LOSS:  tensor(0.2899, grad_fn=<AddBackward0>)
EPOCH:  259  LOSS:  tensor(0.2887, grad_fn=<AddBackward0

EPOCH:  382  LOSS:  tensor(0.1462, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1456, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.1450, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.1443, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.1437, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.1430, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.1418, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.1405, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.1399, grad_fn=<AddBackward0>)
EPOCH:  393  LOSS:  tensor(0.1393, grad_fn=<AddBackward0>)
EPOCH:  394  LOSS:  tensor(0.1387, grad_fn=<AddBackward0>)
EPOCH:  395  LOSS:  tensor(0.1380, grad_fn=<AddBackward0>)
EPOCH:  396  LOSS:  tensor(0.1374, grad_fn=<AddBackward0>)
EPOCH:  397  LOSS:  tensor(0.1368, grad_fn=<AddBackward0>)
EPOCH:  398  LOSS:  tensor(0.1362, grad_fn=<AddBackward0

EPOCH:  521  LOSS:  tensor(0.0852, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.0849, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.0847, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.0844, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.0841, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.0838, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.0838, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.0834, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.0833, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.0831, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  532  LOSS:  tensor(0.0825, grad_fn=<AddBackward0>)
EPOCH:  533  LOSS:  tensor(0.0824, grad_fn=<AddBackward0>)
EPOCH:  534  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  535  LOSS:  tensor(0.0819, grad_fn=<AddBackward0>)
EPOCH:  536  LOSS:  tensor(0.0816, grad_fn=<AddBackward0>)
EPOCH:  537  LOSS:  tensor(0.0814, grad_fn=<AddBackward0

EPOCH:  660  LOSS:  tensor(0.0551, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.0552, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.0549, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.0548, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.0546, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.0544, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.0542, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.0543, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  671  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  672  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  673  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  674  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  675  LOSS:  tensor(0.0534, grad_fn=<AddBackward0>)
EPOCH:  676  LOSS:  tensor(0.0533, grad_fn=<AddBackward0

EPOCH:  799  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0445, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0440, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0439, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0434, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0432, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.0431, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  810  LOSS:  tensor(0.0427, grad_fn=<AddBackward0>)
EPOCH:  811  LOSS:  tensor(0.0427, grad_fn=<AddBackward0>)
EPOCH:  812  LOSS:  tensor(0.0427, grad_fn=<AddBackward0>)
EPOCH:  813  LOSS:  tensor(0.0427, grad_fn=<AddBackward0>)
EPOCH:  814  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  815  LOSS:  tensor(0.0429, grad_fn=<AddBackward0

EPOCH:  938  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0371, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0371, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0371, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0373, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.0374, grad_fn=<AddBackward0>)
EPOCH:  949  LOSS:  tensor(0.0372, grad_fn=<AddBackward0>)
EPOCH:  950  LOSS:  tensor(0.0370, grad_fn=<AddBackward0>)
EPOCH:  951  LOSS:  tensor(0.0367, grad_fn=<AddBackward0>)
EPOCH:  952  LOSS:  tensor(0.0366, grad_fn=<AddBackward0>)
EPOCH:  953  LOSS:  tensor(0.0365, grad_fn=<AddBackward0>)
EPOCH:  954  LOSS:  tensor(0.0365, grad_fn=<AddBackward0

EPOCH:  1076  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0555, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0645, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0796, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0766, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0493, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0374, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0361, grad_fn=<AddBackward0>)
EPOCH:  1087  LOSS:  tensor(0.0433, grad_fn=<AddBackward0>)
EPOCH:  1088  LOSS:  tensor(0.0492, grad_fn=<AddBackward0>)
EPOCH:  1089  LOSS:  tensor(0.0503, grad_fn=<AddBackward0>)
EPOCH:  1090  LOSS:  tensor(0.0466, grad_fn=<AddBackward0>)
EPOCH:  1091  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  1092  LOSS:  tensor(0.0343, grad

EPOCH:  12  LOSS:  tensor(110.1247, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(108.9851, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(108.9861, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(109.3019, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(109.1532, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(107.9793, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(105.7299, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(102.5959, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(98.8661, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(94.8704, grad_fn=<AddBackward0>)
EPOCH:  22  LOSS:  tensor(90.8179, grad_fn=<AddBackward0>)
EPOCH:  23  LOSS:  tensor(87.0216, grad_fn=<AddBackward0>)
EPOCH:  24  LOSS:  tensor(83.5700, grad_fn=<AddBackward0>)
EPOCH:  25  LOSS:  tensor(80.6278, grad_fn=<AddBackward0>)
EPOCH:  26  LOSS:  tensor(78.1184, grad_fn=<AddBackward0>)
EPOCH:  27  LOSS:  tensor(76.0272, grad_fn=<AddBackward0>)
EPOCH:  28  LOSS:  tensor(74.2213, grad_fn=<AddB

EPOCH:  152  LOSS:  tensor(0.6754, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.6646, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.6540, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.6436, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.6334, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.6236, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.6141, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.6051, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.5963, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.5876, grad_fn=<AddBackward0>)
EPOCH:  162  LOSS:  tensor(0.5794, grad_fn=<AddBackward0>)
EPOCH:  163  LOSS:  tensor(0.5716, grad_fn=<AddBackward0>)
EPOCH:  164  LOSS:  tensor(0.5639, grad_fn=<AddBackward0>)
EPOCH:  165  LOSS:  tensor(0.5562, grad_fn=<AddBackward0>)
EPOCH:  166  LOSS:  tensor(0.5489, grad_fn=<AddBackward0>)
EPOCH:  167  LOSS:  tensor(0.5418, grad_fn=<AddBackward0>)
EPOCH:  168  LOSS:  tensor(0.5349, grad_fn=<AddBackward0

EPOCH:  291  LOSS:  tensor(0.2238, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2228, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.2217, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.2209, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2198, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.2192, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.2182, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.2173, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.2165, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.2156, grad_fn=<AddBackward0>)
EPOCH:  301  LOSS:  tensor(0.2146, grad_fn=<AddBackward0>)
EPOCH:  302  LOSS:  tensor(0.2138, grad_fn=<AddBackward0>)
EPOCH:  303  LOSS:  tensor(0.2130, grad_fn=<AddBackward0>)
EPOCH:  304  LOSS:  tensor(0.2121, grad_fn=<AddBackward0>)
EPOCH:  305  LOSS:  tensor(0.2113, grad_fn=<AddBackward0>)
EPOCH:  306  LOSS:  tensor(0.2106, grad_fn=<AddBackward0>)
EPOCH:  307  LOSS:  tensor(0.2095, grad_fn=<AddBackward0

EPOCH:  430  LOSS:  tensor(0.1463, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1458, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1455, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1451, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1448, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1444, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1440, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.1437, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1433, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.1430, grad_fn=<AddBackward0>)
EPOCH:  440  LOSS:  tensor(0.1426, grad_fn=<AddBackward0>)
EPOCH:  441  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  442  LOSS:  tensor(0.1420, grad_fn=<AddBackward0>)
EPOCH:  443  LOSS:  tensor(0.1417, grad_fn=<AddBackward0>)
EPOCH:  444  LOSS:  tensor(0.1415, grad_fn=<AddBackward0>)
EPOCH:  445  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  446  LOSS:  tensor(0.1408, grad_fn=<AddBackward0

EPOCH:  569  LOSS:  tensor(0.1129, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.1126, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.1125, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.1123, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.1123, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.1121, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.1120, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.1118, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.1116, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.1115, grad_fn=<AddBackward0>)
EPOCH:  579  LOSS:  tensor(0.1114, grad_fn=<AddBackward0>)
EPOCH:  580  LOSS:  tensor(0.1111, grad_fn=<AddBackward0>)
EPOCH:  581  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  582  LOSS:  tensor(0.1108, grad_fn=<AddBackward0>)
EPOCH:  583  LOSS:  tensor(0.1106, grad_fn=<AddBackward0>)
EPOCH:  584  LOSS:  tensor(0.1107, grad_fn=<AddBackward0>)
EPOCH:  585  LOSS:  tensor(0.1103, grad_fn=<AddBackward0

EPOCH:  708  LOSS:  tensor(0.0946, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0942, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0943, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0941, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0939, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0937, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.0936, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0936, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  718  LOSS:  tensor(0.0934, grad_fn=<AddBackward0>)
EPOCH:  719  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  720  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  721  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  722  LOSS:  tensor(0.0927, grad_fn=<AddBackward0>)
EPOCH:  723  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  724  LOSS:  tensor(0.0926, grad_fn=<AddBackward0

EPOCH:  847  LOSS:  tensor(0.0816, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0816, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0812, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0811, grad_fn=<AddBackward0>)
EPOCH:  857  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  858  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  859  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  860  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  861  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  862  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  863  LOSS:  tensor(0.0807, grad_fn=<AddBackward0

EPOCH:  986  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0761, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0768, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0773, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0778, grad_fn=<AddBackward0>)
EPOCH:  993  LOSS:  tensor(0.0783, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  995  LOSS:  tensor(0.0779, grad_fn=<AddBackward0>)
EPOCH:  996  LOSS:  tensor(0.0772, grad_fn=<AddBackward0>)
EPOCH:  997  LOSS:  tensor(0.0763, grad_fn=<AddBackward0>)
EPOCH:  998  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  999  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  1000  LOSS:  tensor(0.0741, grad_fn=<AddBackward0>)
EPOCH:  1001  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  1002  LOSS:  tensor(0.0731, grad_fn=<AddBackwa

EPOCH:  1123  LOSS:  tensor(0.0692, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0677, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0701, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0758, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0715, grad_fn=<AddBackward0>)
EPOCH:  1130  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  1132  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  1133  LOSS:  tensor(0.0705, grad_fn=<AddBackward0>)
EPOCH:  1134  LOSS:  tensor(0.0720, grad_fn=<AddBackward0>)
EPOCH:  1135  LOSS:  tensor(0.0719, grad_fn=<AddBackward0>)
EPOCH:  1136  LOSS:  tensor(0.0701, grad_fn=<AddBackward0>)
EPOCH:  1137  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  1138  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  1139  LOSS:  tensor(0.0671, grad

EPOCH:  59  LOSS:  tensor(17.3146, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(16.2420, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(15.1895, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(14.1778, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(13.2307, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(12.3529, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(11.5564, grad_fn=<AddBackward0>)
EPOCH:  66  LOSS:  tensor(10.8308, grad_fn=<AddBackward0>)
EPOCH:  67  LOSS:  tensor(10.1597, grad_fn=<AddBackward0>)
EPOCH:  68  LOSS:  tensor(9.5352, grad_fn=<AddBackward0>)
EPOCH:  69  LOSS:  tensor(8.9476, grad_fn=<AddBackward0>)
EPOCH:  70  LOSS:  tensor(8.3860, grad_fn=<AddBackward0>)
EPOCH:  71  LOSS:  tensor(7.8559, grad_fn=<AddBackward0>)
EPOCH:  72  LOSS:  tensor(7.3644, grad_fn=<AddBackward0>)
EPOCH:  73  LOSS:  tensor(6.9133, grad_fn=<AddBackward0>)
EPOCH:  74  LOSS:  tensor(6.4854, grad_fn=<AddBackward0>)
EPOCH:  75  LOSS:  tensor(6.0821, grad_fn=<AddBackward0>)
EPOCH

EPOCH:  199  LOSS:  tensor(0.5218, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.5171, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.5129, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.5090, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.5049, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.5007, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.4971, grad_fn=<AddBackward0>)
EPOCH:  206  LOSS:  tensor(0.4935, grad_fn=<AddBackward0>)
EPOCH:  207  LOSS:  tensor(0.4898, grad_fn=<AddBackward0>)
EPOCH:  208  LOSS:  tensor(0.4860, grad_fn=<AddBackward0>)
EPOCH:  209  LOSS:  tensor(0.4823, grad_fn=<AddBackward0>)
EPOCH:  210  LOSS:  tensor(0.4788, grad_fn=<AddBackward0>)
EPOCH:  211  LOSS:  tensor(0.4750, grad_fn=<AddBackward0>)
EPOCH:  212  LOSS:  tensor(0.4714, grad_fn=<AddBackward0>)
EPOCH:  213  LOSS:  tensor(0.4684, grad_fn=<AddBackward0>)
EPOCH:  214  LOSS:  tensor(0.4650, grad_fn=<AddBackward0>)
EPOCH:  215  LOSS:  tensor(0.4616, grad_fn=<AddBackward0

EPOCH:  338  LOSS:  tensor(0.1991, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.1959, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.1975, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.1939, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.1930, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.1927, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.1906, grad_fn=<AddBackward0>)
EPOCH:  345  LOSS:  tensor(0.1882, grad_fn=<AddBackward0>)
EPOCH:  346  LOSS:  tensor(0.1875, grad_fn=<AddBackward0>)
EPOCH:  347  LOSS:  tensor(0.1863, grad_fn=<AddBackward0>)
EPOCH:  348  LOSS:  tensor(0.1865, grad_fn=<AddBackward0>)
EPOCH:  349  LOSS:  tensor(0.1842, grad_fn=<AddBackward0>)
EPOCH:  350  LOSS:  tensor(0.1808, grad_fn=<AddBackward0>)
EPOCH:  351  LOSS:  tensor(0.1809, grad_fn=<AddBackward0>)
EPOCH:  352  LOSS:  tensor(0.1786, grad_fn=<AddBackward0>)
EPOCH:  353  LOSS:  tensor(0.1774, grad_fn=<AddBackward0>)
EPOCH:  354  LOSS:  tensor(0.1778, grad_fn=<AddBackward0

EPOCH:  477  LOSS:  tensor(0.1018, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1012, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1007, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1006, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1001, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.0992, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.0985, grad_fn=<AddBackward0>)
EPOCH:  484  LOSS:  tensor(0.0987, grad_fn=<AddBackward0>)
EPOCH:  485  LOSS:  tensor(0.0979, grad_fn=<AddBackward0>)
EPOCH:  486  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  487  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  488  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  489  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  490  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  491  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  492  LOSS:  tensor(0.0938, grad_fn=<AddBackward0>)
EPOCH:  493  LOSS:  tensor(0.0933, grad_fn=<AddBackward0

EPOCH:  616  LOSS:  tensor(0.0624, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.0624, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.0625, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.0621, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.0618, grad_fn=<AddBackward0>)
EPOCH:  623  LOSS:  tensor(0.0619, grad_fn=<AddBackward0>)
EPOCH:  624  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  625  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  626  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  627  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  628  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  629  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  630  LOSS:  tensor(0.0608, grad_fn=<AddBackward0>)
EPOCH:  631  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  632  LOSS:  tensor(0.0609, grad_fn=<AddBackward0

EPOCH:  755  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  762  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  763  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  764  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  765  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  766  LOSS:  tensor(0.0475, grad_fn=<AddBackward0>)
EPOCH:  767  LOSS:  tensor(0.0474, grad_fn=<AddBackward0>)
EPOCH:  768  LOSS:  tensor(0.0474, grad_fn=<AddBackward0>)
EPOCH:  769  LOSS:  tensor(0.0472, grad_fn=<AddBackward0>)
EPOCH:  770  LOSS:  tensor(0.0471, grad_fn=<AddBackward0>)
EPOCH:  771  LOSS:  tensor(0.0470, grad_fn=<AddBackward0

EPOCH:  894  LOSS:  tensor(0.0420, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0422, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0461, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0462, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.0447, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.0426, grad_fn=<AddBackward0>)
EPOCH:  901  LOSS:  tensor(0.0416, grad_fn=<AddBackward0>)
EPOCH:  902  LOSS:  tensor(0.0418, grad_fn=<AddBackward0>)
EPOCH:  903  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  904  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  905  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  906  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  907  LOSS:  tensor(0.0419, grad_fn=<AddBackward0>)
EPOCH:  908  LOSS:  tensor(0.0413, grad_fn=<AddBackward0>)
EPOCH:  909  LOSS:  tensor(0.0415, grad_fn=<AddBackward0>)
EPOCH:  910  LOSS:  tensor(0.0419, grad_fn=<AddBackward0

EPOCH:  1033  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0394, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.0391, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.0387, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.0386, grad_fn=<AddBackward0>)
EPOCH:  1040  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1041  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1042  LOSS:  tensor(0.0385, grad_fn=<AddBackward0>)
EPOCH:  1043  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1044  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1045  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1046  LOSS:  tensor(0.0383, grad_fn=<AddBackward0>)
EPOCH:  1047  LOSS:  tensor(0.0383, grad_fn=<AddBackward0>)
EPOCH:  1048  LOSS:  tensor(0.0383, grad_fn=<AddBackward0>)
EPOCH:  1049  LOSS:  tensor(0.0383, grad

EPOCH:  1170  LOSS:  tensor(0.0381, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0373, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0368, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0367, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0368, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0371, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  1177  LOSS:  tensor(0.0381, grad_fn=<AddBackward0>)
EPOCH:  1178  LOSS:  tensor(0.0388, grad_fn=<AddBackward0>)
EPOCH:  1179  LOSS:  tensor(0.0393, grad_fn=<AddBackward0>)
EPOCH:  1180  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  1181  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  1182  LOSS:  tensor(0.0410, grad_fn=<AddBackward0>)
EPOCH:  1183  LOSS:  tensor(0.0415, grad_fn=<AddBackward0>)
EPOCH:  1184  LOSS:  tensor(0.0420, grad_fn=<AddBackward0>)
EPOCH:  1185  LOSS:  tensor(0.0422, grad_fn=<AddBackward0>)
EPOCH:  1186  LOSS:  tensor(0.0420, grad

EPOCH:  107  LOSS:  tensor(2.6772, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(2.5909, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(2.5087, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(2.4314, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(2.3610, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(2.2951, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(2.2317, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(2.1719, grad_fn=<AddBackward0>)
EPOCH:  115  LOSS:  tensor(2.1145, grad_fn=<AddBackward0>)
EPOCH:  116  LOSS:  tensor(2.0602, grad_fn=<AddBackward0>)
EPOCH:  117  LOSS:  tensor(2.0076, grad_fn=<AddBackward0>)
EPOCH:  118  LOSS:  tensor(1.9566, grad_fn=<AddBackward0>)
EPOCH:  119  LOSS:  tensor(1.9088, grad_fn=<AddBackward0>)
EPOCH:  120  LOSS:  tensor(1.8637, grad_fn=<AddBackward0>)
EPOCH:  121  LOSS:  tensor(1.8204, grad_fn=<AddBackward0>)
EPOCH:  122  LOSS:  tensor(1.7785, grad_fn=<AddBackward0>)
EPOCH:  123  LOSS:  tensor(1.7381, grad_fn=<AddBackward0

EPOCH:  246  LOSS:  tensor(0.4865, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.4835, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.4805, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.4774, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.4744, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.4715, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.4685, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.4659, grad_fn=<AddBackward0>)
EPOCH:  254  LOSS:  tensor(0.4633, grad_fn=<AddBackward0>)
EPOCH:  255  LOSS:  tensor(0.4607, grad_fn=<AddBackward0>)
EPOCH:  256  LOSS:  tensor(0.4581, grad_fn=<AddBackward0>)
EPOCH:  257  LOSS:  tensor(0.4556, grad_fn=<AddBackward0>)
EPOCH:  258  LOSS:  tensor(0.4530, grad_fn=<AddBackward0>)
EPOCH:  259  LOSS:  tensor(0.4505, grad_fn=<AddBackward0>)
EPOCH:  260  LOSS:  tensor(0.4480, grad_fn=<AddBackward0>)
EPOCH:  261  LOSS:  tensor(0.4455, grad_fn=<AddBackward0>)
EPOCH:  262  LOSS:  tensor(0.4431, grad_fn=<AddBackward0

EPOCH:  385  LOSS:  tensor(0.2519, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.2514, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.2505, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.2498, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.2491, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.2484, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.2477, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.2470, grad_fn=<AddBackward0>)
EPOCH:  393  LOSS:  tensor(0.2463, grad_fn=<AddBackward0>)
EPOCH:  394  LOSS:  tensor(0.2457, grad_fn=<AddBackward0>)
EPOCH:  395  LOSS:  tensor(0.2452, grad_fn=<AddBackward0>)
EPOCH:  396  LOSS:  tensor(0.2444, grad_fn=<AddBackward0>)
EPOCH:  397  LOSS:  tensor(0.2437, grad_fn=<AddBackward0>)
EPOCH:  398  LOSS:  tensor(0.2431, grad_fn=<AddBackward0>)
EPOCH:  399  LOSS:  tensor(0.2426, grad_fn=<AddBackward0>)
EPOCH:  400  LOSS:  tensor(0.2419, grad_fn=<AddBackward0>)
EPOCH:  401  LOSS:  tensor(0.2411, grad_fn=<AddBackward0

EPOCH:  524  LOSS:  tensor(0.1716, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.1712, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.1706, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.1701, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.1697, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.1692, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.1688, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.1685, grad_fn=<AddBackward0>)
EPOCH:  532  LOSS:  tensor(0.1681, grad_fn=<AddBackward0>)
EPOCH:  533  LOSS:  tensor(0.1677, grad_fn=<AddBackward0>)
EPOCH:  534  LOSS:  tensor(0.1674, grad_fn=<AddBackward0>)
EPOCH:  535  LOSS:  tensor(0.1671, grad_fn=<AddBackward0>)
EPOCH:  536  LOSS:  tensor(0.1667, grad_fn=<AddBackward0>)
EPOCH:  537  LOSS:  tensor(0.1664, grad_fn=<AddBackward0>)
EPOCH:  538  LOSS:  tensor(0.1662, grad_fn=<AddBackward0>)
EPOCH:  539  LOSS:  tensor(0.1658, grad_fn=<AddBackward0>)
EPOCH:  540  LOSS:  tensor(0.1656, grad_fn=<AddBackward0

EPOCH:  663  LOSS:  tensor(0.1316, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.1309, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.1304, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.1304, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.1307, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.1311, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.1316, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.1320, grad_fn=<AddBackward0>)
EPOCH:  671  LOSS:  tensor(0.1324, grad_fn=<AddBackward0>)
EPOCH:  672  LOSS:  tensor(0.1333, grad_fn=<AddBackward0>)
EPOCH:  673  LOSS:  tensor(0.1347, grad_fn=<AddBackward0>)
EPOCH:  674  LOSS:  tensor(0.1363, grad_fn=<AddBackward0>)
EPOCH:  675  LOSS:  tensor(0.1377, grad_fn=<AddBackward0>)
EPOCH:  676  LOSS:  tensor(0.1386, grad_fn=<AddBackward0>)
EPOCH:  677  LOSS:  tensor(0.1390, grad_fn=<AddBackward0>)
EPOCH:  678  LOSS:  tensor(0.1385, grad_fn=<AddBackward0>)
EPOCH:  679  LOSS:  tensor(0.1374, grad_fn=<AddBackward0

EPOCH:  802  LOSS:  tensor(0.0896, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.1136, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.1178, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.1166, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.1102, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.1013, grad_fn=<AddBackward0>)
EPOCH:  810  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  811  LOSS:  tensor(0.0882, grad_fn=<AddBackward0>)
EPOCH:  812  LOSS:  tensor(0.0877, grad_fn=<AddBackward0>)
EPOCH:  813  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  814  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  815  LOSS:  tensor(0.0976, grad_fn=<AddBackward0>)
EPOCH:  816  LOSS:  tensor(0.0989, grad_fn=<AddBackward0>)
EPOCH:  817  LOSS:  tensor(0.0982, grad_fn=<AddBackward0>)
EPOCH:  818  LOSS:  tensor(0.0960, grad_fn=<AddBackward0

EPOCH:  941  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.1426, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.1663, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.1427, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.1010, grad_fn=<AddBackward0>)
EPOCH:  949  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  950  LOSS:  tensor(0.1028, grad_fn=<AddBackward0>)
EPOCH:  951  LOSS:  tensor(0.0893, grad_fn=<AddBackward0>)
EPOCH:  952  LOSS:  tensor(0.0901, grad_fn=<AddBackward0>)
EPOCH:  953  LOSS:  tensor(0.0990, grad_fn=<AddBackward0>)
EPOCH:  954  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  955  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  956  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  957  LOSS:  tensor(0.0804, grad_fn=<AddBackward0

EPOCH:  1079  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0592, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0598, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0598, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0594, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0579, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0573, grad_fn=<AddBackward0>)
EPOCH:  1087  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1088  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1089  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1090  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1091  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  1092  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1093  LOSS:  tensor(0.0578, grad_fn=<AddBackward0>)
EPOCH:  1094  LOSS:  tensor(0.0577, grad_fn=<AddBackward0>)
EPOCH:  1095  LOSS:  tensor(0.0577, grad

EPOCH:  15  LOSS:  tensor(76.0260, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(72.4723, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(68.5209, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(64.4795, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(60.6742, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(57.2028, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(54.1449, grad_fn=<AddBackward0>)
EPOCH:  22  LOSS:  tensor(51.5346, grad_fn=<AddBackward0>)
EPOCH:  23  LOSS:  tensor(49.3332, grad_fn=<AddBackward0>)
EPOCH:  24  LOSS:  tensor(47.4181, grad_fn=<AddBackward0>)
EPOCH:  25  LOSS:  tensor(45.6850, grad_fn=<AddBackward0>)
EPOCH:  26  LOSS:  tensor(43.9976, grad_fn=<AddBackward0>)
EPOCH:  27  LOSS:  tensor(42.3129, grad_fn=<AddBackward0>)
EPOCH:  28  LOSS:  tensor(40.5636, grad_fn=<AddBackward0>)
EPOCH:  29  LOSS:  tensor(38.7752, grad_fn=<AddBackward0>)
EPOCH:  30  LOSS:  tensor(36.9150, grad_fn=<AddBackward0>)
EPOCH:  31  LOSS:  tensor(35.0701, grad_fn=<AddBackward0

EPOCH:  155  LOSS:  tensor(0.5046, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.5005, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.4966, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.4928, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.4890, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.4854, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.4818, grad_fn=<AddBackward0>)
EPOCH:  162  LOSS:  tensor(0.4783, grad_fn=<AddBackward0>)
EPOCH:  163  LOSS:  tensor(0.4749, grad_fn=<AddBackward0>)
EPOCH:  164  LOSS:  tensor(0.4716, grad_fn=<AddBackward0>)
EPOCH:  165  LOSS:  tensor(0.4684, grad_fn=<AddBackward0>)
EPOCH:  166  LOSS:  tensor(0.4653, grad_fn=<AddBackward0>)
EPOCH:  167  LOSS:  tensor(0.4621, grad_fn=<AddBackward0>)
EPOCH:  168  LOSS:  tensor(0.4591, grad_fn=<AddBackward0>)
EPOCH:  169  LOSS:  tensor(0.4561, grad_fn=<AddBackward0>)
EPOCH:  170  LOSS:  tensor(0.4531, grad_fn=<AddBackward0>)
EPOCH:  171  LOSS:  tensor(0.4502, grad_fn=<AddBackward0

EPOCH:  294  LOSS:  tensor(0.2568, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2557, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.2544, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.2529, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.2514, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.2498, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.2481, grad_fn=<AddBackward0>)
EPOCH:  301  LOSS:  tensor(0.2465, grad_fn=<AddBackward0>)
EPOCH:  302  LOSS:  tensor(0.2450, grad_fn=<AddBackward0>)
EPOCH:  303  LOSS:  tensor(0.2435, grad_fn=<AddBackward0>)
EPOCH:  304  LOSS:  tensor(0.2421, grad_fn=<AddBackward0>)
EPOCH:  305  LOSS:  tensor(0.2406, grad_fn=<AddBackward0>)
EPOCH:  306  LOSS:  tensor(0.2392, grad_fn=<AddBackward0>)
EPOCH:  307  LOSS:  tensor(0.2379, grad_fn=<AddBackward0>)
EPOCH:  308  LOSS:  tensor(0.2366, grad_fn=<AddBackward0>)
EPOCH:  309  LOSS:  tensor(0.2353, grad_fn=<AddBackward0>)
EPOCH:  310  LOSS:  tensor(0.2341, grad_fn=<AddBackward0

EPOCH:  433  LOSS:  tensor(0.1469, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1465, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1461, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1458, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.1454, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1450, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.1447, grad_fn=<AddBackward0>)
EPOCH:  440  LOSS:  tensor(0.1443, grad_fn=<AddBackward0>)
EPOCH:  441  LOSS:  tensor(0.1440, grad_fn=<AddBackward0>)
EPOCH:  442  LOSS:  tensor(0.1436, grad_fn=<AddBackward0>)
EPOCH:  443  LOSS:  tensor(0.1433, grad_fn=<AddBackward0>)
EPOCH:  444  LOSS:  tensor(0.1429, grad_fn=<AddBackward0>)
EPOCH:  445  LOSS:  tensor(0.1425, grad_fn=<AddBackward0>)
EPOCH:  446  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  447  LOSS:  tensor(0.1418, grad_fn=<AddBackward0>)
EPOCH:  448  LOSS:  tensor(0.1415, grad_fn=<AddBackward0>)
EPOCH:  449  LOSS:  tensor(0.1411, grad_fn=<AddBackward0

EPOCH:  572  LOSS:  tensor(0.1078, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.1076, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.1074, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.1073, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.1071, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.1069, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.1068, grad_fn=<AddBackward0>)
EPOCH:  579  LOSS:  tensor(0.1067, grad_fn=<AddBackward0>)
EPOCH:  580  LOSS:  tensor(0.1066, grad_fn=<AddBackward0>)
EPOCH:  581  LOSS:  tensor(0.1064, grad_fn=<AddBackward0>)
EPOCH:  582  LOSS:  tensor(0.1063, grad_fn=<AddBackward0>)
EPOCH:  583  LOSS:  tensor(0.1061, grad_fn=<AddBackward0>)
EPOCH:  584  LOSS:  tensor(0.1059, grad_fn=<AddBackward0>)
EPOCH:  585  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  586  LOSS:  tensor(0.1057, grad_fn=<AddBackward0>)
EPOCH:  587  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  588  LOSS:  tensor(0.1054, grad_fn=<AddBackward0

EPOCH:  711  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0911, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  718  LOSS:  tensor(0.0907, grad_fn=<AddBackward0>)
EPOCH:  719  LOSS:  tensor(0.0906, grad_fn=<AddBackward0>)
EPOCH:  720  LOSS:  tensor(0.0905, grad_fn=<AddBackward0>)
EPOCH:  721  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  722  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  723  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  724  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  725  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  726  LOSS:  tensor(0.0899, grad_fn=<AddBackward0>)
EPOCH:  727  LOSS:  tensor(0.0898, grad_fn=<AddBackward0

EPOCH:  850  LOSS:  tensor(0.0794, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0793, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0793, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0792, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0791, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0791, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0790, grad_fn=<AddBackward0>)
EPOCH:  857  LOSS:  tensor(0.0790, grad_fn=<AddBackward0>)
EPOCH:  858  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  859  LOSS:  tensor(0.0788, grad_fn=<AddBackward0>)
EPOCH:  860  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  861  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  862  LOSS:  tensor(0.0786, grad_fn=<AddBackward0>)
EPOCH:  863  LOSS:  tensor(0.0786, grad_fn=<AddBackward0>)
EPOCH:  864  LOSS:  tensor(0.0785, grad_fn=<AddBackward0>)
EPOCH:  865  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  866  LOSS:  tensor(0.0784, grad_fn=<AddBackward0

EPOCH:  989  LOSS:  tensor(0.0711, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0710, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0710, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  993  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  995  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  996  LOSS:  tensor(0.0707, grad_fn=<AddBackward0>)
EPOCH:  997  LOSS:  tensor(0.0707, grad_fn=<AddBackward0>)
EPOCH:  998  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  999  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  1000  LOSS:  tensor(0.0705, grad_fn=<AddBackward0>)
EPOCH:  1001  LOSS:  tensor(0.0705, grad_fn=<AddBackward0>)
EPOCH:  1002  LOSS:  tensor(0.0704, grad_fn=<AddBackward0>)
EPOCH:  1003  LOSS:  tensor(0.0704, grad_fn=<AddBackward0>)
EPOCH:  1004  LOSS:  tensor(0.0704, grad_fn=<AddBackward0>)
EPOCH:  1005  LOSS:  tensor(0.0703, grad_fn=<AddBac

EPOCH:  1126  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  1130  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  1132  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  1133  LOSS:  tensor(0.0659, grad_fn=<AddBackward0>)
EPOCH:  1134  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  1135  LOSS:  tensor(0.0656, grad_fn=<AddBackward0>)
EPOCH:  1136  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  1137  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1138  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1139  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1140  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1141  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1142  LOSS:  tensor(0.0654, grad

EPOCH:  62  LOSS:  tensor(14.0344, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(13.3300, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(12.6527, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(11.9848, grad_fn=<AddBackward0>)
EPOCH:  66  LOSS:  tensor(11.3178, grad_fn=<AddBackward0>)
EPOCH:  67  LOSS:  tensor(10.6834, grad_fn=<AddBackward0>)
EPOCH:  68  LOSS:  tensor(10.0979, grad_fn=<AddBackward0>)
EPOCH:  69  LOSS:  tensor(9.5555, grad_fn=<AddBackward0>)
EPOCH:  70  LOSS:  tensor(9.0412, grad_fn=<AddBackward0>)
EPOCH:  71  LOSS:  tensor(8.5584, grad_fn=<AddBackward0>)
EPOCH:  72  LOSS:  tensor(8.1270, grad_fn=<AddBackward0>)
EPOCH:  73  LOSS:  tensor(7.7226, grad_fn=<AddBackward0>)
EPOCH:  74  LOSS:  tensor(7.3275, grad_fn=<AddBackward0>)
EPOCH:  75  LOSS:  tensor(6.9454, grad_fn=<AddBackward0>)
EPOCH:  76  LOSS:  tensor(6.5930, grad_fn=<AddBackward0>)
EPOCH:  77  LOSS:  tensor(6.2642, grad_fn=<AddBackward0>)
EPOCH:  78  LOSS:  tensor(5.9475, grad_fn=<AddBackward0>)
EPOCH: 

EPOCH:  202  LOSS:  tensor(0.6737, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.6693, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.6650, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.6608, grad_fn=<AddBackward0>)
EPOCH:  206  LOSS:  tensor(0.6566, grad_fn=<AddBackward0>)
EPOCH:  207  LOSS:  tensor(0.6525, grad_fn=<AddBackward0>)
EPOCH:  208  LOSS:  tensor(0.6485, grad_fn=<AddBackward0>)
EPOCH:  209  LOSS:  tensor(0.6446, grad_fn=<AddBackward0>)
EPOCH:  210  LOSS:  tensor(0.6407, grad_fn=<AddBackward0>)
EPOCH:  211  LOSS:  tensor(0.6370, grad_fn=<AddBackward0>)
EPOCH:  212  LOSS:  tensor(0.6331, grad_fn=<AddBackward0>)
EPOCH:  213  LOSS:  tensor(0.6295, grad_fn=<AddBackward0>)
EPOCH:  214  LOSS:  tensor(0.6257, grad_fn=<AddBackward0>)
EPOCH:  215  LOSS:  tensor(0.6221, grad_fn=<AddBackward0>)
EPOCH:  216  LOSS:  tensor(0.6185, grad_fn=<AddBackward0>)
EPOCH:  217  LOSS:  tensor(0.6150, grad_fn=<AddBackward0>)
EPOCH:  218  LOSS:  tensor(0.6115, grad_fn=<AddBackward0

EPOCH:  341  LOSS:  tensor(0.3442, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.3429, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.3416, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.3403, grad_fn=<AddBackward0>)
EPOCH:  345  LOSS:  tensor(0.3390, grad_fn=<AddBackward0>)
EPOCH:  346  LOSS:  tensor(0.3377, grad_fn=<AddBackward0>)
EPOCH:  347  LOSS:  tensor(0.3365, grad_fn=<AddBackward0>)
EPOCH:  348  LOSS:  tensor(0.3352, grad_fn=<AddBackward0>)
EPOCH:  349  LOSS:  tensor(0.3340, grad_fn=<AddBackward0>)
EPOCH:  350  LOSS:  tensor(0.3327, grad_fn=<AddBackward0>)
EPOCH:  351  LOSS:  tensor(0.3310, grad_fn=<AddBackward0>)
EPOCH:  352  LOSS:  tensor(0.3296, grad_fn=<AddBackward0>)
EPOCH:  353  LOSS:  tensor(0.3280, grad_fn=<AddBackward0>)
EPOCH:  354  LOSS:  tensor(0.3265, grad_fn=<AddBackward0>)
EPOCH:  355  LOSS:  tensor(0.3250, grad_fn=<AddBackward0>)
EPOCH:  356  LOSS:  tensor(0.3236, grad_fn=<AddBackward0>)
EPOCH:  357  LOSS:  tensor(0.3223, grad_fn=<AddBackward0

EPOCH:  480  LOSS:  tensor(0.2047, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.2041, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.2036, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.2030, grad_fn=<AddBackward0>)
EPOCH:  484  LOSS:  tensor(0.2025, grad_fn=<AddBackward0>)
EPOCH:  485  LOSS:  tensor(0.2020, grad_fn=<AddBackward0>)
EPOCH:  486  LOSS:  tensor(0.2015, grad_fn=<AddBackward0>)
EPOCH:  487  LOSS:  tensor(0.2009, grad_fn=<AddBackward0>)
EPOCH:  488  LOSS:  tensor(0.2004, grad_fn=<AddBackward0>)
EPOCH:  489  LOSS:  tensor(0.1999, grad_fn=<AddBackward0>)
EPOCH:  490  LOSS:  tensor(0.1995, grad_fn=<AddBackward0>)
EPOCH:  491  LOSS:  tensor(0.1990, grad_fn=<AddBackward0>)
EPOCH:  492  LOSS:  tensor(0.1985, grad_fn=<AddBackward0>)
EPOCH:  493  LOSS:  tensor(0.1980, grad_fn=<AddBackward0>)
EPOCH:  494  LOSS:  tensor(0.1974, grad_fn=<AddBackward0>)
EPOCH:  495  LOSS:  tensor(0.1970, grad_fn=<AddBackward0>)
EPOCH:  496  LOSS:  tensor(0.1965, grad_fn=<AddBackward0

EPOCH:  619  LOSS:  tensor(0.1543, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.1541, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.1539, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.1536, grad_fn=<AddBackward0>)
EPOCH:  623  LOSS:  tensor(0.1534, grad_fn=<AddBackward0>)
EPOCH:  624  LOSS:  tensor(0.1531, grad_fn=<AddBackward0>)
EPOCH:  625  LOSS:  tensor(0.1528, grad_fn=<AddBackward0>)
EPOCH:  626  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  627  LOSS:  tensor(0.1524, grad_fn=<AddBackward0>)
EPOCH:  628  LOSS:  tensor(0.1521, grad_fn=<AddBackward0>)
EPOCH:  629  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  630  LOSS:  tensor(0.1517, grad_fn=<AddBackward0>)
EPOCH:  631  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  632  LOSS:  tensor(0.1512, grad_fn=<AddBackward0>)
EPOCH:  633  LOSS:  tensor(0.1510, grad_fn=<AddBackward0>)
EPOCH:  634  LOSS:  tensor(0.1508, grad_fn=<AddBackward0>)
EPOCH:  635  LOSS:  tensor(0.1506, grad_fn=<AddBackward0

EPOCH:  758  LOSS:  tensor(0.1313, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.1312, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.1311, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.1310, grad_fn=<AddBackward0>)
EPOCH:  762  LOSS:  tensor(0.1308, grad_fn=<AddBackward0>)
EPOCH:  763  LOSS:  tensor(0.1307, grad_fn=<AddBackward0>)
EPOCH:  764  LOSS:  tensor(0.1306, grad_fn=<AddBackward0>)
EPOCH:  765  LOSS:  tensor(0.1305, grad_fn=<AddBackward0>)
EPOCH:  766  LOSS:  tensor(0.1304, grad_fn=<AddBackward0>)
EPOCH:  767  LOSS:  tensor(0.1303, grad_fn=<AddBackward0>)
EPOCH:  768  LOSS:  tensor(0.1302, grad_fn=<AddBackward0>)
EPOCH:  769  LOSS:  tensor(0.1301, grad_fn=<AddBackward0>)
EPOCH:  770  LOSS:  tensor(0.1300, grad_fn=<AddBackward0>)
EPOCH:  771  LOSS:  tensor(0.1298, grad_fn=<AddBackward0>)
EPOCH:  772  LOSS:  tensor(0.1297, grad_fn=<AddBackward0>)
EPOCH:  773  LOSS:  tensor(0.1296, grad_fn=<AddBackward0>)
EPOCH:  774  LOSS:  tensor(0.1295, grad_fn=<AddBackward0

EPOCH:  897  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.1178, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.1177, grad_fn=<AddBackward0>)
EPOCH:  901  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  902  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  903  LOSS:  tensor(0.1175, grad_fn=<AddBackward0>)
EPOCH:  904  LOSS:  tensor(0.1174, grad_fn=<AddBackward0>)
EPOCH:  905  LOSS:  tensor(0.1172, grad_fn=<AddBackward0>)
EPOCH:  906  LOSS:  tensor(0.1172, grad_fn=<AddBackward0>)
EPOCH:  907  LOSS:  tensor(0.1172, grad_fn=<AddBackward0>)
EPOCH:  908  LOSS:  tensor(0.1169, grad_fn=<AddBackward0>)
EPOCH:  909  LOSS:  tensor(0.1169, grad_fn=<AddBackward0>)
EPOCH:  910  LOSS:  tensor(0.1168, grad_fn=<AddBackward0>)
EPOCH:  911  LOSS:  tensor(0.1166, grad_fn=<AddBackward0>)
EPOCH:  912  LOSS:  tensor(0.1166, grad_fn=<AddBackward0>)
EPOCH:  913  LOSS:  tensor(0.1165, grad_fn=<AddBackward0

EPOCH:  1036  LOSS:  tensor(0.1115, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.1165, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.1211, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.1227, grad_fn=<AddBackward0>)
EPOCH:  1040  LOSS:  tensor(0.1201, grad_fn=<AddBackward0>)
EPOCH:  1041  LOSS:  tensor(0.1134, grad_fn=<AddBackward0>)
EPOCH:  1042  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  1043  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  1044  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  1045  LOSS:  tensor(0.0997, grad_fn=<AddBackward0>)
EPOCH:  1046  LOSS:  tensor(0.1041, grad_fn=<AddBackward0>)
EPOCH:  1047  LOSS:  tensor(0.1070, grad_fn=<AddBackward0>)
EPOCH:  1048  LOSS:  tensor(0.1066, grad_fn=<AddBackward0>)
EPOCH:  1049  LOSS:  tensor(0.1031, grad_fn=<AddBackward0>)
EPOCH:  1050  LOSS:  tensor(0.0988, grad_fn=<AddBackward0>)
EPOCH:  1051  LOSS:  tensor(0.0961, grad_fn=<AddBackward0>)
EPOCH:  1052  LOSS:  tensor(0.0960, grad

EPOCH:  1173  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0862, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  1177  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  1178  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  1179  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  1180  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  1181  LOSS:  tensor(0.0862, grad_fn=<AddBackward0>)
EPOCH:  1182  LOSS:  tensor(0.0864, grad_fn=<AddBackward0>)
EPOCH:  1183  LOSS:  tensor(0.0867, grad_fn=<AddBackward0>)
EPOCH:  1184  LOSS:  tensor(0.0870, grad_fn=<AddBackward0>)
EPOCH:  1185  LOSS:  tensor(0.0876, grad_fn=<AddBackward0>)
EPOCH:  1186  LOSS:  tensor(0.0884, grad_fn=<AddBackward0>)
EPOCH:  1187  LOSS:  tensor(0.0896, grad_fn=<AddBackward0>)
EPOCH:  1188  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  1189  LOSS:  tensor(0.0934, grad

EPOCH:  111  LOSS:  tensor(1.1407, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(1.1160, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(1.0924, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(1.0704, grad_fn=<AddBackward0>)
EPOCH:  115  LOSS:  tensor(1.0499, grad_fn=<AddBackward0>)
EPOCH:  116  LOSS:  tensor(1.0300, grad_fn=<AddBackward0>)
EPOCH:  117  LOSS:  tensor(1.0113, grad_fn=<AddBackward0>)
EPOCH:  118  LOSS:  tensor(0.9929, grad_fn=<AddBackward0>)
EPOCH:  119  LOSS:  tensor(0.9751, grad_fn=<AddBackward0>)
EPOCH:  120  LOSS:  tensor(0.9580, grad_fn=<AddBackward0>)
EPOCH:  121  LOSS:  tensor(0.9414, grad_fn=<AddBackward0>)
EPOCH:  122  LOSS:  tensor(0.9248, grad_fn=<AddBackward0>)
EPOCH:  123  LOSS:  tensor(0.9088, grad_fn=<AddBackward0>)
EPOCH:  124  LOSS:  tensor(0.8935, grad_fn=<AddBackward0>)
EPOCH:  125  LOSS:  tensor(0.8784, grad_fn=<AddBackward0>)
EPOCH:  126  LOSS:  tensor(0.8636, grad_fn=<AddBackward0>)
EPOCH:  127  LOSS:  tensor(0.8491, grad_fn=<AddBackward0

EPOCH:  250  LOSS:  tensor(0.2556, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.2542, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.2527, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.2513, grad_fn=<AddBackward0>)
EPOCH:  254  LOSS:  tensor(0.2500, grad_fn=<AddBackward0>)
EPOCH:  255  LOSS:  tensor(0.2487, grad_fn=<AddBackward0>)
EPOCH:  256  LOSS:  tensor(0.2474, grad_fn=<AddBackward0>)
EPOCH:  257  LOSS:  tensor(0.2462, grad_fn=<AddBackward0>)
EPOCH:  258  LOSS:  tensor(0.2448, grad_fn=<AddBackward0>)
EPOCH:  259  LOSS:  tensor(0.2436, grad_fn=<AddBackward0>)
EPOCH:  260  LOSS:  tensor(0.2424, grad_fn=<AddBackward0>)
EPOCH:  261  LOSS:  tensor(0.2411, grad_fn=<AddBackward0>)
EPOCH:  262  LOSS:  tensor(0.2399, grad_fn=<AddBackward0>)
EPOCH:  263  LOSS:  tensor(0.2386, grad_fn=<AddBackward0>)
EPOCH:  264  LOSS:  tensor(0.2374, grad_fn=<AddBackward0>)
EPOCH:  265  LOSS:  tensor(0.2362, grad_fn=<AddBackward0>)
EPOCH:  266  LOSS:  tensor(0.2350, grad_fn=<AddBackward0

EPOCH:  389  LOSS:  tensor(0.1533, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.1529, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.1525, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.1521, grad_fn=<AddBackward0>)
EPOCH:  393  LOSS:  tensor(0.1517, grad_fn=<AddBackward0>)
EPOCH:  394  LOSS:  tensor(0.1513, grad_fn=<AddBackward0>)
EPOCH:  395  LOSS:  tensor(0.1509, grad_fn=<AddBackward0>)
EPOCH:  396  LOSS:  tensor(0.1505, grad_fn=<AddBackward0>)
EPOCH:  397  LOSS:  tensor(0.1501, grad_fn=<AddBackward0>)
EPOCH:  398  LOSS:  tensor(0.1497, grad_fn=<AddBackward0>)
EPOCH:  399  LOSS:  tensor(0.1493, grad_fn=<AddBackward0>)
EPOCH:  400  LOSS:  tensor(0.1489, grad_fn=<AddBackward0>)
EPOCH:  401  LOSS:  tensor(0.1485, grad_fn=<AddBackward0>)
EPOCH:  402  LOSS:  tensor(0.1482, grad_fn=<AddBackward0>)
EPOCH:  403  LOSS:  tensor(0.1478, grad_fn=<AddBackward0>)
EPOCH:  404  LOSS:  tensor(0.1475, grad_fn=<AddBackward0>)
EPOCH:  405  LOSS:  tensor(0.1472, grad_fn=<AddBackward0

EPOCH:  528  LOSS:  tensor(0.1195, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.1193, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.1192, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.1191, grad_fn=<AddBackward0>)
EPOCH:  532  LOSS:  tensor(0.1189, grad_fn=<AddBackward0>)
EPOCH:  533  LOSS:  tensor(0.1188, grad_fn=<AddBackward0>)
EPOCH:  534  LOSS:  tensor(0.1186, grad_fn=<AddBackward0>)
EPOCH:  535  LOSS:  tensor(0.1185, grad_fn=<AddBackward0>)
EPOCH:  536  LOSS:  tensor(0.1183, grad_fn=<AddBackward0>)
EPOCH:  537  LOSS:  tensor(0.1182, grad_fn=<AddBackward0>)
EPOCH:  538  LOSS:  tensor(0.1181, grad_fn=<AddBackward0>)
EPOCH:  539  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  540  LOSS:  tensor(0.1178, grad_fn=<AddBackward0>)
EPOCH:  541  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  542  LOSS:  tensor(0.1175, grad_fn=<AddBackward0>)
EPOCH:  543  LOSS:  tensor(0.1174, grad_fn=<AddBackward0>)
EPOCH:  544  LOSS:  tensor(0.1172, grad_fn=<AddBackward0

EPOCH:  667  LOSS:  tensor(0.1035, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.1035, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  671  LOSS:  tensor(0.1032, grad_fn=<AddBackward0>)
EPOCH:  672  LOSS:  tensor(0.1031, grad_fn=<AddBackward0>)
EPOCH:  673  LOSS:  tensor(0.1031, grad_fn=<AddBackward0>)
EPOCH:  674  LOSS:  tensor(0.1030, grad_fn=<AddBackward0>)
EPOCH:  675  LOSS:  tensor(0.1029, grad_fn=<AddBackward0>)
EPOCH:  676  LOSS:  tensor(0.1029, grad_fn=<AddBackward0>)
EPOCH:  677  LOSS:  tensor(0.1028, grad_fn=<AddBackward0>)
EPOCH:  678  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  679  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  680  LOSS:  tensor(0.1026, grad_fn=<AddBackward0>)
EPOCH:  681  LOSS:  tensor(0.1026, grad_fn=<AddBackward0>)
EPOCH:  682  LOSS:  tensor(0.1025, grad_fn=<AddBackward0>)
EPOCH:  683  LOSS:  tensor(0.1024, grad_fn=<AddBackward0

EPOCH:  806  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.0934, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.0934, grad_fn=<AddBackward0>)
EPOCH:  810  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  811  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  812  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  813  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  814  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  815  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  816  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  817  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  818  LOSS:  tensor(0.0927, grad_fn=<AddBackward0>)
EPOCH:  819  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  820  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  821  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  822  LOSS:  tensor(0.0924, grad_fn=<AddBackward0

EPOCH:  945  LOSS:  tensor(0.0857, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0856, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0856, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.0855, grad_fn=<AddBackward0>)
EPOCH:  949  LOSS:  tensor(0.0855, grad_fn=<AddBackward0>)
EPOCH:  950  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  951  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  952  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  953  LOSS:  tensor(0.0853, grad_fn=<AddBackward0>)
EPOCH:  954  LOSS:  tensor(0.0853, grad_fn=<AddBackward0>)
EPOCH:  955  LOSS:  tensor(0.0852, grad_fn=<AddBackward0>)
EPOCH:  956  LOSS:  tensor(0.0852, grad_fn=<AddBackward0>)
EPOCH:  957  LOSS:  tensor(0.0851, grad_fn=<AddBackward0>)
EPOCH:  958  LOSS:  tensor(0.0850, grad_fn=<AddBackward0>)
EPOCH:  959  LOSS:  tensor(0.0850, grad_fn=<AddBackward0>)
EPOCH:  960  LOSS:  tensor(0.0850, grad_fn=<AddBackward0>)
EPOCH:  961  LOSS:  tensor(0.0849, grad_fn=<AddBackward0

EPOCH:  1083  LOSS:  tensor(0.0758, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0757, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0757, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0756, grad_fn=<AddBackward0>)
EPOCH:  1087  LOSS:  tensor(0.0756, grad_fn=<AddBackward0>)
EPOCH:  1088  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  1089  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  1090  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  1091  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  1092  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  1093  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  1094  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  1095  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  1096  LOSS:  tensor(0.0751, grad_fn=<AddBackward0>)
EPOCH:  1097  LOSS:  tensor(0.0751, grad_fn=<AddBackward0>)
EPOCH:  1098  LOSS:  tensor(0.0751, grad_fn=<AddBackward0>)
EPOCH:  1099  LOSS:  tensor(0.0750, grad

EPOCH:  19  LOSS:  tensor(65.9591, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(62.3093, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(59.0600, grad_fn=<AddBackward0>)
EPOCH:  22  LOSS:  tensor(56.3077, grad_fn=<AddBackward0>)
EPOCH:  23  LOSS:  tensor(54.0606, grad_fn=<AddBackward0>)
EPOCH:  24  LOSS:  tensor(52.1640, grad_fn=<AddBackward0>)
EPOCH:  25  LOSS:  tensor(50.4818, grad_fn=<AddBackward0>)
EPOCH:  26  LOSS:  tensor(48.9039, grad_fn=<AddBackward0>)
EPOCH:  27  LOSS:  tensor(47.3274, grad_fn=<AddBackward0>)
EPOCH:  28  LOSS:  tensor(45.7070, grad_fn=<AddBackward0>)
EPOCH:  29  LOSS:  tensor(43.9724, grad_fn=<AddBackward0>)
EPOCH:  30  LOSS:  tensor(42.0944, grad_fn=<AddBackward0>)
EPOCH:  31  LOSS:  tensor(40.1002, grad_fn=<AddBackward0>)
EPOCH:  32  LOSS:  tensor(38.0301, grad_fn=<AddBackward0>)
EPOCH:  33  LOSS:  tensor(35.9520, grad_fn=<AddBackward0>)
EPOCH:  34  LOSS:  tensor(33.9550, grad_fn=<AddBackward0>)
EPOCH:  35  LOSS:  tensor(32.1127, grad_fn=<AddBackward0

EPOCH:  159  LOSS:  tensor(0.4986, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.4922, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.4862, grad_fn=<AddBackward0>)
EPOCH:  162  LOSS:  tensor(0.4807, grad_fn=<AddBackward0>)
EPOCH:  163  LOSS:  tensor(0.4750, grad_fn=<AddBackward0>)
EPOCH:  164  LOSS:  tensor(0.4695, grad_fn=<AddBackward0>)
EPOCH:  165  LOSS:  tensor(0.4643, grad_fn=<AddBackward0>)
EPOCH:  166  LOSS:  tensor(0.4591, grad_fn=<AddBackward0>)
EPOCH:  167  LOSS:  tensor(0.4538, grad_fn=<AddBackward0>)
EPOCH:  168  LOSS:  tensor(0.4492, grad_fn=<AddBackward0>)
EPOCH:  169  LOSS:  tensor(0.4445, grad_fn=<AddBackward0>)
EPOCH:  170  LOSS:  tensor(0.4395, grad_fn=<AddBackward0>)
EPOCH:  171  LOSS:  tensor(0.4349, grad_fn=<AddBackward0>)
EPOCH:  172  LOSS:  tensor(0.4303, grad_fn=<AddBackward0>)
EPOCH:  173  LOSS:  tensor(0.4260, grad_fn=<AddBackward0>)
EPOCH:  174  LOSS:  tensor(0.4218, grad_fn=<AddBackward0>)
EPOCH:  175  LOSS:  tensor(0.4176, grad_fn=<AddBackward0

EPOCH:  298  LOSS:  tensor(0.1835, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.1828, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.1821, grad_fn=<AddBackward0>)
EPOCH:  301  LOSS:  tensor(0.1815, grad_fn=<AddBackward0>)
EPOCH:  302  LOSS:  tensor(0.1807, grad_fn=<AddBackward0>)
EPOCH:  303  LOSS:  tensor(0.1802, grad_fn=<AddBackward0>)
EPOCH:  304  LOSS:  tensor(0.1796, grad_fn=<AddBackward0>)
EPOCH:  305  LOSS:  tensor(0.1790, grad_fn=<AddBackward0>)
EPOCH:  306  LOSS:  tensor(0.1784, grad_fn=<AddBackward0>)
EPOCH:  307  LOSS:  tensor(0.1778, grad_fn=<AddBackward0>)
EPOCH:  308  LOSS:  tensor(0.1772, grad_fn=<AddBackward0>)
EPOCH:  309  LOSS:  tensor(0.1766, grad_fn=<AddBackward0>)
EPOCH:  310  LOSS:  tensor(0.1760, grad_fn=<AddBackward0>)
EPOCH:  311  LOSS:  tensor(0.1754, grad_fn=<AddBackward0>)
EPOCH:  312  LOSS:  tensor(0.1749, grad_fn=<AddBackward0>)
EPOCH:  313  LOSS:  tensor(0.1743, grad_fn=<AddBackward0>)
EPOCH:  314  LOSS:  tensor(0.1737, grad_fn=<AddBackward0

EPOCH:  437  LOSS:  tensor(0.1206, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1204, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.1202, grad_fn=<AddBackward0>)
EPOCH:  440  LOSS:  tensor(0.1200, grad_fn=<AddBackward0>)
EPOCH:  441  LOSS:  tensor(0.1197, grad_fn=<AddBackward0>)
EPOCH:  442  LOSS:  tensor(0.1196, grad_fn=<AddBackward0>)
EPOCH:  443  LOSS:  tensor(0.1193, grad_fn=<AddBackward0>)
EPOCH:  444  LOSS:  tensor(0.1191, grad_fn=<AddBackward0>)
EPOCH:  445  LOSS:  tensor(0.1189, grad_fn=<AddBackward0>)
EPOCH:  446  LOSS:  tensor(0.1188, grad_fn=<AddBackward0>)
EPOCH:  447  LOSS:  tensor(0.1185, grad_fn=<AddBackward0>)
EPOCH:  448  LOSS:  tensor(0.1184, grad_fn=<AddBackward0>)
EPOCH:  449  LOSS:  tensor(0.1182, grad_fn=<AddBackward0>)
EPOCH:  450  LOSS:  tensor(0.1180, grad_fn=<AddBackward0>)
EPOCH:  451  LOSS:  tensor(0.1178, grad_fn=<AddBackward0>)
EPOCH:  452  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  453  LOSS:  tensor(0.1175, grad_fn=<AddBackward0

EPOCH:  576  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.0981, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.0980, grad_fn=<AddBackward0>)
EPOCH:  579  LOSS:  tensor(0.0979, grad_fn=<AddBackward0>)
EPOCH:  580  LOSS:  tensor(0.0978, grad_fn=<AddBackward0>)
EPOCH:  581  LOSS:  tensor(0.0976, grad_fn=<AddBackward0>)
EPOCH:  582  LOSS:  tensor(0.0976, grad_fn=<AddBackward0>)
EPOCH:  583  LOSS:  tensor(0.0974, grad_fn=<AddBackward0>)
EPOCH:  584  LOSS:  tensor(0.0973, grad_fn=<AddBackward0>)
EPOCH:  585  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  586  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  587  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  588  LOSS:  tensor(0.0968, grad_fn=<AddBackward0>)
EPOCH:  589  LOSS:  tensor(0.0967, grad_fn=<AddBackward0>)
EPOCH:  590  LOSS:  tensor(0.0966, grad_fn=<AddBackward0>)
EPOCH:  591  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  592  LOSS:  tensor(0.0964, grad_fn=<AddBackward0

EPOCH:  715  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.0827, grad_fn=<AddBackward0>)
EPOCH:  718  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  719  LOSS:  tensor(0.0825, grad_fn=<AddBackward0>)
EPOCH:  720  LOSS:  tensor(0.0824, grad_fn=<AddBackward0>)
EPOCH:  721  LOSS:  tensor(0.0823, grad_fn=<AddBackward0>)
EPOCH:  722  LOSS:  tensor(0.0822, grad_fn=<AddBackward0>)
EPOCH:  723  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  724  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  725  LOSS:  tensor(0.0819, grad_fn=<AddBackward0>)
EPOCH:  726  LOSS:  tensor(0.0818, grad_fn=<AddBackward0>)
EPOCH:  727  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  728  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  729  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  730  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  731  LOSS:  tensor(0.0812, grad_fn=<AddBackward0

EPOCH:  854  LOSS:  tensor(0.0689, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0688, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0688, grad_fn=<AddBackward0>)
EPOCH:  857  LOSS:  tensor(0.0687, grad_fn=<AddBackward0>)
EPOCH:  858  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  859  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  860  LOSS:  tensor(0.0685, grad_fn=<AddBackward0>)
EPOCH:  861  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  862  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  863  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  864  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  865  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  866  LOSS:  tensor(0.0681, grad_fn=<AddBackward0>)
EPOCH:  867  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  868  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  869  LOSS:  tensor(0.0679, grad_fn=<AddBackward0>)
EPOCH:  870  LOSS:  tensor(0.0679, grad_fn=<AddBackward0

EPOCH:  993  LOSS:  tensor(0.0629, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0627, grad_fn=<AddBackward0>)
EPOCH:  995  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  996  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  997  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  998  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  999  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  1000  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  1001  LOSS:  tensor(0.0627, grad_fn=<AddBackward0>)
EPOCH:  1002  LOSS:  tensor(0.0627, grad_fn=<AddBackward0>)
EPOCH:  1003  LOSS:  tensor(0.0627, grad_fn=<AddBackward0>)
EPOCH:  1004  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  1005  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  1006  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  1007  LOSS:  tensor(0.0625, grad_fn=<AddBackward0>)
EPOCH:  1008  LOSS:  tensor(0.0624, grad_fn=<AddBackward0>)
EPOCH:  1009  LOSS:  tensor(0.0623, grad_fn=<Ad

EPOCH:  1130  LOSS:  tensor(0.0553, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0549, grad_fn=<AddBackward0>)
EPOCH:  1132  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  1133  LOSS:  tensor(0.0590, grad_fn=<AddBackward0>)
EPOCH:  1134  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  1135  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  1136  LOSS:  tensor(0.0594, grad_fn=<AddBackward0>)
EPOCH:  1137  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1138  LOSS:  tensor(0.0552, grad_fn=<AddBackward0>)
EPOCH:  1139  LOSS:  tensor(0.0544, grad_fn=<AddBackward0>)
EPOCH:  1140  LOSS:  tensor(0.0546, grad_fn=<AddBackward0>)
EPOCH:  1141  LOSS:  tensor(0.0556, grad_fn=<AddBackward0>)
EPOCH:  1142  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  1143  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1144  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1145  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  1146  LOSS:  tensor(0.0554, grad

EPOCH:  67  LOSS:  tensor(4.7880, grad_fn=<AddBackward0>)
EPOCH:  68  LOSS:  tensor(4.5564, grad_fn=<AddBackward0>)
EPOCH:  69  LOSS:  tensor(4.3373, grad_fn=<AddBackward0>)
EPOCH:  70  LOSS:  tensor(4.1281, grad_fn=<AddBackward0>)
EPOCH:  71  LOSS:  tensor(3.9279, grad_fn=<AddBackward0>)
EPOCH:  72  LOSS:  tensor(3.7371, grad_fn=<AddBackward0>)
EPOCH:  73  LOSS:  tensor(3.5542, grad_fn=<AddBackward0>)
EPOCH:  74  LOSS:  tensor(3.3785, grad_fn=<AddBackward0>)
EPOCH:  75  LOSS:  tensor(3.2104, grad_fn=<AddBackward0>)
EPOCH:  76  LOSS:  tensor(3.0510, grad_fn=<AddBackward0>)
EPOCH:  77  LOSS:  tensor(2.9004, grad_fn=<AddBackward0>)
EPOCH:  78  LOSS:  tensor(2.7576, grad_fn=<AddBackward0>)
EPOCH:  79  LOSS:  tensor(2.6240, grad_fn=<AddBackward0>)
EPOCH:  80  LOSS:  tensor(2.4996, grad_fn=<AddBackward0>)
EPOCH:  81  LOSS:  tensor(2.3862, grad_fn=<AddBackward0>)
EPOCH:  82  LOSS:  tensor(2.2810, grad_fn=<AddBackward0>)
EPOCH:  83  LOSS:  tensor(2.1829, grad_fn=<AddBackward0>)
EPOCH:  84  LO

EPOCH:  207  LOSS:  tensor(0.3011, grad_fn=<AddBackward0>)
EPOCH:  208  LOSS:  tensor(0.2989, grad_fn=<AddBackward0>)
EPOCH:  209  LOSS:  tensor(0.2969, grad_fn=<AddBackward0>)
EPOCH:  210  LOSS:  tensor(0.2949, grad_fn=<AddBackward0>)
EPOCH:  211  LOSS:  tensor(0.2929, grad_fn=<AddBackward0>)
EPOCH:  212  LOSS:  tensor(0.2910, grad_fn=<AddBackward0>)
EPOCH:  213  LOSS:  tensor(0.2891, grad_fn=<AddBackward0>)
EPOCH:  214  LOSS:  tensor(0.2872, grad_fn=<AddBackward0>)
EPOCH:  215  LOSS:  tensor(0.2853, grad_fn=<AddBackward0>)
EPOCH:  216  LOSS:  tensor(0.2835, grad_fn=<AddBackward0>)
EPOCH:  217  LOSS:  tensor(0.2817, grad_fn=<AddBackward0>)
EPOCH:  218  LOSS:  tensor(0.2799, grad_fn=<AddBackward0>)
EPOCH:  219  LOSS:  tensor(0.2781, grad_fn=<AddBackward0>)
EPOCH:  220  LOSS:  tensor(0.2763, grad_fn=<AddBackward0>)
EPOCH:  221  LOSS:  tensor(0.2746, grad_fn=<AddBackward0>)
EPOCH:  222  LOSS:  tensor(0.2729, grad_fn=<AddBackward0>)
EPOCH:  223  LOSS:  tensor(0.2713, grad_fn=<AddBackward0

EPOCH:  346  LOSS:  tensor(0.1644, grad_fn=<AddBackward0>)
EPOCH:  347  LOSS:  tensor(0.1639, grad_fn=<AddBackward0>)
EPOCH:  348  LOSS:  tensor(0.1635, grad_fn=<AddBackward0>)
EPOCH:  349  LOSS:  tensor(0.1630, grad_fn=<AddBackward0>)
EPOCH:  350  LOSS:  tensor(0.1625, grad_fn=<AddBackward0>)
EPOCH:  351  LOSS:  tensor(0.1621, grad_fn=<AddBackward0>)
EPOCH:  352  LOSS:  tensor(0.1616, grad_fn=<AddBackward0>)
EPOCH:  353  LOSS:  tensor(0.1612, grad_fn=<AddBackward0>)
EPOCH:  354  LOSS:  tensor(0.1607, grad_fn=<AddBackward0>)
EPOCH:  355  LOSS:  tensor(0.1603, grad_fn=<AddBackward0>)
EPOCH:  356  LOSS:  tensor(0.1598, grad_fn=<AddBackward0>)
EPOCH:  357  LOSS:  tensor(0.1594, grad_fn=<AddBackward0>)
EPOCH:  358  LOSS:  tensor(0.1590, grad_fn=<AddBackward0>)
EPOCH:  359  LOSS:  tensor(0.1585, grad_fn=<AddBackward0>)
EPOCH:  360  LOSS:  tensor(0.1581, grad_fn=<AddBackward0>)
EPOCH:  361  LOSS:  tensor(0.1576, grad_fn=<AddBackward0>)
EPOCH:  362  LOSS:  tensor(0.1571, grad_fn=<AddBackward0

EPOCH:  485  LOSS:  tensor(0.1222, grad_fn=<AddBackward0>)
EPOCH:  486  LOSS:  tensor(0.1221, grad_fn=<AddBackward0>)
EPOCH:  487  LOSS:  tensor(0.1219, grad_fn=<AddBackward0>)
EPOCH:  488  LOSS:  tensor(0.1218, grad_fn=<AddBackward0>)
EPOCH:  489  LOSS:  tensor(0.1216, grad_fn=<AddBackward0>)
EPOCH:  490  LOSS:  tensor(0.1214, grad_fn=<AddBackward0>)
EPOCH:  491  LOSS:  tensor(0.1213, grad_fn=<AddBackward0>)
EPOCH:  492  LOSS:  tensor(0.1212, grad_fn=<AddBackward0>)
EPOCH:  493  LOSS:  tensor(0.1210, grad_fn=<AddBackward0>)
EPOCH:  494  LOSS:  tensor(0.1208, grad_fn=<AddBackward0>)
EPOCH:  495  LOSS:  tensor(0.1207, grad_fn=<AddBackward0>)
EPOCH:  496  LOSS:  tensor(0.1205, grad_fn=<AddBackward0>)
EPOCH:  497  LOSS:  tensor(0.1204, grad_fn=<AddBackward0>)
EPOCH:  498  LOSS:  tensor(0.1202, grad_fn=<AddBackward0>)
EPOCH:  499  LOSS:  tensor(0.1200, grad_fn=<AddBackward0>)
EPOCH:  500  LOSS:  tensor(0.1199, grad_fn=<AddBackward0>)
EPOCH:  501  LOSS:  tensor(0.1198, grad_fn=<AddBackward0

EPOCH:  624  LOSS:  tensor(0.1063, grad_fn=<AddBackward0>)
EPOCH:  625  LOSS:  tensor(0.1062, grad_fn=<AddBackward0>)
EPOCH:  626  LOSS:  tensor(0.1061, grad_fn=<AddBackward0>)
EPOCH:  627  LOSS:  tensor(0.1061, grad_fn=<AddBackward0>)
EPOCH:  628  LOSS:  tensor(0.1060, grad_fn=<AddBackward0>)
EPOCH:  629  LOSS:  tensor(0.1059, grad_fn=<AddBackward0>)
EPOCH:  630  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  631  LOSS:  tensor(0.1057, grad_fn=<AddBackward0>)
EPOCH:  632  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  633  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  634  LOSS:  tensor(0.1055, grad_fn=<AddBackward0>)
EPOCH:  635  LOSS:  tensor(0.1054, grad_fn=<AddBackward0>)
EPOCH:  636  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  637  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  638  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  639  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  640  LOSS:  tensor(0.1050, grad_fn=<AddBackward0

EPOCH:  763  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  764  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  765  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  766  LOSS:  tensor(0.0968, grad_fn=<AddBackward0>)
EPOCH:  767  LOSS:  tensor(0.0967, grad_fn=<AddBackward0>)
EPOCH:  768  LOSS:  tensor(0.0966, grad_fn=<AddBackward0>)
EPOCH:  769  LOSS:  tensor(0.0966, grad_fn=<AddBackward0>)
EPOCH:  770  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  771  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  772  LOSS:  tensor(0.0964, grad_fn=<AddBackward0>)
EPOCH:  773  LOSS:  tensor(0.0964, grad_fn=<AddBackward0>)
EPOCH:  774  LOSS:  tensor(0.0963, grad_fn=<AddBackward0>)
EPOCH:  775  LOSS:  tensor(0.0962, grad_fn=<AddBackward0>)
EPOCH:  776  LOSS:  tensor(0.0962, grad_fn=<AddBackward0>)
EPOCH:  777  LOSS:  tensor(0.0961, grad_fn=<AddBackward0>)
EPOCH:  778  LOSS:  tensor(0.0961, grad_fn=<AddBackward0>)
EPOCH:  779  LOSS:  tensor(0.0960, grad_fn=<AddBackward0

EPOCH:  902  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  903  LOSS:  tensor(0.0905, grad_fn=<AddBackward0>)
EPOCH:  904  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  905  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  906  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  907  LOSS:  tensor(0.0924, grad_fn=<AddBackward0>)
EPOCH:  908  LOSS:  tensor(0.0923, grad_fn=<AddBackward0>)
EPOCH:  909  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  910  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  911  LOSS:  tensor(0.0901, grad_fn=<AddBackward0>)
EPOCH:  912  LOSS:  tensor(0.0898, grad_fn=<AddBackward0>)
EPOCH:  913  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  914  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  915  LOSS:  tensor(0.0907, grad_fn=<AddBackward0>)
EPOCH:  916  LOSS:  tensor(0.0909, grad_fn=<AddBackward0>)
EPOCH:  917  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  918  LOSS:  tensor(0.0905, grad_fn=<AddBackward0

EPOCH:  1041  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  1042  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  1043  LOSS:  tensor(0.0876, grad_fn=<AddBackward0>)
EPOCH:  1044  LOSS:  tensor(0.0856, grad_fn=<AddBackward0>)
EPOCH:  1045  LOSS:  tensor(0.0846, grad_fn=<AddBackward0>)
EPOCH:  1046  LOSS:  tensor(0.0844, grad_fn=<AddBackward0>)
EPOCH:  1047  LOSS:  tensor(0.0849, grad_fn=<AddBackward0>)
EPOCH:  1048  LOSS:  tensor(0.0858, grad_fn=<AddBackward0>)
EPOCH:  1049  LOSS:  tensor(0.0868, grad_fn=<AddBackward0>)
EPOCH:  1050  LOSS:  tensor(0.0877, grad_fn=<AddBackward0>)
EPOCH:  1051  LOSS:  tensor(0.0885, grad_fn=<AddBackward0>)
EPOCH:  1052  LOSS:  tensor(0.0889, grad_fn=<AddBackward0>)
EPOCH:  1053  LOSS:  tensor(0.0890, grad_fn=<AddBackward0>)
EPOCH:  1054  LOSS:  tensor(0.0887, grad_fn=<AddBackward0>)
EPOCH:  1055  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  1056  LOSS:  tensor(0.0871, grad_fn=<AddBackward0>)
EPOCH:  1057  LOSS:  tensor(0.0862, grad

EPOCH:  1178  LOSS:  tensor(0.0811, grad_fn=<AddBackward0>)
EPOCH:  1179  LOSS:  tensor(0.0812, grad_fn=<AddBackward0>)
EPOCH:  1180  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1181  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1182  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1183  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1184  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1185  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1186  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1187  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1188  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  1189  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1190  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1191  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1192  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1193  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  1194  LOSS:  tensor(0.0813, grad

EPOCH:  116  LOSS:  tensor(1.2664, grad_fn=<AddBackward0>)
EPOCH:  117  LOSS:  tensor(1.2302, grad_fn=<AddBackward0>)
EPOCH:  118  LOSS:  tensor(1.1961, grad_fn=<AddBackward0>)
EPOCH:  119  LOSS:  tensor(1.1637, grad_fn=<AddBackward0>)
EPOCH:  120  LOSS:  tensor(1.1330, grad_fn=<AddBackward0>)
EPOCH:  121  LOSS:  tensor(1.1031, grad_fn=<AddBackward0>)
EPOCH:  122  LOSS:  tensor(1.0745, grad_fn=<AddBackward0>)
EPOCH:  123  LOSS:  tensor(1.0469, grad_fn=<AddBackward0>)
EPOCH:  124  LOSS:  tensor(1.0203, grad_fn=<AddBackward0>)
EPOCH:  125  LOSS:  tensor(0.9953, grad_fn=<AddBackward0>)
EPOCH:  126  LOSS:  tensor(0.9713, grad_fn=<AddBackward0>)
EPOCH:  127  LOSS:  tensor(0.9483, grad_fn=<AddBackward0>)
EPOCH:  128  LOSS:  tensor(0.9262, grad_fn=<AddBackward0>)
EPOCH:  129  LOSS:  tensor(0.9050, grad_fn=<AddBackward0>)
EPOCH:  130  LOSS:  tensor(0.8840, grad_fn=<AddBackward0>)
EPOCH:  131  LOSS:  tensor(0.8639, grad_fn=<AddBackward0>)
EPOCH:  132  LOSS:  tensor(0.8447, grad_fn=<AddBackward0

EPOCH:  255  LOSS:  tensor(0.2233, grad_fn=<AddBackward0>)
EPOCH:  256  LOSS:  tensor(0.2222, grad_fn=<AddBackward0>)
EPOCH:  257  LOSS:  tensor(0.2212, grad_fn=<AddBackward0>)
EPOCH:  258  LOSS:  tensor(0.2201, grad_fn=<AddBackward0>)
EPOCH:  259  LOSS:  tensor(0.2192, grad_fn=<AddBackward0>)
EPOCH:  260  LOSS:  tensor(0.2181, grad_fn=<AddBackward0>)
EPOCH:  261  LOSS:  tensor(0.2171, grad_fn=<AddBackward0>)
EPOCH:  262  LOSS:  tensor(0.2162, grad_fn=<AddBackward0>)
EPOCH:  263  LOSS:  tensor(0.2153, grad_fn=<AddBackward0>)
EPOCH:  264  LOSS:  tensor(0.2144, grad_fn=<AddBackward0>)
EPOCH:  265  LOSS:  tensor(0.2134, grad_fn=<AddBackward0>)
EPOCH:  266  LOSS:  tensor(0.2125, grad_fn=<AddBackward0>)
EPOCH:  267  LOSS:  tensor(0.2116, grad_fn=<AddBackward0>)
EPOCH:  268  LOSS:  tensor(0.2108, grad_fn=<AddBackward0>)
EPOCH:  269  LOSS:  tensor(0.2099, grad_fn=<AddBackward0>)
EPOCH:  270  LOSS:  tensor(0.2091, grad_fn=<AddBackward0>)
EPOCH:  271  LOSS:  tensor(0.2083, grad_fn=<AddBackward0

EPOCH:  394  LOSS:  tensor(0.1357, grad_fn=<AddBackward0>)
EPOCH:  395  LOSS:  tensor(0.1354, grad_fn=<AddBackward0>)
EPOCH:  396  LOSS:  tensor(0.1351, grad_fn=<AddBackward0>)
EPOCH:  397  LOSS:  tensor(0.1348, grad_fn=<AddBackward0>)
EPOCH:  398  LOSS:  tensor(0.1345, grad_fn=<AddBackward0>)
EPOCH:  399  LOSS:  tensor(0.1341, grad_fn=<AddBackward0>)
EPOCH:  400  LOSS:  tensor(0.1338, grad_fn=<AddBackward0>)
EPOCH:  401  LOSS:  tensor(0.1335, grad_fn=<AddBackward0>)
EPOCH:  402  LOSS:  tensor(0.1331, grad_fn=<AddBackward0>)
EPOCH:  403  LOSS:  tensor(0.1328, grad_fn=<AddBackward0>)
EPOCH:  404  LOSS:  tensor(0.1325, grad_fn=<AddBackward0>)
EPOCH:  405  LOSS:  tensor(0.1322, grad_fn=<AddBackward0>)
EPOCH:  406  LOSS:  tensor(0.1318, grad_fn=<AddBackward0>)
EPOCH:  407  LOSS:  tensor(0.1315, grad_fn=<AddBackward0>)
EPOCH:  408  LOSS:  tensor(0.1311, grad_fn=<AddBackward0>)
EPOCH:  409  LOSS:  tensor(0.1308, grad_fn=<AddBackward0>)
EPOCH:  410  LOSS:  tensor(0.1305, grad_fn=<AddBackward0

EPOCH:  533  LOSS:  tensor(0.0974, grad_fn=<AddBackward0>)
EPOCH:  534  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  535  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  536  LOSS:  tensor(0.0968, grad_fn=<AddBackward0>)
EPOCH:  537  LOSS:  tensor(0.0966, grad_fn=<AddBackward0>)
EPOCH:  538  LOSS:  tensor(0.0964, grad_fn=<AddBackward0>)
EPOCH:  539  LOSS:  tensor(0.0962, grad_fn=<AddBackward0>)
EPOCH:  540  LOSS:  tensor(0.0960, grad_fn=<AddBackward0>)
EPOCH:  541  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  542  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  543  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  544  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  545  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  546  LOSS:  tensor(0.0949, grad_fn=<AddBackward0>)
EPOCH:  547  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  548  LOSS:  tensor(0.0946, grad_fn=<AddBackward0>)
EPOCH:  549  LOSS:  tensor(0.0944, grad_fn=<AddBackward0

EPOCH:  672  LOSS:  tensor(0.0811, grad_fn=<AddBackward0>)
EPOCH:  673  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  674  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  675  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  676  LOSS:  tensor(0.0807, grad_fn=<AddBackward0>)
EPOCH:  677  LOSS:  tensor(0.0806, grad_fn=<AddBackward0>)
EPOCH:  678  LOSS:  tensor(0.0805, grad_fn=<AddBackward0>)
EPOCH:  679  LOSS:  tensor(0.0804, grad_fn=<AddBackward0>)
EPOCH:  680  LOSS:  tensor(0.0803, grad_fn=<AddBackward0>)
EPOCH:  681  LOSS:  tensor(0.0802, grad_fn=<AddBackward0>)
EPOCH:  682  LOSS:  tensor(0.0802, grad_fn=<AddBackward0>)
EPOCH:  683  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  684  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  685  LOSS:  tensor(0.0799, grad_fn=<AddBackward0>)
EPOCH:  686  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  687  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  688  LOSS:  tensor(0.0797, grad_fn=<AddBackward0

EPOCH:  811  LOSS:  tensor(0.0720, grad_fn=<AddBackward0>)
EPOCH:  812  LOSS:  tensor(0.0720, grad_fn=<AddBackward0>)
EPOCH:  813  LOSS:  tensor(0.0719, grad_fn=<AddBackward0>)
EPOCH:  814  LOSS:  tensor(0.0719, grad_fn=<AddBackward0>)
EPOCH:  815  LOSS:  tensor(0.0718, grad_fn=<AddBackward0>)
EPOCH:  816  LOSS:  tensor(0.0718, grad_fn=<AddBackward0>)
EPOCH:  817  LOSS:  tensor(0.0717, grad_fn=<AddBackward0>)
EPOCH:  818  LOSS:  tensor(0.0717, grad_fn=<AddBackward0>)
EPOCH:  819  LOSS:  tensor(0.0716, grad_fn=<AddBackward0>)
EPOCH:  820  LOSS:  tensor(0.0716, grad_fn=<AddBackward0>)
EPOCH:  821  LOSS:  tensor(0.0716, grad_fn=<AddBackward0>)
EPOCH:  822  LOSS:  tensor(0.0715, grad_fn=<AddBackward0>)
EPOCH:  823  LOSS:  tensor(0.0715, grad_fn=<AddBackward0>)
EPOCH:  824  LOSS:  tensor(0.0714, grad_fn=<AddBackward0>)
EPOCH:  825  LOSS:  tensor(0.0714, grad_fn=<AddBackward0>)
EPOCH:  826  LOSS:  tensor(0.0713, grad_fn=<AddBackward0>)
EPOCH:  827  LOSS:  tensor(0.0713, grad_fn=<AddBackward0

EPOCH:  950  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  951  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  952  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  953  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  954  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  955  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  956  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  957  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  958  LOSS:  tensor(0.0639, grad_fn=<AddBackward0>)
EPOCH:  959  LOSS:  tensor(0.0639, grad_fn=<AddBackward0>)
EPOCH:  960  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  961  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  962  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  963  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  964  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  965  LOSS:  tensor(0.0636, grad_fn=<AddBackward0>)
EPOCH:  966  LOSS:  tensor(0.0636, grad_fn=<AddBackward0

EPOCH:  1088  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1089  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1090  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1091  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1092  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1093  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1094  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1095  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1096  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1097  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  1098  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  1099  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  1100  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  1101  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  1102  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  1103  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  1104  LOSS:  tensor(0.0560, grad

EPOCH:  24  LOSS:  tensor(64.9372, grad_fn=<AddBackward0>)
EPOCH:  25  LOSS:  tensor(62.7778, grad_fn=<AddBackward0>)
EPOCH:  26  LOSS:  tensor(60.9025, grad_fn=<AddBackward0>)
EPOCH:  27  LOSS:  tensor(59.1639, grad_fn=<AddBackward0>)
EPOCH:  28  LOSS:  tensor(57.4930, grad_fn=<AddBackward0>)
EPOCH:  29  LOSS:  tensor(55.7240, grad_fn=<AddBackward0>)
EPOCH:  30  LOSS:  tensor(53.8264, grad_fn=<AddBackward0>)
EPOCH:  31  LOSS:  tensor(51.8658, grad_fn=<AddBackward0>)
EPOCH:  32  LOSS:  tensor(49.8808, grad_fn=<AddBackward0>)
EPOCH:  33  LOSS:  tensor(47.8659, grad_fn=<AddBackward0>)
EPOCH:  34  LOSS:  tensor(45.8841, grad_fn=<AddBackward0>)
EPOCH:  35  LOSS:  tensor(43.9682, grad_fn=<AddBackward0>)
EPOCH:  36  LOSS:  tensor(42.1043, grad_fn=<AddBackward0>)
EPOCH:  37  LOSS:  tensor(40.3562, grad_fn=<AddBackward0>)
EPOCH:  38  LOSS:  tensor(38.7311, grad_fn=<AddBackward0>)
EPOCH:  39  LOSS:  tensor(37.1118, grad_fn=<AddBackward0>)
EPOCH:  40  LOSS:  tensor(35.5731, grad_fn=<AddBackward0

EPOCH:  164  LOSS:  tensor(0.6100, grad_fn=<AddBackward0>)
EPOCH:  165  LOSS:  tensor(0.6036, grad_fn=<AddBackward0>)
EPOCH:  166  LOSS:  tensor(0.5974, grad_fn=<AddBackward0>)
EPOCH:  167  LOSS:  tensor(0.5915, grad_fn=<AddBackward0>)
EPOCH:  168  LOSS:  tensor(0.5858, grad_fn=<AddBackward0>)
EPOCH:  169  LOSS:  tensor(0.5802, grad_fn=<AddBackward0>)
EPOCH:  170  LOSS:  tensor(0.5747, grad_fn=<AddBackward0>)
EPOCH:  171  LOSS:  tensor(0.5693, grad_fn=<AddBackward0>)
EPOCH:  172  LOSS:  tensor(0.5640, grad_fn=<AddBackward0>)
EPOCH:  173  LOSS:  tensor(0.5588, grad_fn=<AddBackward0>)
EPOCH:  174  LOSS:  tensor(0.5537, grad_fn=<AddBackward0>)
EPOCH:  175  LOSS:  tensor(0.5487, grad_fn=<AddBackward0>)
EPOCH:  176  LOSS:  tensor(0.5438, grad_fn=<AddBackward0>)
EPOCH:  177  LOSS:  tensor(0.5390, grad_fn=<AddBackward0>)
EPOCH:  178  LOSS:  tensor(0.5346, grad_fn=<AddBackward0>)
EPOCH:  179  LOSS:  tensor(0.5303, grad_fn=<AddBackward0>)
EPOCH:  180  LOSS:  tensor(0.5262, grad_fn=<AddBackward0

EPOCH:  303  LOSS:  tensor(0.2688, grad_fn=<AddBackward0>)
EPOCH:  304  LOSS:  tensor(0.2678, grad_fn=<AddBackward0>)
EPOCH:  305  LOSS:  tensor(0.2669, grad_fn=<AddBackward0>)
EPOCH:  306  LOSS:  tensor(0.2659, grad_fn=<AddBackward0>)
EPOCH:  307  LOSS:  tensor(0.2649, grad_fn=<AddBackward0>)
EPOCH:  308  LOSS:  tensor(0.2639, grad_fn=<AddBackward0>)
EPOCH:  309  LOSS:  tensor(0.2629, grad_fn=<AddBackward0>)
EPOCH:  310  LOSS:  tensor(0.2619, grad_fn=<AddBackward0>)
EPOCH:  311  LOSS:  tensor(0.2609, grad_fn=<AddBackward0>)
EPOCH:  312  LOSS:  tensor(0.2599, grad_fn=<AddBackward0>)
EPOCH:  313  LOSS:  tensor(0.2589, grad_fn=<AddBackward0>)
EPOCH:  314  LOSS:  tensor(0.2580, grad_fn=<AddBackward0>)
EPOCH:  315  LOSS:  tensor(0.2571, grad_fn=<AddBackward0>)
EPOCH:  316  LOSS:  tensor(0.2562, grad_fn=<AddBackward0>)
EPOCH:  317  LOSS:  tensor(0.2553, grad_fn=<AddBackward0>)
EPOCH:  318  LOSS:  tensor(0.2544, grad_fn=<AddBackward0>)
EPOCH:  319  LOSS:  tensor(0.2536, grad_fn=<AddBackward0

EPOCH:  442  LOSS:  tensor(0.1877, grad_fn=<AddBackward0>)
EPOCH:  443  LOSS:  tensor(0.1874, grad_fn=<AddBackward0>)
EPOCH:  444  LOSS:  tensor(0.1870, grad_fn=<AddBackward0>)
EPOCH:  445  LOSS:  tensor(0.1867, grad_fn=<AddBackward0>)
EPOCH:  446  LOSS:  tensor(0.1863, grad_fn=<AddBackward0>)
EPOCH:  447  LOSS:  tensor(0.1860, grad_fn=<AddBackward0>)
EPOCH:  448  LOSS:  tensor(0.1857, grad_fn=<AddBackward0>)
EPOCH:  449  LOSS:  tensor(0.1854, grad_fn=<AddBackward0>)
EPOCH:  450  LOSS:  tensor(0.1851, grad_fn=<AddBackward0>)
EPOCH:  451  LOSS:  tensor(0.1848, grad_fn=<AddBackward0>)
EPOCH:  452  LOSS:  tensor(0.1845, grad_fn=<AddBackward0>)
EPOCH:  453  LOSS:  tensor(0.1842, grad_fn=<AddBackward0>)
EPOCH:  454  LOSS:  tensor(0.1839, grad_fn=<AddBackward0>)
EPOCH:  455  LOSS:  tensor(0.1836, grad_fn=<AddBackward0>)
EPOCH:  456  LOSS:  tensor(0.1833, grad_fn=<AddBackward0>)
EPOCH:  457  LOSS:  tensor(0.1830, grad_fn=<AddBackward0>)
EPOCH:  458  LOSS:  tensor(0.1827, grad_fn=<AddBackward0

EPOCH:  581  LOSS:  tensor(0.1442, grad_fn=<AddBackward0>)
EPOCH:  582  LOSS:  tensor(0.1437, grad_fn=<AddBackward0>)
EPOCH:  583  LOSS:  tensor(0.1434, grad_fn=<AddBackward0>)
EPOCH:  584  LOSS:  tensor(0.1430, grad_fn=<AddBackward0>)
EPOCH:  585  LOSS:  tensor(0.1427, grad_fn=<AddBackward0>)
EPOCH:  586  LOSS:  tensor(0.1423, grad_fn=<AddBackward0>)
EPOCH:  587  LOSS:  tensor(0.1419, grad_fn=<AddBackward0>)
EPOCH:  588  LOSS:  tensor(0.1416, grad_fn=<AddBackward0>)
EPOCH:  589  LOSS:  tensor(0.1413, grad_fn=<AddBackward0>)
EPOCH:  590  LOSS:  tensor(0.1410, grad_fn=<AddBackward0>)
EPOCH:  591  LOSS:  tensor(0.1406, grad_fn=<AddBackward0>)
EPOCH:  592  LOSS:  tensor(0.1403, grad_fn=<AddBackward0>)
EPOCH:  593  LOSS:  tensor(0.1400, grad_fn=<AddBackward0>)
EPOCH:  594  LOSS:  tensor(0.1396, grad_fn=<AddBackward0>)
EPOCH:  595  LOSS:  tensor(0.1393, grad_fn=<AddBackward0>)
EPOCH:  596  LOSS:  tensor(0.1390, grad_fn=<AddBackward0>)
EPOCH:  597  LOSS:  tensor(0.1387, grad_fn=<AddBackward0

EPOCH:  720  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  721  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  722  LOSS:  tensor(0.0909, grad_fn=<AddBackward0>)
EPOCH:  723  LOSS:  tensor(0.0906, grad_fn=<AddBackward0>)
EPOCH:  724  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  725  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  726  LOSS:  tensor(0.0897, grad_fn=<AddBackward0>)
EPOCH:  727  LOSS:  tensor(0.0895, grad_fn=<AddBackward0>)
EPOCH:  728  LOSS:  tensor(0.0892, grad_fn=<AddBackward0>)
EPOCH:  729  LOSS:  tensor(0.0890, grad_fn=<AddBackward0>)
EPOCH:  730  LOSS:  tensor(0.0888, grad_fn=<AddBackward0>)
EPOCH:  731  LOSS:  tensor(0.0885, grad_fn=<AddBackward0>)
EPOCH:  732  LOSS:  tensor(0.0883, grad_fn=<AddBackward0>)
EPOCH:  733  LOSS:  tensor(0.0880, grad_fn=<AddBackward0>)
EPOCH:  734  LOSS:  tensor(0.0878, grad_fn=<AddBackward0>)
EPOCH:  735  LOSS:  tensor(0.0875, grad_fn=<AddBackward0>)
EPOCH:  736  LOSS:  tensor(0.0873, grad_fn=<AddBackward0

EPOCH:  859  LOSS:  tensor(0.0689, grad_fn=<AddBackward0>)
EPOCH:  860  LOSS:  tensor(0.0688, grad_fn=<AddBackward0>)
EPOCH:  861  LOSS:  tensor(0.0687, grad_fn=<AddBackward0>)
EPOCH:  862  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  863  LOSS:  tensor(0.0685, grad_fn=<AddBackward0>)
EPOCH:  864  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  865  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  866  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  867  LOSS:  tensor(0.0681, grad_fn=<AddBackward0>)
EPOCH:  868  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  869  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  870  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  871  LOSS:  tensor(0.0677, grad_fn=<AddBackward0>)
EPOCH:  872  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  873  LOSS:  tensor(0.0675, grad_fn=<AddBackward0>)
EPOCH:  874  LOSS:  tensor(0.0674, grad_fn=<AddBackward0>)
EPOCH:  875  LOSS:  tensor(0.0673, grad_fn=<AddBackward0

EPOCH:  998  LOSS:  tensor(0.0581, grad_fn=<AddBackward0>)
EPOCH:  999  LOSS:  tensor(0.0580, grad_fn=<AddBackward0>)
EPOCH:  1000  LOSS:  tensor(0.0580, grad_fn=<AddBackward0>)
EPOCH:  1001  LOSS:  tensor(0.0579, grad_fn=<AddBackward0>)
EPOCH:  1002  LOSS:  tensor(0.0578, grad_fn=<AddBackward0>)
EPOCH:  1003  LOSS:  tensor(0.0578, grad_fn=<AddBackward0>)
EPOCH:  1004  LOSS:  tensor(0.0577, grad_fn=<AddBackward0>)
EPOCH:  1005  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1006  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1007  LOSS:  tensor(0.0575, grad_fn=<AddBackward0>)
EPOCH:  1008  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  1009  LOSS:  tensor(0.0573, grad_fn=<AddBackward0>)
EPOCH:  1010  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1011  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1012  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1013  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1014  LOSS:  tensor(0.0570, grad_f

EPOCH:  1135  LOSS:  tensor(0.0484, grad_fn=<AddBackward0>)
EPOCH:  1136  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1137  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1138  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1139  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  1140  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1141  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1142  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  1143  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  1144  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1145  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1146  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  1147  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  1148  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  1149  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1150  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  1151  LOSS:  tensor(0.0477, grad

EPOCH:  72  LOSS:  tensor(9.0827, grad_fn=<AddBackward0>)
EPOCH:  73  LOSS:  tensor(8.6193, grad_fn=<AddBackward0>)
EPOCH:  74  LOSS:  tensor(8.1847, grad_fn=<AddBackward0>)
EPOCH:  75  LOSS:  tensor(7.7739, grad_fn=<AddBackward0>)
EPOCH:  76  LOSS:  tensor(7.3788, grad_fn=<AddBackward0>)
EPOCH:  77  LOSS:  tensor(6.9992, grad_fn=<AddBackward0>)
EPOCH:  78  LOSS:  tensor(6.6347, grad_fn=<AddBackward0>)
EPOCH:  79  LOSS:  tensor(6.2940, grad_fn=<AddBackward0>)
EPOCH:  80  LOSS:  tensor(5.9775, grad_fn=<AddBackward0>)
EPOCH:  81  LOSS:  tensor(5.6818, grad_fn=<AddBackward0>)
EPOCH:  82  LOSS:  tensor(5.4059, grad_fn=<AddBackward0>)
EPOCH:  83  LOSS:  tensor(5.1473, grad_fn=<AddBackward0>)
EPOCH:  84  LOSS:  tensor(4.9028, grad_fn=<AddBackward0>)
EPOCH:  85  LOSS:  tensor(4.6688, grad_fn=<AddBackward0>)
EPOCH:  86  LOSS:  tensor(4.4488, grad_fn=<AddBackward0>)
EPOCH:  87  LOSS:  tensor(4.2463, grad_fn=<AddBackward0>)
EPOCH:  88  LOSS:  tensor(4.0579, grad_fn=<AddBackward0>)
EPOCH:  89  LO

EPOCH:  212  LOSS:  tensor(0.5274, grad_fn=<AddBackward0>)
EPOCH:  213  LOSS:  tensor(0.5243, grad_fn=<AddBackward0>)
EPOCH:  214  LOSS:  tensor(0.5212, grad_fn=<AddBackward0>)
EPOCH:  215  LOSS:  tensor(0.5183, grad_fn=<AddBackward0>)
EPOCH:  216  LOSS:  tensor(0.5153, grad_fn=<AddBackward0>)
EPOCH:  217  LOSS:  tensor(0.5122, grad_fn=<AddBackward0>)
EPOCH:  218  LOSS:  tensor(0.5095, grad_fn=<AddBackward0>)
EPOCH:  219  LOSS:  tensor(0.5067, grad_fn=<AddBackward0>)
EPOCH:  220  LOSS:  tensor(0.5038, grad_fn=<AddBackward0>)
EPOCH:  221  LOSS:  tensor(0.5011, grad_fn=<AddBackward0>)
EPOCH:  222  LOSS:  tensor(0.4984, grad_fn=<AddBackward0>)
EPOCH:  223  LOSS:  tensor(0.4956, grad_fn=<AddBackward0>)
EPOCH:  224  LOSS:  tensor(0.4930, grad_fn=<AddBackward0>)
EPOCH:  225  LOSS:  tensor(0.4903, grad_fn=<AddBackward0>)
EPOCH:  226  LOSS:  tensor(0.4877, grad_fn=<AddBackward0>)
EPOCH:  227  LOSS:  tensor(0.4851, grad_fn=<AddBackward0>)
EPOCH:  228  LOSS:  tensor(0.4825, grad_fn=<AddBackward0

EPOCH:  351  LOSS:  tensor(0.2847, grad_fn=<AddBackward0>)
EPOCH:  352  LOSS:  tensor(0.2839, grad_fn=<AddBackward0>)
EPOCH:  353  LOSS:  tensor(0.2831, grad_fn=<AddBackward0>)
EPOCH:  354  LOSS:  tensor(0.2824, grad_fn=<AddBackward0>)
EPOCH:  355  LOSS:  tensor(0.2817, grad_fn=<AddBackward0>)
EPOCH:  356  LOSS:  tensor(0.2809, grad_fn=<AddBackward0>)
EPOCH:  357  LOSS:  tensor(0.2802, grad_fn=<AddBackward0>)
EPOCH:  358  LOSS:  tensor(0.2795, grad_fn=<AddBackward0>)
EPOCH:  359  LOSS:  tensor(0.2787, grad_fn=<AddBackward0>)
EPOCH:  360  LOSS:  tensor(0.2780, grad_fn=<AddBackward0>)
EPOCH:  361  LOSS:  tensor(0.2774, grad_fn=<AddBackward0>)
EPOCH:  362  LOSS:  tensor(0.2766, grad_fn=<AddBackward0>)
EPOCH:  363  LOSS:  tensor(0.2759, grad_fn=<AddBackward0>)
EPOCH:  364  LOSS:  tensor(0.2752, grad_fn=<AddBackward0>)
EPOCH:  365  LOSS:  tensor(0.2746, grad_fn=<AddBackward0>)
EPOCH:  366  LOSS:  tensor(0.2738, grad_fn=<AddBackward0>)
EPOCH:  367  LOSS:  tensor(0.2732, grad_fn=<AddBackward0

EPOCH:  490  LOSS:  tensor(0.2164, grad_fn=<AddBackward0>)
EPOCH:  491  LOSS:  tensor(0.2161, grad_fn=<AddBackward0>)
EPOCH:  492  LOSS:  tensor(0.2158, grad_fn=<AddBackward0>)
EPOCH:  493  LOSS:  tensor(0.2154, grad_fn=<AddBackward0>)
EPOCH:  494  LOSS:  tensor(0.2151, grad_fn=<AddBackward0>)
EPOCH:  495  LOSS:  tensor(0.2148, grad_fn=<AddBackward0>)
EPOCH:  496  LOSS:  tensor(0.2145, grad_fn=<AddBackward0>)
EPOCH:  497  LOSS:  tensor(0.2142, grad_fn=<AddBackward0>)
EPOCH:  498  LOSS:  tensor(0.2139, grad_fn=<AddBackward0>)
EPOCH:  499  LOSS:  tensor(0.2136, grad_fn=<AddBackward0>)
EPOCH:  500  LOSS:  tensor(0.2133, grad_fn=<AddBackward0>)
EPOCH:  501  LOSS:  tensor(0.2130, grad_fn=<AddBackward0>)
EPOCH:  502  LOSS:  tensor(0.2126, grad_fn=<AddBackward0>)
EPOCH:  503  LOSS:  tensor(0.2123, grad_fn=<AddBackward0>)
EPOCH:  504  LOSS:  tensor(0.2120, grad_fn=<AddBackward0>)
EPOCH:  505  LOSS:  tensor(0.2117, grad_fn=<AddBackward0>)
EPOCH:  506  LOSS:  tensor(0.2114, grad_fn=<AddBackward0

EPOCH:  629  LOSS:  tensor(0.1729, grad_fn=<AddBackward0>)
EPOCH:  630  LOSS:  tensor(0.1727, grad_fn=<AddBackward0>)
EPOCH:  631  LOSS:  tensor(0.1725, grad_fn=<AddBackward0>)
EPOCH:  632  LOSS:  tensor(0.1723, grad_fn=<AddBackward0>)
EPOCH:  633  LOSS:  tensor(0.1721, grad_fn=<AddBackward0>)
EPOCH:  634  LOSS:  tensor(0.1719, grad_fn=<AddBackward0>)
EPOCH:  635  LOSS:  tensor(0.1717, grad_fn=<AddBackward0>)
EPOCH:  636  LOSS:  tensor(0.1715, grad_fn=<AddBackward0>)
EPOCH:  637  LOSS:  tensor(0.1714, grad_fn=<AddBackward0>)
EPOCH:  638  LOSS:  tensor(0.1712, grad_fn=<AddBackward0>)
EPOCH:  639  LOSS:  tensor(0.1710, grad_fn=<AddBackward0>)
EPOCH:  640  LOSS:  tensor(0.1708, grad_fn=<AddBackward0>)
EPOCH:  641  LOSS:  tensor(0.1706, grad_fn=<AddBackward0>)
EPOCH:  642  LOSS:  tensor(0.1704, grad_fn=<AddBackward0>)
EPOCH:  643  LOSS:  tensor(0.1703, grad_fn=<AddBackward0>)
EPOCH:  644  LOSS:  tensor(0.1701, grad_fn=<AddBackward0>)
EPOCH:  645  LOSS:  tensor(0.1699, grad_fn=<AddBackward0

EPOCH:  768  LOSS:  tensor(0.1427, grad_fn=<AddBackward0>)
EPOCH:  769  LOSS:  tensor(0.1425, grad_fn=<AddBackward0>)
EPOCH:  770  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  771  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  772  LOSS:  tensor(0.1420, grad_fn=<AddBackward0>)
EPOCH:  773  LOSS:  tensor(0.1418, grad_fn=<AddBackward0>)
EPOCH:  774  LOSS:  tensor(0.1416, grad_fn=<AddBackward0>)
EPOCH:  775  LOSS:  tensor(0.1415, grad_fn=<AddBackward0>)
EPOCH:  776  LOSS:  tensor(0.1413, grad_fn=<AddBackward0>)
EPOCH:  777  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  778  LOSS:  tensor(0.1410, grad_fn=<AddBackward0>)
EPOCH:  779  LOSS:  tensor(0.1408, grad_fn=<AddBackward0>)
EPOCH:  780  LOSS:  tensor(0.1407, grad_fn=<AddBackward0>)
EPOCH:  781  LOSS:  tensor(0.1405, grad_fn=<AddBackward0>)
EPOCH:  782  LOSS:  tensor(0.1403, grad_fn=<AddBackward0>)
EPOCH:  783  LOSS:  tensor(0.1402, grad_fn=<AddBackward0>)
EPOCH:  784  LOSS:  tensor(0.1400, grad_fn=<AddBackward0

EPOCH:  907  LOSS:  tensor(0.1277, grad_fn=<AddBackward0>)
EPOCH:  908  LOSS:  tensor(0.1285, grad_fn=<AddBackward0>)
EPOCH:  909  LOSS:  tensor(0.1285, grad_fn=<AddBackward0>)
EPOCH:  910  LOSS:  tensor(0.1281, grad_fn=<AddBackward0>)
EPOCH:  911  LOSS:  tensor(0.1266, grad_fn=<AddBackward0>)
EPOCH:  912  LOSS:  tensor(0.1248, grad_fn=<AddBackward0>)
EPOCH:  913  LOSS:  tensor(0.1228, grad_fn=<AddBackward0>)
EPOCH:  914  LOSS:  tensor(0.1211, grad_fn=<AddBackward0>)
EPOCH:  915  LOSS:  tensor(0.1203, grad_fn=<AddBackward0>)
EPOCH:  916  LOSS:  tensor(0.1204, grad_fn=<AddBackward0>)
EPOCH:  917  LOSS:  tensor(0.1211, grad_fn=<AddBackward0>)
EPOCH:  918  LOSS:  tensor(0.1221, grad_fn=<AddBackward0>)
EPOCH:  919  LOSS:  tensor(0.1233, grad_fn=<AddBackward0>)
EPOCH:  920  LOSS:  tensor(0.1243, grad_fn=<AddBackward0>)
EPOCH:  921  LOSS:  tensor(0.1251, grad_fn=<AddBackward0>)
EPOCH:  922  LOSS:  tensor(0.1250, grad_fn=<AddBackward0>)
EPOCH:  923  LOSS:  tensor(0.1244, grad_fn=<AddBackward0

EPOCH:  1046  LOSS:  tensor(0.1087, grad_fn=<AddBackward0>)
EPOCH:  1047  LOSS:  tensor(0.1083, grad_fn=<AddBackward0>)
EPOCH:  1048  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  1049  LOSS:  tensor(0.1080, grad_fn=<AddBackward0>)
EPOCH:  1050  LOSS:  tensor(0.1082, grad_fn=<AddBackward0>)
EPOCH:  1051  LOSS:  tensor(0.1086, grad_fn=<AddBackward0>)
EPOCH:  1052  LOSS:  tensor(0.1092, grad_fn=<AddBackward0>)
EPOCH:  1053  LOSS:  tensor(0.1098, grad_fn=<AddBackward0>)
EPOCH:  1054  LOSS:  tensor(0.1108, grad_fn=<AddBackward0>)
EPOCH:  1055  LOSS:  tensor(0.1116, grad_fn=<AddBackward0>)
EPOCH:  1056  LOSS:  tensor(0.1125, grad_fn=<AddBackward0>)
EPOCH:  1057  LOSS:  tensor(0.1132, grad_fn=<AddBackward0>)
EPOCH:  1058  LOSS:  tensor(0.1140, grad_fn=<AddBackward0>)
EPOCH:  1059  LOSS:  tensor(0.1145, grad_fn=<AddBackward0>)
EPOCH:  1060  LOSS:  tensor(0.1148, grad_fn=<AddBackward0>)
EPOCH:  1061  LOSS:  tensor(0.1145, grad_fn=<AddBackward0>)
EPOCH:  1062  LOSS:  tensor(0.1138, grad

EPOCH:  1183  LOSS:  tensor(0.1018, grad_fn=<AddBackward0>)
EPOCH:  1184  LOSS:  tensor(0.1021, grad_fn=<AddBackward0>)
EPOCH:  1185  LOSS:  tensor(0.1026, grad_fn=<AddBackward0>)
EPOCH:  1186  LOSS:  tensor(0.1032, grad_fn=<AddBackward0>)
EPOCH:  1187  LOSS:  tensor(0.1041, grad_fn=<AddBackward0>)
EPOCH:  1188  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  1189  LOSS:  tensor(0.1064, grad_fn=<AddBackward0>)
EPOCH:  1190  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  1191  LOSS:  tensor(0.1101, grad_fn=<AddBackward0>)
EPOCH:  1192  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  1193  LOSS:  tensor(0.1137, grad_fn=<AddBackward0>)
EPOCH:  1194  LOSS:  tensor(0.1148, grad_fn=<AddBackward0>)
EPOCH:  1195  LOSS:  tensor(0.1155, grad_fn=<AddBackward0>)
EPOCH:  1196  LOSS:  tensor(0.1145, grad_fn=<AddBackward0>)
EPOCH:  1197  LOSS:  tensor(0.1126, grad_fn=<AddBackward0>)
EPOCH:  1198  LOSS:  tensor(0.1094, grad_fn=<AddBackward0>)
EPOCH:  1199  LOSS:  tensor(0.1061, grad

EPOCH:  121  LOSS:  tensor(0.8787, grad_fn=<AddBackward0>)
EPOCH:  122  LOSS:  tensor(0.8678, grad_fn=<AddBackward0>)
EPOCH:  123  LOSS:  tensor(0.8569, grad_fn=<AddBackward0>)
EPOCH:  124  LOSS:  tensor(0.8459, grad_fn=<AddBackward0>)
EPOCH:  125  LOSS:  tensor(0.8352, grad_fn=<AddBackward0>)
EPOCH:  126  LOSS:  tensor(0.8249, grad_fn=<AddBackward0>)
EPOCH:  127  LOSS:  tensor(0.8143, grad_fn=<AddBackward0>)
EPOCH:  128  LOSS:  tensor(0.8039, grad_fn=<AddBackward0>)
EPOCH:  129  LOSS:  tensor(0.7939, grad_fn=<AddBackward0>)
EPOCH:  130  LOSS:  tensor(0.7844, grad_fn=<AddBackward0>)
EPOCH:  131  LOSS:  tensor(0.7749, grad_fn=<AddBackward0>)
EPOCH:  132  LOSS:  tensor(0.7656, grad_fn=<AddBackward0>)
EPOCH:  133  LOSS:  tensor(0.7565, grad_fn=<AddBackward0>)
EPOCH:  134  LOSS:  tensor(0.7475, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(0.7385, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(0.7298, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(0.7213, grad_fn=<AddBackward0

EPOCH:  260  LOSS:  tensor(0.3036, grad_fn=<AddBackward0>)
EPOCH:  261  LOSS:  tensor(0.3023, grad_fn=<AddBackward0>)
EPOCH:  262  LOSS:  tensor(0.3010, grad_fn=<AddBackward0>)
EPOCH:  263  LOSS:  tensor(0.2998, grad_fn=<AddBackward0>)
EPOCH:  264  LOSS:  tensor(0.2986, grad_fn=<AddBackward0>)
EPOCH:  265  LOSS:  tensor(0.2974, grad_fn=<AddBackward0>)
EPOCH:  266  LOSS:  tensor(0.2963, grad_fn=<AddBackward0>)
EPOCH:  267  LOSS:  tensor(0.2952, grad_fn=<AddBackward0>)
EPOCH:  268  LOSS:  tensor(0.2941, grad_fn=<AddBackward0>)
EPOCH:  269  LOSS:  tensor(0.2930, grad_fn=<AddBackward0>)
EPOCH:  270  LOSS:  tensor(0.2920, grad_fn=<AddBackward0>)
EPOCH:  271  LOSS:  tensor(0.2911, grad_fn=<AddBackward0>)
EPOCH:  272  LOSS:  tensor(0.2901, grad_fn=<AddBackward0>)
EPOCH:  273  LOSS:  tensor(0.2892, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.2883, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.2874, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.2865, grad_fn=<AddBackward0

EPOCH:  399  LOSS:  tensor(0.2147, grad_fn=<AddBackward0>)
EPOCH:  400  LOSS:  tensor(0.2143, grad_fn=<AddBackward0>)
EPOCH:  401  LOSS:  tensor(0.2140, grad_fn=<AddBackward0>)
EPOCH:  402  LOSS:  tensor(0.2136, grad_fn=<AddBackward0>)
EPOCH:  403  LOSS:  tensor(0.2133, grad_fn=<AddBackward0>)
EPOCH:  404  LOSS:  tensor(0.2130, grad_fn=<AddBackward0>)
EPOCH:  405  LOSS:  tensor(0.2126, grad_fn=<AddBackward0>)
EPOCH:  406  LOSS:  tensor(0.2123, grad_fn=<AddBackward0>)
EPOCH:  407  LOSS:  tensor(0.2120, grad_fn=<AddBackward0>)
EPOCH:  408  LOSS:  tensor(0.2116, grad_fn=<AddBackward0>)
EPOCH:  409  LOSS:  tensor(0.2113, grad_fn=<AddBackward0>)
EPOCH:  410  LOSS:  tensor(0.2110, grad_fn=<AddBackward0>)
EPOCH:  411  LOSS:  tensor(0.2107, grad_fn=<AddBackward0>)
EPOCH:  412  LOSS:  tensor(0.2104, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.2101, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.2099, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.2096, grad_fn=<AddBackward0

EPOCH:  538  LOSS:  tensor(0.1721, grad_fn=<AddBackward0>)
EPOCH:  539  LOSS:  tensor(0.1719, grad_fn=<AddBackward0>)
EPOCH:  540  LOSS:  tensor(0.1718, grad_fn=<AddBackward0>)
EPOCH:  541  LOSS:  tensor(0.1715, grad_fn=<AddBackward0>)
EPOCH:  542  LOSS:  tensor(0.1714, grad_fn=<AddBackward0>)
EPOCH:  543  LOSS:  tensor(0.1712, grad_fn=<AddBackward0>)
EPOCH:  544  LOSS:  tensor(0.1710, grad_fn=<AddBackward0>)
EPOCH:  545  LOSS:  tensor(0.1708, grad_fn=<AddBackward0>)
EPOCH:  546  LOSS:  tensor(0.1706, grad_fn=<AddBackward0>)
EPOCH:  547  LOSS:  tensor(0.1704, grad_fn=<AddBackward0>)
EPOCH:  548  LOSS:  tensor(0.1702, grad_fn=<AddBackward0>)
EPOCH:  549  LOSS:  tensor(0.1700, grad_fn=<AddBackward0>)
EPOCH:  550  LOSS:  tensor(0.1698, grad_fn=<AddBackward0>)
EPOCH:  551  LOSS:  tensor(0.1696, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.1694, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.1693, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.1691, grad_fn=<AddBackward0

EPOCH:  677  LOSS:  tensor(0.1455, grad_fn=<AddBackward0>)
EPOCH:  678  LOSS:  tensor(0.1452, grad_fn=<AddBackward0>)
EPOCH:  679  LOSS:  tensor(0.1449, grad_fn=<AddBackward0>)
EPOCH:  680  LOSS:  tensor(0.1446, grad_fn=<AddBackward0>)
EPOCH:  681  LOSS:  tensor(0.1443, grad_fn=<AddBackward0>)
EPOCH:  682  LOSS:  tensor(0.1440, grad_fn=<AddBackward0>)
EPOCH:  683  LOSS:  tensor(0.1437, grad_fn=<AddBackward0>)
EPOCH:  684  LOSS:  tensor(0.1433, grad_fn=<AddBackward0>)
EPOCH:  685  LOSS:  tensor(0.1431, grad_fn=<AddBackward0>)
EPOCH:  686  LOSS:  tensor(0.1428, grad_fn=<AddBackward0>)
EPOCH:  687  LOSS:  tensor(0.1425, grad_fn=<AddBackward0>)
EPOCH:  688  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  689  LOSS:  tensor(0.1419, grad_fn=<AddBackward0>)
EPOCH:  690  LOSS:  tensor(0.1416, grad_fn=<AddBackward0>)
EPOCH:  691  LOSS:  tensor(0.1413, grad_fn=<AddBackward0>)
EPOCH:  692  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  693  LOSS:  tensor(0.1408, grad_fn=<AddBackward0

EPOCH:  816  LOSS:  tensor(0.1166, grad_fn=<AddBackward0>)
EPOCH:  817  LOSS:  tensor(0.1165, grad_fn=<AddBackward0>)
EPOCH:  818  LOSS:  tensor(0.1163, grad_fn=<AddBackward0>)
EPOCH:  819  LOSS:  tensor(0.1162, grad_fn=<AddBackward0>)
EPOCH:  820  LOSS:  tensor(0.1161, grad_fn=<AddBackward0>)
EPOCH:  821  LOSS:  tensor(0.1160, grad_fn=<AddBackward0>)
EPOCH:  822  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  823  LOSS:  tensor(0.1158, grad_fn=<AddBackward0>)
EPOCH:  824  LOSS:  tensor(0.1157, grad_fn=<AddBackward0>)
EPOCH:  825  LOSS:  tensor(0.1155, grad_fn=<AddBackward0>)
EPOCH:  826  LOSS:  tensor(0.1154, grad_fn=<AddBackward0>)
EPOCH:  827  LOSS:  tensor(0.1153, grad_fn=<AddBackward0>)
EPOCH:  828  LOSS:  tensor(0.1152, grad_fn=<AddBackward0>)
EPOCH:  829  LOSS:  tensor(0.1151, grad_fn=<AddBackward0>)
EPOCH:  830  LOSS:  tensor(0.1150, grad_fn=<AddBackward0>)
EPOCH:  831  LOSS:  tensor(0.1149, grad_fn=<AddBackward0>)
EPOCH:  832  LOSS:  tensor(0.1148, grad_fn=<AddBackward0

EPOCH:  955  LOSS:  tensor(0.1028, grad_fn=<AddBackward0>)
EPOCH:  956  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  957  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  958  LOSS:  tensor(0.1026, grad_fn=<AddBackward0>)
EPOCH:  959  LOSS:  tensor(0.1025, grad_fn=<AddBackward0>)
EPOCH:  960  LOSS:  tensor(0.1025, grad_fn=<AddBackward0>)
EPOCH:  961  LOSS:  tensor(0.1024, grad_fn=<AddBackward0>)
EPOCH:  962  LOSS:  tensor(0.1024, grad_fn=<AddBackward0>)
EPOCH:  963  LOSS:  tensor(0.1023, grad_fn=<AddBackward0>)
EPOCH:  964  LOSS:  tensor(0.1022, grad_fn=<AddBackward0>)
EPOCH:  965  LOSS:  tensor(0.1022, grad_fn=<AddBackward0>)
EPOCH:  966  LOSS:  tensor(0.1021, grad_fn=<AddBackward0>)
EPOCH:  967  LOSS:  tensor(0.1020, grad_fn=<AddBackward0>)
EPOCH:  968  LOSS:  tensor(0.1020, grad_fn=<AddBackward0>)
EPOCH:  969  LOSS:  tensor(0.1019, grad_fn=<AddBackward0>)
EPOCH:  970  LOSS:  tensor(0.1019, grad_fn=<AddBackward0>)
EPOCH:  971  LOSS:  tensor(0.1018, grad_fn=<AddBackward0

EPOCH:  1093  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  1094  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  1095  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  1096  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  1097  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  1098  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  1099  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  1100  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  1101  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  1102  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  1103  LOSS:  tensor(0.0954, grad_fn=<AddBackward0>)
EPOCH:  1104  LOSS:  tensor(0.0954, grad_fn=<AddBackward0>)
EPOCH:  1105  LOSS:  tensor(0.0954, grad_fn=<AddBackward0>)
EPOCH:  1106  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  1107  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  1108  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  1109  LOSS:  tensor(0.0952, grad

EPOCH:  29  LOSS:  tensor(43.1146, grad_fn=<AddBackward0>)
EPOCH:  30  LOSS:  tensor(40.9202, grad_fn=<AddBackward0>)
EPOCH:  31  LOSS:  tensor(38.7883, grad_fn=<AddBackward0>)
EPOCH:  32  LOSS:  tensor(36.8242, grad_fn=<AddBackward0>)
EPOCH:  33  LOSS:  tensor(34.9868, grad_fn=<AddBackward0>)
EPOCH:  34  LOSS:  tensor(33.2488, grad_fn=<AddBackward0>)
EPOCH:  35  LOSS:  tensor(31.4984, grad_fn=<AddBackward0>)
EPOCH:  36  LOSS:  tensor(29.7478, grad_fn=<AddBackward0>)
EPOCH:  37  LOSS:  tensor(28.0008, grad_fn=<AddBackward0>)
EPOCH:  38  LOSS:  tensor(26.2226, grad_fn=<AddBackward0>)
EPOCH:  39  LOSS:  tensor(24.4803, grad_fn=<AddBackward0>)
EPOCH:  40  LOSS:  tensor(22.7547, grad_fn=<AddBackward0>)
EPOCH:  41  LOSS:  tensor(21.1126, grad_fn=<AddBackward0>)
EPOCH:  42  LOSS:  tensor(19.5591, grad_fn=<AddBackward0>)
EPOCH:  43  LOSS:  tensor(18.1619, grad_fn=<AddBackward0>)
EPOCH:  44  LOSS:  tensor(16.9096, grad_fn=<AddBackward0>)
EPOCH:  45  LOSS:  tensor(15.8002, grad_fn=<AddBackward0

EPOCH:  169  LOSS:  tensor(0.4461, grad_fn=<AddBackward0>)
EPOCH:  170  LOSS:  tensor(0.4422, grad_fn=<AddBackward0>)
EPOCH:  171  LOSS:  tensor(0.4381, grad_fn=<AddBackward0>)
EPOCH:  172  LOSS:  tensor(0.4344, grad_fn=<AddBackward0>)
EPOCH:  173  LOSS:  tensor(0.4307, grad_fn=<AddBackward0>)
EPOCH:  174  LOSS:  tensor(0.4271, grad_fn=<AddBackward0>)
EPOCH:  175  LOSS:  tensor(0.4233, grad_fn=<AddBackward0>)
EPOCH:  176  LOSS:  tensor(0.4196, grad_fn=<AddBackward0>)
EPOCH:  177  LOSS:  tensor(0.4161, grad_fn=<AddBackward0>)
EPOCH:  178  LOSS:  tensor(0.4126, grad_fn=<AddBackward0>)
EPOCH:  179  LOSS:  tensor(0.4090, grad_fn=<AddBackward0>)
EPOCH:  180  LOSS:  tensor(0.4057, grad_fn=<AddBackward0>)
EPOCH:  181  LOSS:  tensor(0.4023, grad_fn=<AddBackward0>)
EPOCH:  182  LOSS:  tensor(0.3991, grad_fn=<AddBackward0>)
EPOCH:  183  LOSS:  tensor(0.3957, grad_fn=<AddBackward0>)
EPOCH:  184  LOSS:  tensor(0.3926, grad_fn=<AddBackward0>)
EPOCH:  185  LOSS:  tensor(0.3896, grad_fn=<AddBackward0

EPOCH:  308  LOSS:  tensor(0.2035, grad_fn=<AddBackward0>)
EPOCH:  309  LOSS:  tensor(0.2028, grad_fn=<AddBackward0>)
EPOCH:  310  LOSS:  tensor(0.2021, grad_fn=<AddBackward0>)
EPOCH:  311  LOSS:  tensor(0.2014, grad_fn=<AddBackward0>)
EPOCH:  312  LOSS:  tensor(0.2008, grad_fn=<AddBackward0>)
EPOCH:  313  LOSS:  tensor(0.2001, grad_fn=<AddBackward0>)
EPOCH:  314  LOSS:  tensor(0.1994, grad_fn=<AddBackward0>)
EPOCH:  315  LOSS:  tensor(0.1987, grad_fn=<AddBackward0>)
EPOCH:  316  LOSS:  tensor(0.1981, grad_fn=<AddBackward0>)
EPOCH:  317  LOSS:  tensor(0.1974, grad_fn=<AddBackward0>)
EPOCH:  318  LOSS:  tensor(0.1967, grad_fn=<AddBackward0>)
EPOCH:  319  LOSS:  tensor(0.1961, grad_fn=<AddBackward0>)
EPOCH:  320  LOSS:  tensor(0.1954, grad_fn=<AddBackward0>)
EPOCH:  321  LOSS:  tensor(0.1948, grad_fn=<AddBackward0>)
EPOCH:  322  LOSS:  tensor(0.1942, grad_fn=<AddBackward0>)
EPOCH:  323  LOSS:  tensor(0.1936, grad_fn=<AddBackward0>)
EPOCH:  324  LOSS:  tensor(0.1929, grad_fn=<AddBackward0

EPOCH:  447  LOSS:  tensor(0.1454, grad_fn=<AddBackward0>)
EPOCH:  448  LOSS:  tensor(0.1450, grad_fn=<AddBackward0>)
EPOCH:  449  LOSS:  tensor(0.1445, grad_fn=<AddBackward0>)
EPOCH:  450  LOSS:  tensor(0.1441, grad_fn=<AddBackward0>)
EPOCH:  451  LOSS:  tensor(0.1436, grad_fn=<AddBackward0>)
EPOCH:  452  LOSS:  tensor(0.1432, grad_fn=<AddBackward0>)
EPOCH:  453  LOSS:  tensor(0.1428, grad_fn=<AddBackward0>)
EPOCH:  454  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  455  LOSS:  tensor(0.1420, grad_fn=<AddBackward0>)
EPOCH:  456  LOSS:  tensor(0.1417, grad_fn=<AddBackward0>)
EPOCH:  457  LOSS:  tensor(0.1413, grad_fn=<AddBackward0>)
EPOCH:  458  LOSS:  tensor(0.1409, grad_fn=<AddBackward0>)
EPOCH:  459  LOSS:  tensor(0.1406, grad_fn=<AddBackward0>)
EPOCH:  460  LOSS:  tensor(0.1402, grad_fn=<AddBackward0>)
EPOCH:  461  LOSS:  tensor(0.1399, grad_fn=<AddBackward0>)
EPOCH:  462  LOSS:  tensor(0.1395, grad_fn=<AddBackward0>)
EPOCH:  463  LOSS:  tensor(0.1391, grad_fn=<AddBackward0

EPOCH:  586  LOSS:  tensor(0.1115, grad_fn=<AddBackward0>)
EPOCH:  587  LOSS:  tensor(0.1114, grad_fn=<AddBackward0>)
EPOCH:  588  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  589  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  590  LOSS:  tensor(0.1110, grad_fn=<AddBackward0>)
EPOCH:  591  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  592  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  593  LOSS:  tensor(0.1108, grad_fn=<AddBackward0>)
EPOCH:  594  LOSS:  tensor(0.1108, grad_fn=<AddBackward0>)
EPOCH:  595  LOSS:  tensor(0.1108, grad_fn=<AddBackward0>)
EPOCH:  596  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  597  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  598  LOSS:  tensor(0.1110, grad_fn=<AddBackward0>)
EPOCH:  599  LOSS:  tensor(0.1111, grad_fn=<AddBackward0>)
EPOCH:  600  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  601  LOSS:  tensor(0.1114, grad_fn=<AddBackward0>)
EPOCH:  602  LOSS:  tensor(0.1117, grad_fn=<AddBackward0

EPOCH:  725  LOSS:  tensor(0.0967, grad_fn=<AddBackward0>)
EPOCH:  726  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  727  LOSS:  tensor(0.0974, grad_fn=<AddBackward0>)
EPOCH:  728  LOSS:  tensor(0.0979, grad_fn=<AddBackward0>)
EPOCH:  729  LOSS:  tensor(0.0986, grad_fn=<AddBackward0>)
EPOCH:  730  LOSS:  tensor(0.0997, grad_fn=<AddBackward0>)
EPOCH:  731  LOSS:  tensor(0.1012, grad_fn=<AddBackward0>)
EPOCH:  732  LOSS:  tensor(0.1031, grad_fn=<AddBackward0>)
EPOCH:  733  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  734  LOSS:  tensor(0.1086, grad_fn=<AddBackward0>)
EPOCH:  735  LOSS:  tensor(0.1120, grad_fn=<AddBackward0>)
EPOCH:  736  LOSS:  tensor(0.1156, grad_fn=<AddBackward0>)
EPOCH:  737  LOSS:  tensor(0.1191, grad_fn=<AddBackward0>)
EPOCH:  738  LOSS:  tensor(0.1214, grad_fn=<AddBackward0>)
EPOCH:  739  LOSS:  tensor(0.1215, grad_fn=<AddBackward0>)
EPOCH:  740  LOSS:  tensor(0.1187, grad_fn=<AddBackward0>)
EPOCH:  741  LOSS:  tensor(0.1130, grad_fn=<AddBackward0

EPOCH:  864  LOSS:  tensor(0.0853, grad_fn=<AddBackward0>)
EPOCH:  865  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  866  LOSS:  tensor(0.0857, grad_fn=<AddBackward0>)
EPOCH:  867  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  868  LOSS:  tensor(0.0865, grad_fn=<AddBackward0>)
EPOCH:  869  LOSS:  tensor(0.0873, grad_fn=<AddBackward0>)
EPOCH:  870  LOSS:  tensor(0.0883, grad_fn=<AddBackward0>)
EPOCH:  871  LOSS:  tensor(0.0896, grad_fn=<AddBackward0>)
EPOCH:  872  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  873  LOSS:  tensor(0.0939, grad_fn=<AddBackward0>)
EPOCH:  874  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  875  LOSS:  tensor(0.1008, grad_fn=<AddBackward0>)
EPOCH:  876  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  877  LOSS:  tensor(0.1098, grad_fn=<AddBackward0>)
EPOCH:  878  LOSS:  tensor(0.1139, grad_fn=<AddBackward0>)
EPOCH:  879  LOSS:  tensor(0.1163, grad_fn=<AddBackward0>)
EPOCH:  880  LOSS:  tensor(0.1158, grad_fn=<AddBackward0

EPOCH:  1003  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  1004  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  1005  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  1006  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  1007  LOSS:  tensor(0.0739, grad_fn=<AddBackward0>)
EPOCH:  1008  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  1009  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  1010  LOSS:  tensor(0.0735, grad_fn=<AddBackward0>)
EPOCH:  1011  LOSS:  tensor(0.0734, grad_fn=<AddBackward0>)
EPOCH:  1012  LOSS:  tensor(0.0732, grad_fn=<AddBackward0>)
EPOCH:  1013  LOSS:  tensor(0.0732, grad_fn=<AddBackward0>)
EPOCH:  1014  LOSS:  tensor(0.0731, grad_fn=<AddBackward0>)
EPOCH:  1015  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  1016  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  1017  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  1018  LOSS:  tensor(0.0729, grad_fn=<AddBackward0>)
EPOCH:  1019  LOSS:  tensor(0.0729, grad

EPOCH:  1140  LOSS:  tensor(0.0851, grad_fn=<AddBackward0>)
EPOCH:  1141  LOSS:  tensor(0.0843, grad_fn=<AddBackward0>)
EPOCH:  1142  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  1143  LOSS:  tensor(0.0763, grad_fn=<AddBackward0>)
EPOCH:  1144  LOSS:  tensor(0.0719, grad_fn=<AddBackward0>)
EPOCH:  1145  LOSS:  tensor(0.0690, grad_fn=<AddBackward0>)
EPOCH:  1146  LOSS:  tensor(0.0681, grad_fn=<AddBackward0>)
EPOCH:  1147  LOSS:  tensor(0.0691, grad_fn=<AddBackward0>)
EPOCH:  1148  LOSS:  tensor(0.0710, grad_fn=<AddBackward0>)
EPOCH:  1149  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  1150  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  1151  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  1152  LOSS:  tensor(0.0741, grad_fn=<AddBackward0>)
EPOCH:  1153  LOSS:  tensor(0.0727, grad_fn=<AddBackward0>)
EPOCH:  1154  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  1155  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  1156  LOSS:  tensor(0.0683, grad

EPOCH:  77  LOSS:  tensor(3.1786, grad_fn=<AddBackward0>)
EPOCH:  78  LOSS:  tensor(3.0233, grad_fn=<AddBackward0>)
EPOCH:  79  LOSS:  tensor(2.8764, grad_fn=<AddBackward0>)
EPOCH:  80  LOSS:  tensor(2.7364, grad_fn=<AddBackward0>)
EPOCH:  81  LOSS:  tensor(2.6005, grad_fn=<AddBackward0>)
EPOCH:  82  LOSS:  tensor(2.4723, grad_fn=<AddBackward0>)
EPOCH:  83  LOSS:  tensor(2.3517, grad_fn=<AddBackward0>)
EPOCH:  84  LOSS:  tensor(2.2368, grad_fn=<AddBackward0>)
EPOCH:  85  LOSS:  tensor(2.1275, grad_fn=<AddBackward0>)
EPOCH:  86  LOSS:  tensor(2.0199, grad_fn=<AddBackward0>)
EPOCH:  87  LOSS:  tensor(1.9173, grad_fn=<AddBackward0>)
EPOCH:  88  LOSS:  tensor(1.8223, grad_fn=<AddBackward0>)
EPOCH:  89  LOSS:  tensor(1.7348, grad_fn=<AddBackward0>)
EPOCH:  90  LOSS:  tensor(1.6528, grad_fn=<AddBackward0>)
EPOCH:  91  LOSS:  tensor(1.5742, grad_fn=<AddBackward0>)
EPOCH:  92  LOSS:  tensor(1.5058, grad_fn=<AddBackward0>)
EPOCH:  93  LOSS:  tensor(1.4432, grad_fn=<AddBackward0>)
EPOCH:  94  LO

EPOCH:  217  LOSS:  tensor(0.2196, grad_fn=<AddBackward0>)
EPOCH:  218  LOSS:  tensor(0.2182, grad_fn=<AddBackward0>)
EPOCH:  219  LOSS:  tensor(0.2169, grad_fn=<AddBackward0>)
EPOCH:  220  LOSS:  tensor(0.2155, grad_fn=<AddBackward0>)
EPOCH:  221  LOSS:  tensor(0.2142, grad_fn=<AddBackward0>)
EPOCH:  222  LOSS:  tensor(0.2130, grad_fn=<AddBackward0>)
EPOCH:  223  LOSS:  tensor(0.2118, grad_fn=<AddBackward0>)
EPOCH:  224  LOSS:  tensor(0.2106, grad_fn=<AddBackward0>)
EPOCH:  225  LOSS:  tensor(0.2094, grad_fn=<AddBackward0>)
EPOCH:  226  LOSS:  tensor(0.2082, grad_fn=<AddBackward0>)
EPOCH:  227  LOSS:  tensor(0.2071, grad_fn=<AddBackward0>)
EPOCH:  228  LOSS:  tensor(0.2059, grad_fn=<AddBackward0>)
EPOCH:  229  LOSS:  tensor(0.2049, grad_fn=<AddBackward0>)
EPOCH:  230  LOSS:  tensor(0.2038, grad_fn=<AddBackward0>)
EPOCH:  231  LOSS:  tensor(0.2028, grad_fn=<AddBackward0>)
EPOCH:  232  LOSS:  tensor(0.2017, grad_fn=<AddBackward0>)
EPOCH:  233  LOSS:  tensor(0.2007, grad_fn=<AddBackward0

EPOCH:  356  LOSS:  tensor(0.1336, grad_fn=<AddBackward0>)
EPOCH:  357  LOSS:  tensor(0.1333, grad_fn=<AddBackward0>)
EPOCH:  358  LOSS:  tensor(0.1330, grad_fn=<AddBackward0>)
EPOCH:  359  LOSS:  tensor(0.1327, grad_fn=<AddBackward0>)
EPOCH:  360  LOSS:  tensor(0.1324, grad_fn=<AddBackward0>)
EPOCH:  361  LOSS:  tensor(0.1321, grad_fn=<AddBackward0>)
EPOCH:  362  LOSS:  tensor(0.1318, grad_fn=<AddBackward0>)
EPOCH:  363  LOSS:  tensor(0.1316, grad_fn=<AddBackward0>)
EPOCH:  364  LOSS:  tensor(0.1313, grad_fn=<AddBackward0>)
EPOCH:  365  LOSS:  tensor(0.1310, grad_fn=<AddBackward0>)
EPOCH:  366  LOSS:  tensor(0.1308, grad_fn=<AddBackward0>)
EPOCH:  367  LOSS:  tensor(0.1305, grad_fn=<AddBackward0>)
EPOCH:  368  LOSS:  tensor(0.1303, grad_fn=<AddBackward0>)
EPOCH:  369  LOSS:  tensor(0.1300, grad_fn=<AddBackward0>)
EPOCH:  370  LOSS:  tensor(0.1297, grad_fn=<AddBackward0>)
EPOCH:  371  LOSS:  tensor(0.1294, grad_fn=<AddBackward0>)
EPOCH:  372  LOSS:  tensor(0.1292, grad_fn=<AddBackward0

EPOCH:  495  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  496  LOSS:  tensor(0.0953, grad_fn=<AddBackward0>)
EPOCH:  497  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  498  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  499  LOSS:  tensor(0.0946, grad_fn=<AddBackward0>)
EPOCH:  500  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  501  LOSS:  tensor(0.0942, grad_fn=<AddBackward0>)
EPOCH:  502  LOSS:  tensor(0.0940, grad_fn=<AddBackward0>)
EPOCH:  503  LOSS:  tensor(0.0938, grad_fn=<AddBackward0>)
EPOCH:  504  LOSS:  tensor(0.0936, grad_fn=<AddBackward0>)
EPOCH:  505  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  506  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  507  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  508  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  509  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  510  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  511  LOSS:  tensor(0.0924, grad_fn=<AddBackward0

EPOCH:  634  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  635  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  636  LOSS:  tensor(0.0605, grad_fn=<AddBackward0>)
EPOCH:  637  LOSS:  tensor(0.0604, grad_fn=<AddBackward0>)
EPOCH:  638  LOSS:  tensor(0.0602, grad_fn=<AddBackward0>)
EPOCH:  639  LOSS:  tensor(0.0601, grad_fn=<AddBackward0>)
EPOCH:  640  LOSS:  tensor(0.0600, grad_fn=<AddBackward0>)
EPOCH:  641  LOSS:  tensor(0.0599, grad_fn=<AddBackward0>)
EPOCH:  642  LOSS:  tensor(0.0598, grad_fn=<AddBackward0>)
EPOCH:  643  LOSS:  tensor(0.0597, grad_fn=<AddBackward0>)
EPOCH:  644  LOSS:  tensor(0.0596, grad_fn=<AddBackward0>)
EPOCH:  645  LOSS:  tensor(0.0594, grad_fn=<AddBackward0>)
EPOCH:  646  LOSS:  tensor(0.0593, grad_fn=<AddBackward0>)
EPOCH:  647  LOSS:  tensor(0.0592, grad_fn=<AddBackward0>)
EPOCH:  648  LOSS:  tensor(0.0591, grad_fn=<AddBackward0>)
EPOCH:  649  LOSS:  tensor(0.0590, grad_fn=<AddBackward0>)
EPOCH:  650  LOSS:  tensor(0.0589, grad_fn=<AddBackward0

EPOCH:  773  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  774  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  775  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  776  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  777  LOSS:  tensor(0.0510, grad_fn=<AddBackward0>)
EPOCH:  778  LOSS:  tensor(0.0510, grad_fn=<AddBackward0>)
EPOCH:  779  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  780  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  781  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  782  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  783  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  784  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  785  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  786  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  787  LOSS:  tensor(0.0506, grad_fn=<AddBackward0>)
EPOCH:  788  LOSS:  tensor(0.0506, grad_fn=<AddBackward0>)
EPOCH:  789  LOSS:  tensor(0.0506, grad_fn=<AddBackward0

EPOCH:  912  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  913  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  914  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  915  LOSS:  tensor(0.0474, grad_fn=<AddBackward0>)
EPOCH:  916  LOSS:  tensor(0.0465, grad_fn=<AddBackward0>)
EPOCH:  917  LOSS:  tensor(0.0458, grad_fn=<AddBackward0>)
EPOCH:  918  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  919  LOSS:  tensor(0.0451, grad_fn=<AddBackward0>)
EPOCH:  920  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  921  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  922  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  923  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  924  LOSS:  tensor(0.0455, grad_fn=<AddBackward0>)
EPOCH:  925  LOSS:  tensor(0.0458, grad_fn=<AddBackward0>)
EPOCH:  926  LOSS:  tensor(0.0460, grad_fn=<AddBackward0>)
EPOCH:  927  LOSS:  tensor(0.0462, grad_fn=<AddBackward0>)
EPOCH:  928  LOSS:  tensor(0.0463, grad_fn=<AddBackward0

EPOCH:  1051  LOSS:  tensor(0.0410, grad_fn=<AddBackward0>)
EPOCH:  1052  LOSS:  tensor(0.0412, grad_fn=<AddBackward0>)
EPOCH:  1053  LOSS:  tensor(0.0414, grad_fn=<AddBackward0>)
EPOCH:  1054  LOSS:  tensor(0.0419, grad_fn=<AddBackward0>)
EPOCH:  1055  LOSS:  tensor(0.0426, grad_fn=<AddBackward0>)
EPOCH:  1056  LOSS:  tensor(0.0439, grad_fn=<AddBackward0>)
EPOCH:  1057  LOSS:  tensor(0.0458, grad_fn=<AddBackward0>)
EPOCH:  1058  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  1059  LOSS:  tensor(0.0527, grad_fn=<AddBackward0>)
EPOCH:  1060  LOSS:  tensor(0.0582, grad_fn=<AddBackward0>)
EPOCH:  1061  LOSS:  tensor(0.0646, grad_fn=<AddBackward0>)
EPOCH:  1062  LOSS:  tensor(0.0723, grad_fn=<AddBackward0>)
EPOCH:  1063  LOSS:  tensor(0.0766, grad_fn=<AddBackward0>)
EPOCH:  1064  LOSS:  tensor(0.0741, grad_fn=<AddBackward0>)
EPOCH:  1065  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  1066  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0419, grad

EPOCH:  1188  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  1189  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1190  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1191  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1192  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1193  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1194  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1195  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1196  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1197  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  1198  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  1199  LOSS:  tensor(0.0378, grad_fn=<AddBackward0>)
BATCH_SIZE:  64 time execution for a neural net:  69.50806307792664
Trainning model:  29
.... 278
EPOCH:  0  LOSS:  tensor(347.9861, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(295.6248, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(257.4485, grad_fn=<AddBackward0>)
EPOCH

EPOCH:  126  LOSS:  tensor(1.0794, grad_fn=<AddBackward0>)
EPOCH:  127  LOSS:  tensor(1.0590, grad_fn=<AddBackward0>)
EPOCH:  128  LOSS:  tensor(1.0393, grad_fn=<AddBackward0>)
EPOCH:  129  LOSS:  tensor(1.0210, grad_fn=<AddBackward0>)
EPOCH:  130  LOSS:  tensor(1.0034, grad_fn=<AddBackward0>)
EPOCH:  131  LOSS:  tensor(0.9864, grad_fn=<AddBackward0>)
EPOCH:  132  LOSS:  tensor(0.9700, grad_fn=<AddBackward0>)
EPOCH:  133  LOSS:  tensor(0.9543, grad_fn=<AddBackward0>)
EPOCH:  134  LOSS:  tensor(0.9392, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(0.9243, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(0.9096, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(0.8953, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(0.8811, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(0.8672, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(0.8536, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(0.8395, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(0.8257, grad_fn=<AddBackward0

EPOCH:  265  LOSS:  tensor(0.2879, grad_fn=<AddBackward0>)
EPOCH:  266  LOSS:  tensor(0.2867, grad_fn=<AddBackward0>)
EPOCH:  267  LOSS:  tensor(0.2855, grad_fn=<AddBackward0>)
EPOCH:  268  LOSS:  tensor(0.2843, grad_fn=<AddBackward0>)
EPOCH:  269  LOSS:  tensor(0.2831, grad_fn=<AddBackward0>)
EPOCH:  270  LOSS:  tensor(0.2820, grad_fn=<AddBackward0>)
EPOCH:  271  LOSS:  tensor(0.2808, grad_fn=<AddBackward0>)
EPOCH:  272  LOSS:  tensor(0.2796, grad_fn=<AddBackward0>)
EPOCH:  273  LOSS:  tensor(0.2784, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.2772, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.2761, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.2749, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.2738, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.2726, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.2714, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.2703, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.2693, grad_fn=<AddBackward0

EPOCH:  404  LOSS:  tensor(0.1923, grad_fn=<AddBackward0>)
EPOCH:  405  LOSS:  tensor(0.1918, grad_fn=<AddBackward0>)
EPOCH:  406  LOSS:  tensor(0.1913, grad_fn=<AddBackward0>)
EPOCH:  407  LOSS:  tensor(0.1908, grad_fn=<AddBackward0>)
EPOCH:  408  LOSS:  tensor(0.1905, grad_fn=<AddBackward0>)
EPOCH:  409  LOSS:  tensor(0.1899, grad_fn=<AddBackward0>)
EPOCH:  410  LOSS:  tensor(0.1894, grad_fn=<AddBackward0>)
EPOCH:  411  LOSS:  tensor(0.1889, grad_fn=<AddBackward0>)
EPOCH:  412  LOSS:  tensor(0.1885, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.1880, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.1875, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.1872, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.1867, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.1863, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.1859, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1856, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1852, grad_fn=<AddBackward0

EPOCH:  543  LOSS:  tensor(0.1353, grad_fn=<AddBackward0>)
EPOCH:  544  LOSS:  tensor(0.1346, grad_fn=<AddBackward0>)
EPOCH:  545  LOSS:  tensor(0.1340, grad_fn=<AddBackward0>)
EPOCH:  546  LOSS:  tensor(0.1334, grad_fn=<AddBackward0>)
EPOCH:  547  LOSS:  tensor(0.1327, grad_fn=<AddBackward0>)
EPOCH:  548  LOSS:  tensor(0.1321, grad_fn=<AddBackward0>)
EPOCH:  549  LOSS:  tensor(0.1315, grad_fn=<AddBackward0>)
EPOCH:  550  LOSS:  tensor(0.1309, grad_fn=<AddBackward0>)
EPOCH:  551  LOSS:  tensor(0.1303, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.1297, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.1292, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.1287, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.1282, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.1276, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1264, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1259, grad_fn=<AddBackward0

EPOCH:  682  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  683  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  684  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  685  LOSS:  tensor(0.0928, grad_fn=<AddBackward0>)
EPOCH:  686  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  687  LOSS:  tensor(0.0923, grad_fn=<AddBackward0>)
EPOCH:  688  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  689  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  690  LOSS:  tensor(0.0917, grad_fn=<AddBackward0>)
EPOCH:  691  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  692  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  693  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  694  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  695  LOSS:  tensor(0.0906, grad_fn=<AddBackward0>)
EPOCH:  696  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0900, grad_fn=<AddBackward0

EPOCH:  821  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  822  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  823  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  824  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  825  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  826  LOSS:  tensor(0.0675, grad_fn=<AddBackward0>)
EPOCH:  827  LOSS:  tensor(0.0673, grad_fn=<AddBackward0>)
EPOCH:  828  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  829  LOSS:  tensor(0.0670, grad_fn=<AddBackward0>)
EPOCH:  830  LOSS:  tensor(0.0669, grad_fn=<AddBackward0>)
EPOCH:  831  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  832  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  833  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  834  LOSS:  tensor(0.0662, grad_fn=<AddBackward0>)
EPOCH:  835  LOSS:  tensor(0.0661, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0658, grad_fn=<AddBackward0

EPOCH:  960  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  961  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  962  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  963  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  964  LOSS:  tensor(0.0486, grad_fn=<AddBackward0>)
EPOCH:  965  LOSS:  tensor(0.0485, grad_fn=<AddBackward0>)
EPOCH:  966  LOSS:  tensor(0.0483, grad_fn=<AddBackward0>)
EPOCH:  967  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  968  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  969  LOSS:  tensor(0.0480, grad_fn=<AddBackward0>)
EPOCH:  970  LOSS:  tensor(0.0478, grad_fn=<AddBackward0>)
EPOCH:  971  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  972  LOSS:  tensor(0.0476, grad_fn=<AddBackward0>)
EPOCH:  973  LOSS:  tensor(0.0474, grad_fn=<AddBackward0>)
EPOCH:  974  LOSS:  tensor(0.0473, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0472, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0471, grad_fn=<AddBackward0

EPOCH:  1098  LOSS:  tensor(0.0378, grad_fn=<AddBackward0>)
EPOCH:  1099  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  1100  LOSS:  tensor(0.0377, grad_fn=<AddBackward0>)
EPOCH:  1101  LOSS:  tensor(0.0376, grad_fn=<AddBackward0>)
EPOCH:  1102  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1103  LOSS:  tensor(0.0375, grad_fn=<AddBackward0>)
EPOCH:  1104  LOSS:  tensor(0.0374, grad_fn=<AddBackward0>)
EPOCH:  1105  LOSS:  tensor(0.0374, grad_fn=<AddBackward0>)
EPOCH:  1106  LOSS:  tensor(0.0373, grad_fn=<AddBackward0>)
EPOCH:  1107  LOSS:  tensor(0.0372, grad_fn=<AddBackward0>)
EPOCH:  1108  LOSS:  tensor(0.0372, grad_fn=<AddBackward0>)
EPOCH:  1109  LOSS:  tensor(0.0372, grad_fn=<AddBackward0>)
EPOCH:  1110  LOSS:  tensor(0.0371, grad_fn=<AddBackward0>)
EPOCH:  1111  LOSS:  tensor(0.0370, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.0370, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0369, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0368, grad

EPOCH:  34  LOSS:  tensor(71.9940, grad_fn=<AddBackward0>)
EPOCH:  35  LOSS:  tensor(69.0839, grad_fn=<AddBackward0>)
EPOCH:  36  LOSS:  tensor(66.2730, grad_fn=<AddBackward0>)
EPOCH:  37  LOSS:  tensor(63.5644, grad_fn=<AddBackward0>)
EPOCH:  38  LOSS:  tensor(60.8699, grad_fn=<AddBackward0>)
EPOCH:  39  LOSS:  tensor(58.1800, grad_fn=<AddBackward0>)
EPOCH:  40  LOSS:  tensor(55.5082, grad_fn=<AddBackward0>)
EPOCH:  41  LOSS:  tensor(52.8686, grad_fn=<AddBackward0>)
EPOCH:  42  LOSS:  tensor(50.2916, grad_fn=<AddBackward0>)
EPOCH:  43  LOSS:  tensor(47.8982, grad_fn=<AddBackward0>)
EPOCH:  44  LOSS:  tensor(45.6701, grad_fn=<AddBackward0>)
EPOCH:  45  LOSS:  tensor(43.5733, grad_fn=<AddBackward0>)
EPOCH:  46  LOSS:  tensor(41.6088, grad_fn=<AddBackward0>)
EPOCH:  47  LOSS:  tensor(39.7625, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(38.0300, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(36.3698, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(34.7712, grad_fn=<AddBackward0

EPOCH:  174  LOSS:  tensor(0.9459, grad_fn=<AddBackward0>)
EPOCH:  175  LOSS:  tensor(0.9362, grad_fn=<AddBackward0>)
EPOCH:  176  LOSS:  tensor(0.9266, grad_fn=<AddBackward0>)
EPOCH:  177  LOSS:  tensor(0.9172, grad_fn=<AddBackward0>)
EPOCH:  178  LOSS:  tensor(0.9080, grad_fn=<AddBackward0>)
EPOCH:  179  LOSS:  tensor(0.8988, grad_fn=<AddBackward0>)
EPOCH:  180  LOSS:  tensor(0.8885, grad_fn=<AddBackward0>)
EPOCH:  181  LOSS:  tensor(0.8782, grad_fn=<AddBackward0>)
EPOCH:  182  LOSS:  tensor(0.8680, grad_fn=<AddBackward0>)
EPOCH:  183  LOSS:  tensor(0.8579, grad_fn=<AddBackward0>)
EPOCH:  184  LOSS:  tensor(0.8481, grad_fn=<AddBackward0>)
EPOCH:  185  LOSS:  tensor(0.8387, grad_fn=<AddBackward0>)
EPOCH:  186  LOSS:  tensor(0.8297, grad_fn=<AddBackward0>)
EPOCH:  187  LOSS:  tensor(0.8209, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.8123, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.8040, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.7958, grad_fn=<AddBackward0

EPOCH:  313  LOSS:  tensor(0.3262, grad_fn=<AddBackward0>)
EPOCH:  314  LOSS:  tensor(0.3248, grad_fn=<AddBackward0>)
EPOCH:  315  LOSS:  tensor(0.3233, grad_fn=<AddBackward0>)
EPOCH:  316  LOSS:  tensor(0.3218, grad_fn=<AddBackward0>)
EPOCH:  317  LOSS:  tensor(0.3205, grad_fn=<AddBackward0>)
EPOCH:  318  LOSS:  tensor(0.3192, grad_fn=<AddBackward0>)
EPOCH:  319  LOSS:  tensor(0.3180, grad_fn=<AddBackward0>)
EPOCH:  320  LOSS:  tensor(0.3168, grad_fn=<AddBackward0>)
EPOCH:  321  LOSS:  tensor(0.3154, grad_fn=<AddBackward0>)
EPOCH:  322  LOSS:  tensor(0.3140, grad_fn=<AddBackward0>)
EPOCH:  323  LOSS:  tensor(0.3126, grad_fn=<AddBackward0>)
EPOCH:  324  LOSS:  tensor(0.3112, grad_fn=<AddBackward0>)
EPOCH:  325  LOSS:  tensor(0.3099, grad_fn=<AddBackward0>)
EPOCH:  326  LOSS:  tensor(0.3085, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.3072, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.3059, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.3046, grad_fn=<AddBackward0

EPOCH:  452  LOSS:  tensor(0.2349, grad_fn=<AddBackward0>)
EPOCH:  453  LOSS:  tensor(0.2347, grad_fn=<AddBackward0>)
EPOCH:  454  LOSS:  tensor(0.2344, grad_fn=<AddBackward0>)
EPOCH:  455  LOSS:  tensor(0.2342, grad_fn=<AddBackward0>)
EPOCH:  456  LOSS:  tensor(0.2339, grad_fn=<AddBackward0>)
EPOCH:  457  LOSS:  tensor(0.2337, grad_fn=<AddBackward0>)
EPOCH:  458  LOSS:  tensor(0.2335, grad_fn=<AddBackward0>)
EPOCH:  459  LOSS:  tensor(0.2332, grad_fn=<AddBackward0>)
EPOCH:  460  LOSS:  tensor(0.2330, grad_fn=<AddBackward0>)
EPOCH:  461  LOSS:  tensor(0.2327, grad_fn=<AddBackward0>)
EPOCH:  462  LOSS:  tensor(0.2325, grad_fn=<AddBackward0>)
EPOCH:  463  LOSS:  tensor(0.2323, grad_fn=<AddBackward0>)
EPOCH:  464  LOSS:  tensor(0.2321, grad_fn=<AddBackward0>)
EPOCH:  465  LOSS:  tensor(0.2319, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.2316, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.2314, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.2312, grad_fn=<AddBackward0

EPOCH:  591  LOSS:  tensor(0.2092, grad_fn=<AddBackward0>)
EPOCH:  592  LOSS:  tensor(0.2090, grad_fn=<AddBackward0>)
EPOCH:  593  LOSS:  tensor(0.2087, grad_fn=<AddBackward0>)
EPOCH:  594  LOSS:  tensor(0.2085, grad_fn=<AddBackward0>)
EPOCH:  595  LOSS:  tensor(0.2082, grad_fn=<AddBackward0>)
EPOCH:  596  LOSS:  tensor(0.2080, grad_fn=<AddBackward0>)
EPOCH:  597  LOSS:  tensor(0.2078, grad_fn=<AddBackward0>)
EPOCH:  598  LOSS:  tensor(0.2075, grad_fn=<AddBackward0>)
EPOCH:  599  LOSS:  tensor(0.2072, grad_fn=<AddBackward0>)
EPOCH:  600  LOSS:  tensor(0.2069, grad_fn=<AddBackward0>)
EPOCH:  601  LOSS:  tensor(0.2067, grad_fn=<AddBackward0>)
EPOCH:  602  LOSS:  tensor(0.2064, grad_fn=<AddBackward0>)
EPOCH:  603  LOSS:  tensor(0.2052, grad_fn=<AddBackward0>)
EPOCH:  604  LOSS:  tensor(0.2035, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.2016, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.1997, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.1979, grad_fn=<AddBackward0

EPOCH:  730  LOSS:  tensor(0.1453, grad_fn=<AddBackward0>)
EPOCH:  731  LOSS:  tensor(0.1451, grad_fn=<AddBackward0>)
EPOCH:  732  LOSS:  tensor(0.1449, grad_fn=<AddBackward0>)
EPOCH:  733  LOSS:  tensor(0.1447, grad_fn=<AddBackward0>)
EPOCH:  734  LOSS:  tensor(0.1445, grad_fn=<AddBackward0>)
EPOCH:  735  LOSS:  tensor(0.1443, grad_fn=<AddBackward0>)
EPOCH:  736  LOSS:  tensor(0.1441, grad_fn=<AddBackward0>)
EPOCH:  737  LOSS:  tensor(0.1438, grad_fn=<AddBackward0>)
EPOCH:  738  LOSS:  tensor(0.1436, grad_fn=<AddBackward0>)
EPOCH:  739  LOSS:  tensor(0.1434, grad_fn=<AddBackward0>)
EPOCH:  740  LOSS:  tensor(0.1432, grad_fn=<AddBackward0>)
EPOCH:  741  LOSS:  tensor(0.1430, grad_fn=<AddBackward0>)
EPOCH:  742  LOSS:  tensor(0.1428, grad_fn=<AddBackward0>)
EPOCH:  743  LOSS:  tensor(0.1426, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.1423, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.1421, grad_fn=<AddBackward0

EPOCH:  869  LOSS:  tensor(0.1210, grad_fn=<AddBackward0>)
EPOCH:  870  LOSS:  tensor(0.1208, grad_fn=<AddBackward0>)
EPOCH:  871  LOSS:  tensor(0.1207, grad_fn=<AddBackward0>)
EPOCH:  872  LOSS:  tensor(0.1206, grad_fn=<AddBackward0>)
EPOCH:  873  LOSS:  tensor(0.1205, grad_fn=<AddBackward0>)
EPOCH:  874  LOSS:  tensor(0.1204, grad_fn=<AddBackward0>)
EPOCH:  875  LOSS:  tensor(0.1203, grad_fn=<AddBackward0>)
EPOCH:  876  LOSS:  tensor(0.1202, grad_fn=<AddBackward0>)
EPOCH:  877  LOSS:  tensor(0.1201, grad_fn=<AddBackward0>)
EPOCH:  878  LOSS:  tensor(0.1200, grad_fn=<AddBackward0>)
EPOCH:  879  LOSS:  tensor(0.1198, grad_fn=<AddBackward0>)
EPOCH:  880  LOSS:  tensor(0.1197, grad_fn=<AddBackward0>)
EPOCH:  881  LOSS:  tensor(0.1196, grad_fn=<AddBackward0>)
EPOCH:  882  LOSS:  tensor(0.1195, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.1194, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.1193, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.1192, grad_fn=<AddBackward0

EPOCH:  1008  LOSS:  tensor(0.1080, grad_fn=<AddBackward0>)
EPOCH:  1009  LOSS:  tensor(0.1079, grad_fn=<AddBackward0>)
EPOCH:  1010  LOSS:  tensor(0.1078, grad_fn=<AddBackward0>)
EPOCH:  1011  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  1012  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  1013  LOSS:  tensor(0.1076, grad_fn=<AddBackward0>)
EPOCH:  1014  LOSS:  tensor(0.1075, grad_fn=<AddBackward0>)
EPOCH:  1015  LOSS:  tensor(0.1074, grad_fn=<AddBackward0>)
EPOCH:  1016  LOSS:  tensor(0.1074, grad_fn=<AddBackward0>)
EPOCH:  1017  LOSS:  tensor(0.1073, grad_fn=<AddBackward0>)
EPOCH:  1018  LOSS:  tensor(0.1072, grad_fn=<AddBackward0>)
EPOCH:  1019  LOSS:  tensor(0.1071, grad_fn=<AddBackward0>)
EPOCH:  1020  LOSS:  tensor(0.1071, grad_fn=<AddBackward0>)
EPOCH:  1021  LOSS:  tensor(0.1070, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.1069, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.1069, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.1068, grad

EPOCH:  1145  LOSS:  tensor(0.0988, grad_fn=<AddBackward0>)
EPOCH:  1146  LOSS:  tensor(0.0988, grad_fn=<AddBackward0>)
EPOCH:  1147  LOSS:  tensor(0.0987, grad_fn=<AddBackward0>)
EPOCH:  1148  LOSS:  tensor(0.0986, grad_fn=<AddBackward0>)
EPOCH:  1149  LOSS:  tensor(0.0986, grad_fn=<AddBackward0>)
EPOCH:  1150  LOSS:  tensor(0.0985, grad_fn=<AddBackward0>)
EPOCH:  1151  LOSS:  tensor(0.0984, grad_fn=<AddBackward0>)
EPOCH:  1152  LOSS:  tensor(0.0984, grad_fn=<AddBackward0>)
EPOCH:  1153  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  1154  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  1155  LOSS:  tensor(0.0982, grad_fn=<AddBackward0>)
EPOCH:  1156  LOSS:  tensor(0.0981, grad_fn=<AddBackward0>)
EPOCH:  1157  LOSS:  tensor(0.0981, grad_fn=<AddBackward0>)
EPOCH:  1158  LOSS:  tensor(0.0980, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0980, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0979, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0978, grad

EPOCH:  82  LOSS:  tensor(5.8403, grad_fn=<AddBackward0>)
EPOCH:  83  LOSS:  tensor(5.5623, grad_fn=<AddBackward0>)
EPOCH:  84  LOSS:  tensor(5.3030, grad_fn=<AddBackward0>)
EPOCH:  85  LOSS:  tensor(5.0559, grad_fn=<AddBackward0>)
EPOCH:  86  LOSS:  tensor(4.8215, grad_fn=<AddBackward0>)
EPOCH:  87  LOSS:  tensor(4.6065, grad_fn=<AddBackward0>)
EPOCH:  88  LOSS:  tensor(4.4029, grad_fn=<AddBackward0>)
EPOCH:  89  LOSS:  tensor(4.2147, grad_fn=<AddBackward0>)
EPOCH:  90  LOSS:  tensor(4.0341, grad_fn=<AddBackward0>)
EPOCH:  91  LOSS:  tensor(3.8704, grad_fn=<AddBackward0>)
EPOCH:  92  LOSS:  tensor(3.7138, grad_fn=<AddBackward0>)
EPOCH:  93  LOSS:  tensor(3.5591, grad_fn=<AddBackward0>)
EPOCH:  94  LOSS:  tensor(3.4038, grad_fn=<AddBackward0>)
EPOCH:  95  LOSS:  tensor(3.2567, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(3.1081, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(2.9643, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(2.8351, grad_fn=<AddBackward0>)
EPOCH:  99  LO

EPOCH:  222  LOSS:  tensor(0.3815, grad_fn=<AddBackward0>)
EPOCH:  223  LOSS:  tensor(0.3789, grad_fn=<AddBackward0>)
EPOCH:  224  LOSS:  tensor(0.3763, grad_fn=<AddBackward0>)
EPOCH:  225  LOSS:  tensor(0.3738, grad_fn=<AddBackward0>)
EPOCH:  226  LOSS:  tensor(0.3712, grad_fn=<AddBackward0>)
EPOCH:  227  LOSS:  tensor(0.3686, grad_fn=<AddBackward0>)
EPOCH:  228  LOSS:  tensor(0.3661, grad_fn=<AddBackward0>)
EPOCH:  229  LOSS:  tensor(0.3637, grad_fn=<AddBackward0>)
EPOCH:  230  LOSS:  tensor(0.3613, grad_fn=<AddBackward0>)
EPOCH:  231  LOSS:  tensor(0.3589, grad_fn=<AddBackward0>)
EPOCH:  232  LOSS:  tensor(0.3565, grad_fn=<AddBackward0>)
EPOCH:  233  LOSS:  tensor(0.3541, grad_fn=<AddBackward0>)
EPOCH:  234  LOSS:  tensor(0.3518, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.3496, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.3474, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.3453, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.3432, grad_fn=<AddBackward0

EPOCH:  361  LOSS:  tensor(0.1859, grad_fn=<AddBackward0>)
EPOCH:  362  LOSS:  tensor(0.1854, grad_fn=<AddBackward0>)
EPOCH:  363  LOSS:  tensor(0.1848, grad_fn=<AddBackward0>)
EPOCH:  364  LOSS:  tensor(0.1842, grad_fn=<AddBackward0>)
EPOCH:  365  LOSS:  tensor(0.1837, grad_fn=<AddBackward0>)
EPOCH:  366  LOSS:  tensor(0.1831, grad_fn=<AddBackward0>)
EPOCH:  367  LOSS:  tensor(0.1826, grad_fn=<AddBackward0>)
EPOCH:  368  LOSS:  tensor(0.1820, grad_fn=<AddBackward0>)
EPOCH:  369  LOSS:  tensor(0.1816, grad_fn=<AddBackward0>)
EPOCH:  370  LOSS:  tensor(0.1810, grad_fn=<AddBackward0>)
EPOCH:  371  LOSS:  tensor(0.1805, grad_fn=<AddBackward0>)
EPOCH:  372  LOSS:  tensor(0.1800, grad_fn=<AddBackward0>)
EPOCH:  373  LOSS:  tensor(0.1795, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.1790, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.1785, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.1780, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.1775, grad_fn=<AddBackward0

EPOCH:  500  LOSS:  tensor(0.1364, grad_fn=<AddBackward0>)
EPOCH:  501  LOSS:  tensor(0.1363, grad_fn=<AddBackward0>)
EPOCH:  502  LOSS:  tensor(0.1360, grad_fn=<AddBackward0>)
EPOCH:  503  LOSS:  tensor(0.1358, grad_fn=<AddBackward0>)
EPOCH:  504  LOSS:  tensor(0.1357, grad_fn=<AddBackward0>)
EPOCH:  505  LOSS:  tensor(0.1355, grad_fn=<AddBackward0>)
EPOCH:  506  LOSS:  tensor(0.1352, grad_fn=<AddBackward0>)
EPOCH:  507  LOSS:  tensor(0.1350, grad_fn=<AddBackward0>)
EPOCH:  508  LOSS:  tensor(0.1348, grad_fn=<AddBackward0>)
EPOCH:  509  LOSS:  tensor(0.1346, grad_fn=<AddBackward0>)
EPOCH:  510  LOSS:  tensor(0.1344, grad_fn=<AddBackward0>)
EPOCH:  511  LOSS:  tensor(0.1342, grad_fn=<AddBackward0>)
EPOCH:  512  LOSS:  tensor(0.1340, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1338, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1336, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1335, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1333, grad_fn=<AddBackward0

EPOCH:  639  LOSS:  tensor(0.1136, grad_fn=<AddBackward0>)
EPOCH:  640  LOSS:  tensor(0.1134, grad_fn=<AddBackward0>)
EPOCH:  641  LOSS:  tensor(0.1133, grad_fn=<AddBackward0>)
EPOCH:  642  LOSS:  tensor(0.1131, grad_fn=<AddBackward0>)
EPOCH:  643  LOSS:  tensor(0.1129, grad_fn=<AddBackward0>)
EPOCH:  644  LOSS:  tensor(0.1127, grad_fn=<AddBackward0>)
EPOCH:  645  LOSS:  tensor(0.1126, grad_fn=<AddBackward0>)
EPOCH:  646  LOSS:  tensor(0.1124, grad_fn=<AddBackward0>)
EPOCH:  647  LOSS:  tensor(0.1122, grad_fn=<AddBackward0>)
EPOCH:  648  LOSS:  tensor(0.1121, grad_fn=<AddBackward0>)
EPOCH:  649  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  650  LOSS:  tensor(0.1117, grad_fn=<AddBackward0>)
EPOCH:  651  LOSS:  tensor(0.1116, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.1114, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.1110, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1108, grad_fn=<AddBackward0

EPOCH:  778  LOSS:  tensor(0.0914, grad_fn=<AddBackward0>)
EPOCH:  779  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  780  LOSS:  tensor(0.0912, grad_fn=<AddBackward0>)
EPOCH:  781  LOSS:  tensor(0.0911, grad_fn=<AddBackward0>)
EPOCH:  782  LOSS:  tensor(0.0909, grad_fn=<AddBackward0>)
EPOCH:  783  LOSS:  tensor(0.0908, grad_fn=<AddBackward0>)
EPOCH:  784  LOSS:  tensor(0.0907, grad_fn=<AddBackward0>)
EPOCH:  785  LOSS:  tensor(0.0906, grad_fn=<AddBackward0>)
EPOCH:  786  LOSS:  tensor(0.0905, grad_fn=<AddBackward0>)
EPOCH:  787  LOSS:  tensor(0.0904, grad_fn=<AddBackward0>)
EPOCH:  788  LOSS:  tensor(0.0903, grad_fn=<AddBackward0>)
EPOCH:  789  LOSS:  tensor(0.0902, grad_fn=<AddBackward0>)
EPOCH:  790  LOSS:  tensor(0.0901, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.0899, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0898, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0897, grad_fn=<AddBackward0

EPOCH:  917  LOSS:  tensor(0.0775, grad_fn=<AddBackward0>)
EPOCH:  918  LOSS:  tensor(0.0774, grad_fn=<AddBackward0>)
EPOCH:  919  LOSS:  tensor(0.0773, grad_fn=<AddBackward0>)
EPOCH:  920  LOSS:  tensor(0.0772, grad_fn=<AddBackward0>)
EPOCH:  921  LOSS:  tensor(0.0771, grad_fn=<AddBackward0>)
EPOCH:  922  LOSS:  tensor(0.0770, grad_fn=<AddBackward0>)
EPOCH:  923  LOSS:  tensor(0.0770, grad_fn=<AddBackward0>)
EPOCH:  924  LOSS:  tensor(0.0769, grad_fn=<AddBackward0>)
EPOCH:  925  LOSS:  tensor(0.0768, grad_fn=<AddBackward0>)
EPOCH:  926  LOSS:  tensor(0.0767, grad_fn=<AddBackward0>)
EPOCH:  927  LOSS:  tensor(0.0766, grad_fn=<AddBackward0>)
EPOCH:  928  LOSS:  tensor(0.0765, grad_fn=<AddBackward0>)
EPOCH:  929  LOSS:  tensor(0.0765, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0764, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0763, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0762, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0761, grad_fn=<AddBackward0

EPOCH:  1055  LOSS:  tensor(0.0596, grad_fn=<AddBackward0>)
EPOCH:  1056  LOSS:  tensor(0.0595, grad_fn=<AddBackward0>)
EPOCH:  1057  LOSS:  tensor(0.0594, grad_fn=<AddBackward0>)
EPOCH:  1058  LOSS:  tensor(0.0593, grad_fn=<AddBackward0>)
EPOCH:  1059  LOSS:  tensor(0.0592, grad_fn=<AddBackward0>)
EPOCH:  1060  LOSS:  tensor(0.0591, grad_fn=<AddBackward0>)
EPOCH:  1061  LOSS:  tensor(0.0591, grad_fn=<AddBackward0>)
EPOCH:  1062  LOSS:  tensor(0.0590, grad_fn=<AddBackward0>)
EPOCH:  1063  LOSS:  tensor(0.0589, grad_fn=<AddBackward0>)
EPOCH:  1064  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1065  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1066  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0584, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0583, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0582, grad

EPOCH:  1192  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  1193  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  1194  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  1195  LOSS:  tensor(0.0507, grad_fn=<AddBackward0>)
EPOCH:  1196  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  1197  LOSS:  tensor(0.0508, grad_fn=<AddBackward0>)
EPOCH:  1198  LOSS:  tensor(0.0509, grad_fn=<AddBackward0>)
EPOCH:  1199  LOSS:  tensor(0.0510, grad_fn=<AddBackward0>)
BATCH_SIZE:  64 time execution for a neural net:  67.75258493423462
Trainning model:  32
.... 278
EPOCH:  0  LOSS:  tensor(340.6624, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(300.6013, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(266.3151, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(236.6396, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(211.0940, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(188.9466, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(169.9730, grad_fn=<AddBackward0>)
EPOCH:  7

EPOCH:  130  LOSS:  tensor(0.7931, grad_fn=<AddBackward0>)
EPOCH:  131  LOSS:  tensor(0.7809, grad_fn=<AddBackward0>)
EPOCH:  132  LOSS:  tensor(0.7692, grad_fn=<AddBackward0>)
EPOCH:  133  LOSS:  tensor(0.7577, grad_fn=<AddBackward0>)
EPOCH:  134  LOSS:  tensor(0.7458, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(0.7345, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(0.7239, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(0.7139, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(0.7044, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(0.6952, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(0.6862, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(0.6773, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(0.6687, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(0.6601, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(0.6518, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(0.6436, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.6358, grad_fn=<AddBackward0

EPOCH:  269  LOSS:  tensor(0.2287, grad_fn=<AddBackward0>)
EPOCH:  270  LOSS:  tensor(0.2270, grad_fn=<AddBackward0>)
EPOCH:  271  LOSS:  tensor(0.2254, grad_fn=<AddBackward0>)
EPOCH:  272  LOSS:  tensor(0.2238, grad_fn=<AddBackward0>)
EPOCH:  273  LOSS:  tensor(0.2221, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.2204, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.2187, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.2171, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.2155, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.2139, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.2123, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.2108, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.2093, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.2077, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.2063, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2047, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2034, grad_fn=<AddBackward0

EPOCH:  408  LOSS:  tensor(0.1088, grad_fn=<AddBackward0>)
EPOCH:  409  LOSS:  tensor(0.1085, grad_fn=<AddBackward0>)
EPOCH:  410  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  411  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  412  LOSS:  tensor(0.1073, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.1069, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.1065, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.1062, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.1055, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1044, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1040, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1037, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1034, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1030, grad_fn=<AddBackward0

EPOCH:  547  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  548  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  549  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  550  LOSS:  tensor(0.0680, grad_fn=<AddBackward0>)
EPOCH:  551  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.0674, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.0670, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.0661, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.0659, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.0655, grad_fn=<AddBackward0

EPOCH:  686  LOSS:  tensor(0.0500, grad_fn=<AddBackward0>)
EPOCH:  687  LOSS:  tensor(0.0499, grad_fn=<AddBackward0>)
EPOCH:  688  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  689  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  690  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  691  LOSS:  tensor(0.0496, grad_fn=<AddBackward0>)
EPOCH:  692  LOSS:  tensor(0.0495, grad_fn=<AddBackward0>)
EPOCH:  693  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  694  LOSS:  tensor(0.0493, grad_fn=<AddBackward0>)
EPOCH:  695  LOSS:  tensor(0.0492, grad_fn=<AddBackward0>)
EPOCH:  696  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0489, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0488, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0487, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0486, grad_fn=<AddBackward0

EPOCH:  825  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  826  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  827  LOSS:  tensor(0.0403, grad_fn=<AddBackward0>)
EPOCH:  828  LOSS:  tensor(0.0403, grad_fn=<AddBackward0>)
EPOCH:  829  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  830  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  831  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  832  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  833  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  834  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  835  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0399, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0398, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0398, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0397, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0396, grad_fn=<AddBackward0

EPOCH:  964  LOSS:  tensor(0.0335, grad_fn=<AddBackward0>)
EPOCH:  965  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  966  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  967  LOSS:  tensor(0.0333, grad_fn=<AddBackward0>)
EPOCH:  968  LOSS:  tensor(0.0333, grad_fn=<AddBackward0>)
EPOCH:  969  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  970  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  971  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  972  LOSS:  tensor(0.0331, grad_fn=<AddBackward0>)
EPOCH:  973  LOSS:  tensor(0.0331, grad_fn=<AddBackward0>)
EPOCH:  974  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0329, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0329, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0328, grad_fn=<AddBackward0

EPOCH:  1102  LOSS:  tensor(0.0290, grad_fn=<AddBackward0>)
EPOCH:  1103  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1104  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1105  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1106  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1107  LOSS:  tensor(0.0288, grad_fn=<AddBackward0>)
EPOCH:  1108  LOSS:  tensor(0.0288, grad_fn=<AddBackward0>)
EPOCH:  1109  LOSS:  tensor(0.0288, grad_fn=<AddBackward0>)
EPOCH:  1110  LOSS:  tensor(0.0288, grad_fn=<AddBackward0>)
EPOCH:  1111  LOSS:  tensor(0.0287, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.0287, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0287, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0287, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0286, grad

EPOCH:  38  LOSS:  tensor(37.5848, grad_fn=<AddBackward0>)
EPOCH:  39  LOSS:  tensor(36.0375, grad_fn=<AddBackward0>)
EPOCH:  40  LOSS:  tensor(34.5380, grad_fn=<AddBackward0>)
EPOCH:  41  LOSS:  tensor(33.0080, grad_fn=<AddBackward0>)
EPOCH:  42  LOSS:  tensor(31.4496, grad_fn=<AddBackward0>)
EPOCH:  43  LOSS:  tensor(29.8785, grad_fn=<AddBackward0>)
EPOCH:  44  LOSS:  tensor(28.3207, grad_fn=<AddBackward0>)
EPOCH:  45  LOSS:  tensor(26.8154, grad_fn=<AddBackward0>)
EPOCH:  46  LOSS:  tensor(25.3708, grad_fn=<AddBackward0>)
EPOCH:  47  LOSS:  tensor(24.0030, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(22.7055, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(21.4541, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(20.2748, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(19.1399, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(17.9898, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(16.8587, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(15.7880, grad_fn=<AddBackward0

EPOCH:  178  LOSS:  tensor(0.3080, grad_fn=<AddBackward0>)
EPOCH:  179  LOSS:  tensor(0.3048, grad_fn=<AddBackward0>)
EPOCH:  180  LOSS:  tensor(0.3015, grad_fn=<AddBackward0>)
EPOCH:  181  LOSS:  tensor(0.2983, grad_fn=<AddBackward0>)
EPOCH:  182  LOSS:  tensor(0.2951, grad_fn=<AddBackward0>)
EPOCH:  183  LOSS:  tensor(0.2921, grad_fn=<AddBackward0>)
EPOCH:  184  LOSS:  tensor(0.2891, grad_fn=<AddBackward0>)
EPOCH:  185  LOSS:  tensor(0.2861, grad_fn=<AddBackward0>)
EPOCH:  186  LOSS:  tensor(0.2832, grad_fn=<AddBackward0>)
EPOCH:  187  LOSS:  tensor(0.2803, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.2776, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.2749, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.2723, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.2696, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.2671, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.2647, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.2623, grad_fn=<AddBackward0

EPOCH:  317  LOSS:  tensor(0.1215, grad_fn=<AddBackward0>)
EPOCH:  318  LOSS:  tensor(0.1210, grad_fn=<AddBackward0>)
EPOCH:  319  LOSS:  tensor(0.1206, grad_fn=<AddBackward0>)
EPOCH:  320  LOSS:  tensor(0.1201, grad_fn=<AddBackward0>)
EPOCH:  321  LOSS:  tensor(0.1197, grad_fn=<AddBackward0>)
EPOCH:  322  LOSS:  tensor(0.1192, grad_fn=<AddBackward0>)
EPOCH:  323  LOSS:  tensor(0.1188, grad_fn=<AddBackward0>)
EPOCH:  324  LOSS:  tensor(0.1184, grad_fn=<AddBackward0>)
EPOCH:  325  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  326  LOSS:  tensor(0.1175, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.1171, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.1167, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.1163, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.1155, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.1151, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.1147, grad_fn=<AddBackward0

EPOCH:  456  LOSS:  tensor(0.0823, grad_fn=<AddBackward0>)
EPOCH:  457  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  458  LOSS:  tensor(0.0819, grad_fn=<AddBackward0>)
EPOCH:  459  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  460  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  461  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  462  LOSS:  tensor(0.0812, grad_fn=<AddBackward0>)
EPOCH:  463  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  464  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  465  LOSS:  tensor(0.0807, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.0805, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.0803, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.0802, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.0797, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.0795, grad_fn=<AddBackward0

EPOCH:  595  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  596  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  597  LOSS:  tensor(0.0616, grad_fn=<AddBackward0>)
EPOCH:  598  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  599  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  600  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  601  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  602  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  603  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  604  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.0610, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.0609, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.0608, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0605, grad_fn=<AddBackward0

EPOCH:  734  LOSS:  tensor(0.0461, grad_fn=<AddBackward0>)
EPOCH:  735  LOSS:  tensor(0.0461, grad_fn=<AddBackward0>)
EPOCH:  736  LOSS:  tensor(0.0460, grad_fn=<AddBackward0>)
EPOCH:  737  LOSS:  tensor(0.0460, grad_fn=<AddBackward0>)
EPOCH:  738  LOSS:  tensor(0.0459, grad_fn=<AddBackward0>)
EPOCH:  739  LOSS:  tensor(0.0458, grad_fn=<AddBackward0>)
EPOCH:  740  LOSS:  tensor(0.0458, grad_fn=<AddBackward0>)
EPOCH:  741  LOSS:  tensor(0.0457, grad_fn=<AddBackward0>)
EPOCH:  742  LOSS:  tensor(0.0456, grad_fn=<AddBackward0>)
EPOCH:  743  LOSS:  tensor(0.0456, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.0455, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0452, grad_fn=<AddBackward0

EPOCH:  873  LOSS:  tensor(0.0406, grad_fn=<AddBackward0>)
EPOCH:  874  LOSS:  tensor(0.0406, grad_fn=<AddBackward0>)
EPOCH:  875  LOSS:  tensor(0.0407, grad_fn=<AddBackward0>)
EPOCH:  876  LOSS:  tensor(0.0406, grad_fn=<AddBackward0>)
EPOCH:  877  LOSS:  tensor(0.0406, grad_fn=<AddBackward0>)
EPOCH:  878  LOSS:  tensor(0.0406, grad_fn=<AddBackward0>)
EPOCH:  879  LOSS:  tensor(0.0405, grad_fn=<AddBackward0>)
EPOCH:  880  LOSS:  tensor(0.0405, grad_fn=<AddBackward0>)
EPOCH:  881  LOSS:  tensor(0.0405, grad_fn=<AddBackward0>)
EPOCH:  882  LOSS:  tensor(0.0405, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0403, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0401, grad_fn=<AddBackward0

EPOCH:  1012  LOSS:  tensor(0.0356, grad_fn=<AddBackward0>)
EPOCH:  1013  LOSS:  tensor(0.0356, grad_fn=<AddBackward0>)
EPOCH:  1014  LOSS:  tensor(0.0356, grad_fn=<AddBackward0>)
EPOCH:  1015  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1016  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1017  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1018  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1019  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1020  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  1021  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0353, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0353, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0352, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0352, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0351, grad

EPOCH:  1149  LOSS:  tensor(0.0329, grad_fn=<AddBackward0>)
EPOCH:  1150  LOSS:  tensor(0.0331, grad_fn=<AddBackward0>)
EPOCH:  1151  LOSS:  tensor(0.0333, grad_fn=<AddBackward0>)
EPOCH:  1152  LOSS:  tensor(0.0335, grad_fn=<AddBackward0>)
EPOCH:  1153  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1154  LOSS:  tensor(0.0338, grad_fn=<AddBackward0>)
EPOCH:  1155  LOSS:  tensor(0.0338, grad_fn=<AddBackward0>)
EPOCH:  1156  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1157  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1158  LOSS:  tensor(0.0336, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0330, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0328, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0327, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0326, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0324, grad

EPOCH:  86  LOSS:  tensor(7.2381, grad_fn=<AddBackward0>)
EPOCH:  87  LOSS:  tensor(6.9737, grad_fn=<AddBackward0>)
EPOCH:  88  LOSS:  tensor(6.7193, grad_fn=<AddBackward0>)
EPOCH:  89  LOSS:  tensor(6.4726, grad_fn=<AddBackward0>)
EPOCH:  90  LOSS:  tensor(6.2331, grad_fn=<AddBackward0>)
EPOCH:  91  LOSS:  tensor(5.9998, grad_fn=<AddBackward0>)
EPOCH:  92  LOSS:  tensor(5.7768, grad_fn=<AddBackward0>)
EPOCH:  93  LOSS:  tensor(5.5643, grad_fn=<AddBackward0>)
EPOCH:  94  LOSS:  tensor(5.3623, grad_fn=<AddBackward0>)
EPOCH:  95  LOSS:  tensor(5.1714, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(4.9856, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(4.8080, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(4.6382, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(4.4762, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(4.3202, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(4.1684, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(4.0228, grad_fn=<AddBackward0>)
EPOCH:  103

EPOCH:  226  LOSS:  tensor(0.4975, grad_fn=<AddBackward0>)
EPOCH:  227  LOSS:  tensor(0.4930, grad_fn=<AddBackward0>)
EPOCH:  228  LOSS:  tensor(0.4886, grad_fn=<AddBackward0>)
EPOCH:  229  LOSS:  tensor(0.4842, grad_fn=<AddBackward0>)
EPOCH:  230  LOSS:  tensor(0.4802, grad_fn=<AddBackward0>)
EPOCH:  231  LOSS:  tensor(0.4759, grad_fn=<AddBackward0>)
EPOCH:  232  LOSS:  tensor(0.4719, grad_fn=<AddBackward0>)
EPOCH:  233  LOSS:  tensor(0.4678, grad_fn=<AddBackward0>)
EPOCH:  234  LOSS:  tensor(0.4637, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.4595, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.4556, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.4515, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.4475, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.4437, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.4402, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.4364, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.4327, grad_fn=<AddBackward0

EPOCH:  365  LOSS:  tensor(0.2380, grad_fn=<AddBackward0>)
EPOCH:  366  LOSS:  tensor(0.2373, grad_fn=<AddBackward0>)
EPOCH:  367  LOSS:  tensor(0.2365, grad_fn=<AddBackward0>)
EPOCH:  368  LOSS:  tensor(0.2358, grad_fn=<AddBackward0>)
EPOCH:  369  LOSS:  tensor(0.2351, grad_fn=<AddBackward0>)
EPOCH:  370  LOSS:  tensor(0.2344, grad_fn=<AddBackward0>)
EPOCH:  371  LOSS:  tensor(0.2336, grad_fn=<AddBackward0>)
EPOCH:  372  LOSS:  tensor(0.2329, grad_fn=<AddBackward0>)
EPOCH:  373  LOSS:  tensor(0.2322, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.2314, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.2308, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.2301, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.2294, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.2288, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.2281, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.2274, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.2268, grad_fn=<AddBackward0

EPOCH:  504  LOSS:  tensor(0.1552, grad_fn=<AddBackward0>)
EPOCH:  505  LOSS:  tensor(0.1548, grad_fn=<AddBackward0>)
EPOCH:  506  LOSS:  tensor(0.1544, grad_fn=<AddBackward0>)
EPOCH:  507  LOSS:  tensor(0.1539, grad_fn=<AddBackward0>)
EPOCH:  508  LOSS:  tensor(0.1535, grad_fn=<AddBackward0>)
EPOCH:  509  LOSS:  tensor(0.1531, grad_fn=<AddBackward0>)
EPOCH:  510  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  511  LOSS:  tensor(0.1523, grad_fn=<AddBackward0>)
EPOCH:  512  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1511, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1507, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1503, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1499, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1495, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1491, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1487, grad_fn=<AddBackward0

EPOCH:  643  LOSS:  tensor(0.1121, grad_fn=<AddBackward0>)
EPOCH:  644  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  645  LOSS:  tensor(0.1117, grad_fn=<AddBackward0>)
EPOCH:  646  LOSS:  tensor(0.1115, grad_fn=<AddBackward0>)
EPOCH:  647  LOSS:  tensor(0.1113, grad_fn=<AddBackward0>)
EPOCH:  648  LOSS:  tensor(0.1111, grad_fn=<AddBackward0>)
EPOCH:  649  LOSS:  tensor(0.1109, grad_fn=<AddBackward0>)
EPOCH:  650  LOSS:  tensor(0.1107, grad_fn=<AddBackward0>)
EPOCH:  651  LOSS:  tensor(0.1105, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.1103, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.1102, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.1100, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1098, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.1096, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.1094, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.1092, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.1090, grad_fn=<AddBackward0

EPOCH:  782  LOSS:  tensor(0.0901, grad_fn=<AddBackward0>)
EPOCH:  783  LOSS:  tensor(0.0900, grad_fn=<AddBackward0>)
EPOCH:  784  LOSS:  tensor(0.0899, grad_fn=<AddBackward0>)
EPOCH:  785  LOSS:  tensor(0.0898, grad_fn=<AddBackward0>)
EPOCH:  786  LOSS:  tensor(0.0897, grad_fn=<AddBackward0>)
EPOCH:  787  LOSS:  tensor(0.0896, grad_fn=<AddBackward0>)
EPOCH:  788  LOSS:  tensor(0.0894, grad_fn=<AddBackward0>)
EPOCH:  789  LOSS:  tensor(0.0894, grad_fn=<AddBackward0>)
EPOCH:  790  LOSS:  tensor(0.0892, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.0891, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.0890, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0890, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0889, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.0887, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0886, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0885, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0884, grad_fn=<AddBackward0

EPOCH:  921  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  922  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  923  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  924  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  925  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  926  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  927  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  928  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  929  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0751, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0753, grad_fn=<AddBackward0

EPOCH:  1059  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1060  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1061  LOSS:  tensor(0.0653, grad_fn=<AddBackward0>)
EPOCH:  1062  LOSS:  tensor(0.0653, grad_fn=<AddBackward0>)
EPOCH:  1063  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1064  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1065  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  1066  LOSS:  tensor(0.0656, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0658, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0658, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0656, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0654, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0648, grad

EPOCH:  1196  LOSS:  tensor(0.0605, grad_fn=<AddBackward0>)
EPOCH:  1197  LOSS:  tensor(0.0583, grad_fn=<AddBackward0>)
EPOCH:  1198  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  1199  LOSS:  tensor(0.0604, grad_fn=<AddBackward0>)
BATCH_SIZE:  64 time execution for a neural net:  68.48173904418945
Trainning model:  35
.... 278
EPOCH:  0  LOSS:  tensor(344.9692, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(299.8878, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(260.5429, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(226.5722, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(197.4379, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(172.8719, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(152.6295, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(136.6188, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(124.4468, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(115.7330, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(110.0254, grad_fn=<AddBackward0>)
EPOCH:  11  

EPOCH:  134  LOSS:  tensor(1.6006, grad_fn=<AddBackward0>)
EPOCH:  135  LOSS:  tensor(1.5686, grad_fn=<AddBackward0>)
EPOCH:  136  LOSS:  tensor(1.5373, grad_fn=<AddBackward0>)
EPOCH:  137  LOSS:  tensor(1.5067, grad_fn=<AddBackward0>)
EPOCH:  138  LOSS:  tensor(1.4772, grad_fn=<AddBackward0>)
EPOCH:  139  LOSS:  tensor(1.4486, grad_fn=<AddBackward0>)
EPOCH:  140  LOSS:  tensor(1.4212, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(1.3944, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(1.3689, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(1.3446, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(1.3207, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(1.2976, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(1.2752, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(1.2541, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(1.2347, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(1.2157, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(1.1974, grad_fn=<AddBackward0

EPOCH:  273  LOSS:  tensor(0.3792, grad_fn=<AddBackward0>)
EPOCH:  274  LOSS:  tensor(0.3766, grad_fn=<AddBackward0>)
EPOCH:  275  LOSS:  tensor(0.3741, grad_fn=<AddBackward0>)
EPOCH:  276  LOSS:  tensor(0.3716, grad_fn=<AddBackward0>)
EPOCH:  277  LOSS:  tensor(0.3692, grad_fn=<AddBackward0>)
EPOCH:  278  LOSS:  tensor(0.3667, grad_fn=<AddBackward0>)
EPOCH:  279  LOSS:  tensor(0.3644, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.3620, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.3597, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.3574, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.3551, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.3528, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.3505, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.3483, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.3460, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.3439, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.3417, grad_fn=<AddBackward0

EPOCH:  412  LOSS:  tensor(0.1739, grad_fn=<AddBackward0>)
EPOCH:  413  LOSS:  tensor(0.1731, grad_fn=<AddBackward0>)
EPOCH:  414  LOSS:  tensor(0.1723, grad_fn=<AddBackward0>)
EPOCH:  415  LOSS:  tensor(0.1716, grad_fn=<AddBackward0>)
EPOCH:  416  LOSS:  tensor(0.1708, grad_fn=<AddBackward0>)
EPOCH:  417  LOSS:  tensor(0.1701, grad_fn=<AddBackward0>)
EPOCH:  418  LOSS:  tensor(0.1693, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1685, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1678, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1670, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1663, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1656, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1648, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1641, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1634, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1627, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1620, grad_fn=<AddBackward0

EPOCH:  551  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  552  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  553  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  554  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  555  LOSS:  tensor(0.1044, grad_fn=<AddBackward0>)
EPOCH:  556  LOSS:  tensor(0.1041, grad_fn=<AddBackward0>)
EPOCH:  557  LOSS:  tensor(0.1038, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1035, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1032, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1029, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1024, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1021, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1018, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1016, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.1013, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1011, grad_fn=<AddBackward0

EPOCH:  690  LOSS:  tensor(0.0743, grad_fn=<AddBackward0>)
EPOCH:  691  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  692  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  693  LOSS:  tensor(0.0739, grad_fn=<AddBackward0>)
EPOCH:  694  LOSS:  tensor(0.0737, grad_fn=<AddBackward0>)
EPOCH:  695  LOSS:  tensor(0.0735, grad_fn=<AddBackward0>)
EPOCH:  696  LOSS:  tensor(0.0734, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.0732, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0729, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0727, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0726, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0724, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0722, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0721, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0719, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0718, grad_fn=<AddBackward0

EPOCH:  829  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  830  LOSS:  tensor(0.0573, grad_fn=<AddBackward0>)
EPOCH:  831  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  832  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  833  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  834  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  835  LOSS:  tensor(0.0567, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0565, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0564, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0561, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0559, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0558, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0558, grad_fn=<AddBackward0

EPOCH:  968  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  969  LOSS:  tensor(0.0476, grad_fn=<AddBackward0>)
EPOCH:  970  LOSS:  tensor(0.0479, grad_fn=<AddBackward0>)
EPOCH:  971  LOSS:  tensor(0.0497, grad_fn=<AddBackward0>)
EPOCH:  972  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  973  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  974  LOSS:  tensor(0.0540, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0529, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0492, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0477, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0468, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0466, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0469, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0475, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0481, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0485, grad_fn=<AddBackward0

EPOCH:  1106  LOSS:  tensor(0.0546, grad_fn=<AddBackward0>)
EPOCH:  1107  LOSS:  tensor(0.0636, grad_fn=<AddBackward0>)
EPOCH:  1108  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  1109  LOSS:  tensor(0.0568, grad_fn=<AddBackward0>)
EPOCH:  1110  LOSS:  tensor(0.0466, grad_fn=<AddBackward0>)
EPOCH:  1111  LOSS:  tensor(0.0400, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.0401, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0498, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0442, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0390, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0404, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0445, grad

EPOCH:  42  LOSS:  tensor(33.6108, grad_fn=<AddBackward0>)
EPOCH:  43  LOSS:  tensor(32.4563, grad_fn=<AddBackward0>)
EPOCH:  44  LOSS:  tensor(31.2837, grad_fn=<AddBackward0>)
EPOCH:  45  LOSS:  tensor(30.0612, grad_fn=<AddBackward0>)
EPOCH:  46  LOSS:  tensor(28.8337, grad_fn=<AddBackward0>)
EPOCH:  47  LOSS:  tensor(27.6342, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(26.4536, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(25.3179, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(24.2550, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(23.2651, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(22.3527, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(21.4833, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(20.6349, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(19.8059, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(18.9856, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(18.1656, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(17.3543, grad_fn=<AddBackward0

EPOCH:  182  LOSS:  tensor(0.5544, grad_fn=<AddBackward0>)
EPOCH:  183  LOSS:  tensor(0.5478, grad_fn=<AddBackward0>)
EPOCH:  184  LOSS:  tensor(0.5416, grad_fn=<AddBackward0>)
EPOCH:  185  LOSS:  tensor(0.5359, grad_fn=<AddBackward0>)
EPOCH:  186  LOSS:  tensor(0.5296, grad_fn=<AddBackward0>)
EPOCH:  187  LOSS:  tensor(0.5236, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.5175, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.5113, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.5051, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.4988, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.4927, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.4868, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.4809, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.4756, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.4702, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.4644, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.4593, grad_fn=<AddBackward0

EPOCH:  321  LOSS:  tensor(0.1925, grad_fn=<AddBackward0>)
EPOCH:  322  LOSS:  tensor(0.1914, grad_fn=<AddBackward0>)
EPOCH:  323  LOSS:  tensor(0.1902, grad_fn=<AddBackward0>)
EPOCH:  324  LOSS:  tensor(0.1892, grad_fn=<AddBackward0>)
EPOCH:  325  LOSS:  tensor(0.1882, grad_fn=<AddBackward0>)
EPOCH:  326  LOSS:  tensor(0.1871, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.1861, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.1850, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.1841, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.1831, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.1820, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.1810, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.1801, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.1791, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.1781, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.1772, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.1762, grad_fn=<AddBackward0

EPOCH:  460  LOSS:  tensor(0.1174, grad_fn=<AddBackward0>)
EPOCH:  461  LOSS:  tensor(0.1168, grad_fn=<AddBackward0>)
EPOCH:  462  LOSS:  tensor(0.1160, grad_fn=<AddBackward0>)
EPOCH:  463  LOSS:  tensor(0.1153, grad_fn=<AddBackward0>)
EPOCH:  464  LOSS:  tensor(0.1146, grad_fn=<AddBackward0>)
EPOCH:  465  LOSS:  tensor(0.1139, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.1132, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.1125, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.1118, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.1105, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.1099, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.1093, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.1087, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1076, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1071, grad_fn=<AddBackward0

EPOCH:  599  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  600  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  601  LOSS:  tensor(0.0806, grad_fn=<AddBackward0>)
EPOCH:  602  LOSS:  tensor(0.0805, grad_fn=<AddBackward0>)
EPOCH:  603  LOSS:  tensor(0.0804, grad_fn=<AddBackward0>)
EPOCH:  604  LOSS:  tensor(0.0803, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.0797, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.0795, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0794, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0792, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.0791, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.0790, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.0789, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.0787, grad_fn=<AddBackward0

EPOCH:  738  LOSS:  tensor(0.0652, grad_fn=<AddBackward0>)
EPOCH:  739  LOSS:  tensor(0.0651, grad_fn=<AddBackward0>)
EPOCH:  740  LOSS:  tensor(0.0649, grad_fn=<AddBackward0>)
EPOCH:  741  LOSS:  tensor(0.0648, grad_fn=<AddBackward0>)
EPOCH:  742  LOSS:  tensor(0.0647, grad_fn=<AddBackward0>)
EPOCH:  743  LOSS:  tensor(0.0646, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.0645, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0644, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0636, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0635, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0634, grad_fn=<AddBackward0

EPOCH:  877  LOSS:  tensor(0.0527, grad_fn=<AddBackward0>)
EPOCH:  878  LOSS:  tensor(0.0526, grad_fn=<AddBackward0>)
EPOCH:  879  LOSS:  tensor(0.0525, grad_fn=<AddBackward0>)
EPOCH:  880  LOSS:  tensor(0.0525, grad_fn=<AddBackward0>)
EPOCH:  881  LOSS:  tensor(0.0524, grad_fn=<AddBackward0>)
EPOCH:  882  LOSS:  tensor(0.0523, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.0523, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0522, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0522, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0520, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0517, grad_fn=<AddBackward0

EPOCH:  1016  LOSS:  tensor(0.0454, grad_fn=<AddBackward0>)
EPOCH:  1017  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  1018  LOSS:  tensor(0.0453, grad_fn=<AddBackward0>)
EPOCH:  1019  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  1020  LOSS:  tensor(0.0452, grad_fn=<AddBackward0>)
EPOCH:  1021  LOSS:  tensor(0.0451, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.0451, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0448, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0447, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0447, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0446, grad

EPOCH:  1153  LOSS:  tensor(0.0407, grad_fn=<AddBackward0>)
EPOCH:  1154  LOSS:  tensor(0.0408, grad_fn=<AddBackward0>)
EPOCH:  1155  LOSS:  tensor(0.0409, grad_fn=<AddBackward0>)
EPOCH:  1156  LOSS:  tensor(0.0411, grad_fn=<AddBackward0>)
EPOCH:  1157  LOSS:  tensor(0.0414, grad_fn=<AddBackward0>)
EPOCH:  1158  LOSS:  tensor(0.0417, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0421, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0426, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0432, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0446, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0446, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0426, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0415, grad

EPOCH:  90  LOSS:  tensor(2.2062, grad_fn=<AddBackward0>)
EPOCH:  91  LOSS:  tensor(2.1321, grad_fn=<AddBackward0>)
EPOCH:  92  LOSS:  tensor(2.0619, grad_fn=<AddBackward0>)
EPOCH:  93  LOSS:  tensor(1.9968, grad_fn=<AddBackward0>)
EPOCH:  94  LOSS:  tensor(1.9354, grad_fn=<AddBackward0>)
EPOCH:  95  LOSS:  tensor(1.8763, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(1.8194, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(1.7645, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(1.7120, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(1.6618, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(1.6141, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(1.5679, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(1.5244, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(1.4829, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(1.4430, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(1.4050, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(1.3690, grad_fn=<AddBackward0>)
EPOCH: 

EPOCH:  230  LOSS:  tensor(0.3752, grad_fn=<AddBackward0>)
EPOCH:  231  LOSS:  tensor(0.3734, grad_fn=<AddBackward0>)
EPOCH:  232  LOSS:  tensor(0.3716, grad_fn=<AddBackward0>)
EPOCH:  233  LOSS:  tensor(0.3698, grad_fn=<AddBackward0>)
EPOCH:  234  LOSS:  tensor(0.3680, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.3663, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.3645, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.3628, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.3611, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.3595, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.3578, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.3563, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.3547, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.3531, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.3516, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.3500, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.3485, grad_fn=<AddBackward0

EPOCH:  369  LOSS:  tensor(0.2113, grad_fn=<AddBackward0>)
EPOCH:  370  LOSS:  tensor(0.2106, grad_fn=<AddBackward0>)
EPOCH:  371  LOSS:  tensor(0.2100, grad_fn=<AddBackward0>)
EPOCH:  372  LOSS:  tensor(0.2095, grad_fn=<AddBackward0>)
EPOCH:  373  LOSS:  tensor(0.2090, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.2084, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.2078, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.2072, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.2067, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.2062, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.2056, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.2051, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.2046, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.2041, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.2036, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.2031, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.2026, grad_fn=<AddBackward0

EPOCH:  508  LOSS:  tensor(0.1564, grad_fn=<AddBackward0>)
EPOCH:  509  LOSS:  tensor(0.1560, grad_fn=<AddBackward0>)
EPOCH:  510  LOSS:  tensor(0.1557, grad_fn=<AddBackward0>)
EPOCH:  511  LOSS:  tensor(0.1553, grad_fn=<AddBackward0>)
EPOCH:  512  LOSS:  tensor(0.1550, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1546, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1542, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1538, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1535, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1531, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1524, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1521, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1518, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.1512, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.1508, grad_fn=<AddBackward0

EPOCH:  647  LOSS:  tensor(0.1209, grad_fn=<AddBackward0>)
EPOCH:  648  LOSS:  tensor(0.1205, grad_fn=<AddBackward0>)
EPOCH:  649  LOSS:  tensor(0.1202, grad_fn=<AddBackward0>)
EPOCH:  650  LOSS:  tensor(0.1199, grad_fn=<AddBackward0>)
EPOCH:  651  LOSS:  tensor(0.1196, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.1192, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.1189, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.1186, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1183, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.1179, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.1176, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.1173, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.1170, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.1167, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.1164, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.1161, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.1158, grad_fn=<AddBackward0

EPOCH:  786  LOSS:  tensor(0.0932, grad_fn=<AddBackward0>)
EPOCH:  787  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  788  LOSS:  tensor(0.0930, grad_fn=<AddBackward0>)
EPOCH:  789  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  790  LOSS:  tensor(0.0927, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.0925, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0924, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0922, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0921, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0920, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0918, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0914, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0913, grad_fn=<AddBackward0

EPOCH:  925  LOSS:  tensor(0.0758, grad_fn=<AddBackward0>)
EPOCH:  926  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  927  LOSS:  tensor(0.0743, grad_fn=<AddBackward0>)
EPOCH:  928  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  929  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0736, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0740, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0742, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0748, grad_fn=<AddBackward0

EPOCH:  1063  LOSS:  tensor(0.0730, grad_fn=<AddBackward0>)
EPOCH:  1064  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  1065  LOSS:  tensor(0.0762, grad_fn=<AddBackward0>)
EPOCH:  1066  LOSS:  tensor(0.0770, grad_fn=<AddBackward0>)
EPOCH:  1067  LOSS:  tensor(0.0768, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0729, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0696, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0664, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0642, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0632, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0634, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0656, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0667, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0675, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0676, grad

EPOCH:  0  LOSS:  tensor(367.0998, grad_fn=<AddBackward0>)
EPOCH:  1  LOSS:  tensor(327.2378, grad_fn=<AddBackward0>)
EPOCH:  2  LOSS:  tensor(293.0792, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(263.4921, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(237.1722, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(213.2737, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(191.2227, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(170.9164, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(152.4883, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(136.2498, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(122.5463, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(111.5699, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(103.3678, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(97.9370, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(94.8115, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(93.4317, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(93.1606, grad_fn=<AddBackwa

EPOCH:  140  LOSS:  tensor(0.9599, grad_fn=<AddBackward0>)
EPOCH:  141  LOSS:  tensor(0.9461, grad_fn=<AddBackward0>)
EPOCH:  142  LOSS:  tensor(0.9326, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(0.9196, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(0.9069, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(0.8939, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.8810, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(0.8683, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(0.8558, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.8438, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.8324, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.8213, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.8107, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.8005, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.7908, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.7814, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.7723, grad_fn=<AddBackward0

EPOCH:  279  LOSS:  tensor(0.2964, grad_fn=<AddBackward0>)
EPOCH:  280  LOSS:  tensor(0.2948, grad_fn=<AddBackward0>)
EPOCH:  281  LOSS:  tensor(0.2932, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.2917, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.2902, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2887, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2872, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.2857, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.2841, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.2826, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.2812, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.2798, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.2784, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2771, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.2758, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.2746, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2734, grad_fn=<AddBackward0

EPOCH:  418  LOSS:  tensor(0.1553, grad_fn=<AddBackward0>)
EPOCH:  419  LOSS:  tensor(0.1549, grad_fn=<AddBackward0>)
EPOCH:  420  LOSS:  tensor(0.1544, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1540, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1536, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1532, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1527, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1523, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1519, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1515, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1511, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1507, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1502, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1498, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1494, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1489, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1485, grad_fn=<AddBackward0

EPOCH:  557  LOSS:  tensor(0.1107, grad_fn=<AddBackward0>)
EPOCH:  558  LOSS:  tensor(0.1105, grad_fn=<AddBackward0>)
EPOCH:  559  LOSS:  tensor(0.1104, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1102, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1100, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1099, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1097, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1096, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1094, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.1093, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1091, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.1090, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.1088, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.1086, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.1085, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.1084, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.1082, grad_fn=<AddBackward0

EPOCH:  696  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  697  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  698  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0970, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0968, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0962, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0960, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0959, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0956, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0954, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0953, grad_fn=<AddBackward0

EPOCH:  835  LOSS:  tensor(0.0865, grad_fn=<AddBackward0>)
EPOCH:  836  LOSS:  tensor(0.0865, grad_fn=<AddBackward0>)
EPOCH:  837  LOSS:  tensor(0.0864, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0862, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0859, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0858, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0858, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0857, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0857, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0856, grad_fn=<AddBackward0

EPOCH:  974  LOSS:  tensor(0.0802, grad_fn=<AddBackward0>)
EPOCH:  975  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  976  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0799, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0799, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0798, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0797, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0797, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0796, grad_fn=<AddBackward0

EPOCH:  1111  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1112  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1113  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0748, grad

EPOCH:  47  LOSS:  tensor(27.2665, grad_fn=<AddBackward0>)
EPOCH:  48  LOSS:  tensor(26.1623, grad_fn=<AddBackward0>)
EPOCH:  49  LOSS:  tensor(25.0433, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(23.8977, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(22.7330, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(21.5817, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(20.4425, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(19.3241, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(18.2958, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(17.3370, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(16.4737, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(15.6858, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(14.9752, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(14.3597, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(13.8011, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(13.2818, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(12.7924, grad_fn=<AddBackward0

EPOCH:  187  LOSS:  tensor(0.4214, grad_fn=<AddBackward0>)
EPOCH:  188  LOSS:  tensor(0.4157, grad_fn=<AddBackward0>)
EPOCH:  189  LOSS:  tensor(0.4101, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.4046, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.3993, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.3940, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.3889, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.3837, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.3789, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.3741, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.3694, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.3649, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.3605, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.3559, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.3514, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.3471, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.3429, grad_fn=<AddBackward0

EPOCH:  326  LOSS:  tensor(0.1126, grad_fn=<AddBackward0>)
EPOCH:  327  LOSS:  tensor(0.1119, grad_fn=<AddBackward0>)
EPOCH:  328  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.1107, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.1101, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.1095, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.1089, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.1083, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.1071, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.1064, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.1059, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.1045, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.1039, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.1027, grad_fn=<AddBackward0

EPOCH:  465  LOSS:  tensor(0.0619, grad_fn=<AddBackward0>)
EPOCH:  466  LOSS:  tensor(0.0618, grad_fn=<AddBackward0>)
EPOCH:  467  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.0616, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.0612, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.0610, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.0609, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.0608, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.0608, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.0605, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.0604, grad_fn=<AddBackward0

EPOCH:  604  LOSS:  tensor(0.0520, grad_fn=<AddBackward0>)
EPOCH:  605  LOSS:  tensor(0.0520, grad_fn=<AddBackward0>)
EPOCH:  606  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.0517, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0516, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0516, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.0514, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.0513, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.0513, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.0510, grad_fn=<AddBackward0

EPOCH:  743  LOSS:  tensor(0.0438, grad_fn=<AddBackward0>)
EPOCH:  744  LOSS:  tensor(0.0437, grad_fn=<AddBackward0>)
EPOCH:  745  LOSS:  tensor(0.0436, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0435, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0435, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0434, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0433, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0433, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0432, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0431, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0430, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0429, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0428, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0427, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0426, grad_fn=<AddBackward0

EPOCH:  882  LOSS:  tensor(0.0359, grad_fn=<AddBackward0>)
EPOCH:  883  LOSS:  tensor(0.0358, grad_fn=<AddBackward0>)
EPOCH:  884  LOSS:  tensor(0.0358, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0357, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0357, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0357, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0356, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0354, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0353, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0353, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0352, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0352, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0352, grad_fn=<AddBackward0

EPOCH:  1021  LOSS:  tensor(0.0416, grad_fn=<AddBackward0>)
EPOCH:  1022  LOSS:  tensor(0.0433, grad_fn=<AddBackward0>)
EPOCH:  1023  LOSS:  tensor(0.0438, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0435, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0415, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0384, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0350, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0323, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0311, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0312, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0324, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0339, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.0355, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0365, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0369, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.0363, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0350, grad

EPOCH:  1158  LOSS:  tensor(0.0290, grad_fn=<AddBackward0>)
EPOCH:  1159  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1160  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0282, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0282, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0282, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0283, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0284, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0286, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0289, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0293, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0299, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0307, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0317, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0332, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0349, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0372, grad

EPOCH:  95  LOSS:  tensor(3.2778, grad_fn=<AddBackward0>)
EPOCH:  96  LOSS:  tensor(3.1681, grad_fn=<AddBackward0>)
EPOCH:  97  LOSS:  tensor(3.0644, grad_fn=<AddBackward0>)
EPOCH:  98  LOSS:  tensor(2.9661, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(2.8725, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(2.7834, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(2.6998, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(2.6198, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(2.5429, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(2.4698, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(2.4002, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(2.3335, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(2.2695, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(2.2082, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(2.1498, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(2.0943, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(2.0412, grad_fn=<AddBackward0>)
EP

EPOCH:  234  LOSS:  tensor(0.4596, grad_fn=<AddBackward0>)
EPOCH:  235  LOSS:  tensor(0.4568, grad_fn=<AddBackward0>)
EPOCH:  236  LOSS:  tensor(0.4542, grad_fn=<AddBackward0>)
EPOCH:  237  LOSS:  tensor(0.4515, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.4488, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.4460, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.4434, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.4406, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.4378, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.4351, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.4322, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.4293, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.4263, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.4233, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.4203, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.4172, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.4143, grad_fn=<AddBackward0

EPOCH:  373  LOSS:  tensor(0.2361, grad_fn=<AddBackward0>)
EPOCH:  374  LOSS:  tensor(0.2353, grad_fn=<AddBackward0>)
EPOCH:  375  LOSS:  tensor(0.2344, grad_fn=<AddBackward0>)
EPOCH:  376  LOSS:  tensor(0.2336, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.2328, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.2319, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.2311, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.2303, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.2294, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.2285, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.2277, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.2268, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.2260, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.2252, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.2244, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.2236, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.2227, grad_fn=<AddBackward0

EPOCH:  512  LOSS:  tensor(0.1602, grad_fn=<AddBackward0>)
EPOCH:  513  LOSS:  tensor(0.1598, grad_fn=<AddBackward0>)
EPOCH:  514  LOSS:  tensor(0.1594, grad_fn=<AddBackward0>)
EPOCH:  515  LOSS:  tensor(0.1591, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1588, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1584, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1581, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1577, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1574, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1571, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1567, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.1564, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.1561, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.1558, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.1555, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.1552, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.1549, grad_fn=<AddBackward0

EPOCH:  651  LOSS:  tensor(0.1220, grad_fn=<AddBackward0>)
EPOCH:  652  LOSS:  tensor(0.1218, grad_fn=<AddBackward0>)
EPOCH:  653  LOSS:  tensor(0.1216, grad_fn=<AddBackward0>)
EPOCH:  654  LOSS:  tensor(0.1214, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1212, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.1210, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.1207, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.1205, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.1203, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.1201, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.1199, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.1197, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.1195, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.1193, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.1191, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.1189, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.1187, grad_fn=<AddBackward0

EPOCH:  790  LOSS:  tensor(0.0943, grad_fn=<AddBackward0>)
EPOCH:  791  LOSS:  tensor(0.0941, grad_fn=<AddBackward0>)
EPOCH:  792  LOSS:  tensor(0.0939, grad_fn=<AddBackward0>)
EPOCH:  793  LOSS:  tensor(0.0937, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.0933, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.0931, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0929, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0927, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0924, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0922, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0919, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0918, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0913, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0911, grad_fn=<AddBackward0

EPOCH:  929  LOSS:  tensor(0.0645, grad_fn=<AddBackward0>)
EPOCH:  930  LOSS:  tensor(0.0644, grad_fn=<AddBackward0>)
EPOCH:  931  LOSS:  tensor(0.0643, grad_fn=<AddBackward0>)
EPOCH:  932  LOSS:  tensor(0.0641, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0640, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0639, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0638, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0637, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0636, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0635, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0634, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0633, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0631, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0630, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0629, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0628, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0627, grad_fn=<AddBackward0

EPOCH:  1067  LOSS:  tensor(0.0522, grad_fn=<AddBackward0>)
EPOCH:  1068  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  1069  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  1070  LOSS:  tensor(0.0520, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0518, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0517, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0516, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0514, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0514, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0513, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0511, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0510, grad

EPOCH:  2  LOSS:  tensor(149.5882, grad_fn=<AddBackward0>)
EPOCH:  3  LOSS:  tensor(129.9762, grad_fn=<AddBackward0>)
EPOCH:  4  LOSS:  tensor(113.0631, grad_fn=<AddBackward0>)
EPOCH:  5  LOSS:  tensor(98.7728, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(87.2692, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(78.5098, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(72.6170, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(69.4000, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(68.1730, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(67.9010, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(67.7390, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(67.0640, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(65.4137, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(62.6887, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(59.1206, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(55.1465, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(51.1270, grad_fn=<AddBackward0>)
EP

EPOCH:  142  LOSS:  tensor(0.5402, grad_fn=<AddBackward0>)
EPOCH:  143  LOSS:  tensor(0.5350, grad_fn=<AddBackward0>)
EPOCH:  144  LOSS:  tensor(0.5299, grad_fn=<AddBackward0>)
EPOCH:  145  LOSS:  tensor(0.5249, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.5199, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(0.5150, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(0.5106, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.5062, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.5019, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.4974, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.4929, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.4884, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.4839, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.4794, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.4751, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.4708, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.4665, grad_fn=<AddBackward0

EPOCH:  281  LOSS:  tensor(0.2252, grad_fn=<AddBackward0>)
EPOCH:  282  LOSS:  tensor(0.2242, grad_fn=<AddBackward0>)
EPOCH:  283  LOSS:  tensor(0.2232, grad_fn=<AddBackward0>)
EPOCH:  284  LOSS:  tensor(0.2221, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.2212, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.2204, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.2196, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.2188, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.2180, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.2172, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.2165, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2157, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.2149, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.2142, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2134, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.2126, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.2120, grad_fn=<AddBackward0

EPOCH:  420  LOSS:  tensor(0.1430, grad_fn=<AddBackward0>)
EPOCH:  421  LOSS:  tensor(0.1426, grad_fn=<AddBackward0>)
EPOCH:  422  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  423  LOSS:  tensor(0.1418, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1414, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1407, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1403, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1400, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1396, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1393, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1389, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1386, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1382, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1378, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1375, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1372, grad_fn=<AddBackward0

EPOCH:  559  LOSS:  tensor(0.1068, grad_fn=<AddBackward0>)
EPOCH:  560  LOSS:  tensor(0.1066, grad_fn=<AddBackward0>)
EPOCH:  561  LOSS:  tensor(0.1064, grad_fn=<AddBackward0>)
EPOCH:  562  LOSS:  tensor(0.1062, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.1060, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.1059, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.1057, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.1055, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.1049, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.1047, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.1045, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.1043, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.1042, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.1040, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.1038, grad_fn=<AddBackward0

EPOCH:  698  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  699  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  700  LOSS:  tensor(0.0819, grad_fn=<AddBackward0>)
EPOCH:  701  LOSS:  tensor(0.0818, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0815, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0814, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0812, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0811, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0810, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0809, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0808, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0807, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0806, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0805, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0804, grad_fn=<AddBackward0

EPOCH:  837  LOSS:  tensor(0.0690, grad_fn=<AddBackward0>)
EPOCH:  838  LOSS:  tensor(0.0689, grad_fn=<AddBackward0>)
EPOCH:  839  LOSS:  tensor(0.0688, grad_fn=<AddBackward0>)
EPOCH:  840  LOSS:  tensor(0.0688, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.0687, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0687, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0686, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0685, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0685, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0684, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0683, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0682, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0681, grad_fn=<AddBackward0

EPOCH:  976  LOSS:  tensor(0.0619, grad_fn=<AddBackward0>)
EPOCH:  977  LOSS:  tensor(0.0618, grad_fn=<AddBackward0>)
EPOCH:  978  LOSS:  tensor(0.0618, grad_fn=<AddBackward0>)
EPOCH:  979  LOSS:  tensor(0.0618, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0617, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0616, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0616, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0615, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0614, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0612, grad_fn=<AddBackward0

EPOCH:  1113  LOSS:  tensor(0.0586, grad_fn=<AddBackward0>)
EPOCH:  1114  LOSS:  tensor(0.0591, grad_fn=<AddBackward0>)
EPOCH:  1115  LOSS:  tensor(0.0597, grad_fn=<AddBackward0>)
EPOCH:  1116  LOSS:  tensor(0.0604, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0609, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0613, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0611, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0607, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0597, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0585, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0575, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0569, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0567, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0570, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0574, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0579, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0584, grad

EPOCH:  49  LOSS:  tensor(16.7584, grad_fn=<AddBackward0>)
EPOCH:  50  LOSS:  tensor(15.9100, grad_fn=<AddBackward0>)
EPOCH:  51  LOSS:  tensor(15.0898, grad_fn=<AddBackward0>)
EPOCH:  52  LOSS:  tensor(14.3127, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(13.5706, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(12.8879, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(12.2617, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(11.6700, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(11.1147, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(10.5887, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(10.0938, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(9.6280, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(9.1795, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(8.7511, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(8.3359, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(7.9425, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(7.5722, grad_fn=<AddBackward0>)
EPO

EPOCH:  189  LOSS:  tensor(0.5219, grad_fn=<AddBackward0>)
EPOCH:  190  LOSS:  tensor(0.5165, grad_fn=<AddBackward0>)
EPOCH:  191  LOSS:  tensor(0.5111, grad_fn=<AddBackward0>)
EPOCH:  192  LOSS:  tensor(0.5058, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.5006, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.4955, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.4906, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.4858, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.4810, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.4768, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.4721, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.4678, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.4633, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.4591, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.4551, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.4509, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.4470, grad_fn=<AddBackward0

EPOCH:  328  LOSS:  tensor(0.2149, grad_fn=<AddBackward0>)
EPOCH:  329  LOSS:  tensor(0.2141, grad_fn=<AddBackward0>)
EPOCH:  330  LOSS:  tensor(0.2135, grad_fn=<AddBackward0>)
EPOCH:  331  LOSS:  tensor(0.2127, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.2119, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.2114, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.2106, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.2098, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.2091, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.2084, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.2076, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.2067, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.2062, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.2053, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.2046, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.2039, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.2032, grad_fn=<AddBackward0

EPOCH:  467  LOSS:  tensor(0.1428, grad_fn=<AddBackward0>)
EPOCH:  468  LOSS:  tensor(0.1424, grad_fn=<AddBackward0>)
EPOCH:  469  LOSS:  tensor(0.1422, grad_fn=<AddBackward0>)
EPOCH:  470  LOSS:  tensor(0.1418, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.1415, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.1411, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.1407, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1404, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1400, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1397, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.1393, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1389, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1385, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1383, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1378, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.1375, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.1372, grad_fn=<AddBackward0

EPOCH:  606  LOSS:  tensor(0.1084, grad_fn=<AddBackward0>)
EPOCH:  607  LOSS:  tensor(0.1082, grad_fn=<AddBackward0>)
EPOCH:  608  LOSS:  tensor(0.1081, grad_fn=<AddBackward0>)
EPOCH:  609  LOSS:  tensor(0.1079, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.1077, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.1076, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.1074, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.1073, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.1073, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.1070, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.1069, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.1068, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.1066, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.1065, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.1063, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.1062, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.1060, grad_fn=<AddBackward0

EPOCH:  745  LOSS:  tensor(0.0842, grad_fn=<AddBackward0>)
EPOCH:  746  LOSS:  tensor(0.0840, grad_fn=<AddBackward0>)
EPOCH:  747  LOSS:  tensor(0.0838, grad_fn=<AddBackward0>)
EPOCH:  748  LOSS:  tensor(0.0836, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0834, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0833, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0830, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0829, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0828, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0826, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0824, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0822, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0819, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.0816, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.0814, grad_fn=<AddBackward0

EPOCH:  884  LOSS:  tensor(0.0677, grad_fn=<AddBackward0>)
EPOCH:  885  LOSS:  tensor(0.0676, grad_fn=<AddBackward0>)
EPOCH:  886  LOSS:  tensor(0.0674, grad_fn=<AddBackward0>)
EPOCH:  887  LOSS:  tensor(0.0673, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0671, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0670, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0668, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0666, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0663, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0659, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0657, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.0654, grad_fn=<AddBackward0

EPOCH:  1023  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  1024  LOSS:  tensor(0.0545, grad_fn=<AddBackward0>)
EPOCH:  1025  LOSS:  tensor(0.0544, grad_fn=<AddBackward0>)
EPOCH:  1026  LOSS:  tensor(0.0544, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0542, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.0541, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.0538, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.0537, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.0536, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.0535, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.0534, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.0532, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.0532, grad

EPOCH:  1160  LOSS:  tensor(0.0465, grad_fn=<AddBackward0>)
EPOCH:  1161  LOSS:  tensor(0.0463, grad_fn=<AddBackward0>)
EPOCH:  1162  LOSS:  tensor(0.0463, grad_fn=<AddBackward0>)
EPOCH:  1163  LOSS:  tensor(0.0462, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0464, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0466, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0469, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0476, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0482, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0488, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0494, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.0501, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.0527, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.0531, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.0528, grad

EPOCH:  98  LOSS:  tensor(2.5769, grad_fn=<AddBackward0>)
EPOCH:  99  LOSS:  tensor(2.4994, grad_fn=<AddBackward0>)
EPOCH:  100  LOSS:  tensor(2.4263, grad_fn=<AddBackward0>)
EPOCH:  101  LOSS:  tensor(2.3570, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(2.2904, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(2.2258, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(2.1637, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(2.1038, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(2.0459, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(1.9897, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(1.9354, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(1.8831, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(1.8327, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(1.7840, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(1.7381, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(1.6935, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(1.6523, grad_fn=<AddBackward0>)

EPOCH:  237  LOSS:  tensor(0.3789, grad_fn=<AddBackward0>)
EPOCH:  238  LOSS:  tensor(0.3764, grad_fn=<AddBackward0>)
EPOCH:  239  LOSS:  tensor(0.3732, grad_fn=<AddBackward0>)
EPOCH:  240  LOSS:  tensor(0.3709, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.3689, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.3662, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.3638, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.3614, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.3590, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.3577, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.3552, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.3525, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.3508, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.3489, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.3470, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.3448, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.3431, grad_fn=<AddBackward0

EPOCH:  376  LOSS:  tensor(0.1804, grad_fn=<AddBackward0>)
EPOCH:  377  LOSS:  tensor(0.1798, grad_fn=<AddBackward0>)
EPOCH:  378  LOSS:  tensor(0.1792, grad_fn=<AddBackward0>)
EPOCH:  379  LOSS:  tensor(0.1786, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.1779, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.1774, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.1767, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1761, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.1755, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.1749, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.1743, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.1737, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.1732, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.1726, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.1720, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.1715, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.1710, grad_fn=<AddBackward0

EPOCH:  515  LOSS:  tensor(0.1308, grad_fn=<AddBackward0>)
EPOCH:  516  LOSS:  tensor(0.1309, grad_fn=<AddBackward0>)
EPOCH:  517  LOSS:  tensor(0.1307, grad_fn=<AddBackward0>)
EPOCH:  518  LOSS:  tensor(0.1306, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1306, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1304, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1301, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1299, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.1298, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.1296, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.1294, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.1291, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.1289, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.1286, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.1283, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.1280, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.1278, grad_fn=<AddBackward0

EPOCH:  654  LOSS:  tensor(0.1065, grad_fn=<AddBackward0>)
EPOCH:  655  LOSS:  tensor(0.1063, grad_fn=<AddBackward0>)
EPOCH:  656  LOSS:  tensor(0.1061, grad_fn=<AddBackward0>)
EPOCH:  657  LOSS:  tensor(0.1060, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.1057, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.1056, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.1055, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.1054, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.1053, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.1051, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.1050, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.1050, grad_fn=<AddBackward0

EPOCH:  793  LOSS:  tensor(0.0988, grad_fn=<AddBackward0>)
EPOCH:  794  LOSS:  tensor(0.1013, grad_fn=<AddBackward0>)
EPOCH:  795  LOSS:  tensor(0.1052, grad_fn=<AddBackward0>)
EPOCH:  796  LOSS:  tensor(0.1111, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.1194, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.1306, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.1425, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.1534, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.1561, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.1484, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.1287, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.1070, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0944, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.1058, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.1160, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.1189, grad_fn=<AddBackward0

EPOCH:  932  LOSS:  tensor(0.0860, grad_fn=<AddBackward0>)
EPOCH:  933  LOSS:  tensor(0.0862, grad_fn=<AddBackward0>)
EPOCH:  934  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  935  LOSS:  tensor(0.0858, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0856, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0853, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0854, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0855, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0858, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0861, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0865, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0868, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0872, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0877, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0886, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.0899, grad_fn=<AddBackward0

EPOCH:  1070  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  1071  LOSS:  tensor(0.0820, grad_fn=<AddBackward0>)
EPOCH:  1072  LOSS:  tensor(0.0825, grad_fn=<AddBackward0>)
EPOCH:  1073  LOSS:  tensor(0.0831, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0841, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0850, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0857, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0859, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0863, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0866, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0871, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0881, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0891, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0900, grad

EPOCH:  5  LOSS:  tensor(138.1835, grad_fn=<AddBackward0>)
EPOCH:  6  LOSS:  tensor(125.5180, grad_fn=<AddBackward0>)
EPOCH:  7  LOSS:  tensor(115.8054, grad_fn=<AddBackward0>)
EPOCH:  8  LOSS:  tensor(108.9488, grad_fn=<AddBackward0>)
EPOCH:  9  LOSS:  tensor(104.5781, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(102.3171, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(101.3692, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(100.7556, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(99.7285, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(97.8769, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(95.1175, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(91.5780, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(87.5881, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(83.5128, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(79.6675, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(76.2292, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(73.2873, grad_fn=<AddBackwa

EPOCH:  145  LOSS:  tensor(0.5961, grad_fn=<AddBackward0>)
EPOCH:  146  LOSS:  tensor(0.5868, grad_fn=<AddBackward0>)
EPOCH:  147  LOSS:  tensor(0.5776, grad_fn=<AddBackward0>)
EPOCH:  148  LOSS:  tensor(0.5685, grad_fn=<AddBackward0>)
EPOCH:  149  LOSS:  tensor(0.5598, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.5513, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.5428, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.5346, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.5266, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.5187, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.5110, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.5035, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.4961, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.4889, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.4820, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.4752, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.4685, grad_fn=<AddBackward0

EPOCH:  284  LOSS:  tensor(0.1900, grad_fn=<AddBackward0>)
EPOCH:  285  LOSS:  tensor(0.1892, grad_fn=<AddBackward0>)
EPOCH:  286  LOSS:  tensor(0.1884, grad_fn=<AddBackward0>)
EPOCH:  287  LOSS:  tensor(0.1877, grad_fn=<AddBackward0>)
EPOCH:  288  LOSS:  tensor(0.1869, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.1862, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.1854, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.1847, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.1839, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.1832, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.1825, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.1818, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.1811, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.1804, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.1796, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.1789, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.1782, grad_fn=<AddBackward0

EPOCH:  423  LOSS:  tensor(0.1020, grad_fn=<AddBackward0>)
EPOCH:  424  LOSS:  tensor(0.1016, grad_fn=<AddBackward0>)
EPOCH:  425  LOSS:  tensor(0.1012, grad_fn=<AddBackward0>)
EPOCH:  426  LOSS:  tensor(0.1009, grad_fn=<AddBackward0>)
EPOCH:  427  LOSS:  tensor(0.1005, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1001, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.0997, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.0994, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.0990, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.0986, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.0983, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.0980, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.0976, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.0972, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.0969, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.0962, grad_fn=<AddBackward0

EPOCH:  562  LOSS:  tensor(0.0764, grad_fn=<AddBackward0>)
EPOCH:  563  LOSS:  tensor(0.0800, grad_fn=<AddBackward0>)
EPOCH:  564  LOSS:  tensor(0.0849, grad_fn=<AddBackward0>)
EPOCH:  565  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  566  LOSS:  tensor(0.0978, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.1054, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.1011, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.0787, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.0695, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.0665, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.0695, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.0801, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.0813, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.0784, grad_fn=<AddBackward0

EPOCH:  701  LOSS:  tensor(0.0524, grad_fn=<AddBackward0>)
EPOCH:  702  LOSS:  tensor(0.0524, grad_fn=<AddBackward0>)
EPOCH:  703  LOSS:  tensor(0.0524, grad_fn=<AddBackward0>)
EPOCH:  704  LOSS:  tensor(0.0523, grad_fn=<AddBackward0>)
EPOCH:  705  LOSS:  tensor(0.0521, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.0520, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.0519, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.0517, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.0517, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.0515, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.0514, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.0513, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.0513, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.0512, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.0511, grad_fn=<AddBackward0

EPOCH:  840  LOSS:  tensor(0.1470, grad_fn=<AddBackward0>)
EPOCH:  841  LOSS:  tensor(0.1302, grad_fn=<AddBackward0>)
EPOCH:  842  LOSS:  tensor(0.0764, grad_fn=<AddBackward0>)
EPOCH:  843  LOSS:  tensor(0.0443, grad_fn=<AddBackward0>)
EPOCH:  844  LOSS:  tensor(0.0606, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0935, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0987, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0713, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0459, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0491, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0691, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0768, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0633, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0462, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0449, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0563, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0629, grad_fn=<AddBackward0

EPOCH:  979  LOSS:  tensor(0.1310, grad_fn=<AddBackward0>)
EPOCH:  980  LOSS:  tensor(0.0962, grad_fn=<AddBackward0>)
EPOCH:  981  LOSS:  tensor(0.0610, grad_fn=<AddBackward0>)
EPOCH:  982  LOSS:  tensor(0.0402, grad_fn=<AddBackward0>)
EPOCH:  983  LOSS:  tensor(0.0396, grad_fn=<AddBackward0>)
EPOCH:  984  LOSS:  tensor(0.0539, grad_fn=<AddBackward0>)
EPOCH:  985  LOSS:  tensor(0.0718, grad_fn=<AddBackward0>)
EPOCH:  986  LOSS:  tensor(0.0821, grad_fn=<AddBackward0>)
EPOCH:  987  LOSS:  tensor(0.0793, grad_fn=<AddBackward0>)
EPOCH:  988  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  989  LOSS:  tensor(0.0490, grad_fn=<AddBackward0>)
EPOCH:  990  LOSS:  tensor(0.0386, grad_fn=<AddBackward0>)
EPOCH:  991  LOSS:  tensor(0.0380, grad_fn=<AddBackward0>)
EPOCH:  992  LOSS:  tensor(0.0450, grad_fn=<AddBackward0>)
EPOCH:  993  LOSS:  tensor(0.0533, grad_fn=<AddBackward0>)
EPOCH:  994  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  995  LOSS:  tensor(0.0555, grad_fn=<AddBackward0

EPOCH:  1116  LOSS:  tensor(0.0340, grad_fn=<AddBackward0>)
EPOCH:  1117  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1118  LOSS:  tensor(0.0337, grad_fn=<AddBackward0>)
EPOCH:  1119  LOSS:  tensor(0.0338, grad_fn=<AddBackward0>)
EPOCH:  1120  LOSS:  tensor(0.0341, grad_fn=<AddBackward0>)
EPOCH:  1121  LOSS:  tensor(0.0343, grad_fn=<AddBackward0>)
EPOCH:  1122  LOSS:  tensor(0.0345, grad_fn=<AddBackward0>)
EPOCH:  1123  LOSS:  tensor(0.0346, grad_fn=<AddBackward0>)
EPOCH:  1124  LOSS:  tensor(0.0345, grad_fn=<AddBackward0>)
EPOCH:  1125  LOSS:  tensor(0.0344, grad_fn=<AddBackward0>)
EPOCH:  1126  LOSS:  tensor(0.0342, grad_fn=<AddBackward0>)
EPOCH:  1127  LOSS:  tensor(0.0340, grad_fn=<AddBackward0>)
EPOCH:  1128  LOSS:  tensor(0.0338, grad_fn=<AddBackward0>)
EPOCH:  1129  LOSS:  tensor(0.0336, grad_fn=<AddBackward0>)
EPOCH:  1130  LOSS:  tensor(0.0335, grad_fn=<AddBackward0>)
EPOCH:  1131  LOSS:  tensor(0.0334, grad_fn=<AddBackward0>)
EPOCH:  1132  LOSS:  tensor(0.0333, grad

EPOCH:  52  LOSS:  tensor(19.3070, grad_fn=<AddBackward0>)
EPOCH:  53  LOSS:  tensor(18.4891, grad_fn=<AddBackward0>)
EPOCH:  54  LOSS:  tensor(17.6920, grad_fn=<AddBackward0>)
EPOCH:  55  LOSS:  tensor(16.9213, grad_fn=<AddBackward0>)
EPOCH:  56  LOSS:  tensor(16.1822, grad_fn=<AddBackward0>)
EPOCH:  57  LOSS:  tensor(15.5086, grad_fn=<AddBackward0>)
EPOCH:  58  LOSS:  tensor(14.8698, grad_fn=<AddBackward0>)
EPOCH:  59  LOSS:  tensor(14.2644, grad_fn=<AddBackward0>)
EPOCH:  60  LOSS:  tensor(13.6867, grad_fn=<AddBackward0>)
EPOCH:  61  LOSS:  tensor(13.1472, grad_fn=<AddBackward0>)
EPOCH:  62  LOSS:  tensor(12.6285, grad_fn=<AddBackward0>)
EPOCH:  63  LOSS:  tensor(12.1241, grad_fn=<AddBackward0>)
EPOCH:  64  LOSS:  tensor(11.6476, grad_fn=<AddBackward0>)
EPOCH:  65  LOSS:  tensor(11.2062, grad_fn=<AddBackward0>)
EPOCH:  66  LOSS:  tensor(10.7998, grad_fn=<AddBackward0>)
EPOCH:  67  LOSS:  tensor(10.4135, grad_fn=<AddBackward0>)
EPOCH:  68  LOSS:  tensor(10.0424, grad_fn=<AddBackward0

EPOCH:  192  LOSS:  tensor(0.6870, grad_fn=<AddBackward0>)
EPOCH:  193  LOSS:  tensor(0.6794, grad_fn=<AddBackward0>)
EPOCH:  194  LOSS:  tensor(0.6717, grad_fn=<AddBackward0>)
EPOCH:  195  LOSS:  tensor(0.6644, grad_fn=<AddBackward0>)
EPOCH:  196  LOSS:  tensor(0.6568, grad_fn=<AddBackward0>)
EPOCH:  197  LOSS:  tensor(0.6495, grad_fn=<AddBackward0>)
EPOCH:  198  LOSS:  tensor(0.6422, grad_fn=<AddBackward0>)
EPOCH:  199  LOSS:  tensor(0.6351, grad_fn=<AddBackward0>)
EPOCH:  200  LOSS:  tensor(0.6283, grad_fn=<AddBackward0>)
EPOCH:  201  LOSS:  tensor(0.6215, grad_fn=<AddBackward0>)
EPOCH:  202  LOSS:  tensor(0.6150, grad_fn=<AddBackward0>)
EPOCH:  203  LOSS:  tensor(0.6084, grad_fn=<AddBackward0>)
EPOCH:  204  LOSS:  tensor(0.6020, grad_fn=<AddBackward0>)
EPOCH:  205  LOSS:  tensor(0.5956, grad_fn=<AddBackward0>)
EPOCH:  206  LOSS:  tensor(0.5894, grad_fn=<AddBackward0>)
EPOCH:  207  LOSS:  tensor(0.5835, grad_fn=<AddBackward0>)
EPOCH:  208  LOSS:  tensor(0.5777, grad_fn=<AddBackward0

EPOCH:  331  LOSS:  tensor(0.2357, grad_fn=<AddBackward0>)
EPOCH:  332  LOSS:  tensor(0.2346, grad_fn=<AddBackward0>)
EPOCH:  333  LOSS:  tensor(0.2332, grad_fn=<AddBackward0>)
EPOCH:  334  LOSS:  tensor(0.2320, grad_fn=<AddBackward0>)
EPOCH:  335  LOSS:  tensor(0.2303, grad_fn=<AddBackward0>)
EPOCH:  336  LOSS:  tensor(0.2295, grad_fn=<AddBackward0>)
EPOCH:  337  LOSS:  tensor(0.2276, grad_fn=<AddBackward0>)
EPOCH:  338  LOSS:  tensor(0.2262, grad_fn=<AddBackward0>)
EPOCH:  339  LOSS:  tensor(0.2256, grad_fn=<AddBackward0>)
EPOCH:  340  LOSS:  tensor(0.2239, grad_fn=<AddBackward0>)
EPOCH:  341  LOSS:  tensor(0.2233, grad_fn=<AddBackward0>)
EPOCH:  342  LOSS:  tensor(0.2218, grad_fn=<AddBackward0>)
EPOCH:  343  LOSS:  tensor(0.2207, grad_fn=<AddBackward0>)
EPOCH:  344  LOSS:  tensor(0.2193, grad_fn=<AddBackward0>)
EPOCH:  345  LOSS:  tensor(0.2182, grad_fn=<AddBackward0>)
EPOCH:  346  LOSS:  tensor(0.2169, grad_fn=<AddBackward0>)
EPOCH:  347  LOSS:  tensor(0.2157, grad_fn=<AddBackward0

EPOCH:  470  LOSS:  tensor(0.1371, grad_fn=<AddBackward0>)
EPOCH:  471  LOSS:  tensor(0.1421, grad_fn=<AddBackward0>)
EPOCH:  472  LOSS:  tensor(0.1417, grad_fn=<AddBackward0>)
EPOCH:  473  LOSS:  tensor(0.1365, grad_fn=<AddBackward0>)
EPOCH:  474  LOSS:  tensor(0.1306, grad_fn=<AddBackward0>)
EPOCH:  475  LOSS:  tensor(0.1278, grad_fn=<AddBackward0>)
EPOCH:  476  LOSS:  tensor(0.1291, grad_fn=<AddBackward0>)
EPOCH:  477  LOSS:  tensor(0.1321, grad_fn=<AddBackward0>)
EPOCH:  478  LOSS:  tensor(0.1337, grad_fn=<AddBackward0>)
EPOCH:  479  LOSS:  tensor(0.1329, grad_fn=<AddBackward0>)
EPOCH:  480  LOSS:  tensor(0.1301, grad_fn=<AddBackward0>)
EPOCH:  481  LOSS:  tensor(0.1270, grad_fn=<AddBackward0>)
EPOCH:  482  LOSS:  tensor(0.1257, grad_fn=<AddBackward0>)
EPOCH:  483  LOSS:  tensor(0.1259, grad_fn=<AddBackward0>)
EPOCH:  484  LOSS:  tensor(0.1274, grad_fn=<AddBackward0>)
EPOCH:  485  LOSS:  tensor(0.1283, grad_fn=<AddBackward0>)
EPOCH:  486  LOSS:  tensor(0.1278, grad_fn=<AddBackward0

EPOCH:  609  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  610  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  611  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  612  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  613  LOSS:  tensor(0.0950, grad_fn=<AddBackward0>)
EPOCH:  614  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  615  LOSS:  tensor(0.0951, grad_fn=<AddBackward0>)
EPOCH:  616  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  617  LOSS:  tensor(0.0952, grad_fn=<AddBackward0>)
EPOCH:  618  LOSS:  tensor(0.0955, grad_fn=<AddBackward0>)
EPOCH:  619  LOSS:  tensor(0.0958, grad_fn=<AddBackward0>)
EPOCH:  620  LOSS:  tensor(0.0965, grad_fn=<AddBackward0>)
EPOCH:  621  LOSS:  tensor(0.0975, grad_fn=<AddBackward0>)
EPOCH:  622  LOSS:  tensor(0.0992, grad_fn=<AddBackward0>)
EPOCH:  623  LOSS:  tensor(0.1015, grad_fn=<AddBackward0>)
EPOCH:  624  LOSS:  tensor(0.1048, grad_fn=<AddBackward0>)
EPOCH:  625  LOSS:  tensor(0.1091, grad_fn=<AddBackward0

EPOCH:  748  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  749  LOSS:  tensor(0.0743, grad_fn=<AddBackward0>)
EPOCH:  750  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  751  LOSS:  tensor(0.0744, grad_fn=<AddBackward0>)
EPOCH:  752  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  753  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  754  LOSS:  tensor(0.0749, grad_fn=<AddBackward0>)
EPOCH:  755  LOSS:  tensor(0.0750, grad_fn=<AddBackward0>)
EPOCH:  756  LOSS:  tensor(0.0751, grad_fn=<AddBackward0>)
EPOCH:  757  LOSS:  tensor(0.0752, grad_fn=<AddBackward0>)
EPOCH:  758  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  759  LOSS:  tensor(0.0753, grad_fn=<AddBackward0>)
EPOCH:  760  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  761  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  762  LOSS:  tensor(0.0754, grad_fn=<AddBackward0>)
EPOCH:  763  LOSS:  tensor(0.0755, grad_fn=<AddBackward0>)
EPOCH:  764  LOSS:  tensor(0.0757, grad_fn=<AddBackward0

EPOCH:  887  LOSS:  tensor(0.0818, grad_fn=<AddBackward0>)
EPOCH:  888  LOSS:  tensor(0.0746, grad_fn=<AddBackward0>)
EPOCH:  889  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  890  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  891  LOSS:  tensor(0.0748, grad_fn=<AddBackward0>)
EPOCH:  892  LOSS:  tensor(0.0757, grad_fn=<AddBackward0>)
EPOCH:  893  LOSS:  tensor(0.0710, grad_fn=<AddBackward0>)
EPOCH:  894  LOSS:  tensor(0.0672, grad_fn=<AddBackward0>)
EPOCH:  895  LOSS:  tensor(0.0685, grad_fn=<AddBackward0>)
EPOCH:  896  LOSS:  tensor(0.0718, grad_fn=<AddBackward0>)
EPOCH:  897  LOSS:  tensor(0.0722, grad_fn=<AddBackward0>)
EPOCH:  898  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  899  LOSS:  tensor(0.0670, grad_fn=<AddBackward0>)
EPOCH:  900  LOSS:  tensor(0.0677, grad_fn=<AddBackward0>)
EPOCH:  901  LOSS:  tensor(0.0697, grad_fn=<AddBackward0>)
EPOCH:  902  LOSS:  tensor(0.0700, grad_fn=<AddBackward0>)
EPOCH:  903  LOSS:  tensor(0.0684, grad_fn=<AddBackward0

EPOCH:  1026  LOSS:  tensor(0.0729, grad_fn=<AddBackward0>)
EPOCH:  1027  LOSS:  tensor(0.0817, grad_fn=<AddBackward0>)
EPOCH:  1028  LOSS:  tensor(0.0961, grad_fn=<AddBackward0>)
EPOCH:  1029  LOSS:  tensor(0.1187, grad_fn=<AddBackward0>)
EPOCH:  1030  LOSS:  tensor(0.1528, grad_fn=<AddBackward0>)
EPOCH:  1031  LOSS:  tensor(0.1981, grad_fn=<AddBackward0>)
EPOCH:  1032  LOSS:  tensor(0.2489, grad_fn=<AddBackward0>)
EPOCH:  1033  LOSS:  tensor(0.2850, grad_fn=<AddBackward0>)
EPOCH:  1034  LOSS:  tensor(0.2799, grad_fn=<AddBackward0>)
EPOCH:  1035  LOSS:  tensor(0.2155, grad_fn=<AddBackward0>)
EPOCH:  1036  LOSS:  tensor(0.1250, grad_fn=<AddBackward0>)
EPOCH:  1037  LOSS:  tensor(0.0653, grad_fn=<AddBackward0>)
EPOCH:  1038  LOSS:  tensor(0.0698, grad_fn=<AddBackward0>)
EPOCH:  1039  LOSS:  tensor(0.1169, grad_fn=<AddBackward0>)
EPOCH:  1040  LOSS:  tensor(0.1550, grad_fn=<AddBackward0>)
EPOCH:  1041  LOSS:  tensor(0.1489, grad_fn=<AddBackward0>)
EPOCH:  1042  LOSS:  tensor(0.1051, grad

EPOCH:  1163  LOSS:  tensor(0.0588, grad_fn=<AddBackward0>)
EPOCH:  1164  LOSS:  tensor(0.0604, grad_fn=<AddBackward0>)
EPOCH:  1165  LOSS:  tensor(0.0625, grad_fn=<AddBackward0>)
EPOCH:  1166  LOSS:  tensor(0.0655, grad_fn=<AddBackward0>)
EPOCH:  1167  LOSS:  tensor(0.0698, grad_fn=<AddBackward0>)
EPOCH:  1168  LOSS:  tensor(0.0758, grad_fn=<AddBackward0>)
EPOCH:  1169  LOSS:  tensor(0.0843, grad_fn=<AddBackward0>)
EPOCH:  1170  LOSS:  tensor(0.0957, grad_fn=<AddBackward0>)
EPOCH:  1171  LOSS:  tensor(0.1103, grad_fn=<AddBackward0>)
EPOCH:  1172  LOSS:  tensor(0.1285, grad_fn=<AddBackward0>)
EPOCH:  1173  LOSS:  tensor(0.1491, grad_fn=<AddBackward0>)
EPOCH:  1174  LOSS:  tensor(0.1684, grad_fn=<AddBackward0>)
EPOCH:  1175  LOSS:  tensor(0.1788, grad_fn=<AddBackward0>)
EPOCH:  1176  LOSS:  tensor(0.1748, grad_fn=<AddBackward0>)
EPOCH:  1177  LOSS:  tensor(0.1518, grad_fn=<AddBackward0>)
EPOCH:  1178  LOSS:  tensor(0.1166, grad_fn=<AddBackward0>)
EPOCH:  1179  LOSS:  tensor(0.0811, grad

EPOCH:  101  LOSS:  tensor(2.9963, grad_fn=<AddBackward0>)
EPOCH:  102  LOSS:  tensor(2.9191, grad_fn=<AddBackward0>)
EPOCH:  103  LOSS:  tensor(2.8460, grad_fn=<AddBackward0>)
EPOCH:  104  LOSS:  tensor(2.7779, grad_fn=<AddBackward0>)
EPOCH:  105  LOSS:  tensor(2.7117, grad_fn=<AddBackward0>)
EPOCH:  106  LOSS:  tensor(2.6471, grad_fn=<AddBackward0>)
EPOCH:  107  LOSS:  tensor(2.5853, grad_fn=<AddBackward0>)
EPOCH:  108  LOSS:  tensor(2.5245, grad_fn=<AddBackward0>)
EPOCH:  109  LOSS:  tensor(2.4645, grad_fn=<AddBackward0>)
EPOCH:  110  LOSS:  tensor(2.4072, grad_fn=<AddBackward0>)
EPOCH:  111  LOSS:  tensor(2.3536, grad_fn=<AddBackward0>)
EPOCH:  112  LOSS:  tensor(2.3029, grad_fn=<AddBackward0>)
EPOCH:  113  LOSS:  tensor(2.2546, grad_fn=<AddBackward0>)
EPOCH:  114  LOSS:  tensor(2.2086, grad_fn=<AddBackward0>)
EPOCH:  115  LOSS:  tensor(2.1648, grad_fn=<AddBackward0>)
EPOCH:  116  LOSS:  tensor(2.1219, grad_fn=<AddBackward0>)
EPOCH:  117  LOSS:  tensor(2.0798, grad_fn=<AddBackward0

EPOCH:  240  LOSS:  tensor(0.3856, grad_fn=<AddBackward0>)
EPOCH:  241  LOSS:  tensor(0.3826, grad_fn=<AddBackward0>)
EPOCH:  242  LOSS:  tensor(0.3791, grad_fn=<AddBackward0>)
EPOCH:  243  LOSS:  tensor(0.3764, grad_fn=<AddBackward0>)
EPOCH:  244  LOSS:  tensor(0.3734, grad_fn=<AddBackward0>)
EPOCH:  245  LOSS:  tensor(0.3699, grad_fn=<AddBackward0>)
EPOCH:  246  LOSS:  tensor(0.3672, grad_fn=<AddBackward0>)
EPOCH:  247  LOSS:  tensor(0.3642, grad_fn=<AddBackward0>)
EPOCH:  248  LOSS:  tensor(0.3612, grad_fn=<AddBackward0>)
EPOCH:  249  LOSS:  tensor(0.3586, grad_fn=<AddBackward0>)
EPOCH:  250  LOSS:  tensor(0.3556, grad_fn=<AddBackward0>)
EPOCH:  251  LOSS:  tensor(0.3526, grad_fn=<AddBackward0>)
EPOCH:  252  LOSS:  tensor(0.3498, grad_fn=<AddBackward0>)
EPOCH:  253  LOSS:  tensor(0.3470, grad_fn=<AddBackward0>)
EPOCH:  254  LOSS:  tensor(0.3442, grad_fn=<AddBackward0>)
EPOCH:  255  LOSS:  tensor(0.3413, grad_fn=<AddBackward0>)
EPOCH:  256  LOSS:  tensor(0.3386, grad_fn=<AddBackward0

EPOCH:  379  LOSS:  tensor(0.1740, grad_fn=<AddBackward0>)
EPOCH:  380  LOSS:  tensor(0.1733, grad_fn=<AddBackward0>)
EPOCH:  381  LOSS:  tensor(0.1718, grad_fn=<AddBackward0>)
EPOCH:  382  LOSS:  tensor(0.1701, grad_fn=<AddBackward0>)
EPOCH:  383  LOSS:  tensor(0.1686, grad_fn=<AddBackward0>)
EPOCH:  384  LOSS:  tensor(0.1675, grad_fn=<AddBackward0>)
EPOCH:  385  LOSS:  tensor(0.1666, grad_fn=<AddBackward0>)
EPOCH:  386  LOSS:  tensor(0.1659, grad_fn=<AddBackward0>)
EPOCH:  387  LOSS:  tensor(0.1656, grad_fn=<AddBackward0>)
EPOCH:  388  LOSS:  tensor(0.1658, grad_fn=<AddBackward0>)
EPOCH:  389  LOSS:  tensor(0.1663, grad_fn=<AddBackward0>)
EPOCH:  390  LOSS:  tensor(0.1669, grad_fn=<AddBackward0>)
EPOCH:  391  LOSS:  tensor(0.1679, grad_fn=<AddBackward0>)
EPOCH:  392  LOSS:  tensor(0.1685, grad_fn=<AddBackward0>)
EPOCH:  393  LOSS:  tensor(0.1691, grad_fn=<AddBackward0>)
EPOCH:  394  LOSS:  tensor(0.1684, grad_fn=<AddBackward0>)
EPOCH:  395  LOSS:  tensor(0.1679, grad_fn=<AddBackward0

EPOCH:  518  LOSS:  tensor(0.1185, grad_fn=<AddBackward0>)
EPOCH:  519  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  520  LOSS:  tensor(0.1142, grad_fn=<AddBackward0>)
EPOCH:  521  LOSS:  tensor(0.1138, grad_fn=<AddBackward0>)
EPOCH:  522  LOSS:  tensor(0.1144, grad_fn=<AddBackward0>)
EPOCH:  523  LOSS:  tensor(0.1153, grad_fn=<AddBackward0>)
EPOCH:  524  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  525  LOSS:  tensor(0.1159, grad_fn=<AddBackward0>)
EPOCH:  526  LOSS:  tensor(0.1154, grad_fn=<AddBackward0>)
EPOCH:  527  LOSS:  tensor(0.1144, grad_fn=<AddBackward0>)
EPOCH:  528  LOSS:  tensor(0.1131, grad_fn=<AddBackward0>)
EPOCH:  529  LOSS:  tensor(0.1120, grad_fn=<AddBackward0>)
EPOCH:  530  LOSS:  tensor(0.1112, grad_fn=<AddBackward0>)
EPOCH:  531  LOSS:  tensor(0.1107, grad_fn=<AddBackward0>)
EPOCH:  532  LOSS:  tensor(0.1105, grad_fn=<AddBackward0>)
EPOCH:  533  LOSS:  tensor(0.1105, grad_fn=<AddBackward0>)
EPOCH:  534  LOSS:  tensor(0.1106, grad_fn=<AddBackward0

EPOCH:  657  LOSS:  tensor(0.0971, grad_fn=<AddBackward0>)
EPOCH:  658  LOSS:  tensor(0.0948, grad_fn=<AddBackward0>)
EPOCH:  659  LOSS:  tensor(0.0910, grad_fn=<AddBackward0>)
EPOCH:  660  LOSS:  tensor(0.0888, grad_fn=<AddBackward0>)
EPOCH:  661  LOSS:  tensor(0.0894, grad_fn=<AddBackward0>)
EPOCH:  662  LOSS:  tensor(0.0916, grad_fn=<AddBackward0>)
EPOCH:  663  LOSS:  tensor(0.0926, grad_fn=<AddBackward0>)
EPOCH:  664  LOSS:  tensor(0.0915, grad_fn=<AddBackward0>)
EPOCH:  665  LOSS:  tensor(0.0894, grad_fn=<AddBackward0>)
EPOCH:  666  LOSS:  tensor(0.0878, grad_fn=<AddBackward0>)
EPOCH:  667  LOSS:  tensor(0.0878, grad_fn=<AddBackward0>)
EPOCH:  668  LOSS:  tensor(0.0886, grad_fn=<AddBackward0>)
EPOCH:  669  LOSS:  tensor(0.0892, grad_fn=<AddBackward0>)
EPOCH:  670  LOSS:  tensor(0.0888, grad_fn=<AddBackward0>)
EPOCH:  671  LOSS:  tensor(0.0879, grad_fn=<AddBackward0>)
EPOCH:  672  LOSS:  tensor(0.0871, grad_fn=<AddBackward0>)
EPOCH:  673  LOSS:  tensor(0.0868, grad_fn=<AddBackward0

EPOCH:  796  LOSS:  tensor(0.0710, grad_fn=<AddBackward0>)
EPOCH:  797  LOSS:  tensor(0.0709, grad_fn=<AddBackward0>)
EPOCH:  798  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  799  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  800  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  801  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  802  LOSS:  tensor(0.0707, grad_fn=<AddBackward0>)
EPOCH:  803  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  804  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  805  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  806  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  807  LOSS:  tensor(0.0708, grad_fn=<AddBackward0>)
EPOCH:  808  LOSS:  tensor(0.0712, grad_fn=<AddBackward0>)
EPOCH:  809  LOSS:  tensor(0.0717, grad_fn=<AddBackward0>)
EPOCH:  810  LOSS:  tensor(0.0726, grad_fn=<AddBackward0>)
EPOCH:  811  LOSS:  tensor(0.0738, grad_fn=<AddBackward0>)
EPOCH:  812  LOSS:  tensor(0.0758, grad_fn=<AddBackward0

EPOCH:  935  LOSS:  tensor(0.0934, grad_fn=<AddBackward0>)
EPOCH:  936  LOSS:  tensor(0.0867, grad_fn=<AddBackward0>)
EPOCH:  937  LOSS:  tensor(0.0769, grad_fn=<AddBackward0>)
EPOCH:  938  LOSS:  tensor(0.0678, grad_fn=<AddBackward0>)
EPOCH:  939  LOSS:  tensor(0.0628, grad_fn=<AddBackward0>)
EPOCH:  940  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  941  LOSS:  tensor(0.0660, grad_fn=<AddBackward0>)
EPOCH:  942  LOSS:  tensor(0.0706, grad_fn=<AddBackward0>)
EPOCH:  943  LOSS:  tensor(0.0739, grad_fn=<AddBackward0>)
EPOCH:  944  LOSS:  tensor(0.0747, grad_fn=<AddBackward0>)
EPOCH:  945  LOSS:  tensor(0.0729, grad_fn=<AddBackward0>)
EPOCH:  946  LOSS:  tensor(0.0693, grad_fn=<AddBackward0>)
EPOCH:  947  LOSS:  tensor(0.0658, grad_fn=<AddBackward0>)
EPOCH:  948  LOSS:  tensor(0.0633, grad_fn=<AddBackward0>)
EPOCH:  949  LOSS:  tensor(0.0626, grad_fn=<AddBackward0>)
EPOCH:  950  LOSS:  tensor(0.0634, grad_fn=<AddBackward0>)
EPOCH:  951  LOSS:  tensor(0.0651, grad_fn=<AddBackward0

EPOCH:  1073  LOSS:  tensor(0.0587, grad_fn=<AddBackward0>)
EPOCH:  1074  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1075  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1076  LOSS:  tensor(0.0572, grad_fn=<AddBackward0>)
EPOCH:  1077  LOSS:  tensor(0.0575, grad_fn=<AddBackward0>)
EPOCH:  1078  LOSS:  tensor(0.0579, grad_fn=<AddBackward0>)
EPOCH:  1079  LOSS:  tensor(0.0582, grad_fn=<AddBackward0>)
EPOCH:  1080  LOSS:  tensor(0.0582, grad_fn=<AddBackward0>)
EPOCH:  1081  LOSS:  tensor(0.0580, grad_fn=<AddBackward0>)
EPOCH:  1082  LOSS:  tensor(0.0576, grad_fn=<AddBackward0>)
EPOCH:  1083  LOSS:  tensor(0.0571, grad_fn=<AddBackward0>)
EPOCH:  1084  LOSS:  tensor(0.0566, grad_fn=<AddBackward0>)
EPOCH:  1085  LOSS:  tensor(0.0562, grad_fn=<AddBackward0>)
EPOCH:  1086  LOSS:  tensor(0.0560, grad_fn=<AddBackward0>)
EPOCH:  1087  LOSS:  tensor(0.0559, grad_fn=<AddBackward0>)
EPOCH:  1088  LOSS:  tensor(0.0559, grad_fn=<AddBackward0>)
EPOCH:  1089  LOSS:  tensor(0.0561, grad

EPOCH:  9  LOSS:  tensor(66.7077, grad_fn=<AddBackward0>)
EPOCH:  10  LOSS:  tensor(60.0942, grad_fn=<AddBackward0>)
EPOCH:  11  LOSS:  tensor(56.4766, grad_fn=<AddBackward0>)
EPOCH:  12  LOSS:  tensor(55.2000, grad_fn=<AddBackward0>)
EPOCH:  13  LOSS:  tensor(55.3985, grad_fn=<AddBackward0>)
EPOCH:  14  LOSS:  tensor(56.0896, grad_fn=<AddBackward0>)
EPOCH:  15  LOSS:  tensor(56.4835, grad_fn=<AddBackward0>)
EPOCH:  16  LOSS:  tensor(56.1678, grad_fn=<AddBackward0>)
EPOCH:  17  LOSS:  tensor(54.9308, grad_fn=<AddBackward0>)
EPOCH:  18  LOSS:  tensor(52.7929, grad_fn=<AddBackward0>)
EPOCH:  19  LOSS:  tensor(49.9309, grad_fn=<AddBackward0>)
EPOCH:  20  LOSS:  tensor(46.5900, grad_fn=<AddBackward0>)
EPOCH:  21  LOSS:  tensor(43.1394, grad_fn=<AddBackward0>)
EPOCH:  22  LOSS:  tensor(39.9160, grad_fn=<AddBackward0>)
EPOCH:  23  LOSS:  tensor(37.0876, grad_fn=<AddBackward0>)
EPOCH:  24  LOSS:  tensor(34.7189, grad_fn=<AddBackward0>)
EPOCH:  25  LOSS:  tensor(32.7651, grad_fn=<AddBackward0>

EPOCH:  149  LOSS:  tensor(0.4495, grad_fn=<AddBackward0>)
EPOCH:  150  LOSS:  tensor(0.4456, grad_fn=<AddBackward0>)
EPOCH:  151  LOSS:  tensor(0.4415, grad_fn=<AddBackward0>)
EPOCH:  152  LOSS:  tensor(0.4377, grad_fn=<AddBackward0>)
EPOCH:  153  LOSS:  tensor(0.4341, grad_fn=<AddBackward0>)
EPOCH:  154  LOSS:  tensor(0.4305, grad_fn=<AddBackward0>)
EPOCH:  155  LOSS:  tensor(0.4270, grad_fn=<AddBackward0>)
EPOCH:  156  LOSS:  tensor(0.4234, grad_fn=<AddBackward0>)
EPOCH:  157  LOSS:  tensor(0.4199, grad_fn=<AddBackward0>)
EPOCH:  158  LOSS:  tensor(0.4165, grad_fn=<AddBackward0>)
EPOCH:  159  LOSS:  tensor(0.4131, grad_fn=<AddBackward0>)
EPOCH:  160  LOSS:  tensor(0.4098, grad_fn=<AddBackward0>)
EPOCH:  161  LOSS:  tensor(0.4066, grad_fn=<AddBackward0>)
EPOCH:  162  LOSS:  tensor(0.4035, grad_fn=<AddBackward0>)
EPOCH:  163  LOSS:  tensor(0.4006, grad_fn=<AddBackward0>)
EPOCH:  164  LOSS:  tensor(0.3976, grad_fn=<AddBackward0>)
EPOCH:  165  LOSS:  tensor(0.3946, grad_fn=<AddBackward0

EPOCH:  288  LOSS:  tensor(0.2174, grad_fn=<AddBackward0>)
EPOCH:  289  LOSS:  tensor(0.2167, grad_fn=<AddBackward0>)
EPOCH:  290  LOSS:  tensor(0.2160, grad_fn=<AddBackward0>)
EPOCH:  291  LOSS:  tensor(0.2154, grad_fn=<AddBackward0>)
EPOCH:  292  LOSS:  tensor(0.2148, grad_fn=<AddBackward0>)
EPOCH:  293  LOSS:  tensor(0.2141, grad_fn=<AddBackward0>)
EPOCH:  294  LOSS:  tensor(0.2133, grad_fn=<AddBackward0>)
EPOCH:  295  LOSS:  tensor(0.2127, grad_fn=<AddBackward0>)
EPOCH:  296  LOSS:  tensor(0.2121, grad_fn=<AddBackward0>)
EPOCH:  297  LOSS:  tensor(0.2114, grad_fn=<AddBackward0>)
EPOCH:  298  LOSS:  tensor(0.2108, grad_fn=<AddBackward0>)
EPOCH:  299  LOSS:  tensor(0.2101, grad_fn=<AddBackward0>)
EPOCH:  300  LOSS:  tensor(0.2094, grad_fn=<AddBackward0>)
EPOCH:  301  LOSS:  tensor(0.2088, grad_fn=<AddBackward0>)
EPOCH:  302  LOSS:  tensor(0.2082, grad_fn=<AddBackward0>)
EPOCH:  303  LOSS:  tensor(0.2076, grad_fn=<AddBackward0>)
EPOCH:  304  LOSS:  tensor(0.2069, grad_fn=<AddBackward0

EPOCH:  427  LOSS:  tensor(0.1594, grad_fn=<AddBackward0>)
EPOCH:  428  LOSS:  tensor(0.1590, grad_fn=<AddBackward0>)
EPOCH:  429  LOSS:  tensor(0.1587, grad_fn=<AddBackward0>)
EPOCH:  430  LOSS:  tensor(0.1585, grad_fn=<AddBackward0>)
EPOCH:  431  LOSS:  tensor(0.1582, grad_fn=<AddBackward0>)
EPOCH:  432  LOSS:  tensor(0.1579, grad_fn=<AddBackward0>)
EPOCH:  433  LOSS:  tensor(0.1577, grad_fn=<AddBackward0>)
EPOCH:  434  LOSS:  tensor(0.1574, grad_fn=<AddBackward0>)
EPOCH:  435  LOSS:  tensor(0.1572, grad_fn=<AddBackward0>)
EPOCH:  436  LOSS:  tensor(0.1569, grad_fn=<AddBackward0>)
EPOCH:  437  LOSS:  tensor(0.1567, grad_fn=<AddBackward0>)
EPOCH:  438  LOSS:  tensor(0.1564, grad_fn=<AddBackward0>)
EPOCH:  439  LOSS:  tensor(0.1562, grad_fn=<AddBackward0>)
EPOCH:  440  LOSS:  tensor(0.1560, grad_fn=<AddBackward0>)
EPOCH:  441  LOSS:  tensor(0.1557, grad_fn=<AddBackward0>)
EPOCH:  442  LOSS:  tensor(0.1554, grad_fn=<AddBackward0>)
EPOCH:  443  LOSS:  tensor(0.1552, grad_fn=<AddBackward0

EPOCH:  566  LOSS:  tensor(0.1272, grad_fn=<AddBackward0>)
EPOCH:  567  LOSS:  tensor(0.1269, grad_fn=<AddBackward0>)
EPOCH:  568  LOSS:  tensor(0.1265, grad_fn=<AddBackward0>)
EPOCH:  569  LOSS:  tensor(0.1262, grad_fn=<AddBackward0>)
EPOCH:  570  LOSS:  tensor(0.1259, grad_fn=<AddBackward0>)
EPOCH:  571  LOSS:  tensor(0.1257, grad_fn=<AddBackward0>)
EPOCH:  572  LOSS:  tensor(0.1255, grad_fn=<AddBackward0>)
EPOCH:  573  LOSS:  tensor(0.1253, grad_fn=<AddBackward0>)
EPOCH:  574  LOSS:  tensor(0.1251, grad_fn=<AddBackward0>)
EPOCH:  575  LOSS:  tensor(0.1249, grad_fn=<AddBackward0>)
EPOCH:  576  LOSS:  tensor(0.1248, grad_fn=<AddBackward0>)
EPOCH:  577  LOSS:  tensor(0.1246, grad_fn=<AddBackward0>)
EPOCH:  578  LOSS:  tensor(0.1244, grad_fn=<AddBackward0>)
EPOCH:  579  LOSS:  tensor(0.1242, grad_fn=<AddBackward0>)
EPOCH:  580  LOSS:  tensor(0.1240, grad_fn=<AddBackward0>)
EPOCH:  581  LOSS:  tensor(0.1238, grad_fn=<AddBackward0>)
EPOCH:  582  LOSS:  tensor(0.1237, grad_fn=<AddBackward0

EPOCH:  705  LOSS:  tensor(0.1035, grad_fn=<AddBackward0>)
EPOCH:  706  LOSS:  tensor(0.1033, grad_fn=<AddBackward0>)
EPOCH:  707  LOSS:  tensor(0.1032, grad_fn=<AddBackward0>)
EPOCH:  708  LOSS:  tensor(0.1030, grad_fn=<AddBackward0>)
EPOCH:  709  LOSS:  tensor(0.1029, grad_fn=<AddBackward0>)
EPOCH:  710  LOSS:  tensor(0.1027, grad_fn=<AddBackward0>)
EPOCH:  711  LOSS:  tensor(0.1026, grad_fn=<AddBackward0>)
EPOCH:  712  LOSS:  tensor(0.1024, grad_fn=<AddBackward0>)
EPOCH:  713  LOSS:  tensor(0.1023, grad_fn=<AddBackward0>)
EPOCH:  714  LOSS:  tensor(0.1022, grad_fn=<AddBackward0>)
EPOCH:  715  LOSS:  tensor(0.1020, grad_fn=<AddBackward0>)
EPOCH:  716  LOSS:  tensor(0.1019, grad_fn=<AddBackward0>)
EPOCH:  717  LOSS:  tensor(0.1018, grad_fn=<AddBackward0>)
EPOCH:  718  LOSS:  tensor(0.1016, grad_fn=<AddBackward0>)
EPOCH:  719  LOSS:  tensor(0.1015, grad_fn=<AddBackward0>)
EPOCH:  720  LOSS:  tensor(0.1014, grad_fn=<AddBackward0>)
EPOCH:  721  LOSS:  tensor(0.1013, grad_fn=<AddBackward0

EPOCH:  844  LOSS:  tensor(0.0784, grad_fn=<AddBackward0>)
EPOCH:  845  LOSS:  tensor(0.0783, grad_fn=<AddBackward0>)
EPOCH:  846  LOSS:  tensor(0.0781, grad_fn=<AddBackward0>)
EPOCH:  847  LOSS:  tensor(0.0780, grad_fn=<AddBackward0>)
EPOCH:  848  LOSS:  tensor(0.0779, grad_fn=<AddBackward0>)
EPOCH:  849  LOSS:  tensor(0.0777, grad_fn=<AddBackward0>)
EPOCH:  850  LOSS:  tensor(0.0776, grad_fn=<AddBackward0>)
EPOCH:  851  LOSS:  tensor(0.0774, grad_fn=<AddBackward0>)
EPOCH:  852  LOSS:  tensor(0.0773, grad_fn=<AddBackward0>)
EPOCH:  853  LOSS:  tensor(0.0771, grad_fn=<AddBackward0>)
EPOCH:  854  LOSS:  tensor(0.0770, grad_fn=<AddBackward0>)
EPOCH:  855  LOSS:  tensor(0.0769, grad_fn=<AddBackward0>)
EPOCH:  856  LOSS:  tensor(0.0768, grad_fn=<AddBackward0>)
EPOCH:  857  LOSS:  tensor(0.0766, grad_fn=<AddBackward0>)
EPOCH:  858  LOSS:  tensor(0.0765, grad_fn=<AddBackward0>)
EPOCH:  859  LOSS:  tensor(0.0764, grad_fn=<AddBackward0>)
EPOCH:  860  LOSS:  tensor(0.0763, grad_fn=<AddBackward0

KeyboardInterrupt: 