
# Standardized only all inputs and output


The purpose of this is to bring features and outputs to the same scale.  

Run this model on the web at: 



## Standardization

$  \large x_{stand} = \frac {(x - \mu _x)}{\sigma _x}   $




## Experiment description

This run is done using CFD. Used an 80% train test split. 
The CFD data has 56 samples where each sample has 27 features. There are 10 inputs and 17 outputs. All inputs and outputs were used. 

 


Train_X.........(44, 10)

Train_y.........(44, 17)

Test_X.........(12, 10)

Test_y.........(12, 17)





In [1]:
######################################################
##
## regression with scaling and DL for system control
##
######################################################

import torch
import numpy as np
import pandas as pd
import sklearn
import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
## coefficient of determination (R**2)
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

#######################################################

N_EPOCHS = 10000
batch_size = 5
learning_rate = 0.001    ## 0.01  ## 0.1 ## 1e-5 

#######################################################

np.set_printoptions(suppress=True)
torch.set_printoptions(sci_mode=False)


In [2]:

CFD_raw_data = pd.read_csv('CFD.6.2022.csv') 


In [3]:

headers_list = CFD_raw_data.columns.values.tolist()
## print(headers_list)
## print(len(headers_list))
## print(headers_list[27])

for i, name in enumerate(headers_list):
    print((i, name))
    

(0, 'index')
(1, 'i_pul_coal_inj_kg_thm')
(2, 'i_nat_gas_inj_kg_thm')
(3, 'i_nat_gas_t_k')
(4, 'i_o2_vol_perce')
(5, 'i_bf_windrate_nm3_hr')
(6, 'i_hb_moist_g_nm3')
(7, 'i_hot_blast_press_pa')
(8, 'i_hot_blast_temp_k')
(9, 'i_coke_weight_kg')
(10, 'i_ore_weight_kg')
(11, 'o_tuyere_exit_velo_m_s')
(12, 'o_tuyere_t_k')
(13, 'o_raceway_flame_temp_k')
(14, 'o_raceway_coal_burn_perce')
(15, 'o_raceway_volume_m')
(16, 'o_raceway_depth _m')
(17, 'o_shaft_co_utiliz')
(18, 'o_shaft_h2_utiliz')
(19, 'o_shaft_top_gas_temp_c')
(20, 'o_shaft_press_drop_pa')
(21, 'o_shaft_coke_rate_kg_thm')
(22, 'o_shaft_cohesive_zone_tip_height_m')
(23, 'o_shaft_cohes_zone_root_height_m')
(24, 'o_shaft_co_v_perc')
(25, 'o_shaft_co2_v_perc')
(26, 'o_shaft_h2_v_perce')
(27, 'o_shaft_n2_v_perc')


In [4]:

CFDdata_np = CFD_raw_data.to_numpy()

## print(CFDdata_np)
print(CFDdata_np.shape)


(56, 28)


In [5]:

#######################################################

input_indeces  = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]
output_indeces = [11, 12 ,13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27]

#######################################################

X = CFDdata_np[:, input_indeces]

y = CFDdata_np[:, output_indeces]

print(X.shape)
print(y.shape)

#######################################################

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.20, random_state=42)

#######################################################

print(X_train.shape)
print(X_test.shape)
print(y_train.shape)
print(y_test.shape)



(56, 10)
(56, 17)
(44, 10)
(12, 10)
(44, 17)
(12, 17)


In [6]:

## fix data type
X_train  = X_train.astype(np.float32)
X_test   = X_test.astype(np.float32)
y_train  = y_train.astype(np.float32)
y_test   = y_test.astype(np.float32)



##################################################



ss_X = StandardScaler()
obj_X = ss_X.fit(X_train)
X_train_scaled = obj_X.transform(X_train)
X_test_scaled  = obj_X.transform(X_test)

ss_y = StandardScaler()
obj_y = ss_y.fit(y_train)
y_train_scaled = obj_y.transform(y_train)
y_test_scaled  = obj_y.transform(y_test)



##################################################



In [7]:

X_train_tr  = torch.from_numpy(X_train_scaled)
X_test_tr   = torch.from_numpy(X_test_scaled)
y_train_tr  = torch.from_numpy(y_train_scaled)
y_test_tr   = torch.from_numpy(y_test_scaled)


In [8]:



print(X_train_tr.shape)
print(X_test_tr.shape)
print(y_train_tr.shape)
print(y_test_tr.shape)





torch.Size([44, 10])
torch.Size([12, 10])
torch.Size([44, 17])
torch.Size([12, 17])


In [9]:

#######################################################
## define dataset

train_ds = TensorDataset(X_train_tr, y_train_tr)

#######################################################
## define dataloader

train_dl = DataLoader(train_ds, batch_size, shuffle=True)


#######################################################


## Deep Learning Architecture

All inputs and outputs are scaled. 

In [10]:
    
#############################################################


class DL_Net_Dropout(nn.Module):
    
    ## initialize the layers
    def __init__(self):
        super().__init__()
        
        
        self.linear1 = nn.Linear(10, 32)
        self.act1    = nn.Tanh() 
        self.linear2 = nn.Linear(32, 24)
        self.act2    = nn.Tanh() 
        self.linear3 = nn.Linear(24, 17)
        self.dropout = nn.Dropout(0.25)
    
    ## perform inference
    def forward(self, x):
        
        x = self.linear1(x)
        x = self.act1(x)
        x = self.dropout(x)
        x = self.linear2(x)
        x = self.act2(x)
        x = self.dropout(x)
        x = self.linear3(x)
       
        return x

      
#############################################################


In [14]:

#######################################################


def fit(num_epochs, model, loss_fn, opt):
    
    for epoch in range(num_epochs):
        for xb, yb in train_dl:
            pred = model(xb)
            loss = loss_fn(pred, yb)
            loss.backward()
            opt.step()
            opt.zero_grad()
            print('Training loss:', loss_fn(pred, yb))
            
    pred = model(X_train_tr)        
    print('Training loss:', loss_fn(  pred,                   y_train_tr         ))
    print('Training R**2:', r2_score( pred.detach().numpy(),  y_train_tr.numpy()  ))
    

#######################################################


In [15]:

model = DL_Net_Dropout()
opt = torch.optim.Adam(   model.parameters(), lr=learning_rate   )
loss_fn = F.mse_loss

## loss_fn = F.l1_loss



In [16]:

fit(N_EPOCHS, model, loss_fn, opt)


Training loss: tensor(1.7200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.7614, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.6749, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.7186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.9713, grad_fn=<MseLossBackward0>)
Training loss: tensor(1.1534, grad_fn=<MseLossBackward0>)
Training loss: tensor(1.0725, grad_fn=<MseLossBackward0>)
Training loss: tensor(1.1733, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.4930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.7367, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.6270, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.8969, grad_fn=<MseLossBackward0>)
Training loss: tensor(1.4659, grad_fn=<MseLossBackward0>)
Training loss: tensor(1.5956, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.8597, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.5763, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.8163, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1499, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3918, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1640, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.5928, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2025, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1667, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2806, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2119, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3919, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3634, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1455, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2538, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2395, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1968, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2515, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2522, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2044, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2694, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1847, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.4786, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1775, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.5093, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2892, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2086, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0957, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1735, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2900, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1694, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3442, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1763, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1514, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1787, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1458, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3315, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2113, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2083, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2643, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3944, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1551, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3775, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1591, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2234, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.4163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1103, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2481, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.3002, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1708, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1643, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2623, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3042, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1237, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2468, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1471, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1770, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3517, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1449, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3539, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2754, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1701, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2837, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2474, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2558, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2235, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1396, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1828, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1742, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2584, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1225, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1340, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1881, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2181, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1648, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1760, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1507, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1296, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1143, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1445, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1318, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1691, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2445, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1482, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1454, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2801, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1104, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3062, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1615, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2703, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1500, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1739, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3392, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1325, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1633, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2490, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1520, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1769, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2299, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2330, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1560, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1414, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1295, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2414, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1687, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2390, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1500, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1539, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1716, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2205, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1110, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1545, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1830, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1211, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2553, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2596, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1269, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1280, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1398, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1394, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2021, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1549, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1572, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1669, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1631, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2746, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1527, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1321, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2392, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1258, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1924, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1516, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1334, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1666, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1202, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1446, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1347, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1193, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1487, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2270, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1832, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2201, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1834, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1697, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1580, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1085, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1932, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1474, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1595, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2537, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2012, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1708, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1564, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1311, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1254, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2173, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2337, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0865, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1498, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1929, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1223, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1588, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1285, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1441, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1792, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2550, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1474, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1055, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1320, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1461, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1056, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0920, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1086, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1786, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1580, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1893, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1502, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2559, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1238, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1060, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1029, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1565, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1103, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0972, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2475, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0980, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1706, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2072, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1946, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1790, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2594, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1896, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1501, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1375, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0693, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1593, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1349, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1433, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1198, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0994, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1630, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0926, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1223, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1447, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1360, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1329, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0559, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1167, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1404, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1583, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1272, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2565, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1070, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1540, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0949, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1565, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0679, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1247, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1354, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1569, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1541, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1130, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1710, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1640, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1405, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1551, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1519, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1260, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1579, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0863, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1332, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1549, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1996, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1631, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2857, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1803, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1018, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1041, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1158, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1551, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0998, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1780, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0608, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1609, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1013, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1087, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2524, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1258, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1461, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2515, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1513, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1093, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1956, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1473, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1635, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1581, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1002, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1345, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3265, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1194, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1624, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2666, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1425, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2479, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1442, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0944, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1538, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1643, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1573, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2260, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0713, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2615, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2438, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1953, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1648, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1404, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0826, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0786, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1392, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2567, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1245, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1215, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1536, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1736, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0998, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0583, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1484, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1555, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1061, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1351, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1806, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1220, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1356, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1536, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2439, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1195, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1099, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1542, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1199, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1639, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1744, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1271, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1881, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2321, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1121, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1632, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0780, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1148, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1266, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2292, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1099, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1639, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1582, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1424, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1439, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1333, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0814, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3249, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2498, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1573, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2168, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2367, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1810, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0870, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1441, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1130, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0828, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1652, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1334, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1888, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1073, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2328, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1040, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1908, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0947, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1347, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1008, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1205, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1507, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1425, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2381, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1346, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1010, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1398, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1519, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1361, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2668, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1130, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0695, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1293, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1176, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1496, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1455, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2042, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1223, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2431, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1412, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1139, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1641, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1081, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1451, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1476, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2337, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1122, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0608, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1242, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0833, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2093, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1378, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1358, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2225, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1438, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1884, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1707, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1444, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1346, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1255, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1243, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1044, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2346, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0728, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1292, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0932, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1636, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0975, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0686, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1355, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1244, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1916, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1293, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1667, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1887, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0944, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1840, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0742, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1430, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1244, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1514, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0906, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1537, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1767, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1579, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1613, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1328, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0811, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2005, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1085, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1560, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0982, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2045, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1242, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1884, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1461, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1362, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2042, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1103, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1345, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1318, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1210, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1406, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1143, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1660, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1785, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0689, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1307, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2336, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1515, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1601, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1574, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0962, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0949, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1380, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1793, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1649, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1870, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1973, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2131, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1371, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0950, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1447, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1470, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1521, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0934, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1468, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0851, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0869, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2528, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1049, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0980, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1011, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1285, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1616, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0864, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1328, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1136, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1678, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1049, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1204, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0819, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1280, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1642, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1810, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0923, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1442, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2614, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0955, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0975, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1154, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1400, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1938, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0915, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0900, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1236, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1374, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1430, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1355, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1442, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1333, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1687, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1383, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0879, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0984, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1284, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2398, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1045, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1060, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1822, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1490, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1334, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1651, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2638, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1005, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1655, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1568, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0940, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0805, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0919, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1973, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1155, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0887, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0880, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1270, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1527, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1625, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1193, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1266, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2154, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2387, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1596, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1428, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1247, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0907, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0817, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1233, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2624, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1173, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0949, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1086, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0872, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1263, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0731, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0730, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0915, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1816, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1087, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2730, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1222, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1032, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2243, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1242, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1065, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0830, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1292, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1744, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1212, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2360, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0888, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1532, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1498, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2814, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0873, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1303, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1668, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1164, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1431, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1038, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1348, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0874, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1980, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1673, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1012, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1435, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1332, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1040, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1296, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1378, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1543, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1005, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1746, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1308, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1614, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0824, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1563, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0535, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2138, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1658, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0879, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1116, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1520, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0817, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1435, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1496, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0916, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1818, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1205, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1082, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2271, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1781, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1005, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1582, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1222, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1212, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2308, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1042, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0840, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1633, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1716, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1248, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0908, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1225, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1745, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1487, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1312, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1823, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0830, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1246, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1779, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1011, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1496, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2320, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0818, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1544, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1516, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1362, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1539, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1607, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1887, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1673, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1276, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0678, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1315, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1188, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1902, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1400, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1015, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1607, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0729, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0830, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1731, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1822, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0784, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1688, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1110, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1363, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1544, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1322, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1807, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1215, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0916, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1213, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2243, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2044, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0887, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1693, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1246, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0823, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1282, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1354, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1083, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1797, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1025, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1789, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1986, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1461, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1640, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1838, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2035, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1239, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1039, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1878, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0923, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1015, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1650, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0828, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1520, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1759, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1208, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2500, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1245, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1073, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0879, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1241, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1722, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1399, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0812, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1305, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1405, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1410, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1526, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0937, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1617, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1237, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0791, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1242, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1310, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0960, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1255, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0765, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1104, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1733, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0719, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1131, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1008, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1543, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1188, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1847, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1607, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1390, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1609, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1008, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1453, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0725, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1664, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0972, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1454, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2533, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1530, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0842, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2303, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0840, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1468, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1551, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1693, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1373, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1426, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1966, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1410, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0905, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0702, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1545, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1564, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1655, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1073, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0992, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0741, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1027, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1297, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1366, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1735, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1440, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0612, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0732, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1796, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0849, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1107, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2134, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1464, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1303, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1735, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0891, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0699, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1232, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1159, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1718, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0781, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1109, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1909, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1216, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1388, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0997, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1793, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0874, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1461, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1784, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0902, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1902, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1615, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0568, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1332, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1472, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1617, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1499, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1466, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1152, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0932, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0957, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0753, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0946, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1347, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1068, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2154, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1720, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0577, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0914, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1248, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0785, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1622, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2346, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0926, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1528, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0934, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0763, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1729, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1396, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1390, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1674, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1623, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1331, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1471, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1336, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1365, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1078, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1492, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1395, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1540, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1835, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1749, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0920, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0956, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1136, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1101, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2567, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1615, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1606, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0832, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0610, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0683, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1078, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1123, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0721, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0922, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1873, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1429, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0994, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1081, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0717, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1907, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1367, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1373, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1348, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1487, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0658, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1526, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0643, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1211, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1721, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1403, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0829, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1253, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1928, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1078, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0662, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1138, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0868, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1731, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1503, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1494, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0886, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0754, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1994, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1039, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1524, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1389, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0964, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1770, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1562, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1387, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2006, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2318, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1014, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1367, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1635, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1255, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0900, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1517, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2593, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1499, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1373, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1010, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1304, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0685, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1664, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0951, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1266, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2216, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0858, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1409, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0937, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0769, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1690, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1724, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1185, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0868, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1081, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1547, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1657, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1684, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0917, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2194, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2167, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1787, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1486, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1506, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1199, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1621, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1284, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1515, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1986, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1085, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0759, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2064, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1819, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1736, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1471, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1266, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1343, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1251, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1524, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1267, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2230, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1110, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0996, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1265, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1202, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1364, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1152, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0962, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1362, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1553, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2321, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1851, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1944, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1257, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1035, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0609, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1409, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2324, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0746, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1522, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0765, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0964, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1647, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1231, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1079, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1443, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1770, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1132, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1793, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0964, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1893, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1916, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1349, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1249, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1326, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1293, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1366, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1489, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2176, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1522, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1138, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1496, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1865, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1424, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0937, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1623, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1123, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2261, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0703, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0672, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0797, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0887, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1321, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1148, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1414, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0967, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1292, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1025, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1695, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0730, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0969, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1556, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1561, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1087, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0785, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1599, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0639, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1815, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0969, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0856, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1662, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1278, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1847, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1337, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2645, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0951, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1530, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1877, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0852, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1033, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1278, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2139, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1946, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0917, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1041, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1029, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1025, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1414, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1573, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1435, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1333, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1205, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1176, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1656, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1260, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1394, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1257, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1026, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2702, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0858, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0719, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1370, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1291, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1165, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1819, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0932, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1193, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1251, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1536, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1355, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1909, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1404, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0969, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2047, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1506, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1952, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0644, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1215, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1418, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1509, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1248, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0922, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1816, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0870, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1710, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1790, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0966, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0842, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0895, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0849, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1030, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1041, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1201, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1965, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0612, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0955, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1641, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1231, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1837, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1954, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1513, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1120, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0974, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1578, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1734, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0923, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1555, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1540, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0690, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0939, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1462, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1619, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0965, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0835, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1479, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1349, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0746, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1538, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1213, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1079, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1104, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1666, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1504, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0926, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0968, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1244, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1648, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1703, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1216, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1318, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0996, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1364, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1390, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1668, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1691, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1138, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0686, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1333, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0838, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1813, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1103, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1605, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1991, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1924, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1073, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1445, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0751, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1470, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1840, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0683, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2916, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1596, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1836, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1435, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0998, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0858, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0953, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1547, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1350, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1222, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1021, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1879, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0786, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1553, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2062, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1353, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1524, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1350, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1083, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1997, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0956, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1331, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0727, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1509, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1695, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1050, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1330, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1425, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1419, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0842, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2371, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1504, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0690, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1926, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1270, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2374, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0611, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0834, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1493, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1271, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1139, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1635, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0724, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0676, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1225, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1946, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1247, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1211, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1642, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0505, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1586, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2178, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0816, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1360, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1792, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1400, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1315, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1735, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0821, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0809, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1143, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2737, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1236, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0688, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1289, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1639, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1400, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1562, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0692, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1721, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0984, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1428, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1068, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1194, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0694, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1056, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0807, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1339, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1070, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0638, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1357, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1580, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1026, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1848, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1741, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1030, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1122, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1155, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1032, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1380, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1610, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2026, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1609, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0879, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1193, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1059, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1353, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1018, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0692, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0960, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1454, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1665, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0980, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1188, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1145, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1289, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1307, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1722, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1065, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1083, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0804, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0893, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0806, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0717, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1044, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0939, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1174, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1185, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1457, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1686, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2121, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1389, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1657, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1911, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1354, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1516, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1231, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1528, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1801, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0663, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1567, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1794, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1468, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0849, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0792, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0968, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1346, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0729, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1528, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1502, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0907, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1038, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0813, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1209, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1649, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0733, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1199, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1236, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1801, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1265, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1429, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1687, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1232, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1522, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0893, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0716, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1693, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1308, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1852, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1271, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0781, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1450, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0662, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1263, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1081, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1691, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2503, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0735, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1117, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1218, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1774, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1662, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1519, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1453, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2415, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1245, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1833, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1716, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1223, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0730, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0755, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0579, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1616, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1843, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1014, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2016, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1609, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1243, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1116, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1791, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1621, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1778, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0714, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1083, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2473, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0791, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2530, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1533, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1103, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1233, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1918, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0955, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1068, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1331, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1503, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1506, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1378, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1756, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1741, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1145, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1136, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1704, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1128, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1924, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1514, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1860, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1403, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0475, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1061, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2034, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1164, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0740, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0500, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2685, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1130, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0968, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0957, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1817, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1368, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2758, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1128, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0909, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0682, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1697, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0972, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1476, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1900, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0835, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1295, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1786, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1194, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1867, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1541, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1410, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1520, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1035, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1390, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1633, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2066, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1347, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1046, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1626, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1121, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1584, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1635, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1306, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0636, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1481, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1389, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1291, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1784, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1217, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1116, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0659, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1501, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0758, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0639, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1364, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1060, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1394, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1362, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2719, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1085, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1198, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1030, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1923, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1868, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0488, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1308, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1670, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1545, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1222, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0852, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1672, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1597, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2101, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0738, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1234, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1755, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0812, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1371, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1503, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1554, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1159, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1038, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1880, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1945, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0878, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1380, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1190, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1295, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0699, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1453, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1190, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1254, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1065, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1061, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2096, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1583, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0757, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1712, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1518, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0935, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1561, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1534, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1622, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1339, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1244, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1713, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1123, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1198, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1470, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0982, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0731, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0744, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0754, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1533, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1452, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0900, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1733, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1688, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1807, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1629, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1358, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1534, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0784, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2513, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0922, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1409, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1136, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0815, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1875, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1736, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1167, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1026, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0970, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1570, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2603, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1372, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0935, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1674, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0780, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0769, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1686, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1432, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1876, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1173, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1774, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1282, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1545, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1406, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0783, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1643, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0759, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1159, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0815, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0547, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1265, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1030, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0769, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1567, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1243, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1750, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0632, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1073, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0892, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0692, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0687, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1734, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1852, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1012, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0764, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1191, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1425, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1550, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1637, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1740, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0857, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1501, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0934, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1325, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1602, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1874, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1718, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1128, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0887, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0742, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1025, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1478, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1002, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1425, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1158, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1532, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1315, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0678, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1751, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1419, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1005, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1401, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0909, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1306, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1467, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1899, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1140, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1627, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0763, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2052, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1341, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1554, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1284, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1285, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1454, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1689, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1145, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1443, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0963, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1248, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1372, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0842, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1862, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1838, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1285, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0731, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1514, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0699, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1358, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0997, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1655, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1059, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0536, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1136, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0665, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1986, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1280, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0891, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2423, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1720, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0863, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1044, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1305, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1474, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0902, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1472, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0874, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1296, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1457, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1254, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1505, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0783, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0806, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0973, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1493, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1032, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1458, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1246, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0936, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1529, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1010, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1087, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1382, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1361, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2008, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1522, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1349, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1559, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1678, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1193, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1469, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0830, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1305, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0867, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1267, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1470, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0972, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1152, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0831, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1027, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1037, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1402, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1239, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0619, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2198, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2375, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0947, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1031, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1191, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1715, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1979, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1638, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1040, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1185, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0793, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1724, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1591, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1037, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1712, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2086, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0711, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1261, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1676, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1917, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1324, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0812, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1031, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1110, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1510, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1528, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1451, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1101, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1293, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1148, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1409, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2121, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2504, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0929, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1592, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1205, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0952, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0951, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1365, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1340, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1310, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1780, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0914, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1255, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1481, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1522, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1005, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1026, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0879, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0691, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1641, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1782, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0869, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2256, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1788, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1737, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0829, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0807, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1642, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1947, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1075, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1440, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2593, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0780, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1419, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1033, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1638, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1185, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2116, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1957, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1431, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0723, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0878, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1665, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0952, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1446, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1066, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2086, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0755, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0714, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1443, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2063, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0722, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1428, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1013, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1998, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1569, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1039, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0819, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1250, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2096, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1139, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0556, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0647, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1395, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1456, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1027, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0838, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1424, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1778, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0884, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0858, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1121, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1824, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1152, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0778, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1420, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1164, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1108, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0581, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0963, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1021, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1340, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2456, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1088, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1451, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1365, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1102, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0914, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1219, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1211, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1336, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0960, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1061, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1716, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1240, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1054, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0866, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1089, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1377, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1261, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1600, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0690, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0496, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1517, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1620, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1873, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1326, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0785, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1638, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1269, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0968, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1101, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0975, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1224, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2361, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1220, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0499, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0834, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1783, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0859, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1238, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1256, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1406, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1143, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1473, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1275, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1154, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0688, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0775, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1054, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2446, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1623, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1984, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0867, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1233, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1027, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0785, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1395, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1221, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1940, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1784, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1049, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1065, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0851, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2348, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0908, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1455, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1459, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1542, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1718, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1330, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1254, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1668, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1274, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0604, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2269, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1374, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1182, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1617, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0992, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1717, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1608, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1062, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1675, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0647, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1307, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0998, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1658, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1858, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1466, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1337, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1920, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1219, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0873, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1033, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0926, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1797, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0640, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1040, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1220, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1071, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1210, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1530, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1059, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1666, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0667, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0608, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1270, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1055, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2230, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0679, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0731, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1130, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2348, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2622, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2110, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1488, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0932, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1201, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1238, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1174, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1047, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2050, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1434, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0806, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1758, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0947, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1053, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2394, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1333, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1490, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1402, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1684, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1392, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0875, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1337, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1586, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0805, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1140, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2187, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1149, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0920, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1322, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1011, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1342, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1152, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1382, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1361, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1647, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0843, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1909, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0833, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2000, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1375, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0907, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0797, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1039, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1581, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1305, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1662, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1470, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0961, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1511, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1232, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0580, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0923, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0564, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1363, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1947, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0844, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1329, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1374, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1595, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0994, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2245, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0787, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0919, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1451, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0736, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2005, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0876, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1569, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1781, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0928, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1304, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1263, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1116, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0924, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1383, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1343, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0881, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0893, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0529, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2325, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1322, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1909, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1382, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1104, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1029, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1523, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0698, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0940, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1734, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0935, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1869, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0973, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1266, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1054, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1672, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1006, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1460, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2444, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0934, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1473, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0875, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1506, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0460, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2074, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1326, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0698, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1693, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1617, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1868, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1594, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1669, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0876, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1090, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0821, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1829, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1364, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0743, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0699, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1621, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0872, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1391, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0855, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1049, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1432, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1177, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1837, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1861, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1385, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0955, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1325, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0593, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1469, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1515, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1779, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1592, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1556, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1035, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1133, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1268, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1060, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1440, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1220, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1089, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0691, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1704, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0725, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2010, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1272, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0728, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0818, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1276, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1347, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1006, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1185, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0970, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1438, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1128, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0760, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1330, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1834, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1659, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1418, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1598, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1255, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0961, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1794, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2012, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1272, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1366, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1013, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1797, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1417, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1544, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0992, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1013, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0664, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0896, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1345, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1604, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1328, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1529, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2444, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0447, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0792, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1389, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1199, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0549, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1476, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1104, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2595, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1037, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1120, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1004, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1340, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1303, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1550, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1330, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2203, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1458, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0856, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1740, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1736, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1449, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1200, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1800, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1085, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1612, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1516, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1178, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0470, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1668, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1308, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1176, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0718, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1233, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1502, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0636, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1293, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0646, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1883, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0940, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1868, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1267, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1168, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1195, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0760, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0751, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1221, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1018, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0984, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2288, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1507, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1006, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1155, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0992, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1080, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0969, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0922, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1139, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0958, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0887, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1206, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1297, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1538, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2145, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0993, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1238, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1600, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1414, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1424, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1548, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2032, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1120, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1326, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1708, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1602, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2812, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0824, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1035, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0738, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0640, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1541, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0918, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0996, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1292, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1539, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2101, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1310, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2415, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1410, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1677, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1317, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1356, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1107, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0806, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1873, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0937, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1519, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1422, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0960, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1257, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1649, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1208, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1410, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1388, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0891, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1864, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0771, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1557, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1127, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1383, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0986, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1052, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1325, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1728, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1408, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0919, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1230, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1518, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1101, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1150, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0845, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1280, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2246, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0662, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1380, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2249, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1158, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0720, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1532, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1413, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1313, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0948, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0953, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0734, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1260, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1020, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0799, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1755, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0994, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1462, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0749, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1762, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0640, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1243, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1225, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1473, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1350, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1591, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0843, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1237, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1325, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1350, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0748, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1945, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1117, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1295, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0949, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0761, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0892, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1438, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0931, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1099, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1221, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1647, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1494, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1047, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0908, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1343, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1448, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0945, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0878, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1701, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1025, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1148, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1170, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1568, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1518, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1220, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1389, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1067, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1287, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1535, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0876, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1727, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1496, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0766, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0936, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1647, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1732, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1042, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1525, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1503, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1917, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1045, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1610, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1447, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1052, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1010, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1378, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1076, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1360, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1603, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1459, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0738, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0783, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2438, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1041, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1219, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1311, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1258, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1312, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1627, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0904, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0776, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1412, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1121, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1284, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1066, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1437, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2144, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1536, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1344, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1998, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1508, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1269, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1631, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1584, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1049, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0981, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2058, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1125, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1351, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0607, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1166, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1562, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0727, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1405, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1280, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1703, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1712, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1305, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0930, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1129, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1198, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.3795, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1059, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1967, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1038, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0816, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1733, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0915, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1215, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2614, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0526, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1372, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0683, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0672, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1539, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1156, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1594, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1660, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1074, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1888, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2046, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0625, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1047, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1084, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1367, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0849, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1956, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0705, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1230, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1339, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1524, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0955, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1269, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1064, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1839, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0898, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0700, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1443, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1803, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1250, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0912, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0681, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1436, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1497, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1145, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2405, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1794, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1342, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1237, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1002, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1982, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1947, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1394, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1405, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1911, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1141, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1368, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0744, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1455, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1300, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0826, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1429, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1271, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1176, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0977, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2159, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1472, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1793, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1396, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0904, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1694, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0848, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1322, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1298, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1296, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2362, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1430, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1093, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1310, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1098, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1679, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0613, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0969, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1836, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1273, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1464, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2190, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0932, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1966, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1743, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1842, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1125, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1066, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1882, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0688, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2859, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2082, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1687, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0652, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1322, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1295, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1297, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0991, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0821, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1655, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0579, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1590, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0495, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1056, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1474, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1133, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1562, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1212, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1283, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1471, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0852, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0767, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1169, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0986, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1027, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1142, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1625, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1431, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2562, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1467, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1089, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1278, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0970, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1582, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1398, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1637, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1626, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0933, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1820, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1390, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1337, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0754, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1672, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1615, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1040, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1081, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1156, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1512, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1039, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1137, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0943, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1451, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1895, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1125, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1352, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1555, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1161, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1602, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1955, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0803, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0953, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1117, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1886, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1160, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1501, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1512, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1099, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0974, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0919, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0905, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1575, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1486, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0854, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1120, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1234, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1299, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1372, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1657, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1377, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0765, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1180, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0814, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1513, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1626, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1133, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1017, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2850, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0851, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1702, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1568, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1135, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1003, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1184, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1598, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1124, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1371, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1063, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0791, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1527, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1079, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1548, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1324, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0936, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0864, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1106, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1628, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1433, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1038, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1427, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1625, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1401, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0918, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1419, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1252, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1552, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1351, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1398, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1082, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1223, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1798, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0980, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0777, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1874, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1003, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0919, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1265, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2046, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1420, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2069, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0757, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1077, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1140, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1937, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1774, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1458, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0889, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2023, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1712, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1779, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1246, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0921, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1157, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1221, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1172, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1596, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2069, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0728, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0690, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2329, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0853, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1368, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1502, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0616, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0869, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1095, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0745, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1153, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1962, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1847, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1092, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1094, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1535, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1355, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1359, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0987, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0787, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1498, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2008, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2344, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1113, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0835, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1447, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1927, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0985, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2214, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0976, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1304, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2366, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1224, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1111, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0967, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1294, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0906, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1226, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1279, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2320, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2009, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1309, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0595, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1568, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0902, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0696, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1198, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0988, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2840, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1630, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2423, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1416, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1031, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1254, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2188, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1024, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1667, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0928, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0811, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1510, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1319, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0979, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1323, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1386, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1351, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0938, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0726, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1805, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1235, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1018, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1601, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1595, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1195, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0954, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1838, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2290, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1342, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1467, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1399, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1629, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0982, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0952, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1566, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1204, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0869, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1171, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1207, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0880, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1692, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1524, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0740, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1183, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0772, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1704, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0925, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1178, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1815, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0894, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1370, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1361, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1227, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1306, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2648, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0696, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1659, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1215, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1162, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1381, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1518, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2120, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1994, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1107, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1229, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1107, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1591, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0752, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1264, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1281, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1306, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0576, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1546, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1339, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1179, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0999, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1196, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1467, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0905, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2034, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1532, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1594, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1523, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1761, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1204, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0734, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1783, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1601, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0713, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0843, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1286, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1181, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1620, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1175, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2151, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1400, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1253, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2371, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1671, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1544, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1132, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1245, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0905, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0824, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0829, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1445, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1714, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1477, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1753, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1624, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1574, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1332, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1395, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1397, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1186, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0859, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1703, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1326, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1115, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0750, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1444, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0661, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1764, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1123, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2028, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1003, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1091, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0841, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1421, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1896, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1711, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0592, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0983, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1468, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0670, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0689, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1460, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1696, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1168, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1197, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0903, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0800, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1260, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0508, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2748, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1393, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1163, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0978, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1116, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0857, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1034, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1512, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2118, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1105, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0838, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1210, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0954, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0944, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1314, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1682, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1212, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1880, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1288, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1135, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1480, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0996, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0809, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1910, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1654, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0721, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0827, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0885, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0749, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1302, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1034, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1230, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1128, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1334, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1513, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1904, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1192, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1262, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1411, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0618, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1556, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1501, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0913, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2043, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0950, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0871, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1274, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0862, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0756, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1901, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1802, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0915, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1364, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1787, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1782, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1335, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2189, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1491, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0808, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1746, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0897, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1560, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0577, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1144, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1030, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1519, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1758, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1539, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1379, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1114, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0924, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1992, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2433, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1574, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1586, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1745, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0851, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0599, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1327, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0963, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1371, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1407, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1566, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0922, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1125, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1358, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1097, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1619, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1482, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1112, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1622, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1188, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1779, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1544, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1465, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0935, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1937, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.1402, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1257, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1018, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1007, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0846, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0788, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1435, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1146, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0971, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1006, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1019, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1587, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0653, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1090, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0941, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1082, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1228, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0727, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0883, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1369, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0814, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0942, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0890, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2231, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1274, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1316, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1369, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0613, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1650, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1057, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1567, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1745, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1016, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1306, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.2036, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0959, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1664, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1336, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1117, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1001, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1560, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1048, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1078, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1100, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0875, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1134, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1259, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1376, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1140, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0989, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1194, grad_fn=<MseLossBackward0>)
Training loss:

Training loss: tensor(0.0995, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0747, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1961, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1710, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1126, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1709, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1348, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1549, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.0929, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1090, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1301, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1520, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1045, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.2061, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1147, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1167, grad_fn=<MseLossBackward0>)
Training loss: tensor(0.1176, grad_fn=<MseLossBackward0>)
Training loss:

## Predict outputs

In [17]:

for i in range(len(X_test_tr)):
    print("**************************************************")
    print("input")
    print(X_test_tr[i])
    print("preds, real")
    preds = model(X_test_tr[i])

    np_pred = preds.detach().numpy()              ## [0]
    np_real = y_test_tr[i].detach().numpy()


    for j in range(len(np_pred)):
        print((np_pred[j], np_real[j]))
    



**************************************************
input
tensor([-0.6746, -0.4502, -0.5039, -1.3071,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000, -1.7780])
preds, real
(-0.46002662, -0.39696112)
(1.1710917, 1.2066705)
(-0.31973016, -0.12531227)
(-0.94827056, -0.75444394)
(-0.913869, -0.75592893)
(-0.97501653, -0.75426406)
(0.13841298, 0.015406219)
(-0.05245635, 0.019047316)
(0.14797057, 0.1761152)
(-1.8454828, -1.1784332)
(0.4707379, 0.5987946)
(0.3194221, 0.401798)
(0.22359324, 1.4128103)
(-1.1378942, -0.80931795)
(-0.74922323, -0.631744)
(-0.44249845, -0.556402)
(1.9950607, 1.816122)
**************************************************
input
tensor([-0.6746, -0.4502,  2.6633, -1.3071,  0.0000,  0.0000,  0.0000,  0.0000,
         0.0000, -1.7780])
preds, real
(-0.4270332, -0.34652972)
(0.88221717, 1.3610646)
(0.06905574, -0.08125172)
(-0.63133097, -0.75444394)
(-0.61943686, -0.75592893)
(-0.62703097, -0.75426406)
(0.6892816, 1.020035)
(0.21983907, 0.3497931)
(-0.18603681, -0.374

In [18]:

preds = model(X_test_tr)
print('Test loss:', loss_fn(     preds,                   y_test_tr          ))
print('Testing R**2:', r2_score( preds.detach().numpy(),  y_test_tr.numpy()  ))


Test loss: tensor(0.2373, grad_fn=<MseLossBackward0>)
Testing R**2: 0.6153083986843413


In [19]:

'''

model.eval()

dummy_input = torch.randn(1, 10)


input_names = ["input1"]
output_names = ["output1", "output2"]

torch.onnx.export(
  model, 
  dummy_input, 
  "ONNXmodels/CFDallNORM.onnx", 
  verbose=False, 
  input_names  = input_names,
  output_names = output_names
)

'''


'\n\nmodel.eval()\n\ndummy_input = torch.randn(1, 10)\n\n\ninput_names = ["input1"]\noutput_names = ["output1", "output2"]\n\ntorch.onnx.export(\n  model, \n  dummy_input, \n  "ONNXmodels/CFDallNORM.onnx", \n  verbose=False, \n  input_names  = input_names,\n  output_names = output_names\n)\n\n'