In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.optim import AdamW, LBFGS
from torch.autograd import Variable
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from itertools import cycle

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
def fourier_features(x, B):
    x_transformed = torch.matmul(x, B)
    return torch.cat([torch.sin(x_transformed), torch.cos(x_transformed)], dim=-1)

def init_fixed_frequency_matrix(size, scale=1.0):
    num_elements = size[0] * size[1]
    lin_space = torch.linspace(-scale, scale, steps=num_elements)
    B = lin_space.view(size).float()
    return B

class FourierFeatureNN(nn.Module):
    def __init__(self, input_dim=1, shared_units=16, neuron_units=32, scale=1.0, 
                 activation=nn.Tanh, device='cpu'):
        super(FourierFeatureNN, self).__init__()
        self.Bx = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)
        self.Bt = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)

        # Define separate paths for x and t after Fourier transformation
        self.path_x = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Adjusted from shared_units // 2 to shared_units
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )
        self.path_t = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Same adjustment
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )

        # Define separate FFN for u and v directly after the paths
        self.ffn_u = nn.Sequential(
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, 2)  # Outputs for u (real and imaginary parts)
        )
        

        self.apply(self.initialize_weights)

    def forward(self, x, t):
        # Apply Fourier feature transformations
        x_fourier = fourier_features(x, self.Bx)
        t_fourier = fourier_features(t, self.Bt)

        # Pass through separate paths
        x_path_output = self.path_x(x_fourier)
        t_path_output = self.path_t(t_fourier)

        # Pointwise multiplication of the separate path outputs
        combined_features = x_path_output * t_path_output

        # Directly pass through different FFNs for u and v
        final_output_u = self.ffn_u(combined_features)
        #final_output_v = self.ffn_v(combined_features)

        # Splitting the output for u into real and complex parts
        output_1, output_2 = final_output_u.split(1, dim=-1)
        #output_3 = final_output_v
        
        return output_1, output_2#, output_3

    def initialize_weights(self, m):
        if isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

In [3]:
def fourier_features(x, B):
    x_transformed = torch.matmul(x, B)
    return torch.cat([torch.sin(x_transformed), torch.cos(x_transformed)], dim=-1)

def init_fixed_frequency_matrix(size, scale=1.0):
    num_elements = size[0] * size[1]
    lin_space = torch.linspace(-scale, scale, steps=num_elements)
    B = lin_space.view(size).float()
    return B

class FourierFeatureNN_V(nn.Module):
    def __init__(self, input_dim=1, shared_units=16, neuron_units=32, scale=1.0, 
                 activation=nn.Tanh, device='cpu'):
        super(FourierFeatureNN_V, self).__init__()
        self.Bx = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)
        self.Bt = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)

        # Define separate paths for x and t after Fourier transformation
        self.path_x = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Adjusted from shared_units // 2 to shared_units
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )
        self.path_t = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Same adjustment
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )

        # Define separate FFN for u and v directly after the paths
        self.ffn_v = nn.Sequential(
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, 1)  # Outputs for u (real and imaginary parts)
        )
        

        self.apply(self.initialize_weights)

    def forward(self, x, t):
        # Apply Fourier feature transformations
        x_fourier = fourier_features(x, self.Bx)
        t_fourier = fourier_features(t, self.Bt)

        # Pass through separate paths
        x_path_output = self.path_x(x_fourier)
        t_path_output = self.path_t(t_fourier)

        # Pointwise multiplication of the separate path outputs
        combined_features = x_path_output * t_path_output

        # Directly pass through different FFNs for u and v
        final_output_v = self.ffn_v(combined_features)
        #final_output_v = self.ffn_v(combined_features)

        # Splitting the output for u into real and complex parts
       # output_1, output_2 = final_output_u.split(1, dim=-1)
        #output_3 = final_output_v
        
        return final_output_v

    def initialize_weights(self, m):
        if isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

In [4]:
class FCN(nn.Module):
    def __init__(self, input_dim=1, neuron_units=32, activation=nn.Tanh, device='cpu'):
        super(FCN, self).__init__()

        # Define a simple FFN with 3 hidden layers
        self.ffn = nn.Sequential(
            nn.Linear(2 * input_dim, neuron_units),  # input dimension is doubled for x and t
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, 1)  # Outputs for u (2 parts: real and imaginary) and v (1 part)
        )

        self.to(device)
        self.apply(self.initialize_weights)

    def forward(self, x, t):
        # Concatenate x and t as input
        combined_input = torch.cat([x, t], dim=-1)
        output = self.ffn(combined_input)
        
        return output

    def initialize_weights(self, m):
        if isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

In [5]:
def real_u1(x, t, k, omega, r):
    u = torch.sqrt(torch.tensor(2)) *( 1/torch.cosh(x+t) ) * torch.exp(1j * ( -x*torch.tensor(0.5) + t*torch.tensor(0.75) ) )
    return torch.real(u)

def imag_u1(x, t, k, omega, r):
    u = torch.sqrt(torch.tensor(2)) *( 1/torch.cosh(x+t) ) * torch.exp(1j * ( -x*torch.tensor(0.5) + t*torch.tensor(0.75) ) )
    return torch.imag(u)

def real_v1(x, t, k, omega, r):
    result = -2 * torch.square( 1/torch.cosh(x+t) )
    return result

def compute_analytical_boundary_loss(model_u, model_v, x, t, mse_cost_function, k, omega, r):
    pred_u_r, pred_u_i = model_u(x,t)
    pred_v =  model_v(x,t)

    real_u1_val = real_u1(x, t, k, omega, r)
    imag_u1_val = imag_u1(x, t, k, omega, r)
    real_v1_val = real_v1(x, t, k, omega, r)
 
    boundary_loss_ur = mse_cost_function(pred_u_r, real_u1_val)
    boundary_loss_ui = mse_cost_function(pred_u_i, imag_u1_val)
    boundary_loss_v = mse_cost_function(pred_v, real_v1_val)
    
    return boundary_loss_ur, boundary_loss_ui, boundary_loss_v

def cyclic_iterator(items):
    return cycle(items)

In [6]:
def LBFGS_training(model_u, model_v, model_save_path, mse_cost_function, device, num_epochs, lr, num_samples, r, k, omega, gamma, beta, line_search_fn):
    print('Starting LBFGS Fine Tuning')
    optimizer_u = LBFGS(model_u.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)
    #optimizer_ui = LBFGS(model_ui.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)
    optimizer_v = LBFGS(model_v.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)

    factor = -2

    x_n = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # x in range [-5, -3]
    t_n = (torch.rand(num_samples, 1)).to(device)   
    x_dom = (torch.rand(num_samples*30, 1)*4 + factor ).to(device)
    t_dom = torch.rand(num_samples*30, 1).to(device) 
    x_bc_x0 = (torch.zeros(num_samples, 1)*4 + factor ).to(device)
    t_bc_x0 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_x1 = (torch.zeros(num_samples, 1)*4 - factor ).to(device)
    t_bc_x1 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_t0 = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # Uniformly distributed random values between 0 and 1
    t_bc_t0 = torch.zeros(num_samples, 1).to(device)

    for epoch in tqdm(range(num_epochs),
                  desc='Progress:',  
                  leave=False,  
                  ncols=75,
                  mininterval=0.1,
                  bar_format='{l_bar} {bar} | {remaining}',  # Only show the bar without any counters
                  colour='blue'): 
        model_u.train()
        model_v.train()
        
        def closure_u():
            optimizer_u.zero_grad()
            optimizer_v.zero_grad()
            x_dom = (torch.rand(num_samples*30, 1)*4 + factor ).to(device)
            t_dom = torch.rand(num_samples*30, 1).to(device) 
            x_dom.requires_grad_(True)
            t_dom.requires_grad_(True)

            #physics_loss_ur, physics_loss_ui, physics_loss_v = compute_physics_loss(model, x_n, t_n, device, mse_cost_function) 
            u_real, u_imag = model_u(x_dom, t_dom)
            v = model_v(x_dom, t_dom)
            u_abs = torch.square(u_real) + torch.square(u_imag)

            # First order derivatives with retain_graph=True to reuse computational graph
            u_real_x = torch.autograd.grad(u_real.sum(), x_dom, create_graph=True )[0]
            u_real_t = torch.autograd.grad(u_real.sum(), t_dom, create_graph=True )[0]
            u_imag_x = torch.autograd.grad(u_imag.sum(), x_dom, create_graph=True )[0]
            u_imag_t = torch.autograd.grad(u_imag.sum(), t_dom, create_graph=True )[0]
            v_x = torch.autograd.grad(v.sum(), x_dom, create_graph=True )[0]
            v_t = torch.autograd.grad(v.sum(), t_dom, create_graph=True )[0]
    
            # Second order derivatives
            u_real_xx = torch.autograd.grad(u_real_x.sum(), x_dom, create_graph=True )[0]
            u_imag_xx = torch.autograd.grad(u_imag_x.sum(), x_dom, create_graph=True )[0]

            # Compute u_abs_xx with retain_graph if further gradients need to be calculated
            u_abs_x = torch.autograd.grad(u_abs.sum(), x_dom, create_graph=True )[0]

            # Define du_r, du_i, dv according to given formulas
            du_r = - u_imag_t - u_real_xx - u_real * v
            du_i =   u_real_t - u_imag_xx - u_imag * v
            #dv = v_t + u_abs_x

            zero_target = torch.zeros_like(du_r)  # Assuming du_r, du_i, dv have the same shape
            physics_loss_ur = mse_cost_function(du_r, zero_target)
            physics_loss_ui = mse_cost_function(du_i, zero_target)
            #physics_loss_v = mse_cost_function(dv, zero_target)
            print(physics_loss_ur)
            print(physics_loss_ur)
            #print(physics_loss_v)

            boundary_loss_ur_x0, boundary_loss_ui_x0, boundary_loss_v_x0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x0, t_bc_x0, mse_cost_function, k, omega, r)
            boundary_loss_ur_x1, boundary_loss_ui_x1, boundary_loss_v_x1 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x1, t_bc_x1, mse_cost_function, k, omega, r)
            boundary_loss_ur_t0, boundary_loss_ui_t0, boundary_loss_v_t0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_t0, t_bc_t0, mse_cost_function, k, omega, r)
            #boundary_loss_ur_t1, boundary_loss_ui_t1, boundary_loss_v_t1 = compute_analytical_boundary_loss(model, x_bc_t1, t_bc_t1, mse_cost_function, k, omega, r)
            #domain_loss_ur_t, domain_loss_ui_t, domain_loss_v_t = compute_analytical_boundary_loss(model, x_dom, t_dom, mse_cost_function, k, omega, r)
            
            # Total loss 
            loss_ur = gamma*(physics_loss_ur) + beta*( boundary_loss_ur_x0 + boundary_loss_ur_x1 + boundary_loss_ur_t0)
            loss_ui = gamma*(physics_loss_ui) + beta*( boundary_loss_ui_x0 + boundary_loss_ui_x1 + boundary_loss_ur_t0 )
            #loss_v = gamma*(physics_loss_v) + beta*( boundary_loss_v_x0 + boundary_loss_v_x1 + boundary_loss_v_t0 )
            total_loss = loss_ur + loss_ui #+ loss_v
            total_loss.backward()

            return total_loss 


        def closure_v():
            optimizer_u.zero_grad()
            optimizer_v.zero_grad()
            x_dom = (torch.rand(num_samples*30, 1)*4 + factor ).to(device)
            t_dom = torch.rand(num_samples*30, 1).to(device) 
            x_dom.requires_grad_(True)
            t_dom.requires_grad_(True)

            #physics_loss_ur, physics_loss_ui, physics_loss_v = compute_physics_loss(model, x_n, t_n, device, mse_cost_function) 
            u_real, u_imag = model_u(x_dom, t_dom)
            v = model_v(x_dom, t_dom)
            u_abs = torch.square(u_real) + torch.square(u_imag)

            # First order derivatives with retain_graph=True to reuse computational graph
            u_real_x = torch.autograd.grad(u_real.sum(), x_dom, create_graph=True )[0]
            u_real_t = torch.autograd.grad(u_real.sum(), t_dom, create_graph=True )[0]
            u_imag_x = torch.autograd.grad(u_imag.sum(), x_dom, create_graph=True )[0]
            u_imag_t = torch.autograd.grad(u_imag.sum(), t_dom, create_graph=True )[0]
            v_x = torch.autograd.grad(v.sum(), x_dom, create_graph=True )[0]
            v_t = torch.autograd.grad(v.sum(), t_dom, create_graph=True )[0]
    
            # Second order derivatives
            u_real_xx = torch.autograd.grad(u_real_x.sum(), x_dom, create_graph=True )[0]
            u_imag_xx = torch.autograd.grad(u_imag_x.sum(), x_dom, create_graph=True )[0]

            # Compute u_abs_xx with retain_graph if further gradients need to be calculated
            u_abs_x = torch.autograd.grad(u_abs.sum(), x_dom, create_graph=True )[0]

            # Define du_r, du_i, dv according to given formulas
            #du_r = - u_imag_t - u_real_xx - u_real * v
            #du_i =   u_real_t - u_imag_xx - u_imag * v
            dv = v_t + u_abs_x

            zero_target = torch.zeros_like(dv)  # Assuming du_r, du_i, dv have the same shape
            #physics_loss_ur = mse_cost_function(du_r, zero_target)
            #physics_loss_ui = mse_cost_function(du_i, zero_target)
            physics_loss_v = mse_cost_function(dv, zero_target)
            #print(physics_loss_ur)
            #print(physics_loss_ur)
            print(physics_loss_v)

            boundary_loss_ur_x0, boundary_loss_ui_x0, boundary_loss_v_x0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x0, t_bc_x0, mse_cost_function, k, omega, r)
            boundary_loss_ur_x1, boundary_loss_ui_x1, boundary_loss_v_x1 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x1, t_bc_x1, mse_cost_function, k, omega, r)
            boundary_loss_ur_t0, boundary_loss_ui_t0, boundary_loss_v_t0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_t0, t_bc_t0, mse_cost_function, k, omega, r)
            #boundary_loss_ur_t1, boundary_loss_ui_t1, boundary_loss_v_t1 = compute_analytical_boundary_loss(model, x_bc_t1, t_bc_t1, mse_cost_function, k, omega, r)
            #domain_loss_ur_t, domain_loss_ui_t, domain_loss_v_t = compute_analytical_boundary_loss(model, x_dom, t_dom, mse_cost_function, k, omega, r)
            
            # Total loss 
            #loss_ur = gamma*(physics_loss_ur) + beta*( boundary_loss_ur_x0 + boundary_loss_ur_x1 + boundary_loss_ur_t0)
            #loss_ui = gamma*(physics_loss_ui) + beta*( boundary_loss_ui_x0 + boundary_loss_ui_x1 + boundary_loss_ur_t0 )
            loss_v = gamma*(physics_loss_v) + beta*( boundary_loss_v_x0 + boundary_loss_v_x1 + boundary_loss_v_t0 )
            total_loss = loss_v
            total_loss.backward()

            return total_loss 
    
        optimizer_u.step(closure_u)    
        optimizer_v.step(closure_v)

        
        
        if epoch % 10 == 0:
            current_loss_u = closure_u()  # Optionally recompute to print
            current_loss_v = closure_v()
            print(f' Epoch {epoch}, Loss U: {current_loss_u.item()}, Loss V: {current_loss_v.item()}') 
            model_u_filename = os.path.join(model_save_path, f'C_HIGGS_U_second_training_epoch_{epoch}.pth')
            torch.save(model_u.state_dict(), model_u_filename)
            model_v_filename = os.path.join(model_save_path, f'C_HIGGS_V_second_training_epoch_{epoch}.pth')
            torch.save(model_v.state_dict(), model_v_filename)
            plot_model_results(epoch, model_u, model_v, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results') 
            
    model_u_filename = os.path.join(model_save_path, f'C_HIGGS_U_second_training.pth')
    torch.save(model_u.state_dict(), model_u_filename)
    model_v_filename = os.path.join(model_save_path, f'C_HIGGS_V_second_training.pth')
    torch.save(model_v.state_dict(), model_v_filename)
    print('TRAINING COMPLETED')

In [7]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.ndimage import gaussian_filter

def plot_model_results(epoch, model_u, model_v, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results'):
    x = torch.linspace(-1.8, 1.8, 400)
    t = torch.linspace(0.2, 0.8, 400)
    X, T = torch.meshgrid(x, t)  # Create a 2D grid of x and t
    X_flat = X.flatten().unsqueeze(-1).to(device)
    T_flat = T.flatten().unsqueeze(-1).to(device)
    
    model_u_state = torch.load(os.path.join(model_save_path, f'C_HIGGS_U_second_training_epoch_{epoch}.pth'), map_location=device)
    model_u.load_state_dict(model_u_state)
    model_u.eval()

    model_v_state = torch.load(os.path.join(model_save_path, f'C_HIGGS_V_second_training_epoch_{epoch}.pth'), map_location=device)
    model_v.load_state_dict(model_v_state)
    model_v.eval()

    # Get predictions from the trained models
    with torch.no_grad():
        pred_u_r, pred_u_i = model_u(X_flat, T_flat) 
        pred_v = model_v(X_flat, T_flat) 

    pred_u_r = pred_u_r.cpu().reshape(X.shape).numpy()
    pred_u_i = pred_u_i.cpu().reshape(X.shape).numpy()
    pred_v = pred_v.cpu().reshape(X.shape).numpy()

    real_u1_analytical = real_u1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()
    imag_u1_analytical = imag_u1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()
    real_v1_analytical = real_v1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()

    pred_v_smooth = gaussian_filter(pred_v, sigma=sigma)

    shrink = 0.3
    aspect = 50

    # Plotting predictions
    fig = plt.figure(figsize=(24, 16))

    ax1 = fig.add_subplot(231, projection='3d')
    ax1.plot_surface(X.numpy(), T.numpy(), pred_u_r, cmap=cmap)
    ax1.set_title('Predicted Real Part of $u_1(x, t)$')
    ax1.set_xlabel('x')
    ax1.set_ylabel('t')
    ax1.set_zlabel('Real part of $u_1$')

    ax2 = fig.add_subplot(232, projection='3d')

    ax2.plot_surface(X.numpy(), T.numpy(), pred_u_i, cmap=cmap)
    ax2.set_title('Predicted Imaginary Part of $u_1(x, t)$')
    ax2.set_xlabel('x')
    ax2.set_ylabel('t')
    ax2.set_zlabel('Imag part of $u_1$')

    ax3 = fig.add_subplot(233, projection='3d')
    ax3.plot_surface(X.numpy(), T.numpy(), pred_v_smooth, cmap=cmap)
    ax3.set_title('Predicted Real Part of $v_1(x, t)$')
    ax3.set_xlabel('x')
    ax3.set_ylabel('t')
    ax3.set_zlabel('Real part of $v_1$')

    ax4 = fig.add_subplot(234, projection='3d')
    ax4.plot_surface(X.numpy(), T.numpy(), real_u1_analytical, cmap=cmap)
    ax4.set_title('Analytical Real Part of $u_1(x, t)$')
    ax4.set_xlabel('x')
    ax4.set_ylabel('t')
    ax4.set_zlabel('Real part of $u_1$')

    ax5 = fig.add_subplot(235, projection='3d')
    ax5.plot_surface(X.numpy(), T.numpy(), imag_u1_analytical, cmap=cmap)
    ax5.set_title('Analytical Imaginary Part of $u_1(x, t)$')
    ax5.set_xlabel('x')
    ax5.set_ylabel('t')
    ax5.set_zlabel('Imag part of $u_1$')

    ax6 = fig.add_subplot(236, projection='3d')
    ax6.plot_surface(X.numpy(), T.numpy(), real_v1_analytical, cmap=cmap)
    ax6.set_title('Analytical Real Part of $v_1(x, t)$')
    ax6.set_xlabel('x')
    ax6.set_ylabel('t')
    ax6.set_zlabel('Real part of $v_1$')

    plt.tight_layout()
    plt.savefig(os.path.join(image_save_path, f'chiggs_model_comparison_3d_epoch_{epoch}.png'))
    plt.close(fig)  # Close the figure to free memory


In [8]:
! rm -rf results
! rm -rf model_weights

In [9]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available():
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU.")

model_u = FourierFeatureNN(device=device).to(device)
model_v = FourierFeatureNN_V(device=device).to(device)
#model_u = FCN(input_dim=1, neuron_units=100, activation=nn.Tanh, device=device)
#model_ui = FCN(input_dim=1, neuron_units=100, activation=nn.Tanh, device=device)
#model_v = FCN(input_dim=1, neuron_units=100, activation=nn.Tanh, device=device)

print(model_u)
print(model_v)
num_epochs_lbfgs = 500  # Number of training epochs
num_samples_lbfgs = 1000 # Number of samples for training
num_epochs_sq = 36000
num_samples_sq = 1000
lr_sq = 1e-4 
lr_lbfgs = 1e-2
r = 1.1
omega = 5 
k = 0.5
gamma = 1
beta = 1
model_save_path = 'model_weights' 
mse_cost_function = torch.nn.MSELoss()
os.makedirs(model_save_path, exist_ok=True)
os.makedirs('results', exist_ok=True)
losses = []
line_search_fn = "strong_wolfe"

CUDA is available! Training on GPU.
FourierFeatureNN(
  (path_x): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): Tanh()
  )
  (path_t): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): Tanh()
  )
  (ffn_u): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=2, bias=True)
  )
)
FourierFeatureNN_V(
  (path_x): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh(

In [None]:
LBFGS_training(model_u, model_v, model_save_path, mse_cost_function, device, num_epochs_lbfgs, lr_lbfgs, num_samples_lbfgs, r, k, omega, gamma, beta, line_search_fn)

Starting LBFGS Fine Tuning


Progress::   0%| [34m                                                      [0m | ?[0m

tensor(0.0212, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0212, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0213, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0213, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0216, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0216, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0222, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0222, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0250, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0250, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0253, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0253, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0262, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0262, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0339, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0339, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0345, device='cuda:0', grad_fn=

Progress::   0%| [34m                                                [0m | 5:05:40[0m

tensor(0.5468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5500, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5500, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5474, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5474, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5474, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5474, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.5427, device='cuda:0', grad_fn=

Progress::   0%| [34m▏                                               [0m | 3:41:45[0m

tensor(0.0387, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0376, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0376, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0267, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0255, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0255, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0312, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0312, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0338, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0338, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0305, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0305, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0331, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0331, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0288, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.0288, device='cuda:0', grad_fn=

Progress::   1%| [34m▎                                               [0m | 3:07:05[0m

tensor(0.0098, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9478, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9478, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9499, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9499, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0098, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0096, device='cuda:0', grad_fn=

Progress::   1%| [34m▍                                               [0m | 2:41:19[0m

tensor(0.0101, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8847, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8847, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8898, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8898, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8904, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8904, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8917, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8917, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8937, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8937, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8903, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8903, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0101, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0101, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0102, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0101, device='cuda:0', grad_fn=

Progress::   1%| [34m▍                                               [0m | 2:23:36[0m

tensor(0.0076, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9064, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9064, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9127, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9127, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9206, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9206, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0077, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0075, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0076, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0076, device='cuda:0', grad_fn=

Progress::   1%| [34m▌                                               [0m | 2:14:59[0m

tensor(0.0059, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9356, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9356, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9550, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9550, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9464, device='cuda:0', grad_fn=

Progress::   1%| [34m▋                                               [0m | 2:17:10[0m

tensor(0.0046, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9832, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9832, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9758, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9758, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9636, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9636, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9739, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9739, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9740, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9740, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9746, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9746, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9745, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9745, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9722, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9722, device='cuda:0', grad_fn=

Progress::   2%| [34m▊                                               [0m | 2:03:49[0m

tensor(0.0046, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9750, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9750, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9760, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9760, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9748, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9748, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9756, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9756, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9710, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9710, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9708, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9708, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9761, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9761, device='cuda:0', grad_fn=

Progress::   2%| [34m▊                                               [0m | 2:00:59[0m

tensor(0.0047, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9678, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9678, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9749, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9749, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9691, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9691, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9787, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9787, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9743, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9743, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.9597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8979, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.8979, device='cuda:0', grad_fn=

Progress::   2%| [34m▉                                               [0m | 2:29:17[0m

tensor(0.0128, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.6793, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.6793, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.7943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.7943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3156, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3156, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3131, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3131, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3169, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3169, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3133, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3133, device='cuda:0', grad_fn=

Progress::   2%| [34m█                                               [0m | 2:38:59[0m

tensor(0.3101, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3101, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3107, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3107, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3109, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3109, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3156, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3156, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3224, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3224, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3152, device='cuda:0', grad_fn=

Progress::   2%| [34m█▏                                              [0m | 2:19:08[0m

tensor(0.0423, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3113, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3113, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3191, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3191, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3124, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3124, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3148, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3148, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::   3%| [34m█▏                                              [0m | 1:50:06[0m

tensor(0.0429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3135, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3135, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3184, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3184, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3158, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3158, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3175, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3175, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3138, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3138, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0418, device='cuda:0', grad_fn=

Progress::   3%| [34m█▎                                              [0m | 1:39:53[0m

tensor(0.0421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3123, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3123, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3143, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3143, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3160, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3160, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3212, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3212, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3146, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3146, device='cuda:0', grad_fn=

Progress::   3%| [34m█▍                                              [0m | 1:29:42[0m

tensor(0.0431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3137, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3137, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3150, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3150, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0424, device='cuda:0', grad_fn=

Progress::   3%| [34m█▌                                              [0m | 1:22:29[0m

tensor(0.0420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3149, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3149, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3152, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3152, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3154, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3154, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3181, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3181, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0415, device='cuda:0', grad_fn=

Progress::   3%| [34m█▋                                              [0m | 1:21:34[0m

tensor(0.0422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3126, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3126, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3112, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3112, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3088, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3088, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3145, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3145, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3132, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3132, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3104, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3104, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3057, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3057, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3094, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3094, device='cuda:0', grad_fn=

Progress::   4%| [34m█▋                                              [0m | 1:25:23[0m

tensor(0.0420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3135, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3135, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3070, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3070, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3123, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3123, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3119, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3119, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3109, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3109, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3052, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3052, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3162, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3162, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3120, device='cuda:0', grad_fn=

Progress::   4%| [34m█▊                                              [0m | 1:24:35[0m

tensor(0.0420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3085, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3085, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3143, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3143, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3074, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3074, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3085, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3085, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3061, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3061, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3075, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3075, device='cuda:0', grad_fn=

Progress::   4%| [34m█▉                                              [0m | 1:27:21[0m

tensor(0.0442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2935, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2935, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2960, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2960, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2966, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2966, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2881, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2881, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2945, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2945, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2952, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2952, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2951, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2951, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2948, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2948, device='cuda:0', grad_fn=

Progress::   4%| [34m██                                              [0m | 1:49:26[0m

tensor(0.2972, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2972, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2950, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2950, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2971, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2971, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2956, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2956, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2954, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2954, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2954, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2954, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2889, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2889, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2916, device='cuda:0', grad_fn=

Progress::   4%| [34m██                                              [0m | 1:59:11[0m

tensor(0.0442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2914, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2914, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2916, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2916, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2928, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2928, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2907, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2907, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2832, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2832, device='cuda:0', grad_fn=

Progress::   5%| [34m██▏                                             [0m | 2:07:21[0m

tensor(0.0482, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2825, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2825, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2782, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2782, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2801, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2801, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2858, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2858, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2840, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2840, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2795, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2795, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2739, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2739, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2801, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2801, device='cuda:0', grad_fn=

Progress::   5%| [34m██▎                                             [0m | 2:13:07[0m

tensor(0.0492, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2854, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2854, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2806, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2806, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2840, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2840, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2853, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2853, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2826, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2826, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2784, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2784, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2831, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2831, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2843, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2843, device='cuda:0', grad_fn=

Progress::   5%| [34m██▍                                             [0m | 2:37:42[0m

tensor(0.0568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2291, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2291, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2263, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2263, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2264, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2264, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2256, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2256, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2326, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2326, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2248, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2248, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2154, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.2154, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=

Progress::   5%| [34m██▍                                             [0m | 2:32:06[0m

tensor(0.0445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1588, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1588, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=

Progress::   5%| [34m██▌                                             [0m | 2:07:16[0m

tensor(0.0439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1574, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1574, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0437, device='cuda:0', grad_fn=

Progress::   6%| [34m██▋                                             [0m | 1:46:10[0m

tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1621, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1621, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=

Progress::   6%| [34m██▊                                             [0m | 1:37:50[0m

tensor(0.0442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=

Progress::   6%| [34m██▉                                             [0m | 1:42:02[0m

tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1544, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1544, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1583, device='cuda:0', grad_fn=

Progress::   6%| [34m██▉                                             [0m | 1:53:13[0m

tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1610, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1610, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=

Progress::   6%| [34m███                                             [0m | 1:32:52[0m

tensor(0.0445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1567, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1567, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1559, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1559, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1573, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1573, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=

Progress::   7%| [34m███▏                                            [0m | 1:28:34[0m

tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1601, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1601, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1610, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1610, device='cuda:0', grad_fn=

Progress::   7%| [34m███▎                                            [0m | 1:31:29[0m

tensor(0.0434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1580, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1580, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1575, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1575, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0441, device='cuda:0', grad_fn=

Progress::   7%| [34m███▎                                            [0m | 1:20:57[0m

tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1614, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1614, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1620, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1620, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1594, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0437, device='cuda:0', grad_fn=

Progress::   7%| [34m███▍                                            [0m | 1:12:41[0m

tensor(0.0435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1593, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1593, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1627, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1627, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=

Progress::   7%| [34m███▌                                            [0m | 1:27:26[0m

tensor(0.0435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1549, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1549, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=

Progress::   8%| [34m███▋                                            [0m | 1:30:13[0m

tensor(0.0442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1588, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1588, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1554, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=

Progress::   8%| [34m███▋                                            [0m | 1:27:58[0m

tensor(0.0435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1580, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1580, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1560, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1560, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=

Progress::   8%| [34m███▊                                            [0m | 1:38:34[0m

tensor(0.0431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1622, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=

Progress::   8%| [34m███▉                                            [0m | 1:47:36[0m

tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=

Progress::   8%| [34m████                                            [0m | 1:36:43[0m

tensor(0.0438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=

Progress::   9%| [34m████▏                                           [0m | 1:28:21[0m

tensor(0.0432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1603, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1603, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0436, device='cuda:0', grad_fn=

Progress::   9%| [34m████▏                                           [0m | 1:22:46[0m

tensor(0.0436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1608, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1590, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1616, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1616, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1596, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1620, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1620, device='cuda:0', grad_fn=

Progress::   9%| [34m████▎                                           [0m | 1:26:31[0m

tensor(0.0434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1612, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1612, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1614, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1614, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0439, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::   9%| [34m████▍                                           [0m | 1:11:02[0m

tensor(0.0435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1664, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1664, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1603, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1603, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1553, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1553, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1584, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1584, device='cuda:0', grad_fn=

Progress::   9%| [34m████▌                                           [0m | 1:16:27[0m

tensor(0.0435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1601, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1601, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1616, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1616, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1624, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1619, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1598, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1598, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=

Progress::  10%| [34m████▌                                           [0m | 1:20:51[0m

tensor(0.0440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1595, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1655, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1655, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1602, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1606, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=

Progress::  10%| [34m████▋                                           [0m | 1:27:45[0m

tensor(0.0536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1563, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1563, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1600, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1514, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1514, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1611, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1611, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1623, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1593, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1593, device='cuda:0', grad_fn=

Progress::  10%| [34m████▊                                           [0m | 1:19:50[0m

tensor(0.0536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1545, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1545, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1575, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1575, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1563, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1563, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0543, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0536, device='cuda:0', grad_fn=

Progress::  10%| [34m████▉                                           [0m | 1:23:57[0m

tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1552, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1552, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1582, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1582, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1550, device='cuda:0', grad_fn=

Progress::  10%| [34m████▉                                           [0m | 1:18:58[0m

tensor(0.0537, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1546, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1546, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1587, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1599, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0545, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0538, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  11%| [34m█████                                           [0m | 1:08:09[0m

tensor(0.0536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1560, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1560, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1561, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1561, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1581, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1539, device='cuda:0', grad_fn=

Progress::  11%| [34m█████▏                                          [0m | 1:06:14[0m

tensor(0.0542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1541, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1541, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1585, device='cuda:0', grad_fn=

Progress::  11%| [34m█████▎                                          [0m | 1:08:03[0m

tensor(0.0538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1525, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1525, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1534, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1516, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1516, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1543, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1543, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1502, device='cuda:0', grad_fn=

Progress::  11%| [34m█████▍                                          [0m | 1:18:42[0m

tensor(0.0538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1556, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1494, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1494, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1553, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1553, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1510, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1510, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=

Progress::  11%| [34m█████▍                                          [0m | 1:18:28[0m

tensor(0.0531, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1557, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1557, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1565, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1519, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1519, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1532, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1532, device='cuda:0', grad_fn=

Progress::  12%| [34m█████▌                                          [0m | 1:27:26[0m

tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1527, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1527, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1548, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1548, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1549, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1549, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0527, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0528, device='cuda:0', grad_fn=

Progress::  12%| [34m█████▋                                          [0m | 1:18:45[0m

tensor(0.0540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1552, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1552, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1501, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1501, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1555, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1557, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1557, device='cuda:0', grad_fn=

Progress::  12%| [34m█████▊                                          [0m | 1:16:16[0m

tensor(0.0529, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1498, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1498, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1514, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1514, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1546, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1546, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1499, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1499, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1572, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1572, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0534, device='cuda:0', grad_fn=

Progress::  12%| [34m█████▊                                          [0m | 1:23:30[0m

tensor(0.1541, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1541, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1493, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1493, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1542, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1548, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1548, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1519, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1519, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1532, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1532, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1527, device='cuda:0', grad_fn=

Progress::  12%| [34m█████▉                                          [0m | 1:27:27[0m

tensor(0.0535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1528, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1540, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1507, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1507, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1538, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1527, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1527, device='cuda:0', grad_fn=

Progress::  13%| [34m██████                                          [0m | 1:44:56[0m

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1492, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1492, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1489, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1489, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1508, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1508, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1481, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1481, device='cuda:0', grad_fn=

Progress::  13%| [34m██████▏                                         [0m | 1:35:17[0m

tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1483, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1495, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1495, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1516, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1516, device='cuda:0', grad_fn=

Progress::  13%| [34m██████▏                                         [0m | 1:22:57[0m

tensor(0.0459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1497, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1497, device='cuda:0', grad_fn=

Progress::  13%| [34m██████▎                                         [0m | 1:23:11[0m

tensor(0.0457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1493, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1493, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1512, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1512, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=

Progress::  13%| [34m██████▍                                         [0m | 1:17:23[0m

tensor(0.0459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1490, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1490, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1480, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1480, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=

Progress::  14%| [34m██████▌                                         [0m | 1:20:35[0m

tensor(0.0456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1443, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1443, device='cuda:0', grad_fn=

Progress::  14%| [34m██████▌                                         [0m | 1:32:13[0m

tensor(0.0459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1467, device='cuda:0', grad_fn=

Progress::  14%| [34m██████▋                                         [0m | 1:41:47[0m

tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1418, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1418, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1440, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1474, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1474, device='cuda:0', grad_fn=

Progress::  14%| [34m██████▊                                         [0m | 1:44:29[0m

tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1471, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1471, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1476, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1476, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1455, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=

Progress::  14%| [34m██████▉                                         [0m | 1:25:07[0m

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1484, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1484, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0458, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  15%| [34m███████                                         [0m | 1:11:52[0m

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0460, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  15%| [34m███████                                         [0m | 1:00:09[0m

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1486, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1486, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0459, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0455, device='cuda:0', grad_fn=

Progress::  15%| [34m███████▌                                          [0m | 57:22[0m

tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=

Progress::  15%| [34m███████▎                                        [0m | 1:09:41[0m

tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1442, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=

Progress::  15%| [34m███████▍                                        [0m | 1:32:17[0m

tensor(0.0461, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1478, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1478, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1479, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1479, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0457, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  16%| [34m███████▍                                        [0m | 1:15:55[0m

tensor(0.0462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1386, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1386, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1444, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1423, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1423, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1432, device='cuda:0', grad_fn=

Progress::  16%| [34m███████▌                                        [0m | 1:15:47[0m

tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1443, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1443, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1400, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1400, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1387, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1387, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1375, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1375, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=

Progress::  16%| [34m███████▋                                        [0m | 1:14:35[0m

tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1441, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=

Progress::  16%| [34m███████▊                                        [0m | 1:24:10[0m

tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=

Progress::  16%| [34m███████▊                                        [0m | 1:17:42[0m

tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1392, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1392, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1452, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1452, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=

Progress::  17%| [34m███████▉                                        [0m | 1:19:11[0m

tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1349, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1349, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1401, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1401, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=

Progress::  17%| [34m████████                                        [0m | 1:10:55[0m

tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1458, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1458, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=

Progress::  17%| [34m████████▏                                       [0m | 1:18:17[0m

tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1404, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1446, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=

Progress::  17%| [34m████████▎                                       [0m | 1:11:40[0m

tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1460, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=

Progress::  17%| [34m████████▎                                       [0m | 1:25:38[0m

tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1368, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1368, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1456, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  18%| [34m████████▍                                       [0m | 1:07:59[0m

tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1422, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1436, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1393, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1393, device='cuda:0', grad_fn=

Progress::  18%| [34m████████▌                                       [0m | 1:11:40[0m

tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1447, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::  18%| [34m████████▋                                       [0m | 1:01:11[0m

tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1399, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1399, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1413, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1381, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1381, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=

Progress::  18%| [34m████████▋                                       [0m | 1:15:22[0m

tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1372, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1372, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1450, device='cuda:0', grad_fn=

Progress::  18%| [34m████████▊                                       [0m | 1:04:28[0m

tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1392, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1392, device='cuda:0', grad_fn=

Progress::  19%| [34m████████▉                                       [0m | 1:21:35[0m

tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1393, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1393, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1420, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0463, device='cuda:0', grad_fn=

Progress::  19%| [34m█████████                                       [0m | 1:13:30[0m

tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1434, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1408, device='cuda:0', grad_fn=

Progress::  19%| [34m█████████                                       [0m | 1:16:37[0m

tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1354, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1354, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=

Progress::  19%| [34m█████████▏                                      [0m | 1:13:55[0m

tensor(0.0473, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1439, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0467, device='cuda:0', grad_fn=

Progress::  19%| [34m█████████▎                                      [0m | 1:04:06[0m

tensor(0.0464, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1418, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1418, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1451, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1451, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1438, device='cuda:0', grad_fn=

Progress::  20%| [34m█████████▊                                        [0m | 58:29[0m

tensor(0.0471, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1397, device='cuda:0', grad_fn=

Progress::  20%| [34m█████████▉                                        [0m | 55:59[0m

tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1427, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1401, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1401, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1376, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1376, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=

Progress::  20%| [34m█████████▌                                      [0m | 1:02:18[0m

tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1399, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1399, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1390, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1390, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=

Progress::  20%| [34m█████████▋                                      [0m | 1:20:48[0m

tensor(0.1371, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1371, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1409, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=

Progress::  20%| [34m█████████▊                                      [0m | 1:07:49[0m

tensor(0.0465, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1421, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1419, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0472, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0467, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0466, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0470, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0470, device='cuda:0', grad_fn=

Progress::  21%| [34m██████████▎                                       [0m | 59:45[0m

tensor(0.0469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1402, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1402, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1428, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1382, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1382, device='cuda:0', grad_fn=

Progress::  21%| [34m█████████▉                                      [0m | 1:12:36[0m

tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1359, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1359, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1407, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1388, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1388, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0481, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0480, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0482, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0484, device='cuda:0', grad_fn=

Progress::  21%| [34m██████████                                      [0m | 1:02:55[0m

tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1366, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1366, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1374, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1374, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1379, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1379, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1423, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1423, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1412, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1411, device='cuda:0', grad_fn=

Progress::  21%| [34m██████████▏                                     [0m | 1:03:12[0m

tensor(0.0481, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1430, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1361, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1361, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1429, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1416, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1384, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=

Progress::  21%| [34m██████████▎                                     [0m | 1:03:56[0m

tensor(0.0486, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1364, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1364, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1388, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1388, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1377, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.1377, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0487, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0485, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0481, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0479, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0478, device='cuda:0', grad_fn=

Progress::  22%| [34m██████████▎                                     [0m | 1:11:29[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  22%| [34m██████████▍                                     [0m | 1:46:48[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  22%| [34m██████████▌                                     [0m | 1:56:14[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  22%| [34m██████████▋                                     [0m | 2:02:58[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  22%| [34m██████████▊                                     [0m | 2:01:23[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  23%| [34m██████████▊                                     [0m | 2:21:04[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  23%| [34m██████████▉                                     [0m | 2:19:49[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  23%| [34m███████████                                     [0m | 2:11:25[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  23%| [34m███████████▏                                    [0m | 2:27:39[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr

Progress::  23%| [34m███████████▏                                    [0m | 2:33:57[0m

tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(nan, device='cuda:0', gr