In [1]:
import torch
import torch.nn as nn
import torch.nn.init as init
from torch.optim import AdamW, LBFGS
from torch.autograd import Variable
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from itertools import cycle 

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
def fourier_features(x, B):
    x_transformed = torch.matmul(x, B)
    return torch.cat([torch.sin(x_transformed), torch.cos(x_transformed)], dim=-1)

def init_fixed_frequency_matrix(size, scale=1.0):
    num_elements = size[0] * size[1]
    lin_space = torch.linspace(-scale, scale, steps=num_elements)
    B = lin_space.view(size).float()
    return B

class FourierFeatureNN(nn.Module):
    def __init__(self, input_dim=1, shared_units=16, neuron_units=32, scale=1.0, 
                 activation=nn.Tanh, device='cpu'):
        super(FourierFeatureNN, self).__init__()
        self.Bx = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)
        self.Bt = init_fixed_frequency_matrix((input_dim, shared_units // 2), scale=scale).to(device)

        # Define separate paths for x and t after Fourier transformation
        self.path_x = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Adjusted from shared_units // 2 to shared_units
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )
        self.path_t = nn.Sequential( 
            nn.Linear(shared_units, neuron_units),  # Same adjustment
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation(),
            nn.Linear(neuron_units, neuron_units),
            activation() )

        # Define separate FFN for u and v directly after the paths
        self.ffn_u = nn.Sequential(
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, 2)  # Outputs for u (real and imaginary parts)
        )
        self.ffn_v = nn.Sequential(
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, neuron_units), activation(),
            nn.Linear(neuron_units, 1)  # Output for v
        )

        self.apply(self.initialize_weights)

    def forward(self, x, t):
        # Apply Fourier feature transformations
        x_fourier = fourier_features(x, self.Bx)
        t_fourier = fourier_features(t, self.Bt)

        # Pass through separate paths
        x_path_output = self.path_x(x_fourier)
        t_path_output = self.path_t(t_fourier)

        # Pointwise multiplication of the separate path outputs
        combined_features = x_path_output * t_path_output

        # Directly pass through different FFNs for u and v
        final_output_u = self.ffn_u(combined_features)
        final_output_v = self.ffn_v(combined_features)

        # Splitting the output for u into real and complex parts
        output_1, output_2 = final_output_u.split(1, dim=-1)
        output_3 = final_output_v
        
        return output_1, output_2, output_3

    def initialize_weights(self, m):
        if isinstance(m, nn.Linear):
            init.xavier_uniform_(m.weight)
            if m.bias is not None:
                init.constant_(m.bias, 0)

In [3]:
def real_u1(x, t, k, omega, r):
    complex_exp = torch.exp(1j * r * (omega * x + t))
    tanh_val = torch.tanh((r * (k + x + omega * t)) / torch.sqrt(torch.tensor(2.0)))
    result = torch.real(1j * r * complex_exp * torch.sqrt(torch.tensor(1) + omega**2) * tanh_val)
    return result

def imag_u1(x, t, k, omega, r):
    complex_exp = torch.exp(1j * r * (omega * x + t))
    tanh_val = torch.tanh((r * (k + x + omega * t)) / torch.sqrt(torch.tensor(2.0)))
    result = torch.imag(1j * r * complex_exp * torch.sqrt(torch.tensor(1) + omega**2) * tanh_val)
    return result

def real_v1(x, t, k, omega, r):
    result = (r * torch.tanh((r * (k + x + omega * t)) / torch.sqrt(torch.tensor(2.0))))**2
    return result

def compute_analytical_boundary_loss(model, x, t, mse_cost_function, k, omega, r):
    pred_u_r, pred_u_i, pred_v = model(x, t)

    real_u1_val = real_u1(x, t, k, omega, r)
    imag_u1_val = imag_u1(x, t, k, omega, r)
    real_v1_val = real_v1(x, t, k, omega, r)
 
    boundary_loss_ur = mse_cost_function(pred_u_r, real_u1_val)
    boundary_loss_ui = mse_cost_function(pred_u_i, imag_u1_val)
    boundary_loss_v = mse_cost_function(pred_v, real_v1_val)
    
    return boundary_loss_ur, boundary_loss_ui, boundary_loss_v

def cyclic_iterator(items):
    return cycle(items)

In [4]:
def LBFGS_training(model, model_save_path, mse_cost_function, device, num_epochs, lr, num_samples, r, k, omega, gamma, beta, line_search_fn):
    print('Starting LBFGS Fine Tuning')
    optimizer = LBFGS(model.parameters(), lr=1, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)
    factor = -2

    x_n = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # x in range [-5, -3]
    t_n = (torch.rand(num_samples, 1)).to(device)   
    x_dom = (torch.rand(num_samples, 1)*4 + factor ).to(device)
    t_dom = torch.rand(num_samples, 1).to(device) 
    x_bc_x0 = (torch.zeros(num_samples, 1)*4 + factor ).to(device)
    t_bc_x0 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_x1 = (torch.zeros(num_samples, 1)*4 - factor ).to(device)
    t_bc_x1 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_t0 = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # Uniformly distributed random values between 0 and 1
    t_bc_t0 = torch.zeros(num_samples, 1).to(device)


    for epoch in tqdm(range(num_epochs),
                  desc='Progress:',  
                  leave=False,  
                  ncols=75,
                  mininterval=0.1,
                  bar_format='{l_bar} {bar} | {remaining}',  # Only show the bar without any counters
                  colour='blue'): 
        model.train()
        
        def closure():
            optimizer.zero_grad()
            x_dom = (torch.rand(num_samples, 1)*4 + factor ).to(device)
            t_dom = torch.rand(num_samples, 1).to(device) 
            x_dom.requires_grad_(True)
            t_dom.requires_grad_(True)

            #physics_loss_ur, physics_loss_ui, physics_loss_v = compute_physics_loss(model, x_n, t_n, device, mse_cost_function) 
            u_real, u_imag, v = model(x_dom, t_dom)
            u_abs = torch.square(u_real) + torch.square(u_imag)

            # First order derivatives with retain_graph=True to reuse computational graph
            u_real_x = torch.autograd.grad(u_real.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            u_real_t = torch.autograd.grad(u_real.sum(), t_dom, create_graph=True, retain_graph=True)[0]
            u_imag_x = torch.autograd.grad(u_imag.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            u_imag_t = torch.autograd.grad(u_imag.sum(), t_dom, create_graph=True, retain_graph=True)[0]
            v_x = torch.autograd.grad(v.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            v_t = torch.autograd.grad(v.sum(), t_dom, create_graph=True, retain_graph=True)[0]
    
            # Second order derivatives
            u_real_xx = torch.autograd.grad(u_real_x.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            u_real_tt = torch.autograd.grad(u_real_t.sum(), t_dom, create_graph=True, retain_graph=True)[0]
            u_imag_xx = torch.autograd.grad(u_imag_x.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            u_imag_tt = torch.autograd.grad(u_imag_t.sum(), t_dom, create_graph=True, retain_graph=True)[0]
            v_xx = torch.autograd.grad(v_x.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            v_tt = torch.autograd.grad(v_t.sum(), t_dom, create_graph=True, retain_graph=True)[0]

            # Compute u_abs_xx with retain_graph if further gradients need to be calculated
            u_abs_x = torch.autograd.grad(u_abs.sum(), x_dom, create_graph=True, retain_graph=True)[0]
            u_abs_xx = torch.autograd.grad(u_abs_x.sum(), x_dom, create_graph=True, retain_graph=True)[0]

            # Define du_r, du_i, dv according to given formulas
            du_r = u_real_tt - u_real_xx + u_abs * u_real - 2 * u_real * v
            du_i = u_imag_tt - u_imag_xx + u_abs * u_imag - 2 * u_imag * v
            dv = v_tt + v_xx - u_abs_xx

            zero_target = torch.zeros_like(du_r)  # Assuming du_r, du_i, dv have the same shape
            physics_loss_ur = mse_cost_function(du_r, zero_target)
            physics_loss_ui = mse_cost_function(du_i, zero_target)
            physics_loss_v = mse_cost_function(dv, zero_target)
            print(physics_loss_ur)
            print(physics_loss_ur)
            print(physics_loss_v)

            boundary_loss_ur_x0, boundary_loss_ui_x0, boundary_loss_v_x0 = compute_analytical_boundary_loss(model, x_bc_x0, t_bc_x0, mse_cost_function, k, omega, r)
            boundary_loss_ur_x1, boundary_loss_ui_x1, boundary_loss_v_x1 = compute_analytical_boundary_loss(model, x_bc_x1, t_bc_x1, mse_cost_function, k, omega, r)
            boundary_loss_ur_t0, boundary_loss_ui_t0, boundary_loss_v_t0 = compute_analytical_boundary_loss(model, x_bc_t0, t_bc_t0, mse_cost_function, k, omega, r)
            # boundary_loss_ur_t1, boundary_loss_ui_t1, boundary_loss_v_t1 = compute_analytical_boundary_loss(model, x_bc_t1, t_bc_t1, mse_cost_function, k, omega, r)
            domain_loss_ur_t, domain_loss_ui_t, domain_loss_v_t = compute_analytical_boundary_loss(model, x_dom, t_dom, mse_cost_function, k, omega, r)
            
            # Total loss 
            loss_ur = gamma*(physics_loss_ur) + beta*( boundary_loss_ur_x0 + boundary_loss_ur_t0 )
            loss_ui = gamma*(physics_loss_ui) + beta*( boundary_loss_ui_x0 + boundary_loss_ui_t0 )
            loss_v = gamma*(physics_loss_v) + beta*( boundary_loss_v_x0 + boundary_loss_v_t0 )
            total_loss = loss_ur + loss_ui + loss_v
            total_loss.backward()

            return total_loss 
    
        optimizer.step(closure)
        if epoch % 10 == 0:
            current_loss = closure()  # Optionally recompute to print
            print(f' Epoch {epoch}, Loss: {current_loss.item()}') 
            model_filename = os.path.join(model_save_path, f'C_HIGGS_second_training_epoch_{epoch}.pth')
            torch.save(model.state_dict(), model_filename)
            plot_model_results(epoch, model, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results') 
            
    model_filename = os.path.join(model_save_path, f'C_HIGGS_second_training.pth')
    torch.save(model.state_dict(), model_filename)
    print('TRAINING COMPLETED')

In [5]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.ndimage import gaussian_filter

def plot_model_results(epoch, model, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results'):
    x = torch.linspace(-1.8, 1.8, 400)
    t = torch.linspace(0.2, 0.8, 400)
    X, T = torch.meshgrid(x, t)  # Create a 2D grid of x and t
    X_flat = X.flatten().unsqueeze(-1).to(device)
    T_flat = T.flatten().unsqueeze(-1).to(device)
    
    model_save_path = 'model_weights' 
    model_state = torch.load(os.path.join(model_save_path, f'C_HIGGS_second_training_epoch_{epoch}.pth'), map_location=device)
    model.load_state_dict(model_state)
    model.eval()

    # Get predictions from the trained models
    with torch.no_grad():
        pred_u_r, pred_u_i, pred_v = model(X_flat, T_flat) 

    pred_u_r = pred_u_r.cpu().reshape(X.shape).numpy()
    pred_u_i = pred_u_i.cpu().reshape(X.shape).numpy()
    pred_v = pred_v.cpu().reshape(X.shape).numpy()

    real_u1_analytical = real_u1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()
    imag_u1_analytical = imag_u1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()
    real_v1_analytical = real_v1(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()

    pred_v_smooth = gaussian_filter(pred_v, sigma=sigma)

    shrink = 0.3
    aspect = 50

    # Plotting predictions
    fig = plt.figure(figsize=(24, 16))

    ax1 = fig.add_subplot(231, projection='3d')
    ax1.plot_surface(X.numpy(), T.numpy(), pred_u_r, cmap=cmap)
    ax1.set_title('Predicted Real Part of $u_1(x, t)$')
    ax1.set_xlabel('x')
    ax1.set_ylabel('t')
    ax1.set_zlabel('Real part of $u_1$')

    ax2 = fig.add_subplot(232, projection='3d')
    ax2.plot_surface(X.numpy(), T.numpy(), pred_u_i, cmap=cmap)
    ax2.set_title('Predicted Imaginary Part of $u_1(x, t)$')
    ax2.set_xlabel('x')
    ax2.set_ylabel('t')
    ax2.set_zlabel('Imag part of $u_1$')

    ax3 = fig.add_subplot(233, projection='3d')
    ax3.plot_surface(X.numpy(), T.numpy(), pred_v_smooth, cmap=cmap)
    ax3.set_title('Predicted Real Part of $v_1(x, t)$')
    ax3.set_xlabel('x')
    ax3.set_ylabel('t')
    ax3.set_zlabel('Real part of $v_1$')

    ax4 = fig.add_subplot(234, projection='3d')
    ax4.plot_surface(X.numpy(), T.numpy(), real_u1_analytical, cmap=cmap)
    ax4.set_title('Analytical Real Part of $u_1(x, t)$')
    ax4.set_xlabel('x')
    ax4.set_ylabel('t')
    ax4.set_zlabel('Real part of $u_1$')

    ax5 = fig.add_subplot(235, projection='3d')
    ax5.plot_surface(X.numpy(), T.numpy(), imag_u1_analytical, cmap=cmap)
    ax5.set_title('Analytical Imaginary Part of $u_1(x, t)$')
    ax5.set_xlabel('x')
    ax5.set_ylabel('t')
    ax5.set_zlabel('Imag part of $u_1$')

    ax6 = fig.add_subplot(236, projection='3d')
    ax6.plot_surface(X.numpy(), T.numpy(), real_v1_analytical, cmap=cmap)
    ax6.set_title('Analytical Real Part of $v_1(x, t)$')
    ax6.set_xlabel('x')
    ax6.set_ylabel('t')
    ax6.set_zlabel('Real part of $v_1$')

    plt.tight_layout()
    plt.savefig(os.path.join(image_save_path, f'chiggs_model_comparison_3d_epoch_{epoch}.png'))
    plt.close(fig)  # Close the figure to free memory


In [6]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available():
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU.")

model = FourierFeatureNN(device=device).to(device)

print(model)
num_epochs_lbfgs = 500  # Number of training epochs
num_samples_lbfgs = 1000*3 # Number of samples for training
num_epochs_sq = 36000
num_samples_sq = 1000
lr_sq = 1e-4 
lr_lbfgs = 1e-3
r = 1.1
omega = 5 
k = 0.5
gamma = 1e-3
beta = 1
model_save_path = 'model_weights' 
mse_cost_function = torch.nn.MSELoss()
os.makedirs(model_save_path, exist_ok=True)
losses = []
line_search_fn = "strong_wolfe"

CUDA is available! Training on GPU.
FourierFeatureNN(
  (path_x): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): Tanh()
  )
  (path_t): Sequential(
    (0): Linear(in_features=16, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=32, bias=True)
    (5): Tanh()
  )
  (ffn_u): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_features=32, out_features=2, bias=True)
  )
  (ffn_v): Sequential(
    (0): Linear(in_features=32, out_features=32, bias=True)
    (1): Tanh()
    (2): Linear(in_features=32, out_features=32, bias=True)
    (3): Tanh()
    (4): Linear(in_fe

In [None]:
LBFGS_training(model, model_save_path, mse_cost_function, device, num_epochs_lbfgs, lr_lbfgs, num_samples_lbfgs, r, k, omega, gamma, beta, line_search_fn)

Starting LBFGS Fine Tuning


Progress::   0%| [34m                                                      [0m | ?[0m

tensor(0.0129, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0129, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0437, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0134, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0134, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0450, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.0788, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(3.5893, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(3.5893, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1.4607, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.4720, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.4720, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.7627, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3614, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(0.3614, device='cuda:0', grad_fn=

Progress::   0%| [34m                                                [0m | 1:17:19[0m

tensor(251.9633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(251.9633, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.4986, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(238.4329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(238.4329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(791.0112, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(223.6780, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(223.6780, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.3262, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(261.6178, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(261.6178, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(812.5723, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(229.8281, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(229.8281, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(435.1586, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(251.0191, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(2

Progress::   0%| [34m▏                                                 [0m | 44:12[0m

tensor(246.0305, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(246.0305, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(740.5785, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(236.9235, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(236.9235, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(552.9197, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(255.8522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(255.8522, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(649.4821, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(327.6200, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(327.6200, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1677.9380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(263.0069, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(263.0069, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(595.2731, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(257.0764, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::   1%| [34m▎                                                 [0m | 35:08[0m

tensor(474.4394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(474.4394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(436.1827, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.4886, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.4886, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(479.2360, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(400.7763, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(400.7763, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(389.6004, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(490.0496, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(490.0496, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(477.6417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.7345, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.7345, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(438.6676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(476.7858, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   1%| [34m▍                                                 [0m | 23:47[0m

tensor(464.9573, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(464.9573, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(442.1571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(440.1808, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(440.1808, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(415.8162, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(510.4566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(510.4566, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.8482, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.3777, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.3777, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(429.9060, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.9351, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.9351, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(486.9043, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(448.9961, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   1%| [34m▌                                                 [0m | 23:58[0m

tensor(541.8591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(541.8591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(570.9827, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.1539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.1539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(618.4977, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.7694, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.7694, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(556.3141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(556.4502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(556.4502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.7054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.4415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(467.4415, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(390.9475, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.7885, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   1%| [34m▌                                                 [0m | 24:02[0m

tensor(503.5761, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(503.5761, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(458.4067, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.1500, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.1500, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(486.8539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(489.0177, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(489.0177, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.7900, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.8380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.8380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(460.4981, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.0755, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.0755, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(436.3980, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.2526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   1%| [34m▋                                                 [0m | 20:00[0m

tensor(578.4920, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(578.4920, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(463.2468, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(558.7815, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(558.7815, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.4511, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.2136, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.2136, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(494.4997, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.7163, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.7163, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.9193, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8057, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8057, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(430.8766, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(493.4651, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   2%| [34m▊                                                 [0m | 17:03[0m

tensor(517.4819, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.4819, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.0294, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.2551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.2551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(443.6169, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.7406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.7406, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(463.4173, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.0502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.0502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(448.8878, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.4747, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.4747, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(535.9002, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.0562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   2%| [34m▉                                                 [0m | 17:14[0m

tensor(555.7142, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.7142, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.0995, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.0723, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.0723, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(494.2882, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.3826, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.3826, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(444.9577, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(505.5539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(505.5539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(480.8463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.3093, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.3093, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.8280, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(495.0029, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   2%| [34m█                                                 [0m | 15:35[0m

tensor(492.5056, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(492.5056, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(476.0187, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.6164, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.6164, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(450.7295, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(469.7268, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(469.7268, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(441.3803, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.5171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.5171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1149.9657, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.3077, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.3077, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.7628, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(487.5158, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::   2%| [34m█                                                 [0m | 25:48[0m

tensor(509.2125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.2125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(474.0943, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.8022, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.8022, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1113.3142, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.5469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.5469, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.4449, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(493.6948, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(493.6948, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(420.7175, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.8402, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.8402, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(484.0568, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(508.4888, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::   2%| [34m█▏                                                [0m | 23:26[0m

tensor(522.5477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.5477, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(422.3052, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.0765, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.0765, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(439.6661, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(583.3673, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(583.3673, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.6697, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.3942, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.3942, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(450.3039, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(568.2405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(568.2405, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(403.8671, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.8294, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   3%| [34m█▎                                                [0m | 22:17[0m

tensor(566.1987, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(566.1987, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(450.8986, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.5717, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.5717, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.5521, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1599.5953, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1599.5953, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(8317.4961, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.5091, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.5091, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(612.5964, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.7991, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.7991, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(419.7010, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.0858, device='cuda:0', grad_fn=<MseLossBackward0>)
tenso

Progress::   3%| [34m█▍                                                [0m | 18:46[0m

tensor(512.8535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.8535, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.0544, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.2571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.2571, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.3756, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(503.4383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(503.4383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.9438, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.2502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.2502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.1388, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(457.6454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(457.6454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(549.2006, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(494.1006, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   3%| [34m█▌                                                [0m | 16:18[0m

tensor(497.7819, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.7819, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(486.0592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.9777, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.9777, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.7803, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.0839, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.0839, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.3313, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(537.8757, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(537.8757, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.5969, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(468.6703, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(468.6703, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(442.1403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.7146, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   3%| [34m█▌                                                [0m | 14:54[0m

tensor(479.9205, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(479.9205, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(551.1337, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(489.4700, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(489.4700, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(456.9698, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(518.0986, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(518.0986, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.9872, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.4054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.4054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(502.3062, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.1689, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.1689, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(616.9171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.2785, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   3%| [34m█▋                                                [0m | 13:09[0m

tensor(525.2248, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.2248, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.9104, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.7834, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.7834, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(502.6819, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.9207, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.9207, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(476.3123, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(576.3068, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(576.3068, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.4144, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.1628, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.1628, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.3691, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(569.9752, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   4%| [34m█▊                                                [0m | 11:01[0m

tensor(556.0862, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(529.0054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(573.2424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(573.2424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(520.3113, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(518.5220, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(518.5220, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.6375, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8526, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(581.7185, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.3597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.3597, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.4851, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(637.4937, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(637.4937, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1

Progress::   4%| [34m█▉                                                [0m | 12:53[0m

tensor(556.6965, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(556.6965, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.8990, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(565.3661, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(565.3661, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.1400, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1507.3031, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(1507.3031, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(13426.1426, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.1339, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.1339, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.9138, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(534.4539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(534.4539, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(489.2178, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.6718, device='cuda:0', grad_fn=<MseLossBackward0>)
tens

Progress::   4%| [34m██                                                [0m | 12:36[0m

tensor(496.6395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(496.6395, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(447.2747, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(585.3417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(585.3417, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(625.8079, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.8609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.8609, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(486.4443, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.0676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.0676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.7758, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.5908, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.5908, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(507.8940, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(560.2122, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   4%| [34m██                                                [0m | 24:46[0m

tensor(536.4871, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.4871, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.8759, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.1884, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.1884, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.7855, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.2202, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(553.2202, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(475.3087, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.8268, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.8268, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(477.5882, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(502.2497, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(502.2497, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(517.9702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.7760, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   4%| [34m██▏                                               [0m | 24:24[0m

tensor(557.6688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(557.6688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(368.4853, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.1453, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(366.9639, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.4012, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.4012, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(361.6616, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.9163, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.9163, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(358.4635, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.0258, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.0258, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(317.0852, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.8359, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   5%| [34m██▎                                               [0m | 20:42[0m

tensor(529.3385, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(529.3385, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(299.8156, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(533.3705, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(533.3705, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(313.4869, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.8192, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.8192, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(304.6844, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.2311, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(504.2311, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(298.4844, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.8083, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.8083, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(295.7938, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(537.8833, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   5%| [34m██▍                                               [0m | 20:16[0m

tensor(543.4125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.4125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(291.4849, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.4843, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.4843, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(280.1391, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(581.5432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(581.5432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(390.2366, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(551.0750, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(551.0750, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(297.6400, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(623.2007, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(623.2007, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(298.0734, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(566.1605, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   5%| [34m██▌                                               [0m | 16:25[0m

tensor(556.2729, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(556.2729, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(276.4569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.7934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.7934, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(270.3458, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(570.6932, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(570.6932, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(278.4783, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(535.6454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(535.6454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(321.7383, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(552.2729, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(552.2729, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(258.2584, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(580.9776, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   5%| [34m██▌                                               [0m | 16:17[0m

tensor(578.6354, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(578.6354, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(253.2669, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.7103, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.7103, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(259.8997, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.8671, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.8671, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(264.9150, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.2396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(536.2396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(256.1895, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(576.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(576.3168, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(265.8655, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(554.9403, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   5%| [34m██▋                                               [0m | 15:39[0m

tensor(602.0171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(602.0171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(250.6366, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(23428.6660, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(23428.6660, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(340545.5938, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.5903, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.5903, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(251.1604, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.2283, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.2283, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(310.2332, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(579.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(579.0433, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(260.6810, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(546.5434, device='cuda:0', grad_fn=<MseLossBackward0>)
t

Progress::   6%| [34m██▊                                               [0m | 15:09[0m

tensor(528.8198, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.8198, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(313.6613, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.1332, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.1332, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(319.9315, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.4431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.4431, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(293.6567, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.9637, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(564.9637, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(234.6109, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.3509, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.3509, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(235.9910, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.8030, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   6%| [34m██▉                                               [0m | 13:35[0m

tensor(515.5256, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.5256, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(247.1103, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.6332, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(543.6332, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(253.9074, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.8074, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.8074, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(257.0414, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(537.3848, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(537.3848, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(264.2180, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.8333, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.8333, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(252.8378, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(546.0032, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   6%| [34m███                                               [0m | 12:10[0m

tensor(531.5153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.5153, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(246.7169, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.3053, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.3053, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(249.6827, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(572.2701, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(572.2701, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(246.8094, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.5482, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(555.5482, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(260.3186, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(558.7972, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(558.7972, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(249.5020, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(574.1668, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   6%| [34m███                                               [0m | 23:03[0m

tensor(532.0551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(532.0551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(256.4734, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.2667, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.2667, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(263.1680, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(534.8551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(534.8551, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(252.2986, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.4547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.4547, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(235.7122, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.9454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.9454, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(233.7462, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.7625, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   6%| [34m███▏                                              [0m | 19:58[0m

tensor(239.2763, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(546.7591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(546.7591, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(227.1296, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.6936, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(531.6936, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(237.1264, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(557.7037, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(557.7037, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(245.7598, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.1896, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.1896, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(242.7293, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(501.3445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(501.3445, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(244.0132, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   7%| [34m███▎                                              [0m | 17:04[0m

tensor(569.2097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(569.2097, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(253.3585, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.4803, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.4803, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(258.3501, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(549.7676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(549.7676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(243.1317, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.8174, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(519.8174, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(254.8162, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.3225, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.3225, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(264.0125, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.3644, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   7%| [34m███▍                                              [0m | 14:30[0m

tensor(559.7301, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.7301, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(239.7731, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.1116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.1116, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(241.3645, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.5971, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.5971, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(264.3589, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.3505, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.3505, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(260.2727, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.1985, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.1985, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(252.9951, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(544.3876, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   7%| [34m███▌                                              [0m | 15:17[0m

tensor(523.1785, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.1785, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(243.2782, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.3394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(563.3394, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(257.9341, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(605.0773, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(605.0773, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(240.4250, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.4536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.4536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(235.6252, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(569.2396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(569.2396, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(241.3195, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(539.7948, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   7%| [34m███▌                                              [0m | 16:32[0m

tensor(576.2031, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(576.2031, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(254.4518, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.5209, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.5209, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(231.9936, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(572.9592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(572.9592, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(681.5280, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.8984, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.8984, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(237.8666, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(561.7781, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(561.7781, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(250.1730, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(505.3533, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   7%| [34m███▋                                              [0m | 15:27[0m

tensor(514.4637, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.4637, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(247.6221, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.7676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(526.7676, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(243.4558, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.8579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(545.8579, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(644.1899, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(551.7118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(551.7118, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(232.7771, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(505.2117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(505.2117, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(250.0900, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(566.3099, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   8%| [34m███▊                                              [0m | 13:54[0m

tensor(559.8793, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(559.8793, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(224.9507, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(560.8962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(560.8962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(248.6152, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(496.3047, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(496.3047, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(259.5674, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.7703, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(512.7703, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(265.5398, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(508.8046, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(508.8046, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(257.2381, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.1296, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   8%| [34m███▉                                              [0m | 13:09[0m

tensor(568.1285, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(568.1285, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(234.4490, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.2378, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(511.2378, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(235.7680, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.3492, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.3492, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.9770, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.5883, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(542.5883, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.4544, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.8380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.8380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(222.1670, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.9480, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   8%| [34m████                                              [0m | 12:19[0m

tensor(529.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(529.1435, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(214.7848, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.3336, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.3336, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(225.6578, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.2662, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.2662, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(222.6224, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.5674, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(522.5674, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(232.6314, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.4133, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(530.4133, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(239.3177, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(538.4496, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   8%| [34m████                                              [0m | 23:00[0m

tensor(527.6336, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(527.6336, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(218.1653, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.1561, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.1561, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.1931, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.3380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.3380, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(248.6234, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(520.8141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(520.8141, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(219.5389, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(557.2738, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(557.2738, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(225.7536, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(529.7461, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   8%| [34m████▏                                             [0m | 17:57[0m

tensor(525.8120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.8120, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(221.7351, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.1210, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(521.1210, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(223.8171, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.3432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.3432, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(230.9115, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(540.0828, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(540.0828, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(224.4779, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.4491, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(550.4491, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(219.8229, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.3206, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::   9%| [34m████▎                                             [0m | 18:11[0m

tensor(491.8962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(491.8962, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(232.0448, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(513.8054, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(258.2494, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(508.3321, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(508.3321, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(237.6824, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.1839, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.1839, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.2424, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(481.7576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(481.7576, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(208.0001, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(534.6208, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   9%| [34m████▍                                             [0m | 15:41[0m

tensor(498.9706, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(498.9706, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(202.0682, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.7069, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(478.7069, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(238.3802, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.0939, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.0939, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.9908, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.8231, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.8231, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(208.6914, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.6196, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(515.6196, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(199.1468, device='cuda:0', grad_fn=<MseLossBackward0>)


Progress::   9%| [34m████▌                                             [0m | 12:35[0m

tensor(525.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(525.0463, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(214.3202, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.0075, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.0075, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(194.6834, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.7929, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(523.7929, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(210.0570, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.4351, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.4351, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(324.4553, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.8529, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(524.8529, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(212.3665, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(528.9880, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5

Progress::   9%| [34m████▌                                             [0m | 13:29[0m

tensor(497.7755, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.7755, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(202.8804, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.5250, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.5250, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(212.6942, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.7875, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.7875, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(3331.8425, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.8702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.8702, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(211.6285, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.8659, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(483.8659, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(214.0497, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.1236, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::   9%| [34m████▋                                             [0m | 10:55[0m

tensor(465.1127, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(465.1127, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(203.4051, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(449.3808, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(449.3808, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(3737.3569, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.9658, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.9658, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(203.9203, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.6329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.6329, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(210.2830, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.1364, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(514.1364, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(212.5083, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(476.2562, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::  10%| [34m████▊                                             [0m | 09:07[0m

tensor(493.9077, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(493.9077, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.8401, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(540.2368, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(540.2368, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(216.5851, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(422.4694, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(422.4694, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(3467.8215, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.4823, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(506.4823, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(226.6056, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.8999, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(497.8999, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(216.6300, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(490.6699, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(

Progress::  10%| [34m████▉                                             [0m | 09:38[0m

tensor(454.1821, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(454.1821, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(201.5333, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.1771, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(488.1771, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(195.2564, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.7220, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(509.7220, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(202.0722, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.0999, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(500.0999, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(203.0292, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.0856, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(499.0856, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(198.3718, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(487.2041, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(4

Progress::  10%| [34m█████                                             [0m | 10:43[0m

tensor(470.0185, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(470.0185, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(207.8127, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(481.5688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(481.5688, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(216.7290, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(462.7502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(462.7502, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(221.4210, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(487.0708, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(487.0708, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(215.8520, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.1844, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(482.1844, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(212.5716, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(516.2076, device='cuda:0', grad_fn=<MseLossBackward0>)
tensor(5