In [1]:
import torch
import torch.nn as nn
from torch.nn import Module, Linear
from torch.nn.functional import tanh
import torch.nn.init as init
from torch.optim import AdamW, LBFGS
from torch.autograd import Variable
from torch.autograd import grad
from tqdm import tqdm
import numpy as np
import os
import pandas as pd
from itertools import cycle

import warnings
warnings.filterwarnings("ignore", category=UserWarning)

In [2]:
class ComplexTanh(Module):
    @staticmethod
    def forward(inp):
        return tanh(inp.real).type(torch.complex64) + 1j * tanh(inp.imag).type(torch.complex64)

class ComplexLinear(Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.fc_r = Linear(in_features, out_features)
        self.fc_i = Linear(in_features, out_features)

    def forward(self, inp):
        return (self.fc_r(inp.real) - self.fc_i(inp.imag)).type(torch.complex64) + 1j * (self.fc_r(inp.imag) + self.fc_i(inp.real)).type(torch.complex64)

class ComplexFeedForwardNetwork(Module):
    def __init__(self, in_features, hidden_features1, hidden_features2, hidden_features3, out_features):
        super().__init__()
        # Adjust in_features to handle two complex inputs
        self.layer1 = ComplexLinear(in_features, hidden_features1)
        self.layer2 = ComplexLinear(hidden_features1, hidden_features2)
        self.layer3 = ComplexLinear(hidden_features2, hidden_features3)
        self.activation = ComplexTanh()
        self.layer4 = ComplexLinear(hidden_features3, out_features)

    def forward(self, input1, input2):
        # Combine two complex inputs
        combined_input = torch.cat((input1, input2), dim=-1)
        x = self.layer1(combined_input)
        x = self.activation(x)
        x = self.layer2(x)
        x = self.activation(x)
        x = self.layer3(x)
        x = self.activation(x)
        x = self.layer4(x)
        return x

In [3]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = ComplexFeedForwardNetwork(2, 16, 32, 16,1).to(device)

# Create two single complex tensors from x and t
input1 = torch.tensor([3.0 + 1j * 4.0], requires_grad=True, device=device)
input2 = torch.tensor([2.0 + 1j * 5.0], requires_grad=True, device=device)

# Forward pass
output = model(input1, input2)

# Compute derivatives
output.backward(torch.ones_like(output, device=device))
print("Gradient with respect to input 1:", input1.grad)
print("Gradient with respect to input 2:", input2.grad)

Gradient with respect to input 1: tensor([-0.0270-0.0304j], device='cuda:0')
Gradient with respect to input 2: tensor([0.0219+0.0279j], device='cuda:0')


In [4]:
def analytical_u(x, t, k, omega, r):
    c = 0.5*(1 + (71/72)**0.5 )
    c_s = torch.tensor( (c**2)/4 )
    sqrt_c_s = torch.sqrt(c_s)
    complex_exp = torch.exp(1j * c * (x/2) )
    cosh_val = 1/torch.cosh( sqrt_c_s *(x-c*t)  )
    u = complex_exp * sqrt_c_s * cosh_val
    return u/5
    
def analytical_v(x, t, k, omega, r):
    c = 0.5*(1 + (71/72)**0.5 )
    c_s = torch.tensor( (c**2)/4 )
    sqrt_c_s = torch.sqrt(c_s)
    cosh_val = 1/torch.cosh( sqrt_c_s * (x-c*t) )
    result = 12 * c_s * torch.pow(cosh_val,2) 
    return torch.complex(result, torch.zeros_like(result))

def complex_mse_loss(output, target):
    return torch.mean((output.real - target.real) ** 2 + (output.imag - target.imag) ** 2)

def compute_analytical_boundary_loss(model_u, model_v, x, t, k, omega, r):
    complex_x = torch.complex(x, torch.zeros_like(x)) 
    complex_t = torch.complex(t, torch.zeros_like(t))
    
    pred_u = model_u(complex_x,complex_t)
    pred_v =  model_v(complex_x,complex_t)

    u_val = analytical_u(x, t, k, omega, r)
    v_val = analytical_v(x, t, k, omega, r)
 
    boundary_loss_u = complex_mse_loss(pred_u, u_val)
    boundary_loss_v = complex_mse_loss(pred_v, v_val)
    
    return boundary_loss_u, boundary_loss_v

def complex_grad(outputs, inputs):
    return grad(outputs, inputs, grad_outputs=torch.ones_like(outputs), create_graph=True)

def compute_f_and_g(u, v, x, t):
    # Compute first derivatives
    u_t = complex_grad(u, t)[0]
    u_x = complex_grad(u, x)[0]
    v_t = complex_grad(v, t)[0]
    v_x = complex_grad(v, x)[0]
    
    u_xx = complex_grad(u_x, x)[0]
    u_tt = complex_grad(u_t, t)[0]

    v_xx = complex_grad(v_x, x)[0]
    v_tt = complex_grad(v_t, t)[0]

    v_xxx = complex_grad(v_xx, x)[0]
    
    u_abs_squared = u.abs()**2

    u_abs_squared_x = complex_grad(u_abs_squared, x)[0]
    u_abs_squared_xx = complex_grad(u_abs_squared_x, x)[0]

    # Compute terms for the equations
    vu_term = (1/12) * v * u
    u_nonlinear_term = u_abs_squared * u
    v_squared = v**2
    v_nonlinear_term = v_squared + (1/12) * u_abs_squared

    # Compute derivative of v nonlinear term
    v_nonlinear_term_x = complex_grad(v_nonlinear_term, x)[0]

    # Compute the residual f and g as per the given equations
    f = 1j * u_t + u_xx + vu_term + u_nonlinear_term
    g = v_t + v_xxx + 0.5 * v_nonlinear_term_x
    return f, g

In [5]:
def training(model_u, model_v, model_save_path, device, num_epochs, lr, num_samples, r, k, omega, gamma, beta, line_search_fn):
    print('Starting Training')
    optimizer_u = LBFGS(model_u.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)
    optimizer_v = LBFGS(model_v.parameters(), lr=lr, max_iter=20, max_eval=None, tolerance_grad=1e-07, tolerance_change=1e-09, history_size=100, line_search_fn=line_search_fn)

    factor = -2

    x_n = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # x in range [-5, -3]
    t_n = (torch.rand(num_samples, 1)).to(device)   
    x_dom = (torch.rand(num_samples*30, 1)*4 + factor ).to(device)
    t_dom = torch.rand(num_samples*30, 1).to(device) 
    x_bc_x0 = (torch.zeros(num_samples, 1)*4 + factor ).to(device)
    t_bc_x0 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_x1 = (torch.zeros(num_samples, 1)*4 - factor ).to(device)
    t_bc_x1 = torch.rand(num_samples, 1).to(device)  # Uniformly distributed random values between 0 and 1
    x_bc_t0 = (torch.rand(num_samples, 1)*4 + factor ).to(device)  # Uniformly distributed random values between 0 and 1
    t_bc_t0 = torch.zeros(num_samples, 1).to(device)

    complex_x_dom = torch.complex(x_dom, torch.zeros_like(x_dom)) 
    complex_t_dom = torch.complex(t_dom, torch.zeros_like(t_dom)) 
            # Set requires_grad=True 
    complex_x_dom = complex_x_dom.requires_grad_(True)
    complex_t_dom = complex_t_dom.requires_grad_(True)

    for epoch in tqdm(range(num_epochs),
                  desc='Progress:',  
                  leave=False,  
                  ncols=75,
                  mininterval=0.1,
                  bar_format='{l_bar} {bar} | {remaining}',  # Only show the bar without any counters
                  colour='blue'): 
        model_u.train()
        model_v.train()
        
        def closure_u():
            optimizer_u.zero_grad()
            optimizer_v.zero_grad()
            pred_u = model_u(complex_x_dom, complex_t_dom)
            pred_v = model_v(complex_x_dom, complex_t_dom)
            f, g = compute_f_and_g(pred_u, pred_v, complex_x_dom, complex_t_dom)
            physics_loss_u = torch.mean(torch.abs(f)**2)
            physics_loss_v = torch.mean(torch.abs(g)**2)
        
            print(physics_loss_u)
            boundary_loss_u_x0, boundary_loss_v_x0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x0, t_bc_x0, k, omega, r)
            boundary_loss_u_x1, boundary_loss_v_x1 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x1, t_bc_x1, k, omega, r)
            boundary_loss_u_t0, boundary_loss_v_t0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_t0, t_bc_t0, k, omega, r)
            loss_u = gamma*(physics_loss_u) + beta*( boundary_loss_u_x0 + boundary_loss_u_x1 + boundary_loss_u_t0)
            loss_u.backward()
            torch.nn.utils.clip_grad_norm_(model_u.parameters(), max_norm=1.0)  # Clip gradients for model_u
            return loss_u

        def closure_v():
            optimizer_u.zero_grad()
            optimizer_v.zero_grad()
            pred_u = model_u(complex_x_dom, complex_t_dom)
            pred_v = model_v(complex_x_dom, complex_t_dom)
            f, g = compute_f_and_g(pred_u, pred_v, complex_x_dom, complex_t_dom)
            physics_loss_u = torch.mean(torch.abs(f)**2)
            physics_loss_v = torch.mean(torch.abs(g)**2)
            print(physics_loss_v)
            boundary_loss_u_x0, boundary_loss_v_x0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x0, t_bc_x0, k, omega, r)
            boundary_loss_u_x1, boundary_loss_v_x1 = compute_analytical_boundary_loss(model_u, model_v, x_bc_x1, t_bc_x1, k, omega, r)
            boundary_loss_u_t0, boundary_loss_v_t0 = compute_analytical_boundary_loss(model_u, model_v, x_bc_t0, t_bc_t0, k, omega, r)
            loss_v = gamma*(physics_loss_v) + beta*( boundary_loss_v_x0 + boundary_loss_v_x1 + boundary_loss_v_t0)
            loss_v.backward()
            torch.nn.utils.clip_grad_norm_(model_v.parameters(), max_norm=1.0)  # Clip gradients for model_v
            return loss_v
        optimizer_u.step(closure_u)    
        optimizer_v.step(closure_v)

        if epoch % 10 == 0:
            current_loss_u = closure_u()  # Optionally recompute to print
            current_loss_v = closure_v()

            if current_loss_u.item() + current_loss_v.item() < 1e-3:
                break
            else:
                print(f' Epoch {epoch}, Loss U: {current_loss_u.item()}, Loss V: {current_loss_v.item()}') 
                model_u_filename = os.path.join(model_save_path, f'C_HIGGS_U_training_epoch_{epoch}.pth')
                torch.save(model_u.state_dict(), model_u_filename)
                model_v_filename = os.path.join(model_save_path, f'C_HIGGS_V_training_epoch_{epoch}.pth')
                torch.save(model_v.state_dict(), model_v_filename)
                plot_model_results(epoch, model_u, model_v, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results') 
            
    model_u_filename = os.path.join(model_save_path, f'C_HIGGS_U_training.pth')
    torch.save(model_u.state_dict(), model_u_filename)
    model_v_filename = os.path.join(model_save_path, f'C_HIGGS_V_training.pth')
    torch.save(model_v.state_dict(), model_v_filename)
    print('TRAINING COMPLETED')

In [6]:
import os
import torch
import numpy as np
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from scipy.ndimage import gaussian_filter

def plot_model_results(epoch, model_u, model_v, device, k, omega, r, sigma=1, cmap='viridis', image_save_path='results'):
    x = torch.linspace(-1.8, 1.8, 400)
    t = torch.linspace(0.2, 0.8, 400)
    X, T = torch.meshgrid(x, t)  # Create a 2D grid of x and t
    X_flat = X.flatten().unsqueeze(-1).to(device)
    T_flat = T.flatten().unsqueeze(-1).to(device)
    
    model_u_state = torch.load(os.path.join(model_save_path, f'C_HIGGS_U_training_epoch_{epoch}.pth'), map_location=device)
    model_u.load_state_dict(model_u_state)
    model_u.eval()

    model_v_state = torch.load(os.path.join(model_save_path, f'C_HIGGS_V_training_epoch_{epoch}.pth'), map_location=device)
    model_v.load_state_dict(model_v_state)
    model_v.eval()
    complex_X_flat = torch.complex(X_flat, torch.zeros_like(X_flat)) 
    complex_T_flat = torch.complex(T_flat, torch.zeros_like(T_flat)) 

    # Get predictions from the trained models
    with torch.no_grad():
        pred_u = model_u(complex_X_flat, complex_T_flat) 
        pred_v = model_v(complex_X_flat, complex_T_flat) 
    pred_u_r = pred_u.real 
    pred_u_i = pred_u.imag
    pred_u_r = pred_u_r.cpu().reshape(X.shape).numpy()
    pred_u_i = pred_u_i.cpu().reshape(X.shape).numpy()
    pred_v = pred_v.cpu().reshape(X.shape).numpy()

    u1_analytical = analytical_u(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()
    real_u1_analytical = u1_analytical.real
    imag_u1_analytical = u1_analytical.imag
    real_v1_analytical = analytical_v(X_flat, T_flat, k, omega, r).cpu().reshape(X.shape).numpy()

    pred_v_smooth = gaussian_filter(pred_v, sigma=sigma)

    shrink = 0.3
    aspect = 50

    # Plotting predictions
    fig = plt.figure(figsize=(24, 16))

    ax1 = fig.add_subplot(231, projection='3d')
    ax1.plot_surface(X.numpy(), T.numpy(), pred_u_r, cmap=cmap)
    ax1.set_title('Predicted Real Part of $u_1(x, t)$')
    ax1.set_xlabel('x')
    ax1.set_ylabel('t')
    ax1.set_zlabel('Real part of $u_1$')

    ax2 = fig.add_subplot(232, projection='3d')

    ax2.plot_surface(X.numpy(), T.numpy(), pred_u_i, cmap=cmap)
    ax2.set_title('Predicted Imaginary Part of $u_1(x, t)$')
    ax2.set_xlabel('x')
    ax2.set_ylabel('t')
    ax2.set_zlabel('Imag part of $u_1$')

    ax3 = fig.add_subplot(233, projection='3d')
    ax3.plot_surface(X.numpy(), T.numpy(), pred_v_smooth, cmap=cmap)
    ax3.set_title('Predicted Real Part of $v_1(x, t)$')
    ax3.set_xlabel('x')
    ax3.set_ylabel('t')
    ax3.set_zlabel('Real part of $v_1$')

    ax4 = fig.add_subplot(234, projection='3d')
    ax4.plot_surface(X.numpy(), T.numpy(), real_u1_analytical, cmap=cmap)
    ax4.set_title('Analytical Real Part of $u_1(x, t)$')
    ax4.set_xlabel('x')
    ax4.set_ylabel('t')
    ax4.set_zlabel('Real part of $u_1$')

    ax5 = fig.add_subplot(235, projection='3d')
    ax5.plot_surface(X.numpy(), T.numpy(), imag_u1_analytical, cmap=cmap)
    ax5.set_title('Analytical Imaginary Part of $u_1(x, t)$')
    ax5.set_xlabel('x')
    ax5.set_ylabel('t')
    ax5.set_zlabel('Imag part of $u_1$')

    ax6 = fig.add_subplot(236, projection='3d')
    ax6.plot_surface(X.numpy(), T.numpy(), real_v1_analytical, cmap=cmap)
    ax6.set_title('Analytical Real Part of $v_1(x, t)$')
    ax6.set_xlabel('x')
    ax6.set_ylabel('t')
    ax6.set_zlabel('Real part of $v_1$')

    plt.tight_layout()
    plt.savefig(os.path.join(image_save_path, f'chiggs_model_comparison_3d_epoch_{epoch}.png'))
    plt.close(fig)  # Close the figure to free memory


In [7]:
! rm -rf results
! rm -rf model_weights

In [8]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

if torch.cuda.is_available():
    print("CUDA is available! Training on GPU.")
else:
    print("CUDA is not available. Training on CPU.")

model_u = ComplexFeedForwardNetwork(2, 16, 16, 16, 1).to(device)
model_v = ComplexFeedForwardNetwork(2, 16, 16, 16, 1).to(device)

print(model_u)
print(model_v)
num_epochs_lbfgs = 500  # Number of training epochs
num_samples_lbfgs = 1000 # Number of samples for training
num_epochs_sq = 36000
num_samples_sq = 1000
lr_sq = 1e-4 
lr_lbfgs = 1e-2
r = 1.1
omega = 5 
k = 0.5
gamma = 1
beta = 1
model_save_path = 'model_weights' 
os.makedirs(model_save_path, exist_ok=True)
os.makedirs('results', exist_ok=True)
losses = []
line_search_fn = "strong_wolfe"

CUDA is available! Training on GPU.
ComplexFeedForwardNetwork(
  (layer1): ComplexLinear(
    (fc_r): Linear(in_features=2, out_features=16, bias=True)
    (fc_i): Linear(in_features=2, out_features=16, bias=True)
  )
  (layer2): ComplexLinear(
    (fc_r): Linear(in_features=16, out_features=16, bias=True)
    (fc_i): Linear(in_features=16, out_features=16, bias=True)
  )
  (layer3): ComplexLinear(
    (fc_r): Linear(in_features=16, out_features=16, bias=True)
    (fc_i): Linear(in_features=16, out_features=16, bias=True)
  )
  (activation): ComplexTanh()
  (layer4): ComplexLinear(
    (fc_r): Linear(in_features=16, out_features=1, bias=True)
    (fc_i): Linear(in_features=16, out_features=1, bias=True)
  )
)
ComplexFeedForwardNetwork(
  (layer1): ComplexLinear(
    (fc_r): Linear(in_features=2, out_features=16, bias=True)
    (fc_i): Linear(in_features=2, out_features=16, bias=True)
  )
  (layer2): ComplexLinear(
    (fc_r): Linear(in_features=16, out_features=16, bias=True)
    (fc_i

In [9]:
training(model_u, model_v, model_save_path, device, num_epochs_lbfgs, lr_lbfgs, num_samples_lbfgs, r, k, omega, gamma, beta, line_search_fn)

Starting Training


Progress::   0%| [34m                                                      [0m | ?[0m

tensor(0.0439, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1084240.1250, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0438, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(16.1609, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0392, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0231, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0229, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0228, device='c

  return np.asarray(x, float)
Progress::   0%| [34m                                                [0m | 8:12:44[0m

tensor(0.0199, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0186, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0184, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0183, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0177, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0176, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0175, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0175, device='cuda:0',

Progress::   0%| [34m▏                                               [0m | 7:07:36[0m

tensor(0.0174, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0155, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0146, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0143, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0117, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0112, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0075, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0074, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0072, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0052, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0051, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0049, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0026, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0022, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0019, device='cuda:0',

Progress::   1%| [34m▎                                               [0m | 8:17:58[0m

tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0007, device='cuda:0',

Progress::   1%| [34m▍                                               [0m | 8:47:07[0m

tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0006, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0005, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0004, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0003, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0002, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0',

Progress::   1%| [34m▍                                               [0m | 9:45:37[0m

tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0001, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9528e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9926e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9717e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7865e-05, 

Progress::   1%| [34m▌                                              [0m | 10:46:24[0m

tensor(4.1733e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.1639e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.0839e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6614e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6579e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6427e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.5534e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.5514e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.5427e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4749e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4736e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4622e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3917e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3907e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3865e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3428e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3379e-05, devic

Progress::   1%| [34m▋                                              [0m | 11:44:01[0m

tensor(2.9056e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.9045e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.8954e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.8932e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.8833e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.7871e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.7853e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.7697e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6309e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6303e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6277e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6132e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4676e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4658e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4577e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4142e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4099e-05, devic

Progress::   2%| [34m▊                                              [0m | 12:30:18[0m

tensor(1.9981e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9964e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9887e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9479e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9420e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9155e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.6628e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.6567e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.6300e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3719e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3670e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3451e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1370e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1337e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1194e-05, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8665e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8497e-06, devic

Progress::   2%| [34m▊                                              [0m | 13:12:11[0m

tensor(8.7629e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7527e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7085e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.4177e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.4135e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3846e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2829e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2773e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2305e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1678e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1640e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1473e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0710e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0638e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0324e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.8289e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.8246e-06, devic

Progress::   2%| [34m▉                                              [0m | 13:33:02[0m

tensor(7.0411e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.0350e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.9816e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.7508e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.7340e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.5881e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6832e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6787e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6585e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.4712e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.4660e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.4427e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.3259e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.3206e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.2741e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.9432e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.9416e-06, devic

Progress::   2%| [34m█                                              [0m | 13:58:24[0m

tensor(4.4751e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.4649e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.3795e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.3703e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(4.2902e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.7862e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.7833e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.7702e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6453e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6411e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.6224e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4781e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4762e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.4602e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3681e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3672e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(3.3593e-06, devic

Progress::   2%| [34m█▏                                             [0m | 14:43:41[0m

tensor(2.7567e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.7507e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6993e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6950e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.6579e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4525e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4523e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4518e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4489e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4380e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4378e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4368e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4285e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4275e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.4195e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3828e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3806e-06, devic

Progress::   3%| [34m█▏                                             [0m | 14:36:26[0m

tensor(1.8766e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.8704e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.8429e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.6120e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.6094e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5864e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4467e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4463e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4446e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4334e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4331e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4325e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4287e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4284e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4274e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4232e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4230e-06, devic

Progress::   3%| [34m█▎                                             [0m | 14:30:20[0m

tensor(1.4172e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4171e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4165e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4148e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4147e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4141e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4122e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4121e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4117e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4100e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4100e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4099e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4095e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4083e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4083e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4083e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4083e-06, devic

Progress::   3%| [34m█▍                                             [0m | 14:08:05[0m

tensor(1.4203e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4198e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4177e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4055e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4053e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4046e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3996e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3996e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3996e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3993e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3993e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3992e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3990e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3990e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3990e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3993e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3993e-06, devic

Progress::   3%| [34m█▌                                             [0m | 14:01:33[0m

tensor(1.9417e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9338e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.8661e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5226e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5219e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5191e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5051e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.5044e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4985e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4690e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4689e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4681e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4656e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4653e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4642e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4587e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4584e-06, devic

Progress::   3%| [34m█▌                                             [0m | 14:25:02[0m

tensor(1.4241e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4236e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4215e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4150e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4147e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4129e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4316e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4315e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4314e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4305e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4304e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4302e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4289e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4287e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4283e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4259e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4258e-06, devic

Progress::   4%| [34m█▋                                             [0m | 13:55:17[0m

tensor(1.4502e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4499e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4482e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4378e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4374e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4358e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4280e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4275e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4253e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4100e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4096e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4085e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4025e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4020e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3985e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3780e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3778e-06, devic

Progress::   4%| [34m█▊                                             [0m | 13:16:22[0m

tensor(1.2262e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2250e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2193e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2188e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2156e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1970e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1969e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1967e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1961e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1960e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1960e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1959e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1959e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1959e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1961e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1961e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1961e-06, devic

Progress::   4%| [34m█▉                                             [0m | 13:20:26[0m

tensor(1.1972e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1972e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1970e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1964e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1963e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1960e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1933e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1932e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1927e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1902e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1899e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1886e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1822e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1818e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1798e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1667e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1661e-06, devic

Progress::   4%| [34m█▉                                             [0m | 13:05:33[0m

tensor(1.0938e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0933e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0917e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0841e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0836e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0817e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0719e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0715e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0683e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0680e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0665e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0598e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0596e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0585e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0539e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0539e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0537e-06, devic

Progress::   4%| [34m██                                             [0m | 13:55:21[0m

tensor(1.0414e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0413e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0958, device='

Progress::   5%| [34m██▏                                            [0m | 12:36:25[0m

tensor(1.0408e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0407e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0957, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0955, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0954, device='

Progress::   5%| [34m██▎                                            [0m | 11:32:31[0m

tensor(1.0555e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0552e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0552e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0550e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0546e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0546e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0544e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0540e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0540e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0538e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0533e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0532e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0532e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0527e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0527e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0525e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0514e-06, devic

Progress::   5%| [34m██▎                                            [0m | 11:46:57[0m

tensor(1.0542e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0528e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0581e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0581e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0579e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0569e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0513e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0510e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0493e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0416e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0414e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0407e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0371e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0369e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0361e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0303e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0302e-06, devic

Progress::   5%| [34m██▍                                            [0m | 11:46:28[0m

tensor(1.0072e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0071e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0071e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0073e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0074e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0076e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0098e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0099e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0100e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0106e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0106e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0109e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0123e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0123e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0125e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0134e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0135e-06, devic

Progress::   5%| [34m██▌                                            [0m | 11:55:03[0m

tensor(9.7267e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7262e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7248e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7372e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7377e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7403e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7699e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7712e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7769e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8706e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8716e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8765e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9005e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9020e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9090e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0007e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0008e-06, devic

Progress::   6%| [34m██▋                                            [0m | 11:59:57[0m

tensor(9.3736e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.3826e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.3836e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.3886e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5331e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5359e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5616e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5645e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5905e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9975e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9973e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9969e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9952e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9951e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9949e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9940e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9913e-07, devic

Progress::   6%| [34m██▋                                            [0m | 12:25:30[0m

tensor(1.0393e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0394e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0399e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0418e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0419e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0420e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0428e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0478e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0479e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0480e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0485e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0485e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0486e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0488e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0487e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0485e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0477e-06, devic

Progress::   6%| [34m██▊                                            [0m | 12:59:34[0m

tensor(1.2647e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2628e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2544e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1785e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1777e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1702e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1201e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1198e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1185e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1067e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1060e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1030e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0879e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0871e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0840e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0564e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0560e-06, devic

Progress::   6%| [34m██▉                                            [0m | 14:08:50[0m

tensor(2.3217e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3173e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3168e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3146e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3031e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.3021e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.2961e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.2662e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.2647e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.2582e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.1924e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.1907e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.1759e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.0502e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.0487e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(2.0405e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.9595e-06, devic

Progress::   6%| [34m███                                            [0m | 13:58:23[0m

tensor(1.4439e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4400e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4065e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.4024e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3844e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2346e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2327e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2162e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1492e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1480e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1373e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0668e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0665e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0650e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0556e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0549e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0487e-06, devic

Progress::   7%| [34m███                                            [0m | 13:57:36[0m

tensor(9.8508e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8495e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8437e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8103e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8091e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8064e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7897e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7885e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7863e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7714e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7703e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7647e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7370e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7362e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7329e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7163e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7157e-07, devic

Progress::   7%| [34m███▏                                           [0m | 13:40:54[0m

tensor(9.7012e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7010e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7006e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6996e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6959e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6957e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6948e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6901e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6901e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6902e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6909e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6973e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6974e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6975e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6985e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7073e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7072e-07, devic

Progress::   7%| [34m███▎                                           [0m | 13:45:35[0m

tensor(9.6804e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6795e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6770e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6634e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6609e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.6484e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5884e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5870e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5768e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5391e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5374e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5333e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5174e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.4358e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.4340e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.4274e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.3919e-07, devic

Progress::   7%| [34m███▍                                           [0m | 13:26:22[0m

tensor(8.8333e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8291e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8167e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7456e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7399e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7150e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.5385e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.5338e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.5129e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3676e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3648e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3524e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2962e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2944e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2861e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2502e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2495e-07, devic

Progress::   7%| [34m███▍                                           [0m | 13:44:37[0m

tensor(8.1493e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1490e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1486e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1456e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0532, 

Progress::   8%| [34m███▌                                           [0m | 12:25:17[0m

tensor(8.1482e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1480e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1471e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1392e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1390e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1381e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1316e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1312e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1305e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1260e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBac

Progress::   8%| [34m███▋                                           [0m | 11:57:47[0m

tensor(8.1280e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1277e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1263e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1220e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0531, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(0.0530, 

Progress::   8%| [34m███▊                                           [0m | 11:47:38[0m

tensor(8.1279e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1276e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1255e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1200e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1196e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1178e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1084e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1079e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1072e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.1018e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0674e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0672e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0669e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0647e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0433e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0425e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.0385e-07, devic

Progress::   8%| [34m███▊                                           [0m | 12:53:37[0m

tensor(7.7964e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7947e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7869e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7486e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7471e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7404e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7074e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7064e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7020e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6812e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6803e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6764e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6551e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6552e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6553e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6594e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6595e-07, devic

Progress::   8%| [34m███▉                                           [0m | 12:56:09[0m

tensor(7.6770e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6772e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6781e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6841e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6843e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6852e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6916e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6916e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6919e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6972e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6971e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6966e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6992e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6989e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6976e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6909e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6907e-07, devic

Progress::   9%| [34m████                                           [0m | 14:45:58[0m

tensor(9.0952e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.0722e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.0691e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.0424e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8933e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8918e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8852e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8505e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8456e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8236e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7098e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.7022e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.6683e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3416e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3344e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.2704e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.7637e-07, devic

Progress::   9%| [34m████▏                                          [0m | 14:39:49[0m

tensor(1.3465e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.3389e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2732e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2652e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.2300e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5862e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.5698e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.4983e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9704e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9630e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9309e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9277e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.9138e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8444e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8427e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8362e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8053e-07, devic

Progress::   9%| [34m████▏                                          [0m | 14:26:05[0m

tensor(1.1557e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1535e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1438e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0636e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0624e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0574e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0177e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0173e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0140e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9490e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9470e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9382e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8528e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8513e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8472e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8239e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.8220e-07, devic

Progress::   9%| [34m████▎                                          [0m | 14:22:32[0m

tensor(1.1166e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1157e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.1115e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0756e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0751e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0730e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0629e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0626e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0594e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0441e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0440e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0438e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0416e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0414e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0406e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0350e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0348e-06, devic

Progress::   9%| [34m████▍                                          [0m | 14:22:45[0m

tensor(1.0792e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0751e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0434e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0422e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(1.0334e-06, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9328e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.9178e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(9.7861e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8800e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8713e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.8326e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.4993e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.4967e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.4854e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3735e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3684e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(8.3456e-07, devic

Progress::  10%| [34m████▌                                          [0m | 14:20:02[0m

tensor(7.6573e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6304e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6268e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.6109e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4530e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4460e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.4149e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.1319e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.1257e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(7.0845e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.9057e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.8995e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.8721e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.6459e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.6426e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.6279e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.5578e-07, devic

Progress::  10%| [34m████▌                                          [0m | 14:10:03[0m

tensor(6.3468e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.3456e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.3427e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.3255e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.3246e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.3204e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2986e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2977e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2948e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2787e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2778e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2735e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2512e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2503e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2477e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2330e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.2318e-07, devic

Progress::  10%| [34m████▋                                          [0m | 14:11:38[0m

tensor(6.0829e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.0818e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.0767e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.0371e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.0345e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(6.0232e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.9372e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.9331e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.8965e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6422e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6393e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6208e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6174e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.6022e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.5283e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.5242e-07, device='cuda:0', grad_fn=<MeanBackward0>)
tensor(5.5062e-07, devic

                                                                           

TRAINING COMPLETED


