In [1]:
import numpy as np
import pandas as pd
# import adata
import matplotlib.pyplot as plt
import seaborn as sns
import scanpy as sc
import os
import datetime
import argparse
import torch
from utils import *
from torch.utils.data import DataLoader
import episcanpy.api as epi

torch.autograd.set_detect_anomaly(True)

<torch.autograd.anomaly_mode.set_detect_anomaly at 0x177183ad430>

## Loading data

For more information check this [link](https://openproblems.bio/neurips_docs/about_tasks/task3_joint_embedding/)

In [2]:
# data
# !ls ~/arc_00077/raw_data/phase2-data/joint_embedding/openproblems_bmmc_multiome_phase2
# ! cd ~/arc_00077/raw_data/phase2-data/joint_embedding/openproblems_bmmc_multiome_phase2
# ! ls /user/leuven/331/vsc33180/arc_00077/raw_data/phase2-data/joint_embedding/openproblems_bmmc_multiome_phase2
! dir ..\

 Volume in drive C has no label.
 Volume Serial Number is 00C5-C2CA

 Directory of C:\Users\Nacho\Desktop\Nacho s stuff\RKMs

07/12/2021  15:40    <DIR>          .
07/12/2021  15:40    <DIR>          ..
07/12/2021  15:31             5.063 environment.yml
07/12/2021  15:39             3.474 environment_nobuilds.yml
07/12/2021  15:20    <DIR>          raw_data
14/12/2021  15:45    <DIR>          RKM_Francesco
               2 File(s)          8.537 bytes
               4 Dir(s)  215.307.620.352 bytes free


In [3]:
os.listdir("../raw_data/phase2-data/joint_embedding/openproblems_bmmc_multiome_phase2/")

['openproblems_bmmc_multiome_phase2.censor_dataset.output_mod1.h5ad',
 'openproblems_bmmc_multiome_phase2.censor_dataset.output_mod2.h5ad',
 'openproblems_bmmc_multiome_phase2.censor_dataset.output_solution.h5ad']

In [4]:
data_folder_path = "../raw_data/phase2-data/joint_embedding/openproblems_bmmc_multiome_phase2/"
files = os.listdir(data_folder_path)
# print(data_folder_path + files[0])
data1 = sc.read_h5ad(data_folder_path + files[0])
data2 = sc.read(data_folder_path + files[1])
# data3 = sc.read(data_folder_path + files[2])

Only considering the two last: ['.output_mod2', '.h5ad'].
Only considering the two last: ['.output_mod2', '.h5ad'].


In [5]:
print(data1)
print(data2)

AnnData object with n_obs × n_vars = 42492 × 13431
    obs: 'batch', 'size_factors'
    var: 'gene_ids', 'feature_types'
    uns: 'dataset_id', 'organism'
    layers: 'counts'
AnnData object with n_obs × n_vars = 42492 × 116490
    obs: 'batch'
    var: 'feature_types'
    uns: 'dataset_id', 'gene_activity_var_names', 'organism'
    obsm: 'gene_activity'
    layers: 'counts'


In [6]:
# print(data1.var["feature_types"])
rna = pd.DataFrame.sparse.from_spmatrix(data1.X) # 42492 x 13431
rna.columns = data1.var.index #data1.var["gene_ids"] -> ENSEMBL IDs

sc.pp.highly_variable_genes(data1, n_top_genes=5000) # Find HGVs
rna = rna.transpose().loc[data1.var["highly_variable"] == True].transpose()
rna.head() # 42492 x 5000

rna.to_csv("RNA data preprocessed.csv")

In [7]:
epi.pp.select_var_feature(data2, nb_features=10000, show=False) # Inplace select 10000 variable regions
atac = pd.DataFrame.sparse.from_spmatrix(data2.X)
atac.columns = data2.var.index
atac.head() # 42492 x 10002

atac.to_csv("ATAC data preprocessed.csv")

## Setting up the necessary arguments and functions

In [8]:
# %tb
# # Model Settings =================================================================================================
# parser = argparse.ArgumentParser(description='Gen-RKM Model')

# parser.add_argument('--N', type=int, default=1000, help='Total # of samples')
# parser.add_argument('--mb_size', type=int, default=300, help='Mini-batch size. See utils.py')
# parser.add_argument('--h_dim', type=int, default=2, help='Dim of latent vector') # shared latent space
# parser.add_argument('--capacity', type=int, default=32, help='Capacity of network. See utils.py') # important in CNNs
# parser.add_argument('--x_fdim', type=int, default=128, help='Input x_fdim. See utils.py') # feature map dimensionality
# parser.add_argument('--y_fdim', type=int, default=20, help='Input y_fdim. See utils.py') # feature map dimensionality
# parser.add_argument('--c_accu', type=float, default=100, help='Input weight on recons_error')

# # Training Settings =============================
# parser.add_argument('--lr', type=float, default=1e-4, help='Input learning rate for optimizer')
# parser.add_argument('--max_epochs', type=int, default=100, help='Input max_epoch for cut-off')
# # parser.add_argument('--device', type=str, default='cuda', help='Device type: cuda or cpu')
# parser.add_argument('--device', type=str, default='cuda' if torch.cuda.is_available() else 'cpu', help='Device type: cuda or cpu')
# parser.add_argument('--workers', type=int, default=0, help='# of workers for dataloader')
# parser.add_argument('--shuffle', type=bool, default=True, help='shuffle dataset: true or false')

# opt = parser.parse_args()
args = {
    "N": rna.shape[0],
#     "mb_size": 300,
    "h_dim": 32,
    "capacity": 32,
    "x_fdim": 128,
    "y_fdim": 256,
    "c_accu": 100,
    "lr": 1e-5, #1e-6
    "max_epochs": 100,
    "device": torch.device("cuda" if torch.cuda.is_available() else "cpu"),
    "workers": 0,
    "shuffle": True,
    "batch_size": 300,
}
opt = args
print(opt["device"])

cuda


In [9]:
def kPCA(X, Y):
#     print(X)
#     print(Y)
    a = torch.mm(X, torch.t(X)) + torch.mm(Y, torch.t(Y))
    nh1 = a.size(0)
    oneN = torch.div(torch.ones(nh1, nh1), nh1).float().to(opt["device"])
#     a = a - torch.mm(oneN, a) - torch.mm(a, oneN) + torch.mm(torch.mm(oneN, a), oneN)  # centering
    print(f"Norm of X {torch.norm(X)}, Norm of Y {torch.norm(Y)}, Norm of a {torch.norm(a)}")
#     print(a, end="\r")

#     Stabilize a(?)
    a = a + 100
    h, s, _ = torch.svd(a, some=False)
    return h[:, :opt["h_dim"]], s

In [10]:
# Energy function
def rkm_loss(output1, X, output2, Y):
    h, s = kPCA(output1, output2)
    U = torch.mm(torch.t(output1), h)
    V = torch.mm(torch.t(output2), h)

    x_tilde = net3(torch.mm(h, torch.t(U))) # NET3
#     y_tilde = net4(torch.mm(h, torch.t(V))) # NET4
    y_tilde = net3(torch.mm(h, torch.t(V))) # NET4

    # Costs
    f1 = torch.trace(torch.mm(torch.mm(output1, U), torch.t(h))) + torch.trace(
        torch.mm(torch.mm(output2, V), torch.t(h)))
    f2 = 0.5 * torch.trace(torch.mm(h, torch.mm(torch.diag(s[:opt["h_dim"]]), torch.t(h))))
    f3 = 0.5 * ((torch.trace(torch.mm(torch.t(U), U))) + (torch.trace(torch.mm(torch.t(V), V))))
    recon_loss = torch.nn.MSELoss()
    f4 = recon_loss(x_tilde, X) + recon_loss(y_tilde, Y)  # reconstruction loss

    loss = - f1 + f3 + f2 + 0.5 * (- f1 + f3 + f2) ** 2 + opt["c_accu"] * f4  # stabilized loss
    return loss

In [11]:
class create_dirs:
    """ Creates directories for Checkpoints and saving trained models """

    def __init__(self, ct):
        self.ct = ct
        self.dircp = 'checkpoint.pth_{}.tar'.format(self.ct)
        self.dirout = 'Mul_trained_RKM_{}.tar'.format(self.ct)

    def create(self):
        if not os.path.exists('cp/'):
            os.makedirs('cp/')

        if not os.path.exists('out/'):
            os.makedirs('out/')

    def save_checkpoint(self, state, is_best):
        if is_best:
            torch.save(state, 'cp/{}'.format(self.dircp))

In [12]:
# https://discuss.pytorch.org/t/dataloader-access-two-items-at-the-same-time/22664
class ConcatDataset(torch.utils.data.Dataset):
    def __init__(self, dataset1, dataset2):
#         self.dataset1 = dataset1 # datasets should be sorted!
#         self.dataset2 = dataset2

        self.dataset1 = torch.tensor(dataset1.values) # datasets should be sorted!
        self.dataset2 = torch.tensor(dataset2.values)

    def __getitem__(self, index):
#         x1 = self.dataset1.loc[index].to_numpy() #loc, que no iloc (puede que iloc funcione)
#         x2 = self.dataset2.loc[index].to_numpy()
        
        x1 = self.dataset1[index].float()
        x2 = self.dataset2[index].float()

        return x1, x2, index

    def __len__(self):
        return len(self.dataset1) # assuming both datasets have same length

    
train_loader = DataLoader(
             ConcatDataset(rna, atac),
             batch_size=args["batch_size"], shuffle=args["shuffle"],
             num_workers=args["workers"]
            )

## Setting up the NNs

In [13]:
class Net1(nn.Module): # Phi 1 - RNA to feature map
    def __init__(self, input_dim, hidden_dim_list, args):
        super(Net1, self).__init__()
        
        self.transformations = nn.ModuleList()
        self.transformations.append(nn.Linear(in_features=input_dim, out_features=hidden_dim_list[0]))
        
        for i in range(len(hidden_dim_list)):
            if i == len(hidden_dim_list)-1:
                dim = hidden_dim_list[-1]
                self.transformations.append(nn.Linear(in_features=dim, out_features=args["x_fdim"]))
                
            else:
                in_dim, out_dim = hidden_dim_list[i], hidden_dim_list[i+1]
                self.transformations.append(nn.Linear(in_features=in_dim, out_features=out_dim))
        
    def forward(self, x):
        for i, transform in enumerate(self.transformations):
            if i == len(self.transformations)-1:
                x = transform(x)
                # No leaky relu at the end
                
            else:
                x = transform(x)
                x = F.leaky_relu(x, negative_slope=0.2)

        return x
    
    
class Net3(nn.Module):
    def __init__(self, input_dim, hidden_dim_list, args): #Psi 1 - Feature map to RNA (input dim = 5000)
        super(Net3, self).__init__()
        
        hidden_dim_list.reverse() # Reverse the list to be the inverse of Net 1
        self.reverse_transformations = nn.ModuleList()
        self.reverse_transformations.append(nn.Linear(in_features=args["x_fdim"], out_features=hidden_dim_list[0]))
        
        for i in range(len(hidden_dim_list)):
            if i == len(hidden_dim_list)-1:
                dim = hidden_dim_list[-1]
                self.reverse_transformations.append(nn.Linear(in_features=dim, out_features=input_dim))
                
            else:
                in_dim, out_dim = hidden_dim_list[i], hidden_dim_list[i+1]
                self.reverse_transformations.append(nn.Linear(in_features=in_dim, out_features=out_dim))
        
    def forward(self, x):
        for i, transform in enumerate(self.reverse_transformations):
            if i == len(self.reverse_transformations)-1:
                x = transform(x)
                x = F.relu(x) # RNA preprocessed has a lower bound of 0
                
            else:
                x = transform(x)
                x = F.leaky_relu(x, negative_slope=0.2)
                
        return x

In [14]:
class Net2(nn.Module): # Phi 2 - ATAC to feature map
    def __init__(self, input_dim, hidden_dim_list, args): 
        super(Net2, self).__init__()
#         c = capacity
        self.transformations = nn.ModuleList()
        self.transformations.append(nn.Linear(in_features=input_dim, out_features=hidden_dim_list[0]))
        
        for i in range(len(hidden_dim_list)):
            if i == len(hidden_dim_list)-1:
                dim = hidden_dim_list[-1]
                self.transformations.append(nn.Linear(in_features=dim, out_features=args["y_fdim"]))
                
            else:
                in_dim, out_dim = hidden_dim_list[i], hidden_dim_list[i+1]
                self.transformations.append(nn.Linear(in_features=in_dim, out_features=out_dim))
        
    def forward(self, x):
        for i, transform in enumerate(self.transformations):
            if i == len(self.transformations)-1:
                x = transform(x)
                # No leaky relu at the end
                
            else:
                x = transform(x)
                x = F.leaky_relu(x, negative_slope=0.2)
                
        return x


class Net4(nn.Module):
    def __init__(self, input_dim, hidden_dim_list, args): #Psi 2 - Feature map to ATAC (input dim = 116490)
        super(Net4, self).__init__()
    
        hidden_dim_list.reverse() # Reverse the list to be the inverse of Net 2
        self.reverse_transformations = nn.ModuleList()
        self.reverse_transformations.append(nn.Linear(in_features=args["y_fdim"], out_features=hidden_dim_list[0]))
        
        for i in range(len(hidden_dim_list)):
            if i == len(hidden_dim_list)-1:
                dim = hidden_dim_list[-1]
                self.reverse_transformations.append(nn.Linear(in_features=dim, out_features=input_dim))
                
            else:
                in_dim, out_dim = hidden_dim_list[i], hidden_dim_list[i+1]
                self.reverse_transformations.append(nn.Linear(in_features=in_dim, out_features=out_dim))
        
    def forward(self, x):
        for i, transform in enumerate(self.reverse_transformations):
            if i == len(self.reverse_transformations)-1:
                x = transform(x)
                x = torch.sigmoid(x) # ATAC is has binary values
                
            else:
                x = transform(x)
                x = F.leaky_relu(x, negative_slope=0.2) 
                
        return x   

In [15]:
# args = {"x_fdim":2}
# hid = [1000, 500, 300, 200, 50, 10]
# inp = 10000
# n2 = Net2(inp, hid, args)
# n4 = Net4(inp, hid, args)
# print(n2.transformations)
# print(n4.reverse_transformations)

In [16]:
def final_compute(data, net1, net2, kPCA, device=torch.device("cuda" if torch.cuda.is_available() else "cpu")):
    """ Function to compute embeddings of full dataset. """
    # Data is a dictionary with keys RNA and ATAC
#     args["shuffle"] = False
#     xt, _, _ = get_mnist_dataloader(args=args)  # loading data without shuffle
    xt = data["RNA"]
    yt = data["ATAC"]
    xtr = net1(xt.to(args.device))
    ytr = net2(yt.to(args.device)) # This was targets at first, but we don't have labels

    h, s = kPCA(xtr, ytr)
    return torch.mm(torch.t(xtr), h), torch.mm(torch.t(ytr), h), h, s

## Initialize nets and parameters

In [17]:
# net1 = Net1(input_dim=5000, hidden_dim=1000, args=args).float().to(opt["device"]) # input_dim, hidden_dim, args
# net2 = Net2(input_dim=116490, hidden_dim_list=[10000, 2000], args=args).float().to(opt["device"]) # input_dim, hidden_dim_list, args
# net3 = Net3(input_dim=5000, hidden_dim=1000, args=args).float().to(opt["device"]) # input_dim, hidden_dim, args
# net4 = Net4(input_dim=116490, hidden_dim_list=[10000, 2000], args=args).float().to(opt["device"]) # input_dim, hidden_dim_list, args
# net4 = Net4(input_dim=116490, hidden_dim_list=[10000, 2000], args=args).double().to(opt["device"])

hidden_list_rna = [3000, 1000, 500, 250]
hidden_list_atac = [5000, 2500, 1000, 500, 300]
net1 = Net1(input_dim=5000, hidden_dim_list=hidden_list_rna, args=args).float().to(opt["device"]) # input_dim, hidden_dim, args
net2 = Net2(input_dim=10002, hidden_dim_list=hidden_list_atac, args=args).float().to(opt["device"]) # input_dim, hidden_dim_list, args
net3 = Net3(input_dim=5000, hidden_dim_list=hidden_list_rna, args=args).float().to(opt["device"]) # input_dim, hidden_dim, args
net4 = Net4(input_dim=10002, hidden_dim_list=hidden_list_atac, args=args).float().to(opt["device"])

params = list(net1.parameters()) + list(net3.parameters()) + list(net2.parameters()) + list(net4.parameters())
optimizer = torch.optim.Adam(params, lr=opt["lr"], weight_decay=0)

## Train

In [18]:
ct = time.strftime("%Y%m%d-%H%M")
dirs = create_dirs(ct=ct)
dirs.create()

In [19]:
# float(sum([torch.max(p) for p in net1.parameters()]))
#int(sum([torch.sum(torch.isnan(p)) for p in net1.parameters()]))
# float(max([torch.max(p.grad) for p in net1.parameters()]))

In [20]:
l_cost = np.inf  # Costs from where checkpoints will be saved
t = 1
cost = np.inf  # Initialize cost
start = datetime.datetime.now()
while cost > 0.2 and t <= opt["max_epochs"]:  # run epochs until convergence
    avg_loss = 0
    for i, (datax, datay, _) in enumerate(train_loader):
        if i < math.ceil(opt["N"] / opt["batch_size"]):
            print(f'On step {i}', end="\r")
#             print(f'On step {i}')
            datax, datay = datax.float().to(opt["device"]), datay.float().to(opt["device"])
            output1 = net1(datax.float())
            print(f"Max of net1 {float(sum([torch.max(p) for p in net1.parameters()]))}, #nan of net1 {int(sum([torch.sum(torch.isnan(p)) for p in net1.parameters()]))}")
            output2 = net2(datay.float())
            loss = rkm_loss(output1, datax, output1, datax).float()

            optimizer.zero_grad()
            loss.backward()
            print(f"Loss is {float(loss)}, max of grad is {float(max([torch.max(p.grad) for p in net1.parameters()]))}")
#             nn.utils.clip_grad_norm_(parameters=params, max_norm=1)
            optimizer.step()
            avg_loss += loss.detach().cpu().numpy()
        
        else:
            break
    cost = avg_loss

    # Remember lowest cost and save checkpoint
    is_best = cost < l_cost
    l_cost = min(cost, l_cost)

    dirs.save_checkpoint({
        'epochs': t + 1,
        'net1_state_dict': net1.state_dict(),
        'net3_state_dict': net3.state_dict(),
        'net2_state_dict': net2.state_dict(),
        'net4_state_dict': net4.state_dict(),
        'l_cost': l_cost,
        'optimizer': optimizer.state_dict()}, is_best)
    print(f"\n\nEpoch: {t}, Cost: {cost}\n")
    t += 1
print('Finished Training in: {}. Lowest cost: {}'.format(str(datetime.datetime.now() - start), l_cost))

Max of net1 0.34276241064071655, #nan of net1 0
Norm of X 7.919551849365234, Norm of Y 7.919551849365234, Norm of a 123.98271942138672
Loss is 112473336.0, max of grad is 41.57763671875
Max of net1 0.3428015112876892, #nan of net1 0
Norm of X 7.932462692260742, Norm of Y 7.932462692260742, Norm of a 124.35760498046875
Loss is 112522144.0, max of grad is 17.041624069213867
Max of net1 0.3428414463996887, #nan of net1 0
Norm of X 7.9553351402282715, Norm of Y 7.9553351402282715, Norm of a 124.9958724975586
Loss is 112489896.0, max of grad is 25.08282470703125
Max of net1 0.3428851366043091, #nan of net1 0
Norm of X 7.972089767456055, Norm of Y 7.972089767456055, Norm of a 125.54137420654297
Loss is 112508720.0, max of grad is 43.64646911621094
Max of net1 0.342912495136261, #nan of net1 0
Norm of X 7.978144645690918, Norm of Y 7.978144645690918, Norm of a 125.76471710205078
Loss is 112504904.0, max of grad is 22.154468536376953
Max of net1 0.3429397642612457, #nan of net1 0
Norm of X 7.9

Max of net1 0.3442678451538086, #nan of net1 0
Norm of X 8.243221282958984, Norm of Y 8.243221282958984, Norm of a 134.0102081298828
Loss is 112512816.0, max of grad is 20.335773468017578
Max of net1 0.3442971706390381, #nan of net1 0
Norm of X 8.238688468933105, Norm of Y 8.238688468933105, Norm of a 133.80104064941406
Loss is 112512520.0, max of grad is 20.251237869262695
Max of net1 0.34432312846183777, #nan of net1 0
Norm of X 8.251620292663574, Norm of Y 8.251620292663574, Norm of a 134.2218017578125
Loss is 112518584.0, max of grad is 24.36795425415039
Max of net1 0.344348669052124, #nan of net1 0
Norm of X 8.25137710571289, Norm of Y 8.25137710571289, Norm of a 134.28631591796875
Loss is 112520472.0, max of grad is 94.82470703125
Max of net1 0.34438225626945496, #nan of net1 0
Norm of X 8.26294994354248, Norm of Y 8.26294994354248, Norm of a 134.57237243652344
Loss is 112497400.0, max of grad is 60.612152099609375
Max of net1 0.34441709518432617, #nan of net1 0
Norm of X 8.28496

Max of net1 0.34556281566619873, #nan of net1 0
Norm of X 8.657148361206055, Norm of Y 8.657148361206055, Norm of a 147.41542053222656
Loss is 112511448.0, max of grad is 25.22409439086914
Max of net1 0.3455968499183655, #nan of net1 0
Norm of X 8.672333717346191, Norm of Y 8.672333717346191, Norm of a 147.7610626220703
Loss is 112513632.0, max of grad is 40.54978561401367
Max of net1 0.3456309735774994, #nan of net1 0
Norm of X 8.677132606506348, Norm of Y 8.677132606506348, Norm of a 147.94972229003906
Loss is 112496232.0, max of grad is 21.111328125
Max of net1 0.34566599130630493, #nan of net1 0
Norm of X 8.678248405456543, Norm of Y 8.678248405456543, Norm of a 148.00653076171875
Loss is 112503968.0, max of grad is 22.782575607299805
Max of net1 0.3456987142562866, #nan of net1 0
Norm of X 8.67471981048584, Norm of Y 8.67471981048584, Norm of a 147.87049865722656
Loss is 112512912.0, max of grad is 27.567609786987305
Max of net1 0.34572723507881165, #nan of net1 0
Norm of X 8.6711

Max of net1 0.3465256690979004, #nan of net1 0
Norm of X 8.956082344055176, Norm of Y 8.956082344055176, Norm of a 157.70753479003906
Loss is 112489872.0, max of grad is 28.8709716796875
Max of net1 0.34654152393341064, #nan of net1 0
Norm of X 8.988251686096191, Norm of Y 8.988251686096191, Norm of a 158.78367614746094
Loss is 112516272.0, max of grad is 22.113332748413086
Max of net1 0.34655481576919556, #nan of net1 0
Norm of X 9.001988410949707, Norm of Y 9.001988410949707, Norm of a 159.23291015625
Loss is 112495232.0, max of grad is 28.234954833984375
Max of net1 0.34657177329063416, #nan of net1 0
Norm of X 8.999495506286621, Norm of Y 8.999495506286621, Norm of a 159.0738067626953
Loss is 112511536.0, max of grad is 14.630008697509766
Max of net1 0.34658774733543396, #nan of net1 0
Norm of X 9.01222038269043, Norm of Y 9.01222038269043, Norm of a 159.60623168945312
Loss is 112515536.0, max of grad is 19.360624313354492
Max of net1 0.3466027081012726, #nan of net1 0
Norm of X 8.

Max of net1 0.3471716642379761, #nan of net1 0
Norm of X 9.217162132263184, Norm of Y 9.217162132263184, Norm of a 166.91062927246094
Loss is 112508224.0, max of grad is 21.571666717529297
Max of net1 0.34718751907348633, #nan of net1 0
Norm of X 9.204341888427734, Norm of Y 9.204341888427734, Norm of a 166.40052795410156
Loss is 112519640.0, max of grad is 29.904613494873047
Max of net1 0.3472006320953369, #nan of net1 0
Norm of X 9.241433143615723, Norm of Y 9.241433143615723, Norm of a 167.5551300048828
Loss is 112488304.0, max of grad is 30.58917236328125
Max of net1 0.3472178876399994, #nan of net1 0
Norm of X 9.217690467834473, Norm of Y 9.217690467834473, Norm of a 166.8814239501953
Loss is 112495736.0, max of grad is 23.253934860229492
Max of net1 0.3472391366958618, #nan of net1 0
Norm of X 9.228569030761719, Norm of Y 9.228569030761719, Norm of a 167.28526306152344
Loss is 112518624.0, max of grad is 15.178428649902344
Max of net1 0.3472576439380646, #nan of net1 0
Norm of X 

Max of net1 0.3478814363479614, #nan of net1 0
Norm of X 9.47522258758545, Norm of Y 9.47522258758545, Norm of a 176.24032592773438
Loss is 112498952.0, max of grad is 18.47955322265625
Max of net1 0.34790557622909546, #nan of net1 0
Norm of X 9.481574058532715, Norm of Y 9.481574058532715, Norm of a 176.49850463867188
Loss is 112517336.0, max of grad is 17.564970016479492
Max of net1 0.347927451133728, #nan of net1 0
Norm of X 9.46579360961914, Norm of Y 9.46579360961914, Norm of a 175.9398956298828
Loss is 112526080.0, max of grad is 65.47811889648438
Max of net1 0.3479468822479248, #nan of net1 0
Norm of X 9.487109184265137, Norm of Y 9.487109184265137, Norm of a 176.6285400390625
Loss is 112531120.0, max of grad is 22.4132080078125
Max of net1 0.3479595184326172, #nan of net1 0
Norm of X 9.516073226928711, Norm of Y 9.516073226928711, Norm of a 177.6740264892578
Loss is 112498240.0, max of grad is 19.58660888671875
Max of net1 0.3479766249656677, #nan of net1 0
Norm of X 9.48734855

Max of net1 0.34896528720855713, #nan of net1 0
Norm of X 9.934525489807129, Norm of Y 9.934525489807129, Norm of a 193.74720764160156
Loss is 112522528.0, max of grad is 18.25685691833496
Max of net1 0.34899812936782837, #nan of net1 0
Norm of X 9.940479278564453, Norm of Y 9.940479278564453, Norm of a 194.0965576171875
Loss is 112503936.0, max of grad is 14.926036834716797
Max of net1 0.34902793169021606, #nan of net1 0
Norm of X 9.989471435546875, Norm of Y 9.989471435546875, Norm of a 195.79620361328125
Loss is 112476016.0, max of grad is 57.527061462402344
Max of net1 0.34905779361724854, #nan of net1 0
Norm of X 10.031047821044922, Norm of Y 10.031047821044922, Norm of a 197.43479919433594
Loss is 112522248.0, max of grad is 15.669029235839844
Max of net1 0.349078506231308, #nan of net1 0
Norm of X 10.055974006652832, Norm of Y 10.055974006652832, Norm of a 198.3333740234375
Loss is 112522024.0, max of grad is 12.592947959899902
Max of net1 0.3490965962409973, #nan of net1 0
Norm

Max of net1 0.3497585654258728, #nan of net1 0
Norm of X 10.323473930358887, Norm of Y 10.323473930358887, Norm of a 209.21180725097656
Loss is 112484160.0, max of grad is 40.08758544921875
Max of net1 0.3497868776321411, #nan of net1 0
Norm of X 10.359575271606445, Norm of Y 10.359575271606445, Norm of a 210.52603149414062
Loss is 112507112.0, max of grad is 24.691822052001953
Max of net1 0.34981560707092285, #nan of net1 0
Norm of X 10.38016414642334, Norm of Y 10.38016414642334, Norm of a 211.3931427001953
Loss is 112503936.0, max of grad is 14.01800537109375
Max of net1 0.3498467206954956, #nan of net1 0
Norm of X 10.383482933044434, Norm of Y 10.383482933044434, Norm of a 211.50914001464844
Loss is 112508608.0, max of grad is 17.17043113708496
Max of net1 0.34988006949424744, #nan of net1 0
Norm of X 10.389473915100098, Norm of Y 10.389473915100098, Norm of a 212.02053833007812
Loss is 112531680.0, max of grad is 25.49176025390625
Max of net1 0.3499020040035248, #nan of net1 0
Nor

Norm of X 10.726797103881836, Norm of Y 10.726797103881836, Norm of a 225.8588104248047
Loss is 112515768.0, max of grad is 126.08850860595703
Max of net1 0.35067591071128845, #nan of net1 0
Norm of X 10.756487846374512, Norm of Y 10.756487846374512, Norm of a 227.19427490234375
Loss is 112519328.0, max of grad is 8.519793510437012
Max of net1 0.35071343183517456, #nan of net1 0
Norm of X 10.799558639526367, Norm of Y 10.799558639526367, Norm of a 228.91038513183594
Loss is 112525040.0, max of grad is 29.077762603759766
Max of net1 0.35074204206466675, #nan of net1 0
Norm of X 10.807409286499023, Norm of Y 10.807409286499023, Norm of a 229.3585968017578
Loss is 112518712.0, max of grad is 12.89626693725586
Max of net1 0.35076719522476196, #nan of net1 0
Norm of X 10.788996696472168, Norm of Y 10.788996696472168, Norm of a 228.46873474121094
Loss is 112522624.0, max of grad is 21.241134643554688
Max of net1 0.3507879972457886, #nan of net1 0
Norm of X 10.821391105651855, Norm of Y 10.82

Norm of X 11.133856773376465, Norm of Y 11.133856773376465, Norm of a 243.3317108154297
Loss is 112519520.0, max of grad is 14.926568984985352
Max of net1 0.35123753547668457, #nan of net1 0
Norm of X 11.178387641906738, Norm of Y 11.178387641906738, Norm of a 245.36776733398438
Loss is 112492704.0, max of grad is 32.85894775390625
Max of net1 0.3512589633464813, #nan of net1 0
Norm of X 11.190592765808105, Norm of Y 11.190592765808105, Norm of a 245.72021484375
Loss is 112520648.0, max of grad is 20.224157333374023
Max of net1 0.3512788414955139, #nan of net1 0
Norm of X 11.18695068359375, Norm of Y 11.18695068359375, Norm of a 245.70530700683594
Loss is 112482064.0, max of grad is 48.674072265625
Max of net1 0.3513129949569702, #nan of net1 0
Norm of X 11.226201057434082, Norm of Y 11.226201057434082, Norm of a 247.57595825195312
Loss is 112513336.0, max of grad is 19.047595977783203
Max of net1 0.3513440489768982, #nan of net1 0
Norm of X 11.21108627319336, Norm of Y 11.211086273193

Max of net1 0.3523540198802948, #nan of net1 0
Norm of X 11.916366577148438, Norm of Y 11.916366577148438, Norm of a 278.94818115234375
Loss is 112519400.0, max of grad is 8.858662605285645
Max of net1 0.3523707985877991, #nan of net1 0
Norm of X 11.864188194274902, Norm of Y 11.864188194274902, Norm of a 276.80303955078125
Loss is 112512048.0, max of grad is 10.957955360412598
Max of net1 0.3523890972137451, #nan of net1 0
Norm of X 11.834432601928711, Norm of Y 11.834432601928711, Norm of a 275.2354736328125
Loss is 112510760.0, max of grad is 22.321086883544922
Max of net1 0.3524085283279419, #nan of net1 0
Norm of X 11.918760299682617, Norm of Y 11.918760299682617, Norm of a 279.291259765625
Loss is 112514408.0, max of grad is 26.029251098632812
Max of net1 0.3524259328842163, #nan of net1 0
Norm of X 12.000582695007324, Norm of Y 12.000582695007324, Norm of a 282.9451904296875
Loss is 112520032.0, max of grad is 10.592178344726562
Max of net1 0.35243794322013855, #nan of net1 0
No

Max of net1 0.35305508971214294, #nan of net1 0
Norm of X 12.49531364440918, Norm of Y 12.49531364440918, Norm of a 307.0894470214844
Loss is 112513384.0, max of grad is 14.563348770141602
Max of net1 0.3530769944190979, #nan of net1 0
Norm of X 12.523357391357422, Norm of Y 12.523357391357422, Norm of a 308.4603271484375
Loss is 112521264.0, max of grad is 12.75018310546875
Max of net1 0.35309162735939026, #nan of net1 0
Norm of X 12.537243843078613, Norm of Y 12.537243843078613, Norm of a 309.17803955078125
Loss is 112519976.0, max of grad is 10.609375
Max of net1 0.35309898853302, #nan of net1 0
Norm of X 12.51589584350586, Norm of Y 12.51589584350586, Norm of a 308.1145935058594
Loss is 112524664.0, max of grad is 19.40997314453125
Max of net1 0.3530977666378021, #nan of net1 0
Norm of X 12.446330070495605, Norm of Y 12.446330070495605, Norm of a 304.7718200683594
Loss is 112519184.0, max of grad is 13.181795120239258
Max of net1 0.35309138894081116, #nan of net1 0
Norm of X 12.432

Norm of X 13.172625541687012, Norm of Y 13.172625541687012, Norm of a 341.4804992675781
Loss is 112517472.0, max of grad is 8.437488555908203
Max of net1 0.3536909222602844, #nan of net1 0
Norm of X 13.131811141967773, Norm of Y 13.131811141967773, Norm of a 339.4363098144531
Loss is 112513856.0, max of grad is 19.412240982055664
Max of net1 0.35368847846984863, #nan of net1 0
Norm of X 13.28200912475586, Norm of Y 13.28200912475586, Norm of a 347.1421813964844
Loss is 112520784.0, max of grad is 14.6519775390625
Max of net1 0.3536823093891144, #nan of net1 0
Norm of X 13.189114570617676, Norm of Y 13.189114570617676, Norm of a 342.3204650878906
Loss is 112516328.0, max of grad is 15.488224029541016
Max of net1 0.3536764979362488, #nan of net1 0
Norm of X 13.229496002197266, Norm of Y 13.229496002197266, Norm of a 344.668701171875
Loss is 112522160.0, max of grad is 16.52178955078125
Max of net1 0.3536681830883026, #nan of net1 0
Norm of X 13.169683456420898, Norm of Y 13.1696834564208

Norm of X 14.055594444274902, Norm of Y 14.055594444274902, Norm of a 388.9864807128906
Loss is 112514720.0, max of grad is 10.315116882324219
Max of net1 0.3545098304748535, #nan of net1 0
Norm of X 14.055549621582031, Norm of Y 14.055549621582031, Norm of a 389.2625732421875
Loss is 112515696.0, max of grad is 10.967857360839844
Max of net1 0.35452139377593994, #nan of net1 0
Norm of X 14.087221145629883, Norm of Y 14.087221145629883, Norm of a 390.9764709472656
Loss is 112513280.0, max of grad is 36.53081512451172
Max of net1 0.35453274846076965, #nan of net1 0
Norm of X 14.121503829956055, Norm of Y 14.121503829956055, Norm of a 392.97265625
Loss is 112510040.0, max of grad is 14.244890213012695
Max of net1 0.35454559326171875, #nan of net1 0
Norm of X 14.088335990905762, Norm of Y 14.088335990905762, Norm of a 391.2088928222656
Loss is 112511520.0, max of grad is 6.780048370361328
Max of net1 0.35455939173698425, #nan of net1 0
Norm of X 14.122112274169922, Norm of Y 14.1221122741

Norm of X 14.192023277282715, Norm of Y 14.192023277282715, Norm of a 396.87762451171875
Loss is 112496008.0, max of grad is 39.36065673828125
Max of net1 0.3551058769226074, #nan of net1 0
Norm of X 14.26776123046875, Norm of Y 14.26776123046875, Norm of a 400.77685546875
Loss is 112502952.0, max of grad is 75.55992889404297
Max of net1 0.3551204800605774, #nan of net1 0
Norm of X 14.395959854125977, Norm of Y 14.395959854125977, Norm of a 407.9867248535156
Loss is 112514880.0, max of grad is 28.129789352416992
Max of net1 0.355131059885025, #nan of net1 0
Norm of X 14.408124923706055, Norm of Y 14.408124923706055, Norm of a 408.8519287109375
Loss is 112480808.0, max of grad is 72.84765625
Max of net1 0.3551640510559082, #nan of net1 0
Norm of X 14.532951354980469, Norm of Y 14.532951354980469, Norm of a 415.7101135253906
Loss is 112513944.0, max of grad is 10.720453262329102
Max of net1 0.35519540309906006, #nan of net1 0
Norm of X 14.509883880615234, Norm of Y 14.509883880615234, No

Max of net1 0.35575419664382935, #nan of net1 0
Norm of X 15.557452201843262, Norm of Y 15.557452201843262, Norm of a 477.19287109375
Loss is 112512680.0, max of grad is 13.040145874023438
Max of net1 0.3557949960231781, #nan of net1 0
Norm of X 15.498324394226074, Norm of Y 15.498324394226074, Norm of a 473.784912109375
Loss is 112509776.0, max of grad is 14.51272201538086
Max of net1 0.35583576560020447, #nan of net1 0
Norm of X 15.587316513061523, Norm of Y 15.587316513061523, Norm of a 478.9715576171875
Loss is 112516696.0, max of grad is 9.7390775680542
Max of net1 0.35587093234062195, #nan of net1 0
Norm of X 15.541959762573242, Norm of Y 15.541959762573242, Norm of a 476.6014099121094
Loss is 112516064.0, max of grad is 8.25458812713623
Max of net1 0.35590311884880066, #nan of net1 0
Norm of X 15.760113716125488, Norm of Y 15.760113716125488, Norm of a 489.80975341796875
Loss is 112522656.0, max of grad is 21.989501953125
Max of net1 0.35592448711395264, #nan of net1 0
Norm of X

Loss is 112524632.0, max of grad is 26.01220703125
Max of net1 0.35597115755081177, #nan of net1 0
Norm of X 15.451371192932129, Norm of Y 15.451371192932129, Norm of a 470.56658935546875
Loss is 112514528.0, max of grad is 15.467697143554688
Max of net1 0.355971097946167, #nan of net1 0
Norm of X 15.503097534179688, Norm of Y 15.503097534179688, Norm of a 473.62176513671875
Loss is 112491472.0, max of grad is 53.5662841796875
Max of net1 0.35599082708358765, #nan of net1 0
Norm of X 15.568489074707031, Norm of Y 15.568489074707031, Norm of a 477.90997314453125
Loss is 112512112.0, max of grad is 23.570669174194336
Max of net1 0.3560126721858978, #nan of net1 0
Norm of X 15.445449829101562, Norm of Y 15.445449829101562, Norm of a 470.64739990234375
Loss is 112514376.0, max of grad is 9.55734634399414
Max of net1 0.3560341000556946, #nan of net1 0
Norm of X 15.513670921325684, Norm of Y 15.513670921325684, Norm of a 474.37310791015625
Loss is 112491296.0, max of grad is 54.5563354492187

Max of net1 0.3567262589931488, #nan of net1 0
Norm of X 16.420412063598633, Norm of Y 16.420412063598633, Norm of a 532.2566528320312
Loss is 112523728.0, max of grad is 25.64947509765625
Max of net1 0.3567308187484741, #nan of net1 0
Norm of X 16.54563331604004, Norm of Y 16.54563331604004, Norm of a 540.4400024414062
Loss is 112513192.0, max of grad is 8.549079895019531
Max of net1 0.35673636198043823, #nan of net1 0
Norm of X 16.5708065032959, Norm of Y 16.5708065032959, Norm of a 541.9107666015625
Loss is 112522392.0, max of grad is 22.2852783203125
Max of net1 0.35673367977142334, #nan of net1 0
Norm of X 16.724321365356445, Norm of Y 16.724321365356445, Norm of a 551.3631591796875
Loss is 112514264.0, max of grad is 6.894207000732422
Max of net1 0.35673099756240845, #nan of net1 0
Norm of X 16.47783851623535, Norm of Y 16.47783851623535, Norm of a 535.7784423828125
Loss is 112512464.0, max of grad is 9.640619277954102
Max of net1 0.3567318916320801, #nan of net1 0
Norm of X 16.4

Max of net1 0.3565501570701599, #nan of net1 0
Norm of X 16.087064743041992, Norm of Y 16.087064743041992, Norm of a 510.1255798339844
Loss is 112513912.0, max of grad is 8.991401672363281
Max of net1 0.3565526306629181, #nan of net1 0
Norm of X 16.109113693237305, Norm of Y 16.109113693237305, Norm of a 511.7548522949219
Loss is 112514640.0, max of grad is 9.856056213378906
Max of net1 0.35655441880226135, #nan of net1 0
Norm of X 16.0571346282959, Norm of Y 16.0571346282959, Norm of a 508.3593444824219
Loss is 112483032.0, max of grad is 76.30169677734375
Max of net1 0.35658401250839233, #nan of net1 0
Norm of X 16.26926612854004, Norm of Y 16.26926612854004, Norm of a 521.5444946289062
Loss is 112518672.0, max of grad is 13.675370216369629
Max of net1 0.3566071391105652, #nan of net1 0
Norm of X 16.340578079223633, Norm of Y 16.340578079223633, Norm of a 526.44189453125
Loss is 112508096.0, max of grad is 16.13671875
Max of net1 0.3566339910030365, #nan of net1 0
Norm of X 16.377914

Norm of X 15.944866180419922, Norm of Y 15.944866180419922, Norm of a 501.166015625
Loss is 112506936.0, max of grad is 18.5565185546875
Max of net1 0.35652485489845276, #nan of net1 0
Norm of X 15.878787994384766, Norm of Y 15.878787994384766, Norm of a 496.4041442871094
Loss is 112514000.0, max of grad is 9.37699031829834
Max of net1 0.35651880502700806, #nan of net1 0
Norm of X 16.010929107666016, Norm of Y 16.010929107666016, Norm of a 504.7438049316406
Loss is 112519712.0, max of grad is 398.9912109375
Max of net1 0.3564961850643158, #nan of net1 0
Norm of X 15.83476734161377, Norm of Y 15.83476734161377, Norm of a 493.7872009277344
Loss is 112522024.0, max of grad is 19.57037353515625
Max of net1 0.3564780056476593, #nan of net1 0
Norm of X 15.78817367553711, Norm of Y 15.78817367553711, Norm of a 491.1726989746094
Loss is 112504520.0, max of grad is 23.1378173828125
Max of net1 0.35647204518318176, #nan of net1 0
Norm of X 15.82999324798584, Norm of Y 15.82999324798584, Norm of 

Max of net1 0.357088565826416, #nan of net1 0
Norm of X 16.646072387695312, Norm of Y 16.646072387695312, Norm of a 546.2363891601562
Loss is 112514304.0, max of grad is 15.082721710205078
Max of net1 0.35708653926849365, #nan of net1 0
Norm of X 16.460054397583008, Norm of Y 16.460054397583008, Norm of a 533.9818725585938
Loss is 112513616.0, max of grad is 7.689278602600098
Max of net1 0.35708487033843994, #nan of net1 0
Norm of X 16.45931053161621, Norm of Y 16.45931053161621, Norm of a 533.7676391601562
Loss is 112476104.0, max of grad is 95.1971435546875
Max of net1 0.3571212589740753, #nan of net1 0
Norm of X 16.541732788085938, Norm of Y 16.541732788085938, Norm of a 538.724853515625
Loss is 112523024.0, max of grad is 23.37109375
Max of net1 0.35714614391326904, #nan of net1 0
Norm of X 16.633832931518555, Norm of Y 16.633832931518555, Norm of a 545.35498046875
Loss is 112531400.0, max of grad is 45.90838623046875
Max of net1 0.3571508824825287, #nan of net1 0
Norm of X 16.7798

Norm of X 16.2857666015625, Norm of Y 16.2857666015625, Norm of a 522.329345703125
Loss is 112514848.0, max of grad is 6.409518718719482
Max of net1 0.3569805324077606, #nan of net1 0
Norm of X 16.232521057128906, Norm of Y 16.232521057128906, Norm of a 519.1406860351562
Loss is 112524912.0, max of grad is 27.1317138671875
Max of net1 0.3569800853729248, #nan of net1 0
Norm of X 16.080135345458984, Norm of Y 16.080135345458984, Norm of a 509.4480895996094
Loss is 112512384.0, max of grad is 6.8222832679748535
Max of net1 0.35698121786117554, #nan of net1 0
Norm of X 16.42131233215332, Norm of Y 16.42131233215332, Norm of a 531.2011108398438
Loss is 112518000.0, max of grad is 10.348392486572266
Max of net1 0.35698050260543823, #nan of net1 0
Norm of X 16.329843521118164, Norm of Y 16.329843521118164, Norm of a 525.234130859375
Loss is 112513688.0, max of grad is 6.652679443359375
Max of net1 0.356981098651886, #nan of net1 0
Norm of X 16.320098876953125, Norm of Y 16.320098876953125, N

Loss is 112501184.0, max of grad is 35.96142578125
Max of net1 0.35774409770965576, #nan of net1 0
Norm of X 17.910228729248047, Norm of Y 17.910228729248047, Norm of a 633.3008422851562
Loss is 112524560.0, max of grad is 30.8758544921875
Max of net1 0.35775625705718994, #nan of net1 0
Norm of X 18.098711013793945, Norm of Y 18.098711013793945, Norm of a 645.7533569335938
Loss is 112514832.0, max of grad is 8.828695297241211
Max of net1 0.3577656149864197, #nan of net1 0
Norm of X 17.884916305541992, Norm of Y 17.884916305541992, Norm of a 630.9945068359375
Loss is 112512784.0, max of grad is 6.462214469909668
Max of net1 0.3577747344970703, #nan of net1 0
Norm of X 18.04169464111328, Norm of Y 18.04169464111328, Norm of a 642.6129760742188
Loss is 112513752.0, max of grad is 10.48393440246582
Max of net1 0.35778263211250305, #nan of net1 0
Norm of X 18.0296630859375, Norm of Y 18.0296630859375, Norm of a 641.476806640625
Loss is 112510408.0, max of grad is 20.33700180053711
Max of ne

Max of net1 0.3580811321735382, #nan of net1 0
Norm of X 18.64899444580078, Norm of Y 18.64899444580078, Norm of a 686.8329467773438
Loss is 112508640.0, max of grad is 16.8321533203125
Max of net1 0.3580884635448456, #nan of net1 0
Norm of X 18.600759506225586, Norm of Y 18.600759506225586, Norm of a 683.6896362304688
Loss is 112494016.0, max of grad is 58.77490234375
Max of net1 0.3581196069717407, #nan of net1 0
Norm of X 18.830163955688477, Norm of Y 18.830163955688477, Norm of a 700.2596435546875
Loss is 112513808.0, max of grad is 22.924970626831055
Max of net1 0.3581484258174896, #nan of net1 0
Norm of X 19.067663192749023, Norm of Y 19.067663192749023, Norm of a 718.2388916015625
Loss is 112513928.0, max of grad is 13.093124389648438
Max of net1 0.3581749498844147, #nan of net1 0
Norm of X 18.784040451049805, Norm of Y 18.784040451049805, Norm of a 697.4625244140625
Loss is 112517152.0, max of grad is 8.6571044921875
Max of net1 0.3581963777542114, #nan of net1 0
Norm of X 18.8

Max of net1 0.35796764492988586, #nan of net1 0
Norm of X 18.2280216217041, Norm of Y 18.2280216217041, Norm of a 655.6176147460938
Loss is 112521160.0, max of grad is 20.239501953125
Max of net1 0.357957124710083, #nan of net1 0
Norm of X 17.943035125732422, Norm of Y 17.943035125732422, Norm of a 635.2391357421875
Loss is 112514304.0, max of grad is 15.732049942016602
Max of net1 0.35794809460639954, #nan of net1 0
Norm of X 17.987924575805664, Norm of Y 17.987924575805664, Norm of a 638.4083251953125
Loss is 112514040.0, max of grad is 8.624213218688965
Max of net1 0.35793960094451904, #nan of net1 0
Norm of X 18.191997528076172, Norm of Y 18.191997528076172, Norm of a 652.7684326171875
Loss is 112513560.0, max of grad is 25.23845863342285
Max of net1 0.3579310178756714, #nan of net1 0
Norm of X 18.0297794342041, Norm of Y 18.0297794342041, Norm of a 641.3184204101562
Loss is 112521336.0, max of grad is 20.1851806640625
Max of net1 0.35791727900505066, #nan of net1 0
Norm of X 17.98

Max of net1 0.35834363102912903, #nan of net1 0
Norm of X 19.147863388061523, Norm of Y 19.147863388061523, Norm of a 723.7667236328125
Loss is 112514056.0, max of grad is 6.474855422973633
Max of net1 0.35832610726356506, #nan of net1 0
Norm of X 18.89872169494629, Norm of Y 18.89872169494629, Norm of a 705.476318359375
Loss is 112513264.0, max of grad is 8.056792259216309
Max of net1 0.3583122193813324, #nan of net1 0
Norm of X 18.73509407043457, Norm of Y 18.73509407043457, Norm of a 693.1578979492188
Loss is 112504304.0, max of grad is 29.3585205078125
Max of net1 0.3583105504512787, #nan of net1 0
Norm of X 18.984241485595703, Norm of Y 18.984241485595703, Norm of a 711.1834106445312
Loss is 112513352.0, max of grad is 6.522959232330322
Max of net1 0.3583109974861145, #nan of net1 0
Norm of X 19.18309783935547, Norm of Y 19.18309783935547, Norm of a 726.8085327148438
Loss is 112514704.0, max of grad is 9.699308395385742
Max of net1 0.3583122491836548, #nan of net1 0
Norm of X 18.8

Loss is 112520256.0, max of grad is 18.5849609375
Max of net1 0.3582211136817932, #nan of net1 0
Norm of X 18.75383186340332, Norm of Y 18.75383186340332, Norm of a 693.8656005859375
Loss is 112488600.0, max of grad is 74.36090087890625
Max of net1 0.3582267463207245, #nan of net1 0
Norm of X 18.551671981811523, Norm of Y 18.551671981811523, Norm of a 678.6507568359375
Loss is 112515744.0, max of grad is 6.41374397277832
Max of net1 0.3582304120063782, #nan of net1 0
Norm of X 18.69014549255371, Norm of Y 18.69014549255371, Norm of a 689.3988647460938
Loss is 112522144.0, max of grad is 25.029541015625
Max of net1 0.3582255244255066, #nan of net1 0
Norm of X 18.506406784057617, Norm of Y 18.506406784057617, Norm of a 675.8463745117188
Loss is 112522200.0, max of grad is 24.30072021484375
Max of net1 0.35821425914764404, #nan of net1 0
Norm of X 18.486549377441406, Norm of Y 18.486549377441406, Norm of a 674.1221313476562
Loss is 112514464.0, max of grad is 8.813920974731445
Max of net1

Norm of X 17.66630744934082, Norm of Y 17.66630744934082, Norm of a 614.5970458984375
Loss is 112513632.0, max of grad is 7.911774635314941
Max of net1 0.3579930365085602, #nan of net1 0
Norm of X 17.76327133178711, Norm of Y 17.76327133178711, Norm of a 621.6749267578125
Loss is 112520240.0, max of grad is 15.997314453125
Max of net1 0.35798028111457825, #nan of net1 0
Norm of X 17.831018447875977, Norm of Y 17.831018447875977, Norm of a 626.0140991210938
Loss is 112521048.0, max of grad is 20.773193359375
Max of net1 0.3579617738723755, #nan of net1 0
Norm of X 17.563648223876953, Norm of Y 17.563648223876953, Norm of a 607.7507934570312
Loss is 112514000.0, max of grad is 12.418575286865234
Max of net1 0.3579444885253906, #nan of net1 0
Norm of X 17.518688201904297, Norm of Y 17.518688201904297, Norm of a 603.9686279296875
Loss is 112522096.0, max of grad is 22.36065673828125
Max of net1 0.3579215407371521, #nan of net1 0
Norm of X 17.470563888549805, Norm of Y 17.470563888549805, N

Norm of X 20.13882064819336, Norm of Y 20.13882064819336, Norm of a 801.0834350585938
Loss is 112521912.0, max of grad is 26.1412353515625
Max of net1 0.35887789726257324, #nan of net1 0
Norm of X 20.171937942504883, Norm of Y 20.171937942504883, Norm of a 803.449462890625
Loss is 112520488.0, max of grad is 21.8350830078125
Max of net1 0.35885322093963623, #nan of net1 0
Norm of X 19.931865692138672, Norm of Y 19.931865692138672, Norm of a 785.0005493164062
Loss is 112520944.0, max of grad is 21.8695068359375
Max of net1 0.3588234782218933, #nan of net1 0
Norm of X 19.501964569091797, Norm of Y 19.501964569091797, Norm of a 750.896240234375
Loss is 112516720.0, max of grad is 7.589584827423096
Max of net1 0.35879331827163696, #nan of net1 0
Norm of X 19.652099609375, Norm of Y 19.652099609375, Norm of a 762.2171020507812
Loss is 112516696.0, max of grad is 7.38037109375
Max of net1 0.3587646782398224, #nan of net1 0
Norm of X 19.62430763244629, Norm of Y 19.62430763244629, Norm of a 7

Max of net1 0.3580063283443451, #nan of net1 0
Norm of X 17.218271255493164, Norm of Y 17.218271255493164, Norm of a 582.9212036132812
Loss is 112510120.0, max of grad is 23.434385299682617
Max of net1 0.3579876124858856, #nan of net1 0
Norm of X 17.128372192382812, Norm of Y 17.128372192382812, Norm of a 576.8305053710938
Loss is 112513560.0, max of grad is 11.406982421875
Max of net1 0.35797184705734253, #nan of net1 0
Norm of X 16.973772048950195, Norm of Y 16.973772048950195, Norm of a 566.3934936523438
Loss is 112483520.0, max of grad is 82.09979248046875
Max of net1 0.3579827547073364, #nan of net1 0
Norm of X 16.86153793334961, Norm of Y 16.86153793334961, Norm of a 558.8683471679688
Loss is 112513896.0, max of grad is 9.182735443115234
Max of net1 0.3579931855201721, #nan of net1 0
Norm of X 17.04132652282715, Norm of Y 17.04132652282715, Norm of a 570.9849243164062
Loss is 112515112.0, max of grad is 9.377098083496094
Max of net1 0.35800355672836304, #nan of net1 0
Norm of X 1

Max of net1 0.3581586480140686, #nan of net1 0
Norm of X 16.87828826904297, Norm of Y 16.87828826904297, Norm of a 560.3671264648438
Loss is 112513560.0, max of grad is 17.845041275024414
Max of net1 0.35816115140914917, #nan of net1 0
Norm of X 16.80741310119629, Norm of Y 16.80741310119629, Norm of a 555.3167114257812
Loss is 112523976.0, max of grad is 26.57666015625
Max of net1 0.35815709829330444, #nan of net1 0
Norm of X 16.986804962158203, Norm of Y 16.986804962158203, Norm of a 566.6988525390625
Loss is 112521368.0, max of grad is 19.47412109375
Max of net1 0.3581480383872986, #nan of net1 0
Norm of X 16.786876678466797, Norm of Y 16.786876678466797, Norm of a 553.58837890625
Loss is 112516104.0, max of grad is 9.265793800354004
Max of net1 0.35813993215560913, #nan of net1 0
Norm of X 16.754993438720703, Norm of Y 16.754993438720703, Norm of a 551.3128662109375
Loss is 112512728.0, max of grad is 9.912090301513672
Max of net1 0.35813602805137634, #nan of net1 0
Norm of X 16.71

Max of net1 0.35822340846061707, #nan of net1 0
Norm of X 16.239456176757812, Norm of Y 16.239456176757812, Norm of a 517.6112060546875
Loss is 112523960.0, max of grad is 25.09759521484375
Max of net1 0.3582098186016083, #nan of net1 0
Norm of X 16.14098358154297, Norm of Y 16.14098358154297, Norm of a 511.2630310058594
Loss is 112509776.0, max of grad is 17.91701316833496
Max of net1 0.3582029640674591, #nan of net1 0
Norm of X 16.048158645629883, Norm of Y 16.048158645629883, Norm of a 505.0081787109375
Loss is 112512856.0, max of grad is 10.156845092773438
Max of net1 0.35819658637046814, #nan of net1 0
Norm of X 16.025949478149414, Norm of Y 16.025949478149414, Norm of a 503.82879638671875
Loss is 112511816.0, max of grad is 9.74024772644043
Max of net1 0.3581920564174652, #nan of net1 0
Norm of X 15.920425415039062, Norm of Y 15.920425415039062, Norm of a 497.0849914550781
Loss is 112514600.0, max of grad is 5.506753444671631
Max of net1 0.35818785429000854, #nan of net1 0
Norm o

Loss is 112516608.0, max of grad is 8.523892402648926
Max of net1 0.3578372299671173, #nan of net1 0
Norm of X 14.568098068237305, Norm of Y 14.568098068237305, Norm of a 414.9946594238281
Loss is 112520296.0, max of grad is 13.16162109375
Max of net1 0.35782015323638916, #nan of net1 0
Norm of X 14.56706714630127, Norm of Y 14.56706714630127, Norm of a 415.0234680175781
Loss is 112498080.0, max of grad is 38.5150146484375
Max of net1 0.3578149080276489, #nan of net1 0
Norm of X 14.62232494354248, Norm of Y 14.62232494354248, Norm of a 417.86126708984375
Loss is 112505120.0, max of grad is 22.79827880859375
Max of net1 0.3578171133995056, #nan of net1 0
Norm of X 14.539702415466309, Norm of Y 14.539702415466309, Norm of a 412.91595458984375
Loss is 112514016.0, max of grad is 5.951138973236084
Max of net1 0.35781991481781006, #nan of net1 0
Norm of X 14.577640533447266, Norm of Y 14.577640533447266, Norm of a 415.0892639160156
Loss is 112515024.0, max of grad is 5.474475383758545
Max o

Norm of X 13.46690559387207, Norm of Y 13.46690559387207, Norm of a 352.737060546875
Loss is 112511112.0, max of grad is 11.701631546020508
Max of net1 0.35761016607284546, #nan of net1 0
Norm of X 13.487438201904297, Norm of Y 13.487438201904297, Norm of a 354.21319580078125
Loss is 112520456.0, max of grad is 16.297466278076172
Max of net1 0.3575989305973053, #nan of net1 0
Norm of X 13.385619163513184, Norm of Y 13.385619163513184, Norm of a 348.9390869140625
Loss is 112514616.0, max of grad is 7.28961181640625
Max of net1 0.3575919270515442, #nan of net1 0
Norm of X 13.44968032836914, Norm of Y 13.44968032836914, Norm of a 351.3415832519531
Loss is 112513152.0, max of grad is 9.832080841064453
Max of net1 0.3575863838195801, #nan of net1 0
Norm of X 13.27927303314209, Norm of Y 13.27927303314209, Norm of a 342.9320068359375
Loss is 112513616.0, max of grad is 13.930379867553711
Max of net1 0.35757914185523987, #nan of net1 0
Norm of X 13.33736801147461, Norm of Y 13.33736801147461,

Max of net1 0.3576198220252991, #nan of net1 0
Norm of X 13.015686988830566, Norm of Y 13.015686988830566, Norm of a 329.3647155761719
Loss is 112519784.0, max of grad is 10.360268592834473
Max of net1 0.35762280225753784, #nan of net1 0
Norm of X 13.154470443725586, Norm of Y 13.154470443725586, Norm of a 336.1403503417969
Loss is 112516576.0, max of grad is 5.953340530395508
Max of net1 0.35762307047843933, #nan of net1 0
Norm of X 13.175292015075684, Norm of Y 13.175292015075684, Norm of a 336.6792907714844
Loss is 112512680.0, max of grad is 9.625423431396484
Max of net1 0.3576233386993408, #nan of net1 0
Norm of X 12.984004974365234, Norm of Y 12.984004974365234, Norm of a 327.8178405761719
Loss is 112525296.0, max of grad is 20.2322998046875
Max of net1 0.3576201796531677, #nan of net1 0
Norm of X 12.990799903869629, Norm of Y 12.990799903869629, Norm of a 327.503662109375
Loss is 112514600.0, max of grad is 7.325051307678223
Max of net1 0.3576195240020752, #nan of net1 0
Norm of

Loss is 112514024.0, max of grad is 33.200321197509766
Max of net1 0.357730895280838, #nan of net1 0
Norm of X 12.511637687683105, Norm of Y 12.511637687683105, Norm of a 303.94256591796875
Loss is 112509224.0, max of grad is 11.0831298828125
Max of net1 0.3577369451522827, #nan of net1 0
Norm of X 12.607227325439453, Norm of Y 12.607227325439453, Norm of a 308.0052185058594
Loss is 112484144.0, max of grad is 58.93292236328125
Max of net1 0.3577495813369751, #nan of net1 0
Norm of X 12.747314453125, Norm of Y 12.747314453125, Norm of a 314.34283447265625
Loss is 112513688.0, max of grad is 7.6034040451049805
Max of net1 0.3577638268470764, #nan of net1 0
Norm of X 12.6983642578125, Norm of Y 12.6983642578125, Norm of a 312.8186950683594
Loss is 112515608.0, max of grad is 5.37770938873291
Max of net1 0.3577783703804016, #nan of net1 0
Norm of X 12.707463264465332, Norm of Y 12.707463264465332, Norm of a 312.72235107421875
Loss is 112512648.0, max of grad is 14.869483947753906
Max of n

Norm of X 13.376452445983887, Norm of Y 13.376452445983887, Norm of a 348.1460876464844
Loss is 112519200.0, max of grad is 8.80914306640625
Max of net1 0.3582003116607666, #nan of net1 0
Norm of X 13.483068466186523, Norm of Y 13.483068466186523, Norm of a 353.5135192871094
Loss is 112513576.0, max of grad is 7.273176193237305
Max of net1 0.35820984840393066, #nan of net1 0
Norm of X 13.423768997192383, Norm of Y 13.423768997192383, Norm of a 350.2972412109375
Loss is 112517160.0, max of grad is 7.913754463195801
Max of net1 0.35821887850761414, #nan of net1 0
Norm of X 13.446798324584961, Norm of Y 13.446798324584961, Norm of a 351.3753967285156
Loss is 112525632.0, max of grad is 21.7366943359375
Max of net1 0.3582260310649872, #nan of net1 0
Norm of X 13.479387283325195, Norm of Y 13.479387283325195, Norm of a 353.606201171875
Loss is 112516120.0, max of grad is 8.586230278015137
Max of net1 0.35822969675064087, #nan of net1 0
Norm of X 13.437140464782715, Norm of Y 13.437140464782

Max of net1 0.3584054708480835, #nan of net1 0
Norm of X 13.098832130432129, Norm of Y 13.098832130432129, Norm of a 333.18646240234375
Loss is 112513160.0, max of grad is 7.599976539611816
Max of net1 0.35841718316078186, #nan of net1 0
Norm of X 12.936850547790527, Norm of Y 12.936850547790527, Norm of a 324.5509338378906
Loss is 112514024.0, max of grad is 6.8798441886901855
Max of net1 0.35842761397361755, #nan of net1 0
Norm of X 13.092265129089355, Norm of Y 13.092265129089355, Norm of a 332.34027099609375
Loss is 112511640.0, max of grad is 10.442865371704102
Max of net1 0.35844019055366516, #nan of net1 0
Norm of X 13.000234603881836, Norm of Y 13.000234603881836, Norm of a 328.1102600097656
Loss is 112514000.0, max of grad is 24.300262451171875
Max of net1 0.3584543764591217, #nan of net1 0
Norm of X 13.004313468933105, Norm of Y 13.004313468933105, Norm of a 328.2831115722656
Loss is 112512168.0, max of grad is 6.941799640655518
Max of net1 0.3584710359573364, #nan of net1 0


Max of net1 0.3591414988040924, #nan of net1 0
Norm of X 14.139860153198242, Norm of Y 14.139860153198242, Norm of a 389.3441467285156
Loss is 112511920.0, max of grad is 10.562602996826172
Max of net1 0.35915428400039673, #nan of net1 0
Norm of X 14.13484001159668, Norm of Y 14.13484001159668, Norm of a 388.9527893066406
Loss is 112520752.0, max of grad is 12.98895263671875
Max of net1 0.35916322469711304, #nan of net1 0
Norm of X 14.281051635742188, Norm of Y 14.281051635742188, Norm of a 396.84423828125
Loss is 112512200.0, max of grad is 6.506192207336426
Max of net1 0.35917210578918457, #nan of net1 0
Norm of X 14.18273639678955, Norm of Y 14.18273639678955, Norm of a 391.6021728515625
Loss is 112510336.0, max of grad is 10.177490234375
Max of net1 0.3591824769973755, #nan of net1 0
Norm of X 14.25037956237793, Norm of Y 14.25037956237793, Norm of a 395.58819580078125
Loss is 112514104.0, max of grad is 6.3668317794799805
Max of net1 0.3591921925544739, #nan of net1 0
Norm of X 14

Norm of X 14.27188777923584, Norm of Y 14.27188777923584, Norm of a 396.1618957519531
Loss is 112519592.0, max of grad is 11.2283935546875
Max of net1 0.3596729040145874, #nan of net1 0
Norm of X 14.156201362609863, Norm of Y 14.156201362609863, Norm of a 389.9550476074219
Loss is 112509520.0, max of grad is 12.06964111328125
Max of net1 0.35967156291007996, #nan of net1 0
Norm of X 14.098187446594238, Norm of Y 14.098187446594238, Norm of a 386.6575622558594
Loss is 112516592.0, max of grad is 5.60291862487793
Max of net1 0.3596706688404083, #nan of net1 0
Norm of X 14.196887016296387, Norm of Y 14.196887016296387, Norm of a 391.4362487792969
Loss is 112514968.0, max of grad is 9.395889282226562
Max of net1 0.3596686124801636, #nan of net1 0
Norm of X 13.995546340942383, Norm of Y 13.995546340942383, Norm of a 381.3760986328125
Loss is 112524368.0, max of grad is 20.7198486328125
Max of net1 0.3596639037132263, #nan of net1 0
Norm of X 14.007469177246094, Norm of Y 14.007469177246094,

Max of net1 0.35966232419013977, #nan of net1 0
Norm of X 12.668737411499023, Norm of Y 12.668737411499023, Norm of a 310.60595703125
Loss is 112520096.0, max of grad is 9.87030029296875
Max of net1 0.3596643805503845, #nan of net1 0
Norm of X 12.78404426574707, Norm of Y 12.78404426574707, Norm of a 315.450927734375
Loss is 112514000.0, max of grad is 8.125354766845703
Max of net1 0.35966652631759644, #nan of net1 0
Norm of X 12.662407875061035, Norm of Y 12.662407875061035, Norm of a 310.1775817871094
Loss is 112518176.0, max of grad is 7.66215705871582
Max of net1 0.35966813564300537, #nan of net1 0
Norm of X 12.701141357421875, Norm of Y 12.701141357421875, Norm of a 311.47027587890625
Loss is 112513792.0, max of grad is 7.653465270996094
Max of net1 0.35967230796813965, #nan of net1 0
Norm of X 12.586394309997559, Norm of Y 12.586394309997559, Norm of a 306.3500671386719
Loss is 112511392.0, max of grad is 8.1949462890625
Max of net1 0.3596808910369873, #nan of net1 0
Norm of X 12

Norm of X 12.097017288208008, Norm of Y 12.097017288208008, Norm of a 281.4452819824219
Loss is 112514056.0, max of grad is 5.314934730529785
Max of net1 0.36003726720809937, #nan of net1 0
Norm of X 12.092578887939453, Norm of Y 12.092578887939453, Norm of a 281.5252380371094
Loss is 112526360.0, max of grad is 18.81195068359375
Max of net1 0.3600475788116455, #nan of net1 0
Norm of X 12.034103393554688, Norm of Y 12.034103393554688, Norm of a 278.9565124511719
Loss is 112521688.0, max of grad is 11.9261474609375
Max of net1 0.36005350947380066, #nan of net1 0
Norm of X 12.062128067016602, Norm of Y 12.062128067016602, Norm of a 280.25250244140625
Loss is 112517664.0, max of grad is 9.116302490234375
Max of net1 0.3600580394268036, #nan of net1 0
Norm of X 11.935389518737793, Norm of Y 11.935389518737793, Norm of a 274.4609375
Loss is 112517440.0, max of grad is 7.066926956176758
Max of net1 0.36006098985671997, #nan of net1 0
Norm of X 11.997001647949219, Norm of Y 11.997001647949219

Norm of X 10.61862564086914, Norm of Y 10.61862564086914, Norm of a 215.0088653564453
Loss is 112523152.0, max of grad is 12.149658203125
Max of net1 0.35988375544548035, #nan of net1 0
Norm of X 10.58012580871582, Norm of Y 10.58012580871582, Norm of a 213.2783203125
Loss is 112506336.0, max of grad is 13.53338623046875
Max of net1 0.359881728887558, #nan of net1 0
Norm of X 10.541259765625, Norm of Y 10.541259765625, Norm of a 211.7949676513672
Loss is 112532280.0, max of grad is 26.52923583984375
Max of net1 0.35987716913223267, #nan of net1 0
Norm of X 10.551939010620117, Norm of Y 10.551939010620117, Norm of a 212.02574157714844
Loss is 112519568.0, max of grad is 9.966877937316895
Max of net1 0.3598724603652954, #nan of net1 0
Norm of X 10.502957344055176, Norm of Y 10.502957344055176, Norm of a 210.18943786621094
Loss is 112515184.0, max of grad is 4.690975189208984
Max of net1 0.35986775159835815, #nan of net1 0
Norm of X 10.50680160522461, Norm of Y 10.50680160522461, Norm of 

Max of net1 0.3602595329284668, #nan of net1 0
Norm of X 10.772812843322754, Norm of Y 10.772812843322754, Norm of a 221.2391357421875
Loss is 112517048.0, max of grad is 8.313891410827637
Max of net1 0.3602737486362457, #nan of net1 0
Norm of X 10.793956756591797, Norm of Y 10.793956756591797, Norm of a 222.4861297607422
Loss is 112512360.0, max of grad is 10.002256393432617
Max of net1 0.3602878153324127, #nan of net1 0
Norm of X 10.845985412597656, Norm of Y 10.845985412597656, Norm of a 224.05136108398438
Loss is 112515152.0, max of grad is 5.398552894592285
Max of net1 0.36030104756355286, #nan of net1 0
Norm of X 10.85770320892334, Norm of Y 10.85770320892334, Norm of a 224.72158813476562
Loss is 112470088.0, max of grad is 70.82220458984375
Max of net1 0.36032044887542725, #nan of net1 0
Norm of X 10.854530334472656, Norm of Y 10.854530334472656, Norm of a 224.70982360839844
Loss is 112510736.0, max of grad is 7.50408935546875
Max of net1 0.3603387176990509, #nan of net1 0
Norm 

Max of net1 0.3607107996940613, #nan of net1 0
Norm of X 11.58233642578125, Norm of Y 11.58233642578125, Norm of a 257.7756652832031
Loss is 112514304.0, max of grad is 11.034839630126953
Max of net1 0.36072486639022827, #nan of net1 0
Norm of X 11.621944427490234, Norm of Y 11.621944427490234, Norm of a 259.038330078125
Loss is 112515040.0, max of grad is 5.42498254776001
Max of net1 0.3607320785522461, #nan of net1 0
Norm of X 11.656843185424805, Norm of Y 11.656843185424805, Norm of a 260.4854736328125
Loss is 112520008.0, max of grad is 8.4989013671875
Max of net1 0.3607379198074341, #nan of net1 0
Norm of X 11.645430564880371, Norm of Y 11.645430564880371, Norm of a 259.8827819824219
Loss is 112529632.0, max of grad is 24.61376953125
Max of net1 0.3607395589351654, #nan of net1 0
Norm of X 11.638284683227539, Norm of Y 11.638284683227539, Norm of a 259.60174560546875
Loss is 112528312.0, max of grad is 22.00640869140625
Max of net1 0.3607364594936371, #nan of net1 0
Norm of X 11.6

Norm of X 11.865800857543945, Norm of Y 11.865800857543945, Norm of a 270.0646667480469
Loss is 112512008.0, max of grad is 7.2982025146484375
Max of net1 0.3612658381462097, #nan of net1 0
Norm of X 11.924592971801758, Norm of Y 11.924592971801758, Norm of a 272.5741271972656
Loss is 112513208.0, max of grad is 13.567193031311035
Max of net1 0.361282080411911, #nan of net1 0
Norm of X 11.9111967086792, Norm of Y 11.9111967086792, Norm of a 271.9807434082031
Loss is 112502472.0, max of grad is 21.11993408203125
Max of net1 0.3613002896308899, #nan of net1 0
Norm of X 11.915899276733398, Norm of Y 11.915899276733398, Norm of a 272.5955810546875
Loss is 112527784.0, max of grad is 20.88714599609375
Max of net1 0.36131465435028076, #nan of net1 0
Norm of X 11.943520545959473, Norm of Y 11.943520545959473, Norm of a 274.25640869140625
Loss is 112518816.0, max of grad is 11.64865779876709
Max of net1 0.36132246255874634, #nan of net1 0
Norm of X 11.935236930847168, Norm of Y 11.935236930847

Max of net1 0.36159855127334595, #nan of net1 0
Norm of X 12.143881797790527, Norm of Y 12.143881797790527, Norm of a 282.9762878417969
Loss is 112522248.0, max of grad is 11.27203369140625
Max of net1 0.3615992069244385, #nan of net1 0
Norm of X 11.988360404968262, Norm of Y 11.988360404968262, Norm of a 276.422119140625
Loss is 112529392.0, max of grad is 23.923583984375
Max of net1 0.3615971803665161, #nan of net1 0
Norm of X 12.021880149841309, Norm of Y 12.021880149841309, Norm of a 276.7607421875
Loss is 112514632.0, max of grad is 8.844186782836914
Max of net1 0.3615957796573639, #nan of net1 0
Norm of X 12.006406784057617, Norm of Y 12.006406784057617, Norm of a 275.65545654296875
Loss is 112515504.0, max of grad is 7.925045967102051
Max of net1 0.36159706115722656, #nan of net1 0
Norm of X 11.876431465148926, Norm of Y 11.876431465148926, Norm of a 270.5450134277344
Loss is 112523008.0, max of grad is 13.93609619140625
Max of net1 0.36159589886665344, #nan of net1 0
Norm of X 

Norm of X 11.481082916259766, Norm of Y 11.481082916259766, Norm of a 251.81961059570312
Loss is 112505504.0, max of grad is 15.86383056640625
Max of net1 0.3618335723876953, #nan of net1 0
Norm of X 11.479822158813477, Norm of Y 11.479822158813477, Norm of a 251.92059326171875
Loss is 112523152.0, max of grad is 12.10382080078125
Max of net1 0.36184054613113403, #nan of net1 0
Norm of X 11.529267311096191, Norm of Y 11.529267311096191, Norm of a 253.7737274169922
Loss is 112518904.0, max of grad is 8.303812026977539
Max of net1 0.3618459403514862, #nan of net1 0
Norm of X 11.530921936035156, Norm of Y 11.530921936035156, Norm of a 253.64735412597656
Loss is 112512208.0, max of grad is 7.119903564453125
Max of net1 0.36185434460639954, #nan of net1 0
Norm of X 11.505890846252441, Norm of Y 11.505890846252441, Norm of a 252.44374084472656
Loss is 112514264.0, max of grad is 9.879344940185547
Max of net1 0.3618626594543457, #nan of net1 0
Norm of X 11.542688369750977, Norm of Y 11.542688

Norm of X 11.518305778503418, Norm of Y 11.518305778503418, Norm of a 252.6660614013672
Loss is 112514672.0, max of grad is 11.760982513427734
Max of net1 0.3622914254665375, #nan of net1 0
Norm of X 11.432886123657227, Norm of Y 11.432886123657227, Norm of a 249.3942413330078
Loss is 112530056.0, max of grad is 23.00738525390625
Max of net1 0.3622949719429016, #nan of net1 0
Norm of X 11.406147003173828, Norm of Y 11.406147003173828, Norm of a 247.9907684326172
Loss is 112514024.0, max of grad is 6.418229103088379
Max of net1 0.36230167746543884, #nan of net1 0
Norm of X 11.525359153747559, Norm of Y 11.525359153747559, Norm of a 252.67674255371094
Loss is 112518128.0, max of grad is 7.711301326751709
Max of net1 0.36230725049972534, #nan of net1 0
Norm of X 11.404855728149414, Norm of Y 11.404855728149414, Norm of a 247.8062744140625
Loss is 112516032.0, max of grad is 4.689300537109375
Max of net1 0.36230921745300293, #nan of net1 0
Norm of X 11.323271751403809, Norm of Y 11.3232717

Max of net1 0.3624003529548645, #nan of net1 0
Norm of X 11.224724769592285, Norm of Y 11.224724769592285, Norm of a 240.046142578125
Loss is 112513864.0, max of grad is 4.431923866271973
Max of net1 0.36240333318710327, #nan of net1 0
Norm of X 11.214125633239746, Norm of Y 11.214125633239746, Norm of a 239.17674255371094
Loss is 112517032.0, max of grad is 6.710468292236328
Max of net1 0.3624073565006256, #nan of net1 0
Norm of X 11.21117115020752, Norm of Y 11.21117115020752, Norm of a 239.20111083984375
Loss is 112516272.0, max of grad is 6.036627769470215
Max of net1 0.36240944266319275, #nan of net1 0
Norm of X 11.25882339477539, Norm of Y 11.25882339477539, Norm of a 240.4229736328125
Loss is 112513840.0, max of grad is 11.579792022705078
Max of net1 0.3624096214771271, #nan of net1 0
Norm of X 11.197587013244629, Norm of Y 11.197587013244629, Norm of a 238.41114807128906
Loss is 112514816.0, max of grad is 6.049703121185303
Max of net1 0.362412691116333, #nan of net1 0
Norm of 

Norm of X 11.013282775878906, Norm of Y 11.013282775878906, Norm of a 230.1039581298828
Loss is 112519840.0, max of grad is 6.210968017578125
Max of net1 0.36278384923934937, #nan of net1 0
Norm of X 10.986917495727539, Norm of Y 10.986917495727539, Norm of a 228.98358154296875
Loss is 112514280.0, max of grad is 6.440183639526367
Max of net1 0.3627810776233673, #nan of net1 0
Norm of X 10.953710556030273, Norm of Y 10.953710556030273, Norm of a 227.21624755859375
Loss is 112515856.0, max of grad is 5.50846004486084
Max of net1 0.36277684569358826, #nan of net1 0
Norm of X 10.979562759399414, Norm of Y 10.979562759399414, Norm of a 228.3192596435547
Loss is 112525424.0, max of grad is 13.99359130859375
Max of net1 0.3627721667289734, #nan of net1 0
Norm of X 10.955398559570312, Norm of Y 10.955398559570312, Norm of a 226.85928344726562
Loss is 112517864.0, max of grad is 6.457920551300049
Max of net1 0.3627697229385376, #nan of net1 0
Norm of X 10.944624900817871, Norm of Y 10.94462490

Loss is 112517840.0, max of grad is 14.083113670349121
Max of net1 0.3627813458442688, #nan of net1 0
Norm of X 10.680843353271484, Norm of Y 10.680843353271484, Norm of a 215.97503662109375
Loss is 112531376.0, max of grad is 22.88787841796875
Max of net1 0.3627815544605255, #nan of net1 0
Norm of X 10.733349800109863, Norm of Y 10.733349800109863, Norm of a 216.4197235107422
Loss is 112514200.0, max of grad is 7.460697650909424
Max of net1 0.36278101801872253, #nan of net1 0
Norm of X 10.68392562866211, Norm of Y 10.68392562866211, Norm of a 215.04312133789062
Loss is 112526816.0, max of grad is 17.06787109375
Max of net1 0.3627782464027405, #nan of net1 0
Norm of X 10.69657039642334, Norm of Y 10.69657039642334, Norm of a 215.54029846191406
Loss is 112501032.0, max of grad is 22.58111572265625
Max of net1 0.3627823293209076, #nan of net1 0
Norm of X 10.690264701843262, Norm of Y 10.690264701843262, Norm of a 214.75357055664062
Loss is 112530800.0, max of grad is 22.8348388671875
Max

Norm of X 10.568864822387695, Norm of Y 10.568864822387695, Norm of a 210.1237030029297
Loss is 112513512.0, max of grad is 10.979482650756836
Max of net1 0.3628939390182495, #nan of net1 0
Norm of X 10.576449394226074, Norm of Y 10.576449394226074, Norm of a 210.5996551513672
Loss is 112526024.0, max of grad is 18.945636749267578
Max of net1 0.36289408802986145, #nan of net1 0
Norm of X 10.621582984924316, Norm of Y 10.621582984924316, Norm of a 211.75350952148438
Loss is 112516328.0, max of grad is 5.481574058532715
Max of net1 0.3628951609134674, #nan of net1 0
Norm of X 10.542835235595703, Norm of Y 10.542835235595703, Norm of a 210.117919921875
Loss is 112514584.0, max of grad is 5.0676116943359375
Max of net1 0.36289677023887634, #nan of net1 0
Norm of X 10.600445747375488, Norm of Y 10.600445747375488, Norm of a 211.27890014648438
Loss is 112514304.0, max of grad is 8.019067764282227
Max of net1 0.36289769411087036, #nan of net1 0
Norm of X 10.597452163696289, Norm of Y 10.59745

Loss is 112526160.0, max of grad is 18.09649658203125
Max of net1 0.3631706237792969, #nan of net1 0
Norm of X 11.347062110900879, Norm of Y 11.347062110900879, Norm of a 244.00608825683594
Loss is 112506232.0, max of grad is 14.83758544921875
Max of net1 0.36317408084869385, #nan of net1 0
Norm of X 11.36023235321045, Norm of Y 11.36023235321045, Norm of a 244.0947723388672
Loss is 112515288.0, max of grad is 6.394981384277344
Max of net1 0.36317721009254456, #nan of net1 0
Norm of X 11.371387481689453, Norm of Y 11.371387481689453, Norm of a 244.689453125
Loss is 112514920.0, max of grad is 6.067537307739258
Max of net1 0.3631811738014221, #nan of net1 0
Norm of X 11.32476806640625, Norm of Y 11.32476806640625, Norm of a 242.77926635742188
Loss is 112519800.0, max of grad is 6.449593544006348
Max of net1 0.3631857931613922, #nan of net1 0
Norm of X 11.35360050201416, Norm of Y 11.35360050201416, Norm of a 243.02072143554688
Loss is 112514320.0, max of grad is 8.80712890625
Max of net

Norm of X 10.827350616455078, Norm of Y 10.827350616455078, Norm of a 220.27264404296875
Loss is 112520928.0, max of grad is 6.6944580078125
Max of net1 0.3634781241416931, #nan of net1 0
Norm of X 10.822590827941895, Norm of Y 10.822590827941895, Norm of a 219.69741821289062
Loss is 112512856.0, max of grad is 5.44500732421875
Max of net1 0.36348944902420044, #nan of net1 0
Norm of X 10.809537887573242, Norm of Y 10.809537887573242, Norm of a 220.03759765625
Loss is 112514088.0, max of grad is 4.55419921875
Max of net1 0.3635008633136749, #nan of net1 0
Norm of X 10.810911178588867, Norm of Y 10.810911178588867, Norm of a 219.67117309570312
Loss is 112512184.0, max of grad is 7.433807373046875
Max of net1 0.3635125160217285, #nan of net1 0
Norm of X 10.788178443908691, Norm of Y 10.788178443908691, Norm of a 218.5497283935547
Loss is 112523736.0, max of grad is 10.647552490234375
Max of net1 0.3635195791721344, #nan of net1 0
Norm of X 10.771591186523438, Norm of Y 10.771591186523438,

Norm of X 10.307291984558105, Norm of Y 10.307291984558105, Norm of a 199.2497100830078
Loss is 112530624.0, max of grad is 21.7811279296875
Max of net1 0.36348670721054077, #nan of net1 0
Norm of X 10.32106876373291, Norm of Y 10.32106876373291, Norm of a 199.52603149414062
Loss is 112514088.0, max of grad is 10.130707740783691
Max of net1 0.36348727345466614, #nan of net1 0
Norm of X 10.275898933410645, Norm of Y 10.275898933410645, Norm of a 197.82196044921875
Loss is 112524856.0, max of grad is 13.0733642578125
Max of net1 0.3634863495826721, #nan of net1 0
Norm of X 10.260565757751465, Norm of Y 10.260565757751465, Norm of a 196.56678771972656
Loss is 112514112.0, max of grad is 11.546463012695312
Max of net1 0.36348509788513184, #nan of net1 0
Norm of X 10.238450050354004, Norm of Y 10.238450050354004, Norm of a 195.9801483154297
Loss is 112523256.0, max of grad is 10.2545166015625
Max of net1 0.36348676681518555, #nan of net1 0
Norm of X 10.233545303344727, Norm of Y 10.23354530

Loss is 112528912.0, max of grad is 18.596435546875
Max of net1 0.36380451917648315, #nan of net1 0
Norm of X 11.150300979614258, Norm of Y 11.150300979614258, Norm of a 233.335693359375
Loss is 112518512.0, max of grad is 6.843693733215332
Max of net1 0.3638170063495636, #nan of net1 0
Norm of X 11.165051460266113, Norm of Y 11.165051460266113, Norm of a 234.75018310546875
Loss is 112522528.0, max of grad is 9.47088623046875
Max of net1 0.3638276755809784, #nan of net1 0
Norm of X 11.153181076049805, Norm of Y 11.153181076049805, Norm of a 234.25350952148438
Loss is 112517032.0, max of grad is 5.659673690795898
Max of net1 0.36383694410324097, #nan of net1 0
Norm of X 11.14376163482666, Norm of Y 11.14376163482666, Norm of a 233.26727294921875
Loss is 112524176.0, max of grad is 12.91131591796875
Max of net1 0.3638445734977722, #nan of net1 0
Norm of X 11.124372482299805, Norm of Y 11.124372482299805, Norm of a 233.15737915039062
Loss is 112513544.0, max of grad is 12.086274147033691


Norm of X 11.378902435302734, Norm of Y 11.378902435302734, Norm of a 244.2274932861328
Loss is 112510320.0, max of grad is 10.681490898132324
Max of net1 0.3641101121902466, #nan of net1 0
Norm of X 11.355709075927734, Norm of Y 11.355709075927734, Norm of a 243.2611083984375
Loss is 112513952.0, max of grad is 10.072677612304688
Max of net1 0.3641204833984375, #nan of net1 0
Norm of X 11.372368812561035, Norm of Y 11.372368812561035, Norm of a 243.6578826904297
Loss is 112500976.0, max of grad is 21.702392578125
Max of net1 0.36413344740867615, #nan of net1 0
Norm of X 11.44959831237793, Norm of Y 11.44959831237793, Norm of a 247.154052734375
Loss is 112519392.0, max of grad is 5.54052734375
Max of net1 0.3641458749771118, #nan of net1 0
Norm of X 11.432760238647461, Norm of Y 11.432760238647461, Norm of a 246.83145141601562
Loss is 112521016.0, max of grad is 8.255481719970703
Max of net1 0.364155650138855, #nan of net1 0
Norm of X 11.504627227783203, Norm of Y 11.504627227783203, N

Norm of X 11.551018714904785, Norm of Y 11.551018714904785, Norm of a 252.19688415527344
Loss is 112514616.0, max of grad is 3.972951650619507
Max of net1 0.3646627962589264, #nan of net1 0
Norm of X 11.58612060546875, Norm of Y 11.58612060546875, Norm of a 252.77401733398438
Loss is 112514024.0, max of grad is 5.3092041015625
Max of net1 0.36467593908309937, #nan of net1 0
Norm of X 11.589749336242676, Norm of Y 11.589749336242676, Norm of a 252.75372314453125
Loss is 112515888.0, max of grad is 9.574302673339844
Max of net1 0.36468833684921265, #nan of net1 0
Norm of X 11.617096900939941, Norm of Y 11.617096900939941, Norm of a 253.96714782714844
Loss is 112524088.0, max of grad is 12.62078857421875
Max of net1 0.3646967113018036, #nan of net1 0
Norm of X 9.305220603942871, Norm of Y 9.305220603942871, Norm of a 162.80442810058594
Loss is 46090712.0, max of grad is 3.2301881313323975


Epoch: 18, Cost: 15910771256.0

Max of net1 0.36470407247543335, #nan of net1 0
Norm of X 11.594181

Max of net1 0.364952951669693, #nan of net1 0
Norm of X 11.239777565002441, Norm of Y 11.239777565002441, Norm of a 236.8529052734375
Loss is 112513632.0, max of grad is 5.18267822265625
Max of net1 0.3649495840072632, #nan of net1 0
Norm of X 11.24753475189209, Norm of Y 11.24753475189209, Norm of a 236.64474487304688
Loss is 112521104.0, max of grad is 8.0980863571167
Max of net1 0.36494719982147217, #nan of net1 0
Norm of X 11.160701751708984, Norm of Y 11.160701751708984, Norm of a 233.36029052734375
Loss is 112531624.0, max of grad is 23.94256591796875
Max of net1 0.3649398684501648, #nan of net1 0
Norm of X 11.188446998596191, Norm of Y 11.188446998596191, Norm of a 234.2917022705078
Loss is 112513368.0, max of grad is 9.311273574829102
Max of net1 0.36493873596191406, #nan of net1 0
Norm of X 11.132086753845215, Norm of Y 11.132086753845215, Norm of a 231.4891357421875
Loss is 112522248.0, max of grad is 9.0079345703125
Max of net1 0.3649343252182007, #nan of net1 0
Norm of X 11

Norm of X 10.8788423538208, Norm of Y 10.8788423538208, Norm of a 220.41189575195312
Loss is 112514392.0, max of grad is 5.867391586303711
Max of net1 0.3648804724216461, #nan of net1 0
Norm of X 10.929485321044922, Norm of Y 10.929485321044922, Norm of a 222.565185546875
Loss is 112479472.0, max of grad is 53.34588623046875
Max of net1 0.36490365862846375, #nan of net1 0
Norm of X 11.025246620178223, Norm of Y 11.025246620178223, Norm of a 226.59762573242188
Loss is 112517752.0, max of grad is 4.8494672775268555
Max of net1 0.36492013931274414, #nan of net1 0
Norm of X 11.144062042236328, Norm of Y 11.144062042236328, Norm of a 231.27224731445312
Loss is 112518408.0, max of grad is 9.743915557861328
Max of net1 0.3649376332759857, #nan of net1 0
Norm of X 11.187814712524414, Norm of Y 11.187814712524414, Norm of a 233.95050048828125
Loss is 112514288.0, max of grad is 11.194601058959961
Max of net1 0.3649553954601288, #nan of net1 0
Norm of X 11.29942512512207, Norm of Y 11.2994251251

Norm of X 11.080822944641113, Norm of Y 11.080822944641113, Norm of a 227.84255981445312
Loss is 112527376.0, max of grad is 16.7926025390625
Max of net1 0.3652110993862152, #nan of net1 0
Norm of X 11.046295166015625, Norm of Y 11.046295166015625, Norm of a 226.72177124023438
Loss is 112526144.0, max of grad is 14.728515625
Max of net1 0.365201473236084, #nan of net1 0
Norm of X 11.038981437683105, Norm of Y 11.038981437683105, Norm of a 226.25592041015625
Loss is 112513824.0, max of grad is 17.613943099975586
Max of net1 0.3651958405971527, #nan of net1 0
Norm of X 11.007474899291992, Norm of Y 11.007474899291992, Norm of a 226.00048828125
Loss is 112513264.0, max of grad is 8.64778995513916
Max of net1 0.36519724130630493, #nan of net1 0
Norm of X 10.95819091796875, Norm of Y 10.95819091796875, Norm of a 223.3612518310547
Loss is 112514688.0, max of grad is 6.09786319732666
Max of net1 0.36519908905029297, #nan of net1 0
Norm of X 10.958502769470215, Norm of Y 10.958502769470215, No

Max of net1 0.3652071952819824, #nan of net1 0
Norm of X 10.634322166442871, Norm of Y 10.634322166442871, Norm of a 208.993408203125
Loss is 112517352.0, max of grad is 5.325459003448486
Max of net1 0.36520594358444214, #nan of net1 0
Norm of X 10.650492668151855, Norm of Y 10.650492668151855, Norm of a 209.6456298828125
Loss is 112502136.0, max of grad is 18.88616943359375
Max of net1 0.36520621180534363, #nan of net1 0
Norm of X 10.731204986572266, Norm of Y 10.731204986572266, Norm of a 212.30630493164062
Loss is 112528792.0, max of grad is 18.73583984375
Max of net1 0.3652074337005615, #nan of net1 0
Norm of X 10.699604988098145, Norm of Y 10.699604988098145, Norm of a 212.32476806640625
Loss is 112514616.0, max of grad is 7.796973705291748
Max of net1 0.3652152121067047, #nan of net1 0
Norm of X 10.734030723571777, Norm of Y 10.734030723571777, Norm of a 213.40977478027344
Loss is 112518128.0, max of grad is 3.503246784210205
Max of net1 0.36522144079208374, #nan of net1 0
Norm o

Max of net1 0.36562106013298035, #nan of net1 0
Norm of X 11.302794456481934, Norm of Y 11.302794456481934, Norm of a 237.58392333984375
Loss is 112516592.0, max of grad is 5.056789398193359
Max of net1 0.3656361699104309, #nan of net1 0
Norm of X 11.288108825683594, Norm of Y 11.288108825683594, Norm of a 236.5498809814453
Loss is 112518016.0, max of grad is 3.739898920059204
Max of net1 0.3656523525714874, #nan of net1 0
Norm of X 11.274996757507324, Norm of Y 11.274996757507324, Norm of a 236.05657958984375
Loss is 112514408.0, max of grad is 4.280228137969971
Max of net1 0.3656662106513977, #nan of net1 0
Norm of X 11.227875709533691, Norm of Y 11.227875709533691, Norm of a 233.808837890625
Loss is 112516592.0, max of grad is 6.910816669464111
Max of net1 0.3656793236732483, #nan of net1 0
Norm of X 11.201560020446777, Norm of Y 11.201560020446777, Norm of a 233.06886291503906
Loss is 112512888.0, max of grad is 7.473799705505371
Max of net1 0.3656882345676422, #nan of net1 0
Norm 

Max of net1 0.3660910725593567, #nan of net1 0
Norm of X 11.933218002319336, Norm of Y 11.933218002319336, Norm of a 266.0176086425781
Loss is 112515448.0, max of grad is 6.255274772644043
Max of net1 0.3661089539527893, #nan of net1 0
Norm of X 11.976325988769531, Norm of Y 11.976325988769531, Norm of a 268.2218017578125
Loss is 112514528.0, max of grad is 7.197005271911621
Max of net1 0.3661288619041443, #nan of net1 0
Norm of X 12.041263580322266, Norm of Y 12.041263580322266, Norm of a 270.5518493652344
Loss is 112521048.0, max of grad is 29.72159194946289
Max of net1 0.3661454916000366, #nan of net1 0
Norm of X 12.036169052124023, Norm of Y 12.036169052124023, Norm of a 271.6640625
Loss is 112512152.0, max of grad is 8.56341552734375
Max of net1 0.36616814136505127, #nan of net1 0
Norm of X 12.077588081359863, Norm of Y 12.077588081359863, Norm of a 271.7442626953125
Loss is 112513488.0, max of grad is 5.2359619140625
Max of net1 0.36619165539741516, #nan of net1 0
Norm of X 12.09

Norm of X 12.832947731018066, Norm of Y 12.832947731018066, Norm of a 309.9338684082031
Loss is 112514024.0, max of grad is 8.64776611328125
Max of net1 0.3669663667678833, #nan of net1 0
Norm of X 12.806011199951172, Norm of Y 12.806011199951172, Norm of a 307.6497802734375
Loss is 112514280.0, max of grad is 5.855010986328125
Max of net1 0.36698153614997864, #nan of net1 0
Norm of X 12.713960647583008, Norm of Y 12.713960647583008, Norm of a 303.2084655761719
Loss is 112520520.0, max of grad is 8.50970458984375
Max of net1 0.3669956922531128, #nan of net1 0
Norm of X 12.672273635864258, Norm of Y 12.672273635864258, Norm of a 301.36767578125
Loss is 112517616.0, max of grad is 6.365575790405273
Max of net1 0.3670087158679962, #nan of net1 0
Norm of X 12.649246215820312, Norm of Y 12.649246215820312, Norm of a 299.02685546875
Loss is 112514776.0, max of grad is 5.117214202880859
Max of net1 0.3670193552970886, #nan of net1 0
Norm of X 12.558695793151855, Norm of Y 12.558695793151855, 

Max of net1 0.3675640821456909, #nan of net1 0
Norm of X 11.96540355682373, Norm of Y 11.96540355682373, Norm of a 265.90753173828125
Loss is 112514144.0, max of grad is 9.891518592834473
Max of net1 0.3675876557826996, #nan of net1 0
Norm of X 11.9584321975708, Norm of Y 11.9584321975708, Norm of a 265.2648010253906
Loss is 112515960.0, max of grad is 8.532350540161133
Max of net1 0.3676104247570038, #nan of net1 0
Norm of X 11.927910804748535, Norm of Y 11.927910804748535, Norm of a 263.3592834472656
Loss is 112517776.0, max of grad is 4.047765254974365
Max of net1 0.36763203144073486, #nan of net1 0
Norm of X 11.910919189453125, Norm of Y 11.910919189453125, Norm of a 262.15106201171875
Loss is 112523912.0, max of grad is 12.387451171875
Max of net1 0.3676488697528839, #nan of net1 0
Norm of X 11.855274200439453, Norm of Y 11.855274200439453, Norm of a 259.08392333984375
Loss is 112520576.0, max of grad is 7.60791015625
Max of net1 0.36766257882118225, #nan of net1 0
Norm of X 11.81

Max of net1 0.36859118938446045, #nan of net1 0
Norm of X 13.82640266418457, Norm of Y 13.82640266418457, Norm of a 359.9446105957031
Loss is 112512784.0, max of grad is 9.880859375
Max of net1 0.36863118410110474, #nan of net1 0
Norm of X 13.998249053955078, Norm of Y 13.998249053955078, Norm of a 368.7719421386719
Loss is 112516368.0, max of grad is 5.41107702255249
Max of net1 0.36867040395736694, #nan of net1 0
Norm of X 13.996648788452148, Norm of Y 13.996648788452148, Norm of a 369.2998046875
Loss is 112518536.0, max of grad is 8.55817699432373
Max of net1 0.36870840191841125, #nan of net1 0
Norm of X 14.0402193069458, Norm of Y 14.0402193069458, Norm of a 371.4315490722656
Loss is 112514672.0, max of grad is 6.332305908203125
Max of net1 0.3687448799610138, #nan of net1 0
Norm of X 14.110031127929688, Norm of Y 14.110031127929688, Norm of a 375.27813720703125
Loss is 112512624.0, max of grad is 9.28839111328125
Max of net1 0.36878326535224915, #nan of net1 0
Norm of X 14.1339464

Loss is 112517424.0, max of grad is 7.2135009765625
Max of net1 0.37000590562820435, #nan of net1 0
Norm of X 13.348482131958008, Norm of Y 13.348482131958008, Norm of a 330.79791259765625
Loss is 112512856.0, max of grad is 10.8603515625
Max of net1 0.3700299859046936, #nan of net1 0
Norm of X 13.265952110290527, Norm of Y 13.265952110290527, Norm of a 327.08770751953125
Loss is 112515152.0, max of grad is 7.7509765625
Max of net1 0.37005558609962463, #nan of net1 0
Norm of X 13.267261505126953, Norm of Y 13.267261505126953, Norm of a 326.49908447265625
Loss is 112524720.0, max of grad is 13.39703369140625
Max of net1 0.370076060295105, #nan of net1 0
Norm of X 13.191152572631836, Norm of Y 13.191152572631836, Norm of a 322.54388427734375
Loss is 112514904.0, max of grad is 8.176742553710938
Max of net1 0.37010008096694946, #nan of net1 0
Norm of X 13.065618515014648, Norm of Y 13.065618515014648, Norm of a 317.09466552734375
Loss is 112523888.0, max of grad is 11.28076171875
Max of n

Loss is 112512728.0, max of grad is 11.16558837890625
Max of net1 0.37113845348358154, #nan of net1 0
Norm of X 13.203518867492676, Norm of Y 13.203518867492676, Norm of a 322.5845947265625
Loss is 112522448.0, max of grad is 10.17510986328125
Max of net1 0.3711574077606201, #nan of net1 0
Norm of X 13.280508041381836, Norm of Y 13.280508041381836, Norm of a 325.7095031738281
Loss is 112513048.0, max of grad is 13.157958984375
Max of net1 0.3711782693862915, #nan of net1 0
Norm of X 13.189391136169434, Norm of Y 13.189391136169434, Norm of a 321.2768249511719
Loss is 112514352.0, max of grad is 6.773193359375
Max of net1 0.37119531631469727, #nan of net1 0
Norm of X 13.227862358093262, Norm of Y 13.227862358093262, Norm of a 322.9886169433594
Loss is 112515288.0, max of grad is 7.313690185546875
Max of net1 0.3712104856967926, #nan of net1 0
Norm of X 13.23486614227295, Norm of Y 13.23486614227295, Norm of a 324.1977844238281
Loss is 112514304.0, max of grad is 11.0489501953125
Max of 

Max of net1 0.37220972776412964, #nan of net1 0
Norm of X 13.285551071166992, Norm of Y 13.285551071166992, Norm of a 325.4812316894531
Loss is 112514584.0, max of grad is 10.91183090209961
Max of net1 0.37221840023994446, #nan of net1 0
Norm of X 13.3285551071167, Norm of Y 13.3285551071167, Norm of a 326.414794921875
Loss is 112513864.0, max of grad is 9.757568359375
Max of net1 0.3722255229949951, #nan of net1 0
Norm of X 13.320194244384766, Norm of Y 13.320194244384766, Norm of a 327.2295227050781
Loss is 112513424.0, max of grad is 9.904296875
Max of net1 0.37223273515701294, #nan of net1 0
Norm of X 13.376564979553223, Norm of Y 13.376564979553223, Norm of a 329.66632080078125
Loss is 112514864.0, max of grad is 12.95909309387207
Max of net1 0.37224316596984863, #nan of net1 0
Norm of X 13.462837219238281, Norm of Y 13.462837219238281, Norm of a 333.993408203125
Loss is 112518864.0, max of grad is 10.061702728271484
Max of net1 0.3722478747367859, #nan of net1 0
Norm of X 13.4645

Loss is 112520680.0, max of grad is 6.345184326171875
Max of net1 0.37324607372283936, #nan of net1 0
Norm of X 13.153200149536133, Norm of Y 13.153200149536133, Norm of a 316.5573425292969
Loss is 112518144.0, max of grad is 6.678128242492676
Max of net1 0.3732641339302063, #nan of net1 0
Norm of X 13.203110694885254, Norm of Y 13.203110694885254, Norm of a 318.0994567871094
Loss is 112518672.0, max of grad is 5.829209327697754
Max of net1 0.37328317761421204, #nan of net1 0
Norm of X 13.23663330078125, Norm of Y 13.23663330078125, Norm of a 319.0936584472656
Loss is 112514984.0, max of grad is 10.8343505859375
Max of net1 0.3733014166355133, #nan of net1 0
Norm of X 13.19620418548584, Norm of Y 13.19620418548584, Norm of a 319.59136962890625
Loss is 112514640.0, max of grad is 11.87256908416748
Max of net1 0.373323917388916, #nan of net1 0
Norm of X 13.302755355834961, Norm of Y 13.302755355834961, Norm of a 323.5832214355469
Loss is 112521072.0, max of grad is 7.194644927978516
Max 

Loss is 112514528.0, max of grad is 10.644317626953125
Max of net1 0.374426007270813, #nan of net1 0
Norm of X 14.12341022491455, Norm of Y 14.12341022491455, Norm of a 366.3038635253906
Loss is 112516736.0, max of grad is 8.5438232421875
Max of net1 0.3744506537914276, #nan of net1 0
Norm of X 14.109941482543945, Norm of Y 14.109941482543945, Norm of a 366.12786865234375
Loss is 112514232.0, max of grad is 11.597442626953125
Max of net1 0.37447303533554077, #nan of net1 0
Norm of X 14.105949401855469, Norm of Y 14.105949401855469, Norm of a 366.8525085449219
Loss is 112517776.0, max of grad is 8.502455711364746
Max of net1 0.37449973821640015, #nan of net1 0
Norm of X 14.117842674255371, Norm of Y 14.117842674255371, Norm of a 367.12139892578125
Loss is 112512576.0, max of grad is 16.032958984375
Max of net1 0.37452995777130127, #nan of net1 0
Norm of X 14.16494083404541, Norm of Y 14.16494083404541, Norm of a 368.4255065917969
Loss is 112526392.0, max of grad is 17.61431884765625
Max

Max of net1 0.3757956624031067, #nan of net1 0
Norm of X 14.506513595581055, Norm of Y 14.506513595581055, Norm of a 387.2491455078125
Loss is 112513328.0, max of grad is 13.15228271484375
Max of net1 0.3758303225040436, #nan of net1 0
Norm of X 14.583281517028809, Norm of Y 14.583281517028809, Norm of a 389.09759521484375
Loss is 112512872.0, max of grad is 12.4405517578125
Max of net1 0.3758653402328491, #nan of net1 0
Norm of X 14.55020523071289, Norm of Y 14.55020523071289, Norm of a 388.4142150878906
Loss is 112514192.0, max of grad is 12.54681396484375
Max of net1 0.3758995831012726, #nan of net1 0
Norm of X 14.533867835998535, Norm of Y 14.533867835998535, Norm of a 388.9889831542969
Loss is 112517000.0, max of grad is 9.23553466796875
Max of net1 0.37592631578445435, #nan of net1 0
Norm of X 14.717474937438965, Norm of Y 14.717474937438965, Norm of a 396.5164794921875
Loss is 112521352.0, max of grad is 9.443115234375
Max of net1 0.3759491443634033, #nan of net1 0
Norm of X 14.

Norm of X 15.211484909057617, Norm of Y 15.211484909057617, Norm of a 423.3831481933594
Loss is 112514616.0, max of grad is 17.300922393798828
Max of net1 0.377594918012619, #nan of net1 0
Norm of X 15.19693374633789, Norm of Y 15.19693374633789, Norm of a 423.7081604003906
Loss is 112516168.0, max of grad is 9.002578735351562
Max of net1 0.37763750553131104, #nan of net1 0
Norm of X 15.27501106262207, Norm of Y 15.27501106262207, Norm of a 426.4090576171875
Loss is 112514832.0, max of grad is 11.50390625
Max of net1 0.37768226861953735, #nan of net1 0
Norm of X 15.274595260620117, Norm of Y 15.274595260620117, Norm of a 426.8169860839844
Loss is 112513928.0, max of grad is 12.94210433959961
Max of net1 0.3777303099632263, #nan of net1 0
Norm of X 15.337088584899902, Norm of Y 15.337088584899902, Norm of a 432.1377868652344
Loss is 112515552.0, max of grad is 12.78594970703125
Max of net1 0.3777777850627899, #nan of net1 0
Norm of X 15.338981628417969, Norm of Y 15.338981628417969, Nor

Loss is 112518336.0, max of grad is 11.462699890136719
Max of net1 0.3796897232532501, #nan of net1 0
Norm of X 15.77134895324707, Norm of Y 15.77134895324707, Norm of a 452.0271301269531
Loss is 112514336.0, max of grad is 15.08294677734375
Max of net1 0.379737913608551, #nan of net1 0
Norm of X 15.784622192382812, Norm of Y 15.784622192382812, Norm of a 453.3587951660156
Loss is 112513384.0, max of grad is 17.4559326171875
Max of net1 0.3797866404056549, #nan of net1 0
Norm of X 15.878175735473633, Norm of Y 15.878175735473633, Norm of a 460.59014892578125
Loss is 112514424.0, max of grad is 14.4674072265625
Max of net1 0.379835307598114, #nan of net1 0
Norm of X 15.962138175964355, Norm of Y 15.962138175964355, Norm of a 465.323486328125
Loss is 112517336.0, max of grad is 16.510986328125
Max of net1 0.37988102436065674, #nan of net1 0
Norm of X 16.193540573120117, Norm of Y 16.193540573120117, Norm of a 475.9574890136719
Loss is 112514552.0, max of grad is 17.87017822265625
Max of 

Norm of X 16.90167808532715, Norm of Y 16.90167808532715, Norm of a 520.3009643554688
Loss is 112515592.0, max of grad is 19.314666748046875
Max of net1 0.3828388452529907, #nan of net1 0
Norm of X 16.852218627929688, Norm of Y 16.852218627929688, Norm of a 516.1041259765625
Loss is 112514200.0, max of grad is 20.537109375
Max of net1 0.382921427488327, #nan of net1 0
Norm of X 16.75798988342285, Norm of Y 16.75798988342285, Norm of a 512.7605590820312
Loss is 112514904.0, max of grad is 17.356754302978516
Max of net1 0.38300707936286926, #nan of net1 0
Norm of X 16.880935668945312, Norm of Y 16.880935668945312, Norm of a 518.4562377929688
Loss is 112515128.0, max of grad is 16.593473434448242
Max of net1 0.3830856680870056, #nan of net1 0
Norm of X 16.92697525024414, Norm of Y 16.92697525024414, Norm of a 519.4066772460938
Loss is 112515376.0, max of grad is 17.697284698486328
Max of net1 0.3831568658351898, #nan of net1 0
Norm of X 16.9207820892334, Norm of Y 16.9207820892334, Norm o

Max of net1 0.38617178797721863, #nan of net1 0
Norm of X 17.923799514770508, Norm of Y 17.923799514770508, Norm of a 582.8521118164062
Loss is 112514512.0, max of grad is 24.807861328125
Max of net1 0.38626188039779663, #nan of net1 0
Norm of X 17.989727020263672, Norm of Y 17.989727020263672, Norm of a 585.6913452148438
Loss is 112514160.0, max of grad is 22.41387939453125
Max of net1 0.38634979724884033, #nan of net1 0
Norm of X 17.967132568359375, Norm of Y 17.967132568359375, Norm of a 585.7542724609375
Loss is 112514552.0, max of grad is 21.3828125
Max of net1 0.3864313066005707, #nan of net1 0
Norm of X 17.984905242919922, Norm of Y 17.984905242919922, Norm of a 588.159912109375
Loss is 112514280.0, max of grad is 16.3331298828125
Max of net1 0.38650959730148315, #nan of net1 0
Norm of X 18.21257972717285, Norm of Y 18.21257972717285, Norm of a 599.2018432617188
Loss is 112514720.0, max of grad is 18.766357421875
Max of net1 0.3865851163864136, #nan of net1 0
Norm of X 18.161371

Loss is 112514632.0, max of grad is 32.03308868408203
Max of net1 0.3903774917125702, #nan of net1 0
Norm of X 19.98526382446289, Norm of Y 19.98526382446289, Norm of a 724.8504028320312
Loss is 112514480.0, max of grad is 24.452621459960938
Max of net1 0.3904961347579956, #nan of net1 0
Norm of X 20.103282928466797, Norm of Y 20.103282928466797, Norm of a 732.7610473632812
Loss is 112514192.0, max of grad is 25.184795379638672
Max of net1 0.39061033725738525, #nan of net1 0
Norm of X 20.1436767578125, Norm of Y 20.1436767578125, Norm of a 733.2598266601562
Loss is 112514352.0, max of grad is 26.92333984375
Max of net1 0.3907245099544525, #nan of net1 0
Norm of X 20.129404067993164, Norm of Y 20.129404067993164, Norm of a 738.344970703125
Loss is 112517064.0, max of grad is 24.86810874938965
Max of net1 0.39083725214004517, #nan of net1 0
Norm of X 20.32782554626465, Norm of Y 20.32782554626465, Norm of a 749.6603393554688
Loss is 112516368.0, max of grad is 41.324188232421875
Max of n

Norm of X 21.157682418823242, Norm of Y 21.157682418823242, Norm of a 794.5452270507812
Loss is 112515712.0, max of grad is 41.11358642578125
Max of net1 0.39647752046585083, #nan of net1 0
Norm of X 21.1473445892334, Norm of Y 21.1473445892334, Norm of a 797.229736328125
Loss is 112514584.0, max of grad is 40.04315948486328
Max of net1 0.39664095640182495, #nan of net1 0
Norm of X 21.152406692504883, Norm of Y 21.152406692504883, Norm of a 804.7877197265625
Loss is 112516272.0, max of grad is 34.523681640625
Max of net1 0.3967973589897156, #nan of net1 0
Norm of X 21.365602493286133, Norm of Y 21.365602493286133, Norm of a 823.0626220703125
Loss is 112516800.0, max of grad is 38.2664794921875
Max of net1 0.39694687724113464, #nan of net1 0
Norm of X 21.505786895751953, Norm of Y 21.505786895751953, Norm of a 830.9759521484375
Loss is 112513600.0, max of grad is 34.0196533203125
Max of net1 0.3971090018749237, #nan of net1 0
Norm of X 21.795042037963867, Norm of Y 21.795042037963867, N

Loss is 112513880.0, max of grad is 57.74493408203125
Max of net1 0.40301504731178284, #nan of net1 0
Norm of X 23.568164825439453, Norm of Y 23.568164825439453, Norm of a 978.9063110351562
Loss is 112513416.0, max of grad is 61.095550537109375
Max of net1 0.4031461179256439, #nan of net1 0
Norm of X 23.82488441467285, Norm of Y 23.82488441467285, Norm of a 996.8663940429688
Loss is 112513944.0, max of grad is 55.5894775390625
Max of net1 0.40328601002693176, #nan of net1 0
Norm of X 24.27308464050293, Norm of Y 24.27308464050293, Norm of a 1029.55615234375
Loss is 112514544.0, max of grad is 56.9981689453125
Max of net1 0.4034367799758911, #nan of net1 0
Norm of X 24.24692153930664, Norm of Y 24.24692153930664, Norm of a 1039.059326171875
Loss is 112515744.0, max of grad is 57.12310791015625
Max of net1 0.4035756289958954, #nan of net1 0
Norm of X 24.352415084838867, Norm of Y 24.352415084838867, Norm of a 1042.181884765625
Loss is 112514280.0, max of grad is 45.99066162109375
Max of 

Max of net1 0.4099694788455963, #nan of net1 0
Norm of X 26.98233985900879, Norm of Y 26.98233985900879, Norm of a 1300.1844482421875
Loss is 112514248.0, max of grad is 97.62572479248047
Max of net1 0.4101771116256714, #nan of net1 0
Norm of X 27.134111404418945, Norm of Y 27.134111404418945, Norm of a 1317.7943115234375
Loss is 112514760.0, max of grad is 127.86448669433594
Max of net1 0.41038867831230164, #nan of net1 0
Norm of X 27.374319076538086, Norm of Y 27.374319076538086, Norm of a 1336.2939453125
Loss is 112514264.0, max of grad is 90.20933532714844
Max of net1 0.4105920195579529, #nan of net1 0
Norm of X 27.463594436645508, Norm of Y 27.463594436645508, Norm of a 1352.6492919921875
Loss is 112514816.0, max of grad is 83.90332794189453
Max of net1 0.41080108284950256, #nan of net1 0
Norm of X 27.315767288208008, Norm of Y 27.315767288208008, Norm of a 1340.5736083984375
Loss is 112514024.0, max of grad is 122.54141235351562
Max of net1 0.410995751619339, #nan of net1 0
Norm 

Max of net1 0.4192274808883667, #nan of net1 0
Norm of X 31.6210994720459, Norm of Y 31.6210994720459, Norm of a 1789.5838623046875
Loss is 112513456.0, max of grad is 117.15172576904297
Max of net1 0.4194447100162506, #nan of net1 0
Norm of X 31.779890060424805, Norm of Y 31.779890060424805, Norm of a 1813.94189453125
Loss is 112514656.0, max of grad is 212.01841735839844
Max of net1 0.41964584589004517, #nan of net1 0
Norm of X 31.870431900024414, Norm of Y 31.870431900024414, Norm of a 1821.938720703125
Loss is 112515592.0, max of grad is 102.98651123046875
Max of net1 0.41985273361206055, #nan of net1 0
Norm of X 31.86631965637207, Norm of Y 31.86631965637207, Norm of a 1826.082275390625
Loss is 112514144.0, max of grad is 120.05166625976562
Max of net1 0.4200614392757416, #nan of net1 0
Norm of X 31.6335391998291, Norm of Y 31.6335391998291, Norm of a 1797.0133056640625
Loss is 112514248.0, max of grad is 170.07748413085938
Max of net1 0.42025142908096313, #nan of net1 0
Norm of X

Norm of X 34.58932876586914, Norm of Y 34.58932876586914, Norm of a 2117.63525390625
Loss is 112514568.0, max of grad is 171.020751953125
Max of net1 0.4286266565322876, #nan of net1 0
Norm of X 34.646766662597656, Norm of Y 34.646766662597656, Norm of a 2129.485107421875
Loss is 112512872.0, max of grad is 134.65399169921875
Max of net1 0.42884284257888794, #nan of net1 0
Norm of X 35.21306228637695, Norm of Y 35.21306228637695, Norm of a 2197.0341796875
Loss is 112513400.0, max of grad is 142.8748779296875
Max of net1 0.4290599524974823, #nan of net1 0
Norm of X 35.38625717163086, Norm of Y 35.38625717163086, Norm of a 2220.596435546875
Loss is 112513504.0, max of grad is 157.73867797851562
Max of net1 0.42929568886756897, #nan of net1 0
Norm of X 35.69552230834961, Norm of Y 35.69552230834961, Norm of a 2266.480712890625
Loss is 112513000.0, max of grad is 200.06344604492188
Max of net1 0.4295307695865631, #nan of net1 0
Norm of X 35.66987609863281, Norm of Y 35.66987609863281, Norm

Norm of X 39.66929626464844, Norm of Y 39.66929626464844, Norm of a 2758.605712890625
Loss is 112513352.0, max of grad is 379.5487060546875
Max of net1 0.4397438168525696, #nan of net1 0
Norm of X 39.93867492675781, Norm of Y 39.93867492675781, Norm of a 2797.856689453125
Loss is 112512736.0, max of grad is 382.4934387207031
Max of net1 0.4399902820587158, #nan of net1 0
Norm of X 40.204505920410156, Norm of Y 40.204505920410156, Norm of a 2854.353759765625
Loss is 112513616.0, max of grad is 293.0379638671875
Max of net1 0.4402361214160919, #nan of net1 0
Norm of X 40.633453369140625, Norm of Y 40.633453369140625, Norm of a 2931.348388671875
Loss is 112513088.0, max of grad is 212.79244995117188
Max of net1 0.44046664237976074, #nan of net1 0
Norm of X 41.37689208984375, Norm of Y 41.37689208984375, Norm of a 3027.046630859375
Loss is 112512552.0, max of grad is 374.9179382324219
Max of net1 0.4407062530517578, #nan of net1 0
Norm of X 41.979209899902344, Norm of Y 41.979209899902344,

Max of net1 0.4515951871871948, #nan of net1 0
Norm of X 46.557918548583984, Norm of Y 46.557918548583984, Norm of a 3801.898193359375
Loss is 112511624.0, max of grad is 539.5016479492188
Max of net1 0.4518340826034546, #nan of net1 0
Norm of X 46.86511993408203, Norm of Y 46.86511993408203, Norm of a 3835.430908203125
Loss is 112512384.0, max of grad is 462.6358337402344
Max of net1 0.452070027589798, #nan of net1 0
Norm of X 46.68877410888672, Norm of Y 46.68877410888672, Norm of a 3817.7255859375
Loss is 112512752.0, max of grad is 366.3294982910156
Max of net1 0.4523080885410309, #nan of net1 0
Norm of X 46.83390808105469, Norm of Y 46.83390808105469, Norm of a 3828.107666015625
Loss is 112512384.0, max of grad is 321.1834716796875
Max of net1 0.45254191756248474, #nan of net1 0
Norm of X 47.1599235534668, Norm of Y 47.1599235534668, Norm of a 3904.237548828125
Loss is 112512712.0, max of grad is 436.87677001953125
Max of net1 0.45277369022369385, #nan of net1 0
Norm of X 47.46540

Max of net1 0.4624444246292114, #nan of net1 0
Norm of X 53.239620208740234, Norm of Y 53.239620208740234, Norm of a 4954.64501953125
Loss is 112509576.0, max of grad is 756.6666870117188
Max of net1 0.4627078175544739, #nan of net1 0
Norm of X 53.443626403808594, Norm of Y 53.443626403808594, Norm of a 5012.9736328125
Loss is 112510632.0, max of grad is 827.3632202148438
Max of net1 0.46298283338546753, #nan of net1 0
Norm of X 53.377254486083984, Norm of Y 53.377254486083984, Norm of a 4998.58837890625
Loss is 112510736.0, max of grad is 529.79736328125
Max of net1 0.4632587730884552, #nan of net1 0
Norm of X 54.201881408691406, Norm of Y 54.201881408691406, Norm of a 5098.69775390625
Loss is 112511128.0, max of grad is 815.023681640625
Max of net1 0.4635373055934906, #nan of net1 0
Norm of X 53.75149917602539, Norm of Y 53.75149917602539, Norm of a 5044.27734375
Loss is 112509928.0, max of grad is 616.2705078125
Max of net1 0.46380916237831116, #nan of net1 0
Norm of X 53.4477615356

Loss is 112507728.0, max of grad is 951.560546875
Max of net1 0.47289255261421204, #nan of net1 0
Norm of X 55.339847564697266, Norm of Y 55.339847564697266, Norm of a 5191.68896484375
Loss is 112508784.0, max of grad is 873.7614135742188
Max of net1 0.4731127917766571, #nan of net1 0
Norm of X 45.2882194519043, Norm of Y 45.2882194519043, Norm of a 3520.17333984375
Loss is 46087828.0, max of grad is 633.7943115234375


Epoch: 27, Cost: 15910204012.0

Max of net1 0.47331702709198, #nan of net1 0
Norm of X 57.00288009643555, Norm of Y 57.00288009643555, Norm of a 5604.904296875
Loss is 112508520.0, max of grad is 1235.181396484375
Max of net1 0.4735184907913208, #nan of net1 0
Norm of X 58.71010208129883, Norm of Y 58.71010208129883, Norm of a 5971.54443359375
Loss is 112507776.0, max of grad is 1600.30126953125
Max of net1 0.47372815012931824, #nan of net1 0
Norm of X 59.97893142700195, Norm of Y 59.97893142700195, Norm of a 6244.2919921875
Loss is 112506952.0, max of grad is 1406.9072

Max of net1 0.4840754270553589, #nan of net1 0
Norm of X 67.37666320800781, Norm of Y 67.37666320800781, Norm of a 7904.78515625
Loss is 112507024.0, max of grad is 2181.51025390625
Max of net1 0.48435571789741516, #nan of net1 0
Norm of X 68.72161865234375, Norm of Y 68.72161865234375, Norm of a 8148.27294921875
Loss is 112506424.0, max of grad is 2715.25634765625
Max of net1 0.4846278131008148, #nan of net1 0
Norm of X 68.68366241455078, Norm of Y 68.68366241455078, Norm of a 8242.873046875
Loss is 112504832.0, max of grad is 3851.986328125
Max of net1 0.48490339517593384, #nan of net1 0
Norm of X 69.5466537475586, Norm of Y 69.5466537475586, Norm of a 8468.63671875
Loss is 112503712.0, max of grad is 1652.1400146484375
Max of net1 0.48517608642578125, #nan of net1 0
Norm of X 70.61764526367188, Norm of Y 70.61764526367188, Norm of a 8661.119140625
Loss is 112505880.0, max of grad is 2538.35986328125
Max of net1 0.48545005917549133, #nan of net1 0
Norm of X 69.73316955566406, Norm of

Max of net1 0.4960763454437256, #nan of net1 0
Norm of X 74.81881713867188, Norm of Y 74.81881713867188, Norm of a 9730.24609375
Loss is 112502032.0, max of grad is 4136.10400390625
Max of net1 0.49630582332611084, #nan of net1 0
Norm of X 75.32614135742188, Norm of Y 75.32614135742188, Norm of a 9831.2509765625
Loss is 112499792.0, max of grad is 5038.2841796875
Max of net1 0.4965214729309082, #nan of net1 0
Norm of X 74.94253540039062, Norm of Y 74.94253540039062, Norm of a 9729.1796875
Loss is 112500200.0, max of grad is 2160.512939453125
Max of net1 0.49672871828079224, #nan of net1 0
Norm of X 75.66976928710938, Norm of Y 75.66976928710938, Norm of a 9882.9267578125
Loss is 112497504.0, max of grad is 3230.724609375
Max of net1 0.49693626165390015, #nan of net1 0
Norm of X 76.21515655517578, Norm of Y 76.21515655517578, Norm of a 9968.19140625
Loss is 112500392.0, max of grad is 2309.076904296875
Max of net1 0.4971472918987274, #nan of net1 0
Norm of X 76.5141830444336, Norm of Y 

Norm of X 85.26171875, Norm of Y 85.26171875, Norm of a 12493.2548828125
Loss is 112500320.0, max of grad is 2886.831298828125
Max of net1 0.5069925785064697, #nan of net1 0
Norm of X 86.16026306152344, Norm of Y 86.16026306152344, Norm of a 12770.4443359375
Loss is 112497096.0, max of grad is 3661.730224609375
Max of net1 0.5071909427642822, #nan of net1 0
Norm of X 86.17340087890625, Norm of Y 86.17340087890625, Norm of a 12938.443359375
Loss is 112492336.0, max of grad is 5976.70458984375
Max of net1 0.5073939561843872, #nan of net1 0
Norm of X 88.44288635253906, Norm of Y 88.44288635253906, Norm of a 13402.109375
Loss is 112487752.0, max of grad is 3611.78759765625
Max of net1 0.5076010823249817, #nan of net1 0
Norm of X 87.93270874023438, Norm of Y 87.93270874023438, Norm of a 13593.33984375
Loss is 112492552.0, max of grad is 2977.1875
Max of net1 0.5078103542327881, #nan of net1 0
Norm of X 88.59507751464844, Norm of Y 88.59507751464844, Norm of a 13769.365234375
Loss is 1124904

Norm of X 95.74506378173828, Norm of Y 95.74506378173828, Norm of a 15665.58203125
Loss is 112486824.0, max of grad is 4647.85009765625
Max of net1 0.5163472890853882, #nan of net1 0
Norm of X 95.26774597167969, Norm of Y 95.26774597167969, Norm of a 15589.234375
Loss is 112484808.0, max of grad is 5657.4306640625
Max of net1 0.516572117805481, #nan of net1 0
Norm of X 95.82743072509766, Norm of Y 95.82743072509766, Norm of a 15712.833984375
Loss is 112485904.0, max of grad is 5305.8125
Max of net1 0.5167890787124634, #nan of net1 0
Norm of X 96.34881591796875, Norm of Y 96.34881591796875, Norm of a 15944.626953125
Loss is 112484648.0, max of grad is 5964.064453125
Max of net1 0.5169996619224548, #nan of net1 0
Norm of X 96.61599731445312, Norm of Y 96.61599731445312, Norm of a 16006.4677734375
Loss is 112483448.0, max of grad is 8692.9482421875
Max of net1 0.517207682132721, #nan of net1 0
Norm of X 97.48489379882812, Norm of Y 97.48489379882812, Norm of a 16234.22265625
Loss is 11248

Max of net1 0.525667130947113, #nan of net1 0
Norm of X 108.60203552246094, Norm of Y 108.60203552246094, Norm of a 20495.9453125
Loss is 112469032.0, max of grad is 9597.6171875
Max of net1 0.5258737206459045, #nan of net1 0
Norm of X 109.6299819946289, Norm of Y 109.6299819946289, Norm of a 20761.82421875
Loss is 112471144.0, max of grad is 8526.392578125
Max of net1 0.5260763168334961, #nan of net1 0
Norm of X 109.68035125732422, Norm of Y 109.68035125732422, Norm of a 20891.90625
Loss is 112469840.0, max of grad is 21154.15625
Max of net1 0.5262822508811951, #nan of net1 0
Norm of X 109.60568237304688, Norm of Y 109.60568237304688, Norm of a 20953.4609375
Loss is 112466792.0, max of grad is 16004.94921875
Max of net1 0.5264885425567627, #nan of net1 0
Norm of X 109.4681396484375, Norm of Y 109.4681396484375, Norm of a 20906.45703125
Loss is 112463392.0, max of grad is 14671.2626953125
Max of net1 0.5266936421394348, #nan of net1 0
Norm of X 109.77127075195312, Norm of Y 109.7712707

Norm of X 117.92120361328125, Norm of Y 117.92120361328125, Norm of a 24111.33203125
Loss is 112445640.0, max of grad is 13251.31640625
Max of net1 0.5361562371253967, #nan of net1 0
Norm of X 121.1417236328125, Norm of Y 121.1417236328125, Norm of a 25716.9296875
Loss is 112428112.0, max of grad is 23830.0078125
Max of net1 0.536357581615448, #nan of net1 0
Norm of X 122.9362564086914, Norm of Y 122.9362564086914, Norm of a 26569.302734375
Loss is 112460128.0, max of grad is 30857.099609375
Max of net1 0.5365755558013916, #nan of net1 0
Norm of X 126.3983383178711, Norm of Y 126.3983383178711, Norm of a 28042.08203125
Loss is 112447744.0, max of grad is 21831.279296875
Max of net1 0.5368109345436096, #nan of net1 0
Norm of X 126.9625473022461, Norm of Y 126.9625473022461, Norm of a 28452.20703125
Loss is 112424696.0, max of grad is 19516.822265625
Max of net1 0.5370433330535889, #nan of net1 0
Norm of X 127.3128662109375, Norm of Y 127.3128662109375, Norm of a 28718.400390625
Loss is 

Norm of X 131.03872680664062, Norm of Y 131.03872680664062, Norm of a 29427.693359375
Loss is 112375752.0, max of grad is 17420.265625
Max of net1 0.5457367300987244, #nan of net1 0
Norm of X 130.87448120117188, Norm of Y 130.87448120117188, Norm of a 29640.81640625
Loss is 112421576.0, max of grad is 54203.53515625
Max of net1 0.5459595918655396, #nan of net1 0
Norm of X 130.91127014160156, Norm of Y 130.91127014160156, Norm of a 29603.96484375
Loss is 112410568.0, max of grad is 25647.537109375
Max of net1 0.5461788773536682, #nan of net1 0
Norm of X 131.281005859375, Norm of Y 131.281005859375, Norm of a 29542.48046875
Loss is 112376808.0, max of grad is 23860.74609375
Max of net1 0.5463988780975342, #nan of net1 0
Norm of X 130.80477905273438, Norm of Y 130.80477905273438, Norm of a 29686.0390625
Loss is 112407896.0, max of grad is 16812.529296875
Max of net1 0.5466107130050659, #nan of net1 0
Norm of X 132.16319274902344, Norm of Y 132.16319274902344, Norm of a 30354.435546875
Los

Loss is 112354808.0, max of grad is 19371.19921875
Max of net1 0.5548704862594604, #nan of net1 0
Norm of X 158.9978790283203, Norm of Y 158.9978790283203, Norm of a 44757.15625
Loss is 112387992.0, max of grad is 28238.037109375
Max of net1 0.5550493597984314, #nan of net1 0
Norm of X 157.59515380859375, Norm of Y 157.59515380859375, Norm of a 44133.4765625
Loss is 112379352.0, max of grad is 26192.84375
Max of net1 0.5552221536636353, #nan of net1 0
Norm of X 158.9473114013672, Norm of Y 158.9473114013672, Norm of a 44912.45703125
Loss is 112397480.0, max of grad is 49860.7734375
Max of net1 0.5553910732269287, #nan of net1 0
Norm of X 156.92303466796875, Norm of Y 156.92303466796875, Norm of a 43734.375
Loss is 112371328.0, max of grad is 24775.404296875
Max of net1 0.5555513501167297, #nan of net1 0
Norm of X 158.20411682128906, Norm of Y 158.20411682128906, Norm of a 44007.1015625
Loss is 112296504.0, max of grad is 28064.265625
Max of net1 0.555709958076477, #nan of net1 0
Norm o

Max of net1 0.5623076558113098, #nan of net1 0
Norm of X 184.3302001953125, Norm of Y 184.3302001953125, Norm of a 60948.2578125
Loss is 112345160.0, max of grad is 26133.490234375
Max of net1 0.5624604225158691, #nan of net1 0
Norm of X 185.94647216796875, Norm of Y 185.94647216796875, Norm of a 61917.75390625
Loss is 112307800.0, max of grad is 34214.7578125
Max of net1 0.5626128911972046, #nan of net1 0
Norm of X 185.7073516845703, Norm of Y 185.7073516845703, Norm of a 61805.58203125
Loss is 112238440.0, max of grad is 29036.3515625
Max of net1 0.562770426273346, #nan of net1 0
Norm of X 186.51084899902344, Norm of Y 186.51084899902344, Norm of a 62452.62890625
Loss is 112324248.0, max of grad is 34828.45703125
Max of net1 0.5629304051399231, #nan of net1 0
Norm of X 187.1470947265625, Norm of Y 187.1470947265625, Norm of a 62910.79296875
Loss is 112283392.0, max of grad is 40827.6953125
Max of net1 0.5631012916564941, #nan of net1 0
Norm of X 187.82290649414062, Norm of Y 187.8229

Loss is 112243880.0, max of grad is 65486.9609375
Max of net1 0.5693789720535278, #nan of net1 0
Norm of X 220.67352294921875, Norm of Y 220.67352294921875, Norm of a 89494.109375
Loss is 112285200.0, max of grad is 22797.828125
Max of net1 0.5695293545722961, #nan of net1 0
Norm of X 219.73410034179688, Norm of Y 219.73410034179688, Norm of a 88398.328125
Loss is 112243352.0, max of grad is 69410.6796875
Max of net1 0.569667637348175, #nan of net1 0
Norm of X 218.60731506347656, Norm of Y 218.60731506347656, Norm of a 87395.2890625
Loss is 112269456.0, max of grad is 26377.46484375
Max of net1 0.5697993636131287, #nan of net1 0
Norm of X 219.49423217773438, Norm of Y 219.49423217773438, Norm of a 87685.5078125
Loss is 112218832.0, max of grad is 33864.40625
Max of net1 0.5699189305305481, #nan of net1 0
Norm of X 218.03793334960938, Norm of Y 218.03793334960938, Norm of a 86844.0234375
Loss is 112327464.0, max of grad is 147253.40625
Max of net1 0.5700729489326477, #nan of net1 0
Norm

Loss is 112205136.0, max of grad is 59079.4375
Max of net1 0.5755660533905029, #nan of net1 0
Norm of X 227.67083740234375, Norm of Y 227.67083740234375, Norm of a 93711.625
Loss is 112137496.0, max of grad is 94501.5625
Max of net1 0.5756849646568298, #nan of net1 0
Norm of X 233.36944580078125, Norm of Y 233.36944580078125, Norm of a 99182.6015625
Loss is 112220936.0, max of grad is 41573.12890625
Max of net1 0.5758004784584045, #nan of net1 0
Norm of X 235.73898315429688, Norm of Y 235.73898315429688, Norm of a 100416.2421875
Loss is 112094392.0, max of grad is 47502.91015625
Max of net1 0.5759096145629883, #nan of net1 0
Norm of X 237.43563842773438, Norm of Y 237.43563842773438, Norm of a 102911.5
Loss is 112147976.0, max of grad is 47057.06640625
Max of net1 0.5760062336921692, #nan of net1 0
Norm of X 240.1234130859375, Norm of Y 240.1234130859375, Norm of a 105447.9921875
Loss is 112091224.0, max of grad is 66379.4765625
Max of net1 0.5761162638664246, #nan of net1 0
Norm of X 

Max of net1 0.5803800225257874, #nan of net1 0
Norm of X 275.6044921875, Norm of Y 275.6044921875, Norm of a 140447.078125
Loss is 111995752.0, max of grad is 66531.015625
Max of net1 0.5804615020751953, #nan of net1 0
Norm of X 274.5736389160156, Norm of Y 274.5736389160156, Norm of a 139504.71875
Loss is 112130368.0, max of grad is 46532.046875
Max of net1 0.5805425643920898, #nan of net1 0
Norm of X 274.7057189941406, Norm of Y 274.7057189941406, Norm of a 139733.609375
Loss is 112016912.0, max of grad is 62648.109375
Max of net1 0.5806225538253784, #nan of net1 0
Norm of X 273.818603515625, Norm of Y 273.818603515625, Norm of a 139114.859375
Loss is 112002752.0, max of grad is 67322.78125
Max of net1 0.5806995630264282, #nan of net1 0
Norm of X 277.4819030761719, Norm of Y 277.4819030761719, Norm of a 143132.484375
Loss is 112060872.0, max of grad is 79940.140625
Max of net1 0.5807762742042542, #nan of net1 0
Norm of X 278.00555419921875, Norm of Y 278.00555419921875, Norm of a 143

Max of net1 0.5829036831855774, #nan of net1 0
Norm of X 327.9365539550781, Norm of Y 327.9365539550781, Norm of a 202407.3125
Loss is 111881776.0, max of grad is 58760.453125
Max of net1 0.5829602479934692, #nan of net1 0
Norm of X 328.001953125, Norm of Y 328.001953125, Norm of a 202540.671875
Loss is 112006856.0, max of grad is 38374.5390625
Max of net1 0.5830084085464478, #nan of net1 0
Norm of X 327.4605407714844, Norm of Y 327.4605407714844, Norm of a 202287.859375
Loss is 111954352.0, max of grad is 307865.40625
Max of net1 0.5831038951873779, #nan of net1 0
Norm of X 330.57049560546875, Norm of Y 330.57049560546875, Norm of a 204710.359375
Loss is 111924552.0, max of grad is 70880.09375
Max of net1 0.5831957459449768, #nan of net1 0
Norm of X 330.5736999511719, Norm of Y 330.5736999511719, Norm of a 205820.421875
Loss is 111923256.0, max of grad is 77491.0625
Max of net1 0.5832846164703369, #nan of net1 0
Norm of X 331.6749267578125, Norm of Y 331.6749267578125, Norm of a 20721

Norm of X 347.8152770996094, Norm of Y 347.8152770996094, Norm of a 227280.875
Loss is 111904904.0, max of grad is 116064.0078125
Max of net1 0.5859591364860535, #nan of net1 0
Norm of X 354.67279052734375, Norm of Y 354.67279052734375, Norm of a 237264.171875
Loss is 111806408.0, max of grad is 83188.53125
Max of net1 0.5859816074371338, #nan of net1 0
Norm of X 354.1099548339844, Norm of Y 354.1099548339844, Norm of a 236835.96875
Loss is 111806984.0, max of grad is 71874.6015625
Max of net1 0.5859981775283813, #nan of net1 0
Norm of X 356.31842041015625, Norm of Y 356.31842041015625, Norm of a 239516.453125
Loss is 111833384.0, max of grad is 49455.53125
Max of net1 0.5860036015510559, #nan of net1 0
Norm of X 357.76495361328125, Norm of Y 357.76495361328125, Norm of a 241667.265625
Loss is 111888664.0, max of grad is 59544.85546875
Max of net1 0.586013674736023, #nan of net1 0
Norm of X 359.8083801269531, Norm of Y 359.8083801269531, Norm of a 243955.53125
Loss is 111761664.0, max 

Norm of X 410.4850158691406, Norm of Y 410.4850158691406, Norm of a 319883.28125
Loss is 111651776.0, max of grad is 56493.4609375
Max of net1 0.5865967273712158, #nan of net1 0
Norm of X 409.9459228515625, Norm of Y 409.9459228515625, Norm of a 319761.71875
Loss is 111631936.0, max of grad is 68709.2109375
Max of net1 0.5865907073020935, #nan of net1 0
Norm of X 409.36334228515625, Norm of Y 409.36334228515625, Norm of a 319236.0
Loss is 111557936.0, max of grad is 59577.37890625
Max of net1 0.586574375629425, #nan of net1 0
Norm of X 407.5153503417969, Norm of Y 407.5153503417969, Norm of a 315338.375
Loss is 111650608.0, max of grad is 222888.109375
Max of net1 0.5865609645843506, #nan of net1 0
Norm of X 410.57635498046875, Norm of Y 410.57635498046875, Norm of a 320944.375
Loss is 111679560.0, max of grad is 92386.2421875
Max of net1 0.5865537524223328, #nan of net1 0
Norm of X 407.6784362792969, Norm of Y 407.6784362792969, Norm of a 316172.03125
Loss is 111482792.0, max of grad 

Norm of X 450.3478088378906, Norm of Y 450.3478088378906, Norm of a 386928.625
Loss is 111566104.0, max of grad is 75666.5234375
Max of net1 0.5885547995567322, #nan of net1 0
Norm of X 453.3974304199219, Norm of Y 453.3974304199219, Norm of a 392000.40625
Loss is 111687968.0, max of grad is 102048.890625
Max of net1 0.5886108875274658, #nan of net1 0
Norm of X 452.9862060546875, Norm of Y 452.9862060546875, Norm of a 393042.53125
Loss is 111647576.0, max of grad is 99183.421875
Max of net1 0.5886647701263428, #nan of net1 0
Norm of X 455.0256652832031, Norm of Y 455.0256652832031, Norm of a 394907.5
Loss is 111545568.0, max of grad is 54783.5703125
Max of net1 0.5887048840522766, #nan of net1 0
Norm of X 459.2212219238281, Norm of Y 459.2212219238281, Norm of a 402503.5625
Loss is 111637312.0, max of grad is 69762.3203125
Max of net1 0.5887351036071777, #nan of net1 0
Norm of X 451.7840881347656, Norm of Y 451.7840881347656, Norm of a 390280.96875
Loss is 111444768.0, max of grad is 9

Norm of X 511.2100830078125, Norm of Y 511.2100830078125, Norm of a 501729.125
Loss is 111346344.0, max of grad is 98847.0
Max of net1 0.5885894894599915, #nan of net1 0
Norm of X 516.43603515625, Norm of Y 516.43603515625, Norm of a 512538.15625
Loss is 111281304.0, max of grad is 59618.640625
Max of net1 0.5886017680168152, #nan of net1 0
Norm of X 519.8756103515625, Norm of Y 519.8756103515625, Norm of a 518458.71875
Loss is 111445944.0, max of grad is 68226.734375
Max of net1 0.5886097550392151, #nan of net1 0
Norm of X 520.307373046875, Norm of Y 520.307373046875, Norm of a 520510.5
Loss is 111512664.0, max of grad is 72394.484375
Max of net1 0.5886146426200867, #nan of net1 0
Norm of X 526.5996704101562, Norm of Y 526.5996704101562, Norm of a 532963.75
Loss is 111182960.0, max of grad is 89601.7109375
Max of net1 0.5885974168777466, #nan of net1 0
Norm of X 522.5243530273438, Norm of Y 522.5243530273438, Norm of a 527739.5
Loss is 111333520.0, max of grad is 59028.5625
Max of net

Max of net1 0.5891501307487488, #nan of net1 0
Norm of X 549.33544921875, Norm of Y 549.33544921875, Norm of a 580011.5
Loss is 111440800.0, max of grad is 57946.203125
Max of net1 0.5892000794410706, #nan of net1 0
Norm of X 542.3018798828125, Norm of Y 542.3018798828125, Norm of a 564602.3125
Loss is 111389024.0, max of grad is 109586.6875
Max of net1 0.5892486572265625, #nan of net1 0
Norm of X 544.7117919921875, Norm of Y 544.7117919921875, Norm of a 569507.5625
Loss is 111365232.0, max of grad is 92782.828125
Max of net1 0.5892848968505859, #nan of net1 0
Norm of X 545.27001953125, Norm of Y 545.27001953125, Norm of a 571374.6875
Loss is 111204856.0, max of grad is 157452.640625
Max of net1 0.5893102288246155, #nan of net1 0
Norm of X 547.5384521484375, Norm of Y 547.5384521484375, Norm of a 576626.625
Loss is 111270584.0, max of grad is 96008.5390625
Max of net1 0.589321494102478, #nan of net1 0
Norm of X 549.8402099609375, Norm of Y 549.8402099609375, Norm of a 581078.375
Loss i

Norm of X 625.1358032226562, Norm of Y 625.1358032226562, Norm of a 755860.4375
Loss is 111266392.0, max of grad is 218951.953125
Max of net1 0.5885854363441467, #nan of net1 0
Norm of X 630.9295043945312, Norm of Y 630.9295043945312, Norm of a 770203.6875
Loss is 110888168.0, max of grad is 182860.0
Max of net1 0.5885704755783081, #nan of net1 0
Norm of X 623.358642578125, Norm of Y 623.358642578125, Norm of a 750875.25
Loss is 111284104.0, max of grad is 269751.0
Max of net1 0.5885848999023438, #nan of net1 0
Norm of X 620.7526245117188, Norm of Y 620.7526245117188, Norm of a 744150.125
Loss is 111081856.0, max of grad is 66068.3203125
Max of net1 0.5885926485061646, #nan of net1 0
Norm of X 620.0438232421875, Norm of Y 620.0438232421875, Norm of a 741702.75
Loss is 110887704.0, max of grad is 194407.6875
Max of net1 0.5886175036430359, #nan of net1 0
Norm of X 621.5392456054688, Norm of Y 621.5392456054688, Norm of a 745664.75
Loss is 111074872.0, max of grad is 99844.09375
Max of n

Max of net1 0.5878119468688965, #nan of net1 0
Norm of X 663.7882690429688, Norm of Y 663.7882690429688, Norm of a 851321.1875
Loss is 110834200.0, max of grad is 106508.71875
Max of net1 0.587775707244873, #nan of net1 0
Norm of X 678.8777465820312, Norm of Y 678.8777465820312, Norm of a 892296.75
Loss is 110931928.0, max of grad is 113511.2578125
Max of net1 0.5877485871315002, #nan of net1 0
Norm of X 676.4735717773438, Norm of Y 676.4735717773438, Norm of a 883006.5625
Loss is 111083264.0, max of grad is 127919.9375
Max of net1 0.5877270102500916, #nan of net1 0
Norm of X 671.8607788085938, Norm of Y 671.8607788085938, Norm of a 875732.1875
Loss is 111030160.0, max of grad is 102849.171875
Max of net1 0.5877039432525635, #nan of net1 0
Norm of X 682.1654663085938, Norm of Y 682.1654663085938, Norm of a 900208.25
Loss is 110823032.0, max of grad is 67911.6796875
Max of net1 0.5876756310462952, #nan of net1 0
Norm of X 677.5735473632812, Norm of Y 677.5735473632812, Norm of a 888186.

Max of net1 0.5872829556465149, #nan of net1 0
Norm of X 742.1259765625, Norm of Y 742.1259765625, Norm of a 1068795.375
Loss is 110738840.0, max of grad is 141391.53125
Max of net1 0.5872637033462524, #nan of net1 0
Norm of X 743.6378784179688, Norm of Y 743.6378784179688, Norm of a 1072875.375
Loss is 110875608.0, max of grad is 108550.4453125
Max of net1 0.5872507095336914, #nan of net1 0
Norm of X 739.174560546875, Norm of Y 739.174560546875, Norm of a 1061603.5
Loss is 110739768.0, max of grad is 62020.94140625
Max of net1 0.5872299671173096, #nan of net1 0
Norm of X 746.570556640625, Norm of Y 746.570556640625, Norm of a 1081995.875
Loss is 110893288.0, max of grad is 46020.13671875
Max of net1 0.5872015953063965, #nan of net1 0
Norm of X 743.3355712890625, Norm of Y 743.3355712890625, Norm of a 1074285.875
Loss is 110717448.0, max of grad is 98378.546875
Max of net1 0.5871698260307312, #nan of net1 0
Norm of X 746.1515502929688, Norm of Y 746.1515502929688, Norm of a 1080240.75


Max of net1 0.5859016180038452, #nan of net1 0
Norm of X 815.3462524414062, Norm of Y 815.3462524414062, Norm of a 1296550.875
Loss is 110641192.0, max of grad is 114829.78125
Max of net1 0.5858827829360962, #nan of net1 0
Norm of X 815.0307006835938, Norm of Y 815.0307006835938, Norm of a 1292670.25
Loss is 110531512.0, max of grad is 85087.640625
Max of net1 0.5858603715896606, #nan of net1 0
Norm of X 811.2401733398438, Norm of Y 811.2401733398438, Norm of a 1280847.5
Loss is 110754648.0, max of grad is 76772.2734375
Max of net1 0.5858348608016968, #nan of net1 0
Norm of X 821.6982421875, Norm of Y 821.6982421875, Norm of a 1315441.75
Loss is 110420032.0, max of grad is 89683.4765625
Max of net1 0.5858019590377808, #nan of net1 0
Norm of X 830.6536865234375, Norm of Y 830.6536865234375, Norm of a 1345984.75
Loss is 110716520.0, max of grad is 283841.75
Max of net1 0.5857853889465332, #nan of net1 0
Norm of X 827.0392456054688, Norm of Y 827.0392456054688, Norm of a 1333971.125
Loss 

Max of net1 0.585058331489563, #nan of net1 0
Norm of X 864.5492553710938, Norm of Y 864.5492553710938, Norm of a 1455316.875
Loss is 110485056.0, max of grad is 82240.4296875
Max of net1 0.5850672721862793, #nan of net1 0
Norm of X 858.1315307617188, Norm of Y 858.1315307617188, Norm of a 1433318.875
Loss is 110121160.0, max of grad is 86229.6953125
Max of net1 0.5850667357444763, #nan of net1 0
Norm of X 858.2411499023438, Norm of Y 858.2411499023438, Norm of a 1433300.5
Loss is 110451616.0, max of grad is 106962.9765625
Max of net1 0.5850667357444763, #nan of net1 0
Norm of X 855.1800537109375, Norm of Y 855.1800537109375, Norm of a 1422269.25
Loss is 110060880.0, max of grad is 48493.6796875
Max of net1 0.5850592851638794, #nan of net1 0
Norm of X 849.3012084960938, Norm of Y 849.3012084960938, Norm of a 1403498.25
Loss is 110507352.0, max of grad is 105959.0
Max of net1 0.5850539803504944, #nan of net1 0
Norm of X 844.8026733398438, Norm of Y 844.8026733398438, Norm of a 1385126.5

Max of net1 0.5844239592552185, #nan of net1 0
Norm of X 918.660400390625, Norm of Y 918.660400390625, Norm of a 1641644.25
Loss is 110421888.0, max of grad is 150543.984375
Max of net1 0.5844696760177612, #nan of net1 0
Norm of X 918.0947265625, Norm of Y 918.0947265625, Norm of a 1639471.75
Loss is 109981152.0, max of grad is 231885.25
Max of net1 0.5845077037811279, #nan of net1 0
Norm of X 913.7503051757812, Norm of Y 913.7503051757812, Norm of a 1625941.25
Loss is 109900520.0, max of grad is 182135.78125
Max of net1 0.5845303535461426, #nan of net1 0
Norm of X 899.4612426757812, Norm of Y 899.4612426757812, Norm of a 1576376.125
Loss is 110358744.0, max of grad is 212308.125
Max of net1 0.5845513343811035, #nan of net1 0
Norm of X 899.8388671875, Norm of Y 899.8388671875, Norm of a 1574983.75
Loss is 110080360.0, max of grad is 106981.109375
Max of net1 0.5845676064491272, #nan of net1 0
Norm of X 911.4712524414062, Norm of Y 911.4712524414062, Norm of a 1618031.125
Loss is 110685

Norm of X 957.9904174804688, Norm of Y 957.9904174804688, Norm of a 1788704.5
Loss is 109918128.0, max of grad is 154335.09375
Max of net1 0.5847348570823669, #nan of net1 0
Norm of X 940.2684326171875, Norm of Y 940.2684326171875, Norm of a 1716738.625
Loss is 110290040.0, max of grad is 66520.46875
Max of net1 0.5847345590591431, #nan of net1 0
Norm of X 948.1406860351562, Norm of Y 948.1406860351562, Norm of a 1750645.875
Loss is 109919976.0, max of grad is 113796.984375
Max of net1 0.5847211480140686, #nan of net1 0
Norm of X 959.1178588867188, Norm of Y 959.1178588867188, Norm of a 1789597.875
Loss is 109931096.0, max of grad is 120332.734375
Max of net1 0.5847024917602539, #nan of net1 0
Norm of X 955.6436157226562, Norm of Y 955.6436157226562, Norm of a 1772812.875
Loss is 110112816.0, max of grad is 144980.15625
Max of net1 0.5846846103668213, #nan of net1 0
Norm of X 942.4620971679688, Norm of Y 942.4620971679688, Norm of a 1731515.875
Loss is 109665288.0, max of grad is 15724

Max of net1 0.5834853649139404, #nan of net1 0
Norm of X 1055.6951904296875, Norm of Y 1055.6951904296875, Norm of a 2176523.5
Loss is 109886624.0, max of grad is 165772.078125
Max of net1 0.5834876298904419, #nan of net1 0
Norm of X 1047.560546875, Norm of Y 1047.560546875, Norm of a 2144371.5
Loss is 110040480.0, max of grad is 92422.1328125
Max of net1 0.5834906101226807, #nan of net1 0
Norm of X 1046.1505126953125, Norm of Y 1046.1505126953125, Norm of a 2140310.75
Loss is 110233424.0, max of grad is 240073.875
Max of net1 0.5834973454475403, #nan of net1 0
Norm of X 1057.9730224609375, Norm of Y 1057.9730224609375, Norm of a 2186467.25
Loss is 110031208.0, max of grad is 266199.65625
Max of net1 0.5835028886795044, #nan of net1 0
Norm of X 1056.939453125, Norm of Y 1056.939453125, Norm of a 2181412.75
Loss is 110183312.0, max of grad is 85294.53125
Max of net1 0.5835056304931641, #nan of net1 0
Norm of X 1055.7562255859375, Norm of Y 1055.7562255859375, Norm of a 2176987.0
Loss is

Norm of X 1134.988037109375, Norm of Y 1134.988037109375, Norm of a 2518141.75
Loss is 109903296.0, max of grad is 91137.96875
Max of net1 0.5831349492073059, #nan of net1 0
Norm of X 1122.0367431640625, Norm of Y 1122.0367431640625, Norm of a 2458874.75
Loss is 109716208.0, max of grad is 137043.203125
Max of net1 0.5831244587898254, #nan of net1 0
Norm of X 1109.05126953125, Norm of Y 1109.05126953125, Norm of a 2407022.5
Loss is 109128168.0, max of grad is 161839.59375
Max of net1 0.5831186175346375, #nan of net1 0
Norm of X 1122.6326904296875, Norm of Y 1122.6326904296875, Norm of a 2464452.0
Loss is 109494120.0, max of grad is 271773.125
Max of net1 0.583113431930542, #nan of net1 0
Norm of X 1130.18408203125, Norm of Y 1130.18408203125, Norm of a 2493121.0
Loss is 109336936.0, max of grad is 190025.84375
Max of net1 0.5831027030944824, #nan of net1 0
Norm of X 1126.8092041015625, Norm of Y 1126.8092041015625, Norm of a 2484880.25
Loss is 109608840.0, max of grad is 225702.734375


Norm of X 1202.7603759765625, Norm of Y 1202.7603759765625, Norm of a 2832954.5
Loss is 109579224.0, max of grad is 132955.859375
Max of net1 0.5823768973350525, #nan of net1 0
Norm of X 1202.287353515625, Norm of Y 1202.287353515625, Norm of a 2833112.5
Loss is 109307368.0, max of grad is 139635.53125
Max of net1 0.5823647975921631, #nan of net1 0
Norm of X 1199.8221435546875, Norm of Y 1199.8221435546875, Norm of a 2818778.25
Loss is 109251936.0, max of grad is 154697.28125
Max of net1 0.5823562145233154, #nan of net1 0
Norm of X 1200.1353759765625, Norm of Y 1200.1353759765625, Norm of a 2822258.75
Loss is 109392408.0, max of grad is 159023.078125
Max of net1 0.5823439359664917, #nan of net1 0
Norm of X 1206.781005859375, Norm of Y 1206.781005859375, Norm of a 2853425.25
Loss is 109623640.0, max of grad is 146140.9375
Max of net1 0.5823341012001038, #nan of net1 0
Norm of X 1204.484375, Norm of Y 1204.484375, Norm of a 2839941.0
Loss is 109006320.0, max of grad is 135536.359375
Max 

Loss is 109703240.0, max of grad is 112146.9375
Max of net1 0.5818589925765991, #nan of net1 0
Norm of X 1263.196044921875, Norm of Y 1263.196044921875, Norm of a 3127389.0
Loss is 109532968.0, max of grad is 81583.9375
Max of net1 0.5818504095077515, #nan of net1 0
Norm of X 1260.61328125, Norm of Y 1260.61328125, Norm of a 3115635.5
Loss is 109788432.0, max of grad is 131734.015625
Max of net1 0.5818419456481934, #nan of net1 0
Norm of X 1260.4306640625, Norm of Y 1260.4306640625, Norm of a 3120778.5
Loss is 109579224.0, max of grad is 116259.6953125
Max of net1 0.5818307399749756, #nan of net1 0
Norm of X 1256.16943359375, Norm of Y 1256.16943359375, Norm of a 3093088.25
Loss is 108891920.0, max of grad is 162895.09375
Max of net1 0.5818204879760742, #nan of net1 0
Norm of X 1252.9923095703125, Norm of Y 1252.9923095703125, Norm of a 3082368.5
Loss is 109035856.0, max of grad is 115571.1171875
Max of net1 0.5818110704421997, #nan of net1 0
Norm of X 1259.78271484375, Norm of Y 1259.

Max of net1 0.5811917781829834, #nan of net1 0
Norm of X 1372.998779296875, Norm of Y 1372.998779296875, Norm of a 3700379.5
Loss is 109179880.0, max of grad is 173380.828125
Max of net1 0.5812029242515564, #nan of net1 0
Norm of X 1375.3304443359375, Norm of Y 1375.3304443359375, Norm of a 3717726.0
Loss is 109117080.0, max of grad is 142024.0625
Max of net1 0.5812163352966309, #nan of net1 0
Norm of X 1356.3460693359375, Norm of Y 1356.3460693359375, Norm of a 3613113.5
Loss is 108807080.0, max of grad is 176981.953125
Max of net1 0.5812312364578247, #nan of net1 0
Norm of X 1376.081298828125, Norm of Y 1376.081298828125, Norm of a 3715058.5
Loss is 109434928.0, max of grad is 113250.5
Max of net1 0.5812443494796753, #nan of net1 0
Norm of X 1364.4832763671875, Norm of Y 1364.4832763671875, Norm of a 3657684.25
Loss is 109251936.0, max of grad is 321698.1875
Max of net1 0.581253170967102, #nan of net1 0
Norm of X 1351.973876953125, Norm of Y 1351.973876953125, Norm of a 3589927.75
Lo

Max of net1 0.5814482569694519, #nan of net1 0
Norm of X 1408.9705810546875, Norm of Y 1408.9705810546875, Norm of a 3895674.25
Loss is 108613544.0, max of grad is 234879.25
Max of net1 0.5814502239227295, #nan of net1 0
Norm of X 1390.412353515625, Norm of Y 1390.412353515625, Norm of a 3788783.5
Loss is 108587760.0, max of grad is 182259.171875
Max of net1 0.5814458131790161, #nan of net1 0
Norm of X 1415.925048828125, Norm of Y 1415.925048828125, Norm of a 3936144.75
Loss is 109827328.0, max of grad is 234604.34375
Max of net1 0.5814451575279236, #nan of net1 0
Norm of X 1406.4483642578125, Norm of Y 1406.4483642578125, Norm of a 3883596.0
Loss is 108784952.0, max of grad is 227148.203125
Max of net1 0.581450343132019, #nan of net1 0
Norm of X 1398.085693359375, Norm of Y 1398.085693359375, Norm of a 3834888.25
Loss is 110928672.0, max of grad is 2242411.0
Max of net1 0.5815619826316833, #nan of net1 0
Norm of X 1417.19921875, Norm of Y 1417.19921875, Norm of a 3938922.0
Loss is 108

RuntimeError: Function 'SvdHelperBackward0' returned nan values in its 0th output.

In [None]:

15909857544.0
15910955228.0
15910548268.0
15910719520.0
15910903424.0
15911022996.0