In [21]:
import torch
import torch.nn.functional as F
from torch.nn import Parameter
from torch_scatter import scatter_mean
from torch_geometric.utils import remove_self_loops, add_self_loops
from pprint import pprint
from random import shuffle

from torch_geometric.nn.inits import uniform
#from ..inits import uniform

class LinkSAGEConv(torch.nn.Module):
    r"""
       This work, LinkGraphSAGE PyTorch Geometric implementation, is based
       on the previous work GraphSAGE (Hamilton et. al) and PyTorch Geometric.
    
    
    The GraphSAGE operator from the `"Inductive Representation Learning on
    Large Graphs" <https://arxiv.org/abs/1706.02216>`_ paper

    .. math::
        \mathbf{\hat{x}}_i &= \mathbf{\Theta} \cdot
        \mathrm{mean}_{j \in \mathcal{N(i) \cup \{ i \}}}(\mathbf{x}_j)

        \mathbf{x}^{\prime}_i &= \frac{\mathbf{\hat{x}}_i}
        {\| \mathbf{\hat{x}}_i \|_2}.

    Args:
        in_channels (int): Size of each input sample.
        out_channels (int): Size of each output sample.
        normalize (bool, optional): If set to :obj:`False`, output features
            will not be :math:`\ell^2`-normalized.
        bias (bool, optional): If set to :obj:`False`, the layer will not learn
            an additive bias. (default: :obj:`True`)
    """

    def __init__(self, 
                 in_channels, 
                 out_channels, 
                 sampling_size=4,
                 k=1,
                 normalize=True, 
                 bias=True):
        super(LinkSAGEConv, self).__init__()

        self.sampling_size = sampling_size
        self.K = k
        self.in_channels = in_channels
        self.out_channels = out_channels
        self.normalize = normalize
        #self.weight = Parameter(torch.Tensor(self.sampling_size, self.out_channels))
        self.weight = Parameter(torch.Tensor(self.in_channels, self.in_channels))

        
        if bias:
            self.bias = Parameter(torch.Tensor(self.in_channels))
        else:
            self.register_parameter('bias', None)

        self.reset_parameters()

    def reset_parameters(self):
        size = self.weight.size(0)
        uniform(size, self.weight)
        uniform(size, self.bias)

    def forward(self, x, edge_neighbors):
        """
            - where's the sampling???
            - why ther's no normalization? (the message part) -> it's done here
            - the scatter_mean is also done..
            CONCLUSION: it's a more compact way to write it.

            BUT NO SAMPLING WHICH IS STRANGE
            NO K depth sampling! 

        """
        
        # edge_neighbor extraction oand sampling could be repeated inside the K loop..
        edge_neighbors, _ = remove_self_loops(edge_neighbors)
        #edge_neighbors = add_self_loops(edge_neighbors, num_nodes=x.size(0))

        x = x.unsqueeze(-1) if x.dim() == 1 else x
        row, col = edge_neighbors

        # sampling 1 level aggregation
        srow, scol = self.samplingNeighbors2(edge_neighbors, self.sampling_size)
        #srow, scol = self.samplingNeighbors(edge_neighbors, 4)

        for k in range(self.K):

            #print("scol",scol)
            #print("sampled x, x[scol]",x[scol])
            out = scatter_mean(x[scol], srow, dim=0, dim_size=x.size(0))
            #print("scatter_mean result: ",out)
            #print("weight.transpose()", torch.transpose(self.weight,0,1))
            #print("weight", self.weight)
            #out = torch.matmul(torch.transpose(out,0,1), self.weight)
            
            # out: 7x1      -> 7x1
            # weight: 7x15  -> 15x7
            #out = torch.matmul( out,self.weight ) 
            #out = torch.matmul( torch.transpose(out,0,1),self.weight ) 
            #out = torch.matmul( self.weight,out ) 
            #out = torch.matmul( torch.transpose(self.weight,0,1),out ) 
            #print("weight first col: ",self.weight[:,0].size())
            #print("out first col: ",out[:,0].size())
            out = torch.matmul(self.weight, out)
            #print(out.size())
            #print(out)
            #print(x.size())
            
            # ouptu must be 7x1? 7xnum-filters
            
            if self.bias is not None:
                out = out + self.bias

            if self.normalize:
                out = F.normalize(out, p=2, dim=-1)
            
            #print("out",out)
            x = out
            
            
        return out

    def __repr__(self):
        return '{}({}, {})'.format(self.__class__.__name__, self.in_channels,
                                   self.out_channels)

    def samplingNeighbors2(self, neighbors,m):
        
        result = torch.LongTensor([]).to(neighbors.device)
        #print("neighbors",neighbors)
        
        # iterate over tensor and get indices
        
        i1=0
        i2=i1
        i = 0
        for elem in neighbors[0]:
            #print("i1: ",i1,"  i2: ",i2)
            #print( neighbors[0, i], int(elem.item()), neighbors[0 , i] != int(elem.item()))
            if neighbors[0 , i1] != int(elem.item()):
                if i1!=-1 and i2!=-1:
                    # close previous edge list
                    #subvect = neighbors[0][i1:i2]
                
                    # shuffle
                    #subvect = subvect[torch.randperm(subvect.size()[0])]
                    subvect = torch.LongTensor(np.arange(i1,i2)).to(neighbors.device)
                    #print(np.arange(i1,i2))
                    #print(subvect)
                
                
                    # trim by slicing
                    subvect = subvect[torch.randperm(subvect.size()[0])]
                    subvect = subvect[:m]
                    #print("m",m)
                    #print("subvect.size()", subvect.size())
                    #print("torch.randperm(subvect.size()[0])",torch.randperm(subvect.size()[0]))
                    
                    #print("result before cat",result)
                    #print(type(result))
                    #print(type(subvect))
                    # append to result
                    result = torch.cat([result,subvect], dim=0)
                    #print("result after cat",result)
                    #print()
                    
                    i1 = i

            else:
                pass
                #print(" equal, do nothing just increment i and i2")
             
            i+=1
            i2=i
                
        #print(" final result", result)
        #print(neighbors[0, result], neighbors[1, result])
        return neighbors[0, result], neighbors[1, result]
        
  
    
class NetLSAGE1(torch.nn.Module):
    def __init__(self, dataset, d1=16,d2=16, sampling_size=4, k=1):
        super(NetLSAGE1, self).__init__()
        self.conv1 = LinkSAGEConv(
            int(dataset.y.size()[0]), 
            int(dataset.y.size()[0]), 
            sampling_size,
            k,
            normalize=False,
            bias=False)
        self.fc1 = nn.Linear(dataset.num_features, d2)
        self.fc2 = nn.Linear(d2, dataset.num_features)
        self.d1 = d1
        self.d2 = d2
        print("init NetLSAGE1 ",dataset.y.size()[0])
        

    def forward(self, data):
        x, edge_neighbors = data.x, data.edge_neighbors

        x = self.conv1(x, edge_neighbors)
        x = F.relu(x)
        x = F.dropout(x, training=self.training)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)

        # output as multiclass target
        #return F.log_softmax(x, dim=1)
        
        # output as regression target
        return x
    
    def __str__(self):
        return "Net1-gcn(%d,%d)-gcn(%d,%d)" % (dataset.num_features,self.d1,self.d1,
                                               dataset.num_classes)
    


In [28]:
import torch
import torch.nn.functional as F
#from torch_geometric.nn import GCNConv
#from torch_geometric.nn import LinkGCNConv
#from torch_geometric.nn import SAGEConv
#from torch_geometric.nn import LinkSAGEConv
import torch.nn as nn
from pprint import pprint

import networkx as nx
import time
from torch_geometric.data import DataLoader
import importlib
from torch_geometric.data import Data
import pickle
import numpy as np


class MyOwnDataset2():
    def __init__(self,  root, name, transform=None, pre_transform=None):
        f = open(name, 'rb')
        self.data = pickle.load(f) 
        #print(self.data)
        #print(self.data.edge_index)
        #print(self.data.num_features)
        self.num_features = self.data.num_features
        self.num_classes = 1
        self.filename = name
        
        # prepare edge_neighbors
        edges_dict = {}
        i=0
        for edge in self.data.edge_index[0]:
            edges_dict[i]= (self.data.edge_index[0][i],
                            self.data.edge_index[1][i])
            i+=1
            
        #print("\n edges_dict:")
        #pprint(edges_dict)
        #print("\n")
        """
            {
            0 : (1,3),
            1 : (1,4),
            2 : (2,0)
            ...
            }
            
            edge_neighbors= [[],[]]
            for edge in edges_dict.keys():
                for node in edges_dict[edge]:
                    for edge2 in edges_dict.keys():
                        if edge2 != edge and \
                           ( edges_dict[edge2][0] == node or \
                             edges_dict[edge2][1] == node ):
                             edge_neighbors[0].append(edge)
                             edge_neighbors[1].append(edge2)
                             
        """
        edge_neighbors= [[],[]]
        for edge in edges_dict.keys():
            for node in edges_dict[edge]:
                for edge2 in edges_dict.keys():
                    if edge2 != edge and ( edges_dict[edge2][0] == node or edges_dict[edge2][1] == node ):
                        edge_neighbors[0].append(edge)
                        edge_neighbors[1].append(edge2)
        
        self.data.edge_neighbors = torch.LongTensor(edge_neighbors)
        
        #print()
        #print("edge_neighbors")
        #pprint(self.data.edge_neighbors)
        #print()
        #print(type(self.data.edge_neighbors))
        #print()
        
        f.close()

   



def loadDataset(collection, name=None):
    try:
        # import datasets
        themodule = importlib.import_module("torch_geometric.datasets")
        # get the function corresponding to collection
        method_to_call = getattr(themodule, collection)
        if name:
            dataset = method_to_call(root='./data/'+str(collection), name=name)
            dataset.filename = name
            return dataset
        else:
            return method_to_call(root='./data/'+str(collection)) 
    except:
        # custom module
        method_to_call = globals()[collection]
       
        if name:
            
            dataset = method_to_call(root='./data/'+str(collection), name=name)
            dataset.filename = name
            return dataset
        else:
            return method_to_call(root='./data/'+str(collection)) 
        


def transformMask(mask):
    train_mask = []
    i = 0
    for pick in mask:
        if pick[0]==1:
            train_mask.append(i)
        i+=1
    return train_mask


def shuffleTrainTestMasks(data, trainpct = 0.7):
    ysize = list(data.y.size())[0]
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[int(ysize*trainpct):] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
    
    data.train_mask = transformMask(data.train_mask)
    data.test_mask = transformMask(data.test_mask)
  

def shuffleTrainTestValMasks(data, trainpct = 0.7, valpct = 0.2):

    ysize = list(data.y.size())[0]
    #print("total ", ysize)
    #print(" train ",int(ysize*trainpct)-int(ysize*trainpct*valpct))
    #print(" val ",int(ysize*trainpct*valpct))
    #print(" test ",int(ysize*(1- trainpct) ))
    data.train_mask = torch.zeros(ysize,1, dtype=torch.long)
    data.train_mask[:int(ysize*trainpct)] = 1
    data.train_mask = data.train_mask[torch.randperm(ysize)]
    #print(" train sum ",data.train_mask.sum())
    data.test_mask = torch.ones(ysize,1, dtype=torch.long) - data.train_mask
    #print(" test sum ",data.test_mask.sum())
    
    # transform to list of indexes
    data.train_mask = transformMask(data.train_mask)
    data.test_mask = transformMask(data.test_mask)
    
    data.val_mask = data.train_mask[:int(ysize*trainpct*valpct)]
    data.train_mask = data.train_mask[int(ysize*trainpct*valpct):]

    
    #print(data.train_mask)
    #print(data.val_mask)
    #print(data.test_mask)
    
    

def trainTestEval(dataset, epochs=1, batch_size=32):
    global Net
    loader = DataLoader(dataset,  shuffle=False)
    i = 0
    #print(loader)
    #print(dir(loader))
    
    G = dataset.data
    print(G)
    start = time.time()


    # 1.  prepare model
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #print("using ",device)
    model = Net.to(device)  
    data = G.to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
    model.train()

    # 2.  create a train_mask, and a test_mask (val_mask for further experiments)
    #shuffleTrainTestMasks(data)
    #shuffleTrainTestValMasks(data)
    shuffleTrainTestMasks(data, trainpct=0.7)

    # 3. train some epochs
    for epoch in range(epochs):
        optimizer.zero_grad()
        out = model(data)
        #print("out",out[data.train_mask],out[data.train_mask].size())
        #print("targetr ",data.y[data.train_mask],data.y[data.train_mask].size())
        #print(data.train_mask)
        loss = F.mse_loss(out[data.train_mask], data.y[data.train_mask])
        loss.backward()
        optimizer.step()
        if epoch % 25 == 0 :
            print("epoch-loss: ",epoch, loss)

    # 4. Model evaluation
    model.eval()
    #  classification in a multiclass setting
    #_, pred = model(data).max(dim=1)
    #correct = pred[data.test_mask].eq(data.y[data.test_mask]).sum().item()
    #acc = correct / data.test_mask.sum().item()
    #print('Accuracy: {:.4f}'.format(acc))


    # regression 
    pred = model(data)
    #print("target: ",data.y[data.test_mask])
    #print("prediction: ",pred[data.test_mask])
    #print(pred[data.test_mask].type())
    #print(data.y[data.test_mask].type())
    
    # prepare the normalized mean root squared error
    t = data.y[data.test_mask]
    y = pred[data.test_mask]
    nrmse = torch.sum((t - y) ** 2)/len(data.test_mask)
    nrmse = nrmse.sqrt()
    print("RMSE: ",nrmse)

    #m = torch.mean(t)
    #print("mean",m)
    #tmax = torch.max(t)
    #tmin = torch.min(t)
    #sd = tmax-tmin
    #print("sd",sd)
    #nrmse = (nrmse - m)/sd
    #print("NRMSE:",nrmse)


    endtime = time.time()
    print("Total train-test time: "+str(endtime-start))
    
    with open("results.txt","a") as f:
        #print(dir(dataset))
        f.write("\n")
        f.write(str(model)+" " 
                +str(dataset.filename)+" "  
                +"nrmse: "+str(nrmse.item())+" " 
                +"total time: "+str(endtime-start) 
                +" negative vals?: "+str(False) 
                +"\n"
               )
    
    del model

    #i+=1
    #if i==1:
    #    break

In [35]:
#dataset = loadDataset(collection='MyOwnDataset2', name='precomputed/er_10_0_10_nb.pickle')
dataset = MyOwnDataset2(
    root='', 
    name='precomputed/er_5_0_45_eb.pickle')
#print(dir(dataset.data))
#print()
global Net
Net=NetLSAGE1(dataset.data, d1=30,d2=5, sampling_size=4,k=3)
torch.set_printoptions(profile="full")
trainTestEval(dataset,  epochs=300)
del Net
torch.set_printoptions(profile="default")

init NetLSAGE1  7
Data(edge_index=[2, 7], edge_neighbors=[2, 28], x=[7, 1], y=[7])
epoch-loss:  0 tensor(0.1360, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0052, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(0.0020, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  125 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  150 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  175 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  200 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  225 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  250 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  275 tensor(0.0017, device='cuda:0', grad_fn=<MseLossBackward>)
RMSE:  ten

In [76]:
a = torch.LongTensor([[1,2,3,4],[0,1,1,0]])
print(a)
print(a[0][1], a[0,1])
print(a[1][1], a[1,1])
b = torch.ByteTensor([True, False, True, False])
print(a[0][b], a[0,b])
b = torch.LongTensor([3,2,0])
print(a[0][b], a[0,b])
d = torch.LongTensor([1,1,1,1,1,1,1,1,1])
c = torch.cat([b,d], dim=0)
print(c)

tensor([[1, 2, 3, 4],
        [0, 1, 1, 0]])
tensor(2) tensor(2)
tensor(1) tensor(1)
tensor([1, 3]) tensor([1, 3])
tensor([4, 3, 1]) tensor([4, 3, 1])
tensor([3, 2, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1])


In [4]:
#dataset = loadDataset(collection='MyOwnDataset2', name='precomputed/er_10_0_10_nb.pickle')
dataset = MyOwnDataset2(
    root='', 
    name='precomputed/TUDataset_1765_eb.pickle')
#print(dir(dataset.data))
#print()
global Net
Net=NetLSAGE1(d1=255,d2=5)
trainTestEval(dataset,  epochs=300)
del Net
# before applying that all is done in GPU: 687s and RMSE 0.05

Data(edge_index=[2, 90], edge_neighbors=[2, 2148], x=[90, 1], y=[90])
epoch-loss:  0 tensor(0.1463, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0049, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(0.0010, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(0.0006, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(0.0004, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  125 tensor(0.0003, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  150 tensor(0.0003, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  175 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  200 tensor(0.0004, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  225 tensor(0.0003, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  250 tensor(0.0004, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  275 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
RMSE:  tensor(0.0521, d

In [104]:
#dataset = loadDataset(collection='MyOwnDataset2', name='precomputed/er_10_0_10_nb.pickle')
dataset = MyOwnDataset2(
    root='', 
    name='precomputed/TUDataset_1765_eb.pickle')
#print(dir(dataset.data))
#print()
global Net
Net=NetLSAGE1(d1=255,d2=5)
trainTestEval(dataset,  epochs=300)
del Net
# after applying that all is done in GPU: 27s and RMSE 0.03, 0.04

Data(edge_index=[2, 90], edge_neighbors=[2, 2148], x=[90, 1], y=[90])
epoch-loss:  0 tensor(0.1020, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(0.0009, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(0.0005, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(0.0005, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(0.0006, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  125 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  150 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  175 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  200 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  225 tensor(0.0003, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  250 tensor(0.0002, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  275 tensor(0.0001, device='cuda:0', grad_fn=<MseLossBackward>)
RMSE:  tensor(0.0288, d

In [36]:
#dataset = loadDataset(collection='MyOwnDataset2', name='precomputed/er_10_0_10_nb.pickle')
dataset = MyOwnDataset2(
    root='', 
    name='precomputed/TUDataset_1765_eb.pickle')
#print(dir(dataset.data))
#print()
global Net
#Net=NetLSAGE1(d1=255,d2=5)
Net=NetLSAGE1(dataset.data, d1=30,d2=5, sampling_size=4,k=3)
trainTestEval(dataset,  epochs=300)
del Net
# before applying that all is done in GPU: 687s and RMSE 0.05

init NetLSAGE1  90
Data(edge_index=[2, 90], edge_neighbors=[2, 2148], x=[90, 1], y=[90])
epoch-loss:  0 tensor(0.0004, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(6.5384e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(6.9279e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(5.6953e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(5.6657e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  125 tensor(5.6611e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  150 tensor(5.6606e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  175 tensor(5.6606e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  200 tensor(5.6606e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  225 tensor(5.6606e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  250 tensor(5.6606e-06, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  275 tensor(5.6606e-06, devi

In [37]:
#dataset = loadDataset(collection='MyOwnDataset2', name='precomputed/er_10_0_10_nb.pickle')
dataset = MyOwnDataset2(
    root='', 
    name='precomputed/TUDataset_1765_eb.pickle')
#print(dir(dataset.data))
#print()
global Net
#Net=NetLSAGE1(d1=255,d2=5)
Net=NetLSAGE1(dataset.data, d1=30,d2=5, sampling_size=10,k=7)
trainTestEval(dataset,  epochs=300)
del Net
# before applying that all is done in GPU: 687s and RMSE 0.05

init NetLSAGE1  90
Data(edge_index=[2, 90], edge_neighbors=[2, 2148], x=[90, 1], y=[90])
epoch-loss:  0 tensor(0.0015, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  25 tensor(5.1887e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  50 tensor(2.4448e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  75 tensor(2.4115e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  100 tensor(2.4110e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  125 tensor(2.4113e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  150 tensor(2.4114e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  175 tensor(2.4113e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  200 tensor(2.4112e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  225 tensor(2.4112e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  250 tensor(2.4111e-05, device='cuda:0', grad_fn=<MseLossBackward>)
epoch-loss:  275 tensor(2.4111e-05, devi