In [1]:
# Import required packages and record the installed torch version (nothing is installed here).
import os
import torch
os.environ['TORCH'] = torch.__version__
print(torch.__version__)
%matplotlib inline
import networkx as nx
#import matplotlib
import matplotlib.pyplot as plt

2.0.0


In [None]:
def visualize_graph(G, color):
    """Draw the networkx graph ``G`` on a 7x7 figure without axis ticks.

    Args:
        G: Graph handed to ``nx.draw_networkx``.
        color: Per-node colour values, mapped through the "Set2" colormap.
    """
    # Fixed seed so the spring layout is the same on every run.
    layout = nx.spring_layout(G, seed=42)

    plt.figure(figsize=(7, 7))
    for hide_ticks in (plt.xticks, plt.yticks):
        hide_ticks([])

    nx.draw_networkx(
        G,
        pos=layout,
        with_labels=False,
        node_color=color,
        cmap="Set2",
    )
    plt.show()


def visualize_embedding(h, color, epoch=None, loss=None):
    """Scatter-plot the first two columns of the embedding tensor ``h``.

    Args:
        h: Tensor with at least two columns; it is detached and moved to the
            CPU before plotting.
        color: Per-point colour values, mapped through the "Set2" colormap.
        epoch: Optional epoch number shown in the x-axis label.
        loss: Optional loss (must support ``.item()``) shown in the label.
            The label is only drawn when both ``epoch`` and ``loss`` are given.
    """
    plt.figure(figsize=(7, 7))
    plt.xticks([])
    plt.yticks([])

    points = h.detach().cpu().numpy()
    first_dim, second_dim = points[:, 0], points[:, 1]
    plt.scatter(first_dim, second_dim, s=140, c=color, cmap="Set2")

    have_caption = not (epoch is None or loss is None)
    if have_caption:
        plt.xlabel(f'Epoch: {epoch}, Loss: {loss.item():.4f}', fontsize=16)

    plt.show()

In [None]:
from torch_geometric.datasets import KarateClub

# Load the KarateClub dataset and print a short summary of it.
# NOTE: `dataset` is a kernel-global that the GCN classes below read
# (`dataset.num_features`, `dataset.num_classes`); later cells rebind it to a
# plain list, so those classes only work while this binding is live.
dataset = KarateClub()
print(f'Dataset: {dataset}:')
print('======================')
print(f'Number of graphs: {len(dataset)}')
print(f'Number of features: {dataset.num_features}')
print(f'Number of classes: {dataset.num_classes}')

In [None]:
data = dataset[0]  # Get the first graph object.

print(data)
print('==============================================================')

# Gather some statistics about the graph.
print(f'Number of nodes: {data.num_nodes}')
print(f'Number of edges: {data.num_edges}')
# Edges per node, computed directly from the two counts above.
print(f'Average node degree: {data.num_edges / data.num_nodes:.2f}')
# train_mask.sum() counts the nodes flagged for training.
print(f'Number of training nodes: {data.train_mask.sum()}')
# int(...) converts the tensor sum to a Python number before dividing.
print(f'Training node label rate: {int(data.train_mask.sum()) / data.num_nodes:.2f}')
print(f'Has isolated nodes: {data.has_isolated_nodes()}')
print(f'Has self-loops: {data.has_self_loops()}')
print(f'Is undirected: {data.is_undirected()}')

In [None]:
# Print the edge index transposed, so that each printed row is one edge.
edge_index = data.edge_index
print(edge_index.t())

In [None]:
from torch_geometric.utils import to_networkx

# Convert the graph object to an undirected networkx graph and draw it,
# colouring every node by its label `data.y`.
G = to_networkx(data, to_undirected=True)
visualize_graph(G, color=data.y)

In [None]:
import torch
from torch.nn import Linear
from torch_geometric.nn import GCNConv


class GCN(torch.nn.Module):
    """Three stacked ``GCNConv`` layers with tanh, plus a linear classifier.

    Layer sizes are ``dataset.num_features -> 4 -> 4 -> 2``; the classifier
    maps the 2-dimensional embedding to ``dataset.num_classes`` outputs.
    Both sizes are read from the notebook-global ``dataset``.
    """

    def __init__(self):
        super().__init__()
        # Seed before creating any layer so weight initialisation is repeatable.
        torch.manual_seed(1234)
        self.conv1 = GCNConv(dataset.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        self.classifier = Linear(2, dataset.num_classes)

    def forward(self, x, edge_index):
        """Return ``(out, h)``: classifier output and final embedding."""
        h = x
        for conv in (self.conv1, self.conv2, self.conv3):
            h = conv(h, edge_index).tanh()
        # `h` is now the final GNN embedding; classify it linearly.
        return self.classifier(h), h


model = GCN()
print(model)

In [None]:
# Build a fresh (untrained) model; GCN.__init__ calls torch.manual_seed(1234),
# so the initialisation should be reproducible.
model = GCN()

# The forward pass returns (classifier output, embedding); only the embedding
# is used here.
_, h = model(data.x, data.edge_index)
print(f'Embedding shape: {list(h.shape)}')

visualize_embedding(h, color=data.y)

In [None]:
def foo(x, y):
    """Return a closure that adds its argument to ``x + y``."""
    def bar(z):
        # `x` and `y` are captured from the enclosing call to `foo`.
        partial_sum = x + y
        return partial_sum + z

    return bar


# Bind x=1, y=2 first, then supply z=3.
a = foo(1, 2)
a(3)

In [None]:
from torch.nn import Linear, Parameter
# torch.Tensor(4) allocates a length-4 tensor without initialising its values.
param = torch.Tensor(4)
# Wrap it as a Parameter, then zero the underlying storage in place.
b = Parameter(param)
b.data.zero_()

In [None]:
# Create a 2 -> 2 linear layer, re-initialise its parameters and display it.
# NOTE: this rebinds the notebook-global `a` used by other scratch cells.
a = Linear(2,2)
a.reset_parameters()
a

In [None]:
def mult(x, y):
    """Return the product ``x * y``."""
    product = x * y
    return product


# Functions are ordinary objects: bind `mult` to another name and call it.
a = mult
a(1, 2)

In [None]:
import torch
from torch_geometric.data import Data

# Edges are written here as one (source, target) pair per row ...
edge_index = torch.tensor([[0, 1],
                           [1, 0],
                           [1, 2],
                           [2, 1]], dtype=torch.long)
# One scalar feature for each of the three nodes.
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

# ... so the tensor is transposed (and made contiguous) before being stored.
# NOTE: this rebinds the notebook-global `data` that earlier cells set to the
# KarateClub graph.
data = Data(x=x, edge_index=edge_index.t().contiguous())

In [None]:
# Show the node-feature tensor `x` currently bound in the kernel.
print(x)

In [None]:
# `aggr` was never defined before this cell (and the name later refers to the
# `torch_geometric.nn.aggr` module, which is not callable), so build an actual
# aggregation instance here to keep the cell runnable on a fresh kernel.
from torch_geometric.nn.aggr import MeanAggregation

mean_aggr = MeanAggregation()

# Feature matrix holding 10 elements with 64 features each:
x = torch.randn(10, 64)

# Assign each element to one of three sets:
index = torch.tensor([0, 0, 1, 0, 2, 0, 2, 1, 0, 2])

# Mean of the rows of `x` that share the same set id.
output = mean_aggr(x, index)  #  Output shape: [3, 64]

In [None]:
# Display the torch.nn module object (interactive check only).
torch.nn

In [None]:
from torch_geometric.nn import aggr

# Simple aggregations:
mean_aggr = aggr.MeanAggregation()
max_aggr = aggr.MaxAggregation()  # created for comparison; not used below

# Feature matrix holding 10 elements with 2 features each:
x = torch.randn(10, 2)
print('x: ', x)
# Randomly assign each element to one of (at most) 4 sets, ids in [0, 4):
index = torch.randint(0, 4, (10, ))
print('index: ', index)
output = mean_aggr(x, index)  #  One row per set id, 2 columns (presumably index.max() + 1 rows; confirm)

print('output: ', output)

In [None]:
import numpy
import torch
from torch.nn import Linear, Parameter
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class MyConv(MessagePassing):
    """Bare-bones message-passing layer used to observe plain propagation.

    Only the call to ``propagate`` is active; the commented-out lines are the
    GCN tutorial steps (self-loops, linear transform, normalisation, bias)
    that have been switched off. ``in_channels`` and ``out_channels`` are
    accepted but unused, so the layer has no parameters.
    """
    
    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='add')  # "Add" aggregation (Step 5).
        #self.lin = Linear(in_channels, out_channels, bias=False)
        #self.bias = Parameter(torch.Tensor(out_channels))

        #self.reset_parameters()

    #def reset_parameters(self):
    #    self.lin.reset_parameters()
    #    self.bias.data.zero_()

    def forward(self, x, edge_index):
        # x has shape [N, in_channels]
        # edge_index has shape [2, E]

        # Step 1: Add self-loops to the adjacency matrix.
        #edge_index, _ = add_self_loops(edge_index, num_nodes=x.size(0))

        # Step 2: Linearly transform node feature matrix.
        #x = self.lin(x)

        # Step 3: Compute normalization.
        #row, col = edge_index
        #deg = degree(col, x.size(0), dtype=x.dtype)
        #deg_inv_sqrt = deg.pow(-0.5)
        #deg_inv_sqrt[deg_inv_sqrt == float('inf')] = 0
        #norm = deg_inv_sqrt[row] * deg_inv_sqrt[col]

        # Step 4-5: Start propagating messages.
        #out = self.propagate(edge_index, x=x)

        # Step 6: Apply a final bias vector.
        #out += self.bias

        # With every step above disabled, the raw features are propagated.
        return self.propagate(edge_index, x=x)

    # No `message` override is active, so the base-class default is used
    # (presumably it forwards x_j unchanged, like the version below; confirm).
    #def message(self, x_j):
        # x_j has shape [E, out_channels]

        # Step 4: Normalize node features.
    #    return x_j

# Run the layer on a 3-node path graph 0 - 1 - 2, with both edge directions
# listed, and display the result.
conv = MyConv(1, 1)
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

y = conv(x,edge_index)
y

#class GCN(torch.nn.Module):
#    def __init__(self):
#        super().__init__()
#        #torch.manual_seed(1234)
#        self.conv1 = MyConv(1, 4)

#    def forward(self, x, edge_index):
#        h = self.conv1(x, edge_index)
#        return h

#model = GCN()
#print(model)

#for parameter in model.parameters():
#    print(parameter)

In [None]:
import torch
from torch.nn import Linear, Parameter
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree

class GCNConv(MessagePassing):
    """Hand-written GCN layer built on ``MessagePassing`` with sum aggregation.

    The forward pass adds self-loops, applies a bias-free linear map, scales
    every message by ``deg^-1/2[source] * deg^-1/2[target]``, sums the
    messages per node and finally adds a learnable bias.

    NOTE: this definition shadows the ``GCNConv`` imported from
    ``torch_geometric.nn`` in other cells.
    """

    def __init__(self, in_channels, out_channels):
        super().__init__(aggr='add')  # Sum the incoming messages.
        self.lin = Linear(in_channels, out_channels, bias=False)
        # Allocated uninitialised; reset_parameters() zeroes it right away.
        self.bias = Parameter(torch.Tensor(out_channels))
        self.reset_parameters()

    def reset_parameters(self):
        """Re-initialise the linear weights and zero the bias."""
        self.lin.reset_parameters()
        self.bias.data.zero_()

    def forward(self, x, edge_index):
        """Apply the layer.

        Args:
            x: Node features, shape ``[N, in_channels]``.
            edge_index: Edge list, shape ``[2, E]``.
        """
        num_nodes = x.size(0)

        # Add self-loops, then transform the features.
        edge_index, _ = add_self_loops(edge_index, num_nodes=num_nodes)
        x = self.lin(x)

        # Symmetric normalisation coefficient for every edge.
        source, target = edge_index
        inv_sqrt_deg = degree(target, num_nodes, dtype=x.dtype).pow(-0.5)
        # Zero-degree nodes give inf after pow(-0.5); neutralise them.
        inv_sqrt_deg[inv_sqrt_deg == float('inf')] = 0
        norm = inv_sqrt_deg[source] * inv_sqrt_deg[target]

        # Propagate the scaled messages, then add the bias in place.
        out = self.propagate(edge_index, x=x, norm=norm)
        out += self.bias
        return out

    def message(self, x_j, norm):
        """Scale each neighbour feature row ``x_j`` by its edge coefficient."""
        # x_j has shape [E, out_channels]; norm is reshaped to a column to broadcast.
        scale = norm.view(-1, 1)
        return scale * x_j

# Check of the hand-written GCNConv on a 3-node path graph: one input feature
# per node, four output channels.
conv = GCNConv(1, 4)
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[-1], [0], [1]], dtype=torch.float)

y = conv(x,edge_index)
print(y)

In [None]:
class GCN(torch.nn.Module):
    """Three ``GCNConv`` layers with tanh, followed by a linear classifier.

    Uses whichever ``GCNConv`` is currently bound in the kernel and reads the
    input and output sizes from the notebook-global ``dataset``.
    """

    def __init__(self):
        super().__init__()
        # Seed first so the layer initialisation is repeatable.
        torch.manual_seed(1234)
        self.conv1 = GCNConv(dataset.num_features, 4)
        self.conv2 = GCNConv(4, 4)
        self.conv3 = GCNConv(4, 2)
        self.classifier = Linear(2, dataset.num_classes)

    def forward(self, x, edge_index):
        """Return ``(out, h)``: classifier output and final embedding."""
        h = x
        for layer in (self.conv1, self.conv2, self.conv3):
            h = layer(h, edge_index).tanh()
        # `h` is the final GNN embedding; apply the linear classifier to it.
        out = self.classifier(h)
        return out, h


model = GCN()
print(model)

# Dump every learnable tensor of the freshly built model.
for parameter in model.parameters():
    print(parameter)

In [None]:
import numpy
import torch
from torch.nn import Linear, Parameter
from torch_geometric.nn import MessagePassing
from torch_geometric.utils import add_self_loops, degree
from torch_geometric.nn import aggr


class MyConv(MessagePassing):
    """Message-passing layer that aggregates neighbour features with a median.

    It also passes two extra keyword arguments (``foo``, ``bar``) through
    ``propagate`` to ``message``. ``in_channels`` and ``out_channels`` are
    accepted but unused, so the layer has no parameters.
    """
    
    def __init__(self, in_channels, out_channels):
        super().__init__(aggr=aggr.MedianAggregation())  # Median aggregation, given as a module instance.

    def forward(self, x, edge_index):

        # foo/bar are arbitrary extras handed on to message() by name.
        return self.propagate(edge_index, x=x,foo=3,bar=10)
    
    def message(self,x_j,foo,bar):
        # Debug output showing the extra arguments arrived (3 + 10 = 13).
        print(foo + bar)
        return x_j

# Run the median-aggregating layer on the 3-node path graph 0 - 1 - 2 and
# display the result.
conv = MyConv(1, 1)
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[1], [2], [3]], dtype=torch.float)

y = conv(x,edge_index)
y

#class GCN(torch.nn.Module):
#    def __init__(self):
#        super().__init__()
#        #torch.manual_seed(1234)
#        self.conv1 = MyConv(1, 4)

#    def forward(self, x, edge_index):
#        h = self.conv1(x, edge_index)
#        return h

#model = GCN()
#print(model)

#for parameter in model.parameters():
#    print(parameter)

In [None]:
def mult(x, y=3):
    """Return ``x * y``; ``y`` defaults to 3."""
    product = x * y
    return product


def foo(x):
    """Return ``x`` doubled by calling ``mult`` with an explicit ``y=2``."""
    # The keyword argument overrides mult's default of 3.
    doubled = mult(x, y=2)
    return doubled
foo(1)

In [None]:
from torch_geometric.nn import aggr
# Learnable softmax aggregation; created here but not used below.
softmax_aggr = aggr.SoftmaxAggregation(learn=True)

# Randomly assign elements to 100 sets:
index = torch.randint(0, 100, (1000, ))

mean_aggr = aggr.MeanAggregation()
print(mean_aggr)
# Feature matrix holding 1000 elements with 64 features each:
x = torch.randn(1000, 64)

output = mean_aggr(x,index)  #  Output shape: [100, 64]
# Prints the number of input elements (1000), not the output shape.
print(x.shape[0])

In [None]:
from typing import Optional

import torch
from torch import Tensor
from torch.nn import Parameter

from torch_geometric.nn.aggr import Aggregation
from torch_geometric.utils import softmax


class MyMax(Aggregation):
    r"""An aggregation operator that takes the feature-wise maximum across a
    set of elements

    .. math::
        \mathrm{max}(\mathcal{X}) = \max_{\mathbf{x}_i \in \mathcal{X}}
        \mathbf{x}_i.

    When neither ``index`` nor ``ptr`` is given, all elements are treated as
    a single set, which is what the bare ``MyMax()(x)`` call relies on.
    """
    def forward(self, x: Tensor, index: Optional[Tensor] = None,
                ptr: Optional[Tensor] = None, dim_size: Optional[int] = None,
                dim: int = -2) -> Tensor:
        """Return the per-set maximum of ``x`` along ``dim``.

        Args:
            x: Source tensor.
            index: Set id of every element along ``dim``.
            ptr: Optional compressed set boundaries, used instead of ``index``.
            dim_size: Optional number of output sets.
            dim: Dimension along which to aggregate.
        """
        # Fall back to a single all-zero group only when the caller supplied
        # no grouping at all. The previous version overwrote `index`
        # unconditionally with `torch.randint(0, 1, ...)`, which is always 0,
        # so any real grouping was silently discarded.
        if index is None and ptr is None:
            index = torch.zeros(x.size(dim), dtype=torch.long, device=x.device)
        return self.reduce(x, index, ptr, dim_size, dim, reduce='max')

# Call the aggregation without any index on a random 10 x 2 tensor.
# NOTE: `out` is the aggregation module itself here, not a result tensor.
x = torch.randn(10, 2)
print(x)
out = MyMax()
out(x)

In [None]:
from typing import Optional

import torch
from torch import Tensor
from torch.nn import Linear, Parameter

from torch_geometric.nn.aggr import Aggregation
from torch_geometric.utils import softmax,scatter, add_self_loops, degree

from torch_geometric.nn import MessagePassing
from torch_geometric.nn import aggr


class MyMax(Aggregation):
    r"""Sum-then-pairwise-max aggregation.

    The elements of every set are first summed feature-wise. Adjacent channel
    pairs ``(0, 1), (2, 3), ...`` of each summed row are then reduced with a
    maximum, so an input with ``C`` channels gives ``C / 2`` output channels.
    With four input channels this matches the original hard-coded
    ``[0, 0, 1, 1]`` grouping.
    """
    def forward(self, x: Tensor, index: Optional[Tensor] = None,
                ptr: Optional[Tensor] = None, dim_size: Optional[int] = None,
                dim: int = -2) -> Tensor:
        """Aggregate ``x`` per set and halve its channel dimension.

        Args:
            x: Source tensor whose last dimension holds an even number of
                channels.
            index: Set id of every element along ``dim``.
            ptr: Optional compressed set boundaries, used instead of ``index``.
            dim_size: Optional number of output sets.
            dim: Dimension along which to aggregate.

        Raises:
            ValueError: If the channel count is odd.
        """
        # Forward dim_size/ptr/dim so the output keeps one row per set even
        # when the highest-numbered sets receive no elements.
        x_sum = self.reduce(x, index, ptr, dim_size, dim, reduce='sum')

        num_channels = x_sum.size(-1)
        if num_channels % 2 != 0:
            raise ValueError(
                f"MyMax expects an even number of channels, got {num_channels}")

        # View the channels as (pair, member) and take the max of every pair.
        # This replaces the per-row Python loop and keeps x's dtype and device.
        paired = x_sum.reshape(*x_sum.shape[:-1], num_channels // 2, 2)
        return paired.amax(dim=-1)

#x = torch.randn(10, 2)
#print(x)
#out = MyMax()
#out(x)

class MyConv(MessagePassing):
    """Bias-free linear projection followed by max-aggregated message passing.

    The forward pass prints the projected features and the edge index as
    debugging output.
    """

    def __init__(self, in_channels, out_channels):
        # Built-in 'max' aggregation; the custom MyMax() module is the
        # alternative this cell was experimenting with.
        super().__init__(aggr='max')
        self.lin = Linear(in_channels, out_channels, bias=False)

    def forward(self, x, edge_index):
        """Project ``x`` and return the aggregated neighbour messages."""
        projected = self.lin(x)
        print('lin(x): ', projected)
        print('edge_index: ', edge_index)
        return self.propagate(edge_index, x=projected)

    def message(self, x_j):
        """Pass the neighbour features through unchanged."""
        return x_j

# Run the layer on the 3-node path graph 0 - 1 - 2: two input features per
# node, projected to four channels.
conv = MyConv(2, 4)
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[1,2], [2,3], [5,6]], dtype=torch.float)

y = conv(x,edge_index)
print('y: ', y) 

#class GCN(torch.nn.Module):
#    def __init__(self):
#        super().__init__()
#        #torch.manual_seed(1234)
#        self.conv1 = MyConv(1, 4)

#    def forward(self, x, edge_index):
#        h = self.conv1(x, edge_index)
#        return h

#model = GCN()
#print(model)

#for parameter in model.parameters():
#    print(parameter)

In [None]:
from torch_geometric.data import Data

from torch_geometric.loader import DataLoader
# Build 100 graphs that share the path topology 0 - 1 - 2 but carry
# independent random 3 x 2 node features.
# NOTE: `dataset` is rebound from the KarateClub dataset to a plain list here,
# and `data` ends up as the last generated graph.
dataset = []
for i in range(100):
    edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
    x = torch.randn((3,2), dtype=torch.float)
    data = Data(x=x, edge_index=edge_index.contiguous())
    dataset.append(data) 

# Shuffled mini-batches of 32 graphs; not used by the training loop in this cell.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch.nn as nn

class GCN(torch.nn.Module):
    """Single-layer network wrapping one ``MyConv`` layer."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = MyConv(in_channels, out_channels)

    def forward(self, data):
        """Apply ``conv1`` to ``data.x`` / ``data.edge_index`` and return it.

        The layer output is also printed as debugging output.
        """
        hidden = self.conv1(data.x, data.edge_index)
        print('x: ', hidden)
        return hidden
#device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
#model = GCN().to(device)
#data = dataset[0].to(device)
# Teacher/student setup: `model_synth` is a second, independently initialised
# network whose output on `data` serves as the regression target for `model`.
model = GCN(2,4)
model_synth = GCN(2,4)
# `data` is whatever graph the kernel currently holds (the last one generated
# by the dataset loop above).
train = model_synth(data)
# Detach so no gradient flows back into the teacher network.
train = train.detach()
print('train: ', train)
#data = dataset[0]
#data = new_data
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

#model.train()
# 20 full-batch gradient steps on the single graph `data`.
for epoch in range(20):
    optimizer.zero_grad()
    out = model(data)
    #loss = F.nll_loss(out[data.train_mask], data.y[data.train_mask])
    #print('out: ', out)
    #print(out.shape)
    # NOTE(review): the loss module is rebuilt on every iteration; it could be
    # created once before the loop.
    mseloss = nn.MSELoss()
    loss = mseloss(out, train)
    print('LOSS: ', loss)
    loss.backward()
    optimizer.step()

#model.eval()
#pred = model(data).argmax(dim=1)
#correct = (pred[data.test_mask] == data.y[data.test_mask]).sum()
#acc = int(correct) / int(data.test_mask.sum())
#print(f'Accuracy: {acc:.4f}')

In [None]:
# NOTE(review): the two comment lines below look like truncated remnants of
# earlier code and are kept as found.
#ata
#ataset = Planetoid(root='/tmp/Cora', name='Cora')
# Not the last expression of the cell, so this lookup displays nothing.
dataset[0]
from torch_geometric.datasets import TUDataset

# Rebinds the notebook-global `dataset` to the ENZYMES dataset, stored under a
# hard-coded /tmp path.
dataset = TUDataset(root='/tmp/ENZYMES', name='ENZYMES')

In [None]:
from torch_geometric.loader import DataLoader

# Synthetic dataset: `num_graphs` copies of the path graph 0 - 1 - 2, each
# with independent random node features of width `in_channels`.
dataset = []
num_nodes = 3
num_graphs = 200
in_channels = 2
out_channels = 4
for i in range(num_graphs):
    edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
    x = torch.randn((num_nodes,in_channels), dtype=torch.float)
    data = Data(x=x, edge_index=edge_index.contiguous())
    dataset.append(data) 

# Shuffled mini-batches of 32 graphs.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


# `model` is the network being trained.
model = GCN(in_channels,out_channels)
#print('train: ', train)
#data = dataset[0]
#data = new_data
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)
# `model_synth` is never given to the optimizer; it only produces the targets.
model_synth = GCN(in_channels,out_channels)

def train():
    """Run one pass over ``train_loader``, fitting ``model`` to ``model_synth``.

    For every batch the detached output of the notebook-global ``model_synth``
    is the regression target; ``model`` is updated with an MSE loss through
    the notebook-global ``optimizer``. Batches, outputs, targets and losses
    are printed as debugging output.
    """
    model.train()
    criterion = nn.MSELoss()
    for data in train_loader:
        print('data: ', data)
        # Targets come from the teacher network; detach blocks its gradients.
        train_y = model_synth(data).detach()
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, train_y)
        print('out_y: ', out)
        print('train_y: ', train_y)
        print('LOSS: ', loss)
        loss.backward()
        optimizer.step()
        
# Train for 30 passes over the loader.
for epoch in range(30):
    train()

# Parameters of the target-generating network ...
for param in model_synth.named_parameters():
    print(param)

# ... and the parameters the trained network ended up with, for comparison.
for param in model.named_parameters():
    print(param)    
    

In [2]:
#Learning max product 
from typing import Optional

import torch
from torch import Tensor
from torch.nn import Linear, Parameter

from torch_geometric.nn.aggr import Aggregation
from torch_geometric.utils import softmax,scatter, add_self_loops, degree

from torch_geometric.nn import MessagePassing
from torch_geometric.nn import aggr

#implement custom aggregation 
class MyMax(Aggregation):
    r"""Sum-then-pairwise-max aggregation.

    The elements of every set are first summed feature-wise. Adjacent channel
    pairs ``(0, 1), (2, 3), ...`` of each summed row are then reduced with a
    maximum, so an input with ``C`` channels gives ``C / 2`` output channels.
    With four input channels this matches the original hard-coded
    ``[0, 0, 1, 1]`` grouping.
    """
    def forward(self, x: Tensor, index: Optional[Tensor] = None,
                ptr: Optional[Tensor] = None, dim_size: Optional[int] = None,
                dim: int = -2) -> Tensor:
        """Aggregate ``x`` per set and halve its channel dimension.

        Args:
            x: Source tensor whose last dimension holds an even number of
                channels.
            index: Set id of every element along ``dim``.
            ptr: Optional compressed set boundaries, used instead of ``index``.
            dim_size: Optional number of output sets.
            dim: Dimension along which to aggregate.

        Raises:
            ValueError: If the channel count is odd.
        """
        # Forward dim_size/ptr/dim so the output keeps one row per set even
        # when the highest-numbered sets receive no elements.
        x_sum = self.reduce(x, index, ptr, dim_size, dim, reduce='sum')

        num_channels = x_sum.size(-1)
        if num_channels % 2 != 0:
            raise ValueError(
                f"MyMax expects an even number of channels, got {num_channels}")

        # View the channels as (pair, member) and take the max of every pair.
        # This replaces the per-row Python loop and keeps x's dtype and device.
        paired = x_sum.reshape(*x_sum.shape[:-1], num_channels // 2, 2)
        return paired.amax(dim=-1)
    
#implement message passing layer 
class MyConv(MessagePassing):
    """Bias-free linear projection followed by ``MyMax`` aggregation."""

    def __init__(self, in_channels, out_channels):
        # Custom aggregation module; the built-in alternative is aggr='max'.
        super().__init__(aggr=MyMax())
        self.lin = Linear(in_channels, out_channels, bias=False)

    def forward(self, x, edge_index):
        """Project ``x`` and return the aggregated neighbour messages."""
        projected = self.lin(x)
        return self.propagate(edge_index, x=projected)

    def message(self, x_j):
        """Pass the neighbour features through unchanged."""
        return x_j

# Check that the message-passing layer runs: the 3-node path graph 0 - 1 - 2
# with two input features per node, projected to four channels before MyMax
# aggregation.
conv = MyConv(2, 4)
edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
x = torch.tensor([[1,2], [2,3], [5,6]], dtype=torch.float)

y = conv(x,edge_index)
print('y: ', y) 

# Generate a synthetic dataset: 100 path graphs with random 3 x 2 features.
# NOTE: `dataset` and `train_loader` are rebuilt further down in this same
# cell (200 graphs), so the objects created here are replaced before training.
from torch_geometric.data import Data
from torch_geometric.loader import DataLoader
dataset = []
for i in range(100):
    edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
    x = torch.randn((3,2), dtype=torch.float)
    data = Data(x=x, edge_index=edge_index.contiguous())
    dataset.append(data) 

# Create a dataloader yielding shuffled mini-batches of 32 graphs.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)

#create a neural network module that uses MyConv
import torch
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
import torch.nn as nn

class GCN(torch.nn.Module):
    """Single-layer network wrapping one ``MyConv`` layer."""

    def __init__(self, in_channels, out_channels):
        super().__init__()
        self.conv1 = MyConv(in_channels, out_channels)

    def forward(self, data):
        """Apply ``conv1`` to ``data.x`` / ``data.edge_index`` and return it.

        The layer output is also printed as debugging output.
        """
        hidden = self.conv1(data.x, data.edge_index)
        print('x: ', hidden)
        return hidden

# Generate the data actually used for training: `num_graphs` copies of the
# path graph 0 - 1 - 2 with independent random node features.
from torch_geometric.loader import DataLoader

dataset = []
num_nodes = 3
num_graphs = 200
in_channels = 2
out_channels = 4
for i in range(num_graphs):
    edge_index = torch.tensor([[0, 1, 1, 2],
                           [1, 0, 2, 1]], dtype=torch.long)
    x = torch.randn((num_nodes,in_channels), dtype=torch.float)
    data = Data(x=x, edge_index=edge_index.contiguous())
    dataset.append(data) 

# Shuffled mini-batches of 32 graphs.
train_loader = DataLoader(dataset, batch_size=32, shuffle=True)


# Model to be trained, and its optimizer.
model = GCN(in_channels,out_channels)
optimizer = torch.optim.Adam(model.parameters(), lr=0.01, weight_decay=5e-4)

# Second, independently initialised network used only to generate the
# regression targets (the graphs carry no 'y' label of their own). It is
# never given to the optimizer.
model_synth = GCN(in_channels,out_channels)

#training function 
def train():
    """Run one pass over ``train_loader``, fitting ``model`` to ``model_synth``.

    For every batch the detached output of the notebook-global ``model_synth``
    is the regression target; ``model`` is updated with an MSE loss through
    the notebook-global ``optimizer``. The loss of every batch is printed.
    """
    model.train()
    criterion = nn.MSELoss()
    for data in train_loader:
        # Targets come from the teacher network; detach blocks its gradients.
        train_y = model_synth(data).detach()
        optimizer.zero_grad()
        out = model(data)
        loss = criterion(out, train_y)
        print('LOSS: ', loss)
        loss.backward()
        optimizer.step()
        
# Train for 30 passes over the loader.
for epoch in range(30):
    train()

# Print the ground-truth parameters (the target-generating network).
for param in model_synth.named_parameters():
    print(param)

# Print the learned parameters, for comparison with the ones above.
for param in model.named_parameters():
    print(param)    

y:  tensor([[3.3242, 0.6597],
        [9.3310, 1.6290],
        [3.3242, 0.6597]], grad_fn=<CopySlices>)
x:  tensor([[-2.3910e-02,  2.4848e-01],
        [-4.9547e-02,  2.0334e-01],
        [-2.3910e-02,  2.4848e-01],
        [-2.0259e-02,  6.3176e-02],
        [ 5.8400e-01,  7.4752e-01],
        [-2.0259e-02,  6.3176e-02],
        [ 1.9816e-01, -8.3668e-01],
        [ 3.9982e-01,  2.2042e-01],
        [ 1.9816e-01, -8.3668e-01],
        [ 1.1633e-01,  2.1618e-01],
        [ 3.2084e-01,  3.2419e-01],
        [ 1.1633e-01,  2.1618e-01],
        [ 2.0287e-01,  3.7951e-01],
        [ 1.4431e-02, -1.2910e-01],
        [ 2.0287e-01,  3.7951e-01],
        [-8.4192e-03,  3.7525e-02],
        [ 4.5946e-01,  9.1638e-01],
        [-8.4192e-03,  3.7525e-02],
        [ 4.9740e-02,  3.8653e-01],
        [ 1.4399e-01, -2.0924e-01],
        [ 4.9740e-02,  3.8653e-01],
        [ 2.6941e-02, -1.7822e-01],
        [ 1.1694e+00,  1.5446e+00],
        [ 2.6941e-02, -1.7822e-01],
        [ 7.4247e-02, -2.07

x:  tensor([[ 0.3029, -0.4749],
        [-0.0683,  0.0982],
        [ 0.3029, -0.4749],
        [-0.0215,  0.0287],
        [ 0.2960,  0.2200],
        [-0.0215,  0.0287],
        [ 0.4880,  0.2886],
        [ 0.5480, -0.6787],
        [ 0.4880,  0.2886],
        [-0.1083,  0.1689],
        [ 0.8150,  1.6851],
        [-0.1083,  0.1689],
        [ 0.7279,  0.4961],
        [ 2.0196,  2.0759],
        [ 0.7279,  0.4961],
        [ 0.9589,  1.4084],
        [ 0.1937, -0.2925],
        [ 0.9589,  1.4084],
        [ 0.1258, -0.1984],
        [ 0.5779,  0.4192],
        [ 0.1258, -0.1984],
        [ 0.1259, -0.2245],
        [-0.5995,  0.9913],
        [ 0.1259, -0.2245],
        [ 0.4603,  0.2748],
        [ 0.7156,  0.0996],
        [ 0.4603,  0.2748],
        [ 0.4517,  0.2136],
        [ 0.0676,  0.9308],
        [ 0.4517,  0.2136],
        [ 0.2541,  0.0416],
        [ 0.1639, -0.0387],
        [ 0.2541,  0.0416],
        [ 0.1139, -0.1271],
        [ 0.1330, -0.2444],
        [ 0.1139

x:  tensor([[ 0.0361,  0.0101],
        [ 0.2915, -0.4434],
        [ 0.0361,  0.0101],
        [-0.0319,  0.1969],
        [-0.0017,  0.0155],
        [-0.0319,  0.1969],
        [ 0.5253,  0.6072],
        [ 0.8251,  0.9216],
        [ 0.5253,  0.6072],
        [ 0.1234,  0.4418],
        [ 0.7863,  0.6973],
        [ 0.1234,  0.4418],
        [ 0.0852, -0.2713],
        [-0.2413,  0.4986],
        [ 0.0852, -0.2713],
        [-0.0219, -0.0143],
        [ 0.6717,  1.3150],
        [-0.0219, -0.0143],
        [ 0.0693,  0.2494],
        [ 0.1742,  0.3400],
        [ 0.0693,  0.2494],
        [ 0.4196,  0.7870],
        [ 0.2543,  0.3651],
        [ 0.4196,  0.7870],
        [ 0.2936,  0.3366],
        [ 0.0419,  0.7173],
        [ 0.2936,  0.3366],
        [ 0.1715,  0.3751],
        [-0.0114,  0.1503],
        [ 0.1715,  0.3751],
        [ 0.0912, -0.2529],
        [ 0.0973,  0.1368],
        [ 0.0912, -0.2529],
        [ 0.7962,  1.1015],
        [ 0.4305,  0.7016],
        [ 0.7962

x:  tensor([[ 4.0028e-02,  4.0672e-01],
        [ 7.2302e-02, -1.4145e-01],
        [ 4.0028e-02,  4.0672e-01],
        [ 3.8458e-01,  7.2679e-01],
        [ 5.7637e-02,  1.1577e-01],
        [ 3.8458e-01,  7.2679e-01],
        [ 2.4099e-01,  2.3034e-01],
        [ 1.5498e-02,  6.4640e-01],
        [ 2.4099e-01,  2.3034e-01],
        [ 2.1045e-02,  2.7836e-01],
        [-3.9501e-03,  2.1814e-01],
        [ 2.1045e-02,  2.7836e-01],
        [ 1.3245e-01,  4.3226e-01],
        [ 4.4666e-02, -2.3068e-01],
        [ 1.3245e-01,  4.3226e-01],
        [ 6.9469e-01,  1.1645e+00],
        [ 5.0569e-02, -1.8469e-01],
        [ 6.9469e-01,  1.1645e+00],
        [ 3.1705e-03,  9.5071e-02],
        [ 3.3214e-03,  1.6468e-01],
        [ 3.1705e-03,  9.5071e-02],
        [ 1.5979e-02,  4.8231e-02],
        [ 1.4308e-01,  1.0080e+00],
        [ 1.5979e-02,  4.8231e-02],
        [ 1.1482e-01,  2.5230e-01],
        [ 8.2616e-02, -2.9473e-01],
        [ 1.1482e-01,  2.5230e-01],
        [ 2.4189e-02,  5

LOSS:  tensor(0.0320, grad_fn=<MseLossBackward0>)
x:  tensor([[ 0.2808,  0.3313],
        [ 0.6895,  0.7887],
        [ 0.2808,  0.3313],
        [ 0.0760, -0.3071],
        [ 0.2169, -1.4352],
        [ 0.0760, -0.3071],
        [ 0.0705, -0.2997],
        [-0.1031,  0.5291],
        [ 0.0705, -0.2997],
        [ 0.2683,  0.3622],
        [ 0.2086, -1.3476],
        [ 0.2683,  0.3622],
        [ 0.2397,  1.7103],
        [ 0.1578,  0.8029],
        [ 0.2397,  1.7103],
        [ 0.0454, -0.3208],
        [ 0.1147,  0.2070],
        [ 0.0454, -0.3208],
        [ 0.5066,  0.5810],
        [ 0.0221, -0.1876],
        [ 0.5066,  0.5810],
        [ 0.0485, -0.3634],
        [ 0.0526,  0.6006],
        [ 0.0485, -0.3634],
        [ 0.4036,  0.4494],
        [ 1.1019,  1.3051],
        [ 0.4036,  0.4494],
        [ 0.2803,  0.2651],
        [ 0.0547,  0.3341],
        [ 0.2803,  0.2651],
        [-0.0084,  0.0375],
        [ 0.4595,  0.9164],
        [-0.0084,  0.0375],
        [ 0.0128, -0.1

x:  tensor([[ 0.3920,  1.1872],
        [ 0.2788, -0.8798],
        [ 0.3920,  1.1872],
        [ 0.2683,  0.3622],
        [ 0.2086, -1.3476],
        [ 0.2683,  0.3622],
        [ 0.0454, -0.3208],
        [ 0.1147,  0.2070],
        [ 0.0454, -0.3208],
        [ 0.3397,  0.9634],
        [-0.0915,  0.4968],
        [ 0.3397,  0.9634],
        [ 0.0571, -0.0179],
        [-0.0122,  0.0173],
        [ 0.0571, -0.0179],
        [-0.1365,  0.7407],
        [-0.2022,  1.0502],
        [-0.1365,  0.7407],
        [ 0.0265, -0.2027],
        [ 0.5211,  0.7226],
        [ 0.0265, -0.2027],
        [ 0.2808,  0.3313],
        [ 0.6895,  0.7887],
        [ 0.2808,  0.3313],
        [-0.0697,  0.3718],
        [ 0.0081,  0.0256],
        [-0.0697,  0.3718],
        [-0.0428,  0.2299],
        [ 0.3767,  0.3771],
        [-0.0428,  0.2299],
        [ 0.0928,  0.7789],
        [ 0.8333,  1.1413],
        [ 0.0928,  0.7789],
        [ 0.5210,  0.7130],
        [-0.0647,  0.2862],
        [ 0.5210

LOSS:  tensor(0.0055, grad_fn=<MseLossBackward0>)
x:  tensor([[ 0.4026,  1.0694],
        [ 0.1447,  0.4581],
        [ 0.4026,  1.0694],
        [ 0.0928,  0.7789],
        [ 0.8333,  1.1413],
        [ 0.0928,  0.7789],
        [ 0.1917,  0.4441],
        [ 0.6530,  0.8796],
        [ 0.1917,  0.4441],
        [ 0.0916, -0.4362],
        [ 0.2514,  0.3500],
        [ 0.0916, -0.4362],
        [ 0.4426,  0.4365],
        [ 0.2150, -0.5140],
        [ 0.4426,  0.4365],
        [-0.0677,  0.3291],
        [-0.2471,  1.2317],
        [-0.0677,  0.3291],
        [ 0.0269, -0.1782],
        [ 1.1694,  1.5446],
        [ 0.0269, -0.1782],
        [ 0.1395,  0.3189],
        [ 0.2140, -0.2286],
        [ 0.1395,  0.3189],
        [-0.0086, -0.0487],
        [-0.0982,  0.5239],
        [-0.0086, -0.0487],
        [ 0.0403, -0.2610],
        [ 0.1416, -0.5206],
        [ 0.0403, -0.2610],
        [ 0.1418, -0.9257],
        [ 0.0814,  0.2218],
        [ 0.1418, -0.9257],
        [ 0.5891,  0.5

x:  tensor([[ 5.0431e-02,  3.8379e-01],
        [ 1.5392e-01, -1.2983e-01],
        [ 5.0431e-02,  3.8379e-01],
        [ 1.8853e-01,  2.8414e-01],
        [ 9.7000e-02, -5.7007e-01],
        [ 1.8853e-01,  2.8414e-01],
        [ 2.7803e-01, -5.2793e-01],
        [ 4.1912e-01,  1.0496e+00],
        [ 2.7803e-01, -5.2793e-01],
        [ 5.9269e-01,  1.2982e+00],
        [ 3.6775e-01, -5.2644e-01],
        [ 5.9269e-01,  1.2982e+00],
        [ 6.3456e-01,  7.0965e-01],
        [ 3.0507e-01,  3.2056e-01],
        [ 6.3456e-01,  7.0965e-01],
        [-4.7592e-04, -1.1787e-01],
        [ 2.8171e-01, -5.2734e-01],
        [-4.7592e-04, -1.1787e-01],
        [ 2.3648e-01,  2.2524e-01],
        [ 2.7700e-01,  7.3166e-01],
        [ 2.3648e-01,  2.2524e-01],
        [ 1.6697e-01,  1.6451e-01],
        [ 1.4804e-01,  1.6070e-01],
        [ 1.6697e-01,  1.6451e-01],
        [ 6.3033e-02, -4.7150e-02],
        [ 1.1098e-01, -8.5855e-02],
        [ 6.3033e-02, -4.7150e-02],
        [ 1.0110e+00,  1

x:  tensor([[ 0.0184, -0.1693],
        [ 0.1995, -0.1773],
        [ 0.0184, -0.1693],
        [ 0.2306, -1.0050],
        [ 0.3385, -0.6371],
        [ 0.2306, -1.0050],
        [ 0.3800,  0.5324],
        [ 0.5049,  0.7692],
        [ 0.3800,  0.5324],
        [ 0.2336,  0.3066],
        [ 0.0274, -0.1292],
        [ 0.2336,  0.3066],
        [ 0.1679,  0.1160],
        [ 0.1484,  0.1401],
        [ 0.1679,  0.1160],
        [ 0.0435, -0.1132],
        [ 0.4627,  0.3961],
        [ 0.0435, -0.1132],
        [-0.0677,  0.3291],
        [-0.2471,  1.2317],
        [-0.0677,  0.3291],
        [ 0.1151, -0.0584],
        [-0.0292,  0.0634],
        [ 0.1151, -0.0584],
        [ 0.0128, -0.1030],
        [-0.0237,  0.1292],
        [ 0.0128, -0.1030],
        [ 0.0019,  0.6898],
        [ 0.5332,  0.6938],
        [ 0.0019,  0.6898],
        [ 0.3920,  1.1872],
        [ 0.2788, -0.8798],
        [ 0.3920,  1.1872],
        [ 0.1096, -0.4944],
        [ 0.3785,  0.5195],
        [ 0.1096

x:  tensor([[ 1.6834e-04, -5.9669e-02],
        [-1.0259e-02,  2.6352e-01],
        [ 1.6834e-04, -5.9669e-02],
        [ 2.9910e-03, -4.9041e-02],
        [ 2.3360e-01,  2.8754e-01],
        [ 2.9910e-03, -4.9041e-02],
        [ 2.7290e-01,  3.2317e-01],
        [ 6.4376e-01,  8.9877e-01],
        [ 2.7290e-01,  3.2317e-01],
        [-4.2771e-02,  2.2993e-01],
        [ 3.7665e-01,  3.7708e-01],
        [-4.2771e-02,  2.2993e-01],
        [-1.2244e-01,  8.2130e-01],
        [ 2.6175e-01, -3.9753e-01],
        [-1.2244e-01,  8.2130e-01],
        [-1.5222e-02,  9.4520e-01],
        [ 9.5835e-01,  1.1009e+00],
        [-1.5222e-02,  9.4520e-01],
        [-1.0973e-01,  5.9687e-01],
        [ 1.0269e-02, -1.3412e-01],
        [-1.0973e-01,  5.9687e-01],
        [ 4.0363e-01,  4.4940e-01],
        [ 1.1019e+00,  1.3051e+00],
        [ 4.0363e-01,  4.4940e-01],
        [ 9.3640e-02, -2.1223e-01],
        [ 8.2149e-03, -9.5552e-02],
        [ 9.3640e-02, -2.1223e-01],
        [ 5.8911e-01,  5

x:  tensor([[ 6.1757e-02, -1.4985e-01],
        [ 6.1238e-02, -3.9822e-01],
        [ 6.1757e-02, -1.4985e-01],
        [ 4.6011e-03, -1.9567e-01],
        [ 1.0575e-01, -2.0895e-01],
        [ 4.6011e-03, -1.9567e-01],
        [ 2.8073e-01,  3.3254e-01],
        [ 6.8925e-01,  7.9427e-01],
        [ 2.8073e-01,  3.3254e-01],
        [ 1.9177e-01,  4.4498e-01],
        [ 6.5291e-01,  8.7212e-01],
        [ 1.9177e-01,  4.4498e-01],
        [ 5.0638e-01,  5.8491e-01],
        [ 2.2402e-02, -1.8755e-01],
        [ 5.0638e-01,  5.8491e-01],
        [ 1.8725e-02, -1.6916e-01],
        [ 1.9735e-01, -1.5976e-01],
        [ 1.8725e-02, -1.6916e-01],
        [ 7.5464e-02, -3.0071e-01],
        [ 2.1638e-01, -1.4372e+00],
        [ 7.5464e-02, -3.0071e-01],
        [-1.6186e-02,  8.8967e-02],
        [ 1.4482e-01,  9.9186e-02],
        [-1.6186e-02,  8.8967e-02],
        [-5.0769e-02,  7.1482e-01],
        [ 7.4937e-02, -1.4498e-01],
        [-5.0769e-02,  7.1482e-01],
        [ 4.9902e-02, -6

x:  tensor([[ 0.2274, -0.7610],
        [ 0.6100,  2.1567],
        [ 0.2274, -0.7610],
        [-0.0304,  0.1418],
        [ 0.0955, -0.3666],
        [-0.0304,  0.1418],
        [ 0.4024,  1.0701],
        [ 0.1446,  0.4584],
        [ 0.4024,  1.0701],
        [ 0.0910, -0.1348],
        [-0.0995,  0.5192],
        [ 0.0910, -0.1348],
        [ 0.0030, -0.0490],
        [ 0.2335,  0.2869],
        [ 0.0030, -0.0490],
        [ 0.0084, -0.1144],
        [ 0.1412, -0.3217],
        [ 0.0084, -0.1144],
        [ 0.0497,  0.3868],
        [ 0.1434, -0.2044],
        [ 0.0497,  0.3868],
        [ 0.2028,  0.3797],
        [ 0.0144, -0.1291],
        [ 0.2028,  0.3797],
        [ 0.0453, -0.3208],
        [ 0.1146,  0.2071],
        [ 0.0453, -0.3208],
        [ 0.1974, -0.8292],
        [ 0.3996,  0.2290],
        [ 0.1974, -0.8292],
        [-0.0908,  0.4827],
        [ 0.2729, -0.9267],
        [-0.0908,  0.4827],
        [ 0.0757, -0.3043],
        [ 0.2163, -1.4358],
        [ 0.0757

x:  tensor([[-0.0164,  0.0887],
        [ 0.1450,  0.0908],
        [-0.0164,  0.0887],
        [ 0.1982, -0.8367],
        [ 0.3998,  0.2204],
        [ 0.1982, -0.8367],
        [ 0.0623, -0.1552],
        [ 0.0615, -0.3976],
        [ 0.0623, -0.1552],
        [-0.1418,  0.7480],
        [ 0.3143, -0.5301],
        [-0.1418,  0.7480],
        [-0.1355,  0.6881],
        [-0.0136, -0.0626],
        [-0.1355,  0.6881],
        [ 0.0571, -0.0179],
        [-0.0122,  0.0173],
        [ 0.0571, -0.0179],
        [ 0.2683,  0.3622],
        [ 0.2086, -1.3476],
        [ 0.2683,  0.3622],
        [ 0.1418, -0.9257],
        [ 0.0814,  0.2218],
        [ 0.1418, -0.9257],
        [ 0.1233, -0.8343],
        [ 0.1367, -0.6597],
        [ 0.1233, -0.8343],
        [ 0.0046, -0.1157],
        [ 0.2659, -0.6788],
        [ 0.0046, -0.1157],
        [ 0.2099,  0.1996],
        [ 0.6071,  1.0139],
        [ 0.2099,  0.1996],
        [ 0.2078,  0.6519],
        [ 0.1995, -0.7531],
        [ 0.2078

x:  tensor([[ 9.3543e-02, -1.2217e-01],
        [-2.7989e-02,  2.0721e-01],
        [ 9.3543e-02, -1.2217e-01],
        [-7.6634e-02,  9.8676e-01],
        [ 7.6895e-02, -6.1677e-01],
        [-7.6634e-02,  9.8676e-01],
        [ 2.0287e-01,  3.7951e-01],
        [ 1.4431e-02, -1.2910e-01],
        [ 2.0287e-01,  3.7951e-01],
        [ 1.1739e-01, -4.1253e-01],
        [-3.2032e-02,  1.1363e-01],
        [ 1.1739e-01, -4.1253e-01],
        [ 1.7697e-01, -1.4251e-01],
        [ 2.0473e-01, -9.0966e-02],
        [ 1.7697e-01, -1.4251e-01],
        [ 1.6834e-04, -5.9669e-02],
        [-1.0259e-02,  2.6352e-01],
        [ 1.6834e-04, -5.9669e-02],
        [ 3.2841e-01,  3.4079e-01],
        [ 3.8403e-01,  2.4981e-01],
        [ 3.2841e-01,  3.4079e-01],
        [ 1.7798e-01,  2.2403e-01],
        [ 4.6405e-01,  6.4330e-01],
        [ 1.7798e-01,  2.2403e-01],
        [ 2.3364e-01,  3.0663e-01],
        [ 2.7382e-02, -1.2922e-01],
        [ 2.3364e-01,  3.0663e-01],
        [ 5.5622e-01,  7

x:  tensor([[ 0.2688,  0.3628],
        [ 0.0486,  0.9067],
        [ 0.2688,  0.3628],
        [-0.0166,  0.0803],
        [ 0.2482,  1.3880],
        [-0.0166,  0.0803],
        [-0.0948,  0.5179],
        [ 0.2428,  0.0876],
        [-0.0948,  0.5179],
        [ 0.3916,  1.1860],
        [ 0.2787, -0.8800],
        [ 0.3916,  1.1860],
        [ 0.0084, -0.1144],
        [ 0.1417, -0.3268],
        [ 0.0084, -0.1144],
        [ 0.2395,  1.7086],
        [ 0.1576,  0.8021],
        [ 0.2395,  1.7086],
        [ 0.5148,  0.6923],
        [ 0.4084, -1.5136],
        [ 0.5148,  0.6923],
        [ 0.1031,  0.8891],
        [-0.1576,  0.8827],
        [ 0.1031,  0.8891],
        [ 0.0728, -0.4870],
        [ 0.4881, -1.4871],
        [ 0.0728, -0.4870],
        [ 0.2552,  0.3238],
        [ 0.4410,  0.5992],
        [ 0.2552,  0.3238],
        [ 0.0030, -0.0490],
        [ 0.2334,  0.2874],
        [ 0.0030, -0.0490],
        [ 0.1769, -0.1428],
        [ 0.2046, -0.0914],
        [ 0.1769

LOSS:  tensor(1.3823e-07, grad_fn=<MseLossBackward0>)
x:  tensor([[-9.0232e-02,  4.6966e-01],
        [ 3.7242e-01,  3.9521e-01],
        [-9.0232e-02,  4.6966e-01],
        [-1.3651e-01,  7.4067e-01],
        [-2.0223e-01,  1.0502e+00],
        [-1.3651e-01,  7.4067e-01],
        [-1.6393e-02,  8.8702e-02],
        [ 1.4499e-01,  9.0797e-02],
        [-1.6393e-02,  8.8702e-02],
        [ 3.5212e-01,  9.0133e-01],
        [ 1.5575e-01, -1.0246e+00],
        [ 3.5212e-01,  9.0133e-01],
        [ 9.2777e-02,  7.7892e-01],
        [ 8.3333e-01,  1.1413e+00],
        [ 9.2777e-02,  7.7892e-01],
        [ 5.0408e-02, -6.8244e-02],
        [ 2.0433e-01, -1.3239e+00],
        [ 5.0408e-02, -6.8244e-02],
        [ 8.9930e-02,  4.2315e-02],
        [ 4.3406e-02, -3.3744e-01],
        [ 8.9930e-02,  4.2315e-02],
        [ 2.3973e-01,  1.7103e+00],
        [ 1.5779e-01,  8.0295e-01],
        [ 2.3973e-01,  1.7103e+00],
        [ 1.4813e-01,  6.5272e-02],
        [ 6.1098e-01,  7.6633e-01],
      

x:  tensor([[ 0.1032,  0.8893],
        [-0.1575,  0.8828],
        [ 0.1032,  0.8893],
        [ 0.0859,  0.1371],
        [ 0.1938,  0.1550],
        [ 0.0859,  0.1371],
        [ 0.0764, -0.5864],
        [ 0.0891, -0.2109],
        [ 0.0764, -0.5864],
        [ 0.1025, -0.3624],
        [ 0.1069,  0.8737],
        [ 0.1025, -0.3624],
        [ 0.1985, -0.6102],
        [ 0.5908,  0.8200],
        [ 0.1985, -0.6102],
        [ 0.4613,  0.5323],
        [ 0.9174,  1.3357],
        [ 0.4613,  0.5323],
        [ 0.1980, -0.8363],
        [ 0.3995,  0.2198],
        [ 0.1980, -0.8363],
        [ 0.5356,  0.6954],
        [ 0.5244,  0.4143],
        [ 0.5356,  0.6954]], grad_fn=<CopySlices>)
LOSS:  tensor(1.7877e-07, grad_fn=<MseLossBackward0>)
('conv1.lin.weight', Parameter containing:
tensor([[ 1.1216e-01, -4.4626e-04],
        [-1.1765e-01,  3.5111e-01],
        [-2.4524e-01,  4.2592e-01],
        [-6.5533e-01,  6.3089e-02]], requires_grad=True))
('conv1.lin.weight', Parameter contain