In [None]:
!pip install -q torch==1.7.1 torchvision

[K     |████████████████████████████████| 776.8MB 23kB/s 
[31mERROR: torchvision 0.9.1+cu101 has requirement torch==1.8.1, but you'll have torch 1.7.1 which is incompatible.[0m
[31mERROR: torchtext 0.9.1 has requirement torch==1.8.1, but you'll have torch 1.7.1 which is incompatible.[0m
[?25h

In [None]:
pip install Cython



In [None]:
import torch
import numpy as np
import scipy.io
import h5py
import sklearn.metrics
import torch.nn as nn
from scipy.ndimage import gaussian_filter


#################################################
#
# Utilities
#
#################################################
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# reading data
class MatReader(object):
    def __init__(self, file_path, to_torch=True, to_cuda=False, to_float=True):
        super(MatReader, self).__init__()

        self.to_torch = to_torch
        self.to_cuda = to_cuda
        self.to_float = to_float

        self.file_path = file_path

        self.data = None
        self.old_mat = None
        self._load_file()

    def _load_file(self):
        try:
            self.data = scipy.io.loadmat(self.file_path)
            self.old_mat = True
        except:
            self.data = h5py.File(self.file_path)
            self.old_mat = False

    def load_file(self, file_path):
        self.file_path = file_path
        self._load_file()

    def read_field(self, field):
        x = self.data[field]

        if not self.old_mat:
            x = x[()]
            x = np.transpose(x, axes=range(len(x.shape) - 1, -1, -1))

        if self.to_float:
            x = x.astype(np.float32)

        if self.to_torch:
            x = torch.from_numpy(x)

            if self.to_cuda:
                x = x.cuda()

        return x

    def set_cuda(self, to_cuda):
        self.to_cuda = to_cuda

    def set_torch(self, to_torch):
        self.to_torch = to_torch

    def set_float(self, to_float):
        self.to_float = to_float

# normalization, pointwise gaussian
class UnitGaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(UnitGaussianNormalizer, self).__init__()

        # x could be in shape of ntrain*n or ntrain*T*n or ntrain*n*T
        self.mean = torch.mean(x, 0)
        self.std = torch.std(x, 0)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        if sample_idx is None:
            std = self.std + self.eps # n
            mean = self.mean
        else:
            if len(self.mean.shape) == len(sample_idx[0].shape):
                std = self.std[sample_idx] + self.eps  # batch*n
                mean = self.mean[sample_idx]
            if len(self.mean.shape) > len(sample_idx[0].shape):
                std = self.std[:,sample_idx]+ self.eps # T*batch*n
                mean = self.mean[:,sample_idx]

        # x is in shape of batch*n or T*batch*n
        x = (x * std) + mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()

    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()

# normalization, Gaussian
class GaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(GaussianNormalizer, self).__init__()

        self.mean = torch.mean(x)
        self.std = torch.std(x)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        x = (x * (self.std + self.eps)) + self.mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()

    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()


# normalization, scaling by range
class RangeNormalizer(object):
    def __init__(self, x, low=0.0, high=1.0):
        super(RangeNormalizer, self).__init__()
        mymin = torch.min(x, 0)[0].view(-1)
        mymax = torch.max(x, 0)[0].view(-1)

        self.a = (high - low)/(mymax - mymin)
        self.b = -self.a*mymax + high

    def encode(self, x):
        s = x.size()
        x = x.view(s[0], -1)
        x = self.a*x + self.b
        x = x.view(s)
        return x

    def decode(self, x):
        s = x.size()
        x = x.view(s[0], -1)
        x = (x - self.b)/self.a
        x = x.view(s)
        return x

#loss function with rel/abs Lp loss
class LpLoss(object):
    def __init__(self, d=2, p=2, size_average=True, reduction=True):
        super(LpLoss, self).__init__()

        #Dimension and Lp-norm type are postive
        assert d > 0 and p > 0

        self.d = d
        self.p = p
        self.reduction = reduction
        self.size_average = size_average

    def abs(self, x, y):
        num_examples = x.size()[0]

        #Assume uniform mesh
        h = 1.0 / (x.size()[1] - 1.0)

        all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(all_norms)
            else:
                return torch.sum(all_norms)

        return all_norms

    def rel(self, x, y):
        num_examples = x.size()[0]

        diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1)
        y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(diff_norms/y_norms)
            else:
                return torch.sum(diff_norms/y_norms)

        return diff_norms/y_norms

    def __call__(self, x, y):
        return self.rel(x, y)

# A simple feedforward neural network
class DenseNet(torch.nn.Module):
    def __init__(self, layers, nonlinearity, out_nonlinearity=None, normalize=False):
        super(DenseNet, self).__init__()

        self.n_layers = len(layers) - 1

        assert self.n_layers >= 1

        self.layers = nn.ModuleList()

        for j in range(self.n_layers):
            self.layers.append(nn.Linear(layers[j], layers[j+1]))

            if j != self.n_layers - 1:
                if normalize:
                    self.layers.append(nn.BatchNorm1d(layers[j+1]))

                self.layers.append(nonlinearity())

        if out_nonlinearity is not None:
            self.layers.append(out_nonlinearity())

    def forward(self, x):
        for _, l in enumerate(self.layers):
            x = l(x)

        return x


In [None]:
import torch
print(torch.__version__)
from google.colab import drive
drive.mount('/content/drive/')

1.7.1
Mounted at /content/drive/


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import matplotlib.pyplot as plt

import operator
from functools import reduce
from functools import partial
from timeit import default_timer
#from utilities3 import *

In [None]:
torch.manual_seed(0)
np.random.seed(0)

In [None]:
def compl_mul1d(a, b):
    # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x)
    op = partial(torch.einsum, "bix,iox->box")
    return torch.stack([
        op(a[..., 0], b[..., 0]) - op(a[..., 1], b[..., 1]),
        op(a[..., 1], b[..., 0]) + op(a[..., 0], b[..., 1])
    ], dim=-1)

In [None]:
class SpectralConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, modes1):
        super(SpectralConv1d, self).__init__()

        """
        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.
        """

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes1 = modes1  #Number of Fourier modes to multiply, at most floor(N/2) + 1

        self.scale = (1 / (in_channels*out_channels))
        self.weights1 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, 2))

    def forward(self, x):
        batchsize = x.shape[0]
        #Compute Fourier coeffcients up to factor of e^(- something constant)
        x_ft = torch.rfft(x, 1, normalized=True, onesided=True)

        # Multiply relevant Fourier modes
        out_ft = torch.zeros(batchsize, self.in_channels, x.size(-1)//2 + 1, 2, device=x.device)
        out_ft[:, :, :self.modes1] = compl_mul1d(x_ft[:, :, :self.modes1], self.weights1)

        #Return to physical space
        x = torch.irfft(out_ft, 1, normalized=True, onesided=True, signal_sizes=(x.size(-1), ))
        return x

In [None]:
class SimpleBlock1d(nn.Module):
    def __init__(self, modes, width):
        super(SimpleBlock1d, self).__init__()

        """
        The overall network. It contains 4 layers of the Fourier layer.
        1. Lift the input to the desire channel dimension by self.fc0 .
        2. 4 layers of the integral operators u' = (W + K)(u).
            W defined by self.w; K defined by self.conv .
        3. Project from the channel space to the output space by self.fc1 and self.fc2 .

        input: the solution of the initial condition and location (a(x), x)
        input shape: (batchsize, x=s, c=2)
        output: the solution of a later timestep
        output shape: (batchsize, x=s, c=1)
        """

        self.modes1 = modes
        self.width = width
        self.fc0 = nn.Linear(2, self.width) # input channel is 2: (a(x), x)

        self.conv0 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv1 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv2 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv3 = SpectralConv1d(self.width, self.width, self.modes1)
        self.w0 = nn.Conv1d(self.width, self.width, 1)
        self.w1 = nn.Conv1d(self.width, self.width, 1)
        self.w2 = nn.Conv1d(self.width, self.width, 1)
        self.w3 = nn.Conv1d(self.width, self.width, 1)


        self.fc1 = nn.Linear(self.width, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):

        x = self.fc0(x)
        x = x.permute(0, 2, 1)

        x1 = self.conv0(x)
        x2 = self.w0(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv1(x)
        x2 = self.w1(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv2(x)
        x2 = self.w2(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv3(x)
        x2 = self.w3(x)
        x = x1 + x2

        x = x.permute(0, 2, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

In [None]:
class Net1d(nn.Module):
    def __init__(self, modes, width):
        super(Net1d, self).__init__()

        """
        A wrapper function
        """

        self.conv1 = SimpleBlock1d(modes, width)


    def forward(self, x):
        x = self.conv1(x)
        return x.squeeze()

    def count_params(self):
        c = 0
        for p in self.parameters():
            c += reduce(operator.mul, list(p.size()))

        return c

In [None]:
################################################################
#  configurations
################################################################
ntrain = 1000
ntest = 100

sub = 2**3 #subsampling rate
h = 2**13 // sub #total grid size divided by the subsampling rate
s = h

batch_size = 10
learning_rate = 0.001

rounds = 500
step_size = 100
gamma = 0.5

modes = 16
width = 64

In [None]:
# reading data
class MatReaderMatReader(object):
    def __init__(self, file_path, to_torch=True, to_cuda=False, to_float=True):
        super(MatReader, self).__init__()

        self.to_torch = to_torch
        self.to_cuda = to_cuda
        self.to_float = to_float

        self.file_path = file_path

        self.data = None
        self.old_mat = None
        self._load_file()

    def _load_file(self):
        try:
            self.data = scipy.io.loadmat(self.file_path)
            self.old_mat = True
        except:
            self.data = h5py.File(self.file_path)
            self.old_mat = False

    def load_file(self, file_path):
        self.file_path = file_path
        self._load_file()

    def read_field(self, field):
        x = self.data[field]

        if not self.old_mat:
            x = x[()]
            x = np.transpose(x, axes=range(len(x.shape) - 1, -1, -1))

        if self.to_float:
            x = x.astype(np.float32)

        if self.to_torch:
            x = torch.from_numpy(x)

            if self.to_cuda:
                x = x.cuda()

        return x

    def set_cuda(self, to_cuda):
        self.to_cuda = to_cuda

    def set_torch(self, to_torch):
        self.to_torch = to_torch

    def set_float(self, to_float):
        self.to_float = to_float

# normalization, pointwise gaussian
class UnitGaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(UnitGaussianNormalizer, self).__init__()

        # x could be in shape of ntrain*n or ntrain*T*n or ntrain*n*T
        self.mean = torch.mean(x, 0)
        self.std = torch.std(x, 0)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        if sample_idx is None:
            std = self.std + self.eps # n
            mean = self.mean
        else:
            if len(self.mean.shape) == len(sample_idx[0].shape):
                std = self.std[sample_idx] + self.eps  # batch*n
                mean = self.mean[sample_idx]
            if len(self.mean.shape) > len(sample_idx[0].shape):
                std = self.std[:,sample_idx]+ self.eps # T*batch*n
                mean = self.mean[:,sample_idx]

        # x is in shape of batch*n or T*batch*n
        x = (x * std) + mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()
    '''
    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()
    '''

In [None]:
dataloader = MatReader('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/burgers_data_R10.mat')

In [None]:
x_data = dataloader.read_field('a')[:,::sub]
y_data = dataloader.read_field('u')[:,::sub]
import numpy as np

In [None]:
x_data.size()

torch.Size([2048, 1024])

In [None]:
[a,b] = x_data.size()
a_index_list = range(0,a)
num_dev = 27 # number of devices considered in the FL
allocate_dev = np.array_split(a_index_list,num_dev) # how to distribute training data among all devices

In [None]:
allocate_dev[17].size

76

In [None]:
# model
model_global = Net1d(modes, width).cuda()
print(model_global.count_params())

549569


In [None]:
model = model_global
w1 = []
w1 = model_global.conv1.fc0.weight
w2 = model_global.conv1.fc0.weight
w1.size()
#model2 = Net1d(modes, width).cuda()
#model2.conv1.fc0.weight = torch.nn.Parameter(w_final)

torch.Size([64, 2])

In [None]:
model_global.conv1.conv0.weights1.size()

torch.Size([64, 64, 16, 2])

In [None]:
dev_data_range = allocate_dev[0]
dev_data_size = dev_data_range.size
x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]
# x_train
# cat the locations information
grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
grid = torch.tensor(grid, dtype=torch.float)
x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)

In [None]:
aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)

In [None]:
# Here is how optimization works here
# myloss = LpLoss(size_average=False)
for ep in range(rounds):
  t1 = default_timer()
  train_mse = 0
  train_l2 = 0

  w_conv1_fc0_weight = torch.zeros(model_global.conv1.fc0.weight.size()).to('cuda:0')
  w_conv1_fc0_bias = torch.zeros(model_global.conv1.fc0.bias.size()).to('cuda:0')
  w_conv1_conv0_weights1 = torch.zeros(model_global.conv1.conv0.weights1.size()).to('cuda:0')
  w_conv1_conv1_weights1 = torch.zeros(model_global.conv1.conv1.weights1.size()).to('cuda:0')
  w_conv1_conv2_weights1 = torch.zeros(model_global.conv1.conv2.weights1.size()).to('cuda:0')
  w_conv1_conv3_weights1 = torch.zeros(model_global.conv1.conv3.weights1.size()).to('cuda:0')
  w_conv1_w0_weight = torch.zeros(model_global.conv1.w0.weight.size()).to('cuda:0')
  w_conv1_w0_bias = torch.zeros(model_global.conv1.w0.bias.size()).to('cuda:0')
  w_conv1_w1_weight = torch.zeros(model_global.conv1.w1.weight.size()).to('cuda:0')
  w_conv1_w1_bias = torch.zeros(model_global.conv1.w1.bias.size()).to('cuda:0')
  w_conv1_w2_weight = torch.zeros(model_global.conv1.w2.weight.size()).to('cuda:0')
  w_conv1_w2_bias = torch.zeros(model_global.conv1.w2.bias.size()).to('cuda:0')
  w_conv1_w3_weight = torch.zeros(model_global.conv1.w3.weight.size()).to('cuda:0')
  w_conv1_w3_bias = torch.zeros(model_global.conv1.w3.bias.size()).to('cuda:0')
  w_conv1_fc1_weight = torch.zeros(model_global.conv1.fc1.weight.size()).to('cuda:0')
  w_conv1_fc1_bias = torch.zeros(model_global.conv1.fc1.bias.size()).to('cuda:0')
  w_conv1_fc2_weight = torch.zeros(model_global.conv1.fc2.weight.size()).to('cuda:0')
  w_conv1_fc2_bias = torch.zeros(model_global.conv1.fc2.bias.size()).to('cuda:0')
  for dev_index in range(num_dev):
    dev_data_range = allocate_dev[dev_index]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
    scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
    model.train()
    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        mse.backward()
        #l2 = myloss(out.view(batch_size, -1), y.view(batch_size, -1))
        #l2.backward() # use the l2 relative losa
        optimizer.step()
        train_mse += mse.item()
        #train_l2 += l2.item()
    #print(train_mse)
    scheduler.step()
    w_conv1_fc0_weight += model.conv1.fc0.weight
    #print(w_conv1_fc0_weight)
    w_conv1_fc0_bias += model.conv1.fc0.bias
    w_conv1_conv0_weights1 += model.conv1.conv0.weights1
    w_conv1_conv1_weights1 += model.conv1.conv1.weights1
    w_conv1_conv2_weights1 += model.conv1.conv2.weights1
    w_conv1_conv3_weights1 += model.conv1.conv3.weights1
    w_conv1_w0_weight += model.conv1.w0.weight
    w_conv1_w0_bias += model.conv1.w0.bias
    w_conv1_w1_weight += model.conv1.w1.weight
    w_conv1_w1_bias += model.conv1.w1.bias
    w_conv1_w2_weight += model.conv1.w2.weight
    w_conv1_w2_bias += model.conv1.w2.bias
    w_conv1_w3_weight += model.conv1.w3.weight
    w_conv1_w3_bias += model.conv1.w3.bias
    w_conv1_fc1_weight += model.conv1.fc1.weight
    w_conv1_fc1_bias += model.conv1.fc1.bias
    w_conv1_fc2_weight += model.conv1.fc2.weight
    w_conv1_fc2_bias += model.conv1.fc2.bias
    '''
    scheduler.step()
    model.eval()
    test_l2 = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.cuda(), y.cuda()

            out = model(x)
            test_l2 += myloss(out.view(batch_size, -1), y.view(batch_size, -1)).item()

    train_mse /= len(train_loader)
    train_l2 /= ntrain
    test_l2 /= ntest
    '''
  w_conv1_fc0_weight_final = torch.mul(w_conv1_fc0_weight,1/num_dev)
  #print(w_conv1_fc0_weight_final)
  w_conv1_fc0_bias_final = torch.mul(w_conv1_fc0_bias,1/num_dev)
  w_conv1_conv0_weights1_final = torch.mul(w_conv1_conv0_weights1,1/num_dev)
  w_conv1_conv1_weights1_final = torch.mul(w_conv1_conv1_weights1,1/num_dev)
  w_conv1_conv2_weights1_final = torch.mul(w_conv1_conv2_weights1,1/num_dev)
  w_conv1_conv3_weights1_final = torch.mul(w_conv1_conv3_weights1,1/num_dev)
  w_conv1_w0_weight_final = torch.mul(w_conv1_w0_weight,1/num_dev)
  w_conv1_w0_bias_final = torch.mul(w_conv1_w0_bias,1/num_dev)
  w_conv1_w1_weight_final = torch.mul(w_conv1_w1_weight,1/num_dev)
  w_conv1_w1_bias_final = torch.mul(w_conv1_w1_bias,1/num_dev)
  w_conv1_w2_weight_final = torch.mul(w_conv1_w2_weight,1/num_dev)
  w_conv1_w2_bias_final = torch.mul(w_conv1_w2_bias,1/num_dev)
  w_conv1_w3_weight_final = torch.mul(w_conv1_w3_weight,1/num_dev)
  w_conv1_w3_bias_final = torch.mul(w_conv1_w3_bias,1/num_dev)
  w_conv1_fc1_weight_final = torch.mul(w_conv1_fc1_weight,1/num_dev)
  w_conv1_fc1_bias_final = torch.mul(w_conv1_fc1_bias,1/num_dev)
  w_conv1_fc2_weight_final = torch.mul(w_conv1_fc2_weight,1/num_dev)
  w_conv1_fc2_bias_final = torch.mul(w_conv1_fc2_bias,1/num_dev)
  model_global = Net1d(modes, width).cuda()
  model_global.conv1.fc0.weight = torch.nn.Parameter(w_conv1_fc0_weight_final)
  #print(model_global.conv1.fc0.weight)
  model_global.conv1.fc0.bias = torch.nn.Parameter(w_conv1_fc0_bias_final)
  model_global.conv1.conv0.weights1 = torch.nn.Parameter(w_conv1_conv0_weights1_final)
  model_global.conv1.conv1.weights1 = torch.nn.Parameter(w_conv1_conv1_weights1_final)
  model_global.conv1.conv2.weights1 = torch.nn.Parameter(w_conv1_conv2_weights1_final)
  model_global.conv1.conv3.weights1 = torch.nn.Parameter(w_conv1_conv3_weights1_final)
  model_global.conv1.w0.weight = torch.nn.Parameter(w_conv1_w0_weight_final)
  model_global.conv1.w0.bias = torch.nn.Parameter(w_conv1_w0_bias_final)
  model_global.conv1.w1.weight = torch.nn.Parameter(w_conv1_w1_weight_final)
  model_global.conv1.w1.bias = torch.nn.Parameter(w_conv1_w1_bias_final)
  model_global.conv1.w2.weight = torch.nn.Parameter(w_conv1_w2_weight_final)
  model_global.conv1.w2.bias = torch.nn.Parameter(w_conv1_w2_bias_final)
  model_global.conv1.w3.weight = torch.nn.Parameter(w_conv1_w3_weight_final)
  model_global.conv1.w3.bias = torch.nn.Parameter(w_conv1_w3_bias_final)
  model_global.conv1.fc1.weight = torch.nn.Parameter(w_conv1_fc1_weight_final)
  model_global.conv1.fc1.bias = torch.nn.Parameter(w_conv1_fc1_bias_final)
  model_global.conv1.fc2.weight = torch.nn.Parameter(w_conv1_fc2_weight_final)
  model_global.conv1.fc2.bias = torch.nn.Parameter(w_conv1_fc2_bias_final)
  t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
  print(ep, t2-t1, train_mse/27)
  #print(model.conv1.fc0.weight)



0 2.763574888000022 0.16613995093265893
1 2.7377015749999885 0.05929271161073336
2 2.7840030480000166 0.041990578357837406
3 2.700624470000008 0.03926311652564133
4 2.7367974120000156 0.03186697559000028
5 2.839335707999993 0.028054355874903396
6 2.7674628729999995 0.028132430670127547
7 2.7416887669999994 0.0235932881629129
8 2.7108042570000066 0.02135407753101188
9 2.765325770000004 0.019275711328696668
10 2.7572227610000084 0.018385209974237822
11 2.774357205000001 0.01891287346909478
12 2.743030327000014 0.017341310460189427
13 2.7505975789999866 0.015718243373730394
14 2.777307307000001 0.015959791236565688
15 2.79163041999999 0.014546813829198342
16 2.736462305999993 0.014131916533827919
17 2.698691927999988 0.012908756662336937
18 2.6985709250000127 0.012357969702592257
19 2.766386787999977 0.012421305135056307
20 2.7097609839999564 0.01227850595239067
21 2.6844917399999986 0.011350262501057133
22 2.706031429999996 0.010947319299534516
23 2.7395569569999907 0.010864578619172486


KeyboardInterrupt: ignored