In [None]:
!pip install -q torch==1.7.1 torchvision==0.8.2

[K     |████████████████████████████████| 776.8 MB 18 kB/s 
[K     |████████████████████████████████| 12.8 MB 51.5 MB/s 
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
torchtext 0.12.0 requires torch==1.11.0, but you have torch 1.7.1 which is incompatible.
torchaudio 0.11.0+cu113 requires torch==1.11.0, but you have torch 1.7.1 which is incompatible.[0m
[?25h

In [None]:
pip install Cython



In [None]:
import torch
import numpy as np
import scipy.io
import h5py
import sklearn.metrics
import torch.nn as nn
from scipy.ndimage import gaussian_filter


#################################################
#
# Utilities
#
#################################################
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# reading data
class MatReader(object):
    def __init__(self, file_path, to_torch=True, to_cuda=False, to_float=True):
        super(MatReader, self).__init__()

        self.to_torch = to_torch
        self.to_cuda = to_cuda
        self.to_float = to_float

        self.file_path = file_path

        self.data = None
        self.old_mat = None
        self._load_file()

    def _load_file(self):
        try:
            self.data = scipy.io.loadmat(self.file_path)
            self.old_mat = True
        except:
            self.data = h5py.File(self.file_path)
            self.old_mat = False

    def load_file(self, file_path):
        self.file_path = file_path
        self._load_file()

    def read_field(self, field):
        x = self.data[field]

        if not self.old_mat:
            x = x[()]
            x = np.transpose(x, axes=range(len(x.shape) - 1, -1, -1))

        if self.to_float:
            x = x.astype(np.float32)

        if self.to_torch:
            x = torch.from_numpy(x)

            if self.to_cuda:
                x = x.cuda()

        return x

    def set_cuda(self, to_cuda):
        self.to_cuda = to_cuda

    def set_torch(self, to_torch):
        self.to_torch = to_torch

    def set_float(self, to_float):
        self.to_float = to_float

# normalization, pointwise gaussian
class UnitGaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(UnitGaussianNormalizer, self).__init__()

        # x could be in shape of ntrain*n or ntrain*T*n or ntrain*n*T
        self.mean = torch.mean(x, 0)
        self.std = torch.std(x, 0)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        if sample_idx is None:
            std = self.std + self.eps # n
            mean = self.mean
        else:
            if len(self.mean.shape) == len(sample_idx[0].shape):
                std = self.std[sample_idx] + self.eps  # batch*n
                mean = self.mean[sample_idx]
            if len(self.mean.shape) > len(sample_idx[0].shape):
                std = self.std[:,sample_idx]+ self.eps # T*batch*n
                mean = self.mean[:,sample_idx]

        # x is in shape of batch*n or T*batch*n
        x = (x * std) + mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()

    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()

# normalization, Gaussian
class GaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(GaussianNormalizer, self).__init__()

        self.mean = torch.mean(x)
        self.std = torch.std(x)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        x = (x * (self.std + self.eps)) + self.mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()

    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()


# normalization, scaling by range
class RangeNormalizer(object):
    def __init__(self, x, low=0.0, high=1.0):
        super(RangeNormalizer, self).__init__()
        mymin = torch.min(x, 0)[0].view(-1)
        mymax = torch.max(x, 0)[0].view(-1)

        self.a = (high - low)/(mymax - mymin)
        self.b = -self.a*mymax + high

    def encode(self, x):
        s = x.size()
        x = x.view(s[0], -1)
        x = self.a*x + self.b
        x = x.view(s)
        return x

    def decode(self, x):
        s = x.size()
        x = x.view(s[0], -1)
        x = (x - self.b)/self.a
        x = x.view(s)
        return x

#loss function with rel/abs Lp loss
class LpLoss(object):
    def __init__(self, d=2, p=2, size_average=True, reduction=True):
        super(LpLoss, self).__init__()

        #Dimension and Lp-norm type are postive
        assert d > 0 and p > 0

        self.d = d
        self.p = p
        self.reduction = reduction
        self.size_average = size_average

    def abs(self, x, y):
        num_examples = x.size()[0]

        #Assume uniform mesh
        h = 1.0 / (x.size()[1] - 1.0)

        all_norms = (h**(self.d/self.p))*torch.norm(x.view(num_examples,-1) - y.view(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(all_norms)
            else:
                return torch.sum(all_norms)

        return all_norms

    def rel(self, x, y):
        num_examples = x.size()[0]

        diff_norms = torch.norm(x.reshape(num_examples,-1) - y.reshape(num_examples,-1), self.p, 1)
        y_norms = torch.norm(y.reshape(num_examples,-1), self.p, 1)

        if self.reduction:
            if self.size_average:
                return torch.mean(diff_norms/y_norms)
            else:
                return torch.sum(diff_norms/y_norms)

        return diff_norms/y_norms

    def __call__(self, x, y):
        return self.rel(x, y)

# A simple feedforward neural network
class DenseNet(torch.nn.Module):
    def __init__(self, layers, nonlinearity, out_nonlinearity=None, normalize=False):
        super(DenseNet, self).__init__()

        self.n_layers = len(layers) - 1

        assert self.n_layers >= 1

        self.layers = nn.ModuleList()

        for j in range(self.n_layers):
            self.layers.append(nn.Linear(layers[j], layers[j+1]))

            if j != self.n_layers - 1:
                if normalize:
                    self.layers.append(nn.BatchNorm1d(layers[j+1]))

                self.layers.append(nonlinearity())

        if out_nonlinearity is not None:
            self.layers.append(out_nonlinearity())

    def forward(self, x):
        for _, l in enumerate(self.layers):
            x = l(x)

        return x


In [None]:
import torch
print(torch.__version__)
from google.colab import drive
drive.mount('/content/drive/')

1.7.1
Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.nn.parameter import Parameter
import matplotlib.pyplot as plt

import operator
from functools import reduce
from functools import partial
from timeit import default_timer
#from utilities3 import *

In [None]:
torch.manual_seed(0)
np.random.seed(0)

In [None]:
def compl_mul1d(a, b):
    # (batch, in_channel, x ), (in_channel, out_channel, x) -> (batch, out_channel, x)
    op = partial(torch.einsum, "bix,iox->box")
    return torch.stack([
        op(a[..., 0], b[..., 0]) - op(a[..., 1], b[..., 1]),
        op(a[..., 1], b[..., 0]) + op(a[..., 0], b[..., 1])
    ], dim=-1)

In [None]:


class SpectralConv1d(nn.Module):
    def __init__(self, in_channels, out_channels, modes1):
        super(SpectralConv1d, self).__init__()

        """
        1D Fourier layer. It does FFT, linear transform, and Inverse FFT.
        """

        self.in_channels = in_channels
        self.out_channels = out_channels
        self.modes1 = modes1  #Number of Fourier modes to multiply, at most floor(N/2) + 1

        self.scale = (1 / (in_channels*out_channels))
        self.weights1 = nn.Parameter(self.scale * torch.rand(in_channels, out_channels, self.modes1, 2))

    def forward(self, x):
        batchsize = x.shape[0]
        #Compute Fourier coeffcients up to factor of e^(- something constant)
        x_ft = torch.rfft(x, 1, normalized=True, onesided=True)

        # Multiply relevant Fourier modes
        out_ft = torch.zeros(batchsize, self.in_channels, x.size(-1)//2 + 1, 2, device=x.device)
        out_ft[:, :, :self.modes1] = compl_mul1d(x_ft[:, :, :self.modes1], self.weights1)

        #Return to physical space
        x = torch.irfft(out_ft, 1, normalized=True, onesided=True, signal_sizes=(x.size(-1), ))
        return x

In [None]:
class SimpleBlock1d(nn.Module):
    def __init__(self, modes, width):
        super(SimpleBlock1d, self).__init__()

        """
        The overall network. It contains 4 layers of the Fourier layer.
        1. Lift the input to the desire channel dimension by self.fc0 .
        2. 4 layers of the integral operators u' = (W + K)(u).
            W defined by self.w; K defined by self.conv .
        3. Project from the channel space to the output space by self.fc1 and self.fc2 .

        input: the solution of the initial condition and location (a(x), x)
        input shape: (batchsize, x=s, c=2)
        output: the solution of a later timestep
        output shape: (batchsize, x=s, c=1)
        """

        self.modes1 = modes
        self.width = width
        self.fc0 = nn.Linear(2, self.width) # input channel is 2: (a(x), x)

        self.conv0 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv1 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv2 = SpectralConv1d(self.width, self.width, self.modes1)
        self.conv3 = SpectralConv1d(self.width, self.width, self.modes1)
        self.w0 = nn.Conv1d(self.width, self.width, 1)
        self.w1 = nn.Conv1d(self.width, self.width, 1)
        self.w2 = nn.Conv1d(self.width, self.width, 1)
        self.w3 = nn.Conv1d(self.width, self.width, 1)


        self.fc1 = nn.Linear(self.width, 128)
        self.fc2 = nn.Linear(128, 1)

    def forward(self, x):

        x = self.fc0(x)
        x = x.permute(0, 2, 1)

        x1 = self.conv0(x)
        x2 = self.w0(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv1(x)
        x2 = self.w1(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv2(x)
        x2 = self.w2(x)
        x = x1 + x2
        x = F.relu(x)

        x1 = self.conv3(x)
        x2 = self.w3(x)
        x = x1 + x2

        x = x.permute(0, 2, 1)
        x = self.fc1(x)
        x = F.relu(x)
        x = self.fc2(x)
        return x

In [None]:
class Net1d(nn.Module):
    def __init__(self, modes, width):
        super(Net1d, self).__init__()

        """
        A wrapper function
        """

        self.conv1 = SimpleBlock1d(modes, width)


    def forward(self, x):
        x = self.conv1(x)
        return x.squeeze()

    def count_params(self):
        c = 0
        for p in self.parameters():
            c += reduce(operator.mul, list(p.size()))

        return c

In [None]:
################################################################
#  configurations
################################################################
ntrain = 1000
ntest = 100

sub = 2**3 #subsampling rate
h = 2**13 // sub #total grid size divided by the subsampling rate
s = h

batch_size = 10
learning_rate = 0.001

rounds = 500
step_size = 100
gamma = 0.5

modes = 16
width = 64

In [None]:
# reading data
class MatReaderMatReader(object):
    def __init__(self, file_path, to_torch=True, to_cuda=False, to_float=True):
        super(MatReader, self).__init__()

        self.to_torch = to_torch
        self.to_cuda = to_cuda
        self.to_float = to_float

        self.file_path = file_path

        self.data = None
        self.old_mat = None
        self._load_file()

    def _load_file(self):
        try:
            self.data = scipy.io.loadmat(self.file_path)
            self.old_mat = True
        except:
            self.data = h5py.File(self.file_path)
            self.old_mat = False

    def load_file(self, file_path):
        self.file_path = file_path
        self._load_file()

    def read_field(self, field):
        x = self.data[field]

        if not self.old_mat:
            x = x[()]
            x = np.transpose(x, axes=range(len(x.shape) - 1, -1, -1))

        if self.to_float:
            x = x.astype(np.float32)

        if self.to_torch:
            x = torch.from_numpy(x)

            if self.to_cuda:
                x = x.cuda()

        return x

    def set_cuda(self, to_cuda):
        self.to_cuda = to_cuda

    def set_torch(self, to_torch):
        self.to_torch = to_torch

    def set_float(self, to_float):
        self.to_float = to_float

# normalization, pointwise gaussian
class UnitGaussianNormalizer(object):
    def __init__(self, x, eps=0.00001):
        super(UnitGaussianNormalizer, self).__init__()

        # x could be in shape of ntrain*n or ntrain*T*n or ntrain*n*T
        self.mean = torch.mean(x, 0)
        self.std = torch.std(x, 0)
        self.eps = eps

    def encode(self, x):
        x = (x - self.mean) / (self.std + self.eps)
        return x

    def decode(self, x, sample_idx=None):
        if sample_idx is None:
            std = self.std + self.eps # n
            mean = self.mean
        else:
            if len(self.mean.shape) == len(sample_idx[0].shape):
                std = self.std[sample_idx] + self.eps  # batch*n
                mean = self.mean[sample_idx]
            if len(self.mean.shape) > len(sample_idx[0].shape):
                std = self.std[:,sample_idx]+ self.eps # T*batch*n
                mean = self.mean[:,sample_idx]

        # x is in shape of batch*n or T*batch*n
        x = (x * std) + mean
        return x

    def cuda(self):
        self.mean = self.mean.cuda()
        self.std = self.std.cuda()
    '''
    def cpu(self):
        self.mean = self.mean.cpu()
        self.std = self.std.cpu()
    '''

In [None]:
dataloader = MatReader('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/burgers_data_R10.mat')

In [None]:
import scipy.io
#delay_in_order = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/delay_in_order.mat')
#delay_device = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/delay_device.mat')
#delay_matrix = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/matrix_update_new.mat')
#delay_time = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/delay_time.mat')
#delay_in_order = MatReader('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/delay_in_order.mat')#
#delay_device = MatReader('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/delay_device.mat')
#delay_matrix = MatReader('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/matrix_update_new.mat')
everything = scipy.io.loadmat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/everything2.mat')

In [None]:
delay_in_order = everything['delay_in_order']
delay_device = everything['delay_device']
delay_matrix = everything['matrix_update_new']
delay_in_order = delay_in_order[0]
delay_device = delay_device[0]
delay_time = everything['delay_time']
delay_time = delay_time[0]

In [None]:

delay_device[0]
len(delay_time)

36000

In [None]:
x_data = dataloader.read_field('a')[:,::sub]
y_data = dataloader.read_field('u')[:,::sub]
import numpy as np

In [None]:
[a,b] = x_data.size()
a_index_list = range(0,a)
num_dev = len(delay_matrix) # number of devices considered in the FL
allocate_dev = np.array_split(a_index_list,num_dev) # how to distribute training data among all devices
num_dev

36

In [None]:
# model
model_global = Net1d(modes, width).cuda()
print(model_global.count_params())

549569


In [None]:
model = model_global
w1 = []
w1 = model_global.conv1.fc0.weight
w2 = model_global.conv1.fc0.weight
w1.size()
#model2 = Net1d(modes, width).cuda()
#model2.conv1.fc0.weight = torch.nn.Parameter(w_final)

torch.Size([64, 2])

In [None]:
model_global.conv1.conv0.weights1.size()

torch.Size([64, 64, 16, 2])

In [None]:
###
#dev_data_range = allocate_dev[0]
#dev_data_size = dev_data_range.size
#x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
#y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]
# x_train
# cat the locations information
#grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
#grid = torch.tensor(grid, dtype=torch.float)
#x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)

In [None]:
aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
len(aa)

205

In [None]:
model_set = [model_global]*len(delay_matrix)
model_set[1] = model_global
model_set1 = [model_global]*len(delay_matrix)
model_set2 = [model_global]*len(delay_matrix)

In [None]:
train_mse = 0
dev_index = 10
model_global = model_global
dev_data_range = allocate_dev[dev_index]
dev_data_size = dev_data_range.size
x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]
grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
grid = torch.tensor(grid, dtype=torch.float)
x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
model = model_global
for x, y in train_loader:
  x, y = x.cuda(), y.cuda()
  out = model(x)
  mse = F.mse_loss(out, y, reduction='mean')
  train_mse += mse.item()
train_mse
num_dev

36

In [None]:
# Here is how optimization works here
# myloss = LpLoss(size_average=False)
import math
model_global_iter = model_global
model_global_iter1 = model_global
model_global_iter2 = model_global
for ep in range(len(delay_time)):
  #t1 = default_timer()
  #train_mse = 0
  train_l2 = 0

  #w_conv1_fc0_weight = torch.zeros(model_global.conv1.fc0.weight.size()).to('cuda:0')
  #w_conv1_fc0_bias = torch.zeros(model_global.conv1.fc0.bias.size()).to('cuda:0')
  #w_conv1_conv0_weights1 = torch.zeros(model_global.conv1.conv0.weights1.size()).to('cuda:0')
  #w_conv1_conv1_weights1 = torch.zeros(model_global.conv1.conv1.weights1.size()).to('cuda:0')
  #w_conv1_conv2_weights1 = torch.zeros(model_global.conv1.conv2.weights1.size()).to('cuda:0')
  #w_conv1_conv3_weights1 = torch.zeros(model_global.conv1.conv3.weights1.size()).to('cuda:0')
  #w_conv1_w0_weight = torch.zeros(model_global.conv1.w0.weight.size()).to('cuda:0')
  #w_conv1_w0_bias = torch.zeros(model_global.conv1.w0.bias.size()).to('cuda:0')
  #w_conv1_w1_weight = torch.zeros(model_global.conv1.w1.weight.size()).to('cuda:0')
  #w_conv1_w1_bias = torch.zeros(model_global.conv1.w1.bias.size()).to('cuda:0')
  #w_conv1_w2_weight = torch.zeros(model_global.conv1.w2.weight.size()).to('cuda:0')
  #w_conv1_w2_bias = torch.zeros(model_global.conv1.w2.bias.size()).to('cuda:0')
  #w_conv1_w3_weight = torch.zeros(model_global.conv1.w3.weight.size()).to('cuda:0')
  #w_conv1_w3_bias = torch.zeros(model_global.conv1.w3.bias.size()).to('cuda:0')
  #w_conv1_fc1_weight = torch.zeros(model_global.conv1.fc1.weight.size()).to('cuda:0')
  #w_conv1_fc1_bias = torch.zeros(model_global.conv1.fc1.bias.size()).to('cuda:0')
  #w_conv1_fc2_weight = torch.zeros(model_global.conv1.fc2.weight.size()).to('cuda:0')
  #w_conv1_fc2_bias = torch.zeros(model_global.conv1.fc2.bias.size()).to('cuda:0')

  #for dev_index in range(num_dev):
  dev_index = delay_device[ep]
  dev_data_range = allocate_dev[dev_index-1]
  dev_data_size = dev_data_range.size
  x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
  y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

  # cat the locations information
  grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
  grid = torch.tensor(grid, dtype=torch.float)
  x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
  train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
  model = model_set[dev_index-1]
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
  model.train()
  for x, y in train_loader:
      x, y = x.cuda(), y.cuda()

      optimizer.zero_grad()
      out = model(x)

      mse = F.mse_loss(out, y, reduction='mean')
      mse.backward()
      #l2 = myloss(out.view(batch_size, -1), y.view(batch_size, -1))
      #l2.backward() # use the l2 relative losa
      optimizer.step()
      #train_mse += mse.item()
      #train_l2 += l2.item()
    #print(train_mse)
  scheduler.step()

  # calculate time difference
  delay_time_index = delay_time[ep]
  time_difference = delay_matrix[dev_index-1][delay_time_index-1] - delay_matrix[dev_index-1][delay_time_index-2]
  #time_difference.real
  #time_weight = (0.5 + math.exp(-time_difference))/len(delay_matrix)
  time_weight = 1.5/len(delay_matrix)
  #time_weight = 1/(time_difference.real)
  #time_weight
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight_differnce = model.conv1.fc0.weight - model_global_iter.conv1.fc0.weight
  #print(w_conv1_fc0_weight)
  w_conv1_fc0_bias_differnce = model.conv1.fc0.bias - model_global_iter.conv1.fc0.bias
  w_conv1_conv0_weights1_differnce = model.conv1.conv0.weights1 - model_global_iter.conv1.conv0.weights1
  w_conv1_conv1_weights1_difference = model.conv1.conv1.weights1 - model_global_iter.conv1.conv1.weights1
  w_conv1_conv2_weights1_difference = model.conv1.conv2.weights1 - model_global_iter.conv1.conv2.weights1
  w_conv1_conv3_weights1_difference = model.conv1.conv3.weights1 - model_global_iter.conv1.conv3.weights1
  w_conv1_w0_weight_difference = model.conv1.w0.weight - model_global_iter.conv1.w0.weight
  w_conv1_w0_bias_difference = model.conv1.w0.bias - model_global_iter.conv1.w0.bias
  w_conv1_w1_weight_difference = model.conv1.w1.weight - model_global_iter.conv1.w1.weight
  w_conv1_w1_bias_difference = model.conv1.w1.bias - model_global_iter.conv1.w1.bias
  w_conv1_w2_weight_difference = model.conv1.w2.weight - model_global_iter.conv1.w2.weight
  w_conv1_w2_bias_difference = model.conv1.w2.bias - model_global_iter.conv1.w2.bias
  w_conv1_w3_weight_difference = model.conv1.w3.weight - model_global_iter.conv1.w3.weight
  w_conv1_w3_bias_difference = model.conv1.w3.bias - model_global_iter.conv1.w3.bias
  w_conv1_fc1_weight_difference = model.conv1.fc1.weight - model_global_iter.conv1.fc1.weight
  w_conv1_fc1_bias_difference = model.conv1.fc1.bias - model_global_iter.conv1.fc1.bias
  w_conv1_fc2_weight_difference = model.conv1.fc2.weight - model_global_iter.conv1.fc2.weight
  w_conv1_fc2_bias_difference = model.conv1.fc2.bias - model_global_iter.conv1.fc2.bias
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight = model_global_iter.conv1.fc0.weight + torch.mul(w_conv1_fc0_weight_differnce, time_weight)
  w_conv1_fc0_bias = model_global_iter.conv1.fc0.bias + torch.mul(w_conv1_fc0_bias_differnce, time_weight)
  w_conv1_conv0_weights1 = model_global_iter.conv1.conv0.weights1 + torch.mul(w_conv1_conv0_weights1_differnce, time_weight)
  w_conv1_conv1_weights1 = model_global_iter.conv1.conv1.weights1 + torch.mul(w_conv1_conv1_weights1_difference, time_weight)
  w_conv1_conv2_weights1 = model_global_iter.conv1.conv2.weights1 + torch.mul(w_conv1_conv2_weights1_difference, time_weight)
  w_conv1_conv3_weights1 = model_global_iter.conv1.conv3.weights1 + torch.mul(w_conv1_conv3_weights1_difference, time_weight)
  w_conv1_w0_weight = model_global_iter.conv1.w0.weight + torch.mul(w_conv1_w0_weight_difference, time_weight)
  w_conv1_w0_bias = model_global_iter.conv1.w0.bias + torch.mul(w_conv1_w0_bias_difference, time_weight)
  w_conv1_w1_weight = model_global_iter.conv1.w1.weight + torch.mul(w_conv1_w1_weight_difference, time_weight)
  w_conv1_w1_bias = model_global_iter.conv1.w1.bias + torch.mul(w_conv1_w1_bias_difference, time_weight)
  w_conv1_w2_weight = model_global_iter.conv1.w2.weight + torch.mul(w_conv1_w2_weight_difference, time_weight)
  w_conv1_w2_bias = model_global_iter.conv1.w2.bias + torch.mul(w_conv1_w2_bias_difference, time_weight)
  w_conv1_w3_weight = model_global_iter.conv1.w3.weight + torch.mul(w_conv1_w3_weight_difference, time_weight)
  w_conv1_w3_bias = model_global_iter.conv1.w3.bias + torch.mul(w_conv1_w3_bias_difference, time_weight)
  w_conv1_fc1_weight = model_global_iter.conv1.fc1.weight + torch.mul(w_conv1_fc1_weight_difference, time_weight)
  w_conv1_fc1_bias = model_global_iter.conv1.fc1.bias + torch.mul(w_conv1_fc1_bias_difference, time_weight)
  w_conv1_fc2_weight = model_global_iter.conv1.fc2.weight + torch.mul(w_conv1_fc2_weight_difference, time_weight)
  w_conv1_fc2_bias = model_global_iter.conv1.fc2.bias + torch.mul(w_conv1_fc2_bias_difference, time_weight)


  '''
    scheduler.step()
    model.eval()
    test_l2 = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.cuda(), y.cuda()

            out = model(x)
            test_l2 += myloss(out.view(batch_size, -1), y.view(batch_size, -1)).item()

    train_mse /= len(train_loader)
    train_l2 /= ntrain
    test_l2 /= ntest

  '''
  model_global_iter = Net1d(modes, width).cuda()
  model_global_iter.conv1.fc0.weight = torch.nn.Parameter(w_conv1_fc0_weight)
  #print(model_global.conv1.fc0.weight)
  model_global_iter.conv1.fc0.bias = torch.nn.Parameter(w_conv1_fc0_bias)
  model_global_iter.conv1.conv0.weights1 = torch.nn.Parameter(w_conv1_conv0_weights1)
  model_global_iter.conv1.conv1.weights1 = torch.nn.Parameter(w_conv1_conv1_weights1)
  model_global_iter.conv1.conv2.weights1 = torch.nn.Parameter(w_conv1_conv2_weights1)
  model_global_iter.conv1.conv3.weights1 = torch.nn.Parameter(w_conv1_conv3_weights1)
  model_global_iter.conv1.w0.weight = torch.nn.Parameter(w_conv1_w0_weight)
  model_global_iter.conv1.w0.bias = torch.nn.Parameter(w_conv1_w0_bias)
  model_global_iter.conv1.w1.weight = torch.nn.Parameter(w_conv1_w1_weight)
  model_global_iter.conv1.w1.bias = torch.nn.Parameter(w_conv1_w1_bias)
  model_global_iter.conv1.w2.weight = torch.nn.Parameter(w_conv1_w2_weight)
  model_global_iter.conv1.w2.bias = torch.nn.Parameter(w_conv1_w2_bias)
  model_global_iter.conv1.w3.weight = torch.nn.Parameter(w_conv1_w3_weight)
  model_global_iter.conv1.w3.bias = torch.nn.Parameter(w_conv1_w3_bias)
  model_global_iter.conv1.fc1.weight = torch.nn.Parameter(w_conv1_fc1_weight)
  model_global_iter.conv1.fc1.bias = torch.nn.Parameter(w_conv1_fc1_bias)
  model_global_iter.conv1.fc2.weight = torch.nn.Parameter(w_conv1_fc2_weight)
  model_global_iter.conv1.fc2.bias = torch.nn.Parameter(w_conv1_fc2_bias)

  model_set[dev_index-1] = model_global_iter

  train_mse = 0

  for dev_index_temp in range(num_dev):
    dev_data_range = allocate_dev[dev_index_temp]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global_iter

    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        train_mse += mse.item()
      #train_l2 += l2.item()
  #t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
  print(ep, train_mse/len(aa))



0 0.1978356124787796
1 0.19078757940995983
2 0.18569677559340872
3 0.17896568077366526
4 0.17199797303211398
5 0.16760955472908368
6 0.1606666336335787
7 0.15328478246200375
8 0.1464689402136861
9 0.14067115502023114
10 0.13355477561311024
11 0.12633689409349022
12 0.12165573433768459
13 0.11512554900311842
14 0.1077859162829998
15 0.10183310759503666
16 0.09572932269515061
17 0.09089115025793634
18 0.08411900797268239
19 0.07939636465616343
20 0.0746447267510542
21 0.07053850682043447
22 0.06570466821331804
23 0.06365637503019193
24 0.059974952714472285
25 0.05629451256245375
26 0.05366215206137517
27 0.05018693618476391
28 0.0478071931340709
29 0.04577725198937625
30 0.04417292591093517
31 0.042140151255923075
32 0.0142740847792749
33 0.013587901461869478
34 0.012848298466287371
35 0.012818780096202362
36 0.01297327145570662
37 0.01296580383190658
38 0.012820669734941387
39 0.012850487768286612
40 0.012703371320555849
41 0.01281088579264356
42 0.012948545738600376
43 0.01296215256074

In [None]:
import sys
import numpy
import scipy.io
iii = 1

for dev_index_temp in range(10):
    dev_data_range = allocate_dev[dev_index_temp]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global_iter

    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        y1= y.cpu().numpy()
        out1 = out.cpu().detach().numpy()
        print("y",y1)
        print("out",out1)
        #print("out",out)
        #('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/matrix_update_new.mat')
        scipy.io.savemat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/'+'y'+str(iii)+'.mat', mdict={'y': y1})
        scipy.io.savemat('/content/drive/MyDrive/Colab Notebooks/Burgers_R10/'+'out'+str(iii)+'.mat', mdict={'out': out1})
        iii = iii + 1
        train_mse += mse.item()
      #train_l2 += l2.item()
  #t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
print(ep, train_mse/len(aa))

y [[-1.0088338  -1.0067719  -1.0046806  ... -1.0148362  -1.0128666
  -1.0108656 ]
 [ 0.6834662   0.6850582   0.68663555 ...  0.67860657  0.68024004
   0.68186   ]
 [-0.01621377 -0.01383816 -0.0114659  ... -0.02336083 -0.02097508
  -0.01859273]
 ...
 [-0.30980882 -0.3058491  -0.30188996 ... -0.32169086 -0.3177297
  -0.313769  ]
 [ 0.2677609   0.26224926  0.256758   ...  0.28438962  0.27883396
   0.27329007]
 [-0.0254968  -0.02466057 -0.02383704 ... -0.02808264 -0.02720775
  -0.02634583]]
out [[-0.99496776 -0.99391896 -0.99281484 ... -0.9976422  -0.99706966
  -0.99644655]
 [ 0.72545063  0.7273319   0.72919244 ...  0.72662103  0.72789973
   0.72907084]
 [-0.03211452 -0.03049859 -0.02886642 ... -0.04756236 -0.04597886
  -0.04438367]
 ...
 [-0.3355031  -0.33248776 -0.32947278 ... -0.36922735 -0.36590987
  -0.3624516 ]
 [ 0.3375075   0.32998562  0.3222037  ...  0.31850958  0.31201297
   0.30551046]
 [-0.01165901 -0.01200315 -0.01236068 ... -0.01460488 -0.01495718
  -0.01532788]]
y [[ 0.20391

In [None]:
# Here is how optimization works here
# myloss = LpLoss(size_average=False)
import math
model_global_iter = model_global
model_global_iter1 = model_global
model_global_iter2 = model_global
for ep in range(len(delay_time)):
  #t1 = default_timer()
  #train_mse = 0
  train_l2 = 0

  #w_conv1_fc0_weight = torch.zeros(model_global.conv1.fc0.weight.size()).to('cuda:0')
  #w_conv1_fc0_bias = torch.zeros(model_global.conv1.fc0.bias.size()).to('cuda:0')
  #w_conv1_conv0_weights1 = torch.zeros(model_global.conv1.conv0.weights1.size()).to('cuda:0')
  #w_conv1_conv1_weights1 = torch.zeros(model_global.conv1.conv1.weights1.size()).to('cuda:0')
  #w_conv1_conv2_weights1 = torch.zeros(model_global.conv1.conv2.weights1.size()).to('cuda:0')
  #w_conv1_conv3_weights1 = torch.zeros(model_global.conv1.conv3.weights1.size()).to('cuda:0')
  #w_conv1_w0_weight = torch.zeros(model_global.conv1.w0.weight.size()).to('cuda:0')
  #w_conv1_w0_bias = torch.zeros(model_global.conv1.w0.bias.size()).to('cuda:0')
  #w_conv1_w1_weight = torch.zeros(model_global.conv1.w1.weight.size()).to('cuda:0')
  #w_conv1_w1_bias = torch.zeros(model_global.conv1.w1.bias.size()).to('cuda:0')
  #w_conv1_w2_weight = torch.zeros(model_global.conv1.w2.weight.size()).to('cuda:0')
  #w_conv1_w2_bias = torch.zeros(model_global.conv1.w2.bias.size()).to('cuda:0')
  #w_conv1_w3_weight = torch.zeros(model_global.conv1.w3.weight.size()).to('cuda:0')
  #w_conv1_w3_bias = torch.zeros(model_global.conv1.w3.bias.size()).to('cuda:0')
  #w_conv1_fc1_weight = torch.zeros(model_global.conv1.fc1.weight.size()).to('cuda:0')
  #w_conv1_fc1_bias = torch.zeros(model_global.conv1.fc1.bias.size()).to('cuda:0')
  #w_conv1_fc2_weight = torch.zeros(model_global.conv1.fc2.weight.size()).to('cuda:0')
  #w_conv1_fc2_bias = torch.zeros(model_global.conv1.fc2.bias.size()).to('cuda:0')

  #for dev_index in range(num_dev):
  dev_index = delay_device[ep]
  dev_data_range = allocate_dev[dev_index-1]
  dev_data_size = dev_data_range.size
  x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
  y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

  # cat the locations information
  grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
  grid = torch.tensor(grid, dtype=torch.float)
  x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
  train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
  model = model_set[dev_index-1]
  optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate, weight_decay=1e-4)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
  model.train()
  for x, y in train_loader:
      x, y = x.cuda(), y.cuda()

      optimizer.zero_grad()
      out = model(x)

      mse = F.mse_loss(out, y, reduction='mean')
      mse.backward()
      #l2 = myloss(out.view(batch_size, -1), y.view(batch_size, -1))
      #l2.backward() # use the l2 relative losa
      optimizer.step()
      #train_mse += mse.item()
      #train_l2 += l2.item()
    #print(train_mse)
  scheduler.step()

  # calculate time difference
  delay_time_index = delay_time[ep]
  time_difference = delay_matrix[dev_index-1][delay_time_index-1] - delay_matrix[dev_index-1][delay_time_index-2]
  time_weight = 1
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight_differnce = model.conv1.fc0.weight - model_global_iter.conv1.fc0.weight
  #print(w_conv1_fc0_weight)
  w_conv1_fc0_bias_differnce = model.conv1.fc0.bias - model_global_iter.conv1.fc0.bias
  w_conv1_conv0_weights1_differnce = model.conv1.conv0.weights1 - model_global_iter.conv1.conv0.weights1
  w_conv1_conv1_weights1_difference = model.conv1.conv1.weights1 - model_global_iter.conv1.conv1.weights1
  w_conv1_conv2_weights1_difference = model.conv1.conv2.weights1 - model_global_iter.conv1.conv2.weights1
  w_conv1_conv3_weights1_difference = model.conv1.conv3.weights1 - model_global_iter.conv1.conv3.weights1
  w_conv1_w0_weight_difference = model.conv1.w0.weight - model_global_iter.conv1.w0.weight
  w_conv1_w0_bias_difference = model.conv1.w0.bias - model_global_iter.conv1.w0.bias
  w_conv1_w1_weight_difference = model.conv1.w1.weight - model_global_iter.conv1.w1.weight
  w_conv1_w1_bias_difference = model.conv1.w1.bias - model_global_iter.conv1.w1.bias
  w_conv1_w2_weight_difference = model.conv1.w2.weight - model_global_iter.conv1.w2.weight
  w_conv1_w2_bias_difference = model.conv1.w2.bias - model_global_iter.conv1.w2.bias
  w_conv1_w3_weight_difference = model.conv1.w3.weight - model_global_iter.conv1.w3.weight
  w_conv1_w3_bias_difference = model.conv1.w3.bias - model_global_iter.conv1.w3.bias
  w_conv1_fc1_weight_difference = model.conv1.fc1.weight - model_global_iter.conv1.fc1.weight
  w_conv1_fc1_bias_difference = model.conv1.fc1.bias - model_global_iter.conv1.fc1.bias
  w_conv1_fc2_weight_difference = model.conv1.fc2.weight - model_global_iter.conv1.fc2.weight
  w_conv1_fc2_bias_difference = model.conv1.fc2.bias - model_global_iter.conv1.fc2.bias
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight = model_global_iter.conv1.fc0.weight + torch.mul(w_conv1_fc0_weight_differnce, time_weight)
  w_conv1_fc0_bias = model_global_iter.conv1.fc0.bias + torch.mul(w_conv1_fc0_bias_differnce, time_weight)
  w_conv1_conv0_weights1 = model_global_iter.conv1.conv0.weights1 + torch.mul(w_conv1_conv0_weights1_differnce, time_weight)
  w_conv1_conv1_weights1 = model_global_iter.conv1.conv1.weights1 + torch.mul(w_conv1_conv1_weights1_difference, time_weight)
  w_conv1_conv2_weights1 = model_global_iter.conv1.conv2.weights1 + torch.mul(w_conv1_conv2_weights1_difference, time_weight)
  w_conv1_conv3_weights1 = model_global_iter.conv1.conv3.weights1 + torch.mul(w_conv1_conv3_weights1_difference, time_weight)
  w_conv1_w0_weight = model_global_iter.conv1.w0.weight + torch.mul(w_conv1_w0_weight_difference, time_weight)
  w_conv1_w0_bias = model_global_iter.conv1.w0.bias + torch.mul(w_conv1_w0_bias_difference, time_weight)
  w_conv1_w1_weight = model_global_iter.conv1.w1.weight + torch.mul(w_conv1_w1_weight_difference, time_weight)
  w_conv1_w1_bias = model_global_iter.conv1.w1.bias + torch.mul(w_conv1_w1_bias_difference, time_weight)
  w_conv1_w2_weight = model_global_iter.conv1.w2.weight + torch.mul(w_conv1_w2_weight_difference, time_weight)
  w_conv1_w2_bias = model_global_iter.conv1.w2.bias + torch.mul(w_conv1_w2_bias_difference, time_weight)
  w_conv1_w3_weight = model_global_iter.conv1.w3.weight + torch.mul(w_conv1_w3_weight_difference, time_weight)
  w_conv1_w3_bias = model_global_iter.conv1.w3.bias + torch.mul(w_conv1_w3_bias_difference, time_weight)
  w_conv1_fc1_weight = model_global_iter.conv1.fc1.weight + torch.mul(w_conv1_fc1_weight_difference, time_weight)
  w_conv1_fc1_bias = model_global_iter.conv1.fc1.bias + torch.mul(w_conv1_fc1_bias_difference, time_weight)
  w_conv1_fc2_weight = model_global_iter.conv1.fc2.weight + torch.mul(w_conv1_fc2_weight_difference, time_weight)
  w_conv1_fc2_bias = model_global_iter.conv1.fc2.bias + torch.mul(w_conv1_fc2_bias_difference, time_weight)


  '''
    scheduler.step()
    model.eval()
    test_l2 = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.cuda(), y.cuda()

            out = model(x)
            test_l2 += myloss(out.view(batch_size, -1), y.view(batch_size, -1)).item()

    train_mse /= len(train_loader)
    train_l2 /= ntrain
    test_l2 /= ntest

  '''
  model_global_iter = Net1d(modes, width).cuda()
  model_global_iter.conv1.fc0.weight = torch.nn.Parameter(w_conv1_fc0_weight)
  #print(model_global.conv1.fc0.weight)
  model_global_iter.conv1.fc0.bias = torch.nn.Parameter(w_conv1_fc0_bias)
  model_global_iter.conv1.conv0.weights1 = torch.nn.Parameter(w_conv1_conv0_weights1)
  model_global_iter.conv1.conv1.weights1 = torch.nn.Parameter(w_conv1_conv1_weights1)
  model_global_iter.conv1.conv2.weights1 = torch.nn.Parameter(w_conv1_conv2_weights1)
  model_global_iter.conv1.conv3.weights1 = torch.nn.Parameter(w_conv1_conv3_weights1)
  model_global_iter.conv1.w0.weight = torch.nn.Parameter(w_conv1_w0_weight)
  model_global_iter.conv1.w0.bias = torch.nn.Parameter(w_conv1_w0_bias)
  model_global_iter.conv1.w1.weight = torch.nn.Parameter(w_conv1_w1_weight)
  model_global_iter.conv1.w1.bias = torch.nn.Parameter(w_conv1_w1_bias)
  model_global_iter.conv1.w2.weight = torch.nn.Parameter(w_conv1_w2_weight)
  model_global_iter.conv1.w2.bias = torch.nn.Parameter(w_conv1_w2_bias)
  model_global_iter.conv1.w3.weight = torch.nn.Parameter(w_conv1_w3_weight)
  model_global_iter.conv1.w3.bias = torch.nn.Parameter(w_conv1_w3_bias)
  model_global_iter.conv1.fc1.weight = torch.nn.Parameter(w_conv1_fc1_weight)
  model_global_iter.conv1.fc1.bias = torch.nn.Parameter(w_conv1_fc1_bias)
  model_global_iter.conv1.fc2.weight = torch.nn.Parameter(w_conv1_fc2_weight)
  model_global_iter.conv1.fc2.bias = torch.nn.Parameter(w_conv1_fc2_bias)

  model_set[dev_index-1] = model_global_iter

  train_mse = 0

  for dev_index_temp in range(num_dev):
    dev_data_range = allocate_dev[dev_index_temp]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global_iter

    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        train_mse += mse.item()
      #train_l2 += l2.item()
  #t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
  print(ep, train_mse/len(aa))

  # THE SECOND CASE
  model1 = model_set1[dev_index-1]
  optimizer = torch.optim.Adam(model1.parameters(), lr=learning_rate, weight_decay=1e-4)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
  model1.train()
  for x, y in train_loader:
      x, y = x.cuda(), y.cuda()

      optimizer.zero_grad()
      out = model1(x)

      mse = F.mse_loss(out, y, reduction='mean')
      mse.backward()
      #l2 = myloss(out.view(batch_size, -1), y.view(batch_size, -1))
      #l2.backward() # use the l2 relative losa
      optimizer.step()
      #train_mse += mse.item()
      #train_l2 += l2.item()
    #print(train_mse)
  scheduler.step()

  # calculate time difference
  delay_time_index = delay_time[ep]
  time_difference = delay_matrix[dev_index-1][delay_time_index-1] - delay_matrix[dev_index-1][delay_time_index-2]
  #####time_weight1 = (1+math.exp(-time_difference))/len(delay_matrix)
  time_weight1 = (1/time_difference)
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight_differnce1 = model1.conv1.fc0.weight - model_global_iter1.conv1.fc0.weight
  #print(w_conv1_fc0_weight)
  w_conv1_fc0_bias_differnce1 = model1.conv1.fc0.bias - model_global_iter1.conv1.fc0.bias
  w_conv1_conv0_weights1_differnce1 = model1.conv1.conv0.weights1 - model_global_iter1.conv1.conv0.weights1
  w_conv1_conv1_weights1_difference1 = model1.conv1.conv1.weights1 - model_global_iter1.conv1.conv1.weights1
  w_conv1_conv2_weights1_difference1 = model1.conv1.conv2.weights1 - model_global_iter1.conv1.conv2.weights1
  w_conv1_conv3_weights1_difference1 = model1.conv1.conv3.weights1 - model_global_iter1.conv1.conv3.weights1
  w_conv1_w0_weight_difference1 = model1.conv1.w0.weight - model_global_iter1.conv1.w0.weight
  w_conv1_w0_bias_difference1 = model1.conv1.w0.bias - model_global_iter1.conv1.w0.bias
  w_conv1_w1_weight_difference1 = model1.conv1.w1.weight - model_global_iter1.conv1.w1.weight
  w_conv1_w1_bias_difference1 = model1.conv1.w1.bias - model_global_iter1.conv1.w1.bias
  w_conv1_w2_weight_difference1 = model1.conv1.w2.weight - model_global_iter1.conv1.w2.weight
  w_conv1_w2_bias_difference1 = model1.conv1.w2.bias - model_global_iter1.conv1.w2.bias
  w_conv1_w3_weight_difference1 = model1.conv1.w3.weight - model_global_iter1.conv1.w3.weight
  w_conv1_w3_bias_difference1 = model1.conv1.w3.bias - model_global_iter1.conv1.w3.bias
  w_conv1_fc1_weight_difference1 = model1.conv1.fc1.weight - model_global_iter1.conv1.fc1.weight
  w_conv1_fc1_bias_difference1 = model1.conv1.fc1.bias - model_global_iter1.conv1.fc1.bias
  w_conv1_fc2_weight_difference1 = model1.conv1.fc2.weight - model_global_iter1.conv1.fc2.weight
  w_conv1_fc2_bias_difference1 = model1.conv1.fc2.bias - model_global_iter1.conv1.fc2.bias
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight1 = model_global_iter1.conv1.fc0.weight + torch.mul(w_conv1_fc0_weight_differnce1, time_weight1)
  w_conv1_fc0_bias1 = model_global_iter1.conv1.fc0.bias + torch.mul(w_conv1_fc0_bias_differnce1, time_weight1)
  w_conv1_conv0_weights11 = model_global_iter1.conv1.conv0.weights1 + torch.mul(w_conv1_conv0_weights1_differnce1, time_weight1)
  w_conv1_conv1_weights11 = model_global_iter1.conv1.conv1.weights1 + torch.mul(w_conv1_conv1_weights1_difference1, time_weight1)
  w_conv1_conv2_weights11 = model_global_iter1.conv1.conv2.weights1 + torch.mul(w_conv1_conv2_weights1_difference1, time_weight1)
  w_conv1_conv3_weights11 = model_global_iter1.conv1.conv3.weights1 + torch.mul(w_conv1_conv3_weights1_difference1, time_weight1)
  w_conv1_w0_weight1 = model_global_iter1.conv1.w0.weight + torch.mul(w_conv1_w0_weight_difference1, time_weight1)
  w_conv1_w0_bias1 = model_global_iter1.conv1.w0.bias + torch.mul(w_conv1_w0_bias_difference1, time_weight1)
  w_conv1_w1_weight1 = model_global_iter1.conv1.w1.weight + torch.mul(w_conv1_w1_weight_difference1, time_weight1)
  w_conv1_w1_bias1 = model_global_iter1.conv1.w1.bias + torch.mul(w_conv1_w1_bias_difference1, time_weight1)
  w_conv1_w2_weight1 = model_global_iter1.conv1.w2.weight + torch.mul(w_conv1_w2_weight_difference1, time_weight1)
  w_conv1_w2_bias1 = model_global_iter1.conv1.w2.bias + torch.mul(w_conv1_w2_bias_difference1, time_weight1)
  w_conv1_w3_weight1 = model_global_iter1.conv1.w3.weight + torch.mul(w_conv1_w3_weight_difference1, time_weight1)
  w_conv1_w3_bias1 = model_global_iter1.conv1.w3.bias + torch.mul(w_conv1_w3_bias_difference1, time_weight1)
  w_conv1_fc1_weight1 = model_global_iter1.conv1.fc1.weight + torch.mul(w_conv1_fc1_weight_difference1, time_weight1)
  w_conv1_fc1_bias1 = model_global_iter1.conv1.fc1.bias + torch.mul(w_conv1_fc1_bias_difference1, time_weight1)
  w_conv1_fc2_weight1 = model_global_iter1.conv1.fc2.weight + torch.mul(w_conv1_fc2_weight_difference1, time_weight1)
  w_conv1_fc2_bias1 = model_global_iter1.conv1.fc2.bias + torch.mul(w_conv1_fc2_bias_difference1, time_weight1)


  '''
    scheduler.step()
    model.eval()
    test_l2 = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.cuda(), y.cuda()

            out = model(x)
            test_l2 += myloss(out.view(batch_size, -1), y.view(batch_size, -1)).item()

    train_mse /= len(train_loader)
    train_l2 /= ntrain
    test_l2 /= ntest

  '''
  model_global_iter1 = Net1d(modes, width).cuda()
  model_global_iter1.conv1.fc0.weight = torch.nn.Parameter(w_conv1_fc0_weight1)
  #print(model_global.conv1.fc0.weight)
  model_global_iter1.conv1.fc0.bias = torch.nn.Parameter(w_conv1_fc0_bias1)
  model_global_iter1.conv1.conv0.weights1 = torch.nn.Parameter(w_conv1_conv0_weights11)
  model_global_iter1.conv1.conv1.weights1 = torch.nn.Parameter(w_conv1_conv1_weights11)
  model_global_iter1.conv1.conv2.weights1 = torch.nn.Parameter(w_conv1_conv2_weights11)
  model_global_iter1.conv1.conv3.weights1 = torch.nn.Parameter(w_conv1_conv3_weights11)
  model_global_iter1.conv1.w0.weight = torch.nn.Parameter(w_conv1_w0_weight1)
  model_global_iter1.conv1.w0.bias = torch.nn.Parameter(w_conv1_w0_bias1)
  model_global_iter1.conv1.w1.weight = torch.nn.Parameter(w_conv1_w1_weight1)
  model_global_iter1.conv1.w1.bias = torch.nn.Parameter(w_conv1_w1_bias1)
  model_global_iter1.conv1.w2.weight = torch.nn.Parameter(w_conv1_w2_weight1)
  model_global_iter1.conv1.w2.bias = torch.nn.Parameter(w_conv1_w2_bias1)
  model_global_iter1.conv1.w3.weight = torch.nn.Parameter(w_conv1_w3_weight1)
  model_global_iter1.conv1.w3.bias = torch.nn.Parameter(w_conv1_w3_bias1)
  model_global_iter1.conv1.fc1.weight = torch.nn.Parameter(w_conv1_fc1_weight1)
  model_global_iter1.conv1.fc1.bias = torch.nn.Parameter(w_conv1_fc1_bias1)
  model_global_iter1.conv1.fc2.weight = torch.nn.Parameter(w_conv1_fc2_weight1)
  model_global_iter1.conv1.fc2.bias = torch.nn.Parameter(w_conv1_fc2_bias1)

  model_set1[dev_index-1] = model_global_iter1

  train_mse1 = 0

  for dev_index_temp in range(num_dev):
    dev_data_range = allocate_dev[dev_index_temp]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global_iter1

    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        train_mse1 += mse.item()
      #train_l2 += l2.item()
  #t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
  print(ep, train_mse1/len(aa))
  #print(model.conv1.fc0.weight)

  # THE ThIRD CASE
  model2 = model_set2[dev_index-1]
  optimizer = torch.optim.Adam(model2.parameters(), lr=learning_rate, weight_decay=1e-4)
  scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=step_size, gamma=gamma)
  model2.train()
  for x, y in train_loader:
      x, y = x.cuda(), y.cuda()

      optimizer.zero_grad()
      out = model2(x)

      mse = F.mse_loss(out, y, reduction='mean')
      mse.backward()
      #l2 = myloss(out.view(batch_size, -1), y.view(batch_size, -1))
      #l2.backward() # use the l2 relative losa
      optimizer.step()
      #train_mse += mse.item()
      #train_l2 += l2.item()
    #print(train_mse)
  scheduler.step()

  # calculate time difference
  delay_time_index = delay_time[ep]
  time_difference = delay_matrix[dev_index-1][delay_time_index-1] - delay_matrix[dev_index-1][delay_time_index-2]
  time_weight2 = (1/time_difference)/len(delay_matrix)
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight_differnce2 = model2.conv1.fc0.weight - model_global_iter2.conv1.fc0.weight
  #print(w_conv1_fc0_weight)
  w_conv1_fc0_bias_differnce2 = model2.conv1.fc0.bias - model_global_iter2.conv1.fc0.bias
  w_conv1_conv0_weights1_differnce2 = model2.conv1.conv0.weights1 - model_global_iter2.conv1.conv0.weights1
  w_conv1_conv1_weights1_difference2 = model2.conv1.conv1.weights1 - model_global_iter2.conv1.conv1.weights1
  w_conv1_conv2_weights1_difference2 = model2.conv1.conv2.weights1 - model_global_iter2.conv1.conv2.weights1
  w_conv1_conv3_weights1_difference2 = model2.conv1.conv3.weights1 - model_global_iter2.conv1.conv3.weights1
  w_conv1_w0_weight_difference2 = model2.conv1.w0.weight - model_global_iter2.conv1.w0.weight
  w_conv1_w0_bias_difference2 = model2.conv1.w0.bias - model_global_iter2.conv1.w0.bias
  w_conv1_w1_weight_difference2 = model2.conv1.w1.weight - model_global_iter2.conv1.w1.weight
  w_conv1_w1_bias_difference2 = model2.conv1.w1.bias - model_global_iter2.conv1.w1.bias
  w_conv1_w2_weight_difference2 = model2.conv1.w2.weight - model_global_iter2.conv1.w2.weight
  w_conv1_w2_bias_difference2 = model2.conv1.w2.bias - model_global_iter2.conv1.w2.bias
  w_conv1_w3_weight_difference2 = model2.conv1.w3.weight - model_global_iter2.conv1.w3.weight
  w_conv1_w3_bias_difference2 = model2.conv1.w3.bias - model_global_iter2.conv1.w3.bias
  w_conv1_fc1_weight_difference2 = model2.conv1.fc1.weight - model_global_iter2.conv1.fc1.weight
  w_conv1_fc1_bias_difference2 = model2.conv1.fc1.bias - model_global_iter2.conv1.fc1.bias
  w_conv1_fc2_weight_difference2 = model2.conv1.fc2.weight - model_global_iter2.conv1.fc2.weight
  w_conv1_fc2_bias_difference2 = model2.conv1.fc2.bias - model_global_iter2.conv1.fc2.bias
  # calculate the product of gradient and learning rate
  w_conv1_fc0_weight2 = model_global_iter2.conv1.fc0.weight + torch.mul(w_conv1_fc0_weight_differnce2, time_weight2)
  w_conv1_fc0_bias2 = model_global_iter2.conv1.fc0.bias + torch.mul(w_conv1_fc0_bias_differnce2, time_weight2)
  w_conv1_conv0_weights12 = model_global_iter2.conv1.conv0.weights1 + torch.mul(w_conv1_conv0_weights1_differnce2, time_weight2)
  w_conv1_conv1_weights12 = model_global_iter2.conv1.conv1.weights1 + torch.mul(w_conv1_conv1_weights1_difference2, time_weight2)
  w_conv1_conv2_weights12 = model_global_iter2.conv1.conv2.weights1 + torch.mul(w_conv1_conv2_weights1_difference2, time_weight2)
  w_conv1_conv3_weights12 = model_global_iter2.conv1.conv3.weights1 + torch.mul(w_conv1_conv3_weights1_difference2, time_weight2)
  w_conv1_w0_weight2 = model_global_iter2.conv1.w0.weight + torch.mul(w_conv1_w0_weight_difference2, time_weight2)
  w_conv1_w0_bias2 = model_global_iter2.conv1.w0.bias + torch.mul(w_conv1_w0_bias_difference2, time_weight2)
  w_conv1_w1_weight2 = model_global_iter2.conv1.w1.weight + torch.mul(w_conv1_w1_weight_difference2, time_weight2)
  w_conv1_w1_bias2 = model_global_iter2.conv1.w1.bias + torch.mul(w_conv1_w1_bias_difference2, time_weight2)
  w_conv1_w2_weight2 = model_global_iter2.conv1.w2.weight + torch.mul(w_conv1_w2_weight_difference2, time_weight2)
  w_conv1_w2_bias2 = model_global_iter2.conv1.w2.bias + torch.mul(w_conv1_w2_bias_difference2, time_weight2)
  w_conv1_w3_weight2 = model_global_iter2.conv1.w3.weight + torch.mul(w_conv1_w3_weight_difference2, time_weight2)
  w_conv1_w3_bias2 = model_global_iter2.conv1.w3.bias + torch.mul(w_conv1_w3_bias_difference2, time_weight2)
  w_conv1_fc1_weight2 = model_global_iter2.conv1.fc1.weight + torch.mul(w_conv1_fc1_weight_difference2, time_weight2)
  w_conv1_fc1_bias2 = model_global_iter2.conv1.fc1.bias + torch.mul(w_conv1_fc1_bias_difference2, time_weight2)
  w_conv1_fc2_weight2 = model_global_iter2.conv1.fc2.weight + torch.mul(w_conv1_fc2_weight_difference2, time_weight2)
  w_conv1_fc2_bias2 = model_global_iter2.conv1.fc2.bias + torch.mul(w_conv1_fc2_bias_difference2, time_weight2)


  '''
    scheduler.step()
    model.eval()
    test_l2 = 0.0
    with torch.no_grad():
        for x, y in test_loader:
            x, y = x.cuda(), y.cuda()

            out = model(x)
            test_l2 += myloss(out.view(batch_size, -1), y.view(batch_size, -1)).item()

    train_mse /= len(train_loader)
    train_l2 /= ntrain
    test_l2 /= ntest

  '''
  model_global_iter2 = Net1d(modes, width).cuda()
  model_global_iter2.conv1.fc0.weight = torch.nn.Parameter(w_conv1_fc0_weight2)
  #print(model_global.conv1.fc0.weight)
  model_global_iter2.conv1.fc0.bias = torch.nn.Parameter(w_conv1_fc0_bias2)
  model_global_iter2.conv1.conv0.weights1 = torch.nn.Parameter(w_conv1_conv0_weights12)
  model_global_iter2.conv1.conv1.weights1 = torch.nn.Parameter(w_conv1_conv1_weights12)
  model_global_iter2.conv1.conv2.weights1 = torch.nn.Parameter(w_conv1_conv2_weights12)
  model_global_iter2.conv1.conv3.weights1 = torch.nn.Parameter(w_conv1_conv3_weights12)
  model_global_iter2.conv1.w0.weight = torch.nn.Parameter(w_conv1_w0_weight2)
  model_global_iter2.conv1.w0.bias = torch.nn.Parameter(w_conv1_w0_bias2)
  model_global_iter2.conv1.w1.weight = torch.nn.Parameter(w_conv1_w1_weight2)
  model_global_iter2.conv1.w1.bias = torch.nn.Parameter(w_conv1_w1_bias2)
  model_global_iter2.conv1.w2.weight = torch.nn.Parameter(w_conv1_w2_weight2)
  model_global_iter2.conv1.w2.bias = torch.nn.Parameter(w_conv1_w2_bias2)
  model_global_iter2.conv1.w3.weight = torch.nn.Parameter(w_conv1_w3_weight2)
  model_global_iter2.conv1.w3.bias = torch.nn.Parameter(w_conv1_w3_bias2)
  model_global_iter2.conv1.fc1.weight = torch.nn.Parameter(w_conv1_fc1_weight2)
  model_global_iter2.conv1.fc1.bias = torch.nn.Parameter(w_conv1_fc1_bias2)
  model_global_iter2.conv1.fc2.weight = torch.nn.Parameter(w_conv1_fc2_weight2)
  model_global_iter2.conv1.fc2.bias = torch.nn.Parameter(w_conv1_fc2_bias2)

  model_set2[dev_index-1] = model_global_iter2

  train_mse2 = 0

  for dev_index_temp in range(num_dev):
    dev_data_range = allocate_dev[dev_index_temp]
    dev_data_size = dev_data_range.size
    x_train = x_data[dev_data_range[0]:dev_data_range[-1]+1,:]
    y_train = y_data[dev_data_range[0]:dev_data_range[-1]+1,:]

    # cat the locations information
    grid = np.linspace(0, 2*np.pi, s).reshape(1, s, 1)
    grid = torch.tensor(grid, dtype=torch.float)
    x_train = torch.cat([x_train.reshape(dev_data_size,s,1), grid.repeat(dev_data_size,1,1)], dim=2)
    train_loader = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_train, y_train), batch_size=batch_size, shuffle=True)
    model = model_global_iter2

    for x, y in train_loader:
        x, y = x.cuda(), y.cuda()

        optimizer.zero_grad()
        out = model(x)

        mse = F.mse_loss(out, y, reduction='mean')
        train_mse2 += mse.item()
      #train_l2 += l2.item()
  #t2 = default_timer()
  #aa = torch.utils.data.DataLoader(torch.utils.data.TensorDataset(x_data, y_data), batch_size=batch_size, shuffle=True)
  print(ep, train_mse2/len(aa))
  #print(model.conv1.fc0.weight)

0 0.038794413199875415


RuntimeError: ignored