In [1]:
import torch
import torch.nn as nn
import numpy as np
from tc.tc_fc import TTLinear
import matplotlib.pyplot as plt
from matplotlib import cm
import tntorch as tn
from pytorch_minimize.optim import MinimizeWrapper

In [8]:
device = 'cuda'

In [9]:
hid = [5, 2, 5, 2]
rank = [1, 2, 2, 2, 1]

model = torch.nn.Sequential(
        nn.Linear(2, 100),
        nn.Tanh(),
        TTLinear(hid, hid, rank, activation=None),
        nn.Tanh(),
        TTLinear(hid, hid, rank, activation=None),
        nn.Tanh(),
        nn.Linear(100, 1)).to(device)



In [56]:
TTLinear(hid, hid, [1, 2, 2, 2, 1], activation=None).state_dict()

OrderedDict([('b', tensor([1.])),
             ('W_cores.0', tensor([[[[-0.5346,  0.5896],
                        [ 0.3231, -0.4545],
                        [ 0.0695, -0.0094],
                        [-0.0131, -0.0202],
                        [ 0.2038,  0.8469]],
              
                       [[ 0.4613,  0.2683],
                        [-0.3998, -0.5913],
                        [ 0.0253,  0.0046],
                        [ 0.1612,  0.5893],
                        [ 0.8060, -0.3599]],
              
                       [[-0.0447, -0.2268],
                        [-0.1123, -0.4232],
                        [ 1.1072, -0.0714],
                        [ 0.1988, -0.0288],
                        [-0.1353,  0.2569]],
              
                       [[-0.5316, -0.0047],
                        [-0.1453, -0.1965],
                        [-0.0384, -0.3857],
                        [-0.3401, -0.3599],
                        [-0.4302, -0.4828]],
              
         

In [55]:
TTLinear(hid, hid, [1, 2, 2, 2, 1], activation=None).state_dict()['W_cores.3'].shape

torch.Size([2, 2, 2, 1])

In [25]:
model.state_dict()

OrderedDict([('0.weight',
              tensor([[ 4.5426e-01,  1.7516e-01],
                      [-5.9689e-01,  1.6616e-01],
                      [ 5.8600e-01, -4.6252e-01],
                      [-4.5706e-01,  6.9215e-01],
                      [ 5.6817e-01, -3.9449e-01],
                      [ 4.2550e-01, -9.0462e-02],
                      [-6.9716e-01,  5.3127e-02],
                      [ 2.2846e-01, -2.1210e-01],
                      [ 2.4599e-01, -6.0465e-01],
                      [ 4.9004e-01,  6.0283e-01],
                      [ 3.8995e-01,  1.1985e-01],
                      [ 2.3288e-01, -5.1163e-01],
                      [-1.7669e-01, -6.7506e-02],
                      [-3.9350e-01, -5.2228e-01],
                      [ 4.7561e-01,  4.7987e-01],
                      [-6.4219e-01,  2.2501e-01],
                      [ 2.8765e-01,  6.7173e-01],
                      [ 4.0910e-01,  6.5007e-01],
                      [-2.3150e-01,  4.9525e-01],
                      [ 

In [10]:
x_grid = np.linspace(0, 1, 51)
t_grid = np.linspace(0, 1, 51)

x = torch.from_numpy(x_grid)
t = torch.from_numpy(t_grid)

grid = torch.cartesian_prod(x, t).float().to(device)

def nn_autograd_simple(model, points, order,axis=0):
    points.requires_grad=True
    f = model(points).sum()
    for i in range(order):
        grads, = torch.autograd.grad(f, points, create_graph=True)
        f = grads[:,axis].sum()
    return grads[:,axis]

func_bnd1 = lambda x: 10 ** 4 * torch.sin((1 / 10) * x * (x - 1)) ** 2
bnd1 = torch.cartesian_prod(x, torch.from_numpy(np.array([0], dtype=np.float64))).float().to(device)
bndval1 = func_bnd1(bnd1[:, 0])

# du/dx (x,0) = 1e3*sin^2(x(x-1)/10)
func_bnd2 = lambda x: 10 ** 3 * torch.sin((1 / 10) * x * (x - 1)) ** 2
bnd2 = torch.cartesian_prod(x, torch.from_numpy(np.array([0], dtype=np.float64))).float().to(device)
bop2 = {
    'du/dt':
        {
            'coeff': 1,
            'du/dt': [1],
            'pow': 1,
            'var': 0
        }
}
bndval2 = func_bnd2(bnd2[:, 0])

# u(0,t) = u(1,t)
bnd3_left = torch.cartesian_prod(torch.from_numpy(np.array([0], dtype=np.float64)), t).float().to(device)
bnd3_right = torch.cartesian_prod(torch.from_numpy(np.array([1], dtype=np.float64)), t).float().to(device)
bnd3 = [bnd3_left, bnd3_right]

# du/dt(0,t) = du/dt(1,t)
bnd4_left = torch.cartesian_prod(torch.from_numpy(np.array([0], dtype=np.float64)), t).float().to(device)
bnd4_right = torch.cartesian_prod(torch.from_numpy(np.array([1], dtype=np.float64)), t).float().to(device)
bnd4 = [bnd4_left, bnd4_right]

bop4 = {
    'du/dx':
        {
            'coeff': 1,
            'du/dx': [0],
            'pow': 1,
            'var': 0
        }
}
bcond_type = 'periodic'

bconds = [[bnd1, bndval1, 'dirichlet'],
          [bnd2, bop2, bndval2, 'operator'],
          [bnd3, bcond_type],
          [bnd4, bop4, bcond_type]]

def wave_op(model, grid):
    u_xx = nn_autograd_simple(model, grid, order=2, axis=0)
    u_tt = nn_autograd_simple(model, grid, order=2, axis=1)
    a = -(1 / 4)

    op = u_tt + a * u_xx

    return op

def op_loss(operator):
    return torch.mean(torch.square(operator))

def bcs_loss(model):
    bc1 = model(bnd1)
    bc2 = nn_autograd_simple(model, bnd2, order=1, axis=1)
    bc3 = model(bnd3_left) - model(bnd3_right)
    bc4 = nn_autograd_simple(model, bnd4_left, order=1, axis=0) - nn_autograd_simple(model, bnd4_right, order=1, axis=0)
    
    loss_bc1 = torch.mean(torch.square(bc1.reshape(-1) - bndval1))
    loss_bc2 = torch.mean(torch.square(bc2.reshape(-1) - bndval2))
    loss_bc3 = torch.mean(torch.square(bc3))
    loss_bc4 = torch.mean(torch.square(bc4))
    
    loss = loss_bc1 + loss_bc2 + loss_bc3 + loss_bc4
    return loss



In [11]:
def draw_fig(model, grid):
    fig = plt.figure()
    ax = fig.add_subplot(111, projection='3d')

    xs = grid[:, 0].detach().cpu().numpy().reshape(-1)
    ys = grid[:, 1].detach().cpu().numpy().reshape(-1)
    zs = model(grid).detach().cpu().numpy().reshape(-1)

    ax.plot_trisurf(xs, ys, zs, cmap=cm.jet, linewidth=0.2, alpha=1)

    ax.set_title("wave periodic")
    ax.set_xlabel("$x$")
    ax.set_ylabel("$t$")

    plt.show()

In [12]:
def loss_fn(model):
    # model.load_state_dict(params)
    operator = wave_op(model, grid)
    loss = op_loss(operator) + 1000 * bcs_loss(model)
    return loss

In [7]:
num_iterations = 20000 
learning_rate = 0.01  
best_loss = float("inf") 

for iteration in range(num_iterations):
    current_params = [param.clone() for param in model.parameters()]
    new_params = [param + torch.randn_like(param) * learning_rate for param in current_params]

    for param, new_param in zip(model.parameters(), new_params):
        param.data = new_param
        
    operator = wave_op(model, grid)
    loss = op_loss(operator) + 1000 * bcs_loss(model)
  
    if loss < best_loss:
        best_loss = loss
    else:
        for param, current_param in zip(model.parameters(), current_params):
            param.data = current_param

    if iteration % 500 == 0:
        print(f"Iteration {iteration}: Loss = {best_loss}")

print("Optimization complete. Best loss:", best_loss)


Iteration 0: Loss = 15969.0361328125


KeyboardInterrupt: 

In [114]:
model

Sequential(
  (0): Linear(in_features=2, out_features=100, bias=True)
  (1): Tanh()
  (2): TTLinear(
    (TTLayer): inp_modes=[5, 2, 5, 2], out_modes=[5, 2, 5, 2], mat_ranks=[1, 2, 2, 2, 1]
    (W_cores): ParameterList(
        (0): Parameter containing: [torch.float32 of size 1x5x5x2 (GPU 0)]
        (1): Parameter containing: [torch.float32 of size 2x2x2x2 (GPU 0)]
        (2): Parameter containing: [torch.float32 of size 2x5x5x2 (GPU 0)]
        (3): Parameter containing: [torch.float32 of size 2x2x2x1 (GPU 0)]
    )
  )
  (3): Tanh()
  (4): TTLinear(
    (TTLayer): inp_modes=[5, 2, 5, 2], out_modes=[5, 2, 5, 2], mat_ranks=[1, 2, 2, 2, 1]
    (W_cores): ParameterList(
        (0): Parameter containing: [torch.float32 of size 1x5x5x2 (GPU 0)]
        (1): Parameter containing: [torch.float32 of size 2x2x2x2 (GPU 0)]
        (2): Parameter containing: [torch.float32 of size 2x5x5x2 (GPU 0)]
        (3): Parameter containing: [torch.float32 of size 2x2x2x1 (GPU 0)]
    )
  )
  (5): Tan

In [174]:
from copy import deepcopy
mu = 0.01

current_params = [param.clone() for param in model.parameters()]

xi = [torch.randn_like(param) for param in current_params]
new_params = [x + y for x, y in zip(current_params, list(map(lambda x: mu * x, xi)))]

# Создайте новую версию параметров модели с небольшими случайными изменениями
model_temp = deepcopy(model)

# Установите параметры модели на новые значения
for param, new_param in zip(model_temp.parameters(), new_params):
    param.data = new_param

# Рассчитайте функцию потерь на новых параметрах
loss_curr = loss_fn(model)
loss_temp = loss_fn(model_temp)

In [175]:
delta_losses = list(map(lambda x: (loss_temp - loss_curr) * x, xi))

In [176]:
N_dot_mu = [mu * len(item.flatten()) for item in xi]

In [177]:
grads = [x / y for x, y in zip(delta_losses, N_dot_mu)]

In [178]:
grads

[tensor([[0., 0.],
         [0., -0.],
         [-0., -0.],
         [0., 0.],
         [-0., -0.],
         [-0., 0.],
         [-0., -0.],
         [0., -0.],
         [-0., 0.],
         [-0., -0.],
         [-0., -0.],
         [-0., 0.],
         [0., -0.],
         [-0., 0.],
         [-0., 0.],
         [-0., 0.],
         [0., -0.],
         [0., -0.],
         [-0., -0.],
         [0., -0.],
         [-0., 0.],
         [-0., -0.],
         [0., 0.],
         [-0., 0.],
         [-0., -0.],
         [0., -0.],
         [0., 0.],
         [0., -0.],
         [-0., -0.],
         [-0., -0.],
         [-0., -0.],
         [0., -0.],
         [0., 0.],
         [0., -0.],
         [0., -0.],
         [-0., -0.],
         [-0., 0.],
         [0., 0.],
         [0., -0.],
         [0., 0.],
         [-0., 0.],
         [0., -0.],
         [-0., 0.],
         [-0., 0.],
         [0., 0.],
         [0., 0.],
         [-0., -0.],
         [0., 0.],
         [-0., -0.],
         [0., 0.

In [179]:
learning_rate = 0.1
grads_dot_lr = [learning_rate * g for g in grads]
params = [param - grad for param, grad in zip(current_params, grads_dot_lr)] 

In [169]:
for param, new_param in zip(model.parameters(), params):
        param.data = new_param

In [170]:
loss_fn(model)

tensor(9697772., device='cuda:0', grad_fn=<AddBackward0>)

In [180]:

num_iterations = 1000 
learning_rate = 0.01  
best_loss = float("inf")  
for iteration in range(num_iterations):
    
    # Текущие параметры в список
    current_params = [param.clone() for param in model.parameters()] 
    # Расчет кси
    xi = [torch.randn_like(param) for param in current_params]
    # Расчет новых параметров: theta + mu * xi
    new_params = [x + y for x, y in zip(current_params, list(map(lambda x: mu * x, xi)))]
    
    model_temp = deepcopy(model)
    # применяю новые параметры к модели
    for param, new_param in zip(model_temp.parameters(), new_params):
        param.data = new_param
    # считаем лосс от текущей модели и новой
    loss_curr = loss_fn(model)
    loss_temp = loss_fn(model_temp)
    
    delta_losses = list(map(lambda x: (loss_temp - loss_curr) * x, xi))
    
    # считаем произведение mu * N
    N_dot_mu = [mu * len(item.flatten()) for item in xi]
    # считаем  mu * N
    grads = [x / y for x, y in zip(delta_losses, N_dot_mu)]
    
    grads_dot_lr = [learning_rate * g for g in grads]
    params = [param - grad for param, grad in zip(current_params, grads_dot_lr)] 
    
    for param, new_param in zip(model.parameters(), params):
        param.data = new_param
        
    loss = loss_fn(model)
    if loss < best_loss:
        best_loss = loss    
    if iteration % 10 == 0:
        print(f"Iteration {iteration}: Loss = {best_loss}")

print("Optimization complete. Best loss:", best_loss)

Iteration 0: Loss = 1.119643794470657e+24
Iteration 10: Loss = 1.119643794470657e+24
Iteration 20: Loss = 1.119643794470657e+24
Iteration 30: Loss = 1.119643794470657e+24
Iteration 40: Loss = 1.119643794470657e+24
Iteration 50: Loss = 1.119643794470657e+24
Iteration 60: Loss = 1.119643794470657e+24
Iteration 70: Loss = 1.119643794470657e+24
Iteration 80: Loss = 1.119643794470657e+24


KeyboardInterrupt: 

In [15]:
minimizer_args = dict(method='L-BFGS-B', options={'disp':True, 'maxiter':5000}, jac = True)

In [16]:
minimizer_args['jac']

True

In [17]:
optimizer = MinimizeWrapper(model.parameters(), minimizer_args)

In [18]:
def closure():
    optimizer.zero_grad()
    loss = loss_fn(model)
    loss.backward()
    return loss
optimizer.step(closure)

AttributeError: 'NoneType' object has no attribute 'device'

In [None]:
draw_fig(model, grid)