In [1]:
import numpy as np
import torch
import torch.nn as nn
import optuna
import torch.optim as optim

import pickle

from loss_function import gradient_norm_loss
from Constant import C

In [2]:
class Model(nn.Module):
    """Fully connected tanh network mapping a 2-feature input to one output.

    Layout: ``fc1`` (2 -> num_node), seven hidden layers ``fc2`` .. ``fc8``
    (num_node -> num_node) and a linear head ``fc9`` (num_node -> 1).
    ``tanh`` follows every layer except the head.

    Args:
        num_node: Width of every hidden layer.
    """

    def __init__(self, num_node):
        super(Model, self).__init__()
        # Every layer is constructed exactly once and in order. Building a
        # layer twice under the same attribute name only discards the first
        # instance, but it still consumes random-number-generator state and
        # so changes the initial weights of all later layers.
        self.fc1 = nn.Linear(2, num_node)
        self.fc2 = nn.Linear(num_node, num_node)
        self.fc3 = nn.Linear(num_node, num_node)
        self.fc4 = nn.Linear(num_node, num_node)
        self.fc5 = nn.Linear(num_node, num_node)
        self.fc6 = nn.Linear(num_node, num_node)
        self.fc7 = nn.Linear(num_node, num_node)
        self.fc8 = nn.Linear(num_node, num_node)
        self.fc9 = nn.Linear(num_node, 1)

    def forward(self, x):
        '''
        x = torch([batch, t, x])

        Applies fc1..fc8, each followed by tanh, then the linear head fc9.
        The last dimension of ``x`` must have size 2; the last dimension of
        the result has size 1.
        '''
        hidden_layers = (self.fc1, self.fc2, self.fc3, self.fc4,
                         self.fc5, self.fc6, self.fc7, self.fc8)
        for layer in hidden_layers:
            x = torch.tanh(layer(x))
        return self.fc9(x)

In [3]:
def main(dataset, sample_batch_size, boundary_batch_size, \
         model, learning_rate, sample_loss_weight, gradient_loss_weight, max_step):
    """Train ``model`` in place with plain SGD for ``max_step`` steps.

    Every step draws a fresh boundary/initial batch and a fresh sample batch
    and minimises

        loss_1 + sample_loss_weight * loss_2 + gradient_loss_weight * loss_3

    where ``loss_1`` is the MSE between the model output and the labels of
    the boundary/initial batch, ``loss_2`` is the MSE against zero of the
    residual returned by ``f``, and ``loss_3`` is ``gradient_norm_loss()``
    applied to the gradients returned by ``f``.

    Args:
        dataset: Data container handed to ``define_bc_and_init``,
            ``get_boudary_point`` and ``get_sample_point``.
        sample_batch_size: Number of sample points drawn per step.
        boundary_batch_size: Number of boundary/initial points drawn per step.
        model: Network to optimise; its parameters are updated in place.
        learning_rate: SGD learning rate.
        sample_loss_weight: Weight of ``loss_2``.
        gradient_loss_weight: Weight of ``loss_3``.
        max_step: Number of optimisation steps.

    Returns:
        None. The trainable-parameter count and the loss of every step are
        printed; nothing is logged to disk or checkpointed.
    """
    optimizer = optim.SGD(model.parameters(), lr=learning_rate)
    criterion = nn.MSELoss()
    grad_loss = gradient_norm_loss()

    params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print('params: ', params)

    bc_and_init = define_bc_and_init(dataset)

    for step in range(max_step):
        # Boundary and initial-condition term.
        boundary_input, boundary_label = get_boudary_point(bc_and_init, dataset, boundary_batch_size)
        u = model(boundary_input)
        loss_1 = criterion(u, boundary_label)

        # Residual and gradient terms on the sample points.
        sample_input, _ = get_sample_point(dataset, sample_batch_size)
        f_list, grad = f(sample_input, model)
        fnc = torch.cat(f_list, dim=0).unsqueeze(1)
        loss_2 = criterion(fnc, torch.zeros_like(fnc))
        loss_3 = grad_loss(grad)

        loss = loss_1 + sample_loss_weight * loss_2 + gradient_loss_weight * loss_3
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        # .item() logs the plain number rather than the tensor object.
        print('step {} : loss {}'.format(step, loss.item()))

    # No summary writer is created in this function, so there is nothing to
    # close here; calling ``writer.close()`` would raise NameError once
    # training has finished.


def evaluate(model, dataset):
    """Sum the squared prediction error of ``model`` over the whole grid.

    The model is evaluated one point at a time, without gradient tracking,
    on every combination of an entry of ``dataset['t']`` and an entry of
    ``dataset['x']``. Each prediction is compared with
    ``dataset['u'][x_index][0][t_index]``.

    The model is put into eval mode for the duration and switched back to
    train mode before returning.

    Returns:
        The accumulated sum of squared errors.
    """
    model.eval()
    times = dataset['t']
    positions = dataset['x']
    reference = dataset['u']

    squared_error_sum = 0
    with torch.no_grad():
        # Same visiting order as a nested loop: time outermost, position innermost.
        grid = ((ti, xi) for ti in range(len(times)) for xi in range(len(positions)))
        for ti, xi in grid:
            point = torch.tensor(np.concatenate([times[ti], positions[xi]]), dtype=torch.float32)
            prediction = model(point).item()
            squared_error_sum += (prediction - reference[xi][0][ti]) ** 2
    model.train()
    return squared_error_sum

def define_bc_and_init(dataset):
    """Build the index table of boundary and initial-condition grid points.

    Each row is a ``(time index, position index)`` pair. With ``T`` the
    length of ``dataset['t']`` and ``N`` the length of ``dataset['x']`` the
    rows are, in order:

    * the first position (index 0) at time indices ``1 .. T-1``,
    * the last position (index ``N-1``) at time indices ``1 .. T-1``,
    * every position ``0 .. N-1`` at time index 0.

    Returns:
        Integer array of shape ``(2 * (T - 1) + N, 2)``.
    """
    n_time = dataset['t'].shape[0]
    n_pos = dataset['x'].shape[0]

    def edge_rows(pos_index):
        # One fixed position paired with every time index after the first.
        later_steps = np.arange(1, n_time, dtype=np.int32)[:, None]
        fixed_pos = np.full((n_time - 1, 1), pos_index)
        return np.concatenate([later_steps, fixed_pos], axis=1)

    # Time index 0 paired with every position.
    all_positions = np.arange(n_pos, dtype=np.int32)[:, None]
    initial_rows = np.concatenate([np.full((n_pos, 1), 0), all_positions], axis=1)

    return np.concatenate([edge_rows(0), edge_rows(n_pos - 1), initial_rows], axis=0)

def f(sample_input, model):
    """Evaluate the residual ``du/dt + C * du/dx`` at every sample point.

    Each row of ``sample_input`` is split into ``t`` (column 0) and ``x``
    (column 1), the model is evaluated on that single point, and the partial
    derivatives of the output with respect to ``t`` and ``x`` are obtained
    with autograd. ``sample_input`` must therefore require gradients.

    Args:
        sample_input: 2-D tensor whose rows are ``(t, x)`` points.
        model: Network called with a 1-D tensor ``[t, x]``.

    Returns:
        A tuple ``(f_list, grad)``:

        * ``f_list`` - list with one shape-``(1,)`` residual tensor per row.
        * ``grad`` - 1-D float32 tensor ``[du/dt_0, du/dx_0, du/dt_1, ...]``
          with two entries per row. It stays attached to the autograd graph,
          so a loss computed from it can be backpropagated to the model
          parameters.
    """
    f_list = []
    grad_list = []
    for one_sample in sample_input:
        t = one_sample[0].unsqueeze(0)
        x = one_sample[1].unsqueeze(0)
        u = model(torch.cat([t, x], dim=0))
        # create_graph=True keeps the derivatives differentiable so that the
        # losses built from them can reach the model parameters.
        du_dt, du_dx = torch.autograd.grad(u, (t, x), retain_graph=True, create_graph=True)
        f_list.append(du_dt + C * du_dx)
        grad_list.extend([du_dt, du_dx])

    if not grad_list:
        # torch.cat rejects an empty list; an empty batch yields an empty tensor.
        return f_list, torch.empty(0, dtype=torch.float32)

    # torch.cat (unlike torch.tensor, which copies the values into a new
    # tensor without autograd history) keeps the gradient entries connected
    # to the graph.
    return f_list, torch.cat(grad_list, dim=0).to(torch.float32)
    
def data_load(data_path):
    """Return the object stored in the pickle file at ``data_path``.

    Unpickling can execute arbitrary code, so this should only be pointed at
    files from a trusted source.
    """
    with open(data_path, mode='rb') as pickle_file:
        return pickle.load(pickle_file)

def get_label_list(label, batch_point):
    '''
    Look up one label per requested point.

    label [time * stencil]
    batch_point [batch_size * 2(time, point)]

    Returns a list with, for every point, the entry
    label[point[0], point[1]] with a new leading dimension of size 1.
    '''
    return [torch.unsqueeze(label[point[0], point[1]], dim=0) for point in batch_point]

def get_sample_point(dataset, sample_batch_size):
    """Draw a random batch of interior grid points.

    Time indices are drawn uniformly from ``1 .. T-1`` and position indices
    uniformly from the interior ``1 .. N-2``, where ``T`` and ``N`` are the
    lengths of ``dataset['t']`` and ``dataset['x']``. The first and last
    positions and time index 0 are never drawn here.

    Args:
        dataset: Mapping with entries ``'t'``, ``'x'`` and ``'u'``. The label
            of a point is read as ``dataset['u'][x_index][0][t_index]``.
            Assumes ``dataset['t'][i]`` and ``dataset['x'][j]`` are
            length-1 arrays, so the first two columns of each assembled row
            are ``(t, x)`` - TODO confirm against the dataset file.
        sample_batch_size: Number of points to draw.

    Returns:
        ``(inputs, labels)``: a float32 tensor holding the first two columns
        of the assembled rows, created with ``requires_grad=True`` so that
        derivatives with respect to the inputs can be taken, and a float32
        tensor of shape ``(sample_batch_size, 1)`` holding the third column.

    Raises:
        ValueError: If the grid has no interior position (``N < 3``) or only
            one time step, or if ``sample_batch_size`` is 0.
    """
    n_time = dataset['t'].shape[0]
    n_pos = dataset['x'].shape[0]

    time_step = np.random.randint(1, n_time, (sample_batch_size, 1))
    # randint's upper bound is exclusive: n_pos - 1 makes the last interior
    # index (n_pos - 2) reachable while still excluding the boundary index.
    sample_stencil = np.random.randint(1, n_pos - 1, (sample_batch_size, 1))
    pair_batch = np.concatenate([time_step, sample_stencil], axis=1)

    batch_list = [
        np.concatenate([dataset['t'][t_index],
                        dataset['x'][x_index],
                        np.array([dataset['u'][x_index][0][t_index]])])
        for t_index, x_index in pair_batch
    ]
    numerical_data = np.stack(batch_list, axis=0)

    inputs = torch.tensor(numerical_data[:, 0:2], dtype=torch.float32, requires_grad=True)
    labels = torch.tensor(numerical_data[:, 2], dtype=torch.float32).view(-1, 1)
    return inputs, labels

def get_boudary_point(bc_and_init, dataset, boundary_batch_size):
    """Draw a random batch (with replacement) of boundary/initial points.

    ``boundary_batch_size`` rows are chosen at random from ``bc_and_init``,
    whose rows are ``(time index, position index)`` pairs. For each chosen
    pair a row is assembled from ``dataset['t'][time index]``,
    ``dataset['x'][position index]`` and the value
    ``dataset['u'][position index][0][time index]``.

    Returns:
        ``(inputs, labels)``: a float32 tensor with the first two columns of
        the assembled rows (created with ``requires_grad=True``) and a
        float32 tensor of shape ``(boundary_batch_size, 1)`` with the third
        column.
    """
    chosen_rows = np.random.choice(range(bc_and_init.shape[0]), boundary_batch_size)

    assembled = []
    for row in chosen_rows:
        t_index = bc_and_init[row][0]
        x_index = bc_and_init[row][1]
        value = np.array([dataset['u'][x_index][0][t_index]])
        assembled.append(np.concatenate([dataset['t'][t_index], dataset['x'][x_index], value]))

    numerical_data = np.stack(assembled, axis=0)
    coordinates = numerical_data[:, 0:2]
    targets = numerical_data[:, 2]
    return (torch.tensor(coordinates, dtype=torch.float32, requires_grad=True),
            torch.tensor(targets, dtype=torch.float32).view(-1, 1))

In [5]:
def objective(trial):
    """Optuna objective: train one model and return its evaluation error.

    The trial chooses the hidden width, the learning rate, both batch sizes
    and the two loss weights. A fresh ``Model`` is trained by ``main`` for
    3000 steps on the dataset loaded from ``dataset.pkl`` and then scored
    with ``evaluate``.

    Args:
        trial: The ``optuna`` trial supplying the hyper-parameter values.

    Returns:
        The value returned by ``evaluate`` as a float (lower is better for a
        minimising study).
    """
    num_node = trial.suggest_int('num_nodes', 20, 100)
    # suggest_float(name, low, high) samples uniformly from [low, high]; it
    # replaces the deprecated suggest_uniform with the same meaning.
    learning_rate = trial.suggest_float('lr', 1e-4, 1e-1)
    sample_batch_size = trial.suggest_int('sample_batch_size', 500, 8000)
    boundary_batch_size = trial.suggest_int('boundary_batch_size', 20, 300)
    gradient_loss_weight = trial.suggest_float('gradient_loss_weight', 1e-2, 10)
    sample_loss_weight = trial.suggest_float('sample_loss_weight', 1e-2, 10)

    model = Model(num_node)
    dataset = data_load('dataset.pkl')

    main(dataset, sample_batch_size, boundary_batch_size, \
          model, learning_rate, sample_loss_weight, gradient_loss_weight, max_step=3000)
    # Hand the study a plain Python float regardless of the numeric type
    # that evaluate accumulated.
    return float(evaluate(model, dataset))

In [24]:
# Hyper-parameter search: every trial trains a model via `objective`, and the
# study looks for the parameter set with the smallest returned value.
# "minimize" is the default direction and is only spelled out for readers.
study = optuna.create_study(direction="minimize")
study.optimize(objective, n_trials=100)

[I 2020-10-08 14:41:14,183] A new study created in memory with name: no-name-43ec2609-0507-4494-8614-fdcef34a5f5b


params:  53941
step 0 : loss 3.196103096008301
step 1 : loss 3.1523444652557373
step 2 : loss 3.2860267162323
step 3 : loss 3.3421390056610107
step 4 : loss 3.385719060897827
step 5 : loss 3.4026870727539062
step 6 : loss 3.4101169109344482
step 7 : loss 3.5189192295074463
step 8 : loss 3.6040737628936768
step 9 : loss 3.4695887565612793
step 10 : loss 3.568143367767334
step 11 : loss 3.660487413406372
step 12 : loss 3.7853286266326904
step 13 : loss 3.7643373012542725
step 14 : loss 3.8444013595581055
step 15 : loss 3.9822275638580322
step 16 : loss 3.858593463897705
step 17 : loss 4.00861120223999
step 18 : loss 4.114837169647217
step 19 : loss 4.1826043128967285
step 20 : loss 4.115908145904541
step 21 : loss 4.2595534324646
step 22 : loss 4.225491046905518
step 23 : loss 4.322822570800781
step 24 : loss 4.376670837402344
step 25 : loss 4.493327617645264
step 26 : loss 4.465932846069336
step 27 : loss 4.804269313812256
step 28 : loss 4.899441242218018
step 29 : loss 4.68767213821411

step 476 : loss 0.7315928936004639
step 477 : loss 0.7865247130393982
step 478 : loss 0.7225408554077148
step 479 : loss 0.7452367544174194
step 480 : loss 0.736639142036438
step 481 : loss 0.7602383494377136
step 482 : loss 0.7491892576217651
step 483 : loss 0.7637051939964294
step 484 : loss 0.733884334564209
step 485 : loss 0.7325577735900879
step 486 : loss 0.7562928199768066
step 487 : loss 0.6814507842063904
step 488 : loss 0.7324724197387695


KeyboardInterrupt: 

In [1]:
# Report the best hyper-parameters and the best objective value of the study.
# NOTE(review): `study` only exists after the optimisation cell has run in
# the same kernel; the recorded NameError indicates this cell was executed
# in a kernel where it had not.
print(study.best_params)
print(study.best_value)

NameError: name 'study' is not defined

In [2]:
import torch

In [16]:
# Leaf tensors for a small autograd experiment: a 3x2 input matrix and three
# one-element coefficients, all tracked for gradients.
x_1 = torch.tensor(
    [[1, 2],
     [4, 2],
     [6, 3]],
    dtype=torch.float32,
    requires_grad=True,
)
#x_2 = torch.tensor([2, 4, 6], dtype=torch.float32, requires_grad=True)
a, b, c = (
    torch.tensor([value], dtype=torch.float32, requires_grad=True)
    for value in (7, 11, 13)
)

In [21]:
# One value per row of x_1: a linear term in each column, the constant c and
# the product of the two columns (the product makes each column's gradient
# depend on the other column).
y = a * x_1[:, 0] + b * x_1[:, 1] + c +  x_1[:, 0] * x_1[:, 1]

In [22]:
# Reduce y to a scalar and backpropagate. Gradients are added to (not
# written over) the .grad of every leaf tensor y depends on, so running this
# more than once accumulates.
torch.sum(y, dim=0).backward()

In [23]:
# Gradient of sum(y) with respect to x_1.
# NOTE(review): a single backward pass over the `y` defined above gives
# [[9, 12], [9, 15], [10, 17]] (a + column 1, b + column 0). The recorded
# output is larger by (7, 11) in every row, which is consistent with
# gradients accumulated from an earlier backward() call in the same kernel -
# TODO confirm and re-run on a fresh kernel.
x_1.grad

tensor([[16., 23.],
        [16., 26.],
        [17., 28.]])

In [24]:
# NOTE(review): `x_2` is only defined in a commented-out line of the cell
# that creates `x_1`, so this expression raises NameError on a fresh kernel;
# the recorded output presumably comes from an earlier kernel state.
x_2.grad

tensor([11., 11., 11.])