In [1]:
import torch
import pickle
import random
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch_geometric.nn as gnn
import time
from torch.utils.data import Dataset, DataLoader, random_split
from sklearn.model_selection import train_test_split
from torch_geometric.data import Data

from torch.optim import Adam
from torch_geometric.nn import GCNConv, global_mean_pool
from concurrent.futures import ThreadPoolExecutor

In [2]:
from base_function import *

### Model Net

In [3]:
import torch
from torch import nn
import torch.nn.functional as F
import torch_geometric.nn as gnn
from torch_geometric.data import Data, Batch

class PSOGNN(nn.Module):
    def __init__(self, node_input_dim, hidden_dim=32):
        super(PSOGNN, self).__init__()
        self.node_input_dim = node_input_dim
        self.conv1 = gnn.GCNConv(node_input_dim, hidden_dim)
        self.conv2 = gnn.GCNConv(hidden_dim, hidden_dim)
        self.fc = nn.Linear(hidden_dim, 3)

    def forward(self, x, edge_index, batch):
        B, n, d = x.size()
        x = x.view(B * n, d)

        with torch.autograd.set_detect_anomaly(True):
            x = F.relu(self.conv1(x, edge_index))
            x = F.relu(self.conv2(x, edge_index))
        
        x = self.fc(x)
        x = torch.sigmoid(x)

        x = x.view(B, n, 3)
        return x


### PSO

In [4]:
class PSO():
    def __init__(self, X, W, C1, C2, function, lower_bound, upper_bound, device='cuda', patience=5):
        self.device = device  
        self.X = X.to(device)
        self.W = W.to(device)
        self.C1 = C1.to(device)
        self.C2 = C2.to(device)
        self.func = function
        self.lower_bound = torch.as_tensor(lower_bound, device=device).float()
        self.upper_bound = torch.as_tensor(upper_bound, device=device).float()
        self.num_particle, self.dim = X.shape
        self.V = torch.zeros((self.num_particle, self.dim), device=device)
        self.P = X.clone().to(device)
        self.P_best = torch.full((self.num_particle, 1), float('inf'), device=device)
        self.G = None
        self.global_best = torch.tensor(float('inf'), device=device)
        self.global_best_history = []
        self.patience = patience

    def initial_global_best(self):
        fitnesses = self.func(self.X)
        min_fitness, min_id = torch.min(fitnesses, dim=0)
        self.global_best = min_fitness
        self.G = self.X[min_id].clone().to(self.device)
        return self.G

    def update_position(self):
        if self.G is None:  
            self.G = self.initial_global_best()

        random_tensor_1 = torch.rand((self.num_particle, 1), device=self.device)
        random_tensor_2 = torch.rand((self.num_particle, 1), device=self.device)

        new_velocity = (
            self.W.reshape(-1, 1) * self.V +
            random_tensor_1 * self.C1.reshape(-1, 1) * (self.P - self.X) +
            random_tensor_2 * self.C2.reshape(-1, 1) * (self.G - self.X)
        )

        new_position = self.X + new_velocity
        new_position = torch.clamp(new_position, self.lower_bound, self.upper_bound)
        self.V = new_velocity.clone().detach()
        self.X = new_position.clone().detach()
        return new_position

    def update_fitness(self, new_position):
        fitnesses = self.func(new_position).reshape(-1, 1)
        fitnesses_no_grad = fitnesses.detach()

        improve = fitnesses_no_grad < self.P_best
        self.P_best = torch.where(improve, fitnesses_no_grad, self.P_best)
        self.P = torch.where(improve, new_position, self.P)
        
        min_fitness, min_id = torch.min(fitnesses_no_grad, dim=0)
        
        if self.global_best > min_fitness:
            self.global_best = min_fitness
            self.G = new_position[min_id].clone().to(self.device)
        
        self.global_best_history.append(self.global_best.item())
        return torch.mean(fitnesses)

    def run_step(self):
        if self.G is None:
            self.G = self.initial_global_best()
        
        new_position = self.update_position()
        mean_fitness = self.update_fitness(new_position)
        print(new_position)
        print(mean_fitness)
        
        return new_position, mean_fitness


### Train

In [5]:
splited_path = r'A:\Code\deepso\splited_data_no.pkl'
with open(splited_path, 'rb') as f:
    dataset = pickle.load(f)

train_set = dataset['train']
test_set = dataset['test']
print(train_set[0])

{'dim': 1, 'func_type': 'ackley', 'params': [21.240033376567105, 0.20560771459870464, 7.294585386829872]}


In [6]:
def create_batches(dataset, batch_size):
    print("Dataset type:", type(dataset))
    if not isinstance(dataset, list):
        dataset = list(dataset)
    
    random.shuffle(dataset)
    for i in range(0, len(dataset), batch_size):
        batch = dataset[i:i + batch_size]
        yield batch


In [None]:
import torch
from torch import nn
import torch.nn.functional as F
from concurrent.futures import ThreadPoolExecutor
from torch.optim import Adam
import time
import matplotlib.pyplot as plt

num_particle = 10
padding_dim = 100
lower_bound = -50
upper_bound = 50
device = 'cuda' if torch.cuda.is_available() else 'cpu'

def initial_padding_batch(batch):
    x_list = []
    for func in batch:
        dim = func['dim']
        x = lower_bound + (upper_bound - lower_bound) * torch.rand((num_particle, dim), device=device)
        if dim < padding_dim:
            x_padding = F.pad(x, (0, padding_dim - dim))
            x_list.append(x_padding)
    return x_list

def padding_batch(x_list, batch):
    x_list_update = []
    for x_ori, func in zip(x_list, batch):
        dim = func['dim']
        if dim < padding_dim:
            x_padding = F.pad(x_ori, (0, padding_dim - dim))
            x_list_update.append(x_padding)
    return x_list_update

def unpadding(x_list, batch):
    x_ori = []
    for x_padded, func in zip(x_list, batch):
        dim = func['dim']
        x_original = x_padded[:, :dim]
        x_ori.append(x_original)
    return x_ori

def train(dataset, max_step, num_epochs, batch_size, hidden_dim, num_particle, padding_dim, device='cuda'):
    epoch_losses = []
    model = PSOGNN(node_input_dim=padding_dim, hidden_dim=hidden_dim).to(device)
    optimizer = Adam(model.parameters(), lr=0.001)
    
    total_start_time = time.time()
    
    for epoch in range(num_epochs):
        print(f"Epoch {epoch+1}/{num_epochs}")
        epoch_start_time = time.time()
        
        total_loss = 0
        batch_count = 0
        function_loader = create_batches(dataset, batch_size)

        for batch in function_loader:
            print(f"Batch {batch_count+1}")
            
            for step in range(max_step):
                if step == 0:
                    x_list = initial_padding_batch(batch)
                else:
                    x_list = padding_batch(x_list, batch)
                
                x_padding_all = torch.stack(x_list, dim=0).to(device)
                x_ori = unpadding(x_list, batch)

                edge_index = torch.combinations(torch.arange(num_particle, device=device), r=2).t().contiguous().to(device)
                batch_indices = torch.arange(batch_count, device=device).repeat_interleave(num_particle)
                
                output = model(x_padding_all, edge_index, batch_indices)
                
                output = output
                
                W_all = output[:, :, 0]
                C1_all = output[:, :, 1]
                C2_all = output[:, :, 2]

                for index, func in enumerate(batch):
                    dim = func['dim']
                    func_type = func['func_type']
                    params = func['params']
                    
                    def function(x):
                        function_instance = Function.get_function(func_type, x, params)
                        return function_instance.evaluate_function()
                    
                    W = W_all[index]
                    C1 = C1_all[index]
                    C2 = C2_all[index]
                    X = x_ori[index].to(device)

                    pso = PSO(X, W, C1, C2, function, lower_bound, upper_bound, device=device, patience=5)
                    new_position, mean_fitness = pso.run_step()
                    
                    x_list[index] = new_position.detach().to(device)

                    loss = mean_fitness
                    
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    total_loss = total_loss + loss.item()


            print(f"Batch {batch_count+1} Loss: {total_loss / max_step}")
            batch_count += 1

        epoch_loss = total_loss / len(function_loader)
        epoch_losses.append(epoch_loss)
        print(f"Epoch {epoch+1} Loss: {epoch_loss:.4f}")
        print(f"Epoch {epoch+1} Time: {time.time() - epoch_start_time:.2f} seconds")

    print(f"Total Training Time: {time.time() - total_start_time:.2f} seconds")
    print("Training completed.")
    
    plt.figure(figsize=(10, 6))
    plt.plot(range(1, num_epochs + 1), epoch_losses, marker='o', linestyle='-')
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title('Training Loss per Epoch')
    plt.grid(True)
    plt.show()


In [8]:
train(train_set, max_step=50, num_epochs=1, batch_size=32, hidden_dim=16, num_particle=10, padding_dim=100, device='cuda')

Epoch 1/1
Dataset type: <class 'list'>
Batch 1
tensor([[-2.5789e+01, -2.5441e+01, -2.5964e+01, -3.0763e+01,  1.4347e+01,
         -3.4648e+01,  1.7637e+01,  3.2972e+01,  8.1781e-01, -1.8739e+01,
         -2.7062e+01,  1.9260e+01, -1.9750e+01, -1.9799e+01,  1.6194e+01,
          3.7205e+01, -4.8106e+00, -2.5469e+01,  4.3534e+01,  4.1399e+01,
          1.6522e+01,  9.3137e+00,  3.7459e+01, -2.1194e+01,  2.0400e+01,
          1.4151e+00, -2.8346e+01, -4.0732e+01, -2.8191e+01,  7.8747e+00,
         -1.9207e+00,  3.1453e+01,  2.5606e+01,  3.0020e+01, -2.9246e+01,
          3.3737e+01,  1.9263e+01, -2.0139e+01,  2.5457e+01,  3.9150e+01,
         -1.7901e+01,  4.4571e+01, -3.7281e+01, -2.5012e+01, -9.2577e+00,
          1.4868e+01,  3.3173e+01, -9.0441e+00,  2.1142e+01, -3.2469e+01,
         -2.4889e+01,  2.9560e+01,  2.0515e+01,  3.3779e+01,  3.1846e+01,
          2.5419e+01, -2.8508e+01, -2.3326e+01,  2.8751e+01,  3.6764e+00,
         -1.9809e+01,  3.2678e+01, -2.6581e+01,  7.8518e+00, -2.7

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.