In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchsummary import summary
import pandas as pd
import os

In [4]:
# Pick CUDA when it is available, otherwise fall back to the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print('Device:', device)
# torch.cuda.current_device() raises when no CUDA runtime is usable, so only
# query it when CUDA is actually present.
if torch.cuda.is_available():
    print('Current cuda device:', torch.cuda.current_device())
# device_count() is safe without CUDA (it reports 0).
print('Count of using GPUs:', torch.cuda.device_count())

Device: cuda
Current cuda device: 0
Count of using GPUs: 2


In [5]:
# Pin the run to GPU 0 when CUDA is available; otherwise keep the CPU fallback
# instead of selecting a device that does not exist.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

In [6]:
import random
import numpy as np
import torch

# Seed every random number generator used by the notebook from one constant so
# the value only has to be changed in a single place.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)
if torch.cuda.is_available():
    torch.cuda.manual_seed(SEED)
    torch.cuda.manual_seed_all(SEED)


In [7]:
# Load the experiment table. Per the displayed output it holds the A/B/C
# settings, their scaled versions, three loss summaries, the dist_from0/1/2
# values and the cluster label of each experiment.
# NOTE(review): the 'Unnamed: 0' column is presumably the index saved with the CSV — confirm.
data1 = pd.read_csv('./NewABC_cluster_results_newsdist(0.1).csv')
data1

Unnamed: 0,A,B,C,A_scaled,B_scaled,C_scaled,Initial Total Loss,Total Loss at 50,Avg Loss 0-50,dist_from0,dist_from1,dist_from2,Cluster
0,1,2,3,-1.357931,-1.337451,-1.316686,-0.55275,-0.606017,-0.647231,1.0,0.1,0.1,0
1,1,2,7,-1.357931,-1.337451,0.211405,-0.528888,-0.610003,-0.651601,1.0,0.1,0.1,0
2,1,2,11,-1.357931,-1.337451,1.105281,-0.447648,-0.599378,-0.623454,1.0,0.1,0.1,0
3,1,7,3,-1.357931,0.270708,-1.316686,-0.447229,-0.662263,-0.665812,1.0,0.1,0.1,0
4,1,7,7,-1.357931,0.270708,0.211405,-0.922506,-0.76686,-0.812981,1.0,0.1,0.1,0
5,1,7,11,-1.357931,0.270708,1.105281,-0.822324,-0.767863,-0.813792,1.0,0.1,0.1,0
6,1,12,3,-1.357931,1.066743,-1.316686,-0.50659,-0.722144,-0.685461,1.0,0.1,0.1,0
7,1,12,7,-1.357931,1.066743,0.211405,-0.755358,-0.768687,-0.807436,1.0,0.1,0.1,0
8,1,12,11,-1.357931,1.066743,1.105281,-0.652738,-0.76989,-0.805635,1.0,0.1,0.1,0
9,7,2,3,0.336886,-1.337451,-1.316686,0.628385,1.085074,0.9734,0.1,1.0,0.1,1


In [8]:
# 0. Add a 1-based 'Number' identifier to every experiment row.
# The range is derived from the frame length so the cell keeps working when
# the CSV has a different number of rows.
data1['Number'] = range(1, len(data1) + 1)
data1

Unnamed: 0,A,B,C,A_scaled,B_scaled,C_scaled,Initial Total Loss,Total Loss at 50,Avg Loss 0-50,dist_from0,dist_from1,dist_from2,Cluster,Number
0,1,2,3,-1.357931,-1.337451,-1.316686,-0.55275,-0.606017,-0.647231,1.0,0.1,0.1,0,1
1,1,2,7,-1.357931,-1.337451,0.211405,-0.528888,-0.610003,-0.651601,1.0,0.1,0.1,0,2
2,1,2,11,-1.357931,-1.337451,1.105281,-0.447648,-0.599378,-0.623454,1.0,0.1,0.1,0,3
3,1,7,3,-1.357931,0.270708,-1.316686,-0.447229,-0.662263,-0.665812,1.0,0.1,0.1,0,4
4,1,7,7,-1.357931,0.270708,0.211405,-0.922506,-0.76686,-0.812981,1.0,0.1,0.1,0,5
5,1,7,11,-1.357931,0.270708,1.105281,-0.822324,-0.767863,-0.813792,1.0,0.1,0.1,0,6
6,1,12,3,-1.357931,1.066743,-1.316686,-0.50659,-0.722144,-0.685461,1.0,0.1,0.1,0,7
7,1,12,7,-1.357931,1.066743,0.211405,-0.755358,-0.768687,-0.807436,1.0,0.1,0.1,0,8
8,1,12,11,-1.357931,1.066743,1.105281,-0.652738,-0.76989,-0.805635,1.0,0.1,0.1,0,9
9,7,2,3,0.336886,-1.337451,-1.316686,0.628385,1.085074,0.9734,0.1,1.0,0.1,1,10


In [9]:
# Work on a copy: the 'Euclidean Distance' column added further down is written
# into data2, leaving data1 as loaded (plus 'Number').
data2=data1.copy()
data2

Unnamed: 0,A,B,C,A_scaled,B_scaled,C_scaled,Initial Total Loss,Total Loss at 50,Avg Loss 0-50,dist_from0,dist_from1,dist_from2,Cluster,Number
0,1,2,3,-1.357931,-1.337451,-1.316686,-0.55275,-0.606017,-0.647231,1.0,0.1,0.1,0,1
1,1,2,7,-1.357931,-1.337451,0.211405,-0.528888,-0.610003,-0.651601,1.0,0.1,0.1,0,2
2,1,2,11,-1.357931,-1.337451,1.105281,-0.447648,-0.599378,-0.623454,1.0,0.1,0.1,0,3
3,1,7,3,-1.357931,0.270708,-1.316686,-0.447229,-0.662263,-0.665812,1.0,0.1,0.1,0,4
4,1,7,7,-1.357931,0.270708,0.211405,-0.922506,-0.76686,-0.812981,1.0,0.1,0.1,0,5
5,1,7,11,-1.357931,0.270708,1.105281,-0.822324,-0.767863,-0.813792,1.0,0.1,0.1,0,6
6,1,12,3,-1.357931,1.066743,-1.316686,-0.50659,-0.722144,-0.685461,1.0,0.1,0.1,0,7
7,1,12,7,-1.357931,1.066743,0.211405,-0.755358,-0.768687,-0.807436,1.0,0.1,0.1,0,8
8,1,12,11,-1.357931,1.066743,1.105281,-0.652738,-0.76989,-0.805635,1.0,0.1,0.1,0,9
9,7,2,3,0.336886,-1.337451,-1.316686,0.628385,1.085074,0.9734,0.1,1.0,0.1,1,10


In [10]:
# The six feature columns that span the space in which distances are measured.
origin_columns = ['A_scaled', 'B_scaled', 'C_scaled', 'Initial Total Loss', 'Total Loss at 50', 'Avg Loss 0-50']
# Reference point: feature vector of the row labelled 13. In the displayed
# output this is the A=B=C=7 experiment (Number 14), whose distance is 0.0.
origin = data2.loc[13, origin_columns].values

In [11]:
# Display the reference feature vector.
origin

array([ 0.33688572,  0.27070835,  0.21140502, -1.97738581, -0.76340968,
       -0.85078787])

In [13]:
def euclidean_distance(row, origin):
    """Return the Euclidean (L2) distance between ``row`` and ``origin``.

    Both arguments are expected to support element-wise subtraction
    (e.g. NumPy arrays of matching shape).
    """
    delta = row - origin
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

def add_euclidean_distance(df, origin):
    """Add an 'Euclidean Distance' column measuring each row's distance to ``origin``.

    The distance is taken over the six feature columns listed in ``cols``.
    It is computed for all rows at once with NumPy rather than with a
    row-by-row ``DataFrame.apply``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the six feature columns. It is modified in place.
    origin : array-like of length 6
        Reference point, ordered like ``cols``.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with the new column added.
    """
    # Columns that take part in the distance computation.
    cols = ['A_scaled', 'B_scaled', 'C_scaled', 'Initial Total Loss', 'Total Loss at 50', 'Avg Loss 0-50']
    features = df[cols].to_numpy(dtype=float)
    # Coerce so that an object-dtype origin (e.g. from ``Series.values``) also works.
    offsets = features - np.asarray(origin, dtype=float)
    df['Euclidean Distance'] = np.sqrt(np.sum(offsets ** 2, axis=1))
    return df

# Add the distance-to-origin column to the working frame and display it.
data2 = add_euclidean_distance(data2, origin)

data2

Unnamed: 0,A,B,C,A_scaled,B_scaled,C_scaled,Initial Total Loss,Total Loss at 50,Avg Loss 0-50,dist_from0,dist_from1,dist_from2,Cluster,Number,Euclidean Distance
0,1,2,3,-1.357931,-1.337451,-1.316686,-0.55275,-0.606017,-0.647231,1.0,0.1,0.1,0,1,3.144748
1,1,2,7,-1.357931,-1.337451,0.211405,-0.528888,-0.610003,-0.651601,1.0,0.1,0.1,0,2,2.760423
2,1,2,11,-1.357931,-1.337451,1.105281,-0.447648,-0.599378,-0.623454,1.0,0.1,0.1,0,3,2.945552
3,1,7,3,-1.357931,0.270708,-1.316686,-0.447229,-0.662263,-0.665812,1.0,0.1,0.1,0,4,2.755592
4,1,7,7,-1.357931,0.270708,0.211405,-0.922506,-0.76686,-0.812981,1.0,0.1,0.1,0,5,1.996651
5,1,7,11,-1.357931,0.270708,1.105281,-0.822324,-0.767863,-0.813792,1.0,0.1,0.1,0,6,2.237627
6,1,12,3,-1.357931,1.066743,-1.316686,-0.50659,-0.722144,-0.685461,1.0,0.1,0.1,0,7,2.834327
7,1,12,7,-1.357931,1.066743,0.211405,-0.755358,-0.768687,-0.807436,1.0,0.1,0.1,0,8,2.236366
8,1,12,11,-1.357931,1.066743,1.105281,-0.652738,-0.76989,-0.805635,1.0,0.1,0.1,0,9,2.462084
9,7,2,3,0.336886,-1.337451,-1.316686,0.628385,1.085074,0.9734,0.1,1.0,0.1,1,10,4.296026


In [14]:

# Build `data`: the experiments re-ordered so that rows alternate between
# clusters in the order 2, 1, 0, with shorter clusters padded up to the size
# of the largest cluster.

# 1. Split the frame by cluster label.
cluster_2 = data2[data2['Cluster'] == 2.0]
cluster_1 = data2[data2['Cluster'] == 1.0]
cluster_0 = data2[data2['Cluster'] == 0.0]

# 2. Number of rows that can be taken from every cluster (size of the smallest one).
min_length = min(len(cluster_2), len(cluster_1), len(cluster_0))

# Working copies; rows are dropped from these as they are consumed
# (sampling without replacement).
copy_cluster_2 = cluster_2.copy()
copy_cluster_1 = cluster_1.copy()
copy_cluster_0 = cluster_0.copy()

# # Earlier variant: draw rows at random in the order 2, 1, 0 (without replacement).
# extracted_rows = []
# for _ in range(min_length):
#     for cluster in [copy_cluster_2, copy_cluster_1, copy_cluster_0]:
#         if not cluster.empty:
#             # Draw one random row and remove it from the pool.
#             row = cluster.sample(n=1)
#             extracted_rows.append(row)
#             cluster.drop(row.index, inplace=True)

# # Concatenate the drawn rows into a new frame.
# data_new = pd.concat(extracted_rows).reset_index(drop=True)


# Round-robin over the clusters (2, 1, 0): each round takes, from every
# cluster, the remaining row with the smallest Euclidean distance to `origin`.
extracted_rows = []
for _ in range(min_length):
    for cluster_copy in [copy_cluster_2, copy_cluster_1, copy_cluster_0]:
        # Remaining row closest to the origin.
        row = cluster_copy.sort_values(by='Euclidean Distance').head(1)
        extracted_rows.append(row)
        # Remove it from the pool so it cannot be picked again.
        cluster_copy.drop(row.index, inplace=True)

# Concatenate the selected rows into a new frame.
data_new = pd.concat(extracted_rows, ignore_index=True)


# 3. Pad with the remaining data until every cluster has been visited
#    `longest_length` times.
longest_length = max(len(cluster_2), len(cluster_1), len(cluster_0))
# NOTE(review): remaining_steps is not used anywhere in the visible code.
remaining_steps = longest_length - min_length

# Original size of each cluster, keyed by cluster label.
original_lengths = {
    2: len(cluster_2),
    1: len(cluster_1),
    0: len(cluster_0)
}

# Padding rounds, again in cluster order 2, 1, 0.
for _ in range(longest_length - min_length):
    for cluster_num, copy_cluster in zip([2, 1, 0], [copy_cluster_2, copy_cluster_1, copy_cluster_0]):
        if original_lengths[cluster_num] > len(data_new[data_new['Cluster'] == cluster_num]):
            # The cluster still has unused rows: draw one at random and remove
            # it from the pool (without replacement).
            if not copy_cluster.empty:
                row = copy_cluster.sample(n=1)
                data_new = pd.concat([data_new, row])
                copy_cluster.drop(row.index, inplace=True)
        else:
            # Every row of this cluster is already in data_new: re-draw one row
            # from the full cluster, so duplicates are possible.
            row = data2[data2['Cluster'] == cluster_num].sample(n=1, replace=True)
            data_new = pd.concat([data_new, row])

# Renumber the rows 0..n-1; the training loop addresses them by position.
data = data_new.reset_index(drop=True)

In [15]:
# Display the interleaved training table.
data

Unnamed: 0,A,B,C,A_scaled,B_scaled,C_scaled,Initial Total Loss,Total Loss at 50,Avg Loss 0-50,dist_from0,dist_from1,dist_from2,Cluster,Number,Euclidean Distance
0,7,7,7,0.336886,0.270708,0.211405,-1.977386,-0.76341,-0.850788,0.1,0.1,1.0,2,14,0.0
1,7,12,3,0.336886,1.066743,-1.316686,0.736237,0.029045,0.460709,0.1,1.0,0.1,1,16,3.560968
2,1,7,7,-1.357931,0.270708,0.211405,-0.922506,-0.76686,-0.812981,1.0,0.1,0.1,0,5,1.996651
3,7,12,7,0.336886,1.066743,0.211405,-1.192925,-0.767233,-0.830689,0.1,0.1,1.0,2,17,1.117797
4,7,7,3,0.336886,0.270708,-1.316686,0.606207,0.815648,0.79787,0.1,1.0,0.1,1,13,3.771142
5,1,12,7,-1.357931,1.066743,0.211405,-0.755358,-0.768687,-0.807436,1.0,0.1,0.1,0,8,2.236366
6,7,7,11,0.336886,0.270708,1.105281,-1.208969,-0.768068,-0.833252,0.1,0.1,1.0,2,15,1.178901
7,7,2,7,0.336886,-1.337451,0.211405,0.70702,0.992458,0.929765,0.1,1.0,0.1,1,11,4.005703
8,1,7,11,-1.357931,0.270708,1.105281,-0.822324,-0.767863,-0.813792,1.0,0.1,0.1,0,6,2.237627
9,13,7,7,1.021045,0.270708,0.211405,-0.814077,-0.704586,-0.683205,0.1,0.1,1.0,2,23,1.361214


In [18]:
# Turn the three column groups of `data` into float32 tensors:
# raw parameters, loss summaries and cluster-distance values.
inputs, targets, distances = (
    torch.tensor(data[list(column_group)].to_numpy(), dtype=torch.float32)
    for column_group in (
        ('A', 'B', 'C'),
        ('Initial Total Loss', 'Total Loss at 50', 'Avg Loss 0-50'),
        ('dist_from0', 'dist_from1', 'dist_from2'),
    )
)

In [19]:
class BasisNet(nn.Module):
    """Two-layer tanh MLP mapping a 2-feature input to a 10-feature vector.

    The attribute names ``fc1``/``fc2`` are the keys of the saved state dicts
    and must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=10)   # 2 -> 10
        self.fc2 = nn.Linear(in_features=10, out_features=10)  # 10 -> 10

    def forward(self, x):
        """Apply ``tanh(fc2(tanh(fc1(x))))`` to ``x``."""
        hidden = torch.tanh(self.fc1(x))
        return torch.tanh(self.fc2(hidden))


class ForwardNet(nn.Module):
    """Two-layer tanh MLP mapping a 2-feature input to a 10-feature vector.

    Three instances are created by the notebook, one per cluster. The
    attribute names ``fc1``/``fc2`` are the keys of the saved state dicts and
    must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=2, out_features=10)   # takes 2 inputs
        self.fc2 = nn.Linear(in_features=10, out_features=10)  # emits a 10-component vector

    def forward(self, x):
        """Apply ``tanh(fc2(tanh(fc1(x))))`` to ``x``."""
        hidden = torch.tanh(self.fc1(x))
        return torch.tanh(self.fc2(hidden))


class BackwardNet(nn.Module):
    """Three-layer MLP mapping a 10-feature vector to a single scalar output.

    The first two layers use tanh; the last layer is linear. The attribute
    names ``fc1``/``fc2``/``fc3`` are the keys of the saved state dicts and
    must not be renamed.
    """

    def __init__(self):
        super().__init__()
        self.fc1 = nn.Linear(in_features=10, out_features=10)  # consumes the combined 10-feature vector
        self.fc2 = nn.Linear(in_features=10, out_features=10)
        self.fc3 = nn.Linear(in_features=10, out_features=1)   # single-component output

    def forward(self, x):
        """Apply ``fc3(tanh(fc2(tanh(fc1(x)))))`` to ``x``."""
        hidden = torch.tanh(self.fc1(x))
        hidden = torch.tanh(self.fc2(hidden))
        return self.fc3(hidden)

# # 직렬 네트워크 구성
# class SerialNetwork(nn.Module):
#     def __init__(self, forward_nets, backward_net):
#         super(SerialNetwork, self).__init__()
#         self.forward_nets = forward_nets
#         self.backward_net = backward_net

#     def forward(self, x, n0, n1, n2):
#         out1 = self.forward_nets[0](x)
#         out2 = self.forward_nets[1](x)
#         out3 = self.forward_nets[2](x)
#         n0 = n0.unsqueeze(-1)  # 마지막 차원을 추가하여 브로드캐스팅이 가능하게 함
#         n1 = n1.unsqueeze(-1)  # 마지막 차원을 추가
#         n2 = n2.unsqueeze(-1)  # 마지막 차원을 추가
#         combined_output = n0 * out1 + n1 * out2 + n2 * out3
#         final_output = self.backward_net(combined_output)
#         return final_output

class SerialNetwork(nn.Module):
    def __init__(self, basis_net, forward_nets, backward_net, n0_init, n1_init, n2_init):
        super(SerialNetwork, self).__init__()
        self.basis_net = basis_net  # basis network 추가
        self.forward_nets = nn.ModuleList(forward_nets) # 리스트를 ModuleList로 변경
        self.backward_net = backward_net
        # n0, n1, n2를 학습 가능한 파라미터로 정의
        self.n0 = nn.Parameter(torch.tensor([n0_init], dtype=torch.float32))
        self.n1 = nn.Parameter(torch.tensor([n1_init], dtype=torch.float32))
        self.n2 = nn.Parameter(torch.tensor([n2_init], dtype=torch.float32))

    def forward(self, x):
        out1 = self.forward_nets[0](x)
        out2 = self.forward_nets[1](x)
        out3 = self.forward_nets[2](x)
        basis_output = self.basis_net(x)  # basis network의 출력 계산
        combined_output = self.n0 * out1 + self.n1 * out2 + self.n2 * out3 + basis_output
        final_output = self.backward_net(combined_output)
        return final_output


def forward_pass(model, batch_xy, device):
    """Run ``model`` on ``batch_xy`` after moving the batch to ``device``.

    If ``model`` is wrapped in ``torch.nn.DataParallel``, the wrapped module is
    used directly, so the batch is not scattered across devices.

    The module is invoked through ``__call__`` rather than ``.forward`` so that
    any registered forward hooks still run.

    Parameters
    ----------
    model : nn.Module or nn.DataParallel
    batch_xy : torch.Tensor
        Input batch.
    device : torch.device
        Device the batch is moved to; the model is expected to live there.

    Returns
    -------
    torch.Tensor
        The model output.
    """
    # Move the input to the same device as the model.
    batch_xy = batch_xy.to(device)
    net = model.module if isinstance(model, torch.nn.DataParallel) else model
    return net(batch_xy)




# def forward_pass(model, batch_xy, n0, n1, n2, device):
#     # DataParallel을 사용할 때와 그렇지 않을 때 모두 작동하도록 함
#     # 모든 입력을 모델과 동일한 디바이스로 이동
#     batch_xy = batch_xy.to(device)
#     # n0, n1, n2를 텐서로 변환하고 해당 디바이스로 옮김
#     n0 = torch.tensor(n0, dtype=torch.float32, device=device)
#     n1 = torch.tensor(n1, dtype=torch.float32, device=device)
#     n2 = torch.tensor(n2, dtype=torch.float32, device=device)
#     if isinstance(model, torch.nn.DataParallel):
#         # DataParallel 인스턴스에서 원래 모듈을 얻음
#         return model.module.forward(batch_xy, n0, n1, n2)
#     else:
#         return model.forward(batch_xy, n0, n1, n2)


    
# # 메인 모델 정의, 두 네트워크를 직렬로 연결
# class SerialNetwork(nn.Module):
#     def __init__(self, forward_net, backward_net):
#         super(SerialNetwork, self).__init__()
#         self.forward_net = forward_net
#         self.backward_net = backward_net

#     def forward(self, x):
#         x = self.forward_net(x)
#         x = self.backward_net(x)
#         return x


# class SerialNetwork2(nn.Module):
#     def __init__(self, forward_nets, backward_net):
#         super(SerialNetwork2, self).__init__()
#         self.forward_nets = nn.ModuleList(forward_nets)  # 세 개의 전방 네트워크
#         self.backward_net = backward_net  # 하나의 후방 네트워크

#     def forward(self, x, n0, n1, n2):
#         # 세 개의 전방 네트워크의 출력에 가중치를 적용
#         out1 = self.forward_nets[0](x) * n0.unsqueeze(-1)  # n0, n1, n2는 배치 크기의 텐서여야 함
#         out2 = self.forward_nets[1](x) * n1.unsqueeze(-1)
#         out3 = self.forward_nets[2](x) * n2.unsqueeze(-1)
        
#         # 가중치가 적용된 출력을 합친 후, 후방 네트워크에 전달
#         combined_out = out1 + out2 + out3
#         final_out = self.backward_net(combined_out)
#         return final_out


In [20]:
# def compute_derivatives(model, xy_data):
#     xy_data = xy_data.to(device)
#     # Ensure that xy_data has gradient information.
#     xy_data.requires_grad_(True)
    
#     # Get the model prediction.
#     f_pred = model(xy_data)
    
#     # Create a tensor of ones with the same shape as f_pred to be used for gradient computation.
#     # Reshape the ones tensor to match the shape of f_pred.
#     ones = torch.ones(f_pred.shape, device=device, requires_grad=False)
    
#     # Compute the first derivatives.
#     f_x = torch.autograd.grad(f_pred, xy_data, grad_outputs=ones, create_graph=True)[0][:, 0]
#     f_y = torch.autograd.grad(f_pred, xy_data, grad_outputs=ones, create_graph=True)[0][:, 1]
    
#     # Compute the second derivatives.
#     f_xx = torch.autograd.grad(f_x, xy_data, grad_outputs=ones[:, 0], create_graph=True)[0][:, 0]
#     f_yy = torch.autograd.grad(f_y, xy_data, grad_outputs=ones[:, 0], create_graph=True)[0][:, 1]
    
#     return f_xx, f_yy


def compute_derivatives(model, xy_data, n0, n1, n2, device):
    """Return the second derivatives f_xx and f_yy of the model output.

    Parameters
    ----------
    model : nn.Module or nn.DataParallel
        Network evaluated at ``xy_data``. A ``DataParallel`` wrapper is
        unwrapped and the inner module is used.
    xy_data : torch.Tensor
        Batch whose column 0 is x and column 1 is y. ``requires_grad`` is
        switched on for the tensor after it is moved to ``device``; if it
        already lives on ``device`` this changes the caller's tensor in place.
    n0, n1, n2 :
        Accepted for backward compatibility only. The model carries its own
        mixing weights, so these values are not used.
    device : torch.device
        Device on which the computation runs.

    Returns
    -------
    (torch.Tensor, torch.Tensor)
        ``f_xx`` and ``f_yy``, one value per sample. Both stay attached to the
        autograd graph (``create_graph=True``) so they can be used in a loss.
    """
    xy_data = xy_data.to(device)
    xy_data.requires_grad_(True)

    net = model.module if isinstance(model, torch.nn.DataParallel) else model
    f_pred = net(xy_data)

    # One backward pass yields both first derivatives (columns 0 and 1).
    first_order = torch.autograd.grad(
        f_pred, xy_data, grad_outputs=torch.ones_like(f_pred), create_graph=True
    )[0]
    f_x = first_order[:, 0]
    f_y = first_order[:, 1]

    # ones_like(f_x) gives one weight per sample whatever the rank of f_pred.
    f_xx = torch.autograd.grad(
        f_x, xy_data, grad_outputs=torch.ones_like(f_x), create_graph=True
    )[0][:, 0]
    f_yy = torch.autograd.grad(
        f_y, xy_data, grad_outputs=torch.ones_like(f_y), create_graph=True
    )[0][:, 1]
    return f_xx, f_yy

In [21]:


def freeze_parameters(net, freeze=True):
    """Freeze (``freeze=True``) or unfreeze (``freeze=False``) all parameters of ``net``.

    Freezing sets ``requires_grad`` to False on every parameter; unfreezing
    sets it back to True.
    """
    trainable = not freeze
    for tensor in net.parameters():
        tensor.requires_grad_(trainable)


In [22]:

def generate_data(A, B, C, grid_min=-30.0, grid_max=30.0, n_points=100):
    """Sample ``f(x, y) = A * sin(x / B) * sin(y / C)`` on a square grid.

    Parameters
    ----------
    A, B, C : float
        Amplitude and the two length scales of the target function.
    grid_min, grid_max : float, optional
        Bounds of the grid along both axes (defaults: -30 and 30).
    n_points : int, optional
        Number of grid points per axis (default: 100).

    Returns
    -------
    x_data, y_data : numpy.ndarray
        Meshgrid coordinate arrays of shape ``(n_points, n_points)``.
    f_data : torch.Tensor
        Flattened function values, float32, shape ``(n_points**2, 1)``.
    f_test : numpy.ndarray
        Function values on the grid, shape ``(n_points, n_points)``.
    xy_data : torch.Tensor
        Flattened ``(x, y)`` pairs, float32, shape ``(n_points**2, 2)``.
    """
    axis = np.linspace(grid_min, grid_max, n_points)
    x_data, y_data = np.meshgrid(axis, axis)
    f_test = A * np.sin(x_data / B) * np.sin(y_data / C)
    # Flatten the grid into (x, y) rows; row order matches f_test.ravel().
    xy_data_ = np.stack([x_data.ravel(), y_data.ravel()], axis=-1)
    xy_data = torch.tensor(xy_data_, dtype=torch.float32)
    f_data = torch.tensor(f_test.ravel(), dtype=torch.float32).view(-1, 1)
    return x_data, y_data, f_data, f_test, xy_data


In [23]:


# Instantiate the basis, forward (x3) and backward networks on the target device.
basis_net = BasisNet().to(device)
forward_nets = [ForwardNet().to(device) for _ in range(3)]
backward_net = BackwardNet().to(device)

# Load the pre-trained weights. map_location remaps the stored tensors onto
# `device`, so checkpoints saved on a GPU also load on a machine whose device
# differs (another GPU index, or the CPU).
basis_net.load_state_dict(torch.load('./basis_sum_pth/basis_network_basis_sum_777.pth', map_location=device))
forward_nets[0].load_state_dict(torch.load('./basis_sum_pth/forward_network_0_basis_sum_777.pth', map_location=device))
forward_nets[1].load_state_dict(torch.load('./basis_sum_pth/forward_network_1_basis_sum_777.pth', map_location=device))
forward_nets[2].load_state_dict(torch.load('./basis_sum_pth/forward_network_2_basis_sum_777.pth', map_location=device))
# Weights of the backward network.
backward_net.load_state_dict(torch.load('./basis_sum_pth/backward_network_basis_sum_777.pth', map_location=device))



# ##

# # 모델 인스턴스화
# forward_net_0 = ForwardNet().to(device)
# forward_net_1 = ForwardNet().to(device)
# forward_net_2 = ForwardNet().to(device)

# # 가중치 로드
# forward_net_0.load_state_dict(torch.load('first_network_final_777_2input.pth', map_location=device))
# forward_net_1.load_state_dict(torch.load('first_network_final_777_2input.pth', map_location=device))
# forward_net_2.load_state_dict(torch.load('first_network_final_777_2input.pth', map_location=device))

# # DataParallel 적용
# forward_net_0 = torch.nn.DataParallel(forward_net_0)
# forward_net_1 = torch.nn.DataParallel(forward_net_1)
# forward_net_2 = torch.nn.DataParallel(forward_net_2)

# # 백워드 네트워크 인스턴스화 및 가중치 로드 후 DataParallel 적용
# backward_net = BackwardNet().to(device)
# backward_net.load_state_dict(torch.load('second_network_final_777_2input.pth', map_location=device))
# backward_net = torch.nn.DataParallel(backward_net)

# # 전방 네트워크 리스트 및 SerialNetwork 인스턴스 생성
# forward_nets = [forward_net_0, forward_net_1, forward_net_2]






# torch.load(model.module.first_network.state_dict(), './first_network_final.pth')
# torch.load(model.module.second_network.state_dict(), './second_network_final.pth')

#============================================
# # 전방 및 후방 네트워크에 대한 옵티마이저 설정
# optimizers = [torch.optim.Adam(list(forward_net.parameters()) + list(backward_net.parameters()), lr=learning_rate) for forward_net in forward_nets]

# # 후방 네트워크에 대한 별도 옵티마이저 설정 (후방 네트워크 학습 단계용)
# optimizer_backward = torch.optim.Adam(backward_net.parameters(), lr=learning_rate)
#============================================

# # Aggressive learning rate scheduler
# scheduler = ReduceLROnPlateau(optimizers, mode='min', factor=0.7, patience=200, verbose=True)
# # scheduler = ReduceLROnPlateau(optimizer_backward, mode='min', factor=0.7, patience=200, verbose=True)

# # 각 전방 네트워크 옵티마이저에 대한 학습률 스케줄러 생성
# schedulers = [ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=200, verbose=True) for optimizer in optimizers]

# # 후방 네트워크 옵티마이저에 대한 학습률 스케줄러 생성
# scheduler_backward = ReduceLROnPlateau(optimizer_backward, mode='min', factor=0.7, patience=200, verbose=True)


# Loss criterion: mean squared error. The training cell uses it for both the
# data term and the PDE-residual term. (No optimizer is defined here.)
criterion = nn.MSELoss()

In [24]:
def create_dataloader(x_data, y_data, batch_size, shuffle):
    """Wrap paired input/target tensors in a ``DataLoader`` for mini-batch training.

    ``x_data`` and ``y_data`` must have the same first dimension; each batch
    yields an ``(x, y)`` pair.
    """
    paired = TensorDataset(x_data, y_data)
    return DataLoader(paired, batch_size=batch_size, shuffle=shuffle)

# 데이터 로더를 생성합니다.


# # num_workers를 시스템의 CPU 코어 수에 따라 조정합니다.
# import os
# num_workers = os.cpu_count()
# print("Number of workers:", num_workers)


In [25]:
#==================================================
# Training configuration
main_title = "BasisLearnX"
num_epochs = 100        # epochs per experiment in the joint (forward + backward) phase
num_epochs2 = 500       # epochs of the backward-only phase in every cycle
learning_rate1 = 0.002  # learning rate of the joint phase
learning_rate2 = 0.002  # learning rate of the backward-only phase
lambda_l2 = 0

batch_size = 64
shuffle = True
num_workers=16

# Tag shared by the output directory and every checkpoint file name.
run_tag = f"Newdist(M1M0.1)learn_Eucli_E{num_epochs},{num_epochs2}_{learning_rate1}lr"

# Output directory of this run.
file_path = f"./{run_tag}_({main_title})"
os.makedirs(file_path, exist_ok=True)

# Paths of the final checkpoints.
basis_name = f'./{file_path}/basis_net_{run_tag}.pth'
forward_name0 = f'./{file_path}/forward_net_0_{run_tag}.pth'
forward_name1 = f'./{file_path}/forward_net_1_{run_tag}.pth'
forward_name2 = f'./{file_path}/forward_net_2_{run_tag}.pth'
backward_name = f'./{file_path}/backward_net_{run_tag}.pth'
n_params_filename = f'./{file_path}/n_params_{run_tag}.pth'

# Directory for the forward-net snapshots written after every experiment.
WA_forward_path = f"./{file_path}/WA_forward"
os.makedirs(WA_forward_path, exist_ok=True)

# Directory for the per-cycle snapshots.
WA_path = f"./{file_path}/WA"
os.makedirs(WA_path, exist_ok=True)

#==================================================

print(main_title)

# Cycle-wise training. Every cycle consumes three consecutive rows of `data`
# (the table was assembled in cluster order 2, 1, 0) and runs two phases:
#   1. per experiment: mini-batch training of the backward net, all forward
#      nets and the mixing weights of the clusters the experiment does NOT
#      belong to;
#   2. full-batch training on the summed loss of the three experiments with
#      the forward nets frozen.
for cycle in range(longest_length):
    print(f"\nCycle {cycle + 1}/{longest_length}")
    start_idx = cycle * 3  # first row handled in this cycle
    end_idx = start_idx + 3  # one past the last row handled in this cycle

    # ---- Phase 1: joint training, one experiment at a time ----
    for idx in range(start_idx, end_idx):
        A = data['A'][idx]
        B = data['B'][idx]
        C = data['C'][idx]
        n0 = data['dist_from0'][idx]
        n1 = data['dist_from1'][idx]
        n2 = data['dist_from2'][idx]
        label = data['Cluster'][idx]  # cluster the experiment belongs to
        x_data, y_data, f_data, f_test, xy_data = generate_data(A, B, C)
        loader = create_dataloader(xy_data, f_data, batch_size, shuffle)

        # A fresh wrapper per experiment: the sub-networks are shared, the
        # mixing weights restart from this row's dist_from* values.
        model = SerialNetwork(basis_net, forward_nets, backward_net, n0, n1, n2).to(device)

        # Network weights optimized in this phase (basis_net is not included).
        combined_parameters = list(backward_net.parameters())
        for forward_net in forward_nets:
            combined_parameters.extend(list(forward_net.parameters()))

        # Only the mixing weights of the *other* clusters are trainable; the
        # weight of the experiment's own cluster keeps its initial value.
        n_parameters = []
        if label != 0:
            n_parameters.append(model.n0)
        if label != 1:
            n_parameters.append(model.n1)
        if label != 2:
            n_parameters.append(model.n2)

        # Two parameter groups so the mixing weights can use their own learning rate.
        param_groups = [
            {'params': combined_parameters, 'lr': learning_rate1},  # network weights
            {'params': n_parameters, 'lr': 0.002}  # mixing weights n0/n1/n2
        ]
        optimizer = torch.optim.Adam(param_groups)

        print(f"Training Forward Net {label} for Experiment {idx + 1} with A={A}, B={B}, C={C}")
        # Per-epoch loss records.
        loss_history = []
        data_loss_history = []
        pde_loss_history = []
        mse_values = []
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.7, patience=50, verbose=True)

        for epoch in range(num_epochs):
            model.train()
            batch_loss = 0.0
            batch_data_loss = 0.0  # accumulated data loss
            batch_pde_loss = 0.0   # accumulated PDE loss

            for batch_num, (batch_xy, batch_f) in enumerate(loader):
                batch_xy, batch_f = batch_xy.to(device), batch_f.to(device)
                optimizer.zero_grad()

                # Data term: fit the sampled function values.
                f_pred = forward_pass(model, batch_xy, device)
                data_loss = criterion(f_pred, batch_f)
                # PDE term: residual of f_xx + f_yy = -(1/B^2 + 1/C^2) * f.
                f_xx, f_yy = compute_derivatives(model, batch_xy, n0, n1, n2, device)
                pde_loss = criterion(f_xx + f_yy, ((1/B**2)+(1/C**2))*(-1)*f_pred.squeeze())

                # Combine the losses
                loss = data_loss + pde_loss

                # Backpropagate and optimize
                loss.backward()
                optimizer.step()

                # Record losses
                batch_loss += loss.item()
                batch_data_loss += data_loss.item()
                batch_pde_loss += pde_loss.item()

            avg_loss = batch_loss / len(loader)
            avg_data_loss = batch_data_loss / len(loader)
            avg_pde_loss = batch_pde_loss / len(loader)

            # Append the average losses for this epoch to the history
            loss_history.append(avg_loss)
            data_loss_history.append(avg_data_loss)
            pde_loss_history.append(avg_pde_loss)

            scheduler.step(avg_loss)

            # Print the losses every 10 epochs and on the last epoch
            if (epoch % 10 == 0) or (epoch == num_epochs-1) :
                current_lr = optimizer.param_groups[0]['lr']
                print(f'Epoch {epoch}/{num_epochs}, Total Loss: {avg_loss}, Data Loss: {avg_data_loss}, PDE Loss: {avg_pde_loss}, LR: {current_lr}')
                print(f'n0: {model.n0.item()}, n1: {model.n1.item()}, n2: {model.n2.item()} ')

        # Snapshot all forward nets after this experiment.
        for i, forward_net in enumerate(forward_nets):
            torch.save(forward_net.state_dict(), f'./{WA_forward_path}/forward_net_{i}_{cycle}Cycle_{idx}th.pth')

        print("\n")

    # ---- Phase 2: backward-net training with frozen forward nets ----
    for net in forward_nets:
        freeze_parameters(net, freeze=True)

    print("Training Backward Net")
    loss_history = []

    # The forward-net parameters are still handed to the optimizer, but they
    # are frozen here and therefore receive no gradient in this phase.
    combined_parameters = list(backward_net.parameters())
    for forward_net in forward_nets:
        combined_parameters.extend(list(forward_net.parameters()))

    optimizer1 = torch.optim.Adam(combined_parameters, lr=learning_rate2)

    scheduler1 = ReduceLROnPlateau(optimizer1, mode='min', factor=0.7, patience=100, verbose=True)

    for epoch in range(num_epochs2):
        Sum_loss = 0
        total_loss2=0
        for idx in range(start_idx, end_idx):
            A = data['A'][idx]
            B = data['B'][idx]
            C = data['C'][idx]
            n0 = data['dist_from0'][idx]
            n1 = data['dist_from1'][idx]
            n2 = data['dist_from2'][idx]

            label = data['Cluster'][idx]  # cluster the experiment belongs to

            # Rebuilt for every experiment and epoch, so the mixing weights
            # always equal this row's dist_from* values in this phase.
            model = SerialNetwork(basis_net, forward_nets, backward_net, n0, n1, n2).to(device)

            x_data, y_data, f_data, f_test, xy_data = generate_data(A, B, C)

            f_data = f_data.to(device)
            # Convert the grid values to a column tensor on the target device.
            f_test = torch.tensor(f_test, dtype=torch.float32).view(-1, 1).to(device)
            xy_data = xy_data.to(device)

            # Full-grid data and PDE losses for this experiment.
            f_pred = forward_pass(model, xy_data, device)
            data_loss = criterion(f_pred, f_test)
            f_xx, f_yy = compute_derivatives(model, xy_data, n0, n1, n2, device)
            pde_loss = criterion(f_xx + f_yy, ((1/B**2)+(1/C**2))*(-1)*f_pred.squeeze())

            # Combine the losses
            total_loss2 = data_loss + pde_loss
            Sum_loss += total_loss2

        # One optimizer step on the loss summed over the three experiments.
        model.train()
        optimizer1.zero_grad()
        Sum_loss.backward()
        optimizer1.step()
        avg_loss = Sum_loss.item()
        loss_history.append(avg_loss)
        scheduler1.step(avg_loss)

        if (epoch % 10 == 0) or (epoch == num_epochs2-1) :
            current_lr = optimizer1.param_groups[0]['lr']
            print(f'Epoch {epoch}/{num_epochs2}, Total Loss: {avg_loss}, LR: {current_lr}')
            # Values of the last model built above (its initial mixing weights).
            print(f'n0: {model.n0.item()}, n1: {model.n1.item()}, n2: {model.n2.item()}')

    # Unfreeze the forward nets for the next cycle.
    for net in forward_nets:
        freeze_parameters(net, freeze=False)

    # Per-cycle snapshots of the forward and backward nets.
    for i, forward_net in enumerate(forward_nets):
        torch.save(forward_net.state_dict(), f'./{WA_path}/forward_net_{i}_cycle_{cycle}.pth')

    torch.save(backward_net.state_dict(), f'./{WA_path}/backward_net_cycle_{cycle}.pth')


BasisLearnX

Cycle 1/10
2nd raining Forward Net 2 for Experiment 1 with A=7, B=7, C=7
Training Forward Net 2 for Experiment 1 with A=7, B=7, C=7




Epoch 0/100, Total Loss: 0.010241682762813986, Data Loss: 0.00855454625739793, PDE Loss: 0.0016871364779805966, LR: 0.002
n0: 0.10416921228170395, n1: 0.10301641374826431, n2: 1.0 


KeyboardInterrupt: 

In [None]:

# Persist the trained weights: the basis network, the three forward networks and the
# backward network each go to their own pre-configured checkpoint path.
def _save_weights(network, path):
    """Serialize ``network``'s state_dict to ``path`` with torch.save."""
    torch.save(network.state_dict(), path)


_save_weights(basis_net, basis_name)
_save_weights(forward_nets[0], forward_name0)
_save_weights(forward_nets[1], forward_name1)
_save_weights(forward_nets[2], forward_name2)

# Backward network weights.
_save_weights(backward_net, backward_name)

# Collect the n0 / n1 / n2 values as plain Python numbers in a dictionary.
# NOTE(review): `model` is read from the kernel namespace here; replace it with the
# actual model variable name if a different one holds the network to be saved.
n_params = {key: getattr(model, key).item() for key in ('n0', 'n1', 'n2')}

# Write the dictionary to disk.
torch.save(n_params, n_params_filename)




# torch.save(forward_nets[0].state_dict(), f'forward_net_0_afterDOE_2input_Newdist(max1)_E2,1000_0.002lr.pth')
# torch.save(forward_nets[1].state_dict(), f'forward_net_1_afterDOE_2input_Newdist(max1)_E2,1000_0.002lr.pth')
# torch.save(forward_nets[2].state_dict(), f'forward_net_2_afterDOE_2input_Newdist(max1)_E2,1000_0.002lr.pth')

# # 후방 네트워크 가중치 저장
# torch.save(backward_net.state_dict(), 'backward_net_afterDOE_2input_Newdist(max1)_E2,1000_0.002lr.pth')


In [None]:

# Build a fresh BasisNet on the active device and restore the saved basis checkpoint into it.
basis_net2 = BasisNet().to(device)

# map_location remaps the stored tensors onto `device`. Without it, torch.load tries to
# restore them on the GPU they were saved from, which fails when that GPU is not present.
basis_net2.load_state_dict(
    torch.load('./basis_sum_pth/basis_network_basis_sum_777.pth', map_location=device)
)

<All keys matched successfully>

In [None]:
# Walk `basis_net` and `basis_net2` in parameter order and print, for every parameter,
# both sets of values followed by their element-wise difference (all zeros = identical).
for (name1, param1), (name2, param2) in zip(basis_net.named_parameters(), basis_net2.named_parameters()):
    weight_diff = param1.data - param2.data
    report = (
        ("basis1", param1.data),
        ("basis2", param2.data),
        ("diff", weight_diff),
    )
    for heading, tensor in report:
        print(heading)
        print(tensor)
        print("\n")  # literal newline plus print's own terminator -> two blank lines

basis1
tensor([[ 0.0409, -0.2675],
        [-0.0128, -0.1159],
        [ 0.1628, -0.1552],
        [-0.0342, -0.1779],
        [ 0.1537,  0.1500],
        [ 0.1299,  0.1010],
        [ 0.1588, -0.1689],
        [ 0.1025, -0.0292],
        [-0.0029, -0.1205],
        [-0.1587,  0.1501]], device='cuda:1')


basis2
tensor([[ 0.0409, -0.2675],
        [-0.0128, -0.1159],
        [ 0.1628, -0.1552],
        [-0.0342, -0.1779],
        [ 0.1537,  0.1500],
        [ 0.1299,  0.1010],
        [ 0.1588, -0.1689],
        [ 0.1025, -0.0292],
        [-0.0029, -0.1205],
        [-0.1587,  0.1501]], device='cuda:1')


diff
tensor([[0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.],
        [0., 0.]], device='cuda:1')


basis1
tensor([-0.7723,  2.3789,  2.7825, -1.2654,  1.7077, -1.3990,  1.2062, -1.3990,
        -2.9084,  2.0049], device='cuda:1')


basis2
tensor([-0.7723,  2.3789,  2.7825, -1.265

In [None]:

# Build three fresh ForwardNets on the active device. Only index 0 is restored from a
# checkpoint in this cell; the other two keep their freshly constructed weights.
forward_nets0 = [ForwardNet().to(device) for _ in range(3)]

# map_location remaps the stored tensors onto `device`. Without it, torch.load tries to
# restore them on the GPU they were saved from, which fails when that GPU is not present.
forward_nets0[0].load_state_dict(
    torch.load('./basis_sum_pth/forward_network_0_basis_sum_777.pth', map_location=device)
)

<All keys matched successfully>

In [None]:
# Walk `forward_nets[0]` and `forward_nets0[0]` in parameter order and print, for every
# parameter, both sets of values followed by their element-wise difference.
for (name1, param1), (name2, param2) in zip(forward_nets[0].named_parameters(), forward_nets0[0].named_parameters()):
    weight_diff = param1.data - param2.data
    report = (
        ("forward0_1", param1.data),
        ("forward0_2", param2.data),
        ("diff", weight_diff),
    )
    for heading, tensor in report:
        print(heading)
        print(tensor)
        print("\n")  # literal newline plus print's own terminator -> two blank lines

forward0_1
tensor([[-0.2405,  0.0427],
        [ 0.0628, -0.1259],
        [ 0.1119,  0.0131],
        [-0.1072,  0.0615],
        [-0.2191,  0.0679],
        [-0.0405, -0.0165],
        [ 0.1056, -0.0946],
        [ 0.0434, -0.2570],
        [ 0.0114,  0.1024],
        [-0.1695, -0.2594]], device='cuda:1')


forward0_2
tensor([[ 0.1719, -0.0553],
        [-0.2166, -0.1332],
        [ 0.2314,  0.1686],
        [ 0.0912,  0.2452],
        [ 0.1926, -0.2042],
        [-0.0840,  0.1033],
        [-0.1877, -0.0892],
        [-0.0533, -0.1691],
        [ 0.2138,  0.2087],
        [ 0.0098,  0.1530]], device='cuda:1')


diff
tensor([[-0.4124,  0.0980],
        [ 0.2794,  0.0073],
        [-0.1195, -0.1555],
        [-0.1985, -0.1837],
        [-0.4118,  0.2721],
        [ 0.0435, -0.1199],
        [ 0.2933, -0.0054],
        [ 0.0967, -0.0880],
        [-0.2024, -0.1063],
        [-0.1793, -0.4124]], device='cuda:1')


forward0_1
tensor([-3.1004, -1.0550,  2.8708, -0.0550, -0.8923, -4.5338, 