In [1]:
import tarfile
import os 

import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset, random_split
import numpy as np
import os
import torch.nn.functional as F
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.preprocessing import StandardScaler  # 이 부분을 추가하세요
import neurokit2 as nk 
from adamp import AdamP
import wandb




# Run the notebook from the project folder so the relative data paths used
# further down resolve, then echo the directory as a sanity check.
WORK_DIR = '/home/ubuntu/hw'
os.chdir(WORK_DIR)
print(os.getcwd())



/home/ubuntu/hw


In [2]:

def find_pqr(signal, sr):
    """Locate and plot the P, Q, R, S and T peaks of one ECG trace.

    The trace is flattened, cleaned with NeuroKit2, R peaks are detected,
    the remaining waves are delineated with the 'peak' method, and every
    peak family is drawn as a marker series on top of the cleaned signal.

    Args:
        signal: numpy array holding a single ECG trace; it is flattened with
            ``reshape(-1,)`` before processing.
        sr: sampling rate passed to NeuroKit2 for cleaning, R-peak detection
            and delineation.

    Returns:
        None after the figure has been shown. If NeuroKit2 (or the indexing)
        raises ``ValueError``, the message is printed and an array of NaNs
        shaped like ``signal`` is returned instead.
    """
    try:
        signals = signal.reshape(-1,)
        # Denoise with the caller's sampling rate. The previous hard-coded
        # 250 silently mis-filtered any trace recorded at a different rate
        # while the two calls below already honoured ``sr``.
        signals = nk.ecg_clean(signals, sampling_rate=sr, method='neurokit')
        _, rpeaks = nk.ecg_peaks(signals, sampling_rate=sr)
        _, waves_peak = nk.ecg_delineate(signals, rpeaks, sampling_rate=sr, method='peak')

        # (legend label, raw peak positions) in the order they are drawn.
        raw_peaks = (
            ('P_peaks', waves_peak['ECG_P_Peaks']),
            ('Q_peaks', waves_peak['ECG_Q_Peaks']),
            ('R_peaks', rpeaks['ECG_R_Peaks']),
            ('S_peaks', waves_peak['ECG_S_Peaks']),
            ('T_peaks', waves_peak['ECG_T_Peaks']),
        )

        # Resolve every index array before opening a figure so that a failure
        # here does not leave an empty figure behind.
        peak_indices = []
        for label, positions in raw_peaks:
            positions = np.array(positions)
            # Undetected waves are reported as NaN; drop them so the rest can
            # be used as integer sample indices.
            positions = positions[~np.isnan(positions)].astype('int')
            peak_indices.append((label, positions))

        plt.figure(figsize=(20, 6))
        plt.plot(signals)
        for label, positions in peak_indices:
            plt.plot(positions, signals[positions], "o", markersize=6, label=label)
        plt.legend()
        plt.show()
    except ValueError as e:
        print(f"ValueError: {e}")
        # On failure hand back a NaN array so the caller can skip this trace
        # in its visualization step.
        return np.full_like(signal, np.nan)
 
        

In [None]:
# 2 -> all: regression model (presumably Lead II as input, the other leads
# as output, matching the slicing in the training loop below)
# Dropout added

# Start a Weights & Biases run under the "Linear7_AdamP" project; the
# wandb.log calls later in the notebook report to this run.
wandb.init(project="Linear7_AdamP")


# Data loading and preprocessing function
def load_npy_data(folder_path, sampling_rate=250):
    """Load and denoise every ``wave*.npy`` recording found in a folder.

    Each file is expected to hold a 2-D array with 13 rows. The 13th row is
    discarded and each of the remaining 12 rows is cleaned independently with
    ``nk.ecg_clean`` (method 'neurokit'). Files with any other layout are
    reported on stdout and skipped.

    Args:
        folder_path: directory to scan (not recursive).
        sampling_rate: sampling rate handed to ``nk.ecg_clean``. Defaults to
            250, the value that used to be hard-coded.

    Returns:
        numpy array stacking the cleaned recordings in sorted file-name order;
        an empty array of shape ``(0,)`` when no usable file was found.
    """
    X_list = []

    # os.listdir returns entries in arbitrary order; sorting keeps the sample
    # order (and therefore any later index-based split) reproducible.
    for file_name in sorted(os.listdir(folder_path)):
        if not (file_name.startswith('wave') and file_name.endswith('.npy')):
            continue

        data = np.load(os.path.join(folder_path, file_name))

        # Require a 2-D array with 13 rows: the ndim guard keeps a 1-D or
        # 0-D array from crashing the row/column slice below.
        if data.ndim != 2 or data.shape[0] != 13:
            print(f"파일 {file_name}의 형식이 올바르지 않습니다: data shape {data.shape}")
            continue

        data = data[:12, :]  # drop the 13th row

        # Denoise each lead (row) separately.
        cleaned_data = np.array([
            nk.ecg_clean(data[i, :], sampling_rate=sampling_rate, method='neurokit')
            for i in range(data.shape[0])
        ])
        X_list.append(cleaned_data)

    X = np.array(X_list)
    return X


# Load the data
folder_path = "diffusion/data_No20000/data_0"
X = load_npy_data(folder_path)

# 80 / 20 train-test split sizes
dataset_size = len(X)
test_size = int(0.2 * dataset_size)
train_size = dataset_size - test_size

# Draw the split indices up front from a seeded generator: the split is then
# reproducible across kernel restarts, and the scaler below can be fitted on
# the training samples only.
split_generator = torch.Generator().manual_seed(42)
shuffled_indices = torch.randperm(dataset_size, generator=split_generator).tolist()
train_indices = shuffled_indices[:train_size]
test_indices = shuffled_indices[train_size:]

# Normalization. The scaler is fitted on training rows only so that no
# test-set statistics leak into training; the same transform is then applied
# to every sample. (The earlier version fitted on all of X and also built a
# throw-away dataset/split before scaling.)
scaler = StandardScaler()
scaler.fit(X[train_indices].reshape(-1, 5000))
X_scaled = scaler.transform(X.reshape(-1, 5000)).reshape(-1, 12, 5000)
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Datasets and DataLoaders
dataset = TensorDataset(X_tensor)
train_dataset = torch.utils.data.Subset(dataset, train_indices)
test_dataset = torch.utils.data.Subset(dataset, test_indices)

train_loader = DataLoader(train_dataset, batch_size=256, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=256)


# Regression model class
class LinearRegressionModel(nn.Module):
    """Seven-layer fully connected regressor.

    Layer widths run input_size -> 2048 -> 1024 -> 512 -> 256 -> 128 -> 64
    -> output_size. ReLU follows every layer except the last, and dropout
    (p=0.3) follows the first two activations.
    """

    def __init__(self, input_size, output_size):
        super().__init__()
        widths = (input_size, 2048, 1024, 512, 256, 128, 64, output_size)
        # Register fc1..fc7 in order, slotting dropout1/dropout2 right after
        # fc1/fc2 so attribute names and registration order stay as before.
        for position, (fan_in, fan_out) in enumerate(zip(widths, widths[1:]), start=1):
            setattr(self, f"fc{position}", nn.Linear(fan_in, fan_out))
            if position <= 2:
                setattr(self, f"dropout{position}", nn.Dropout(0.3))

    def forward(self, x):
        """Map a batch of input vectors to a batch of output vectors."""
        hidden = self.dropout1(F.relu(self.fc1(x)))
        hidden = self.dropout2(F.relu(self.fc2(hidden)))
        for layer in (self.fc3, self.fc4, self.fc5, self.fc6):
            hidden = F.relu(layer(hidden))
        # Final layer is linear: no activation on the regression output.
        return self.fc7(hidden)


# Model initialization and device selection.
# GPU index 5 is still preferred, but a host with fewer GPUs (or none) used to
# crash here; fall back to the default CUDA device, then to the CPU.
if torch.cuda.is_available() and torch.cuda.device_count() > 5:
    device = torch.device('cuda:5')
elif torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(f'Using device: {device}')

input_size = 5000  # number of samples in the Lead II input
output_size = 5000 * 11  # number of samples in the 11 remaining leads, flattened

# Build the model on the selected device
lin_reg_model = LinearRegressionModel(input_size, output_size).to(device)

# Optimizer
params = lin_reg_model.parameters()
lin_reg_optimizer = AdamP(params, lr=0.001, betas=(0.9, 0.999), weight_decay=1e-2)



# Training loop
num_epochs = 10000
criterion = nn.MSELoss()  # built once instead of once per batch
target_lead_indices = [i for i in range(12) if i != 1]  # every lead except index 1 (Lead II)

for epoch in range(num_epochs):
    # The periodic evaluation below puts the model in eval mode. Switch back
    # to train mode at the start of every epoch; otherwise dropout stays
    # disabled for all training after the first evaluation.
    lin_reg_model.train()

    for data in train_loader:
        # The loader yields a one-element batch container; element 0 is the
        # tensor of recordings.
        data = data[0].to(device)
        lead_II = data[:, 1, :]  # second lead (Lead II) is the model input
        # The other 11 leads, flattened per sample, are the regression target.
        remaining_leads = data[:, target_lead_indices, :].reshape(-1, 5000 * 11)

        # Forward pass and loss
        lin_reg_optimizer.zero_grad()
        predictions = lin_reg_model(lead_II)
        loss = criterion(predictions, remaining_leads)

        # Backward pass and optimizer step
        loss.backward()
        lin_reg_optimizer.step()
        wandb.log({"Epoch": epoch, "Training Loss": loss.item()})

    # Every 10 epochs, compute the mean MSE and R-squared on the test set and
    # log them to wandb.
    if (epoch + 1) % 10 == 0:
        mse_scores, r2_scores = [], []
        lin_reg_model.eval()
        with torch.no_grad():
            for data in test_loader:
                data = data[0].to(device)
                lead_II = data[:, 1, :].view(-1, 5000)
                remaining_leads = data[:, target_lead_indices, :].reshape(-1, 55000)

                predictions = lin_reg_model(lead_II)
                # Move to host memory once and reuse for both metrics.
                targets_np = remaining_leads.cpu().numpy()
                predictions_np = predictions.cpu().numpy()
                mse = mean_squared_error(targets_np, predictions_np)
                r2 = r2_score(targets_np, predictions_np)

                mse_scores.append(mse)
                r2_scores.append(r2)

        # Average the per-batch scores
        average_mse = np.mean(mse_scores)
        average_r2 = np.mean(r2_scores)

        # Log to wandb
        wandb.log({"Epoch": epoch, "Average MSE": average_mse, "Average R-squared": average_r2})

    # Reports the loss of the last training batch of this epoch.
    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item()}')

# Visualization function
def plot_ecgs(original, predicted, sample_index):
    """Draw the 12 leads of one sample: originals left, predictions right.

    Args:
        original: array indexed as [sample, lead, time] holding all 12 leads.
        predicted: array indexed as [sample, lead, time] holding the 11
            predicted leads (every lead except index 1).
        sample_index: which sample of the batch to draw.

    Lead index 1 (Lead II) is the model input, so its right-hand panel simply
    repeats the original trace.
    """
    fig, axes = plt.subplots(12, 2, figsize=(15, 20))
    for lead, (original_ax, predicted_ax) in enumerate(axes):
        original_ax.plot(original[sample_index, lead, :])
        if lead == 1:
            right_trace = original[sample_index, lead, :]
        else:
            # The prediction has no row for lead 1, so leads after it sit one
            # row earlier.
            predicted_row = lead - 1 if lead > 1 else lead
            right_trace = predicted[sample_index, predicted_row, :]
        predicted_ax.plot(right_trace)
        original_ax.set_title(f'Original Lead {lead+1}')
        predicted_ax.set_title(f'Predicted Lead {lead+1}')
    plt.tight_layout()
    plt.show()

# Prediction and visualization
lin_reg_model.eval()
with torch.no_grad():
    first_batch = next(iter(test_loader))
    sample_data = first_batch[0]
    lead_II = sample_data[:, 1, :].to(device)
    flat_prediction = lin_reg_model(lead_II)
    # Unflatten to (batch, 11 leads, 5000 samples) and bring back to the CPU.
    predicted = flat_prediction.view(-1, 11, 5000).cpu()
    plot_ecgs(sample_data.numpy(), predicted, 0)  # visualize the first sample

    


def calculate_scores(original, predicted):
    """Compute per-lead MSE and R-squared between targets and predictions.

    Args:
        original: array indexed as [sample, lead, time] with all 12 leads.
        predicted: array indexed as [sample, lead, time] with the 11
            predicted leads (every lead except index 1).

    Returns:
        (mse_scores, r2_scores): two lists of 11 values each, ordered by
        original lead index with lead 1 omitted.
    """
    # Lead index 1 (Lead II) is the model input, so it is not scored. The
    # position of each remaining lead in this list is its row in `predicted`.
    scored_leads = [lead for lead in range(12) if lead != 1]

    mse_scores, r2_scores = [], []
    for predicted_row, lead in enumerate(scored_leads):
        target = original[:, lead, :]
        estimate = predicted[:, predicted_row, :]
        mse_scores.append(mean_squared_error(target, estimate))
        r2_scores.append(r2_score(target, estimate))

    return mse_scores, r2_scores

# Predict on the test dataset and compute the scores
lin_reg_model.eval()
total_mse, total_r2 = [], []
with torch.no_grad():
    for sample_data in test_loader:
        data = sample_data[0].to(device)
        lead_II = data[:, 1, :]
        predicted_output = lin_reg_model(lead_II)

        # Only an output of width 55000 can be unflattened into 11 x 5000.
        if predicted_output.shape[1] != 55000:
            print("Invalid model output size")
            continue

        predicted = predicted_output.view(-1, 11, 5000).cpu()
        mse_scores, r2_scores = calculate_scores(data.cpu().numpy(), predicted.numpy())
        total_mse += mse_scores
        total_r2 += r2_scores

overall_mse = np.mean(total_mse)
overall_r2 = np.mean(total_r2)

wandb.log({"Average MSE": overall_mse})
wandb.log({"Average R-squared": overall_r2})

# Print the average MSE and R-squared scores
print(f'Average MSE: {overall_mse}')
print(f'Average R-squared: {overall_r2}')



0,1
Epoch,▁▁▁▁▂▂▂▂▃▃▃▃▃▃▃▃▄▄▄▄▅▅▅▅▆▆▆▆▆▆▆▆▇▇▇▇████
Training Loss,▁▂▃▂▂▂▂▂▂▂▁▂▂▂▃▂▂▂▂█▁▁▁▂▁▁▁▂▂▁▂▂▁▁▁▁▂▇▁▁

0,1
Epoch,9.0
Training Loss,0.99459
