In [2]:
import tarfile
import os 

# Define the path to your tar.gz file
os.chdir('/home/jupyter-choi/')
print(os.getcwd())



/home/jupyter-choi


In [1]:
import neurokit2 as nk  
import matplotlib.pyplot as plt
def find_pqr(signal, sr):
    try:
        signals = signal.reshape(-1,)
        signals = nk.ecg_clean(signals, sr, method='neurokit') #디노이징
        _, rpeaks = nk.ecg_peaks(signals, sampling_rate = sr)
        _, waves_peak = nk.ecg_delineate(signals, rpeaks, sampling_rate = sr, method='peak')

        r_peaks = np.array(rpeaks['ECG_R_Peaks'])
        r_peaks = r_peaks[~np.isnan(r_peaks)].astype('int')

        p_peaks = np.array(waves_peak['ECG_P_Peaks'])
        p_peaks = p_peaks[~np.isnan(p_peaks)].astype('int')

        q_peaks = np.array(waves_peak['ECG_Q_Peaks'])
        q_peaks = q_peaks[~np.isnan(q_peaks)].astype('int')

        s_peaks = np.array(waves_peak['ECG_S_Peaks'])
        s_peaks = s_peaks[~np.isnan(s_peaks)].astype('int')

        t_peaks = np.array(waves_peak['ECG_T_Peaks'])
        t_peaks = t_peaks[~np.isnan(t_peaks)].astype('int')

        plt.figure(figsize=(20,6))
        plt.plot(signals)
        plt.plot(p_peaks, signals[p_peaks], "o", markersize = 6, label = 'P_peaks')
        plt.plot(q_peaks, signals[q_peaks], "o", markersize = 6, label = 'Q_peaks')
        plt.plot(r_peaks, signals[r_peaks], "o", markersize = 6, label = 'R_peaks')
        plt.plot(s_peaks, signals[s_peaks], "o", markersize = 6, label = 'S_peaks')
        plt.plot(t_peaks, signals[t_peaks], "o", markersize = 6, label = 'T_peaks')
        plt.legend()
        plt.show()
    except ValueError as e:
        print(f"ValueError: {e}")
        # 오류가 발생한 경우 NaN 배열을 반환하여 시각화 부분에서 건너뜁니다.
        return np.full_like(signal, np.nan)
 
        

In [6]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
import numpy as np
import os
from sklearn.model_selection import train_test_split
import torch.nn.functional as F
from adamp import AdamP
import wandb
import matplotlib.pyplot as plt
import torch.optim as optim
from sklearn.metrics import mean_squared_error
from math import sqrt
from sklearn.metrics import r2_score
from sklearn.preprocessing import StandardScaler

wandb.init(project="Linear_AdamW")

# CUDA 사용 가능 여부 확인 및 장치 설정
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# 시각화를 위한 함수
def plot_original_vs_predicted(original, predicted, num_samples=5, num_leads=11):
    plt.figure(figsize=(20, 10))
    lead_length = 500  # Correct lead length based on your data

    for i in range(min(num_samples, len(original))):
        original_lead = original[i]
        predicted_lead = predicted[i].reshape(num_leads, lead_length)


        # Plotting the original lead
        plt.subplot(num_samples, 2, 2*i + 1)
        plt.plot(original_lead, label="Original Lead")
        plt.title(f"Original Lead Sample {i+1}")
        plt.legend()

        # Plotting a few selected predicted leads for clarity
        plt.subplot(num_samples, 2, 2*i + 2)
        for j in range(min(3, num_leads)):  # Plotting first 3 leads as an example
            plt.plot(predicted_lead[j], label=f"Predicted Lead {j+1}")
        plt.title(f"Predicted Leads Sample {i+1}")
        plt.legend()

    plt.tight_layout()
    plt.show()


def flatten_data(data):
    return data.reshape(data.shape[0], -1)

# MSE와 RMSE 계산 함수
def calculate_performance_metrics(y_true, y_pred):
    mse = mean_squared_error(y_true, y_pred)
    rmse = sqrt(mse)
    return mse, rmse

# R-squared 계산 함수
def calculate_r_squared(y_true, y_pred):
    return r2_score(y_true, y_pred)


def load_npy_data(folder_path):
    X_list = []
    y_list = []
    processed_files = set()

    for file_name in os.listdir(folder_path):
        if file_name.startswith('wave') and file_name.endswith('.npy') and file_name not in processed_files:
            data = np.load(os.path.join(folder_path, file_name))
            processed_files.add(file_name)
            if data.shape[0] == 13:
                data = data[:12, :]  # 13번째 행 제거
                
            if data.shape[0] >= 12:
                X_feature = data[1, :]  # Assuming 1st lead as input
                y_features = data[:4, :]  # Remaining leads as output
                X_list.append(X_feature)
                y_list.append(y_features.flatten())  # Flatten the 11 leads
            else:
                print(f"Invalid file format: {file_name}")

    X = np.array(X_list)
    y = np.array(y_list)
    return X, y

# 회귀 모델 정의
class EnhancedNonlinearRegressionModel(nn.Module):
    def __init__(self, input_size, lead_length):
        super(EnhancedNonlinearRegressionModel, self).__init__()
        self.fc1 = nn.Linear(input_size, 20)
        self.fc2 = nn.Linear(20, 40)
        self.fc3 = nn.Linear(40, 20)
        self.fc4 = nn.Linear(20, 10)
        self.fc5 = nn.Linear(10, output_size)  # output_size를 목표 데이터의 크기에 맞게 설정


    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = F.relu(self.fc3(x))
        x = F.relu(self.fc4(x))
        x = self.fc5(x)
        return x

# 데이터 로드
folder_path = "diffusion/data_No20000/data_0"
X, y = load_npy_data(folder_path)

# 데이터 분리
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize the StandardScaler
scaler = StandardScaler()

# Fit on training data and transform both training and testing data
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Convert scaled data to PyTorch tensors
X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
y_train_tensor = torch.tensor(y_train, dtype=torch.float32)
y_test_tensor = torch.tensor(y_test, dtype=torch.float32)

# Flatten the target data if necessary
y_train_flat = flatten_data(y_train_tensor.numpy())
y_test_flat = flatten_data(y_test_tensor.numpy())

# Create datasets for training and testing
train_dataset = TensorDataset(X_train_tensor, torch.tensor(y_train_flat, dtype=torch.float32))
test_dataset = TensorDataset(X_test_tensor, torch.tensor(y_test_flat, dtype=torch.float32))

# Create DataLoaders for training and testing
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

input_size = 5000  # Lead II의 데이터 길이
output_size = 5000 *4
lead_length = 500

print(torch.cuda.device_count())


# 모델 초기화 시 input_size를 올바르게 설정
model = EnhancedNonlinearRegressionModel(input_size=X_train_tensor.shape[1], lead_length=lead_length)


# Check available GPUs and adjust device_ids if necessary
print(torch.cuda.device_count())

# Ensure the model is on the default device before applying DataParallel
model.to('cuda:0')

# Apply DataParallel
if torch.cuda.is_available():
    model = nn.DataParallel(model, device_ids=[0,1,2,3, 4, 5,6,7])

    
criterion = nn.MSELoss()


# optimizer = AdamP(model.parameters(), lr=0.01, betas=(0.9, 0.999), weight_decay=1e-2)
optimizer = optim.AdamW(model.parameters(), lr=0.01, betas=(0.9, 0.999), weight_decay=1e-2)

num_epochs = 10000

# Now your training loop can use train_loader
for epoch in range(num_epochs):
    model.train()
    total_loss = 0
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    avg_loss = total_loss / len(train_loader)
    print(f'Epoch {epoch+1}, Loss {avg_loss}')
    wandb.log({"epoch": epoch, "loss": avg_loss})

    # 평가 및 wandb에 기록
    if (epoch + 1) % 10 == 0:
        model.eval()
        with torch.no_grad():
            # 훈련 세트 평가
            train_predictions = model(X_train_tensor)
            train_mse, train_rmse = calculate_performance_metrics(y_train_flat, flatten_data(train_predictions.cpu().numpy()))

            # 테스트 세트 평가
            test_predictions = model(X_test_tensor)
            test_mse, test_rmse = calculate_performance_metrics(y_test_flat, flatten_data(test_predictions.cpu().numpy()))

            # R-squared 계산
            train_r2 = calculate_r_squared(y_train_flat, flatten_data(train_predictions.cpu().numpy()))
            test_r2 = calculate_r_squared(y_test_flat, flatten_data(test_predictions.cpu().numpy()))

            # wandb에 기록
            wandb.log({"epoch": epoch, "train_mse": train_mse, "train_rmse": train_rmse, "train_r2": train_r2, 
                       "test_mse": test_mse, "test_rmse": test_rmse, "test_r2": test_r2})

            print(f'Epoch {epoch+1} Evaluation - Train MSE: {train_mse}, Train RMSE: {train_rmse}, Train R2: {train_r2}')
            print(f'Epoch {epoch+1} Evaluation - Test MSE: {test_mse}, Test RMSE: {test_rmse}, Test R2: {test_r2}')
            # After model evaluation
            test_predictions = model(X_test_tensor)
            test_predictions_flat = flatten_data(test_predictions.cpu().numpy())

            # Reshape the predictions to have 11 leads
            test_predictions_reshaped = test_predictions_flat.reshape(-1, 4, lead_length)
            num_visual_samples = 5
            sample_indices = np.random.choice(len(X_test), num_visual_samples, replace=False)
            X_test_samples = X_test[sample_indices]
            test_predictions_samples = test_predictions_reshaped[sample_indices]

            # Plot original vs predicted
            plot_original_vs_predicted(X_test_samples, test_predictions_samples, num_samples=num_visual_samples)

# Evaluate the model
model.eval()
with torch.no_grad():
    train_predictions = model(X_train.unsqueeze(-1)).numpy()
    test_predictions = model(X_test.unsqueeze(-1)).numpy()



# Plot original vs predicted
plot_original_vs_predicted(X_test.numpy(), test_predictions_reshaped, num_samples=5)

# 최종 평가 결과 출력
train_predictions = model(X_train_tensor)
train_predictions_flat = flatten_data(train_predictions.cpu().numpy())
train_mse, train_rmse = calculate_performance_metrics(y_train_flat, train_predictions_flat)
print(f"Training Set - MSE: {train_mse}, RMSE: {train_rmse}")

test_predictions = model(X_test_tensor)
test_predictions_flat = flatten_data(test_predictions.cpu().numpy())
test_mse, test_rmse = calculate_performance_metrics(y_test_flat, test_predictions_flat)
print(f"Test Set - MSE: {test_mse}, RMSE: {test_rmse}")

train_r2 = calculate_r_squared(y_train_flat, train_predictions_flat)
print(f'Training Set - R-squared: {train_r2}')

test_r2 = calculate_r_squared(y_test_flat, test_predictions_flat)
print(f'Test Set - R-squared: {test_r2}')

0,1
epoch,▁▂▃▃▄▅▆▆▇██
loss,█▂▁▁▁▂▃▃▂▁
test_mse,▁
test_r2,▁
test_rmse,▁
train_mse,▁
train_r2,▁
train_rmse,▁

0,1
epoch,9.0
loss,841.44612
test_mse,337525.34375
test_r2,-206.9627
test_rmse,580.96931
train_mse,827.43188
train_r2,0.12265
train_rmse,28.76512


8
8
Epoch 1, Loss 901.4700815429687
Epoch 2, Loss 860.7278571777343
Epoch 3, Loss 845.5960678710937
Epoch 4, Loss 849.1218818359375
Epoch 5, Loss 841.1818415527343
Epoch 6, Loss 840.0620512695313
Epoch 7, Loss 839.2981042480469
Epoch 8, Loss 837.8954382324218
Epoch 9, Loss 838.5261459960938
Epoch 10, Loss 830.2317451171875
Epoch 10 Evaluation - Train MSE: 824.5441284179688, Train RMSE: 28.714876430484054, Train R2: 0.12705934024284674
Epoch 10 Evaluation - Test MSE: 325119.4375, Test RMSE: 570.1924565442795, Test R2: -204.56740820030683


ValueError: cannot reshape array of size 2000 into shape (11,500)

<Figure size 2000x1000 with 0 Axes>