In [1]:
import tarfile
import os 

# Define the path to your tar.gz file
os.chdir('/home/jupyter-choi/')
print(os.getcwd())



/home/jupyter-hong


In [3]:
import torch

if torch.cuda.is_available():
    device_name = torch.cuda.get_device_name(4)
    print(f"GPU: {device_name}")
else:
    print("No GPU available.")

GPU: NVIDIA GeForce RTX 2080 Ti


In [4]:
# .npy 파일이 있는 폴더 경로
folder_path = ""

# 폴더 내의 모든 .npy 파일 로드
file_list = [file for file in os.listdir(folder_path) if file.endswith('.csv')]

# npy 파일의 개수
num_npy_files = len(file_list)
print(f"There are {num_npy_files} .csv files in the folder.")

There are 1629 .csv files in the folder.


In [5]:
import torch

# 사용 가능한 GPU 확인
print(torch.cuda.device_count())
print(torch.cuda.get_device_name(0))

# GPU 변경 (예: 두 번째 GPU 사용)
device = torch.device('cuda:1' if torch.cuda.is_available() else 'cpu')
torch.cuda.set_device(1)

# 모델과 데이터를 새 GPU로 이동
model = model.to(device)
# 데이터 로더 내의 데이터도 같은 방식으로 이동시켜야 합니다.


8
NVIDIA GeForce RTX 2080 Ti


NameError: name 'model' is not defined

In [4]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader, TensorDataset, random_split
import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
import re
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset


def load_labels(label_file):
    # 라벨 파일을 읽고, pid와 라벨을 매핑하는 딕셔너리를 생성합니다.
    label_df = pd.read_csv(label_file)
    label_dict = dict(zip(label_df['PID'], label_df['AF']))
    return label_dict

def preprocess_data(X):
    # Z-점수 정규화를 수행합니다.
    X_mean = X.mean(axis=0)
    X_std = X.std(axis=0)
    X_normalized = (X - X_mean) / X_std
    return X_normalized

def load_data(folder_path, label_dict):
    X_list = []
    y_list = []

    for file_name in os.listdir(folder_path):
        if file_name.endswith('.npy'):
            # 파일 이름에서 pid 추출
            match = re.search(r's_(\d+)_\d+.npy', file_name)
            if match:
                pid = int(match.group(1))
                
                # 데이터 로드
                data = np.load(os.path.join(folder_path, file_name))

                # 데이터 형태 조정 (32채널을 1채널로 변경)
                data = data.reshape(-1, 1, 2500)

                # 라벨 할당
                label = label_dict.get(pid, 0)  # pid가 없는 경우 기본값으로 0을 사용
                X_list.append(data)
                y_list.append(label)
            else:
                print(f"파일 이름 형식이 맞지 않습니다: {file_name}")

    X = np.concatenate(X_list, axis=0)
    y = np.array(y_list)
    return X, y


# 라벨 파일과 데이터 폴더 경로
label_file = 'diffusion/mobile_data/12Lead+Mobile_ECG/orig_20230816/label/Total_Mobile_label12.csv'
folder_path = 'diffusion/mobile_data/12Lead+Mobile_ECG/orig_20230816/data/Mobile_ECG_Scaled(-1,1)S'

# 라벨 로드
label_dict = load_labels(label_file)

# 데이터 로드
X, y = load_data(folder_path, label_dict)

# 원본 데이터셋에서 훈련 및 검증 데이터셋, 테스트 데이터셋 분리
X_train_val, X_test, y_train_val, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# 훈련 및 검증 데이터셋 전처리
X_train_val_normalized = preprocess_data(X_train_val)
X_train_val_tensor = torch.from_numpy(X_train_val_normalized).float()
y_train_val_tensor = torch.from_numpy(y_train_val).long()

# 데이터셋 생성 및 분할
dataset = TensorDataset(X_train_val_tensor, y_train_val_tensor)
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = random_split(dataset, [train_size, val_size])

# 데이터 로더 생성
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32)


# ResNet 모델 정의
class BasicBlock(nn.Module):
    expansion = 1

    def __init__(self, in_channels, out_channels, stride=1):
        super(BasicBlock, self).__init__()
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(out_channels)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn2 = nn.BatchNorm1d(out_channels)

        self.shortcut = nn.Sequential()
        if stride != 1 or in_channels != self.expansion * out_channels:
            self.shortcut = nn.Sequential(
                nn.Conv1d(in_channels, self.expansion * out_channels, kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm1d(self.expansion * out_channels)
            )

    def forward(self, x):
        identity = x

        out = self.conv1(x)
        out = self.bn1(out)
        out = F.relu(out)

        out = self.conv2(out)
        out = self.bn2(out)

        out += self.shortcut(identity)
        out = F.relu(out)

        out = F.dropout(out, p=0.5, training=self.training)  # 드롭아웃 추가
        return out


class ResNet(nn.Module):
    def __init__(self, block, num_blocks, num_classes=2):
        super(ResNet, self).__init__()
        self.num_classes = num_classes
        self.in_channels = 64

        self.conv1 = nn.Conv1d(1, 64, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm1d(64)
        self.layer1 = self._make_layer(block, 64, num_blocks[0], stride=1)
        self.layer2 = self._make_layer(block, 128, num_blocks[1], stride=2)
        self.layer3 = self._make_layer(block, 256, num_blocks[2], stride=2)
        self.layer4 = self._make_layer(block, 512, num_blocks[3], stride=2)
        self.linear = None  # Remove the initialization from here

    def _make_layer(self, block, out_channels, num_blocks, stride):
        strides = [stride] + [1] * (num_blocks - 1)
        layers = []
        for stride in strides:
            layers.append(block(self.in_channels, out_channels, stride))
            self.in_channels = out_channels * block.expansion
        return nn.Sequential(*layers)

    def forward(self, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.layer1(out)
        out = self.layer2(out)
        out = self.layer3(out)
        out = self.layer4(out)

        out = F.avg_pool1d(out, 4)
        out = out.view(out.size(0), -1)

        if self.linear is None:
            self.linear = nn.Linear(out.size(1), self.num_classes).to(out.device)

        out = self.linear(out)
        return out

def ResNet18():
    return ResNet(BasicBlock, [2, 2, 2, 2], num_classes=len(np.unique(y)))

# CUDA 사용 가능 여부 확인
device = torch.device("cuda:3" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# 데이터셋 및 레이블 분포 확인
print(f"Unique labels: {np.unique(y)}")
print(f"Label distribution: {np.bincount(y)}")


# 모델, 손실 함수, 옵티마이저 초기화
model = ResNet18().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)



# 훈련 루프
num_epochs = 10
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    correct = 0
    total = 0

    for data, target in train_loader:
        data, target = data.to(device), target.to(device)
        
        optimizer.zero_grad()
        output = model(data)
        loss = criterion(output, target)
        loss.backward()
        optimizer.step()

        train_loss += loss.item()
        _, predicted = torch.max(output.data, 1)
        total += target.size(0)
        correct += (predicted == target).sum().item()
        # print(f"Train batch output: {output[:5]}")
        
    train_loss /= len(train_loader)
    train_accuracy = 100 * correct / total
    print(f'Epoch {epoch+1}, Train Loss: {train_loss:.4f}, Train Accuracy: {train_accuracy:.2f}%')

    # 검증 과정
    model.eval()
    val_loss = 0.0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in val_loader:
            data, target = data.to(device), target.to(device)
            
            output = model(data)
            loss = criterion(output, target)

            val_loss += loss.item()
            _, predicted = torch.max(output.data, 1)
            total += target.size(0)
            correct += (predicted == target).sum().item()

    val_loss /= len(val_loader)
    val_accuracy = 100 * correct / total
    print(f'Epoch {epoch+1}, Validation Loss: {val_loss:.4f}, Validation Accuracy: {val_accuracy:.2f}%')

# 모델 저장
torch.save(model.state_dict(), 'resnet_ecg_model.pth')

# 테스트 데이터셋 전처리 및 DataLoader 생성
X_test_normalized = preprocess_data(X_test)
X_test_tensor = torch.from_numpy(X_test_normalized).float()
y_test_tensor = torch.from_numpy(y_test).long()
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
test_loader = DataLoader(test_dataset, batch_size=32)

# 테스트셋으로 예측 및 성능 평가
model.eval()  # 모델을 평가 모드로 설정
y_pred = []
y_true = []
with torch.no_grad():
    for data, target in test_loader:
        data, target = data.to(device), target.to(device)
        outputs = model(data)
        _, predicted = torch.max(outputs.data, 1)
        y_pred.extend(predicted.view(-1).cpu().numpy())
        y_true.extend(target.view(-1).cpu().numpy())

accuracy = accuracy_score(y_true, y_pred)
precision = precision_score(y_true, y_pred, average='macro')
recall = recall_score(y_true, y_pred, average='macro')
f1 = f1_score(y_true, y_pred, average='macro')

print(f'Accuracy: {accuracy:.4f}')
print(f'Precision: {precision:.4f}')
print(f'Recall: {recall:.4f}')
print(f'F1 Score: {f1:.4f}')

Using device: cuda:3
Unique labels: [0]
Label distribution: [36868]
Epoch 1, Train Loss: 0.0000, Train Accuracy: 100.00%
Epoch 1, Validation Loss: 0.0000, Validation Accuracy: 100.00%
Epoch 2, Train Loss: 0.0000, Train Accuracy: 100.00%
Epoch 2, Validation Loss: 0.0000, Validation Accuracy: 100.00%
Epoch 3, Train Loss: 0.0000, Train Accuracy: 100.00%
Epoch 3, Validation Loss: 0.0000, Validation Accuracy: 100.00%


KeyboardInterrupt: 

In [21]:
import numpy as np
import pandas as pd

# 파일 경로
file_path = ''

# 파일 로드
data = np.load(file_path)

# NumPy 배열을 Pandas DataFrame으로 변환
df = pd.DataFrame(data)

# DataFrame 출력
df


Unnamed: 0,0
0,1.0
1,1.0
2,1.0
3,1.0
4,1.0
...,...
33669,0.0
33670,0.0
33671,0.0
33672,0.0
