## 폐암 수술 환자의 생존율 예측

In [1]:
import os

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

# 필요한 라이브러리 불러옴
import numpy as np

import pandas as pd

# Fix the NumPy and PyTorch RNG seeds so repeated runs give the same results.
torch.manual_seed(3)
np.random.seed(3)

# Use the first CUDA device when one is available, otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda:0")
else:
    device = torch.device('cpu')
print("PyTorch version :", torch.__version__)
print("Device :", device)

# Alternative loader kept for reference (pure NumPy, no pandas):
#   Data_set = np.loadtxt("../dataset/ThoraricSurgery.csv", delimiter=',')
#   X = Data_set[:, 0:17]
#   Y = Data_set[:, 17]

# Load the surgery-patient records with pandas. The file is read without a
# header row, so the columns receive integer labels.
df = pd.read_csv("../dataset/ThoraricSurgery.csv", header=None)
print(df.head())
# Give column 17 (the surgery outcome) a readable name.
df = df.rename(columns={17: "class"})
print(df.head())

# Split into patient records (X: every column except "class") and the
# surgery outcome (Y: the "class" column), both as NumPy arrays.
X = df.drop(columns=['class']).values
Y = df['class'].values

PyTorch version : 1.6.0
Device : cuda:0
    0   1     2     3   4   5   6   7   8   9   10  11  12  13  14  15  16  17
0  293   1  3.80  2.80   0   0   0   0   0   0  12   0   0   0   1   0  62   0
1    1   2  2.88  2.16   1   0   0   0   1   1  14   0   0   0   1   0  60   0
2    8   2  3.19  2.50   1   0   0   0   1   0  11   0   0   1   1   0  66   1
3   14   2  3.98  3.06   2   0   0   0   1   1  14   0   0   0   1   0  80   1
4   17   2  2.21  1.88   0   0   1   0   0   0  12   0   0   0   1   0  56   0
     0  1     2     3  4  5  6  7  8  9  10  11  12  13  14  15  16  class
0  293  1  3.80  2.80  0  0  0  0  0  0  12   0   0   0   1   0  62      0
1    1  2  2.88  2.16  1  0  0  0  1  1  14   0   0   0   1   0  60      0
2    8  2  3.19  2.50  1  0  0  0  1  0  11   0   0   1   1   0  66      1
3   14  2  3.98  3.06  2  0  0  0  1  1  14   0   0   0   1   0  80      1
4   17  2  2.21  1.88  0  0  1  0  0  0  12   0   0   0   1   0  56      0


In [2]:
class Model(nn.Module):
    """Small fully connected binary classifier.

    Architecture: Linear(input_size -> hidden_size) -> ReLU ->
    Linear(hidden_size -> 1) -> Sigmoid, so the forward pass yields one
    value between 0 and 1 per input row.

    Args:
        input_size: Number of input features per sample (default 17).
        hidden_size: Width of the hidden layer (default 30).
    """

    def __init__(self, input_size: int = 17, hidden_size: int = 30) -> None:
        super().__init__()

        self.input_size = input_size
        self.hidden_size = hidden_size

        # Kept as a single nn.Sequential named "layers" so parameter names
        # (layers.0.*, layers.2.*) stay the same as before.
        self.layers = nn.Sequential(
            nn.Linear(in_features=self.input_size, out_features=self.hidden_size, bias=True),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=self.hidden_size, out_features=1, bias=True),
            nn.Sigmoid(),
        )

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Run the network on ``x`` and return the sigmoid outputs.

        Args:
            x: Tensor whose last dimension has ``input_size`` features.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of size 1.
        """
        return self.layers(x)
        

In [3]:
# Build the network and move its parameters to the selected device.
# (An nn.DataParallel wrapper could be applied here before the move.)
model = Model().to(device)
# Trailing expression so the notebook still displays the module layout.
model

Model(
  (layers): Sequential(
    (0): Linear(in_features=17, out_features=30, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=30, out_features=1, bias=True)
    (3): Sigmoid()
  )
)

In [6]:
# Directory that receives the periodic checkpoints.
path = "./trained_models_pytorch"
# exist_ok=True avoids the check-then-create race of isdir() + mkdir().
os.makedirs(path, exist_ok=True)

epochs = 1000
batch_size = 64

# Convert the NumPy feature matrix / label vector into float tensors and
# wrap them in a shuffling mini-batch loader.
x = torch.from_numpy(X).type(torch.FloatTensor)
y = torch.from_numpy(Y).type(torch.FloatTensor)
dataset = TensorDataset(x, y)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# Plain BCELoss is used here; nn.BCEWithLogitsLoss (Sigmoid + BCELoss in one)
# would be the alternative if the model produced raw logits instead.
criterion = nn.BCELoss()

optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

for epoch in range(1, epochs + 1):
    phase = 'train'
    for inputs, targets in train_loader:

        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        with torch.set_grad_enabled(phase == 'train'):
            # Drop only the trailing dimension. A bare squeeze() would also
            # collapse the batch dimension when the final batch holds a
            # single sample, leaving a 0-dim output that no longer matches
            # the shape of `targets`.
            output = model(inputs).squeeze(-1)

            loss = criterion(output, targets)

            loss.backward()
            optimizer.step()

    # Write a checkpoint every 100 epochs.
    if epoch % 100 == 0:
        save_path = os.path.join(path, "model-{}.ckpt".format(epoch))
        print("Save path :", save_path)
        # The whole module object is stored under "model" (not just its
        # state_dict) because the loading code reads checkpoint["model"]
        # and calls it directly.
        torch.save({
            "epoch": epoch,
            "model": model,
        }, save_path)
        print("The model has been saved as "+save_path)


print("Training is over.")

Save path : ./trained_models_pytorch\model-100.ckpt
The model has been saved as ./trained_models_pytorch\model-100.ckpt
Save path : ./trained_models_pytorch\model-200.ckpt
The model has been saved as ./trained_models_pytorch\model-200.ckpt
Save path : ./trained_models_pytorch\model-300.ckpt
The model has been saved as ./trained_models_pytorch\model-300.ckpt
Save path : ./trained_models_pytorch\model-400.ckpt
The model has been saved as ./trained_models_pytorch\model-400.ckpt
Save path : ./trained_models_pytorch\model-500.ckpt
The model has been saved as ./trained_models_pytorch\model-500.ckpt
Save path : ./trained_models_pytorch\model-600.ckpt
The model has been saved as ./trained_models_pytorch\model-600.ckpt
Save path : ./trained_models_pytorch\model-700.ckpt
The model has been saved as ./trained_models_pytorch\model-700.ckpt
Save path : ./trained_models_pytorch\model-800.ckpt
The model has been saved as ./trained_models_pytorch\model-800.ckpt
Save path : ./trained_models_pytorch\mod

In [8]:
# Load the checkpoint written after the last training epoch.
# NOTE(security): the checkpoint contains a fully pickled module, so
# torch.load unpickles arbitrary objects -- only load files you trust.
# NOTE(review): newer PyTorch releases may need weights_only=False to
# unpickle a full module -- confirm against the installed version.
path = "./trained_models_pytorch/model-1000.ckpt"
# map_location lets a checkpoint saved on a GPU be restored on a machine
# where that GPU is not available.
checkpoint = torch.load(path, map_location=device)
new_model = checkpoint["model"]
# Switch to inference mode before predicting.
new_model.eval()

x_test = x.to(device)
# Inference only: no_grad() skips autograd bookkeeping, so the result can be
# converted to NumPy without an explicit detach().
with torch.no_grad():
    prediction = new_model(x_test)
# The model's output is used as-is; no extra torch.sigmoid is applied here.
prediction.cpu().numpy()

array([[1.25496224e-01],
       [1.07107799e-05],
       [9.97299373e-01],
       [3.00259978e-01],
       [4.94192056e-02],
       [6.87182823e-04],
       [2.23492418e-04],
       [2.25679964e-01],
       [2.90850282e-01],
       [1.41253937e-02],
       [1.12737091e-02],
       [2.83288300e-01],
       [9.03997198e-03],
       [6.86580315e-02],
       [9.89805222e-01],
       [6.14849702e-02],
       [1.60954237e-01],
       [5.20879962e-03],
       [4.21250015e-02],
       [1.30153805e-01],
       [3.11359316e-02],
       [1.26357496e-01],
       [1.47288769e-01],
       [1.41669512e-01],
       [2.91477263e-01],
       [7.97337890e-02],
       [4.80794430e-01],
       [8.60284045e-02],
       [1.99870244e-01],
       [2.21972093e-01],
       [2.55132258e-01],
       [1.43831968e-01],
       [4.80266869e-01],
       [1.06264815e-01],
       [2.75641054e-01],
       [1.27672713e-04],
       [1.36259586e-01],
       [3.27298403e-01],
       [1.91000059e-01],
       [2.25932762e-01],
