## 폐암 수술 환자의 생존율 예측

In [2]:
import os

import torch
from torch import nn
import torch.nn.functional as F
from torch.utils.data import TensorDataset
from torch.utils.data import DataLoader

# 필요한 라이브러리 불러옴
import numpy as np

import pandas as pd

# Seed both PyTorch and NumPy so that every run produces the same results.
torch.manual_seed(3)
np.random.seed(3)

# Use the first CUDA GPU when one is available, otherwise fall back to the CPU.
# (To force CPU execution, replace the expression below with 'cpu'.)
device = torch.device("cuda:0" if torch.cuda.is_available() else 'cpu')
print("PyTorch version :", torch.__version__)
print("Device :", device)

# Alternative NumPy-only loader, kept for reference (not executed):
#   Data_set = np.loadtxt("../dataset/ThoraricSurgery.csv", delimiter=',')
#   X = Data_set[:, 0:17]   # patient records
#   Y = Data_set[:, 17]     # surgery outcome

# Load the prepared surgery-patient data with pandas. The file is read with
# header=None, so the columns are labelled with the integers 0..17.
df = pd.read_csv("../dataset/ThoraricSurgery.csv", header=None)
print(df.head())

# Give the last column (index 17) a readable name and show the frame again.
df = df.rename(columns={17: "class"})
print(df.head())

# Split into the patient records (X: every column except "class") and the
# surgery outcome (Y: the "class" column), both as NumPy arrays.
X = df.drop(columns=["class"]).values
Y = df["class"].values

PyTorch version : 1.6.0
Device : cuda:0
    0   1     2     3   4   5   6   7   8   9   10  11  12  13  14  15  16  17
0  293   1  3.80  2.80   0   0   0   0   0   0  12   0   0   0   1   0  62   0
1    1   2  2.88  2.16   1   0   0   0   1   1  14   0   0   0   1   0  60   0
2    8   2  3.19  2.50   1   0   0   0   1   0  11   0   0   1   1   0  66   1
3   14   2  3.98  3.06   2   0   0   0   1   1  14   0   0   0   1   0  80   1
4   17   2  2.21  1.88   0   0   1   0   0   0  12   0   0   0   1   0  56   0
     0  1     2     3  4  5  6  7  8  9  10  11  12  13  14  15  16  class
0  293  1  3.80  2.80  0  0  0  0  0  0  12   0   0   0   1   0  62      0
1    1  2  2.88  2.16  1  0  0  0  1  1  14   0   0   0   1   0  60      0
2    8  2  3.19  2.50  1  0  0  0  1  0  11   0   0   1   1   0  66      1
3   14  2  3.98  3.06  2  0  0  0  1  1  14   0   0   0   1   0  80      1
4   17  2  2.21  1.88  0  0  1  0  0  0  12   0   0   0   1   0  56      0


In [3]:
class Model(nn.Module):
    """Small fully connected binary classifier.

    The network is ``Linear -> ReLU -> Linear -> Sigmoid``: one hidden layer
    followed by a single sigmoid output unit, so every output lies in
    ``(0, 1)`` and can be fed directly to ``nn.BCELoss``.

    Args:
        input_size: Number of input features per sample. Defaults to 17,
            the value this notebook originally hard-coded.
        hidden_size: Number of units in the hidden layer. Defaults to 30,
            the value this notebook originally hard-coded.
    """
    def __init__(self, input_size=17, hidden_size=30):
        super(Model, self).__init__()

        self.input_size = input_size

        # The attribute name `layers` and the layer order are kept as-is so
        # the printed module summary does not change for the default sizes.
        self.layers = nn.Sequential(
            nn.Linear(in_features=self.input_size, out_features=hidden_size, bias=True),
            nn.ReLU(inplace=True),
            nn.Linear(in_features=hidden_size, out_features=1, bias=True),
            nn.Sigmoid()
        )

    def forward(self, x):
        """Run the network on ``x``.

        Args:
            x: Float tensor whose last dimension equals ``input_size``.

        Returns:
            Tensor with the same leading dimensions as ``x`` and a last
            dimension of 1, holding sigmoid outputs.
        """
        return self.layers(x)
        

In [4]:
# Build the network and move its parameters to the selected device.
# nn.Module.to() returns the module itself, so the call can be chained.
# (Wrapping the model in nn.DataParallel is intentionally left disabled.)
model = Model().to(device)
model  # last expression of the cell: displays the layer summary

Model(
  (layers): Sequential(
    (0): Linear(in_features=17, out_features=30, bias=True)
    (1): ReLU(inplace=True)
    (2): Linear(in_features=30, out_features=1, bias=True)
    (3): Sigmoid()
  )
)

In [12]:
# Directory that receives the periodic checkpoints. makedirs(exist_ok=True)
# avoids the check-then-create race of os.path.isdir() + os.mkdir().
path = "./trained_models_pytorch"
os.makedirs(path, exist_ok=True)

epochs = 500
batch_size = 64
save_every = 100  # write a checkpoint every `save_every` epochs

# Convert the NumPy features/labels to float32 tensors; BCELoss requires
# floating-point targets.
x = torch.from_numpy(X).float()
y = torch.from_numpy(Y).float()
dataset = TensorDataset(x, y)
train_loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)

# The model already ends in a Sigmoid, so plain BCELoss is the matching loss.
# (nn.BCEWithLogitsLoss would apply a second sigmoid on top of it.)
criterion = nn.BCELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001, betas=(0.5, 0.999))

model.train()
for epoch in range(1, epochs + 1):
    for inputs, targets in train_loader:
        inputs = inputs.to(device)
        targets = targets.to(device)

        optimizer.zero_grad()

        # The model returns (batch, 1); squeeze only dim 1 to get (batch,).
        # A bare squeeze() would also drop the batch dimension when the last
        # batch holds a single sample, leaving a 0-dim tensor that no longer
        # matches the shape of `targets`.
        output = model(inputs).squeeze(1)

        loss = criterion(output, targets)
        loss.backward()
        optimizer.step()

    if epoch % save_every == 0:
        save_path = os.path.join(path, "model-" + str(epoch) + ".ckpt")
        print("Save path :", save_path)
        # The whole module object is stored under "model" because the
        # loading cell reads checkpoint["model"] and calls it directly.
        # NOTE(review): saving model.state_dict() is more portable, but it
        # requires changing the loading cell at the same time.
        torch.save({
            "epoch": epoch,
            "model": model,
        }, save_path)
        print("The model has been saved as " + save_path)


print("Training is over.")

Save path : ./trained_models_pytorch\model-100.ckpt
The model has been saved as ./trained_models_pytorch\model-100.ckpt
Save path : ./trained_models_pytorch\model-200.ckpt
The model has been saved as ./trained_models_pytorch\model-200.ckpt
Save path : ./trained_models_pytorch\model-300.ckpt
The model has been saved as ./trained_models_pytorch\model-300.ckpt
Save path : ./trained_models_pytorch\model-400.ckpt
The model has been saved as ./trained_models_pytorch\model-400.ckpt
Save path : ./trained_models_pytorch\model-500.ckpt
The model has been saved as ./trained_models_pytorch\model-500.ckpt
Training is over.


In [15]:
path = "./trained_models_pytorch/model-500.ckpt"

# map_location lets a checkpoint saved on a GPU be loaded on a CPU-only
# machine (and vice versa) instead of failing while restoring the tensors.
# NOTE(review): the checkpoint holds a fully pickled model object, and
# torch.load unpickles it -- only load checkpoint files you trust.
checkpoint = torch.load(path, map_location=device)
new_model = checkpoint["model"].to(device)
new_model.eval()  # inference mode for any mode-dependent layers

# `x` is the same tensor the training dataset was built from, so these are
# predictions on the training data, not on a held-out test set.
x_test = x.to(device)

# No gradients are needed for inference; skipping graph construction saves
# memory and makes a separate .detach() unnecessary.
with torch.no_grad():
    prediction = new_model(x_test)

prediction.cpu().numpy()

array([[2.32977822e-01],
       [5.42406298e-08],
       [1.00000000e+00],
       [8.85534227e-01],
       [2.99954205e-03],
       [7.65678010e-07],
       [1.10845207e-07],
       [2.61881083e-01],
       [5.91257215e-02],
       [7.04932492e-04],
       [9.35527351e-05],
       [1.87061243e-02],
       [1.62768853e-03],
       [4.06996766e-03],
       [9.99916553e-01],
       [9.81299113e-03],
       [7.05623329e-02],
       [2.75453585e-05],
       [3.97957209e-03],
       [2.54564695e-02],
       [3.01605184e-03],
       [1.96788982e-02],
       [6.21223822e-04],
       [7.13504776e-02],
       [3.84038001e-01],
       [2.51126718e-02],
       [5.14262676e-01],
       [8.04945081e-02],
       [2.16230467e-01],
       [4.01494414e-01],
       [3.81145835e-01],
       [1.42585710e-01],
       [5.45886219e-01],
       [1.32502466e-01],
       [3.65037352e-01],
       [5.66797984e-15],
       [3.03213865e-01],
       [5.32341063e-01],
       [3.95143121e-01],
       [4.19832438e-01],
