In [27]:
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
import matplotlib.pyplot as plt
import warnings
warnings.filterwarnings(action='ignore')
import math

import sklearn
import torch
from torch import nn
from torchvision import models
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F

In [28]:
print('numpy: ', np.__version__)
print('pandas: ', pd.__version__)
print('scikit-learn: ', sklearn.__version__)
print('pytorch: ', torch.__version__)

numpy:  1.20.3
pandas:  1.3.2
scikit-learn:  0.24.2
pytorch:  1.9.0


In [29]:
cancer_type = 'intestine' # breast, intestine, lung
BATCH_SIZE = 8
learning_rate = 1e-7
EPOCHS = 20
device = torch.device('cuda:0')
device_cpu = torch.device('cpu')
save_path = './model/best_model.pt'

In [30]:
class CustomDataset(Dataset):
    def __init__(self, path, mode='train'):
        self.mode = mode
        self.data = pd.read_csv(path)
        self.length = len(np.array(self.data)[0, :])
        self.props = np.array(self.data.iloc[:, range(1, self.length-1)])
        
        if self.mode == 'train':
            self.labels = np.array(self.data.iloc[:, self.length-1])
            self.labels = self.labels / np.max(self.labels)
        
    def __len__(self):
        return len(self.props)
    
    def Max(self):
        return np.max(np.array(self.data.iloc[:, self.length-1]))
    
    def __getitem__(self, i):
        if self.mode == 'train':
            return {
                'props' : torch.tensor(self.props[i], dtype=torch.float32),
                'labels' : torch.tensor(self.labels[i], dtype=torch.float32)
            }
        else:
            return{
                'props' : torch.tensor(self.props[i], dtype=torch.float32)
            }

In [31]:
train_path = f'./train_{cancer_type}.csv'
val_path = f'./val_{cancer_type}.csv'

train_dataset = CustomDataset(train_path)
val_dataset = CustomDataset(val_path)

train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [32]:
train_dataset.Max()

1825

In [33]:
sample_batch = next(iter(train_dataloader))
sample_batch['labels']

tensor([0.9622, 0.2849, 0.3512, 0.0932, 0.2762, 0.0186, 0.3819, 0.2416])

In [34]:
sample_batch['props'].shape, sample_batch['labels'].shape

(torch.Size([8, 52]), torch.Size([8]))

In [35]:
sample_batch['props'].dtype, sample_batch['labels'].dtype

(torch.float32, torch.float32)

In [36]:
data_length = len(sample_batch['props'][1])

In [37]:
class MLP(nn.Module):
    def __init__(self, num):
        super(MLP, self).__init__()
        self.ln1 = nn.Linear(num, 8)
        self.ln2 = nn.Linear(8, 4)
        self.ln3 = nn.Linear(4, 1)

    def forward(self, x):
        x = self.ln1(x)
        x = nn.ReLU()(x)
        x = self.ln2(x)
        x = nn.ReLU()(x)
        x = self.ln3(x)
        output = torch.round(nn.ReLU()(x))
        return output

In [38]:
model = MLP(data_length).to(device)

In [39]:
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.MSELoss()

In [40]:
def train_step(batch_item, epoch, batch, training):
    props = batch_item['props'].to(device)
    labels = batch_item['labels'].to(device)
    if training is True:
        model.train()
        optimizer.zero_grad()
        with torch.cuda.amp.autocast():
            output = model(props)
            loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        return loss
    
    else:
        model.eval()
        with torch.no_grad():
            output = model(props)
            loss = criterion(output, labels)
            
        return loss

In [41]:
loss_plot, val_loss_plot = [], []

for epoch in range(EPOCHS):
    total_loss, total_val_loss = 0, 0
    
    tqdm_dataset = enumerate(tqdm(train_dataloader))
    training = True
    for batch, batch_item in tqdm_dataset:
        batch_loss = train_step(batch_item, epoch, batch, training)
        total_loss += batch_loss
    print('Epoch : {}, Loss: {:03f}, Total Loss : {:03f}'.format(epoch+1, batch_loss.item(), total_loss/(batch+1)))

    loss_plot.append(total_loss/(batch+1))
    
    tqdm_dataset = enumerate(tqdm(val_dataloader))
    training = False
    for batch, batch_item in tqdm_dataset:
        batch_loss = train_step(batch_item, epoch, batch, training)
        total_val_loss += batch_loss
    print('Epoch : {}, Val_Loss: {:03f}, Total Val_Loss : {:03f}'.format(epoch+1, batch_loss.item(), total_loss/(batch+1)))
        
    val_loss_plot.append(total_val_loss/(batch+1))

    if np.min(torch.tensor(val_loss_plot).to(device_cpu).numpy()) == val_loss_plot[-1]:
        torch.save(model, save_path)

  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 1, Loss: 0.290475, Total Loss : 0.533706


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 1, Val_Loss: 1.157672, Total Val_Loss : 1.067411


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 2, Loss: 0.628909, Total Loss : 0.533989


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 2, Val_Loss: 1.844614, Total Val_Loss : 1.067979


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 3, Loss: 0.482227, Total Loss : 0.533667


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 3, Val_Loss: 0.423061, Total Val_Loss : 1.067333


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 4, Loss: 1.404092, Total Loss : 0.534966


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 4, Val_Loss: 0.836707, Total Val_Loss : 1.069931


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 5, Loss: 0.380050, Total Loss : 0.533610


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 5, Val_Loss: 0.365156, Total Val_Loss : 1.067221


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 6, Loss: 1.404089, Total Loss : 0.535347


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 6, Val_Loss: 0.685242, Total Val_Loss : 1.070693


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 7, Loss: 0.283375, Total Loss : 0.534841


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 7, Val_Loss: 1.086706, Total Val_Loss : 1.069682


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 8, Loss: 1.253011, Total Loss : 0.533480


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 8, Val_Loss: 0.325329, Total Val_Loss : 1.066961


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 9, Loss: 0.648161, Total Loss : 0.532363


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 9, Val_Loss: 1.104950, Total Val_Loss : 1.064727


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 10, Loss: 0.250479, Total Loss : 0.530615


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 10, Val_Loss: 0.969076, Total Val_Loss : 1.061230


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 11, Loss: 0.708298, Total Loss : 0.535276


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 11, Val_Loss: 0.899827, Total Val_Loss : 1.070553


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 12, Loss: 0.858878, Total Loss : 0.534268


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 12, Val_Loss: 1.105549, Total Val_Loss : 1.068536


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 13, Loss: 0.284718, Total Loss : 0.534382


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 13, Val_Loss: 0.575797, Total Val_Loss : 1.068764


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 14, Loss: 0.559474, Total Loss : 0.532769


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 14, Val_Loss: 0.149665, Total Val_Loss : 1.065538


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 15, Loss: 1.455560, Total Loss : 0.532865


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 15, Val_Loss: 0.717678, Total Val_Loss : 1.065731


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 16, Loss: 1.167098, Total Loss : 0.535041


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 16, Val_Loss: 0.267017, Total Val_Loss : 1.070082


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 17, Loss: 0.129421, Total Loss : 0.534803


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 17, Val_Loss: 0.741414, Total Val_Loss : 1.069606


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 18, Loss: 0.443300, Total Loss : 0.534137


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 18, Val_Loss: 0.362727, Total Val_Loss : 1.068274


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 19, Loss: 0.242954, Total Loss : 0.534543


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 19, Val_Loss: 1.248685, Total Val_Loss : 1.069086


  0%|          | 0/1250 [00:00<?, ?it/s]

Epoch : 20, Loss: 0.043425, Total Loss : 0.531943


  0%|          | 0/625 [00:00<?, ?it/s]

Epoch : 20, Val_Loss: 0.783044, Total Val_Loss : 1.063886


In [None]:
loss_plot_tensor = torch.tensor(loss_plot).to(device_cpu)
val_loss_plot_tensor = torch.tensor(val_loss_plot).to(device_cpu)
loss_plot = loss_plot_tensor.numpy()
val_loss_plot = val_loss_plot_tensor.numpy()

plt.plot(loss_plot, label='train_loss')
plt.plot(val_loss_plot, label='val_loss')
plt.xlabel('epoch')
plt.ylabel('loss(mse)')
plt.legend()
plt.show()

In [43]:
loss_plot

array([0.53370565, 0.53398937, 0.53366673, 0.5349656 , 0.53361034,
       0.5353467 , 0.5348412 , 0.5334804 , 0.5323634 , 0.5306151 ,
       0.5352765 , 0.5342681 , 0.53438216, 0.5327688 , 0.53286535,
       0.53504103, 0.53480303, 0.534137  , 0.534543  , 0.5319428 ],
      dtype=float32)

In [44]:
model = torch.load(save_path)

In [45]:
test_dataset = CustomDataset(val_path, mode='test')
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=True)

In [46]:
def predict(dataset):
    model.eval()
    result = []
    for batch_item in dataset:
        props = batch_item['props'].to(device)
        with torch.no_grad():
            output = model(props)
        output = output.cpu().numpy()
        result.extend(list(output))
        
    return result

In [47]:
maximum = test_dataset.Max()
pred = predict(test_dataloader) * maximum

In [None]:
pred = pd.Series(pred)
pred_df = pd.DataFrame(pred, columns=['Survival_period'])
pred_df.to_csv('./Survival_period.csv')
pred_df

In [None]:
val_label = pd.read_csv(val_path)['Survival period']
val_label

In [None]:
SE = 0
for i in range(len(val_label)):
    SE+=math.sqrt((pred_df.iloc[i] - val_label.iloc[i])**2)

print(SE/len(val_label))

In [None]:
np.array(pred_df).shape, np.array(val_label).shape