In [1]:
import os
# The name of the folder the notebook runs from is used as the run identifier
# (it later becomes the stem of the saved model file).
current_directory = os.getcwd()
folder_name = os.path.split(current_directory)[1]
number = folder_name

In [2]:
# Central run configuration: data location, device, seed and training
# hyper-parameters. Every later cell reads its settings from this dict.
CONFIG = {
    # Directory that contains the input CSV. It can be overridden with the
    # CIBMTR_DATA_DIR environment variable so the notebook also runs on
    # machines other than the author's; the original path stays the default.
    # os.path.join(..., '') guarantees a trailing separator, because the
    # directory and file name are later joined by plain string concatenation.
    'data_main': os.path.join(
        os.environ.get('CIBMTR_DATA_DIR')
        or 'C:/Users/Николай/PycharmProjects/CIBMTR/D.Data/main/',
        '',
    ),
    # File name of the training table inside 'data_main'.
    'train_path': 'train.csv',

    # Preferred torch device; the notebook falls back to CPU without CUDA.
    'DEVICE': 'cuda',
    # Seed passed to the torch and numpy random number generators.
    'SEED': 42,
    # Number of rows per optimisation step.
    'BATCH_SIZE': 256,
    # Learning rate handed to the Adam optimiser.
    'LR': 0.0001,
    # Number of full passes over the training data.
    'EPOCHS': 243,
    # Width of the model's output layer.
    'output_dim': 1,
}

In [3]:
# Import libs
import numpy as np
from tqdm import tqdm
import pandas as pd 
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.nn.init as init

In [4]:
# Device: use the configured accelerator only when CUDA is actually available,
# otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device(CONFIG['DEVICE'])
else:
    device = torch.device("cpu")

# Print tensors with up to 40 digits after the decimal point.
torch.set_printoptions(precision=40)

# Seed the torch (CPU and all CUDA devices) and numpy generators for reproducibility.
for seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed):
    seed_fn(CONFIG['SEED'])

In [5]:
# Load and prepare data
train_csv = f"{CONFIG['data_main']}{CONFIG['train_path']}"
# Missing values are replaced by the string '-1' in every column.
train = pd.read_csv(train_csv).fillna('-1')

# Both age columns are reduced to integers.
train = train.astype({'donor_age': int, 'age_at_hct': int})

# Row masks reused throughout the target construction below.
is_event = train["efs"] == 1
is_censored = train["efs"] == 0

# Regression target "y", built from efs_time:
#   1. rows with efs == 0 are shifted so that their smallest time equals the
#      largest time among rows with efs == 1;
#   2. all values are ranked, and efs == 0 ranks are pushed up by 2 * len(train),
#      which places every efs == 0 row above every efs == 1 row;
#   3. ranks are scaled by their maximum, log-transformed, mean-centred and
#      negated, so the smallest ranks end up with the largest y.
train["y"] = train["efs_time"].values
mx = train.loc[is_event, "efs_time"].max()
mn = train.loc[is_censored, "efs_time"].min()
train.loc[is_censored, "y"] = train.loc[is_censored, "y"] + mx - mn
train["y"] = train["y"].rank()
train.loc[is_censored, "y"] += 2 * len(train)
train["y"] = train["y"] / train["y"].max()
train["y"] = np.log(train["y"])
train["y"] -= train["y"].mean()
train["y"] *= -1.0

# Every remaining column is treated as a categorical feature and stored as text.
non_feature_columns = {'efs', 'efs_time', 'y', 'ID'}
cat_columns = [name for name in train.columns if name not in non_feature_columns]
train[cat_columns] = train[cat_columns].astype(str)

In [6]:
# One-hot encode all categorical features; the first level of each feature is
# dropped (drop_first=True).
categorical_frame = train[cat_columns]
train_one_hot = pd.get_dummies(data=categorical_frame, drop_first=True)

In [7]:
# Network input width = number of one-hot encoded feature columns.
input_dim = len(train_one_hot.columns)

In [8]:
# Model definition
class Model(nn.Module):
    """Fully connected network with three hidden layers and a linear head.

    Every hidden layer is as wide as the input and is followed by ReLU and
    dropout (p=0.5, one shared ``nn.Dropout`` module). The head maps to
    ``output_dim`` values per sample and has no activation.

    Args:
        input_dim: Number of input features (also the hidden width).
        output_dim: Number of outputs; defaults to ``CONFIG['output_dim']``.
    """

    def __init__(self, input_dim, output_dim=CONFIG['output_dim']):
        super().__init__()

        # Hidden stack, then the output head.
        self.fc1 = nn.Linear(input_dim, input_dim)
        self.fc2 = nn.Linear(input_dim, input_dim)
        self.fc3 = nn.Linear(input_dim, input_dim)
        self.fc4 = nn.Linear(input_dim, output_dim)

        self.relu = nn.ReLU()
        self.do = nn.Dropout(p=0.5)

        self.init_weights()

    def init_weights(self):
        """Re-initialise all four linear layers.

        Weights are drawn with Xavier-uniform initialisation and then clamped
        into [0, 1], so every negative draw becomes 0. Biases, when present,
        are set to zero.
        """
        for layer in (self.fc1, self.fc2, self.fc3, self.fc4):
            init.xavier_uniform_(layer.weight)
            layer.weight.data = layer.weight.data.clamp(0, 1)
            if layer.bias is not None:
                init.zeros_(layer.bias)

    def forward(self, x):
        """Run ``x`` through the hidden stack and return the head's raw output."""
        for hidden in (self.fc1, self.fc2, self.fc3):
            x = self.do(self.relu(hidden(x)))
        return self.fc4(x)

In [9]:
# Model, criterion and optimizer
model = Model(input_dim)
model = model.to(device)  # move the parameters to the selected device
criterion = nn.MSELoss()  # mean squared error against the target y
optimizer = Adam(params=model.parameters(), lr=CONFIG['LR'])

In [10]:
# Copy the whole training set onto the device once, as float32 tensors:
# the one-hot feature matrix and the 1-D target vector.
feature_matrix = train_one_hot.values
target_vector = train['y'].values

train_tensors = torch.tensor(feature_matrix, dtype=torch.float32, device=device)
train_targets = torch.tensor(target_vector, dtype=torch.float32, device=device)

In [11]:
# Training
train_num_samples = len(train)
batch_size = CONFIG['BATCH_SIZE']
# Ceiling division: the last batch may be smaller than batch_size.
train_num_batches = -(-train_num_samples // batch_size)

# NOTE(review): batches are contiguous row slices taken in the same order in
# every epoch; no shuffling is performed anywhere in this loop.
for epoch in range(CONFIG['EPOCHS']):
    # ------------------------------ train phase ------------------------------
    model.train()
    train_running_loss = 0.0

    progress_label = f"Epoch {epoch + 1}/{CONFIG['EPOCHS']}"
    with tqdm(range(train_num_batches), desc=progress_label, unit="batch") as t:
        for batch_idx in t:
            # Select the rows of the current batch.
            start_idx = batch_idx * batch_size
            end_idx = min(start_idx + batch_size, train_num_samples)
            batch_rows = slice(start_idx, end_idx)
            batch_inputs = train_tensors[batch_rows]
            batch_targets = train_targets[batch_rows]

            # Reset gradients left over from the previous step.
            optimizer.zero_grad()

            # Forward pass.
            outputs = model(batch_inputs)

            # Loss; targets get a trailing dimension to match the model output.
            batch_loss = criterion(outputs, batch_targets[:, None])

            # Backward pass and parameter update.
            batch_loss.backward()
            optimizer.step()

            # Show the running mean of the per-batch losses in the progress bar.
            train_running_loss += batch_loss.item()
            mean_loss = train_running_loss / (batch_idx + 1)
            t.set_postfix(train_mean_loss=f"{mean_loss:.6f}")

Epoch 1/243: 100%|██████████| 113/113 [00:00<00:00, 258.01batch/s, train_mean_loss=229734.563053]
Epoch 2/243: 100%|██████████| 113/113 [00:00<00:00, 326.23batch/s, train_mean_loss=16267.101442]
Epoch 3/243: 100%|██████████| 113/113 [00:00<00:00, 318.28batch/s, train_mean_loss=2088.013760]
Epoch 4/243: 100%|██████████| 113/113 [00:00<00:00, 343.86batch/s, train_mean_loss=500.903062]
Epoch 5/243: 100%|██████████| 113/113 [00:00<00:00, 374.49batch/s, train_mean_loss=399.753890]
Epoch 6/243: 100%|██████████| 113/113 [00:00<00:00, 361.02batch/s, train_mean_loss=380.433054]
Epoch 7/243: 100%|██████████| 113/113 [00:00<00:00, 384.74batch/s, train_mean_loss=374.153044]
Epoch 8/243: 100%|██████████| 113/113 [00:00<00:00, 361.89batch/s, train_mean_loss=367.319121]
Epoch 9/243: 100%|██████████| 113/113 [00:00<00:00, 360.64batch/s, train_mean_loss=356.046290]
Epoch 10/243: 100%|██████████| 113/113 [00:00<00:00, 364.51batch/s, train_mean_loss=339.034508]
Epoch 11/243: 100%|██████████| 113/113 [00:

In [12]:
# Save the trained model in the working directory, named after the run id.
# NOTE(review): this stores the whole module object rather than a state_dict,
# so loading it later needs the Model class to be importable.
checkpoint_path = f'{number}.pth'
torch.save(model, checkpoint_path)