In [1]:
import os
# The name of the working directory doubles as the run identifier
# (it is used further down as the file stem of the saved model).
current_directory = os.getcwd()
number = folder_name = os.path.basename(current_directory)

In [2]:
# Run configuration: where the data lives plus the training hyper-parameters.
# NOTE(review): 'data_main' is an absolute path on one specific machine and
# must end with a path separator because it is concatenated with 'train_path'.
CONFIG = dict(
    # Data location
    data_main='C:/Users/Николай/PycharmProjects/CIBMTR/D.Data/main/',
    train_path='train.csv',

    # Training setup
    DEVICE='cuda',
    SEED=42,
    BATCH_SIZE=32,
    LR=0.001,
    EPOCHS=200,
    output_dim=1,
)

In [3]:
# Import libs
import numpy as np
from tqdm import tqdm
import pandas as pd 
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.nn.init as init

In [4]:
# Pick the compute device: the configured one when CUDA is available, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device(CONFIG['DEVICE'])
else:
    device = torch.device("cpu")

# Print tensors with up to 40 decimal places.
torch.set_printoptions(precision=40)

# Seed every random number generator in use (torch CPU, torch CUDA, numpy)
# with the same configured value for reproducibility.
for _seed_fn in (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed):
    _seed_fn(CONFIG['SEED'])

In [5]:
# Load and prepare data
# Read the training CSV from the configured folder and replace every missing
# value with the string '-1'.
train = pd.read_csv(f"{CONFIG['data_main']}{CONFIG['train_path']}")
train = train.fillna('-1')

# Cast the two age columns to integers.
for col in ['donor_age', 'age_at_hct']:
    train[col] = train[col].astype(int)
    
# Build the regression target `y` from `efs_time` and `efs`.
# NOTE(review): presumably efs == 1 marks an observed event and efs == 0 a
# censored row -- confirm against the dataset description.
train["y"] = train.efs_time.values
mx = train.loc[train.efs==1,"efs_time"].max()
mn = train.loc[train.efs==0,"efs_time"].min()
# Shift the efs == 0 rows so that their smallest time equals the largest
# efs == 1 time.
train.loc[train.efs==0,"y"] = train.loc[train.efs==0,"y"] + mx - mn
# Replace the values by their rank, then add 2 * len(train) to the rank of
# every efs == 0 row, which opens a gap between the two groups.
train.y = train.y.rank()
train.loc[train.efs==0,"y"] += 2*len(train)
# Scale so the maximum is 1, take the log, centre on zero and flip the sign,
# so that the smallest ranks receive the largest target values.
train.y = train.y / train.y.max()
train.y = np.log( train.y )
train.y -= train.y.mean()
train.y *= -1.0

# Every column except 'efs', 'efs_time', 'y' and 'ID' is used as a feature.
# All of them are cast to str so that the one-hot encoding step treats them
# as categorical.
cat_columns = [col for col in train.columns if col not in ['efs', 'efs_time', 'y', 'ID']]
train[cat_columns] = train[cat_columns].astype(str)

In [6]:
# One-hot encode all feature columns; the first level of each column is
# dropped (drop_first=True).
train_one_hot = pd.get_dummies(data=train[cat_columns], drop_first=True)

In [7]:
# Network input width = number of one-hot feature columns.
input_dim = len(train_one_hot.columns)

In [8]:
# Model definition
class Model(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    The hidden layer has the same width as the input (``input_dim``) and the
    output layer has ``output_dim`` units.
    """

    def __init__(self, input_dim, output_dim=CONFIG['output_dim']):
        super().__init__()

        self.fc1 = nn.Linear(in_features=input_dim, out_features=input_dim)
        self.fc2 = nn.Linear(in_features=input_dim, out_features=output_dim)
        self.relu = nn.ReLU()
        # init_weights() is not called here, so both layers keep the default
        # initialisation performed by nn.Linear.

    def init_weights(self):
        """Re-initialise both layers.

        Weights are drawn with Xavier-uniform and then clamped to [0, 1];
        biases, when present, are set to zero.
        """
        for layer in (self.fc1, self.fc2):
            init.xavier_uniform_(layer.weight)
            layer.weight.data = layer.weight.data.clamp(0, 1)
            if layer.bias is not None:
                init.zeros_(layer.bias)

    def forward(self, x):
        """Return ``fc2(relu(fc1(x)))``."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [9]:
# Model, criterion and optimizer
# Build the network on the selected device, train it with mean-squared error
# and Adam at the configured learning rate.
model = Model(input_dim)
model = model.to(device)
criterion = nn.MSELoss()
optimizer = Adam(params=model.parameters(), lr=CONFIG['LR'])

In [10]:
# Copy the one-hot features and the target column onto the training device
# as float32 tensors.
train_tensors = torch.tensor(train_one_hot.to_numpy(), dtype=torch.float32, device=device)
train_targets = torch.tensor(train['y'].to_numpy(), dtype=torch.float32, device=device)

In [11]:
# Training
# NOTE(review): batches are consecutive row slices taken in the same order
# every epoch; nothing in this loop shuffles the data.
train_num_samples = len(train)
# Ceiling division, so a shorter final batch is still visited.
train_num_batches = -(-train_num_samples // CONFIG['BATCH_SIZE'])

for epoch in range(CONFIG['EPOCHS']):
    # ---- train phase ----
    model.train()
    train_running_loss = 0.0

    epoch_label = f"Epoch {epoch + 1}/{CONFIG['EPOCHS']}"
    with tqdm(range(train_num_batches), desc=epoch_label, unit="batch") as t:
        for batch_idx in t:
            # Extract the current batch of rows
            start_idx = batch_idx * CONFIG['BATCH_SIZE']
            end_idx = min(start_idx + CONFIG['BATCH_SIZE'], train_num_samples)
            batch_rows = slice(start_idx, end_idx)
            batch_inputs = train_tensors[batch_rows]
            batch_targets = train_targets[batch_rows]

            # Reset the gradients
            optimizer.zero_grad()

            # Forward pass; targets get a trailing axis to match the
            # (batch, 1) model output
            outputs = model(batch_inputs)
            batch_loss = criterion(outputs, batch_targets[:, None])

            # Backward pass and parameter update
            batch_loss.backward()
            optimizer.step()

            # Show the running mean of the per-batch losses in the progress bar
            train_running_loss += batch_loss.item()
            t.set_postfix(train_mean_loss=f"{train_running_loss / (batch_idx + 1):.6f}")

Epoch 1/200: 100%|██████████| 900/900 [00:02<00:00, 447.58batch/s, train_mean_loss=1.923683]
Epoch 2/200: 100%|██████████| 900/900 [00:01<00:00, 461.73batch/s, train_mean_loss=1.822943]
Epoch 3/200: 100%|██████████| 900/900 [00:01<00:00, 460.50batch/s, train_mean_loss=1.744570]
Epoch 4/200: 100%|██████████| 900/900 [00:02<00:00, 410.47batch/s, train_mean_loss=1.627944]
Epoch 5/200: 100%|██████████| 900/900 [00:02<00:00, 421.76batch/s, train_mean_loss=1.483722]
Epoch 6/200: 100%|██████████| 900/900 [00:02<00:00, 345.29batch/s, train_mean_loss=1.325114]
Epoch 7/200: 100%|██████████| 900/900 [00:03<00:00, 279.10batch/s, train_mean_loss=1.167982]
Epoch 8/200: 100%|██████████| 900/900 [00:03<00:00, 281.57batch/s, train_mean_loss=1.018927]
Epoch 9/200: 100%|██████████| 900/900 [00:03<00:00, 290.11batch/s, train_mean_loss=0.881201]
Epoch 10/200: 100%|██████████| 900/900 [00:02<00:00, 316.22batch/s, train_mean_loss=0.761665]
Epoch 11/200: 100%|██████████| 900/900 [00:02<00:00, 327.68batch/s, t

In [13]:
# Persist the trained model under the run identifier.
# NOTE(review): this pickles the whole module object rather than its
# state_dict, so loading it requires the Model class to be importable.
checkpoint_path = f'{number}.pth'
torch.save(model, checkpoint_path)