In [1]:
import os
# The name of the folder the notebook runs from is reused later as the
# checkpoint file stem (f'{number}.pth').
current_directory = os.getcwd()
folder_name = os.path.split(current_directory)[1]
number = folder_name

In [2]:
# Run configuration.
#
# 'data_main' is the directory holding the CSV files. It is read from the
# CIBMTR_DATA_DIR environment variable when that is set, so the notebook can
# run on another machine without editing code; otherwise it falls back to the
# original local path. os.path.join(..., '') guarantees a trailing separator,
# because the loaders build file paths by plain string concatenation.
CONFIG = {
    'data_main': os.path.join(
        os.environ.get(
            'CIBMTR_DATA_DIR',
            'C:/Users/Николай/PycharmProjects/CIBMTR/D.Data/main/',
        ),
        '',
    ),
    'train_path': 'train.csv',
    'test_path': 'test.csv',

    'DEVICE' : 'cuda',   # used only when CUDA is actually available
    'SEED' : 42,
    'BATCH_SIZE' : 32,
    'LR' : 0.001,
    'EPOCHS' : 2,
    'output_dim' : 1     # default output size of Model
}

In [3]:
# Import libs
import numpy as np
from tqdm import tqdm
import pandas as pd 
import torch
import torch.nn as nn
from torch.optim import Adam
import torch.nn.init as init

In [4]:
# Compute device: the configured accelerator when CUDA is usable, CPU otherwise.
if torch.cuda.is_available():
    device = torch.device(CONFIG['DEVICE'])
else:
    device = torch.device("cpu")

# Print tensors with 40 decimal places.
torch.set_printoptions(precision=40)

# Seed every random number generator used here so runs are repeatable.
np.random.seed(CONFIG['SEED'])
torch.manual_seed(CONFIG['SEED'])
torch.cuda.manual_seed_all(CONFIG['SEED'])

In [5]:
# Load the training set; every missing value becomes the string '-1'.
train = pd.read_csv(CONFIG['data_main'] + CONFIG['train_path'])
train = train.fillna('-1')

# Cast both age columns to whole numbers (any '-1' placeholders become -1).
train = train.astype({'donor_age': int, 'age_at_hct': int})

# Build the regression target "y" from (efs, efs_time):
#   1. start from efs_time and shift the efs == 0 rows by (mx - mn),
#   2. replace values by their rank and push efs == 0 rows up by 2 * len(train),
#   3. scale by the maximum, take the log, centre on the mean and flip the sign.
train["y"] = train["efs_time"].values
mx = train.loc[train["efs"] == 1, "efs_time"].max()
mn = train.loc[train["efs"] == 0, "efs_time"].min()
train.loc[train["efs"] == 0, "y"] = train.loc[train["efs"] == 0, "y"] + mx - mn
train["y"] = train["y"].rank()
train.loc[train["efs"] == 0, "y"] += 2*len(train)
train["y"] = np.log(train["y"] / train["y"].max())
train["y"] = (train["y"] - train["y"].mean()) * -1.0

# Every column except the identifiers/targets is treated as a string category.
cat_columns = [
    name for name in train.columns
    if name not in ('efs', 'efs_time', 'y', 'ID')
]
train[cat_columns] = train[cat_columns].astype(str)

In [6]:
# Load the test set from the configured file name (CONFIG['test_path'], the
# same way the training file is located) and apply the same cleaning as train.
test = pd.read_csv(f"{CONFIG['data_main']}{CONFIG['test_path']}")
test = test.fillna('-1')
for col in ['donor_age', 'age_at_hct']:
    test[col] = test[col].astype(int)
# cat_columns comes from the training frame; each feature becomes a string category.
test[cat_columns] = test[cat_columns].astype(str)

In [19]:
# One-hot encode the features. The training frame defines the column layout:
# the first level of every feature is dropped and acts as the baseline.
train_one_hot = pd.get_dummies(train[cat_columns], drop_first=True)

# Encode test WITHOUT drop_first. Which level counts as "first" depends on the
# values present in the frame being encoded, so dropping it independently on
# test could remove a level that train kept; reindex would then re-create that
# column as all-False and those rows would be read as the baseline level.
# reindex keeps exactly the training columns, in training order: levels that
# train never produced (including train's own baseline) are discarded, and
# levels missing from test become all-False columns.
test_one_hot = pd.get_dummies(test[cat_columns]).reindex(
    columns=train_one_hot.columns, fill_value=False
)

In [21]:
# Network input width = number of one-hot columns produced from the training frame.
input_dim = len(train_one_hot.columns)

In [22]:
# Model definition
class Model(nn.Module):
    """Two-layer fully connected network: Linear -> ReLU -> Linear.

    Args:
        input_dim: number of input features; also the width of the hidden layer.
        output_dim: size of the output layer (defaults to CONFIG['output_dim']).
    """

    def __init__(self, input_dim, output_dim=CONFIG['output_dim']):
        super().__init__()

        self.fc1 = nn.Linear(input_dim, input_dim)
        self.fc2 = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU()

        # Custom initialisation is currently disabled, so the layers keep
        # the initial values nn.Linear gives them.
        # self.init_weights()

    def init_weights(self):
        """Re-initialise both linear layers.

        Weights are drawn with Xavier-uniform and then clamped to [0, 1]
        (so every negative draw becomes 0); biases, when present, are zeroed.
        """
        for layer in (self.fc1, self.fc2):
            init.xavier_uniform_(layer.weight)
            layer.weight.data = torch.clamp(layer.weight.data, 0, 1)
            if layer.bias is not None:
                init.zeros_(layer.bias)

    def forward(self, x):
        """Return fc2(relu(fc1(x)))."""
        hidden = self.relu(self.fc1(x))
        return self.fc2(hidden)

In [23]:
# Load the trained model from '<working-folder-name>.pth'.
#
# The checkpoint holds a whole pickled Model object rather than a state_dict, so:
#   * weights_only=False is passed explicitly: full-object loading needs it, and
#     stating it avoids depending on the library default (which the loader warns about).
#   * map_location=device remaps the stored tensors onto the device chosen above,
#     so a checkpoint saved on a GPU still loads on a CPU-only machine.
# NOTE(security): unpickling can execute arbitrary code; only load checkpoints
# from a source you trust.
model = torch.load(f'{number}.pth', map_location=device, weights_only=False)
model.to(device)

  model = torch.load(f'{number}.pth')


Model(
  (fc1): Linear(in_features=399, out_features=399, bias=True)
  (fc2): Linear(in_features=399, out_features=1, bias=True)
  (relu): ReLU()
)

In [24]:
# Turn the encoded test features into one float32 tensor on the inference device.
test_tensors = torch.tensor(
    test_one_hot.to_numpy(),
    dtype=torch.float32,
    device=device,
)

In [25]:
# Inference: evaluation mode, no gradient tracking, the whole test set in one pass.
model.eval()
test_running_loss = 0.0

with torch.no_grad():
    outputs = model(test_tensors)

# Flatten the model output into a plain Python list of per-row predictions.
outputs_list = list(outputs.cpu().numpy().reshape(-1))

In [26]:
# Build the submission table (ID + prediction) and write it without the index.
y_pred = test[['ID']].copy().assign(prediction=outputs_list)
y_pred.to_csv('submission.csv', index=False)