In [1]:
import os
# Enumerate every file mounted under the Kaggle input directory, one full
# path per line, in the order os.walk yields them.
input_file_paths = (
    os.path.join(root, name)
    for root, _subdirs, names in os.walk('/kaggle/input')
    for name in names
)
for path in input_file_paths:
    print(path)

/kaggle/input/training-hct-survival/training_columns.pkl
/kaggle/input/training-hct-survival/cat_imputer.pkl
/kaggle/input/training-hct-survival/num_imputer.pkl
/kaggle/input/training-hct-survival/__results__.html
/kaggle/input/training-hct-survival/encoder.pkl
/kaggle/input/training-hct-survival/num_cols.pkl
/kaggle/input/training-hct-survival/scaler.pkl
/kaggle/input/training-hct-survival/cat_cols.pkl
/kaggle/input/training-hct-survival/__notebook__.ipynb
/kaggle/input/training-hct-survival/__output__.json
/kaggle/input/training-hct-survival/xgboost_model.model
/kaggle/input/training-hct-survival/custom.css
/kaggle/input/train2/efs_model.pth
/kaggle/input/train2/__results__.html
/kaggle/input/train2/__notebook__.ipynb
/kaggle/input/train2/__output__.json
/kaggle/input/train2/custom.css
/kaggle/input/train2/preprocessor/cat_imputer.pkl
/kaggle/input/train2/preprocessor/num_imputer.pkl
/kaggle/input/train2/preprocessor/encoder.pkl
/kaggle/input/train2/preprocessor/scaler.pkl
/kaggle/in

In [2]:
import pandas as pd
import numpy as np
import torch
import joblib  # For loading preprocessing models
import torch.nn as nn
from torch.utils.data import DataLoader, TensorDataset
import os

# Select the compute device: GPU when available, otherwise CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load test dataset
test_file_path = "/kaggle/input/equity-post-HCT-survival-predictions/test.csv"
df_test = pd.read_csv(test_file_path)

# Define the selected feature columns (excluding target 'efs')
selected_columns = [
    "ID", "prim_disease_hct", "hla_match_b_low", "prod_type",
    "year_hct", "obesity", "donor_age", "prior_tumor", "gvhd_proph",
    "sex_match", "comorbidity_score", "karnofsky_score", "donor_related",
    "age_at_hct"
]

# Validate the schema BEFORE selecting columns: the selection below would
# otherwise fail first with a bare KeyError and these checks could never fire.
if "ID" not in df_test.columns:
    raise ValueError("Test dataset must contain an 'ID' column.")

missing_columns = [col for col in selected_columns if col not in df_test.columns]
if missing_columns:
    raise ValueError(f"Test dataset is missing required columns: {missing_columns}")

# Keep only the selected columns (explicit copy so later assignments never
# write into a view of the raw frame)
df_test = df_test[selected_columns].copy()

# =============================
# LOAD PREPROCESSORS
# =============================

# Load saved preprocessors
# NOTE(review): joblib.load unpickles arbitrary Python objects; only point this
# at artifacts produced by a trusted training notebook.
preprocessor_dir = "/kaggle/input/train2/preprocessor"
num_imputer = joblib.load(os.path.join(preprocessor_dir, "num_imputer.pkl"))
cat_imputer = joblib.load(os.path.join(preprocessor_dir, "cat_imputer.pkl"))
encoder = joblib.load(os.path.join(preprocessor_dir, "encoder.pkl"))
scaler = joblib.load(os.path.join(preprocessor_dir, "scaler.pkl"))

# =============================
# PREPROCESS TEST DATA
# =============================

# Extract patient IDs
patient_ids = df_test["ID"]
df_test = df_test.drop(columns=["ID"])

# Identify numerical and categorical columns.
# Prefer the column names recorded by the fitted imputers: dtypes inferred from
# the test CSV can differ from training (e.g. a categorical column that is
# entirely NaN is read as float64). Fall back to dtype inference when the
# preprocessors were fitted without feature names.
inferred_num_cols = df_test.select_dtypes(include=['int64', 'float64']).columns.tolist()
inferred_cat_cols = df_test.select_dtypes(include=['object']).columns.tolist()
num_cols = [str(col) for col in getattr(num_imputer, "feature_names_in_", inferred_num_cols)]
cat_cols = [str(col) for col in getattr(cat_imputer, "feature_names_in_", inferred_cat_cols)]

# Handle missing values
df_test[num_cols] = num_imputer.transform(df_test[num_cols])
df_test[cat_cols] = cat_imputer.transform(df_test[cat_cols])

# Encode categorical features
encoded_cats_test = encoder.transform(df_test[cat_cols])
df_encoded_test = pd.DataFrame(
    encoded_cats_test,
    columns=encoder.get_feature_names_out(cat_cols),
    index=df_test.index,  # keep rows aligned for the axis=1 concat below
)

# Drop original categorical columns and merge encoded ones
df_test = df_test.drop(columns=cat_cols)
df_test = pd.concat([df_test, df_encoded_test], axis=1)

# Standardize numerical features
df_test[num_cols] = scaler.transform(df_test[num_cols])

# Convert to PyTorch tensor
X_test_tensor = torch.tensor(df_test.values, dtype=torch.float32).to(device)

# =============================
# DEFINE THE NEURAL NETWORK
# =============================

class EFSModel(nn.Module):
    """Feed-forward network mapping a feature vector to a value in (0, 1).

    Layout: input_size -> 128 -> 64 -> 1, with ReLU and Dropout(0.3) after each
    hidden layer and a final Sigmoid.

    The layers live in ``self.model`` (an ``nn.Sequential``); the attribute
    name and layer positions determine the state-dict keys
    (``model.0.weight``, ``model.3.weight``, ...), so they must stay as they
    are for saved weights to load.
    """

    def __init__(self, input_size):
        """Build the layer stack.

        Args:
            input_size: Number of input features per sample.
        """
        super().__init__()
        layers = [
            # Hidden block 1
            nn.Linear(input_size, 128),
            nn.ReLU(),
            nn.Dropout(0.3),
            # Hidden block 2
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Dropout(0.3),
            # Output head squashed into (0, 1)
            nn.Linear(64, 1),
            nn.Sigmoid(),
        ]
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Run ``x`` through the sequential stack and return its output."""
        output = self.model(x)
        return output

# Recreate the model architecture; the input width is the number of
# preprocessed feature columns
input_size = df_test.shape[1]
model = EFSModel(input_size).to(device)

# Load the saved state dict.
# weights_only=True restricts unpickling to tensors and plain containers, which
# is all a state dict needs, and avoids executing arbitrary pickled code.
model.load_state_dict(
    torch.load(
        "/kaggle/input/train2/efs_model.pth",
        map_location=device,
        weights_only=True,
    )
)

# Set the model to evaluation mode (disables dropout)
model.eval()

# =============================
# MAKE PREDICTIONS
# =============================

# Predict risk scores without tracking gradients
with torch.no_grad():
    predictions = model(X_test_tensor).cpu().numpy().flatten()

# One prediction per patient is required for a valid submission
assert len(predictions) == len(patient_ids), "prediction count does not match number of patient IDs"

# =============================
# CREATE SUBMISSION FILE
# =============================

submission = pd.DataFrame({
    "ID": patient_ids,
    "prediction": predictions
})

submission.to_csv("/kaggle/working/submission.csv", index=False)
print("✅ Submission file saved as 'submission.csv' with 'ID' and 'prediction' columns.")

✅ Submission file saved as 'submission.csv' with 'ID' and 'prediction' columns.


  model.load_state_dict(torch.load("/kaggle/input/train2/efs_model.pth", map_location=device))
