In [1]:
import sys
import os

# Get the absolute path of the project root directory
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))

# Add the project root directory to the Python path
sys.path.append(project_root)

In [2]:
import pandas as pd

project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))

# Define the path to the data directory
data_dir = os.path.join(project_root, 'data')

# Example: Access a specific data file in the data directory
data_file_path = os.path.join(data_dir, 'RADCURE_challenge_clinical.csv')
df = pd.read_csv(data_file_path)

In [3]:
from AutoML.analyzer import Analyzer
from pprint import pprint

df.drop(columns=["Study ID", "split"], inplace=True)

config = Analyzer.dry_run(df)

pprint(config)

Config file not found. Creating custom...
Used a heuristic to define categorical and continuous columns. Please review!

Categorical: ['T Stage', 'Sex', 'Disease Site', 'Stage', 'N Stage', 'Dose', 'death', 'EGFRI', 'HPV Combined', 'Chemotherapy']
Continuous: ['age at dx', 'survival_time']
  - Outliers found in T Stage: ['T2 (2): 1 out of 2552', 'TX: 1 out of 2552', 'T3 (2): 1 out of 2552']
╒══════════════════════════╤═══════════════════╤═══════════╤═════════════╕
│                          │                   │ Missing   │ Overall     │
╞══════════════════════════╪═══════════════════╪═══════════╪═════════════╡
│ n                        │                   │           │ 2552        │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ age at dx, mean (SD)     │                   │ 0         │ 62.0 (11.7) │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ survival_time, mean (SD) │                   │ 0         │ 4.8 (2.8)   │
├──────

In [4]:
import yaml

config['columns']['categorical'].remove('Dose')
config['columns']['continuous'].append('Dose') 

pprint(config)

with open('radcure_outputs/analyzer/config.yaml', 'w') as f:
            yaml.dump(config, f)

{'columns': {'categorical': ['T Stage',
                             'Sex',
                             'Disease Site',
                             'Stage',
                             'N Stage',
                             'death',
                             'EGFRI',
                             'HPV Combined',
                             'Chemotherapy'],
             'continuous': ['age at dx', 'survival_time', 'Dose'],
             'date': [],
             'other': []},
 'mapping': {'T Stage': {'T1': 'T1',
                         'T1a': 'T1a',
                         'T1b': 'T1b',
                         'T2': 'T2',
                         'T2 (2)': 'Other',
                         'T3': 'T3',
                         'T3 (2)': 'Other',
                         'T4': 'T4',
                         'T4a': 'T4a',
                         'T4b': 'T4b',
                         'TX': 'Other',
                         'Tis': 'Tis'}},
 'missingness_strategy': {'categorical': {

In [5]:
from AutoML.analyzer import Analyzer

analyzer = Analyzer(df, target_variable='death', output_dir='./radcure_outputs/analyzer', one_hot_encode=True, config='radcure_outputs/analyzer/config.yaml')

analyzer.run()

Applying changes from config...

╒══════════════════════════╤═══════════════════╤═══════════╤═════════════╕
│                          │                   │ Missing   │ Overall     │
╞══════════════════════════╪═══════════════════╪═══════════╪═════════════╡
│ n                        │                   │           │ 2552        │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ age at dx, mean (SD)     │                   │ 0         │ 62.0 (11.7) │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ survival_time, mean (SD) │                   │ 0         │ 4.8 (2.8)   │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ Dose, mean (SD)          │                   │ 0         │ 66.9 (5.6)  │
├──────────────────────────┼───────────────────┼───────────┼─────────────┤
│ T Stage, n (%)           │ Other             │           │ 3 (0.1)     │
├──────────────────────────┼───────────────────┼───────────┼───────

In [None]:
from AutoML.trainer import TrainerSupervised

df = pd.read_csv('./radcure_outputs/analyzer/updated_data.csv', index_col=0)
df.rename(columns={'survival_time': 'time', 'death':'event'}, inplace=True)

trainer = TrainerSupervised(task='time_to_event', output_dir='./radcure_outputs/ED_trainer_explainer',)
trainer.run(df, ['event','time'])

In [None]:
from AutoML.explainer import Explainer

exp = Explainer.from_trainer(trainer)
exp.run()

In [77]:
import numpy as np
import matplotlib.pyplot as plt

# For preprocessing
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler

import torch # For building the networks 

from pycox.models import MTLR
from pycox.models import DeepHitSingle

import torchtuples as tt # Some useful functions

df = analyzer.data

df_test = df.sample(frac=0.2)
df_train = df.drop(df_test.index)
df_val = df_train.sample(frac=0.2)
df_train = df_train.drop(df_val.index)

# Define the transformers
transformers = [
    ('standardize', StandardScaler(), analyzer.continuous_columns),
    ('leave', 'passthrough', [col for col in df.columns if not col in analyzer.continuous_columns])  # passthrough means these columns remain unchanged
]

print(transformers)

# Create the ColumnTransformer
x_mapper = ColumnTransformer(transformers)

x_train = x_mapper.fit_transform(df_train).astype('float32')
x_val = x_mapper.transform(df_val).astype('float32')
x_test = x_mapper.transform(df_test).astype('float32')

num_durations = 10
labtrans = MTLR.label_transform(num_durations)
get_target = lambda df: (df['survival_time'].values, df['death'].values)
y_train = labtrans.fit_transform(*get_target(df_train))
y_val = labtrans.transform(*get_target(df_val))

train = (x_train, y_train)
val = (x_val, y_val)

# We don't need to transform the test labels
durations_test, events_test = get_target(df_test)

in_features = x_train.shape[1]
num_nodes = [32, 32]
out_features = labtrans.out_features
batch_norm = True
dropout = 0.1

# net = torch.nn.Sequential(
#     torch.nn.Linear(in_features, 32),
#     torch.nn.ReLU(),
#     torch.nn.BatchNorm1d(32),
#     torch.nn.Dropout(0.1),
    
#     torch.nn.Linear(32, 32),
#     torch.nn.ReLU(),
#     torch.nn.BatchNorm1d(32),
#     torch.nn.Dropout(0.1),
    
#     torch.nn.Linear(32, out_features)
# )

net = tt.practical.MLPVanilla(in_features, num_nodes, out_features, batch_norm, dropout)

# model = MTLR(net, torch.optim.Adam, duration_index=labtrans.cuts)
model = DeepHitSingle(net, torch.optim.Adam, alpha=0.2, sigma=0.1, duration_index=labtrans.cuts)

batch_size = 256
model.optimizer.set_lr(0.01)

epochs = 100
callbacks = [tt.callbacks.EarlyStopping()]
log = model.fit(x_train, y_train, batch_size, epochs, callbacks, val_data=val)


[('standardize', StandardScaler(), ['survival_time', 'age at dx', 'death']), ('leave', 'passthrough', ['Sex_Female', 'Sex_Male', 'T Stage_T1', 'T Stage_T1a', 'T Stage_T1b', 'T Stage_T2', 'T Stage_T2 (2)', 'T Stage_T3', 'T Stage_T3 (2)', 'T Stage_T4', 'T Stage_T4a', 'T Stage_T4b', 'T Stage_TX', 'T Stage_Tis', 'Stage_0', 'Stage_I', 'Stage_II', 'Stage_IIA', 'Stage_III', 'Stage_IIIA', 'Stage_IIIC', 'Stage_IV', 'Stage_IVA', 'Stage_IVB', 'Stage_Unknown', 'Disease Site_esophagus', 'Disease Site_hypopharynx', 'Disease Site_larynx', 'Disease Site_lip & oral cavity', 'Disease Site_nasal cavity', 'Disease Site_nasopharynx', 'Disease Site_oropharynx', 'Disease Site_paranasal sinus', 'Disease Site_salivary glands', 'N Stage_N0', 'N Stage_N1', 'N Stage_N2', 'N Stage_N2a', 'N Stage_N2b', 'N Stage_N2c', 'N Stage_N3', 'N Stage_N3a', 'N Stage_N3b', 'N Stage_NX', 'Chemotherapy_0', 'Chemotherapy_1', 'Dose_50.0', 'Dose_50.8', 'Dose_51.0', 'Dose_53.55', 'Dose_55.0', 'Dose_59.4', 'Dose_60.0', 'Dose_64.0', 'D

In [78]:
surv = model.predict_surv_df(x_test)

In [79]:
from pycox.evaluation import EvalSurv
ev = EvalSurv(surv, durations_test, events_test, censor_surv='km')
ev.concordance_td('antolini')

0.5375007515180665

In [80]:
from pycox.models import CoxPH

CoxPH.label_transform

AttributeError: type object 'CoxPH' has no attribute 'label_transform'