In [1]:
import sys
import pandas as pd
import os

# Resolve the project root (two directory levels above the current working
# directory) and append it to the import search path.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../"))
sys.path.append(project_root)

# Location of the processed clinical table inside the project's data folder.
data_dir = os.path.join(project_root, 'data')
data_file_path = os.path.join(data_dir, 'RADCURE_processed_clinical.csv')

# Load the table (first CSV column becomes the index), discard the
# "Study ID" column, and rename the survival columns to 'time' / 'event',
# the names used by the cells below.
df = (
    pd.read_csv(data_file_path, index_col=0)
    .drop(columns=["Study ID"])
    .rename(columns={'survival_time': 'time', 'death': 'event'})
)


In [6]:
from jarvais.analyzer import Analyzer
from rich import print

# Columns the analyzer is told to treat as categorical (this includes the
# 'event' target) and as continuous (this includes the follow-up 'time').
categorical_columns = [
    "Sex",
    "T Stage",
    "N Stage",
    "Stage",
    "Smoking Status",
    "Disease Site",
    "event",
    "HPV Combined",
    "Chemotherapy",
]
continuous_columns = ["time", "age at dx", "Dose"]

# Configure the analyzer for a survival task with 'event' as the target;
# its artifacts are written under ./survival_outputs/analyzer.
analyzer = Analyzer(
    data=df,
    output_dir='./survival_outputs/analyzer',
    task='survival',
    target_variable='event',
    categorical_columns=categorical_columns,
    continuous_columns=continuous_columns,
)

# NOTE: `print` is rich's print (imported at the top of this cell), not the builtin.
print(analyzer)

# Per the recorded log output, run() performs missingness and outlier
# analysis and then produces the plots (correlation matrix, pairplot, UMAP,
# frequency table, Kaplan-Meier curves, multiplot).
analyzer.run()



[2m        [0m [[32m[1minfo     [0m] [1mPerforming missingness analysis...[0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mmissingness.__call__:43[0m
[2m        [0m [[32m[1minfo     [0m] [1mPerforming outlier analysis...[0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35moutlier.__call__:60[0m
[2m13:14:58[0m [[32m[1minfo     [0m] [1mPlotting Correlation Matrix...[0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:115[0m


+-----------------------+-------------------+-----------+-------------+
|                       |                   | Missing   | Overall     |
| n                     |                   |           | 3346        |
+-----------------------+-------------------+-----------+-------------+
| time, mean (SD)       |                   | 0         | 4.1 (2.7)   |
+-----------------------+-------------------+-----------+-------------+
| age at dx, mean (SD)  |                   | 0         | 62.3 (11.6) |
+-----------------------+-------------------+-----------+-------------+
| Dose, mean (SD)       |                   | 0         | 66.7 (5.8)  |
+-----------------------+-------------------+-----------+-------------+
| Sex, n (%)            | Female            |           | 686 (20.5)  |
+-----------------------+-------------------+-----------+-------------+
|                       | Male              |           | 2660 (79.5) |
+-----------------------+-------------------+-----------+-------

[2m        [0m [[32m[1minfo     [0m] [1mPlotting Pairplot...          [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:118[0m
[2m13:15:00[0m [[32m[1minfo     [0m] [1mPlotting UMAP...              [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:124[0m
[2m13:15:06[0m [[32m[1minfo     [0m] [1mPlotting Frequency Table...   [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:121[0m
[2m13:15:16[0m [[32m[1minfo     [0m] [1mPlotting Kaplan Meier Curves...[0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:127[0m
[2m13:15:19[0m [[32m[1minfo     [0m] [1mPlotting Multiplot...         [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mvisualization.__call__:136[0m
Font MPDFAA+Inter28ptBold is missing the following glyphs: '
' (\n)


In [7]:
from jarvais.trainer import TrainerSupervised

# Cast the event indicator to int on the analyzer's frame before that frame
# is handed to the trainer.
# NOTE(review): presumably the survival trainer needs a numeric event flag — confirm.
analyzer.data['event'] = analyzer.data['event'].astype(int)

# Survival training settings: the target is the (event, time) column pair and
# the trainer's artifacts go under ./outputs/trainer.
trainer_kwargs = {
    "task": "survival",
    "target_variable": ['event', 'time'],
    "output_dir": "./outputs/trainer",
}
trainer = TrainerSupervised(**trainer_kwargs)

print(trainer)

# Train on the analyzer's data; the recorded output ends with a model
# leaderboard reporting C-index scores per model.
trainer.run(analyzer.data)

[2m13:15:34[0m [[32m[1minfo     [0m] [1mSkipping feature reduction.   [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mfeature_reduction.__call__:39[0m
[2m        [0m [[32m[1minfo     [0m] [1mTraining MTLR...              [0m [[0m[1m[34mjarvais[0m][0m [36mcall[0m=[35mtrain.train_mtlr:66[0m
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
NaN or Inf found in input tensor.
[W 2025-06-09 13:15:35,681] Trial 1 failed with parameters: {'C1': 100.0, 'dropout': 0.2873687420594126, 'dims': [512, 512]} because of the following error: The value nan is not acceptable.
[W 2025-06-09 13:15:35,682] Trial 1 failed with value nan.
[2m13:16:24[0m [[32m[1minfo     [0m] [1mBest trial: C1: 0.01, dropout: 0.38638712110505574, dims: [32, 32][0m [[0m[1


Model Leaderboard
----------------
+------------------+----------------+----------------+----------------+
| model            | test_score     | val_score      | train_score    |
| MTLR             | C_INDEX: 0.723 | C_INDEX: 0.744 | C_INDEX: 0.776 |
+------------------+----------------+----------------+----------------+
| CoxPH            | C_INDEX: 0.721 | N/A            | C_INDEX: 0.777 |
+------------------+----------------+----------------+----------------+
| RandomForest     | C_INDEX: 0.716 | N/A            | C_INDEX: 0.853 |
+------------------+----------------+----------------+----------------+
| SVM              | C_INDEX: 0.716 | N/A            | C_INDEX: 0.783 |
+------------------+----------------+----------------+----------------+
| GradientBoosting | C_INDEX: 0.715 | N/A            | C_INDEX: 0.79  |
+------------------+----------------+----------------+----------------+
| DeepSurv         | C_INDEX: 0.69  | C_INDEX: 0.712 | C_INDEX: 0.772 |
+------------------+--------

In [8]:
from jarvais.explainer import Explainer

# Build an Explainer from the trainer that was run in the previous cell.
exp = Explainer.from_trainer(trainer)
# Run the explainer; in the recorded output this prints a subgroup analysis
# (Cox proportional hazards) and flags possible bias for 'Smoking Status'.
exp.run()

⚠️  **Possible Bias Detected in Smoking Status** ⚠️
=== Subgroup Analysis for 'Smoking Status' Using Cox Proportional Hazards Model ===

Model Statistics:
    AIC (Partial):               2375.84
    Log-Likelihood:              -1183.92
    Log-Likelihood Ratio p-value: 0.0000
    Concordance Index (C-index):   0.61
Model Coefficients:
    +---------------------------+---------------+------------------+
    | Feature                   |   Coefficient |   Standard Error |
    | Smoking Status_Current    |         0.508 |            4.720 |
    +---------------------------+---------------+------------------+
    | Smoking Status_Ex-smoker  |         0.070 |            4.720 |
    +---------------------------+---------------+------------------+
    | Smoking Status_Non-smoker |        -0.710 |            4.721 |
    +---------------------------+---------------+------------------+
    | Smoking Status_unknown    |         0.417 |            4.753 |
    +---------------------------+-------