In [8]:
import config as cfg
from pathlib import Path
import pandas as pd

# Load the processed PRO-ACT table; the first CSV column becomes the index.
df = pd.read_csv(cfg.PROACT_DATA_DIR.joinpath('proact_processed.csv'), index_col=0)

# For every event keep only the rows whose indicator is 0 or 1 (drops
# left-censored rows) and whose time-to-event lies in (0, 500].
event_names = ['Speech', 'Swallowing', 'Handwriting', 'Walking']
for event_name in event_names:
    event = df[f'Event_{event_name}']
    tte = df[f'TTE_{event_name}']
    df = df.loc[((event == 0) | (event == 1)) & (tte > 0) & (tte <= 500)]

# Remove the strength tests, then race, El Escorial and height/weight/BMI.
# The fixed-name drop raises KeyError if one of those columns is absent.
strength_cols = df.filter(like='_Strength').columns
df = df.drop(columns=strength_cols)
df = df.drop(columns=['Race_Caucasian', 'El_escorial', 'Height', 'Weight', 'BMI'])

# Keep only the covariates that the report cells below work with.
selected_covariates = [
    'ALSFRS_R_Total', 'Age', 'Sex',
    'Site_of_Onset', 'Onset_Delta', 'DiseaseProgressionRate',
    'Subject_used_Riluzole', 'FVC_Mean',
]
df = df[selected_covariates]

In [9]:
from docx import Document

def add_missing_data_table(doc, df, title, is_last_table):
    """Append a per-covariate missing-value table for ``df`` to ``doc``.

    A level-0 heading ``'<title> Missing Data (N=<row count>)'`` is written
    first, followed by a two-column table ('Covariate', 'Missing Rows') that
    lists only the columns of ``df`` having at least one null entry.

    Args:
        doc: Document-like object providing ``add_heading``, ``add_table``
            and ``add_page_break`` (a python-docx ``Document`` in this
            notebook).
        df: DataFrame whose null counts are reported.
        title: Dataset label used in the heading.
        is_last_table: When false, a page break is appended after the table.
    """
    null_counts = df.isnull().sum().reset_index()
    null_counts.columns = ['Covariate', 'Missing Rows']
    incomplete = null_counts.loc[null_counts['Missing Rows'] > 0]

    doc.add_heading(f'{title} Missing Data (N={df.shape[0]})', 0)
    table = doc.add_table(rows=1, cols=len(incomplete.columns))

    # Header row: one cell per summary column.
    for header_cell, label in zip(table.rows[0].cells, incomplete.columns):
        header_cell.text = label

    # One body row per covariate that has missing values.
    for covariate, n_missing in zip(incomplete['Covariate'], incomplete['Missing Rows']):
        cells = table.add_row().cells
        cells[0].text = str(covariate)
        cells[1].text = str(n_missing)

    if is_last_table:
        return
    doc.add_page_break()

# Build the missing-data report: one table per (dataframe, title) pair,
# separated by page breaks, saved as a Word document under cfg.MISC_DIR.
doc = Document()

dataframes = [(df, 'PRO-ACT')]

# The loop target is deliberately not called `df`: a top-level `for` target
# stays bound after the loop, so reusing `df` would overwrite the notebook's
# working DataFrame with the last entry of `dataframes`.
for idx, (frame, title) in enumerate(dataframes):
    is_last_table = (idx == len(dataframes) - 1)
    add_missing_data_table(doc, frame, title, is_last_table)

doc.save(Path.joinpath(cfg.MISC_DIR, 'missing_data.docx'))

In [10]:
def add_covariate_table(doc, df, title, is_last_table):
    """Append a one-column table listing every column of ``df`` to ``doc``.

    A level-0 heading ``'<title> Covariates (d=<column count>)'`` is written
    first, followed by a table with a 'Covariate' header and one row per
    column label.

    Args:
        doc: Document-like object providing ``add_heading``, ``add_table``
            and ``add_page_break`` (a python-docx ``Document`` in this
            notebook).
        df: DataFrame whose column labels are listed.
        title: Dataset label used in the heading.
        is_last_table: When false, a page break is appended after the table.
    """
    doc.add_heading(f'{title} Covariates (d={df.shape[1]})', 0)
    table = doc.add_table(rows=1, cols=1)
    table.rows[0].cells[0].text = 'Covariate'
    for covariate in df.columns:
        row_cells = table.add_row().cells
        # Column labels are not guaranteed to be strings (e.g. integer or
        # tuple labels); convert explicitly, as the other table writers do.
        row_cells[0].text = str(covariate)
    if not is_last_table:
        doc.add_page_break()

# Build the covariate listing: one table per (dataframe, title) pair,
# separated by page breaks, saved as a Word document under cfg.MISC_DIR.
doc = Document()

dataframes = [(df, 'PRO-ACT')]
# The loop target is deliberately not called `df`: a top-level `for` target
# stays bound after the loop, so reusing `df` would overwrite the notebook's
# working DataFrame with the last entry of `dataframes`.
for idx, (frame, title) in enumerate(dataframes):
    is_last_table = (idx == len(dataframes) - 1)
    add_covariate_table(doc, frame, title, is_last_table)

doc.save(Path.joinpath(cfg.MISC_DIR, 'covariates.docx'))

In [11]:
def add_params_table(doc, model_name, params, is_last_table):
    """Append a 'Parameter' / 'Value' table for one model to ``doc``.

    A level-0 heading ``'<model_name> Hyperparameters'`` is written first,
    followed by a two-column table with one row per entry of ``params``;
    both the key and the value are rendered with ``str()``.

    Args:
        doc: Document-like object providing ``add_heading``, ``add_table``
            and ``add_page_break`` (a python-docx ``Document`` in this
            notebook).
        model_name: Label used in the heading.
        params: Mapping of hyperparameter name to value (iterated via
            ``.items()``).
        is_last_table: When false, a page break is appended after the table.
    """
    doc.add_heading(f'{model_name} Hyperparameters', 0)
    table = doc.add_table(rows=1, cols=2)

    header = table.rows[0].cells
    for position, label in enumerate(('Parameter', 'Value')):
        header[position].text = label

    for key, setting in params.items():
        cells = table.add_row().cells
        cells[0].text, cells[1].text = str(key), str(setting)

    if is_last_table:
        return
    doc.add_page_break()
    
# Hyperparameter sets from the project config, keyed by the model label
# that appears in each table heading (insertion order = document order).
models_params = dict(
    CoxPH=cfg.COXPH_PARAMS,
    RSF=cfg.RSF_PARAMS,
    DeepSurv=cfg.DEEPSURV_PARAMS,
    MTLR=cfg.MTLR_PARAMS,
    MENSA=cfg.MENSA_PARAMS,
)

doc = Document()

# One table per model; every table except the final one ends with a page break.
model_names = list(models_params)
for idx, model_name in enumerate(model_names):
    params = models_params[model_name]
    is_last_table = idx == len(model_names) - 1
    add_params_table(doc, model_name, params, is_last_table)

doc.save(cfg.MISC_DIR.joinpath('hyperparameters.docx'))