In [8]:
import pandas as pd
import numpy as np
from lifelines.utils import concordance_index
from sksurv.metrics import concordance_index_censored, concordance_index_ipcw

import rpy2.robjects as ro
import rpy2.robjects.packages as rpackages
from rpy2.robjects.conversion import localconverter
from rpy2.robjects import pandas2ri

## Getting counts of patients/slices in train/valid/test datasets

In [23]:
train_info = pd.read_csv('/Data/train_info_fold1.csv')
valid_info = pd.read_csv('/Data/valid_info_fold1.csv')
test_info = pd.read_csv('/Data/Labels/HDFS_test_tumors.csv')

In [24]:
patient_valid_info = valid_info.loc[valid_info['Slice_Num']==1]
# patient_valid_info.reset_index(inplace=True)
patient_valid_info.to_csv('/Data/patient_valid_info.csv', index=False)

In [25]:
patient_train_info = train_info.loc[train_info['Slice_Num']==1]
# patient_train_info.reset_index(inplace=True)
patient_train_info.to_csv('/Data/patient_train_info.csv', index=False)

In [26]:
patient_test_info = test_info.loc[test_info['Slice_Num']==1]
# patient_test_info.reset_index(inplace=True)
patient_test_info.to_csv('/Data/patient_test_info.csv', index=False)

In [27]:
total_train = pd.read_csv('/Data/Labels/HDFS_train_tumors.csv')
patient_total_train = total_train.loc[total_train['Slice_Num']==1]
patient_total_train.shape
patient_total_train.to_csv('/Data/correct_train_patients.csv', index=False)

## Calculating c-index for each cancer types predictions

In [None]:
# Only need to run this once
utils = rpackages.importr('utils')
utils.chooseCRANmirror(ind=1)
utils.install_packages("survAUC")
utils.install_packages("survival")

### Uno's C statistic function

In [None]:
def uno_c_statistic(train_time, train_event, test_time, test_event, risk_preds):
    survAUC = rpackages.importr('survAUC')
    survival = rpackages.importr('survival')

    if not isinstance(train_time, np.ndarray):
        raise TypeError("Train times must be a numpy array")
    if not isinstance(train_event, np.ndarray):
        raise TypeError("Train events must be a numpy array")
    if not isinstance(test_time, np.ndarray):
        raise TypeError("Test times must be a numpy array")
    if not isinstance(test_event, np.ndarray):
        raise TypeError("Train events must be a numpy array")
    if not isinstance(risk_preds, np.ndarray):
        raise TypeError("Risk predictions must be a numpy array")

    R_train_time = ro.vectors.FloatVector(train_time)
    R_train_event = ro.vectors.IntVector(train_event)

    R_test_time = ro.vectors.FloatVector(test_time)
    R_test_event = ro.vectors.IntVector(test_event)

    R_risk_pred = ro.vectors.FloatVector(risk_preds)

    trainSurv_rsp = survival.Surv(R_train_time, R_train_event)
    testSurv_rsp = survival.Surv(R_test_time, R_test_event)

    cstat = survAUC.UnoC(trainSurv_rsp, testSurv_rsp, R_risk_pred)

    return cstat

train_cph_time = cph_labels[['HDFS_Time']].to_numpy()
train_cph_event = cph_labels[['HDFS_Code']].to_numpy()

test_cph_time = cph_preds[['HDFS_Time']].to_numpy()
test_cph_event = cph_preds[['HDFS_Code']].to_numpy()

all_cph_preds = cph_preds[['Prediction']].to_numpy()

uno_c_statistic(train_cph_time, train_cph_event, test_cph_time, test_cph_event, all_cph_preds)

In [3]:
# test_preds_0134 = pd.read_csv('/Data/Output/HDFSModel2/Liver/2022_02_22_2020_train/test_predictions.csv')
# ICC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("ICC")]
# HCC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("hcc")]
# MCRC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("preop")]

# ICC_preds_0134.to_csv('/Data/ICC_preds_0134.csv', index=False)
# HCC_preds_0134.to_csv('/Data/HCC_preds_0134.csv', index=False)
# MCRC_preds_0134.to_csv('/Data/MCRC_preds_0134.csv', index=False)

### Deep Learning Calculations

In [11]:
test_preds_0134 = pd.read_csv('/Data/Output/HDFSModel2/Tumor/2022_03_28_1706_train/test_predictions.csv')
ICC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("ICC")]
HCC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("hcc")]
MCRC_preds_0134 = test_preds_0134[test_preds_0134['Slice_File_Name'].str.contains("preop")]

np_all_preds = np.array(test_preds_0134['Prediction'])
np_ICC_preds = np.array(ICC_preds_0134['Prediction'])
np_HCC_preds = np.array(HCC_preds_0134['Prediction'])
np_MCRC_preds = np.array(MCRC_preds_0134['Prediction'])

#all_ghci = gh_c_index(np_all_preds)
# ICC_ghci = gh_c_index(np_ICC_preds)
# HCC_ghci = gh_c_index(np_HCC_preds)
# MCRC_ghci = gh_c_index(np_MCRC_preds)

# print("CNN Prediction Scores")
# print("Total GHCI:", all_ghci)
# print("HCC GHCI: ", HCC_ghci)
# print("ICC GHCI: ", ICC_ghci)
# print("MCRC GHCI: ", MCRC_ghci)

# print("All n:", test_preds_0134.shape)
# print("HCC n:", np_HCC_preds.shape)
# print("MCRC n:", np_MCRC_preds.shape)
# print("ICC n:", np_ICC_preds.shape)

ICC_cind = concordance_index(np.array(ICC_preds_0134['Time']), np_ICC_preds, np.array(ICC_preds_0134['Event']))
HCC_cind = concordance_index(np.array(HCC_preds_0134['Time']), np_HCC_preds, np.array(HCC_preds_0134['Event']))
MCRC_cind = concordance_index(np.array(MCRC_preds_0134['Time']), np_MCRC_preds, np.array(MCRC_preds_0134['Event']))
total_cind = concordance_index(np.array(test_preds_0134['Time']), np_all_preds, np.array(test_preds_0134['Event']))

print("\nTotal c-index:", total_cind)
print("HCC C-index:", HCC_cind)
print("ICC C-index:", ICC_cind)
print("MCRC C-index:", MCRC_cind)

CNN Prediction Scores
Total GHCI: 0.6214548079516716
HCC GHCI:  0.6574064009284492
ICC GHCI:  0.6293049311846381
MCRC GHCI:  0.585893117397307

Total c-index: 0.6031671814635344
HCC C-index: 0.605305323126224
ICC C-index: 0.5873654170450123
MCRC C-index: 0.657877862019539


In [10]:
ICC_preds_0134.to_csv("/Data/tumor_ICC_test_predictions.csv", index=False)
HCC_preds_0134.to_csv("/Data/tumor_HCC_test_predictions.csv", index=False)
MCRC_preds_0134.to_csv("/Data/tumor_MCRC_test_predictions.csv", index=False)

### CPH Calculations

In [4]:
print("CPH Predictions")
cph_preds = pd.read_excel("/Data/FeatureSelection/HCC_MCRC_ICC_HDFS_90_10/CPH_test_liver_predictions.xlsx")

HCC_cph_preds = cph_preds[cph_preds['Cancer_Type']==0]
MCRC_cph_preds = cph_preds[cph_preds['Cancer_Type']==1]
ICC_cph_preds = cph_preds[cph_preds['Cancer_Type']==2]

# HCC_ghci = gh_c_index(np.array(HCC_cph_preds['Prediction']))
# MCRC_ghci = gh_c_index(np.array(MCRC_cph_preds['Prediction']))
# ICC_ghci = gh_c_index(np.array(ICC_cph_preds['Prediction']))

# HAVE TO SWITCH THE SIGN ON THE PREDICTIONS FOR CPH MODEL
HCC_cind = concordance_index(np.array(HCC_cph_preds['Prediction']), -np.array(HCC_cph_preds['HDFS_Time']), np.array(HCC_cph_preds['HDFS_Code']))
MCRC_cind = concordance_index(np.array(MCRC_cph_preds['Prediction']), -np.array(MCRC_cph_preds['HDFS_Time']), np.array(MCRC_cph_preds['HDFS_Code']))
ICC_cind = concordance_index(np.array(ICC_cph_preds['Prediction']), -np.array(ICC_cph_preds['HDFS_Time']), np.array(ICC_cph_preds['HDFS_Code']))


# print("HCC GHCI: ", HCC_ghci)
# print("ICC GHCI: ", ICC_ghci)
# print("MCRC GHCI: ", MCRC_ghci)

print("\nHCC C-index:", HCC_cind)
print("ICC C-index:", ICC_cind)
print("MCRC C-index:", MCRC_cind)


all_cind = concordance_index(np.array(cph_preds['HDFS_Time']), -np.array(cph_preds['Prediction']), np.array(cph_preds['HDFS_Code']))
print("\nAll c-index:", all_cind)

CPH Predictions

HCC C-index: 0.4772727272727273
ICC C-index: 0.5876288659793815
MCRC C-index: 0.13636363636363635

All c-index: 0.5733882030178327


### RSF Calculations

In [4]:
print("RSF Predictions")
rsf_preds = pd.read_excel("/Data/FeatureSelection/HCC_MCRC_ICC_HDFS_90_10/RSF_test_tumor_predictions_90_10_041122.xlsx")

HCC_rsf_preds = rsf_preds[rsf_preds['Cancer_Type']==0]
MCRC_rsf_preds = rsf_preds[rsf_preds['Cancer_Type']==1]
ICC_rsf_preds = rsf_preds[rsf_preds['Cancer_Type']==2]

# HCC_ghci = gh_c_index(np.array(HCC_rsf_preds['Prediction']))
# MCRC_ghci = gh_c_index(np.array(MCRC_rsf_preds['Prediction']))
# ICC_ghci = gh_c_index(np.array(ICC_rsf_preds['Prediction']))

HCC_cind = concordance_index(np.array(HCC_rsf_preds['HDFS_Time']), np.array(HCC_rsf_preds['Prediction']), np.array(HCC_rsf_preds['HDFS_Code']))
MCRC_cind = concordance_index(np.array(MCRC_rsf_preds['HDFS_Time']), np.array(MCRC_rsf_preds['Prediction']), np.array(MCRC_rsf_preds['HDFS_Code']))
ICC_cind = concordance_index(np.array(ICC_rsf_preds['HDFS_Time']), np.array(ICC_rsf_preds['Prediction']), np.array(ICC_rsf_preds['HDFS_Code']))


# print("HCC GHCI: ", HCC_ghci)
# print("ICC GHCI: ", ICC_ghci)
# print("MCRC GHCI: ", MCRC_ghci)

print("\nHCC C-index:", HCC_cind)
print("ICC C-index:", ICC_cind)
print("MCRC C-index:", MCRC_cind)

# all_cind = concordance_index(np.array(rsf_preds['HDFS_Time']), np.array(rsf_preds['Prediction']), np.array(rsf_preds['HDFS_Code']))
# print("\nAll c-index:", all_cind)


RSF Predictions
HCC GHCI:  0.8173539707446802
ICC GHCI:  0.654411764705878
MCRC GHCI:  0.4561403508771875

HCC C-index: 0.5
ICC C-index: 0.57421875
MCRC C-index: 0.5548780487804879


In [15]:
all_cind = concordance_index(np.array(cph_preds['HDFS_Time']), -np.array(cph_preds['Prediction']), np.array(cph_preds['HDFS_Code']))
print(all_cind)

0.6844993141289437


# Old Code

In [None]:
ICC_cind = concordance_index(np_ICC_preds, np.array(ICC_preds_0134['Time']), np.array(ICC_preds_0134['Event']))
HCC_cind = concordance_index(np_HCC_preds, np.array(HCC_preds_0134['Time']), np.array(HCC_preds_0134['Event']))
MCRC_cind = concordance_index(np_MCRC_preds, np.array(MCRC_preds_0134['Time']), np.array(MCRC_preds_0134['Event']))
total_cind = concordance_index(np.array(test_preds_0134['Prediction']), np.array(test_preds_0134['Time']), np.array(test_preds_0134['Event']))

print("Total c-index:", total_cind)
print("HCC C-index:", HCC_cind)
print("MCRC C-index:", MCRC_cind)
print("ICC C-index:", ICC_cind)

In [None]:
print("All n:", test_preds_0134.shape)
print("HCC n:", np_HCC_preds.shape)
print("MCRC n:", np_MCRC_preds.shape)
print("ICC n:", np_ICC_preds.shape)

In [None]:
test_preds_0134.head(50)

0
0.57905
