In [88]:
import numpy as np
import pandas as pd
from lifelines import CoxPHFitter
from lifelines.utils import concordance_index

# Let wide DataFrames (up to 500 columns) be rendered in full instead of being elided.
pd.options.display.max_columns = 500

# Specify data locations

In [89]:
# Root directory of the DLBCL-Morph dataset; both input files live underneath it.
DLBCL_MORPH_DIR = "/deep/group/aihc-bootcamp-winter2020/dlbcl/DLBCL-Morph"

# clinical_data_cleaned.csv holds the clinical features and the outcome data
clinical_data_path = DLBCL_MORPH_DIR + "/clinical_data_cleaned.csv"

# cell_shapes.csv holds the geometric features of each tumor nucleus
cell_shapes_path = DLBCL_MORPH_DIR + "/Cells/cell_shapes.csv"

# Select which features to use

In [90]:
# Feature set used to build the Cox model's design matrix further down:
#   "ALL"       -> every aggregated column (geometric + clinical)
#   "GEOMETRIC" -> only the columns listed in geo_cols
#   "CLINICAL"  -> everything except the geometric columns
FEATURES = "GEOMETRIC"

# Load data

In [91]:
# Read the clinical features / outcome table and list its columns as a quick schema check.
df_outcome = pd.read_csv(clinical_data_path)
print(df_outcome.columns)

Index(['patient_id', 'MYC IHC', 'BCL2 IHC', 'BCL6 IHC', 'CD10 IHC', 'MUM1 IHC',
       'HANS', 'BCL6 FISH', 'MYC FISH', 'BCL2 FISH', 'Age', 'ECOG PS', 'LDH',
       'EN', 'Stage', 'IPI Score', 'IPI Risk Group (4 Class)',
       'RIPI Risk Group', 'OS', 'PFS', 'Follow-up Status'],
      dtype='object')


In [92]:
# Fill NaN entries with the mean value of the column.
# numeric_only=True keeps this working on pandas >= 2.0, where DataFrame.mean()
# raises on non-numeric columns instead of silently skipping them.
# NOTE(review): every column is imputed this way, including binary/categorical
# markers (e.g. HANS ends up as 0.432) and, if they contain NaN, 'patient_id',
# 'OS' and 'Follow-up Status' -- confirm that this is intended.
df_outcome = df_outcome.fillna(df_outcome.mean(numeric_only=True))

In [93]:
# Read the nucleus-level geometric features (many rows per patient_id) and show the columns.
df_cell_shapes = pd.read_csv(cell_shapes_path)
df_cell_shapes.columns

Index(['patient_id', 'patch_id', 'cell_number', 'rotate_angle', 'shortAxis',
       'longAxis', 'ellip_perimt', 'ellip_area', 'hull_area', 'minDiameter',
       'maxDiameter', 'minAngle', 'maxAngle', 'esf', 'csf', 'sf1', 'sf2',
       'elogation', 'convexity', 'rectCenter_x', 'rectCenter_y', 'rect_width',
       'rect_height', 'ellip_centroid_x', 'ellip_centroid_y'],
      dtype='object')

In [94]:
print("Number of patients before join:", df_cell_shapes['patient_id'].nunique())

# Clinical covariates that are carried along next to the outcome columns.
new_cols = ['MYC IHC', 'BCL2 IHC', 'BCL6 IHC', 'CD10 IHC', 'MUM1 IHC',
       'HANS', 'BCL6 FISH', 'MYC FISH', 'BCL2 FISH', 'Age', 'ECOG PS', 'LDH',
       'EN', 'Stage', 'IPI Score', 'IPI Risk Group (4 Class)',
       'RIPI Risk Group']

# Attach each patient's outcome and clinical values to every one of their nucleus rows.
# validate="m:1" makes pandas raise if df_outcome holds a patient_id more than once.
df = df_cell_shapes.merge(
    df_outcome[['patient_id', 'OS', 'Follow-up Status'] + new_cols],
    on='patient_id',
    how='inner',
    validate="m:1",
)
print("Number of patients after join:", df['patient_id'].nunique())

df = df.drop(["patch_id", "cell_number"], axis=1)
# np.float was merely an alias of the builtin float and was removed in NumPy 1.24.
df = df.astype(float)

# Per-patient mean and standard deviation of every remaining column.
# The string names select the pandas group-by reductions; passing np.mean / np.std
# is deprecated in recent pandas and would eventually change what 'std' computes.
df_agg = df.groupby("patient_id").agg(["mean", "std"])
display(df_agg.head(20))

# Flatten the (column, statistic) MultiIndex into names such as 'shortAxis_mean'.
df_agg.columns = df_agg.columns.map('_'.join)
# Outcome and clinical values are identical on all rows of a patient (m:1 join),
# so their standard deviation carries no information and is dropped.
df_agg = df_agg.drop(["OS_std", "Follow-up Status_std"] + [col + '_std' for col in new_cols], axis=1)
df_agg = df_agg.rename(columns={"OS_mean":"OS", "Follow-up Status_mean": "FUS"})
display(df_agg.head(20))

Number of patients before join: 170
Number of patients after join: 170


Unnamed: 0_level_0,rotate_angle,rotate_angle,shortAxis,shortAxis,longAxis,longAxis,ellip_perimt,ellip_perimt,ellip_area,ellip_area,hull_area,hull_area,minDiameter,minDiameter,maxDiameter,maxDiameter,minAngle,minAngle,maxAngle,maxAngle,esf,esf,csf,csf,sf1,sf1,sf2,sf2,elogation,elogation,convexity,convexity,rectCenter_x,rectCenter_x,rectCenter_y,rectCenter_y,rect_width,rect_width,rect_height,rect_height,ellip_centroid_x,ellip_centroid_x,ellip_centroid_y,ellip_centroid_y,OS,OS,Follow-up Status,Follow-up Status,MYC IHC,MYC IHC,BCL2 IHC,BCL2 IHC,BCL6 IHC,BCL6 IHC,CD10 IHC,CD10 IHC,MUM1 IHC,MUM1 IHC,HANS,HANS,BCL6 FISH,BCL6 FISH,MYC FISH,MYC FISH,BCL2 FISH,BCL2 FISH,Age,Age,ECOG PS,ECOG PS,LDH,LDH,EN,EN,Stage,Stage,IPI Score,IPI Score,IPI Risk Group (4 Class),IPI Risk Group (4 Class),RIPI Risk Group,RIPI Risk Group
Unnamed: 0_level_1,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std,mean,std
patient_id,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2,Unnamed: 25_level_2,Unnamed: 26_level_2,Unnamed: 27_level_2,Unnamed: 28_level_2,Unnamed: 29_level_2,Unnamed: 30_level_2,Unnamed: 31_level_2,Unnamed: 32_level_2,Unnamed: 33_level_2,Unnamed: 34_level_2,Unnamed: 35_level_2,Unnamed: 36_level_2,Unnamed: 37_level_2,Unnamed: 38_level_2,Unnamed: 39_level_2,Unnamed: 40_level_2,Unnamed: 41_level_2,Unnamed: 42_level_2,Unnamed: 43_level_2,Unnamed: 44_level_2,Unnamed: 45_level_2,Unnamed: 46_level_2,Unnamed: 47_level_2,Unnamed: 48_level_2,Unnamed: 49_level_2,Unnamed: 50_level_2,Unnamed: 51_level_2,Unnamed: 52_level_2,Unnamed: 53_level_2,Unnamed: 54_level_2,Unnamed: 55_level_2,Unnamed: 56_level_2,Unnamed: 57_level_2,Unnamed: 58_level_2,Unnamed: 59_level_2,Unnamed: 60_level_2,Unnamed: 61_level_2,Unnamed: 62_level_2,Unnamed: 63_level_2,Unnamed: 64_level_2,Unnamed: 65_level_2,Unnamed: 66_level_2,Unnamed: 67_level_2,Unnamed: 68_level_2,Unnamed: 69_level_2,Unnamed: 70_level_2,Unnamed: 71_level_2,Unnamed: 72_level_2,Unnamed: 73_level_2,Unnamed: 74_level_2,Unnamed: 75_level_2,Unnamed: 76_level_2,Unnamed: 77_level_2,Unnamed: 78_level_2,Unnamed: 79_level_2,Unnamed: 80_level_2,Unnamed: 81_level_2,Unnamed: 82_level_2
13901.0,-30.203768,32.123104,18.053119,5.319929,25.79176,7.707645,69.604697,19.242608,388.327122,207.450218,367.746142,209.657477,18.507646,5.741539,27.229503,7.62076,-23.598508,76.783746,-1.167264,140.147801,0.714059,0.144262,0.93978,0.068678,0.670788,0.129709,0.685801,0.141318,1.540486,0.422075,1.046642,0.127535,10.966762,3.843282,10.444223,3.633004,21.168709,7.746182,21.041199,7.51823,10.971682,4.132051,10.463238,3.902402,2.78,0.0,0.0,0.0,0.0,0.0,30.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,61.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,4.0,0.0,4.0,0.0,3.0,0.0,2.0,0.0
13902.0,-29.423052,31.866907,16.799208,4.851827,24.317176,7.529572,65.307907,18.409229,341.227716,180.928765,326.058674,188.067495,17.360127,5.3607,25.80186,7.54259,-21.145773,76.969812,14.03086,136.402492,0.708758,0.143803,0.938162,0.066646,0.662463,0.128765,0.681033,0.138314,1.547771,0.416758,1.043724,0.119947,10.075676,3.519298,10.073096,3.72611,19.927231,7.463325,19.796531,7.413028,10.078738,3.748831,10.066373,3.953555,10.08,0.0,0.0,0.0,10.0,0.0,0.0,0.0,70.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.224359,0.0,24.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13903.0,-30.349564,31.280504,15.80954,4.716802,24.220746,7.749293,63.808208,18.283335,318.852182,172.578439,303.356232,178.119444,16.43144,5.01161,25.691504,7.870433,-13.327497,78.365459,-19.692376,136.967791,0.676833,0.159982,0.920523,0.08019,0.633071,0.143944,0.653426,0.134246,1.609098,0.406415,1.043825,0.11986,9.909315,3.679379,9.8073,3.662121,18.741856,7.104246,19.873945,8.253596,9.912469,4.006994,9.762954,3.802286,9.78,0.0,0.0,0.0,0.0,0.0,50.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.213483,0.0,0.078212,0.0,0.0,0.0,25.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
13904.0,-34.466977,34.333292,15.451094,5.098248,25.875319,8.977452,66.201771,20.961549,337.499648,196.429641,310.527981,187.398678,15.933636,5.182056,27.229388,8.842433,-29.878338,79.49737,8.483001,145.738244,0.620706,0.151584,0.894926,0.091007,0.584008,0.141612,0.603398,0.141624,1.778404,0.566419,1.057293,0.141543,11.102816,4.587523,9.102394,3.756669,20.360341,8.722106,19.286392,8.926088,10.985653,4.938259,8.977256,4.204336,7.4,0.0,1.0,0.0,14.461538,0.0,48.825641,0.0,23.455497,0.0,0.380435,0.0,0.551351,0.0,0.432432,0.0,0.213483,0.0,0.078212,0.0,0.224359,0.0,64.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
13905.0,-26.744428,30.938156,16.226045,4.673878,26.082492,8.333628,67.598635,19.232158,351.702822,196.199166,331.095023,197.254295,16.972882,4.714176,27.169452,8.351487,-22.836169,80.285581,26.910928,145.588529,0.645798,0.153842,0.907719,0.083815,0.615255,0.138939,0.642958,0.135344,1.640414,0.431812,1.044817,0.116173,11.424346,4.451223,9.273681,3.150679,21.901859,8.921579,18.706747,6.952829,11.507091,4.82183,9.255364,3.503049,9.62,0.0,0.0,0.0,14.461538,0.0,48.825641,0.0,23.455497,0.0,0.380435,0.0,0.551351,0.0,0.432432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,41.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,4.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
13906.0,-31.406059,31.784805,14.054645,4.444628,23.142015,7.776459,59.534827,18.056534,272.489293,164.586702,265.6568,171.538904,15.060175,4.739558,24.769079,8.013515,-22.669314,80.650718,21.388682,143.355495,0.631809,0.156515,0.899429,0.089935,0.585474,0.138965,0.625211,0.138463,1.696503,0.469968,1.029376,0.111832,10.100786,4.071574,8.488882,3.271956,18.917345,7.995416,17.543999,7.699383,10.073121,4.249374,8.462739,3.460808,8.12,0.0,0.0,0.0,0.0,0.0,80.0,0.0,0.0,0.0,0.380435,0.0,0.551351,0.0,0.432432,0.0,0.0,0.0,0.0,0.0,0.0,0.0,78.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
13908.0,-28.916903,31.056615,16.208309,4.012041,23.276012,6.442699,62.689855,15.285239,309.134267,147.318309,298.875458,158.003845,16.932741,4.610079,24.83874,6.561346,-20.800668,77.387493,3.922276,137.700065,0.71419,0.14094,0.940869,0.064869,0.665269,0.124701,0.689989,0.13449,1.522295,0.397969,1.037526,0.115002,9.822045,3.245763,9.659261,3.178071,19.144951,6.682884,19.170114,6.424332,9.854123,3.432126,9.673899,3.368503,7.54,0.0,1.0,0.0,0.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0
13911.0,-32.826387,32.435577,19.44914,7.167019,29.794954,10.976688,78.496813,27.154253,500.46195,325.232429,466.986029,317.072941,19.767387,7.436887,31.173709,10.808486,-12.65231,81.588368,-25.904545,131.197677,0.671457,0.157594,0.918194,0.086887,0.634371,0.144912,0.644002,0.150424,1.667222,0.530586,1.056262,0.143665,11.668069,4.810984,12.479788,5.413834,22.200912,9.514983,25.137869,11.183351,11.683999,5.169621,12.43202,5.838947,10.22,0.0,0.0,0.0,0.0,0.0,20.0,0.0,30.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,62.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,1.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0
13912.0,-28.596861,31.305722,16.743245,5.374643,24.101754,7.804513,64.892541,19.485145,340.142098,194.188423,325.342362,199.21225,17.340018,5.736276,25.709815,7.920074,-24.19717,77.687736,26.153403,137.897313,0.71171,0.15449,0.936086,0.073935,0.661583,0.139197,0.683185,0.146353,1.552247,0.439279,1.041803,0.122648,10.304495,3.838874,9.765354,3.670144,20.258354,7.900105,19.281863,7.457296,10.319985,4.126095,9.772024,3.946458,0.33,0.0,1.0,0.0,10.0,0.0,10.0,0.0,50.0,0.0,1.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.078212,0.0,0.224359,0.0,54.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0,2.0,0.0,2.0,0.0,1.0,0.0,1.0,0.0
13913.0,-29.798667,32.29517,19.521436,6.621578,27.932794,9.193201,75.326885,23.759126,465.690314,288.777426,439.633221,292.013046,19.744831,7.101092,29.410546,9.188052,-24.03921,76.322165,10.447383,137.678283,0.711115,0.142307,0.939348,0.066829,0.669779,0.130456,0.674579,0.143215,1.572442,0.451538,1.052572,0.133221,11.674355,4.550433,11.475156,4.496822,22.698696,9.160557,22.910337,9.10228,11.680293,4.856287,11.462349,4.826211,0.4,0.0,1.0,0.0,50.0,0.0,0.0,0.0,60.0,0.0,0.0,0.0,0.0,0.0,1.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,65.0,0.0,3.0,0.0,0.0,0.0,2.0,0.0,4.0,0.0,4.0,0.0,3.0,0.0,2.0,0.0


Unnamed: 0_level_0,rotate_angle_mean,rotate_angle_std,shortAxis_mean,shortAxis_std,longAxis_mean,longAxis_std,ellip_perimt_mean,ellip_perimt_std,ellip_area_mean,ellip_area_std,hull_area_mean,hull_area_std,minDiameter_mean,minDiameter_std,maxDiameter_mean,maxDiameter_std,minAngle_mean,minAngle_std,maxAngle_mean,maxAngle_std,esf_mean,esf_std,csf_mean,csf_std,sf1_mean,sf1_std,sf2_mean,sf2_std,elogation_mean,elogation_std,convexity_mean,convexity_std,rectCenter_x_mean,rectCenter_x_std,rectCenter_y_mean,rectCenter_y_std,rect_width_mean,rect_width_std,rect_height_mean,rect_height_std,ellip_centroid_x_mean,ellip_centroid_x_std,ellip_centroid_y_mean,ellip_centroid_y_std,OS,FUS,MYC IHC_mean,BCL2 IHC_mean,BCL6 IHC_mean,CD10 IHC_mean,MUM1 IHC_mean,HANS_mean,BCL6 FISH_mean,MYC FISH_mean,BCL2 FISH_mean,Age_mean,ECOG PS_mean,LDH_mean,EN_mean,Stage_mean,IPI Score_mean,IPI Risk Group (4 Class)_mean,RIPI Risk Group_mean
patient_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1,Unnamed: 24_level_1,Unnamed: 25_level_1,Unnamed: 26_level_1,Unnamed: 27_level_1,Unnamed: 28_level_1,Unnamed: 29_level_1,Unnamed: 30_level_1,Unnamed: 31_level_1,Unnamed: 32_level_1,Unnamed: 33_level_1,Unnamed: 34_level_1,Unnamed: 35_level_1,Unnamed: 36_level_1,Unnamed: 37_level_1,Unnamed: 38_level_1,Unnamed: 39_level_1,Unnamed: 40_level_1,Unnamed: 41_level_1,Unnamed: 42_level_1,Unnamed: 43_level_1,Unnamed: 44_level_1,Unnamed: 45_level_1,Unnamed: 46_level_1,Unnamed: 47_level_1,Unnamed: 48_level_1,Unnamed: 49_level_1,Unnamed: 50_level_1,Unnamed: 51_level_1,Unnamed: 52_level_1,Unnamed: 53_level_1,Unnamed: 54_level_1,Unnamed: 55_level_1,Unnamed: 56_level_1,Unnamed: 57_level_1,Unnamed: 58_level_1,Unnamed: 59_level_1,Unnamed: 60_level_1,Unnamed: 61_level_1,Unnamed: 62_level_1,Unnamed: 63_level_1
13901.0,-30.203768,32.123104,18.053119,5.319929,25.79176,7.707645,69.604697,19.242608,388.327122,207.450218,367.746142,209.657477,18.507646,5.741539,27.229503,7.62076,-23.598508,76.783746,-1.167264,140.147801,0.714059,0.144262,0.93978,0.068678,0.670788,0.129709,0.685801,0.141318,1.540486,0.422075,1.046642,0.127535,10.966762,3.843282,10.444223,3.633004,21.168709,7.746182,21.041199,7.51823,10.971682,4.132051,10.463238,3.902402,2.78,0.0,0.0,30.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,61.0,2.0,1.0,1.0,4.0,4.0,3.0,2.0
13902.0,-29.423052,31.866907,16.799208,4.851827,24.317176,7.529572,65.307907,18.409229,341.227716,180.928765,326.058674,188.067495,17.360127,5.3607,25.80186,7.54259,-21.145773,76.969812,14.03086,136.402492,0.708758,0.143803,0.938162,0.066646,0.662463,0.128765,0.681033,0.138314,1.547771,0.416758,1.043724,0.119947,10.075676,3.519298,10.073096,3.72611,19.927231,7.463325,19.796531,7.413028,10.078738,3.748831,10.066373,3.953555,10.08,0.0,10.0,0.0,70.0,0.0,1.0,0.0,0.0,0.0,0.224359,24.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
13903.0,-30.349564,31.280504,15.80954,4.716802,24.220746,7.749293,63.808208,18.283335,318.852182,172.578439,303.356232,178.119444,16.43144,5.01161,25.691504,7.870433,-13.327497,78.365459,-19.692376,136.967791,0.676833,0.159982,0.920523,0.08019,0.633071,0.143944,0.653426,0.134246,1.609098,0.406415,1.043825,0.11986,9.909315,3.679379,9.8073,3.662121,18.741856,7.104246,19.873945,8.253596,9.912469,4.006994,9.762954,3.802286,9.78,0.0,0.0,50.0,0.0,0.0,0.0,0.0,0.213483,0.078212,0.0,25.0,0.0,0.0,1.0,1.0,0.0,0.0,0.0
13904.0,-34.466977,34.333292,15.451094,5.098248,25.875319,8.977452,66.201771,20.961549,337.499648,196.429641,310.527981,187.398678,15.933636,5.182056,27.229388,8.842433,-29.878338,79.49737,8.483001,145.738244,0.620706,0.151584,0.894926,0.091007,0.584008,0.141612,0.603398,0.141624,1.778404,0.566419,1.057293,0.141543,11.102816,4.587523,9.102394,3.756669,20.360341,8.722106,19.286392,8.926088,10.985653,4.938259,8.977256,4.204336,7.4,1.0,14.461538,48.825641,23.455497,0.380435,0.551351,0.432432,0.213483,0.078212,0.224359,64.0,0.0,0.0,0.0,1.0,1.0,0.0,1.0
13905.0,-26.744428,30.938156,16.226045,4.673878,26.082492,8.333628,67.598635,19.232158,351.702822,196.199166,331.095023,197.254295,16.972882,4.714176,27.169452,8.351487,-22.836169,80.285581,26.910928,145.588529,0.645798,0.153842,0.907719,0.083815,0.615255,0.138939,0.642958,0.135344,1.640414,0.431812,1.044817,0.116173,11.424346,4.451223,9.273681,3.150679,21.901859,8.921579,18.706747,6.952829,11.507091,4.82183,9.255364,3.503049,9.62,0.0,14.461538,48.825641,23.455497,0.380435,0.551351,0.432432,0.0,0.0,0.0,41.0,1.0,0.0,1.0,4.0,1.0,0.0,1.0
13906.0,-31.406059,31.784805,14.054645,4.444628,23.142015,7.776459,59.534827,18.056534,272.489293,164.586702,265.6568,171.538904,15.060175,4.739558,24.769079,8.013515,-22.669314,80.650718,21.388682,143.355495,0.631809,0.156515,0.899429,0.089935,0.585474,0.138965,0.625211,0.138463,1.696503,0.469968,1.029376,0.111832,10.100786,4.071574,8.488882,3.271956,18.917345,7.995416,17.543999,7.699383,10.073121,4.249374,8.462739,3.460808,8.12,0.0,0.0,80.0,0.0,0.380435,0.551351,0.432432,0.0,0.0,0.0,78.0,0.0,0.0,1.0,2.0,1.0,0.0,1.0
13908.0,-28.916903,31.056615,16.208309,4.012041,23.276012,6.442699,62.689855,15.285239,309.134267,147.318309,298.875458,158.003845,16.932741,4.610079,24.83874,6.561346,-20.800668,77.387493,3.922276,137.700065,0.71419,0.14094,0.940869,0.064869,0.665269,0.124701,0.689989,0.13449,1.522295,0.397969,1.037526,0.115002,9.822045,3.245763,9.659261,3.178071,19.144951,6.682884,19.170114,6.424332,9.854123,3.432126,9.673899,3.368503,7.54,1.0,0.0,80.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,62.0,1.0,0.0,0.0,1.0,1.0,0.0,1.0
13911.0,-32.826387,32.435577,19.44914,7.167019,29.794954,10.976688,78.496813,27.154253,500.46195,325.232429,466.986029,317.072941,19.767387,7.436887,31.173709,10.808486,-12.65231,81.588368,-25.904545,131.197677,0.671457,0.157594,0.918194,0.086887,0.634371,0.144912,0.644002,0.150424,1.667222,0.530586,1.056262,0.143665,11.668069,4.810984,12.479788,5.413834,22.200912,9.514983,25.137869,11.183351,11.683999,5.169621,12.43202,5.838947,10.22,0.0,0.0,20.0,30.0,0.0,1.0,0.0,1.0,0.0,0.0,62.0,0.0,1.0,0.0,1.0,2.0,1.0,1.0
13912.0,-28.596861,31.305722,16.743245,5.374643,24.101754,7.804513,64.892541,19.485145,340.142098,194.188423,325.342362,199.21225,17.340018,5.736276,25.709815,7.920074,-24.19717,77.687736,26.153403,137.897313,0.71171,0.15449,0.936086,0.073935,0.661583,0.139197,0.683185,0.146353,1.552247,0.439279,1.041803,0.122648,10.304495,3.838874,9.765354,3.670144,20.258354,7.900105,19.281863,7.457296,10.319985,4.126095,9.772024,3.946458,0.33,1.0,10.0,10.0,50.0,1.0,1.0,1.0,0.0,0.078212,0.224359,54.0,2.0,1.0,1.0,2.0,2.0,1.0,1.0
13913.0,-29.798667,32.29517,19.521436,6.621578,27.932794,9.193201,75.326885,23.759126,465.690314,288.777426,439.633221,292.013046,19.744831,7.101092,29.410546,9.188052,-24.03921,76.322165,10.447383,137.678283,0.711115,0.142307,0.939348,0.066829,0.669779,0.130456,0.674579,0.143215,1.572442,0.451538,1.052572,0.133221,11.674355,4.550433,11.475156,4.496822,22.698696,9.160557,22.910337,9.10228,11.680293,4.856287,11.462349,4.826211,0.4,1.0,50.0,0.0,60.0,0.0,0.0,1.0,1.0,0.0,0.0,65.0,3.0,0.0,2.0,4.0,4.0,3.0,2.0


# C-index and optimism computation

In [95]:
# Nucleus-shape measurements that were aggregated per patient.
geo_features = [
    'rotate_angle', 'shortAxis', 'longAxis', 'ellip_perimt', 'ellip_area',
    'hull_area', 'minDiameter', 'maxDiameter', 'minAngle', 'maxAngle',
    'esf', 'csf', 'sf1', 'sf2', 'elogation', 'convexity',
    'rectCenter_x', 'rectCenter_y', 'rect_width', 'rect_height',
    'ellip_centroid_x', 'ellip_centroid_y',
]

# '<feature>_mean' followed by '<feature>_std' for every measurement, then the two
# outcome columns so that the selection can be handed to the Cox model as is.
geo_cols = [
    feature + '_' + statistic
    for feature in geo_features
    for statistic in ('mean', 'std')
] + ['OS', 'FUS']

In [96]:
# Value passed as CoxPHFitter(penalizer=...) for each feature set.
penalizer_g_q = 0  # geometric features only
penalizer_c_q = 0  # clinical features only
penalizer_gc_q = 0.00005  # all features (geometric + clinical)

In [97]:
# Pick the modelling frame and the matching penalizer for the requested feature set.
if FEATURES == "ALL":
    penalizer = penalizer_gc_q
    df = df_agg
elif FEATURES == "GEOMETRIC":
    penalizer = penalizer_g_q
    df = df_agg[geo_cols]
elif FEATURES == "CLINICAL":
    penalizer = penalizer_c_q
    # Remove the geometric columns but keep the outcome columns OS / FUS.
    df = df_agg.drop([col for col in geo_cols if col not in ("OS", "FUS")], axis=1)
else:
    # A misspelt option used to fall through to the clinical branch unnoticed.
    raise ValueError(
        'FEATURES must be one of "ALL", "GEOMETRIC", "CLINICAL"; got %r' % (FEATURES,)
    )

In [98]:
from sklearn.preprocessing import QuantileTransformer

In [99]:
# Quantile-transform the covariates only (10 quantile landmarks).
# The survival time 'OS' and the event indicator 'FUS' are outcomes, not features,
# so they are no longer passed through the transformer.
outcome_cols = ["OS", "FUS"]
covariate_cols = [col for col in df.columns if col not in outcome_cols]

qt = QuantileTransformer(n_quantiles=10, random_state=42)

# reset_index gives a fresh frame with a 0..n-1 index; column order is unchanged.
df_scaled = df.reset_index(drop=True)
df_scaled[covariate_cols] = qt.fit_transform(df[covariate_cols])
df = df_scaled

In [100]:
df.columns

Index(['rotate_angle_mean', 'rotate_angle_std', 'shortAxis_mean',
       'shortAxis_std', 'longAxis_mean', 'longAxis_std', 'ellip_perimt_mean',
       'ellip_perimt_std', 'ellip_area_mean', 'ellip_area_std',
       'hull_area_mean', 'hull_area_std', 'minDiameter_mean',
       'minDiameter_std', 'maxDiameter_mean', 'maxDiameter_std',
       'minAngle_mean', 'minAngle_std', 'maxAngle_mean', 'maxAngle_std',
       'esf_mean', 'esf_std', 'csf_mean', 'csf_std', 'sf1_mean', 'sf1_std',
       'sf2_mean', 'sf2_std', 'elogation_mean', 'elogation_std',
       'convexity_mean', 'convexity_std', 'rectCenter_x_mean',
       'rectCenter_x_std', 'rectCenter_y_mean', 'rectCenter_y_std',
       'rect_width_mean', 'rect_width_std', 'rect_height_mean',
       'rect_height_std', 'ellip_centroid_x_mean', 'ellip_centroid_x_std',
       'ellip_centroid_y_mean', 'ellip_centroid_y_std', 'OS', 'FUS'],
      dtype='object')

## Un-corrected C-index

In [101]:
# Apparent concordance: the Cox model is fitted and scored on the very same rows.
cph = CoxPHFitter(penalizer=penalizer)
cph.fit(df, event_col='FUS', duration_col='OS')

c_main = cph.score(df, scoring_method="concordance_index")
uncorrected_message = "The un-corrected C-index is: %.3f" % c_main
print(uncorrected_message)

The un-corrected C-index is: 0.786





## Optimism computation

In [102]:
# Bootstrap estimate of optimism: refit the model on each resample, then compare its
# concordance on that resample with its concordance on the original data.
np.random.seed(42)
num_bootstraps = 1000
bootstrap_size = len(df)
row_positions = np.arange(0, len(df))

c_b_boot = np.empty(num_bootstraps)  # score of each bootstrap model on its own resample
c_b_orig = np.empty(num_bootstraps)  # score of the same model on the original data

for b in range(num_bootstraps):
    # draw row positions with replacement to form the bootstrap replicate
    sampled_rows = np.random.choice(row_positions, size=bootstrap_size, replace=True)
    boot_df = df.iloc[sampled_rows]

    cph = CoxPHFitter(penalizer=penalizer)
    cph.fit(boot_df, duration_col='OS', event_col='FUS')

    c_b_boot[b] = cph.score(boot_df, scoring_method="concordance_index")
    c_b_orig[b] = cph.score(df, scoring_method="concordance_index")


















































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































































In [103]:
# Optimism: mean gap between each bootstrap model's score on its own resample
# and its score on the original data.
score_gaps = c_b_boot - c_b_orig
o = score_gaps.mean()
print("measure of optimism: %.3f" % o)

measure of optimism: 0.152


In [104]:
# Subtract the bootstrap optimism from the un-corrected C-index.
c_final = c_main - o
print("optimism-corrected c-index is %.3f" % c_final)

optimism-corrected c-index is 0.635


# 95% Confidence Intervals

In [105]:
# 95% percentile interval of the C-index: the model is fitted once on the original
# data and then only scored (never refitted) on each bootstrap resample.
np.random.seed(42)
c_indices = []
num_bootstraps = 1000
bootstrap_size = len(df)

cph = CoxPHFitter(penalizer=penalizer) #fit on original data
cph.fit(df, duration_col='OS', event_col='FUS')

for i in range(num_bootstraps):
    choices = np.random.choice(np.arange(0, len(df)), size=bootstrap_size, replace=True)
    new_df = df.iloc[choices]

    c_index = cph.score(new_df, scoring_method="concordance_index")
    c_indices.append(c_index)

c_indices.sort()

# Positions of the 2.5% and 97.5% order statistics, derived from num_bootstraps
# instead of being hard-coded. Integer arithmetic yields exactly 24 and 974 for
# 1000 replicates; max() keeps very small replicate counts from indexing at -1.
lo_idx = max(25 * num_bootstraps // 1000 - 1, 0)
hi_idx = max(975 * num_bootstraps // 1000 - 1, 0)
hi = c_indices[hi_idx]
lo = c_indices[lo_idx]

print("Confidence interval is: %.3f - %.3f" % (lo, hi))




Confidence interval is: 0.726 - 0.843


In [106]:
# Shift both interval bounds down by the optimism estimate.
print("Adjusting for optimism")
adjusted_bounds = (lo - o, hi - o)
print("Adjusted confidence interval is: %.3f - %.3f" % adjusted_bounds)

Adjusting for optimism
Adjusted confidence interval is: 0.574 - 0.691


# Results

# Clinical features only
Optimism = 0.074

Final c-index = 0.674

Adjusted 95% CI: (0.602, 0.737)

# Geometric features only
Optimism = 0.152

Final c-index = 0.635

Adjusted 95% CI: (0.574, 0.691)

# Clinical + Geometric features
Optimism = 0.156

Final c-index = 0.700

Adjusted 95% CI: (0.651, 0.744)