In [1]:
# header files
%matplotlib inline
import os
import glob
import csv
import numpy as np
import pandas as pd
from sksurv.nonparametric import kaplan_meier_estimator
from sksurv.linear_model import CoxnetSurvivalAnalysis
from sksurv.linear_model import CoxPHSurvivalAnalysis
from sksurv.ensemble import RandomSurvivalForest
from sksurv.svm import HingeLossSurvivalSVM
from sksurv.metrics import (
    concordance_index_censored,
    concordance_index_ipcw,
    cumulative_dynamic_auc,
    integrated_brier_score,
)
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
from sklearn.pipeline import make_pipeline
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
from sklearn.feature_selection import SelectKBest
from sklearn.feature_selection import chi2, f_regression, f_classif
import matplotlib.pyplot as plt
from datetime import datetime
# Set matplotlib's default figure size to 4 x 4 (inches) for figures created after this cell.
plt.rcParams['figure.figsize'] = [4, 4]
# Printed marker showing that the import/setup cell ran to completion.
print("Header files loaded!")

Header files loaded!


In [2]:
# Collect the feature files of the three cohorts (TCGA, UPMC, UCLA).
# glob.glob returns its matches in arbitrary, filesystem-dependent order, so
# each list is sorted: every later cell then sees the same sample order on
# every machine and after every "Restart & Run All".
tcga_oc_files = sorted(glob.glob("survival_analysis_results/ovary/tcga/*"))
upmc_oc_files = sorted(glob.glob("survival_analysis_results/ovary/upmc/*"))
ucla_oc_files = sorted(glob.glob("survival_analysis_results/ovary/ucla/*"))

# Number of files found per cohort.
print(len(tcga_oc_files))
print(len(upmc_oc_files))
print(len(ucla_oc_files))

102
117
150


In [3]:
# create output survival information for training model
# Reads survival_analysis_results/ovary/tcga.csv into parallel lists holding
# one entry per data row (the first row of the file is skipped as a header).
is_ovarian_cancer = 1
if is_ovarian_cancer:
    # Categorical encodings. Values that are not listed fall back to the
    # default handed to .get() further down.
    tcga_stage_codes = {
        "Stage IIIC": 3,
        "Stage IV": 4,
        "Stage IIIB": 3,
        "Stage IIA": 2,
        "Stage IIC": 2,
        "Stage IIB": 2,
        "Stage IC": 1,
    }
    tcga_hrd_codes = {"NA": -1, "low": 0}
    tcga_brca_codes = {"NA": -1, "BRCAmut": 1, "BRCA1meth": 1}
    tcga_debulking_codes = {"No residual disease R0": 1, "Residual disease R1": 0}

    filenames_temp = []
    age_temp = []
    stage_temp = []
    debulking_temp = []
    brca_temp = []
    hrd_temp = []
    pr_temp = []
    cr_temp = []
    pd_temp = []
    os_days_temp = []
    os_event_temp = []
    pfs_days_temp = []
    pfs_event_temp = []

    with open("survival_analysis_results/ovary/tcga.csv", newline='', encoding = "ISO-8859-1") as clinical_file:
        records = csv.reader(clinical_file)
        next(records, None)  # drop the first row
        for record in records:
            filenames_temp.append(record[0])
            age_temp.append(float(record[1]))

            # The 30.5 factor presumably converts months to days -- TODO confirm units.
            os_days_temp.append(float(record[10])*30.5)
            pfs_days_temp.append(float(record[11])*30.5)

            # Event flags are True only for these exact labels.
            os_event_temp.append(record[12] == "1:DECEASED")
            pfs_event_temp.append(record[13] == "1:PROGRESSION")

            # Unlisted stage labels are encoded as 3.
            stage_temp.append(tcga_stage_codes.get(record[17], 3))

            # One-hot style flags read from the same column.
            response = record[26]
            cr_temp.append(1 if response == "completeresponse" else 0)
            pr_temp.append(1 if response == "partialresponse" else 0)
            pd_temp.append(1 if response == "progressivedisease" else 0)

            # -1 for "NA", 0 for "low", 1 for anything else.
            hrd_temp.append(tcga_hrd_codes.get(record[28], 1))
            # -1 for "NA", 1 for "BRCAmut"/"BRCA1meth", 0 for anything else.
            brca_temp.append(tcga_brca_codes.get(record[25], 0))
            # 1 for R0, 0 for R1, -1 for anything else.
            debulking_temp.append(tcga_debulking_codes.get(record[22], -1))

    # Sanity check: every list should report the same length.
    for parsed_column in (
        filenames_temp,
        age_temp,
        stage_temp,
        debulking_temp,
        brca_temp,
        hrd_temp,
        pr_temp,
        cr_temp,
        pd_temp,
        os_days_temp,
        os_event_temp,
        pfs_days_temp,
        pfs_event_temp,
    ):
        print(len(parsed_column))

102
102
102
102
102
102
102
102
102
102
102
102
102


In [4]:
# create output survival information for testing model
# Reads survival_analysis_results/ovary/upmc.csv into parallel t_* lists
# holding one entry per data row (the first row of the file is skipped).
is_ovarian_cancer = 1
if is_ovarian_cancer:
    # Categorical encodings. Values that are not listed fall back to the
    # default handed to .get() further down.
    upmc_stage_codes = {
        "IIIc": 3,
        "IV": 4,
        "IVb": 4,
        "IIIa": 3,
        "IIIb": 3,
        "IIIa1": 3,
        "IIIa2": 3,
        "II": 2,
        "IIc": 2,
        "IIb": 2,
        "IIa": 2,
        "I": 1,
    }
    upmc_debulking_codes = {"Yes": 1, "No": 0}
    upmc_hrd_codes = {"1": 1, "0": 0}
    missing_markers = ("", "NA")

    t_filenames_temp = []
    t_age_temp = []
    t_stage_temp = []
    t_debulking_temp = []
    t_brca_temp = []
    t_hrd_temp = []
    t_pr_temp = []
    t_cr_temp = []
    t_pd_temp = []
    t_os_days_temp = []
    t_os_event_temp = []
    t_pfs_days_temp = []
    t_pfs_event_temp = []

    with open("survival_analysis_results/ovary/upmc.csv", newline='', encoding = "ISO-8859-1") as clinical_file:
        records = csv.reader(clinical_file)
        next(records, None)  # drop the first row
        for record in records:
            t_filenames_temp.append(record[0])
            t_age_temp.append(float(record[9]))

            # One-hot style flags read from the same column.
            response = record[3]
            t_cr_temp.append(1 if response == "CR" else 0)
            t_pr_temp.append(1 if response == "PR" else 0)
            t_pd_temp.append(1 if response == "PD" else 0)

            # 1 for "Yes", 0 for "No", -1 for anything else.
            t_debulking_temp.append(upmc_debulking_codes.get(record[8], -1))

            # 0 only when both columns are exactly "0"; every other combination is 1.
            t_brca_temp.append(0 if record[12] == "0" and record[13] == "0" else 1)

            # 1 for "1", 0 for "0", -1 for anything else.
            t_hrd_temp.append(upmc_hrd_codes.get(record[1], -1))

            t_os_event_temp.append(record[4] in ("DOD", "D-other"))
            # The 30.5 factor presumably converts months to days -- TODO confirm units.
            # A missing duration ("" or "NA") is stored as 0.
            os_duration = record[7]
            os_missing = os_duration in missing_markers
            t_os_days_temp.append(0 if os_missing else int(os_duration)*30.5)

            t_pfs_event_temp.append(record[5] == "Yes")
            # A missing PFS duration falls back to the OS duration, and to 0
            # when that one is missing as well.
            pfs_duration = record[6]
            if pfs_duration not in missing_markers:
                t_pfs_days_temp.append(int(pfs_duration)*30.5)
            elif os_missing:
                t_pfs_days_temp.append(0)
            else:
                t_pfs_days_temp.append(int(os_duration)*30.5)

            # Unlisted stage labels are encoded as 3.
            t_stage_temp.append(upmc_stage_codes.get(record[11], 3))

    # Sanity check: every list should report the same length.
    for parsed_column in (
        t_filenames_temp,
        t_age_temp,
        t_stage_temp,
        t_debulking_temp,
        t_brca_temp,
        t_hrd_temp,
        t_pr_temp,
        t_cr_temp,
        t_pd_temp,
        t_os_days_temp,
        t_os_event_temp,
        t_pfs_days_temp,
        t_pfs_event_temp,
    ):
        print(len(parsed_column))

128
128
128
128
128
128
128
128
128
128
128
128
128


In [5]:
# create output survival information for testing model
# Reads survival_analysis_results/ovary/ucla.csv into parallel t1_* lists
# holding one entry per data row; the first row is printed, not parsed.
is_ovarian_cancer = 1
if is_ovarian_cancer:
    # Stage label -> numeric stage; unlisted labels are encoded as 3.
    ucla_stage_codes = {
        "IIIC": 3,
        "IV": 4,
        "IIIB": 3,
        "IIIA": 3,
        "III": 3,
        "IIA": 2,
        "IIC": 2,
        "IIB": 2,
        "IC": 1,
    }

    t1_filenames_temp = []
    t1_age_temp = []
    t1_stage_temp = []
    t1_os_days_temp = []
    t1_os_event_temp = []
    t1_pfs_days_temp = []
    t1_pfs_event_temp = []
    # These six lists are created but never filled in this cell, so their
    # printed lengths are 0.
    t1_debulking_temp = []
    t1_brca_temp = []
    t1_hrd_temp = []
    t1_pr_temp = []
    t1_cr_temp = []
    t1_pd_temp = []

    with open("survival_analysis_results/ovary/ucla.csv", newline='', encoding = "ISO-8859-1") as clinical_file:
        records = csv.reader(clinical_file)
        header = next(records, None)
        if header is not None:
            print(header)
        for record in records:
            t1_filenames_temp.append(record[3])
            t1_age_temp.append(float(record[4]))

            # The 30.5 factor presumably converts months to days -- TODO confirm units.
            t1_os_days_temp.append(float(record[23])*30.5)

            # An empty interval is stored as 0.0.
            interval = record[18]
            t1_pfs_days_temp.append(0.0 if interval == "" else float(interval)*30.5)

            t1_os_event_temp.append(record[25] == "1")
            t1_pfs_event_temp.append(record[19] == "Yes")
            t1_stage_temp.append(ucla_stage_codes.get(record[15], 3))

    # Sanity check: lengths of all lists, filled or not.
    for parsed_column in (
        t1_filenames_temp,
        t1_age_temp,
        t1_stage_temp,
        t1_debulking_temp,
        t1_brca_temp,
        t1_hrd_temp,
        t1_pr_temp,
        t1_cr_temp,
        t1_pd_temp,
        t1_os_days_temp,
        t1_os_event_temp,
        t1_pfs_days_temp,
        t1_pfs_event_temp,
    ):
        print(len(parsed_column))

['RNASeq130Patients', 'NanoString152Patients', 'R2_NanoString_ID', 'Patient_Deidentified_ID', 'Age_at_Diagnosis', 'Height_Inches', 'Weight_Pounds', 'BMI', 'Race', 'Ethnicity', 'Mutation_Status', 'Mutation_Description', 'Patient_Mutant_History', 'Primary Cancer Types (Ovary, FT, Peritoneal) ', 'Debulking  (optimal vs suboptimal) ', 'Cancer Stage', 'Tumor Grades', 'Neoadjuvant therapy or not', 'Disease Free Interval - Time to First Recurrence (months)', 'Recurrence (yes or no)', 'Status_recurrence_or_progressive_disease', 'Progressive_disease_or_subopt_debulk_or_recurr_within_12_months_or_died_within_24_months', 'Progressive_disease_or_recurr_1y_or_less', 'Overall Survival: Time to Death or to last survival status if alive (months)', 'Patient_status ', 'Patient_status ', 'Additional_Notes', '%Immune', '%Fibroblasts', '%Tumor']
154
154
154
0
0
0
0
0
0
154
154
154
154


In [6]:
# Load one flat feature vector per TCGA feature file, in the order of
# tcga_oc_files. Every value of every row in a file is converted to float and
# appended, so a multi-row file contributes all of its rows to one vector.
tcga_features = []
for feature_path in tcga_oc_files:
    # Open the path exactly as glob returned it. Rebuilding it from
    # split("/")[-1] was redundant and breaks on platforms where glob puts a
    # different separator in front of the file name.
    with open(feature_path, newline='') as csvfile:
        file_features = [float(value) for row in csv.reader(csvfile) for value in row]
    tcga_features.append(file_features)
print(len(tcga_features))
# Show the first vector as a spot check; skipped when no file was found so the
# cell does not fail with an IndexError.
if tcga_features:
    print(tcga_features[0])

102
[1.171941133036615, 2.2959641149086463, 1.2734015856142278, 2.294842887677939, 1.3630914222864567, 2.2971831200210606, 1.4419773818047057, 2.2930770181341265, 1.5215349440211505, 2.2906935879728403, 1.5845584762337994, 2.2890937402546077, 1.672927439286999, 2.292230641993306, 1.717596980802387, 2.291076243910365, 1.7872802767678533, 2.292495291577524, 0.5977914775705666, 2.2960208013429573, 0.6702316191708713, 2.297894492878315, 0.7551284825998712, 2.2960208013429573, 0.8149309852344033, 2.2928703499349146, 0.8956122528233192, 2.2960208013429573, 0.9737443721686092, 2.2900893655153496, 1.0209029561271683, 2.288537761403695, 1.123396061285378, 2.291757278478528, 1.1756532003587183, 2.2960208013429573]


In [7]:
# Load one flat feature vector per UPMC feature file, in the order of
# upmc_oc_files. Every value of every row in a file is converted to float and
# appended, so a multi-row file contributes all of its rows to one vector.
upmc_features = []
for feature_path in upmc_oc_files:
    # Open the path exactly as glob returned it. Rebuilding it from
    # split("/")[-1] was redundant and breaks on platforms where glob puts a
    # different separator in front of the file name.
    with open(feature_path, newline='') as csvfile:
        file_features = [float(value) for row in csv.reader(csvfile) for value in row]
    upmc_features.append(file_features)
print(len(upmc_features))

117


In [8]:
# Load one flat feature vector per UCLA feature file, in the order of
# ucla_oc_files. Every value of every row in a file is converted to float and
# appended, so a multi-row file contributes all of its rows to one vector.
ucla_features = []
for feature_path in ucla_oc_files:
    # Open the path exactly as glob returned it. Rebuilding it from
    # split("/")[-1] was redundant and breaks on platforms where glob puts a
    # different separator in front of the file name.
    with open(feature_path, newline='') as csvfile:
        file_features = [float(value) for row in csv.reader(csvfile) for value in row]
    ucla_features.append(file_features)
print(len(ucla_features))

150


In [9]:
# Align the TCGA clinical rows with the TCGA feature files: for every file in
# tcga_oc_files, copy the values of the first clinical row whose identifier
# equals the file name without its last four characters (assumed to be a
# four-character extension such as ".csv" -- TODO confirm).
train_pfs_event = []
train_pfs_days = []
train_os_event = []
train_os_days = []
train_age = []
train_stage = []
train_hrd = []
train_cr = []
train_pr = []
train_pd = []
train_brca = []
train_debulking = []
for feature_path in tcga_oc_files:
    # os.path.basename copes with whichever separator glob placed before the
    # file name; a plain split("/") does not.
    sample_id = os.path.basename(feature_path)[:-4]
    row_index = next(
        (position for position, name in enumerate(filenames_temp) if name == sample_id),
        None,
    )
    if row_index is None:
        # Report files without a clinical row, as the UPMC and UCLA cells do.
        # Such a file still has an entry in tcga_features, so a name printed
        # here means features and labels no longer line up.
        print(sample_id)
        continue
    train_pfs_event.append(pfs_event_temp[row_index])
    train_pfs_days.append(pfs_days_temp[row_index])
    train_os_event.append(os_event_temp[row_index])
    train_os_days.append(os_days_temp[row_index])
    train_age.append(age_temp[row_index])
    train_stage.append(stage_temp[row_index])
    train_hrd.append(hrd_temp[row_index])
    train_cr.append(cr_temp[row_index])
    train_pr.append(pr_temp[row_index])
    train_pd.append(pd_temp[row_index])
    train_brca.append(brca_temp[row_index])
    train_debulking.append(debulking_temp[row_index])

# Sanity check: every list should be as long as tcga_oc_files.
print(len(train_age))
print(len(train_stage))
print(len(train_debulking))
print(len(train_brca))
print(len(train_hrd))
print(len(train_pr))
print(len(train_cr))
print(len(train_pd))
print(len(train_os_days))
print(len(train_os_event))
print(len(train_pfs_days))
print(len(train_pfs_event))

102
102
102
102
102
102
102
102
102
102
102
102


In [10]:
# Align the UPMC clinical rows with the UPMC feature files: for every file in
# upmc_oc_files, copy the values of the first clinical row whose identifier
# equals the file name without its last four characters (assumed to be a
# four-character extension such as ".csv" -- TODO confirm).
test_pfs_event = []
test_pfs_days = []
test_os_event = []
test_os_days = []
test_age = []
test_stage = []
test_hrd = []
test_cr = []
test_pr = []
test_pd = []
test_brca = []
test_debulking = []
for feature_path in upmc_oc_files:
    # os.path.basename copes with whichever separator glob placed before the
    # file name; a plain split("/") does not.
    sample_id = os.path.basename(feature_path)[:-4]
    row_index = next(
        (position for position, name in enumerate(t_filenames_temp) if name == sample_id),
        None,
    )
    if row_index is None:
        # No clinical row for this file: report it. Its features are still in
        # upmc_features, so a name printed here means a misalignment.
        print(sample_id)
        continue
    test_pfs_event.append(t_pfs_event_temp[row_index])
    test_pfs_days.append(t_pfs_days_temp[row_index])
    test_os_event.append(t_os_event_temp[row_index])
    test_os_days.append(t_os_days_temp[row_index])
    test_age.append(t_age_temp[row_index])
    test_stage.append(t_stage_temp[row_index])
    test_hrd.append(t_hrd_temp[row_index])
    test_cr.append(t_cr_temp[row_index])
    test_pr.append(t_pr_temp[row_index])
    test_pd.append(t_pd_temp[row_index])
    test_brca.append(t_brca_temp[row_index])
    test_debulking.append(t_debulking_temp[row_index])

# Sanity check: every list should be as long as upmc_oc_files.
print(len(test_age))
print(len(test_stage))
print(len(test_debulking))
print(len(test_brca))
print(len(test_hrd))
print(len(test_pr))
print(len(test_cr))
print(len(test_pd))
print(len(test_os_days))
print(len(test_os_event))
print(len(test_pfs_days))
print(len(test_pfs_event))

117
117
117
117
117
117
117
117
117
117
117
117


In [11]:
# Align the UCLA clinical rows with the UCLA feature files: for every file in
# ucla_oc_files, copy the values of the first clinical row whose identifier
# equals the file name without its last four characters (assumed to be a
# four-character extension such as ".csv" -- TODO confirm).
test_pfs_event_1 = []
test_pfs_days_1 = []
test_os_event_1 = []
test_os_days_1 = []
test_age_1 = []
test_stage_1 = []
for feature_path in ucla_oc_files:
    # os.path.basename copes with whichever separator glob placed before the
    # file name; a plain split("/") does not.
    sample_id = os.path.basename(feature_path)[:-4]
    row_index = next(
        (position for position, name in enumerate(t1_filenames_temp) if name == sample_id),
        None,
    )
    if row_index is None:
        # No clinical row for this file: report it. Its features are still in
        # ucla_features, so a name printed here means a misalignment.
        print(sample_id)
        continue
    test_pfs_event_1.append(t1_pfs_event_temp[row_index])
    test_pfs_days_1.append(t1_pfs_days_temp[row_index])
    test_os_event_1.append(t1_os_event_temp[row_index])
    test_os_days_1.append(t1_os_days_temp[row_index])
    test_age_1.append(t1_age_temp[row_index])
    test_stage_1.append(t1_stage_temp[row_index])

# Sanity check: every list should be as long as ucla_oc_files.
print(len(test_age_1))
print(len(test_stage_1))
print(len(test_os_days_1))
print(len(test_os_event_1))
print(len(test_pfs_days_1))
print(len(test_pfs_event_1))

150
150
150
150
150
150


In [12]:
# Training set: TCGA features with the overall-survival event flag and time.
# Despite the "_censor" names, the arrays hold the *_os_event values built above.
train_features = np.array(tcga_features)
train_censor = np.array(train_os_event)
train_days = np.array(train_os_days)

# Test set: the UPMC samples followed by the UCLA samples, in that order for
# features, event flags and times alike.
test_features = np.array([*upmc_features, *ucla_features])
test_censor = np.array([*test_os_event, *test_os_event_1])
test_days = np.array([*test_os_days, *test_os_days_1])

In [13]:
# Pair each event flag with its follow-up time: one [flag, days] entry per sample.
train_y = [[train_censor[position], train_days[position]] for position in range(len(train_censor))]
print(len(train_y))

test_y = [[test_censor[position], test_days[position]] for position in range(len(test_censor))]
print(len(test_y))

102
267


In [14]:
# Fit a Coxnet model on the training features and score it on both cohorts.
# group / train_group are created empty here and filled by the median-split cell.
group = []
train_group = []

# Structured target expected by the estimator: (event flag, time) per sample.
# The chained assignment also binds the name `dtype`, as before.
dt = dtype = [('Status', '?'), ('Survival_in_days', '<f8')]
y_train = np.array([tuple(pair) for pair in train_y], dtype=dt)

# Scale features with the min/max learned on the training set only.
scaler = MinMaxScaler()
features_train = scaler.fit_transform(train_features)
features_test = scaler.transform(test_features)
features_train_df = pd.DataFrame(features_train)
features_test_df = pd.DataFrame(features_test)

# fit model
estimator = CoxnetSurvivalAnalysis()
estimator.fit(features_train_df, y_train)

# get risk scores (predicted once per cohort and reused for the scores below)
test_risk_scores = estimator.predict(features_test_df)
train_risk_scores = estimator.predict(features_train_df)

# Concordance index: first element of the tuple returned by the metric.
score = concordance_index_censored(test_censor, test_days, test_risk_scores)[0]
print("Test: " + str(score))
score = concordance_index_censored(train_censor, train_days, train_risk_scores)[0]
print("Train: " + str(score))

Test: 0.5317971770771117
Train: 0.7153979238754326


In [15]:
# Split both cohorts into low (0) and high (1) risk groups, using the median
# of the *training* risk scores as the single threshold for train and test.
median = np.median(train_risk_scores)

# The group lists are rebuilt from scratch here instead of being appended to
# lists created in an earlier cell: re-running this cell on its own therefore
# no longer adds a second copy of every label.

# train
train_group = [1 if risk > median else 0 for risk in train_risk_scores]

# test
group = [1 if risk > median else 0 for risk in test_risk_scores]

# As before, the counters left behind describe the test cohort.
count_high = sum(group)
count_low = len(group) - count_high
print(median)

-0.031388674368200364


In [16]:
# Dump the training follow-up times as one "; "-separated line.
print("; ".join(str(days) for days in train_days))

2594.059243265; 143.39020943100002; 16.043659806500003; 822.237564435; 1507.10129216; 2795.607719445; 2668.26116979; 268.7313015835; 31.084590846500003; 361.985074245; 92.251043834; 1520.1367655450001; 84.2292139155; 365.99598925; 1160.157148955; 137.37383701500002; 458.24703284000003; 1587.31959104; 2348.3907026399997; 1006.7396522250001; 2014.482033125; 31.084590846500003; 823.240293415; 1282.49005493; 61.166452957; 945.5731990849999; 135.36837951249998; 1496.07127582; 2135.0; 1015.7642109100001; 732.9947068700001; 397.080580005; 346.944143205; 593.615412505; 45.122793181; 723.970148185; 1243.3836341649999; 2094.700332005; 317.8650098; 1210.2935857549999; 98.26741625; 53.144623069; 1662.5242462400001; 577.57175279; 680.8528127200001; 9.15; 951.589571745; 1450.9484827; 1587.31959104; 1192.2444686899998; 884.40674625; 765.082026605; 1020.7778545900001; 1093.9770524399999; 1566.262287645; 194.52937502799998; 130.3547358325; 566.5417364499999; 395.07512235; 1281.487326255; 1830.0; 2623.1

In [17]:
# Training event flags as integers: 0 where the flag equals False, 1 otherwise.
a = [0 if event == False else 1 for event in train_censor]
print("; ".join(str(code) for code in a))

1; 0; 0; 1; 0; 0; 0; 0; 1; 0; 1; 1; 0; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 1; 1; 0; 0; 1; 0; 0; 0; 1; 1; 1; 1; 1; 0; 0; 1; 1; 1; 0; 0; 1; 1; 0; 1; 0; 1; 0; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 0; 0; 1; 1; 1; 1; 1; 1; 0; 0; 0; 1; 1; 1; 1


In [18]:
# Dump the training risk scores as one "; "-separated line.
print("; ".join(str(risk) for risk in train_risk_scores))

-1.174529364205475; -1.2003069955310448; -0.31611128817716483; -0.7480043630337436; -1.6054483663565142; -1.0419944334782683; -0.8923555316357086; 0.45055689196031934; -0.7158087274950022; 1.3516015870182725; 0.2809224560220114; -0.31627047847018197; -0.6182169847179113; 0.13391208433371204; -0.042637744236393216; -0.28744523052553284; -0.24051108990733283; -0.25094937533539685; -0.7321095881739592; -0.2623193411369177; -0.2937577876259625; 0.45738797165969824; 0.5005624707815184; -0.5497362740201424; -0.11384126292878949; 0.38089471971961486; -1.5140880310221896; -0.5539341341057749; 0.08566829089891526; 0.06172129391127634; 1.4664374562348539; 0.5263750871167285; 2.019925092292007; 0.7817281299882639; 2.7397311061563316; -1.0211662708769675; -0.5040254766360344; 0.13155562655845676; 0.8171663189165832; -1.8998009800084832; 0.13567986129329257; 2.034362263845625; -0.19315123827987613; -0.2164111466784045; -0.1012987262719025; 0.17097609755803478; 0.7508966385693492; 0.1270794179382015

In [19]:
# Dump the training risk-group labels as one "; "-separated line.
print("; ".join(str(label) for label in train_group))

0; 0; 0; 0; 0; 0; 0; 1; 0; 1; 1; 0; 0; 1; 0; 0; 0; 0; 0; 0; 0; 1; 1; 0; 0; 1; 0; 0; 1; 1; 1; 1; 1; 1; 1; 0; 0; 1; 1; 0; 1; 1; 0; 0; 0; 1; 1; 1; 0; 1; 1; 1; 0; 1; 0; 1; 1; 0; 1; 0; 0; 0; 1; 0; 0; 1; 1; 1; 0; 1; 1; 1; 0; 1; 0; 1; 0; 1; 1; 0; 0; 0; 1; 0; 1; 0; 1; 0; 0; 0; 1; 1; 1; 1; 1; 1; 0; 0; 1; 0; 1; 1


In [20]:
# Number of test samples, then their follow-up times on one "; "-separated line.
print(len(test_days))
print("; ".join(str(days) for days in test_days))

267
793.0; 1647.0; 3324.5; 2257.0; 2409.5; 2379.0; 2440.0; 854.0; 2379.0; 6100.0; 2440.0; 945.5; 854.0; 610.0; 1555.5; 1006.5; 122.0; 1281.0; 1037.0; 457.5; 1098.0; 1738.5; 3538.0; 3111.0; 488.0; 1403.0; 1189.5; 1738.5; 1037.0; 854.0; 1128.5; 6954.0; 1281.0; 2653.5; 884.5; 1342.0; 2806.0; 793.0; 1067.5; 1830.0; 1403.0; 1098.0; 1403.0; 122.0; 1494.5; 793.0; 1128.5; 579.5; 4422.5; 427.0; 2013.0; 5886.5; 1128.5; 3934.5; 3843.0; 4636.0; 1159.0; 610.0; 3690.5; 1037.0; 610.0; 1281.0; 1189.5; 640.5; 1616.5; 244.0; 4697.0; 1189.5; 854.0; 1220.0; 854.0; 4819.0; 1067.5; 976.0; 640.5; 1342.0; 1799.5; 2653.5; 183.0; 549.0; 1921.5; 1067.5; 4453.0; 1799.5; 1769.0; 1464.0; 5307.0; 488.0; 2379.0; 2379.0; 2196.0; 2653.5; 1647.0; 1677.5; 1403.0; 640.5; 1067.5; 1006.5; 366.0; 732.0; 2653.5; 2470.5; 3965.0; 793.0; 4026.0; 1860.5; 3477.0; 2623.0; 1677.5; 732.0; 1799.5; 1037.0; 732.0; 183.0; 2135.0; 3446.5; 2745.0; 2501.0; 488.0; 1647.0; 244.0; 915.0; 427.0; 488.0; 610.0; 10766.5; 1433.5; 9028.0; 1525.0; 30

In [21]:
# Test event flags as integers: 0 where the flag equals False, 1 otherwise.
a = [0 if event == False else 1 for event in test_censor]
print(len(a))
print("; ".join(str(code) for code in a))

267
1; 1; 0; 1; 0; 0; 1; 1; 1; 0; 0; 0; 0; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 0; 0; 0; 1; 1; 0; 0; 1; 1; 1; 0; 1; 1; 0; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 0; 1; 0; 0; 0; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 0; 1; 1; 1; 1; 0; 1; 1; 1; 1; 0; 1; 1; 1; 0; 1; 0; 0; 1; 1; 1; 0; 1; 1; 1; 0; 1; 1; 1; 1; 1; 0; 0; 1; 1; 1; 0; 1; 1; 0; 0; 1; 1; 0; 0; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 0; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1


In [22]:
# Dump the test risk scores as one "; "-separated line.
print("; ".join(str(risk) for risk in test_risk_scores))

0.2856922281375678; 1.0520870714337227; -0.8882164606117691; -1.1527217560191514; -0.05436138013420089; -0.9660223968331192; 1.8735918193058938; 0.794184423544394; 0.6388506331516963; 0.3494943985436354; 1.4590927940710372; -0.35153092972486055; -1.1645814110390846; -0.6118211675179992; 0.01794407601133441; 2.697320249309578; -0.08315920188836823; 1.005461419595262; 0.9697332127722382; -0.1710988587472081; -0.020603550943345983; 2.6323805015454926; 0.2622080909032918; -1.7591207459473126; -0.9073835632353333; -1.283567457525047; -0.08578663879278436; 0.4842285895277081; 1.2322428331292161; -0.8634084611362076; 0.7071408733471656; -0.2315438205843694; 1.5856146491059033; 0.12491066040527343; 0.4699286012146633; 2.613850911980687; -0.31743031584811376; 0.8416207098379443; 0.702348375132491; 0.9718278196330717; 0.4593613840658153; 0.7736391391277793; 0.39060960068801787; 0.09788145032805762; 1.243397798016094; 1.1848422397463856; 0.293345311174936; 2.262158567526909; 0.08037679713958323; 

In [23]:
# Number of test samples, then their risk-group labels on one "; "-separated line.
print(len(group))
print("; ".join(str(label) for label in group))

267
1; 1; 0; 0; 0; 0; 1; 1; 1; 1; 1; 0; 0; 0; 1; 1; 0; 1; 1; 0; 1; 1; 1; 0; 0; 0; 0; 1; 1; 0; 1; 0; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 0; 1; 0; 0; 1; 1; 1; 1; 1; 1; 0; 0; 0; 0; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 0; 1; 0; 1; 1; 1; 1; 1; 0; 1; 1; 0; 1; 0; 1; 1; 1; 0; 0; 1; 0; 1; 0; 1; 1; 1; 0; 1; 0; 0; 1; 0; 1; 1; 1; 1; 0; 1; 0; 1; 1; 1; 0; 1; 1; 1; 1; 0; 1; 0; 0; 1; 0; 1; 1; 1; 0; 1; 0; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 0; 1; 1; 0; 1; 1; 0; 1; 0; 1; 1; 0; 1; 1; 0; 1; 1; 0; 1; 0; 0; 1; 1; 1; 1; 0; 1; 0; 0; 1; 1; 1; 0; 1; 0; 1; 1; 1; 0; 0; 1; 1; 0; 1; 0; 1; 1; 1; 0; 1; 0; 1; 0; 0; 0; 1; 0; 0; 1; 1; 1; 1; 0; 1; 1; 0; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 1; 0; 1; 1; 1; 0; 1; 1; 1; 0; 1; 0; 0; 1; 0; 0; 1; 0; 1; 0; 0; 1; 0; 0; 1; 0; 1


In [24]:
# find prognostic features from model trained above
# estimator.coef_ is indexed as [feature][alpha]. A feature counts as
# prognostic when its coefficient is non-zero for at least one alpha; the
# value printed next to the feature index is the first non-zero coefficient
# met while walking that feature's row from the first column onwards.
coefficient_path = np.asarray(estimator.coef_)
count = 0
for feature_index, feature_row in enumerate(coefficient_path):
    # Scan the full width of the row rather than a hard-coded 100 columns:
    # a shorter path no longer raises IndexError and a longer one is no
    # longer truncated. (> 0) | (< 0) keeps the original test exactly.
    nonzero_columns = np.flatnonzero((feature_row > 0) | (feature_row < 0))
    if nonzero_columns.size:
        print(str(feature_index) + " " + str(feature_row[nonzero_columns[0]]))
        count += 1
print()
print("Prognostic features count = " + str(count))

0 0.0221565003487717
1 0.03368286367107484
2 0.011275766827634199
3 -0.00038625705562837066
4 0.0009359031532353456
5 0.03224270293557591
6 -0.003814129259849575
7 -0.0028064367750474115
8 -0.010774577620709939
9 0.03869170343692291
10 -0.01028754926120012
11 0.014888850590702376
12 -0.01635485705189915
13 0.006706758406606901
14 -0.006287030903067476
15 -0.0013132939133200566
16 -0.024681707111511006
17 0.046536837337497416
18 0.008644904192316748
19 -0.018552655380394717
20 0.010769839672545722
21 -0.01255805456595892
22 0.00864259090131374
23 -0.0008511410673447672
24 0.028952830814315808
25 -0.029695905897374385
26 0.00955963197176243
27 0.003957749199646939
28 0.002531035336940767
29 0.019134274605283854
30 0.009399826149977048
31 0.035128607968803766
32 0.0032151746184532924
33 -0.02195232609170379
34 -0.05167272338291648
35 -0.05586974042537949

Prognostic features count = 36
