Skip to content

Commit

Permalink
Merge 71a175f into b401d4d
Browse files Browse the repository at this point in the history
  • Loading branch information
kshedden committed Jan 18, 2014
2 parents b401d4d + 71a175f commit 94fb5b3
Show file tree
Hide file tree
Showing 10 changed files with 2,200 additions and 0 deletions.
599 changes: 599 additions & 0 deletions statsmodels/sandbox/phreg.py

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions statsmodels/sandbox/tests/phreg_gentests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
import numpy as np

"""
Generate data sets for testing Cox proportional hazards regression
models.
After updating the test data sets, use R to run the survival.R script
to update the R results.
"""

# Loop over pairs containing (sample size, number of variables).
for (n,p) in (20,1), (50,1), (50,2), (100,5), (1000,10):

exog = np.random.normal(size=(5*n,p))
coef = np.linspace(-0.5, 0.5, p)
lpred = np.dot(exog, coef)
expected_survival_time = np.exp(-lpred)

# Survival times are exponential
survival_time = -expected_survival_time*\
np.log(np.random.uniform(size=5*n))

# Set this to get a reasonable amount of censoring
expected_censoring_time = np.mean(expected_survival_time)

# Theses are the observation times.
censoring_time = -expected_censoring_time*\
np.log(np.random.uniform(size=5*n))

# Entry times
entry_time = -0.5*expected_censoring_time*\
np.log(np.random.uniform(size=5*n))

# 1=failure (death), 0=no failure (no death)
status = 1*(survival_time <= censoring_time)

# The censoring time of the failure time, whichever comes first
time = np.where(status==1, survival_time, censoring_time)
time = np.around(time, decimals=1)

# Only take cases where the entry time is before the failulre or
# censoring time. Take exactly n such cases.
ii = np.flatnonzero(entry_time < time)
ii = ii[np.random.permutation(len(ii))[0:n]]
status = status[ii]
time = time[ii]
exog = exog[ii,:]
entry_time = entry_time[ii]

data = np.concatenate((time[:,None], status[:,None],
entry_time[:,None], exog),
axis=1)

fname = "results/survival_data_%d_%d.csv" % (n, p)
np.savetxt(fname, data, fmt="%.5f")


1,000 changes: 1,000 additions & 0 deletions statsmodels/sandbox/tests/results/survival_data_1000_10.csv

Large diffs are not rendered by default.

100 changes: 100 additions & 0 deletions statsmodels/sandbox/tests/results/survival_data_100_5.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
2.00000 0.00000 0.04088 1.65888 0.43887 2.16009 1.53852 -0.32477
0.80000 0.00000 0.64317 -0.01543 1.27200 0.06238 -1.39983 0.28912
0.70000 0.00000 0.62035 0.01568 1.10189 1.08241 0.28838 0.03060
0.20000 0.00000 0.00396 1.92122 -0.74627 -0.20202 -0.49432 0.18627
1.30000 0.00000 0.48061 0.03445 0.62648 -0.05107 0.21468 -1.33564
0.10000 1.00000 0.00311 -0.42507 -0.02765 0.92646 -1.79002 0.46062
0.10000 1.00000 0.00360 -1.92760 -0.97652 0.33572 1.09292 0.99413
0.20000 1.00000 0.16825 -0.92224 -1.46157 -0.83315 1.27701 0.89145
1.90000 0.00000 0.31681 0.47668 -2.21992 -1.23187 0.17330 -0.71609
0.20000 1.00000 0.11154 -0.78852 -0.22280 -0.39515 0.49144 0.73336
0.20000 0.00000 0.08597 0.63231 0.55002 0.32947 0.04749 -0.85148
0.60000 1.00000 0.07035 -0.98309 0.24301 -0.72338 0.81746 -1.26767
0.90000 1.00000 0.12016 -0.95699 -0.79824 -0.33108 1.09238 -1.56246
0.90000 1.00000 0.02773 0.71522 0.92762 -0.58902 1.14907 0.04783
1.10000 0.00000 0.69469 0.23457 0.83722 -0.50415 0.55788 -0.06246
1.20000 0.00000 0.75846 -0.48568 0.26540 -0.65537 1.36581 -0.12905
0.90000 1.00000 0.18518 -0.64696 1.20687 -1.18730 1.23675 -0.30962
0.40000 1.00000 0.05883 -0.81157 0.13249 -1.84991 0.19780 0.79789
0.30000 1.00000 0.29091 0.50970 1.45016 0.02051 1.09439 0.66784
0.90000 0.00000 0.21925 1.25407 -0.55512 0.06966 -0.22258 -1.92762
0.80000 0.00000 0.52030 -1.82573 -0.96723 3.06176 -0.78653 -1.03201
0.50000 0.00000 0.31396 -1.50465 0.34928 -0.28756 -0.75276 -1.60910
2.80000 0.00000 0.50161 -0.51879 1.85085 1.62061 1.62330 0.31456
0.20000 0.00000 0.17478 0.51892 -2.01221 0.24535 0.33276 -0.00219
0.70000 1.00000 0.02345 1.01616 -2.08282 0.85616 0.20281 0.85377
0.50000 0.00000 0.02822 -1.22080 0.43769 -0.94754 -1.30187 0.90808
0.60000 0.00000 0.35722 1.42375 -0.38970 -0.64537 -1.08120 -0.66272
0.10000 0.00000 0.01318 -2.32399 -0.32153 -0.64911 2.44607 -0.67781
0.70000 1.00000 0.33234 -0.86239 2.74390 -0.26652 -1.38002 -1.72868
0.30000 1.00000 0.01818 -0.00276 -0.73023 -0.07908 -0.36846 0.24726
2.50000 1.00000 0.12683 -0.20405 1.38441 0.11380 -1.55265 1.61875
1.90000 1.00000 1.05276 0.46781 0.14070 -2.79994 0.69604 -0.71800
0.30000 0.00000 0.12424 0.34365 -1.30879 0.06613 -1.80332 0.50442
2.00000 1.00000 0.16247 -0.31284 -1.08730 -0.08539 0.00160 -1.68699
2.10000 1.00000 0.19426 -0.31993 0.21405 -0.52766 -1.15955 -1.66554
0.20000 1.00000 0.01721 0.25259 0.37725 1.05776 0.20531 0.00615
0.20000 0.00000 0.08898 0.25225 -0.59453 -1.68651 0.01607 -0.13487
3.30000 1.00000 0.36441 1.58715 0.10576 0.90020 -0.64476 0.16278
1.60000 1.00000 0.25111 0.25002 0.29993 0.17598 -0.01910 0.14305
1.00000 1.00000 0.91850 0.51040 -0.37021 -1.11632 0.21391 -0.13709
1.50000 1.00000 0.85495 1.01267 0.53073 1.60362 0.91206 -0.06950
1.70000 0.00000 0.98528 -0.03470 0.46678 0.51241 1.81681 -0.87271
1.50000 0.00000 0.45072 1.22177 -0.33281 -0.21419 0.30984 0.25901
0.30000 1.00000 0.01095 0.08628 0.06831 -0.55069 -0.98570 -1.47021
0.70000 1.00000 0.20957 0.09985 0.55591 -0.14907 1.44737 -0.19020
1.80000 0.00000 0.48048 -0.15396 2.06115 0.73874 -0.63995 0.83598
1.20000 1.00000 0.05958 1.34778 1.18792 -0.37932 2.49795 0.34569
1.60000 0.00000 0.37681 0.42462 0.39925 0.31818 -1.24178 -2.10098
1.60000 0.00000 0.27681 0.43797 0.42857 0.71614 0.47339 -0.69239
1.50000 0.00000 0.16083 -0.68583 -1.09449 0.14085 0.15615 -2.17219
0.30000 0.00000 0.11799 0.84516 0.60886 -0.02455 -0.88110 -0.78903
1.30000 0.00000 0.47439 1.14278 0.22765 0.00355 1.24747 -0.44769
1.00000 0.00000 0.34329 1.13115 -0.71649 0.20680 -0.62674 0.11690
0.90000 0.00000 0.07241 0.87043 -0.52193 -1.82556 -0.96574 -0.46303
1.50000 1.00000 0.40548 0.17182 1.23767 -0.22511 0.92575 -0.99200
0.70000 0.00000 0.13319 0.64169 0.50024 -1.40025 -0.22809 0.37239
0.40000 1.00000 0.30444 -1.48351 -0.71597 1.13583 -0.31381 0.17228
0.40000 0.00000 0.00518 0.78247 0.27451 0.90321 0.88329 0.87831
0.10000 1.00000 0.06982 -0.40623 -0.59915 0.16943 0.65481 0.74213
0.20000 1.00000 0.02326 0.98320 -0.33050 -1.45162 1.03546 0.05095
1.70000 1.00000 0.26671 0.39948 -0.01170 -1.99802 1.09214 0.38202
0.50000 1.00000 0.20631 1.85586 1.26267 1.09781 0.26681 0.86920
0.30000 1.00000 0.03166 -1.12012 -2.17322 -0.81189 0.47966 0.66974
0.40000 0.00000 0.17714 -0.62389 0.11727 0.30922 0.45983 -1.18198
0.90000 0.00000 0.39204 -1.45848 0.60149 0.40569 -0.14587 -0.41534
0.30000 0.00000 0.03714 0.83848 0.53119 -0.58428 1.31723 -1.24635
1.10000 0.00000 0.46379 0.21576 -0.81655 0.31364 -0.27610 -1.03055
1.40000 0.00000 0.71581 0.88179 2.50473 -1.20963 0.22792 -0.68145
0.50000 1.00000 0.04560 -0.13862 1.38279 -0.96235 1.86021 0.77598
0.10000 0.00000 0.05473 0.46752 -1.80012 -0.98436 0.60777 -0.35878
0.80000 1.00000 0.42481 -0.40098 2.76071 0.74394 1.09659 0.23616
0.30000 0.00000 0.23718 -0.32013 0.39748 -0.88636 -1.68891 0.75689
0.50000 0.00000 0.03726 0.59910 -0.14940 -0.01991 -0.28925 0.23417
0.30000 0.00000 0.12740 0.15096 0.90280 -0.61087 0.48783 1.80243
0.70000 1.00000 0.63824 0.55312 -0.76756 -1.62404 0.50754 1.83655
1.60000 0.00000 0.03744 -0.92345 -0.40163 -0.14609 -0.71917 -0.39060
3.20000 0.00000 0.52178 0.30277 -0.17976 -0.45561 -0.48284 -1.54195
0.90000 0.00000 0.04532 -0.14812 0.03298 -0.11498 0.09681 -2.37051
0.80000 0.00000 0.36657 -0.97253 -0.04856 0.14913 0.00607 0.78143
0.60000 1.00000 0.03655 -0.99128 0.85325 1.24141 -1.52963 1.18147
0.40000 0.00000 0.01616 1.21142 -2.49815 2.32805 0.95173 0.40863
0.30000 1.00000 0.00955 -2.06282 -1.01843 -1.49079 0.84123 1.16571
0.40000 1.00000 0.01111 -0.02352 -1.14282 -1.73493 -0.08088 0.22988
1.00000 1.00000 0.14784 0.66488 -0.55177 0.29813 1.06110 -0.09346
0.10000 0.00000 0.01538 -0.01239 -0.38551 0.90332 0.98841 -1.09158
1.80000 1.00000 1.44280 1.15346 1.52878 -0.78441 0.74890 1.19751
0.40000 0.00000 0.14388 -0.62050 1.33600 -1.25079 1.04416 1.09523
0.50000 0.00000 0.25285 0.43475 -0.66619 -1.80572 -1.41659 -0.18333
0.70000 1.00000 0.12394 0.07814 -0.49443 0.52055 -0.85472 1.47196
1.00000 0.00000 0.00122 1.35184 -0.75871 -0.33501 -0.20529 1.28920
0.90000 1.00000 0.41370 0.54683 1.24851 -1.14847 -0.04749 1.60048
0.10000 1.00000 0.06023 0.32626 -0.11112 -0.10537 0.58654 0.55261
1.30000 0.00000 0.27967 -0.77736 -0.03111 -3.14766 -0.79793 -0.72638
0.70000 0.00000 0.36676 -0.77517 0.31001 -1.09973 -1.75452 -0.82917
0.30000 1.00000 0.13509 -0.12913 1.30423 -1.15341 0.04622 0.52666
1.80000 0.00000 0.11982 -0.64227 -0.51723 0.64616 -1.48093 -0.81054
1.10000 1.00000 0.26003 -0.04358 -0.64401 1.67422 -1.79441 -1.28941
1.00000 0.00000 0.98041 1.36679 0.03887 0.94946 -1.98330 -1.71907
0.50000 1.00000 0.48143 0.25458 0.05441 0.11660 0.00090 -0.40448
1.80000 1.00000 0.15619 0.29362 -1.22544 -0.09859 0.02863 -1.39324
20 changes: 20 additions & 0 deletions statsmodels/sandbox/tests/results/survival_data_20_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
0.70000 1.00000 0.17884 -0.80807
0.60000 0.00000 0.23284 -0.10099
1.40000 1.00000 0.07038 -0.34916
0.80000 1.00000 0.04943 1.32464
0.40000 1.00000 0.19263 -1.51983
0.70000 0.00000 0.11941 0.75329
0.40000 1.00000 0.16606 0.13835
1.20000 0.00000 0.05183 0.97574
1.20000 0.00000 0.27814 1.15109
0.30000 0.00000 0.26536 -0.07403
0.50000 1.00000 0.15457 -1.10065
0.40000 1.00000 0.19745 -0.64876
0.30000 1.00000 0.07634 0.76335
1.10000 0.00000 0.25912 0.86522
0.50000 1.00000 0.00805 -0.27593
1.50000 0.00000 0.47960 0.24884
0.10000 0.00000 0.00516 0.33618
0.20000 0.00000 0.07805 0.39894
1.30000 1.00000 0.28314 0.19641
0.70000 0.00000 0.29323 0.70037
50 changes: 50 additions & 0 deletions statsmodels/sandbox/tests/results/survival_data_50_1.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
1.10000 0.00000 0.34994 -0.01447
0.50000 0.00000 0.19143 0.10335
0.20000 1.00000 0.07772 -0.23178
0.40000 1.00000 0.20723 -2.16909
1.00000 0.00000 0.15399 -0.10232
0.40000 1.00000 0.03610 -0.49694
0.40000 1.00000 0.12840 -1.20376
0.20000 0.00000 0.14323 0.78727
0.30000 0.00000 0.29799 0.31506
0.20000 1.00000 0.14530 0.89771
1.80000 0.00000 0.20163 2.18046
0.20000 1.00000 0.08723 -0.49941
0.70000 0.00000 0.62887 0.74497
1.70000 0.00000 1.26137 0.31418
1.00000 1.00000 0.61647 1.54372
0.70000 0.00000 0.16552 1.02511
0.80000 1.00000 0.24764 0.03132
0.60000 1.00000 0.31866 -0.75876
0.10000 0.00000 0.04446 0.16542
0.40000 0.00000 0.28586 0.12311
0.50000 1.00000 0.36568 -0.70318
0.40000 1.00000 0.06591 0.10302
2.40000 0.00000 0.62202 0.89696
0.30000 1.00000 0.04989 -2.19325
0.50000 0.00000 0.14153 0.35311
1.20000 0.00000 0.32188 0.37552
1.20000 1.00000 0.43017 0.85034
0.20000 0.00000 0.08460 0.69427
0.40000 0.00000 0.11157 1.02984
1.90000 1.00000 0.82567 0.56098
0.70000 1.00000 0.03936 -0.32045
0.40000 1.00000 0.29959 -0.53575
0.50000 0.00000 0.40070 -2.07033
2.80000 0.00000 0.47653 -0.13154
1.70000 0.00000 0.90095 1.60037
0.70000 0.00000 0.12335 0.39483
0.80000 0.00000 0.56741 -0.29127
0.40000 1.00000 0.35975 -0.62702
0.60000 0.00000 0.34351 -0.60780
1.00000 0.00000 0.54425 -0.83077
2.40000 1.00000 0.15080 0.01460
1.50000 0.00000 0.15170 2.29830
1.00000 1.00000 0.09671 0.80049
0.30000 1.00000 0.23474 0.28060
0.90000 0.00000 0.56784 -0.45666
1.60000 1.00000 0.73536 0.17942
0.20000 0.00000 0.19858 1.43121
0.30000 0.00000 0.10249 0.63181
0.40000 0.00000 0.15787 -0.86227
1.10000 1.00000 0.40295 0.73249
50 changes: 50 additions & 0 deletions statsmodels/sandbox/tests/results/survival_data_50_2.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
1.00000 1.00000 0.03528 -0.38315 -1.11646
0.30000 1.00000 0.01149 -2.16263 0.67790
1.50000 1.00000 1.19729 1.33121 -0.43045
0.70000 1.00000 0.53896 0.84117 -0.63001
0.20000 1.00000 0.16054 -0.95780 -0.82060
0.90000 1.00000 0.30738 -1.08477 -0.06120
0.40000 0.00000 0.12942 -1.76866 -0.04241
0.50000 1.00000 0.13266 -0.26914 -1.53422
0.90000 0.00000 0.15526 -0.81154 -0.36088
0.60000 0.00000 0.12826 0.33186 0.65154
0.70000 0.00000 0.22373 0.92204 0.25643
1.20000 1.00000 0.48185 -0.43982 0.31456
0.90000 0.00000 0.63365 -1.04665 -2.21337
0.60000 0.00000 0.07299 -2.42939 0.18910
1.20000 1.00000 0.69923 -0.29619 0.26199
0.80000 0.00000 0.10030 0.85841 0.88421
0.80000 1.00000 0.63120 -0.36353 0.26827
0.70000 1.00000 0.55604 -1.95990 -0.44759
1.10000 0.00000 0.40564 0.88427 -1.96625
0.20000 1.00000 0.03342 0.64777 -0.36391
0.10000 1.00000 0.06233 -0.64523 0.51863
1.10000 1.00000 0.04864 2.43561 0.44143
1.00000 0.00000 0.14794 1.51211 -1.71867
1.30000 1.00000 0.16023 -0.49040 -0.02378
0.40000 1.00000 0.24862 0.08828 -0.70830
0.50000 0.00000 0.12510 1.64139 -0.07560
1.90000 1.00000 0.57775 0.22910 -0.06035
0.60000 0.00000 0.21988 -0.61760 -0.84216
1.50000 0.00000 0.73572 -0.31742 -0.50461
0.20000 0.00000 0.09880 0.01778 -0.61300
2.70000 1.00000 0.68267 0.46799 -1.20130
0.20000 0.00000 0.00640 -0.57452 0.07641
1.00000 0.00000 0.53120 0.10882 -1.29716
1.40000 0.00000 0.39915 1.78210 -0.98747
0.30000 1.00000 0.24249 -0.52481 -0.00631
1.50000 0.00000 0.90297 -0.15558 -0.41401
0.60000 0.00000 0.11513 1.91513 0.84025
2.90000 0.00000 1.22897 0.83216 0.05158
0.20000 1.00000 0.14026 -0.15508 2.57765
0.20000 1.00000 0.02342 -0.43017 -0.19378
0.10000 1.00000 0.01043 -2.09826 1.82889
1.00000 0.00000 0.12414 -0.15255 -0.78868
0.10000 0.00000 0.08182 2.05096 -0.00273
0.70000 0.00000 0.50106 -1.11553 -0.37599
0.70000 0.00000 0.04540 0.20011 -1.63279
0.70000 1.00000 0.41003 1.04118 0.07873
0.40000 1.00000 0.33584 -1.70869 -0.94676
1.20000 0.00000 0.59698 0.21795 -2.65967
1.10000 0.00000 0.90747 1.10303 2.21828
0.20000 0.00000 0.15655 1.50321 0.88795
54 changes: 54 additions & 0 deletions statsmodels/sandbox/tests/survival.R
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
library(survival)
library(R2nparray)

ixd = list(c(20,1), c(50,1), c(50,2), c(100,5), c(1000,10))

res = list()

for (ix in ixd) {
fname = sprintf("results/survival_data_%d_%d.csv", ix[1], ix[2])
data = read.table(fname)

time = data[,1]
status = data[,2]
entry = data[,3]
exog = data[,4:dim(data)[2]]
exog = as.matrix(exog)
n = dim(exog)[1]
p = dim(exog)[2]

# Needs to match the kronecker statement in test_phreg.py
strata = kronecker(seq(5), array(1, n/5))

for (ties in c("breslow", "efron")) {

ti = substr(ties, 1, 3)

# Base model
surv = Surv(time, status)
md = coxph(surv ~ exog, ties=ties)
res[[sprintf("coef_%d_%d_%s", n, p, ti)]] = md$coef
res[[sprintf("se_%d_%d_%s", n, p, ti)]] = sqrt(diag(md$var))

# With entry time
surv = Surv(entry, time, status)
md = coxph(surv ~ exog, ties=ties)
res[[sprintf("coef_%d_%d_et_%s", n, p, ti)]] = md$coef
res[[sprintf("se_%d_%d_et_%s", n, p, ti)]] = sqrt(diag(md$var))

# With strata
surv = Surv(time, status)
md = coxph(surv ~ exog + strata(strata), ties=ties)
res[[sprintf("coef_%d_%d_st_%s", n, p, ti)]] = md$coef
res[[sprintf("se_%d_%d_st_%s", n, p, ti)]] = sqrt(diag(md$var))

# With entry time and strata
surv = Surv(entry, time, status)
md = coxph(surv ~ exog + strata(strata), ties=ties)
res[[sprintf("coef_%d_%d_et_st_%s", n, p, ti)]] = md$coef
res[[sprintf("se_%d_%d_et_st_%s", n, p, ti)]] = sqrt(diag(md$var))
}
}

R2nparray(res, fname="survival_r_results.py")

Loading

0 comments on commit 94fb5b3

Please sign in to comment.