Skip to content

Commit

Permalink
add column with missing values. fixes #10.
Browse files Browse the repository at this point in the history
  • Loading branch information
tompollard committed Aug 1, 2017
1 parent cac561a commit 85f17c4
Show file tree
Hide file tree
Showing 3 changed files with 129 additions and 127 deletions.
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@
# Versions should comply with PEP440. For a discussion on single-sourcing
# the version across setup.py and the project code, see
# https://packaging.python.org/en/latest/single_source_version.html
version='0.2.6',
version='0.2.7',

description='Table One',
long_description=long_description,
Expand Down
188 changes: 98 additions & 90 deletions tableone.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
},
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {
"collapsed": true
},
Expand All @@ -47,7 +47,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {
"collapsed": true
},
Expand All @@ -58,10 +58,8 @@
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": true
},
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# load sample data into a pandas dataframe\n",
Expand All @@ -71,7 +69,7 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand Down Expand Up @@ -271,7 +269,7 @@
"5 279.0 3.53 143.0 671.0 113.15 72.0 136.0 10.9 3.0 "
]
},
"execution_count": 5,
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -282,7 +280,7 @@
},
{
"cell_type": "code",
"execution_count": 9,
"execution_count": 5,
"metadata": {
"collapsed": true
},
Expand All @@ -292,10 +290,10 @@
"columns = ['time','age','bili','chol','albumin','copper',\n",
" 'alk.phos','ast','trig','platelet','protime',\n",
" 'status', 'ascites', 'hepato', 'spiders', 'edema', \n",
" 'stage', 'sex']\n",
" 'stage', 'sex','trt']\n",
"\n",
"# list of columns containing categorical variables\n",
"catvars = ['status', 'ascites', 'hepato', 'spiders', 'edema', \n",
"categorical = ['status', 'ascites', 'hepato', 'spiders', 'edema', \n",
" 'stage', 'sex']\n",
"\n",
"# optionally, a list of non-normal variables\n",
Expand All @@ -304,66 +302,67 @@
},
{
"cell_type": "code",
"execution_count": 10,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# create an instance of TableOne with the input arguments\n",
"# firstly, with no stratifying variable\n",
"overall_table = TableOne(data, columns, catvars, nonnormal=nonnormal)"
"overall_table = TableOne(data, columns, categorical, nonnormal=nonnormal)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Overall\n",
" overall\n",
"--------------------- -----------------\n",
" overall missing\n",
"--------------------- ----------------- ---------\n",
"n 418\n",
"time (mean (std)) 1917.78 (1104.67)\n",
"age (mean (std)) 50.74 (10.45)\n",
"bili (median [IQR]) 1.40 [0.80,3.40]\n",
"chol (mean (std)) 369.51 (231.94)\n",
"albumin (mean (std)) 3.50 (0.42)\n",
"copper (mean (std)) 97.65 (85.61)\n",
"alk.phos (mean (std)) 1982.66 (2140.39)\n",
"ast (mean (std)) 122.56 (56.70)\n",
"trig (mean (std)) 124.70 (65.15)\n",
"platelet (mean (std)) 257.02 (98.33)\n",
"protime (mean (std)) 10.73 (1.02)\n",
"status (n (%))\n",
"time (mean (std)) 1917.78 (1104.67) 0\n",
"age (mean (std)) 50.74 (10.45) 0\n",
"bili (median [IQR]) 1.40 [0.80,3.40] 0\n",
"chol (mean (std)) 369.51 (231.94) 134\n",
"albumin (mean (std)) 3.50 (0.42) 0\n",
"copper (mean (std)) 97.65 (85.61) 108\n",
"alk.phos (mean (std)) 1982.66 (2140.39) 106\n",
"ast (mean (std)) 122.56 (56.70) 106\n",
"trig (mean (std)) 124.70 (65.15) 136\n",
"platelet (mean (std)) 257.02 (98.33) 11\n",
"protime (mean (std)) 10.73 (1.02) 2\n",
"trt (mean (std)) 1.49 (0.50) 106\n",
"status (n (%)) 0\n",
"0 232 (55.50)\n",
"1 25 (5.98)\n",
"2 161 (38.52)\n",
"ascites (n (%))\n",
"ascites (n (%)) 106\n",
"0.0 288 (92.31)\n",
"1.0 24 (7.69)\n",
"hepato (n (%))\n",
"hepato (n (%)) 106\n",
"0.0 152 (48.72)\n",
"1.0 160 (51.28)\n",
"spiders (n (%))\n",
"spiders (n (%)) 106\n",
"0.0 222 (71.15)\n",
"1.0 90 (28.85)\n",
"edema (n (%))\n",
"edema (n (%)) 0\n",
"0.0 354 (84.69)\n",
"0.5 44 (10.53)\n",
"1.0 20 (4.78)\n",
"stage (n (%))\n",
"stage (n (%)) 6\n",
"1.0 21 (5.10)\n",
"2.0 92 (22.33)\n",
"3.0 155 (37.62)\n",
"4.0 144 (34.95)\n",
"sex (n (%))\n",
"sex (n (%)) 0\n",
"f 374 (89.47)\n",
"m 44 (10.53)"
]
},
"execution_count": 11,
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -382,78 +381,78 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# optionally, a categorical variable for stratification\n",
"strat = 'trt'\n",
"columns = columns + [strat]"
"columns = columns"
]
},
{
"cell_type": "code",
"execution_count": 23,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"# create an instance of TableOne with the input arguments\n",
"strat_table = TableOne(data, columns, catvars, strat, nonnormal)"
"strat_table = TableOne(data, columns, categorical, strat, nonnormal)"
]
},
{
"cell_type": "code",
"execution_count": 24,
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Stratified by trt\n",
" 1.0 2.0\n",
"--------------------- ----------------- -----------------\n",
"n 158 154\n",
"time (mean (std)) 2015.62 (1094.12) 1996.86 (1155.93)\n",
"age (mean (std)) 51.42 (11.01) 48.58 (9.96)\n",
"bili (median [IQR]) 1.40 [0.80,3.20] 1.30 [0.72,3.60]\n",
"chol (mean (std)) 365.01 (209.54) 373.88 (252.48)\n",
"albumin (mean (std)) 3.52 (0.44) 3.52 (0.40)\n",
"copper (mean (std)) 97.64 (90.59) 97.65 (80.49)\n",
"alk.phos (mean (std)) 2021.30 (2183.44) 1943.01 (2101.69)\n",
"ast (mean (std)) 120.21 (54.52) 124.97 (58.93)\n",
"trig (mean (std)) 124.14 (71.54) 125.25 (58.52)\n",
"platelet (mean (std)) 258.75 (100.32) 265.20 (90.73)\n",
"protime (mean (std)) 10.65 (0.85) 10.80 (1.14)\n",
"status (n (%))\n",
" 1.0 2.0 missing\n",
"--------------------- ----------------- ----------------- ---------\n",
"n 158 154 106\n",
"time (mean (std)) 2015.62 (1094.12) 1996.86 (1155.93) 0\n",
"age (mean (std)) 51.42 (11.01) 48.58 (9.96) 0\n",
"bili (median [IQR]) 1.40 [0.80,3.20] 1.30 [0.72,3.60] 0\n",
"chol (mean (std)) 365.01 (209.54) 373.88 (252.48) 134\n",
"albumin (mean (std)) 3.52 (0.44) 3.52 (0.40) 0\n",
"copper (mean (std)) 97.64 (90.59) 97.65 (80.49) 108\n",
"alk.phos (mean (std)) 2021.30 (2183.44) 1943.01 (2101.69) 106\n",
"ast (mean (std)) 120.21 (54.52) 124.97 (58.93) 106\n",
"trig (mean (std)) 124.14 (71.54) 125.25 (58.52) 136\n",
"platelet (mean (std)) 258.75 (100.32) 265.20 (90.73) 11\n",
"protime (mean (std)) 10.65 (0.85) 10.80 (1.14) 2\n",
"status (n (%)) 0\n",
"0 83 (52.53) 85 (55.19)\n",
"1 10 (6.33) 9 (5.84)\n",
"2 65 (41.14) 60 (38.96)\n",
"ascites (n (%))\n",
"ascites (n (%)) 106\n",
"0.0 144 (91.14) 144 (93.51)\n",
"1.0 14 (8.86) 10 (6.49)\n",
"hepato (n (%))\n",
"hepato (n (%)) 106\n",
"0.0 85 (53.80) 67 (43.51)\n",
"1.0 73 (46.20) 87 (56.49)\n",
"spiders (n (%))\n",
"spiders (n (%)) 106\n",
"0.0 113 (71.52) 109 (70.78)\n",
"1.0 45 (28.48) 45 (29.22)\n",
"edema (n (%))\n",
"edema (n (%)) 0\n",
"0.0 132 (83.54) 131 (85.06)\n",
"0.5 16 (10.13) 13 (8.44)\n",
"1.0 10 (6.33) 10 (6.49)\n",
"stage (n (%))\n",
"stage (n (%)) 6\n",
"1.0 12 (7.59) 4 (2.60)\n",
"2.0 35 (22.15) 32 (20.78)\n",
"3.0 56 (35.44) 64 (41.56)\n",
"4.0 55 (34.81) 54 (35.06)\n",
"sex (n (%))\n",
"sex (n (%)) 0\n",
"f 137 (86.71) 139 (90.26)\n",
"m 21 (13.29) 15 (9.74)"
]
},
"execution_count": 24,
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -473,65 +472,74 @@
},
{
"cell_type": "code",
"execution_count": 25,
"execution_count": 11,
"metadata": {},
"outputs": [],
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"tableone.py:281: UserWarning: No p-value was computed for stage due to the low number of observations.\n",
" warnings.warn('No p-value was computed for ' + str(v) + ' due to the low number of observations.')\n"
]
}
],
"source": [
"# create strat_table with p values\n",
"strat_table = TableOne(data, columns, catvars, strat, nonnormal, pval = True)"
"strat_table = TableOne(data, columns, categorical, strat, nonnormal, pval = True)"
]
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Stratified by trt\n",
" 1.0 2.0 pval testname\n",
"--------------------- ----------------- ----------------- ------ --------------\n",
"n 158 154\n",
"time (mean (std)) 2015.62 (1094.12) 1996.86 (1155.93) 0.883 One_way_ANOVA\n",
"age (mean (std)) 51.42 (11.01) 48.58 (9.96) 0.018 One_way_ANOVA\n",
"bili (median [IQR]) 1.40 [0.80,3.20] 1.30 [0.72,3.60] 0.842 Kruskal-Wallis\n",
"chol (mean (std)) 365.01 (209.54) 373.88 (252.48) 0.748 One_way_ANOVA\n",
"albumin (mean (std)) 3.52 (0.44) 3.52 (0.40) 0.874 One_way_ANOVA\n",
"copper (mean (std)) 97.64 (90.59) 97.65 (80.49) 0.999 One_way_ANOVA\n",
"alk.phos (mean (std)) 2021.30 (2183.44) 1943.01 (2101.69) 0.747 One_way_ANOVA\n",
"ast (mean (std)) 120.21 (54.52) 124.97 (58.93) 0.460 One_way_ANOVA\n",
"trig (mean (std)) 124.14 (71.54) 125.25 (58.52) 0.886 One_way_ANOVA\n",
"platelet (mean (std)) 258.75 (100.32) 265.20 (90.73) 0.555 One_way_ANOVA\n",
"protime (mean (std)) 10.65 (0.85) 10.80 (1.14) 0.197 One_way_ANOVA\n",
"status (n (%)) 0.894 Chi-squared\n",
" 1.0 2.0 missing pval testname\n",
"--------------------- ----------------- ----------------- --------- ------ --------------\n",
"n 158 154 106\n",
"time (mean (std)) 2015.62 (1094.12) 1996.86 (1155.93) 0 0.883 One_way_ANOVA\n",
"age (mean (std)) 51.42 (11.01) 48.58 (9.96) 0 0.018 One_way_ANOVA\n",
"bili (median [IQR]) 1.40 [0.80,3.20] 1.30 [0.72,3.60] 0 0.842 Kruskal-Wallis\n",
"chol (mean (std)) 365.01 (209.54) 373.88 (252.48) 134 0.748 One_way_ANOVA\n",
"albumin (mean (std)) 3.52 (0.44) 3.52 (0.40) 0 0.874 One_way_ANOVA\n",
"copper (mean (std)) 97.64 (90.59) 97.65 (80.49) 108 0.999 One_way_ANOVA\n",
"alk.phos (mean (std)) 2021.30 (2183.44) 1943.01 (2101.69) 106 0.747 One_way_ANOVA\n",
"ast (mean (std)) 120.21 (54.52) 124.97 (58.93) 106 0.460 One_way_ANOVA\n",
"trig (mean (std)) 124.14 (71.54) 125.25 (58.52) 136 0.886 One_way_ANOVA\n",
"platelet (mean (std)) 258.75 (100.32) 265.20 (90.73) 11 0.555 One_way_ANOVA\n",
"protime (mean (std)) 10.65 (0.85) 10.80 (1.14) 2 0.197 One_way_ANOVA\n",
"status (n (%)) 0 0.894 Chi-squared\n",
"0 83 (52.53) 85 (55.19)\n",
"1 10 (6.33) 9 (5.84)\n",
"2 65 (41.14) 60 (38.96)\n",
"ascites (n (%)) 0.567 Chi-squared\n",
"ascites (n (%)) 106 0.567 Chi-squared\n",
"0.0 144 (91.14) 144 (93.51)\n",
"1.0 14 (8.86) 10 (6.49)\n",
"hepato (n (%)) 0.088 Chi-squared\n",
"hepato (n (%)) 106 0.088 Chi-squared\n",
"0.0 85 (53.80) 67 (43.51)\n",
"1.0 73 (46.20) 87 (56.49)\n",
"spiders (n (%)) 0.985 Chi-squared\n",
"spiders (n (%)) 106 0.985 Chi-squared\n",
"0.0 113 (71.52) 109 (70.78)\n",
"1.0 45 (28.48) 45 (29.22)\n",
"edema (n (%)) 0.877 Chi-squared\n",
"edema (n (%)) 0 0.877 Chi-squared\n",
"0.0 132 (83.54) 131 (85.06)\n",
"0.5 16 (10.13) 13 (8.44)\n",
"1.0 10 (6.33) 10 (6.49)\n",
"stage (n (%)) nan Not tested\n",
"stage (n (%)) 6 nan Not tested\n",
"1.0 12 (7.59) 4 (2.60)\n",
"2.0 35 (22.15) 32 (20.78)\n",
"3.0 56 (35.44) 64 (41.56)\n",
"4.0 55 (34.81) 54 (35.06)\n",
"sex (n (%)) 0.421 Chi-squared\n",
"sex (n (%)) 0 0.421 Chi-squared\n",
"f 137 (86.71) 139 (90.26)\n",
"m 21 (13.29) 15 (9.74)"
]
},
"execution_count": 26,
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -557,7 +565,7 @@
},
{
"cell_type": "code",
"execution_count": 12,
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -567,7 +575,7 @@
},
{
"cell_type": "code",
"execution_count": 13,
"execution_count": 14,
"metadata": {
"collapsed": true
},
Expand Down
Loading

0 comments on commit 85f17c4

Please sign in to comment.