In [4]:
# Scratch expression; evaluates to 3.
1 + 2

In [1]:
## tableone package itself
library(tableone)
## survival package for Mayo Clinic's PBC data
library(survival)

library(dplyr)
library(pipeR)
library(broom)


Attaching package: 'dplyr'


The following objects are masked from 'package:stats':

    filter, lag


The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union




## Data "pbc"

Mayo Clinic Primary Biliary Cholangitis Data
Primary biliary cholangitis is an autoimmune disease 
leading to destruction of the small bile ducts in the liver. 
Progression is slow but inexorable, 
eventually leading to cirrhosis and liver decompensation. 
The condition has been recognised since at least 1851 
and was named "primary biliary cirrhosis" in 1949. 
Because cirrhosis is a feature only of advanced disease, 
a change of its name to "primary biliary cholangitis" was
proposed by patient advocacy groups in 2014.
This data is from the Mayo Clinic trial in PBC conducted between 1974 and 1984. 
A total of 424 PBC patients, referred to Mayo Clinic during that ten-year interval, 
met eligibility criteria for the randomized placebo controlled trial
of the drug D-penicillamine. 
The first 312 cases in the data set participated in the randomized trial 
and contain largely complete data. 
The additional 112 cases did not participate in the clinical trial, 
but consented to have basic measurements recorded and to be followed for survival. 
Six of those cases were lost to follow-up shortly after diagnosis, 
so the data here are on an additional 106 cases as well as the 312 randomized participants.

In [2]:
# age:	in years
# albumin:	serum albumin (g/dl)
# alk.phos:	alkaline phosphatase (U/liter)
# ascites:	presence of ascites
# ast:	aspartate aminotransferase, once called SGOT (U/ml)
# bili:	serum bilirubin (mg/dl)
# chol:	serum cholesterol (mg/dl)
# copper:	urine copper (ug/day)
# edema:	0 no edema, 0.5 untreated or successfully treated
#                  1 edema despite diuretic therapy
# hepato:	presence of hepatomegaly or enlarged liver
# id:	case number
# platelet:	platelet count
# protime:	standardised blood clotting time
# sex:	m/f
# spiders:	blood vessel malformations in the skin
# stage:	histologic stage of disease (needs biopsy)
# status:	status at endpoint, 0/1/2 for censored, transplant, dead
# time:	number of days between registration and the earlier of death,
# transplantation, or study analysis in July, 1986
# trt:	1/2/NA for D-penicillamine, placebo, not randomised
# trig:	triglycerides (mg/dl)


In [8]:
## Load the PBC data from a CSV file; the path is relative to the current
## working directory.
## NOTE(review): the recorded preview shows a leading `X` column in addition
## to the documented variables -- presumably row names written on export;
## confirm whether it should be kept.
pbc <- read.csv("../import/pbc.csv")

In [9]:
## Display the imported data frame (column names, types and rows appear in
## the cell output).
pbc

X,id,time,status,trt,age,sex,ascites,hepato,spiders,...,bili,chol,albumin,copper,alk.phos,ast,trig,platelet,protime,stage
<int>,<int>,<int>,<int>,<int>,<dbl>,<chr>,<int>,<int>,<int>,...,<dbl>,<int>,<dbl>,<int>,<dbl>,<dbl>,<int>,<int>,<dbl>,<int>
1,1,400,2,1,58.76523,f,1,1,1,...,14.5,261,2.60,156,1718.0,137.95,172,190,12.2,4
2,2,4500,0,1,56.44627,f,0,1,1,...,1.1,302,4.14,54,7394.8,113.52,88,221,10.6,3
3,3,1012,2,1,70.07255,m,0,0,0,...,1.4,176,3.48,210,516.0,96.10,55,151,12.0,4
4,4,1925,2,1,54.74059,f,0,1,1,...,1.8,244,2.54,64,6121.8,60.63,92,183,10.3,4
5,5,1504,1,2,38.10541,f,0,1,1,...,3.4,279,3.53,143,671.0,113.15,72,136,10.9,3
6,6,2503,2,2,66.25873,f,0,1,0,...,0.8,248,3.98,50,944.0,93.00,63,,11.0,3
7,7,1832,0,2,55.53457,f,0,1,0,...,1.0,322,4.09,52,824.0,60.45,213,204,9.7,3
8,8,2466,2,2,53.05681,f,0,0,0,...,0.3,280,4.00,52,4651.2,28.38,189,373,11.0,3
9,9,2400,2,1,42.50787,f,0,0,1,...,3.2,562,3.08,79,2276.0,144.15,88,251,11.0,2
10,10,51,2,2,70.55989,f,1,0,1,...,12.6,200,2.74,140,918.0,147.25,143,302,11.5,4


In [11]:
## Open the help page for the pbc dataset (shipped with the survival
## package loaded above). `?pbc` is the shorthand for the same call.
help("pbc")

# Create table 1


In [13]:
## With only `data` supplied, every column is summarised -- including the
## X and id columns -- and numerically coded categorical variables
## (status, trt, ascites, stage, ...) are reported as mean (SD).
CreateTableOne(data = pbc)

                      
                       Overall          
  n                        418          
  X (mean (SD))         209.50 (120.81) 
  id (mean (SD))        209.50 (120.81) 
  time (mean (SD))     1917.78 (1104.67)
  status (mean (SD))      0.83 (0.96)   
  trt (mean (SD))         1.49 (0.50)   
  age (mean (SD))        50.74 (10.45)  
  sex = m (%)               44 (10.5)   
  ascites (mean (SD))     0.08 (0.27)   
  hepato (mean (SD))      0.51 (0.50)   
  spiders (mean (SD))     0.29 (0.45)   
  edema (mean (SD))       0.10 (0.25)   
  bili (mean (SD))        3.22 (4.41)   
  chol (mean (SD))      369.51 (231.94) 
  albumin (mean (SD))     3.50 (0.42)   
  copper (mean (SD))     97.65 (85.61)  
  alk.phos (mean (SD)) 1982.66 (2140.39)
  ast (mean (SD))       122.56 (56.70)  
  trig (mean (SD))      124.70 (65.15)  
  platelet (mean (SD))  257.02 (98.33)  
  protime (mean (SD))    10.73 (1.02)   
  stage (mean (SD))       3.02 (0.88)   

In [15]:
## Get variable names
## (the commented-out dput() variant prints them as a c(...) expression
## that can be pasted into code)
# dput(names(pbc))
names(pbc)

In [16]:
## Columns to summarise in the table: every column of pbc except X and id.
myVars <- c(
  "time", "status", "trt", "age", "sex",
  "ascites", "hepato", "spiders", "edema",
  "bili", "chol", "albumin", "copper",
  "alk.phos", "ast", "trig", "platelet",
  "protime", "stage"
)

## Columns stored as numbers that should be treated as categorical
## (converted to factors when the table is built).
catVars <- c(
  "status", "trt",
  "ascites", "hepato", "spiders",
  "edema", "stage"
)

In [19]:
## Preview the first rows of the columns listed in catVars.
## NOTE(review): the recorded output also contains a table with the myVars
## columns, which this single call does not produce -- the output looks
## like it came from an earlier version of the cell; re-run to confirm.
head(pbc[catVars])

Unnamed: 0_level_0,time,status,trt,age,sex,ascites,hepato,spiders,edema,bili,chol,albumin,copper,alk.phos,ast,trig,platelet,protime,stage
Unnamed: 0_level_1,<int>,<int>,<int>,<dbl>,<chr>,<int>,<int>,<int>,<dbl>,<dbl>,<int>,<dbl>,<int>,<dbl>,<dbl>,<int>,<int>,<dbl>,<int>
1,400,2,1,58.76523,f,1,1,1,1.0,14.5,261,2.6,156,1718.0,137.95,172,190.0,12.2,4
2,4500,0,1,56.44627,f,0,1,1,0.0,1.1,302,4.14,54,7394.8,113.52,88,221.0,10.6,3
3,1012,2,1,70.07255,m,0,0,0,0.5,1.4,176,3.48,210,516.0,96.1,55,151.0,12.0,4
4,1925,2,1,54.74059,f,0,1,1,0.5,1.8,244,2.54,64,6121.8,60.63,92,183.0,10.3,4
5,1504,1,2,38.10541,f,0,1,1,0.0,3.4,279,3.53,143,671.0,113.15,72,136.0,10.9,3
6,2503,2,2,66.25873,f,0,1,0,0.0,0.8,248,3.98,50,944.0,93.0,63,,11.0,3


Unnamed: 0_level_0,status,trt,ascites,hepato,spiders,edema,stage
Unnamed: 0_level_1,<int>,<int>,<int>,<int>,<int>,<dbl>,<int>
1,2,1,1,1,1,1.0,4
2,0,1,0,1,1,0.0,3
3,2,1,0,0,0,0.5,4
4,2,1,0,1,1,0.5,4
5,1,2,0,1,1,0.0,3
6,2,2,0,1,0,0.0,3


In [17]:
## Build the TableOne object: summarise the myVars columns of pbc and treat
## the catVars columns as factors, so they are reported as counts (%)
## instead of mean (SD).
tab2 <- CreateTableOne(
  data = pbc,
  vars = myVars,
  factorVars = catVars
)
## Auto-print the table
tab2

                      
                       Overall          
  n                        418          
  time (mean (SD))     1917.78 (1104.67)
  status (%)                            
     0                     232 (55.5)   
     1                      25 ( 6.0)   
     2                     161 (38.5)   
  trt = 2 (%)              154 (49.4)   
  age (mean (SD))        50.74 (10.45)  
  sex = m (%)               44 (10.5)   
  ascites = 1 (%)           24 ( 7.7)   
  hepato = 1 (%)           160 (51.3)   
  spiders = 1 (%)           90 (28.8)   
  edema (%)                             
     0                     354 (84.7)   
     0.5                    44 (10.5)   
     1                      20 ( 4.8)   
  bili (mean (SD))        3.22 (4.41)   
  chol (mean (SD))      369.51 (231.94) 
  albumin (mean (SD))     3.50 (0.42)   
  copper (mean (SD))     97.65 (85.61)  
  alk.phos (mean (SD)) 1982.66 (2140.39)
  ast (mean (SD))       122.56 (56.70)  
  trig (mean (SD))      124.70 (65