Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
branch: master
Tal Galili January 29, 2012
file 169 lines (137 sloc) 7.892 kb
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
# adding an an S3 method to tabular ...
tabular <- function(...) UseMethod("tabular")
tabular.default <- tables::tabular

# This was tested on "tables" Version: 0.5

tabular.cast_df <- function(xx,...)
{
# a bunch of assumptions that must be met for this function to work:
if(!require(reshape)) stop("The {reshape} package must be installed for this function to work")
if(!require(tables)) stop("The {tables} package must be installed for this function to work")
if(! any(class(xx) == "cast_df")) stop("This function only works for cast_df objects")
# xx is a casted object

m_xx <- melt(xx)
rdimnames_xx <- attr(xx, "rdimnames")
if(length(rdimnames_xx)>2) stop("This function only works for 2D tables")

ROWS <- colnames(rdimnames_xx[[1]])
COLUMNS <- colnames(rdimnames_xx[[2]])
colnames_m_xx <- colnames(m_xx)

# This is for cases when one of the equations has "(all)" in them due to something like cast(DATA, x ~.)
if(all(ROWS == "value")) ROWS <- 1
if(all(COLUMNS == "value")) COLUMNS <- 1

if(any(colnames_m_xx == "value.1")) { # then we are supposed to have a "(all)" case (e.g: cast(DATA, .~x) )
# m_xx <- m_xx[, -c(which(colnames_m_xx == "value")[-1])] # then remove the column with no value but "(all)" # This would only work for cast(DATA, x~.) and not for cast(DATA, .~x)
m_xx[,"value"] <- m_xx[,"value.1"]
column_where_all_is <- which(colnames_m_xx == "value.1")
m_xx <- m_xx[, -column_where_all_is] # then remove the column with no value but "(all)"
colnames_m_xx <- colnames(m_xx)
}
if(sum(colnames_m_xx == "value") > 1 ) { # then we are supposed to have a "(all)" case (e.g: cast(DATA, x~.) )
# m_xx <- m_xx[, -c(which(colnames_m_xx == "value")[-1])] # then remove the column with no value but "(all)" # This would only work for cast(DATA, x~.) and not for cast(DATA, .~x)
column_where_all_is <- which(m_xx[1,] == "(all)")
m_xx <- m_xx[, -column_where_all_is] # then remove the column with no value but "(all)"
colnames_m_xx <- colnames(m_xx)
}

LEFT <- paste(ROWS , collapse="*")
RIGHT <- paste(COLUMNS , collapse="*")

# turn all ROWS/COLUMNS variables into factors - so to make sure that the tabular will work on them as we expect
column_to_turn_into_factor <- intersect(c(ROWS, COLUMNS), colnames_m_xx) # this removes the "1"s in case of cast(DATA, x~.)
for(i in column_to_turn_into_factor) m_xx[,i] <- factor(m_xx[,i])

# Further motivation for the above two lines have been given by Duncan (on 11.12.11):
# The problem here is that tabular() needs to figure out what you want to do with each variable. value and month are both numeric, so it can't tell which one you want as an analysis variable. temp2 is a character variable; those are also treated as possible analysis variables, but perhaps they should be treated like factors instead. (But then there would need to be syntax to say "don't treat this character as a factor".)
# So another way to get what you want would be to change the table spec to
# tabular(value*v*factor(month)*factor(temp2) ~ variable2*result_variable, data = m_xx)
# but this changes the headings too; so maybe I should have a function Factor that does what factor() does without changing the heading. Here's a quick definition:
# Factor <- function( x ) substitute(Heading(xname)*x, list(xname = as.name(substitute(x)), x = factor(x)))
# tabular(value*v*Factor(month)*Factor(temp2)~variable2*result_variable, data = melt(xx), suppress=2)

v <- function(x) x[1L]
txt <- paste("tabular(value*v*", LEFT , "~" ,RIGHT ,", data = m_xx, suppressLabels = 2,...)", sep = "")
# suppressLabels is in order to remove the value and the v labels (which are added so to make sure the information inside the table is presented)
eval(parse(text = txt ))
}



if(F) {

###########################################
###### Examples
###########################################

###### loading libraries and data
###########################################

library(tables)
library(reshape)

# getting our data ready
names(airquality) <- tolower(names(airquality))
airquality2 <- airquality
airquality2$temp2 <- ifelse(airquality2$temp > median(airquality2$temp), "hot", "cold")
aqm <- melt(airquality2, id=c("month", "day","temp2"), na.rm=TRUE)
colnames(aqm)[4] <- "variable2" # because otherwise the function is having problem when relying on the melt function of the cast object
head(aqm,4)
  # month day temp2 variable2 value
# 1 5 1 cold ozone 41
# 2 5 2 cold ozone 36
# 3 5 3 cold ozone 12


###### Running examples:
###########################################

# Examples that work:

# Trivial, but works:
tabular.cast_df(cast(aqm, month ~ ., mean))
# month All
 # 5 68.71
 # 6 87.38
 # 7 93.50
 # 8 79.71
 # 9 71.83

tabular.cast_df(cast(aqm, . ~ temp2, mean))
    # cold hot
 # All 69.88 91.06

tabular.cast_df(cast(aqm, . ~ ., mean))
     # All
 # All 80.06

tabular.cast_df(cast(aqm, month ~ temp2, mean))
 # month cold hot
 # 5 67.64 98.22
 # 6 79.63 98.83
 # 7 58.15 96.05
 # 8 69.21 84.83
 # 9 67.25 80.86

 # Here starts the cool examples:
tabular.cast_df(cast(aqm, month ~ temp2, c(mean,sd)))
      # cold hot
 # month mean sd mean sd
 # 5 67.64 86.21 98.22 106.02
 # 6 79.63 84.49 98.83 95.99
 # 7 58.15 87.92 96.05 89.44
 # 8 69.21 71.18 84.83 73.06
 # 9 67.25 75.87 80.86 68.43

tabular.cast_df(cast(aqm, month ~ variable2, c(mean,sd)))
       # ozone solar.r wind temp
 # month mean sd mean sd mean sd mean sd
 # 5 23.62 22.22 181.3 115.08 11.623 3.531 65.55 6.855
 # 6 29.44 18.21 190.2 92.88 10.267 3.769 79.10 6.599
 # 7 59.12 31.64 216.5 80.57 8.942 3.036 83.90 4.316
 # 8 59.96 39.68 171.9 76.83 8.794 3.226 83.97 6.585
 # 9 31.45 24.14 167.4 79.12 10.180 3.461 76.90 8.356

tabular.cast_df(cast(aqm, month ~ variable2*temp2, c(mean,sd)))
       # ozone solar.r wind temp
       # cold hot cold hot cold hot cold hot
 # month mean sd mean sd mean sd mean sd mean sd mean sd mean sd mean sd
 # 5 22.76 22.242 45.00 NaN 178.6 116.47 252.0 NA 11.51 3.538 14.900 NaN 65.03 6.3326 81.00 NA
 # 6 20.60 10.015 40.50 21.38 170.0 97.58 220.4 79.78 10.38 4.578 10.092 2.237 74.89 3.8177 85.42 4.441
 # 7 13.00 4.243 62.96 29.78 135.5 181.73 222.1 72.63 10.60 5.233 8.828 2.948 73.50 0.7071 84.62 3.416
 # 8 31.75 16.360 72.50 40.80 149.7 91.19 184.2 67.26 10.88 2.819 7.800 2.970 77.00 2.2111 87.29 5.198
 # 9 19.95 8.847 53.30 29.10 163.6 90.30 175.1 53.46 10.95 2.808 8.640 4.243 72.15 4.6257 86.40 5.420

 
# This one doesn't work - nor should it work:
tabular.cast_df(cast(aqm, month ~ variable|temp2, fun.aggregate = mean)) # stops the function, since it doesn't work for 3D objects...

####################
# Bug that was fixed in tables version 0.5.20:
# "BUG" this one gets the "temp2" header in all of the first 2 columns
tabular.cast_df(cast(aqm, month*temp2 ~ variable2, c(mean,sd))) # same problem (but this is a problem with tables not in reshape)
tabular.cast_df(cast(aqm, month*temp2*variable2 ~ ., c(mean,sd))) # same issue
 # variable2 variable2 variable2 mean sd
 # 5 cold ozone 22.760 22.2416
                     # solar.r 178.577 116.4664
                     # wind 11.513 3.5381
                     # temp 65.033 6.3326
           # hot ozone 45.000 NaN
                     # solar.r 252.000 NA
                     # wind 14.900 NaN
                     # temp 81.000 NA
# ......


} # end of if(F)
Something went wrong with that request. Please try again.