Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 193 lines (175 sloc) 7.309 kB
23916b3 @talgalili added merge.data.frame patch for R base core
authored
1 #############
2 # Explenation for the new keep_order parameter:
3 # keep_order can accept the numbers 1 or 2, in which case it will make sure the resulting merged data.frame will be ordered according to the original order of rows of the data.frame entered to x (if keep_order=1) or to y (if keep_order=2). If keep_order is missing, merge will continue working as usual. If keep_order gets some input other then 1 or 2, it will issue a warning that it doesn't accept these values, but will continue working as merge normally would. Notice that the parameter "sort" is practically overridden when using keep_order (with the value 1 or 2).
4
5 # an example is offered at the end of this code chunk
6
7 merge.data.frame <- function (x, y, by = intersect(names(x), names(y)), by.x = by,
8 by.y = by, all = FALSE, all.x = all, all.y = all, sort = TRUE,
9 suffixes = c(".x", ".y"), incomparables = NULL, keep_order, ...)
10 {
466fa22 @talgalili Allowing "keep_order" to work with "x" or "y"
authored
11 # if we use the "keep_order" parameter, we might need to modify either the x or y data.frame objects: (either by placing 1 or "x", or by placing 2 or "y")
23916b3 @talgalili added merge.data.frame patch for R base core
authored
12 if(!missing(keep_order))
13 {
bd7dd5b @talgalili Use a different way for dealing with "x" and "y" with keep_order
authored
14
23916b3 @talgalili added merge.data.frame patch for R base core
authored
15 # some functions we will use soon:
16 add.id.column.to.data <- function(DATA)
17 {
18 data.frame(DATA, id... = seq_len(nrow(DATA)))
19 }
20 # example:
21 # add.id.column.to.data(data.frame(x = rnorm(5), x2 = rnorm(5)))
22 order.by.id...and.remove.it <- function(DATA)
23 {
24 # gets in a data.frame with the "id..." column. Orders by it and returns it
25 if(!any(colnames(DATA)=="id..."))
26 {
27 warning("The function order.by.id...and.remove.it is useful only for data.frame objects that includes the 'id...' order column")
28 return(DATA)
29 }
30
31
32 ss_r <- order(DATA$id...)
33 ss_c <- colnames(DATA) != "id..."
34 DATA[ss_r, ss_c]
35 }
36 # example:
37 # set.seed(3424)
38 # x <- data.frame(x = rnorm(5), x2 = rnorm(5))
39 # x2 <- add.id.column.to.data(x)[c(1,4,2,5,3),]
40 # x2
41 # order.by.id...and.remove.it(x2)
bd7dd5b @talgalili Use a different way for dealing with "x" and "y" with keep_order
authored
42 if(keep_order == "x") keep_order <- 1
43 if(keep_order == "y") keep_order <- 2
23916b3 @talgalili added merge.data.frame patch for R base core
authored
44
bd7dd5b @talgalili Use a different way for dealing with "x" and "y" with keep_order
authored
45 if(keep_order == 1) x<-add.id.column.to.data(x)
46 if(keep_order == 2) y<-add.id.column.to.data(y)
23916b3 @talgalili added merge.data.frame patch for R base core
authored
47 # if you didn't get 1 or 2 - issue a warning:
bd7dd5b @talgalili Use a different way for dealing with "x" and "y" with keep_order
authored
48 if(!(any(keep_order == c(1,2)) )) warning("The parameter 'keep_order' in the function merge.data.frame only accepts the values 1 (for the x data.frame) or 2 (for the y data.frame)")
23916b3 @talgalili added merge.data.frame patch for R base core
authored
49
50 # sort <- FALSE
51 # notice that if sort was TRUE, using the keep_order parameter will eventually override it...
52 }
53
54 fix.by <- function(by, df) {
55 if (is.null(by))
56 by <- numeric()
57 by <- as.vector(by)
58 nc <- ncol(df)
59 if (is.character(by))
60 by <- match(by, c("row.names", names(df))) - 1L
61 else if (is.numeric(by)) {
62 if (any(by < 0L) || any(by > nc))
63 stop("'by' must match numbers of columns")
64 }
65 else if (is.logical(by)) {
66 if (length(by) != nc)
67 stop("'by' must match number of columns")
68 by <- seq_along(by)[by]
69 }
70 else stop("'by' must specify column(s) as numbers, names or logical")
71 if (any(is.na(by)))
72 stop("'by' must specify valid column(s)")
73 unique(by)
74 }
75 nx <- nrow(x <- as.data.frame(x))
76 ny <- nrow(y <- as.data.frame(y))
77 by.x <- fix.by(by.x, x)
78 by.y <- fix.by(by.y, y)
79 if ((l.b <- length(by.x)) != length(by.y))
80 stop("'by.x' and 'by.y' specify different numbers of columns")
81 if (l.b == 0L) {
82 nm <- nm.x <- names(x)
83 nm.y <- names(y)
84 has.common.nms <- any(cnm <- nm.x %in% nm.y)
85 if (has.common.nms) {
86 names(x)[cnm] <- paste(nm.x[cnm], suffixes[1L], sep = "")
87 cnm <- nm.y %in% nm
88 names(y)[cnm] <- paste(nm.y[cnm], suffixes[2L], sep = "")
89 }
90 if (nx == 0L || ny == 0L) {
91 res <- cbind(x[FALSE, ], y[FALSE, ])
92 }
93 else {
94 ij <- expand.grid(seq_len(nx), seq_len(ny))
95 res <- cbind(x[ij[, 1L], , drop = FALSE], y[ij[,
96 2L], , drop = FALSE])
97 }
98 }
99 else {
100 if (any(by.x == 0L)) {
101 x <- cbind(Row.names = I(row.names(x)), x)
102 by.x <- by.x + 1L
103 }
104 if (any(by.y == 0L)) {
105 y <- cbind(Row.names = I(row.names(y)), y)
106 by.y <- by.y + 1L
107 }
108 row.names(x) <- NULL
109 row.names(y) <- NULL
110 if (l.b == 1L) {
111 bx <- x[, by.x]
112 if (is.factor(bx))
113 bx <- as.character(bx)
114 by <- y[, by.y]
115 if (is.factor(by))
116 by <- as.character(by)
117 }
118 else {
119 bx <- x[, by.x, drop = FALSE]
120 by <- y[, by.y, drop = FALSE]
121 names(bx) <- names(by) <- paste("V", seq_len(ncol(bx)),
122 sep = "")
123 bz <- do.call("paste", c(rbind(bx, by), sep = "\r"))
124 bx <- bz[seq_len(nx)]
125 by <- bz[nx + seq_len(ny)]
126 }
127 comm <- match(bx, by, 0L)
128 bxy <- bx[comm > 0L]
129 xinds <- match(bx, bxy, 0L, incomparables)
130 yinds <- match(by, bxy, 0L, incomparables)
131 if (nx > 0L && ny > 0L)
132 m <- .Internal(merge(xinds, yinds, all.x, all.y))
133 else m <- list(xi = integer(), yi = integer(), x.alone = seq_len(nx),
134 y.alone = seq_len(ny))
135 nm <- nm.x <- names(x)[-by.x]
136 nm.by <- names(x)[by.x]
137 nm.y <- names(y)[-by.y]
138 ncx <- ncol(x)
139 if (all.x)
140 all.x <- (nxx <- length(m$x.alone)) > 0L
141 if (all.y)
142 all.y <- (nyy <- length(m$y.alone)) > 0L
143 lxy <- length(m$xi)
144 has.common.nms <- any(cnm <- nm.x %in% nm.y)
145 if (has.common.nms)
146 nm.x[cnm] <- paste(nm.x[cnm], suffixes[1L], sep = "")
147 x <- x[c(m$xi, if (all.x) m$x.alone), c(by.x, seq_len(ncx)[-by.x]),
148 drop = FALSE]
149 names(x) <- c(nm.by, nm.x)
150 if (all.y) {
151 ya <- y[m$y.alone, by.y, drop = FALSE]
152 names(ya) <- nm.by
153 ya <- cbind(ya, x[rep.int(NA_integer_, nyy), nm.x,
154 drop = FALSE])
155 x <- rbind(x, ya)
156 }
157 if (has.common.nms) {
158 cnm <- nm.y %in% nm
159 nm.y[cnm] <- paste(nm.y[cnm], suffixes[2L], sep = "")
160 }
161 y <- y[c(m$yi, if (all.x) rep.int(1L, nxx), if (all.y) m$y.alone),
162 -by.y, drop = FALSE]
163 if (all.x)
164 for (i in seq_along(y)) is.na(y[[i]]) <- (lxy + 1L):(lxy +
165 nxx)
166 if (has.common.nms)
167 names(y) <- nm.y
168 res <- cbind(x, y)
169 if (sort)
170 res <- res[if (all.x || all.y)
171 do.call("order", x[, seq_len(l.b), drop = FALSE])
172 else sort.list(bx[m$xi]), , drop = FALSE]
173 }
174 attr(res, "row.names") <- .set_row_names(nrow(res))
175
176 if(!missing(keep_order) && any(keep_order == c(1,2))) return(order.by.id...and.remove.it(res))
177 # notice how it is essential to use && here, since if the first argument is false, it will not be possible to evaluate the second argument
178
179 res
180 }
181
182
183
184
185 if(F) # example
186 {
187
188
189 merge( x.labels, x.vals, by='ref', all.y = T, sort=F )
190 merge( x.labels, x.vals, by='ref', all.y = T, sort=F ,keep_order = 2) # yay - works as we wanted it to...
191
192 }
Something went wrong with that request. Please try again.