Skip to content

Commit

Permalink
fix dplyr joins with same variable names
Browse files Browse the repository at this point in the history
  • Loading branch information
javierluraschi committed Jun 9, 2017
1 parent a4ba214 commit 0c39d2e
Show file tree
Hide file tree
Showing 3 changed files with 1 addition and 44 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
Package: sparklyr
Type: Package
Title: R Interface to Apache Spark
Version: 0.5.5
Version: 0.5.6
Authors@R: c(
person("Javier", "Luraschi", email = "javier@rstudio.com", role = c("aut", "cre")),
person("Kevin", "Ushey", role = "aut", email = "kevin@rstudio.com"),
Expand Down
1 change: 0 additions & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -112,7 +112,6 @@ S3method(spark_write_json,tbl_spark)
S3method(spark_write_parquet,spark_jobj)
S3method(spark_write_parquet,tbl_spark)
S3method(sql_escape_ident,spark_connection)
S3method(sql_join,spark_connection)
S3method(sql_select,spark_connection)
S3method(sql_translate_env,spark_connection)
S3method(src_tbls,spark_connection)
Expand Down
42 changes: 0 additions & 42 deletions R/dplyr_spark_connection.R
Original file line number Diff line number Diff line change
Expand Up @@ -141,45 +141,3 @@ sql_select.spark_connection <- function(con, select, from, where = NULL,

escape(unname(compact(out)), collapse = "\n", parens = FALSE, con = con)
}

#' @export
sql_join.spark_connection <- function(con, x, y, type = "inner", by = NULL, ...) {
# TODO: This function needs to be removed once dplyr can workaround this issue by avoiding USING statements.

sparkVersion <- spark_version(con)

if (compareVersion(toString(sparkVersion), "2.0.0") < 0) {
sameNameColumns <- length(Filter(function(e) by$x[[e]] == by$y[[e]], seq_len(length(by$x))))
if (sameNameColumns > 0) {
stop(paste("This dplyr operation requires a feature not supported in Spark", sparkVersion,
". Try Spark 2.0.0 instead or avoid using same-column names in joins."))
}
}

# Invoke dplyrs default join:
join <- switch(type,
left = sql("LEFT"),
inner = sql("INNER"),
right = sql("RIGHT"),
full = sql("FULL"),
stop("Unknown join type:", type, call. = FALSE)
)

using <- all(by$x == by$y)

if (using) {
cond <- build_sql("USING ", lapply(by$x, ident), con = con)
} else {
on <- sql_vector(paste0(sql_escape_ident(con, by$x), " = ", sql_escape_ident(con, by$y)),
collapse = " AND ", parens = TRUE)
cond <- build_sql("ON ", on, con = con)
}

build_sql(
'SELECT * FROM ',x, "\n\n",
join, " JOIN\n\n" ,
y, "\n\n",
cond,
con = con
)
}

0 comments on commit 0c39d2e

Please sign in to comment.