Skip to content

Commit

Permalink
Added dim, ncol, nrow, names, rbind, and unique functions to DataFrames
Browse files Browse the repository at this point in the history
  • Loading branch information
falaki committed Jul 30, 2015
1 parent 069a4c4 commit b5aa988
Show file tree
Hide file tree
Showing 2 changed files with 89 additions and 0 deletions.
6 changes: 6 additions & 0 deletions R/pkg/NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ exportMethods("arrange",
"count",
"crosstab",
"describe",
"dim",
"distinct",
"dropna",
"dtypes",
Expand All @@ -44,11 +45,15 @@ exportMethods("arrange",
"isLocal",
"join",
"limit",
"names",
"ncol",
"nrow",
"orderBy",
"mutate",
"names",
"persist",
"printSchema",
"rbind",
"registerTempTable",
"rename",
"repartition",
Expand All @@ -65,6 +70,7 @@ exportMethods("arrange",
"summarize",
"take",
"unionAll",
"unique",
"unpersist",
"where",
"withColumn",
Expand Down
83 changes: 83 additions & 0 deletions R/pkg/R/DataFrame.R
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,16 @@ setMethod("names",
columns(x)
})

#' @rdname columns
setMethod("names<-",
          signature(x = "DataFrame"),
          function(x, value) {
            # Replacement method for the column names of a DataFrame.
            #
            # x:     the DataFrame whose columns are being renamed.
            # value: the new column names; coerced with as.list() before being
            #        handed to the JVM.
            #
            # R assigns whatever a replacement function returns back to the
            # caller's variable, so every path has to return a DataFrame.
            # Falling off the end of an `if` without an `else` would return
            # NULL and silently destroy the caller's object on
            # `names(df) <- NULL`; treat NULL as "nothing to rename" instead.
            if (is.null(value)) {
              return(x)
            }
            # Invoke toDF on the underlying JVM object with the new names and
            # wrap the resulting reference in an R-side DataFrame.
            sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
            dataFrame(sdf)
          })

#' Register Temporary Table
#'
#' Registers a DataFrame as a Temporary Table in the SQLContext
Expand Down Expand Up @@ -473,6 +483,14 @@ setMethod("distinct",
dataFrame(sdf)
})

#' Unique rows of a DataFrame
#'
#' Method for \code{unique} on a DataFrame. It delegates to \code{distinct}
#' and returns whatever \code{distinct(x)} returns.
#'
#' @param x a SparkSQL DataFrame
#'
#' @rdname unique
#' @aliases unique
setMethod(
  f = "unique",
  signature = signature(x = "DataFrame"),
  definition = function(x) distinct(x)
)

#' Sample
#'
#' Return a sampled subset of this DataFrame using a random seed.
Expand Down Expand Up @@ -534,6 +552,53 @@ setMethod("count",
callJMethod(x@sdf, "count")
})

#' Number of rows in a DataFrame
#'
#' Method for \code{nrow} on a DataFrame. It delegates to \code{count} and
#' returns the value of \code{count(x)}.
#'
#' @param x a SparkSQL DataFrame
#'
#' @rdname nrow
#' @aliases count
setMethod(
  f = "nrow",
  signature = signature(x = "DataFrame"),
  definition = function(x) {
    row_total <- count(x)
    row_total
  }
)

#' Number of columns in a DataFrame
#'
#' Counts the entries returned by \code{columns(x)}.
#'
#' @param x a SparkSQL DataFrame
#'
#' @rdname ncol
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' ncol(df)
#' }
setMethod(
  f = "ncol",
  signature = signature(x = "DataFrame"),
  definition = function(x) {
    column_names <- columns(x)
    length(column_names)
  }
)

#' Dimensions of a DataFrame
#'
#' Returns a vector holding the number of rows (from \code{count(x)})
#' followed by the number of columns (from \code{ncol(x)}).
#'
#' @param x a SparkSQL DataFrame
#'
#' @rdname dim
#' @export
#' @examples
#'\dontrun{
#' sc <- sparkR.init()
#' sqlContext <- sparkRSQL.init(sc)
#' path <- "path/to/file.json"
#' df <- jsonFile(sqlContext, path)
#' dim(df)
#' }
setMethod(
  f = "dim",
  signature = signature(x = "DataFrame"),
  definition = function(x) {
    # Rows first, then columns, matching the order base R uses for dim().
    row_total <- count(x)
    column_total <- ncol(x)
    c(row_total, column_total)
  }
)

#' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
#'
#' @param x A SparkSQL DataFrame
Expand Down Expand Up @@ -1231,6 +1296,24 @@ setMethod("unionAll",
dataFrame(unioned)
})

# Turn rbind into an S4 generic that dispatches on the arguments passed
# through "...", so a call made only of DataFrames reaches the method below.
setGeneric("rbind", signature = "...")

# Row-bind one or more DataFrames by chaining unionAll().
#
# x:             the first DataFrame.
# ...:           further DataFrames to append, in order.
# deparse.level: present only so the formals line up with the rbind generic;
#                it is not used.
#
# The inputs are folded from the right, i.e. rbind(a, b, c) evaluates
# unionAll(a, unionAll(b, c)). Folding over the argument list, rather than
# branching on nargs(), means the result does not depend on whether
# deparse.level was supplied explicitly, and a single DataFrame is returned
# as-is instead of recursing with no arguments left.
rbind.SparkDataFrames <- function(x, ..., deparse.level = 1) {
  Reduce(unionAll, list(x, ...), right = TRUE)
}

#' @rdname rbind
#' @aliases unionAll
setMethod("rbind",
          signature(... = "DataFrame"),
          rbind.SparkDataFrames
)

#' Intersect
#'
#' Return a new DataFrame containing rows only in both this DataFrame
Expand Down

0 comments on commit b5aa988

Please sign in to comment.