Added dim, ncol, nrow, names, rbind, and unique functions to DataFrames

prabeesh · Jul 30, 2015 · b5aa988 · b5aa988
1 parent 069a4c4
commit b5aa988
Show file tree

Hide file tree

Showing 2 changed files with 89 additions and 0 deletions.
diff --git a/R/pkg/NAMESPACE b/R/pkg/NAMESPACE
@@ -28,6 +28,7 @@ exportMethods("arrange",
               "count",
               "crosstab",
               "describe",
+              "dim",
               "distinct",
               "dropna",
               "dtypes",
@@ -44,11 +45,15 @@ exportMethods("arrange",
               "isLocal",
               "join",
               "limit",
+              "names",
+              "ncol",
+              "nrow",
               "orderBy",
               "mutate",
               "names",
               "persist",
               "printSchema",
+              "rbind",
               "registerTempTable",
               "rename",
               "repartition",
@@ -65,6 +70,7 @@ exportMethods("arrange",
               "summarize",
               "take",
               "unionAll",
+              "unique",
               "unpersist",
               "where",
               "withColumn",

diff --git a/R/pkg/R/DataFrame.R b/R/pkg/R/DataFrame.R
@@ -255,6 +255,16 @@ setMethod("names",
             columns(x)
           })
 
+#' @rdname columns
+setMethod("names<-",
+          signature(x = "DataFrame"),
+          function(x, value) {
+            if (!is.null(value)) {
+              sdf <- callJMethod(x@sdf, "toDF", listToSeq(as.list(value)))
+              dataFrame(sdf)
+            }
+          })
+
 #' Register Temporary Table
 #'
 #' Registers a DataFrame as a Temporary Table in the SQLContext
@@ -473,6 +483,14 @@ setMethod("distinct",
             dataFrame(sdf)
           })
 
+#' @rdname unique
+#' @aliases unique
+setMethod("unique",
+          signature(x = "DataFrame"),
+          function(x) {
+            distinct(x)
+          })
+
 #' Sample
 #'
 #' Return a sampled subset of this DataFrame using a random seed.
@@ -534,6 +552,53 @@ setMethod("count",
             callJMethod(x@sdf, "count")
           })
 
+#' @rdname nrow
+#' @aliases count
+setMethod("nrow",
+          signature(x = "DataFrame"),
+          function(x) {
+            count(x)
+          })
+
+#' Returns the number of columns in a DataFrame
+#'
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname ncol
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' ncol(df)
+#' }
+setMethod("ncol",
+          signature(x = "DataFrame"),
+          function(x) {
+            length(columns(x))
+          })
+
+#' Returns the dimentions (number for rows and columns) of a DataFrame
+#' @param x a SparkSQL DataFrame
+#'
+#' @rdname dim
+#' @export
+#' @examples
+#'\dontrun{
+#' sc <- sparkR.init()
+#' sqlContext <- sparkRSQL.init(sc)
+#' path <- "path/to/file.json"
+#' df <- jsonFile(sqlContext, path)
+#' dim(df)
+#' }
+setMethod("dim",
+          signature(x = "DataFrame"),
+          function(x) {
+            c(count(x), ncol(x))
+          })
+
 #' Collects all the elements of a Spark DataFrame and coerces them into an R data.frame.
 #'
 #' @param x A SparkSQL DataFrame
@@ -1231,6 +1296,24 @@ setMethod("unionAll",
             dataFrame(unioned)
           })
 
+setGeneric("rbind", signature = "...")
+
+rbind.SparkDataFrames <- function(x, ..., deparse.level = 1) {
+  allargs <- list(...)
+  if (nargs() == 3) {
+    unionAll(x, ...)
+  } else {
+    unionAll(x, Recall(..., deparse.level = 1))
+  }
+}
+
+#' @rdname rbind
+#' @aliases unionAll
+setMethod("rbind",
+          signature(... = "DataFrame"),
+          rbind.SparkDataFrames
+          )
+
 #' Intersect
 #'
 #' Return a new DataFrame containing rows only in both this DataFrame