Skip to content

Commit

Permalink
[SPARK-6635][SQL] DataFrame.withColumn should replace columns with id…
Browse files Browse the repository at this point in the history
…entical column names

JIRA https://issues.apache.org/jira/browse/SPARK-6635

Author: Liang-Chi Hsieh <viirya@gmail.com>

Closes apache#5541 from viirya/replace_with_column and squashes the following commits:

b539c7b [Liang-Chi Hsieh] For comment.
72f35b1 [Liang-Chi Hsieh] DataFrame.withColumn can replace original column with identical column name.
  • Loading branch information
viirya authored and nemccarthy committed Jun 19, 2015
1 parent 5efd83f commit ab39212
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 1 deletion.
14 changes: 13 additions & 1 deletion sql/core/src/main/scala/org/apache/spark/sql/DataFrame.scala
Original file line number Diff line number Diff line change
Expand Up @@ -747,7 +747,19 @@ class DataFrame private[sql](
* Returns a new [[DataFrame]] by adding a column.
* @group dfops
*/
def withColumn(colName: String, col: Column): DataFrame = select(Column("*"), col.as(colName))
def withColumn(colName: String, col: Column): DataFrame = {
val resolver = sqlContext.analyzer.resolver
val replaced = schema.exists(f => resolver(f.name, colName))
if (replaced) {
val colNames = schema.map { field =>
val name = field.name
if (resolver(name, colName)) col.as(colName) else Column(name)
}
select(colNames :_*)
} else {
select(Column("*"), col.as(colName))
}
}

/**
* Returns a new [[DataFrame]] with a column renamed.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -473,6 +473,14 @@ class DataFrameSuite extends QueryTest {
assert(df.schema.map(_.name).toSeq === Seq("key", "value", "newCol"))
}

test("replace column using withColumn") {
val df2 = TestSQLContext.sparkContext.parallelize(Array(1, 2, 3)).toDF("x")
val df3 = df2.withColumn("x", df2("x") + 1)
checkAnswer(
df3.select("x"),
Row(2) :: Row(3) :: Row(4) :: Nil)
}

test("withColumnRenamed") {
val df = testData.toDF().withColumn("newCol", col("key") + 1)
.withColumnRenamed("value", "valueRenamed")
Expand Down

0 comments on commit ab39212

Please sign in to comment.