In [12]:
import pyspark
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
from termcolor import cprint 

# Obtain the SparkSession for this notebook; getOrCreate() reuses an
# already-running session instead of starting a second one.
spark = (
    SparkSession.builder
    .appName('withField')
    .getOrCreate()
)

### pyspark.sql.Column.withField
#### `Column.withField(fieldName, col)`
An expression that adds/replaces a field in StructType by name.

In [13]:
from pyspark.sql import Row
from pyspark.sql.functions import lit
# One-row DataFrame whose single column `a` is a struct with fields b and c.
df = spark.createDataFrame(
    [
        Row(a=Row(b=1, c=2)),
    ]
)
df.show()

+------+
|     a|
+------+
|{1, 2}|
+------+



#### Replace an existing field in the struct

In [14]:
# 'b' already exists inside struct `a`, so withField overwrites it with 3.
(
    df
    .withColumn('a', df['a'].withField('b', lit(3)))
    .select('a')
    .show()
)

+------+
|     a|
+------+
|{3, 2}|
+------+



#### Add a new field to the struct `a`

In [15]:
# 'd' is not yet a field of struct `a`, so withField appends it with value 4.
(
    df
    .withColumn('a', df['a'].withField('d', lit(4)))
    .show()
)

+---------+
|        a|
+---------+
|{1, 2, 4}|
+---------+



In [16]:
# Same struct layout as before (fields b and c), now with three rows.
df_1 = spark.createDataFrame(
    [Row(a=Row(b=b_val, c=c_val)) for b_val, c_val in [(1, 2), (5, 6), (7, 8)]]
)
df_1.show()

+------+
|     a|
+------+
|{1, 2}|
|{5, 6}|
|{7, 8}|
+------+



In [17]:
# The literal 3 replaces field b in the struct of every row.
(
    df_1
    .withColumn('a', df_1['a'].withField('b', lit(3)))
    .select('a')
    .show()
)

+------+
|     a|
+------+
|{3, 2}|
|{3, 6}|
|{3, 8}|
+------+



In [18]:
# A new field d (constant 4) is appended to the struct of every row.
(
    df_1
    .withColumn('a', df_1['a'].withField('d', lit(4)))
    .select('a')
    .show()
)

+---------+
|        a|
+---------+
|{1, 2, 4}|
|{5, 6, 4}|
|{7, 8, 4}|
+---------+



`withField` is an expression that adds/replaces a field in a StructType by name; the cells below apply it to structs nested inside an array column.

In [19]:
# Column `a` is now an array of structs: each row holds two {b, c} structs.
df_2 = spark.createDataFrame(
    [
        Row(a=[Row(b=1, c=2), Row(b=10, c=20)]),
        Row(a=[Row(b=5, c=6), Row(b=50, c=60)]),
        Row(a=[Row(b=7, c=8), Row(b=0, c=0)]),
    ]
)
df_2.show()

+------------------+
|                 a|
+------------------+
|[{1, 2}, {10, 20}]|
|[{5, 6}, {50, 60}]|
|  [{7, 8}, {0, 0}]|
+------------------+



In [20]:
# Print the schema to confirm that `a` is an array whose elements are structs.
df_2.printSchema()

root
 |-- a: array (nullable = true)
 |    |-- element: struct (containsNull = true)
 |    |    |-- b: long (nullable = true)
 |    |    |-- c: long (nullable = true)



In [21]:
# df_2['a'][1] selects the element at index 1 of each array (a struct), and
# withField sets its b to 3. The new column `a` is that single struct, so the
# array and its other element are no longer part of the result.
(
    df_2
    .withColumn('a', df_2['a'][1].withField('b', lit(3)))
    .select('a')
    .show()
)

+-------+
|      a|
+-------+
|{3, 20}|
|{3, 60}|
| {3, 0}|
+-------+



In [22]:
import pyspark.sql.functions as F

# SQL higher-order function: rebuild every struct in array `a`, keeping c and
# using 3 for b only where b is NULL (coalesce leaves non-null values alone,
# so the sample data above comes through unchanged).
transform_expr = """transform(a, x -> struct(coalesce(x.b, 3) as b, x.c as c)) """

df_3 = df_2.withColumn(
    "a",
    F.expr(transform_expr),
)

# inline() expands the array of structs into one row per element,
# with one output column per struct field.
(
    df_3
    .select(F.expr("inline(a)"))
    .show()
)

+---+---+
|  b|  c|
+---+---+
|  1|  2|
| 10| 20|
|  5|  6|
| 50| 60|
|  7|  8|
|  0|  0|
+---+---+

