In [1]:
from pyspark.sql.functions import *
from pyspark.sql import SparkSession

# Reuse the already-running Spark session if there is one; otherwise create it.
spark = (
    SparkSession
    .builder
    .getOrCreate()
)

In [2]:
# Column names, in the same order as the fields of each record in `data`.
columns = ["firstname", "middlename", "lastname", "dob", "gender", "salary"]

# Sample employee records. Some name parts are deliberately empty strings,
# and `dob` is kept as a plain 'YYYY-MM-DD' string.
data = [
    ("James", "", "Smith", "1991-04-01", "M", 3000),
    ("Michael", "Rose", "", "2000-05-19", "M", 4000),
    ("Robert", "", "Williams", "1978-09-05", "M", 4000),
    ("Maria", "Anne", "Jones", "1967-12-01", "F", 4000),
    ("Jen", "Mary", "Brown", "1980-02-17", "F", -1),
]

In [3]:
# Turn the in-memory records into a DataFrame, naming the columns from `columns`
# and letting Spark infer each column's type from the values.
df = spark.createDataFrame(data, columns)

# Preview at most five rows.
df.show(n=5)

+---------+----------+--------+----------+------+------+
|firstname|middlename|lastname|       dob|gender|salary|
+---------+----------+--------+----------+------+------+
|    James|          |   Smith|1991-04-01|     M|  3000|
|  Michael|      Rose|        |2000-05-19|     M|  4000|
|   Robert|          |Williams|1978-09-05|     M|  4000|
|    Maria|      Anne|   Jones|1967-12-01|     F|  4000|
|      Jen|      Mary|   Brown|1980-02-17|     F|    -1|
+---------+----------+--------+----------+------+------+



In [4]:
# Print the inferred schema as a tree. Note that `dob` comes out as a string
# (not a date) and `salary` as a long.
df.printSchema()

root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- dob: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: long (nullable = true)



In [9]:
df.withColumn('Full Name',concat_ws(' ',col('firstname'),col('middlename'),col('lastname'))).show(truncate=False)

+---------+----------+--------+----------+------+------+----------------+
|firstname|middlename|lastname|dob       |gender|salary|Full Name       |
+---------+----------+--------+----------+------+------+----------------+
|James    |          |Smith   |1991-04-01|M     |3000  |James  Smith    |
|Michael  |Rose      |        |2000-05-19|M     |4000  |Michael Rose    |
|Robert   |          |Williams|1978-09-05|M     |4000  |Robert  Williams|
|Maria    |Anne      |Jones   |1967-12-01|F     |4000  |Maria Anne Jones|
|Jen      |Mary      |Brown   |1980-02-17|F     |-1    |Jen Mary Brown  |
+---------+----------+--------+----------+------+------+----------------+



In [13]:
df.select(concat_ws(' ',col('firstname'),col('middlename'),col('lastname')).alias('Full Name'),'gender','salary').show(truncate=True)

+----------------+------+------+
|       Full Name|gender|salary|
+----------------+------+------+
|    James  Smith|     M|  3000|
|   Michael Rose |     M|  4000|
|Robert  Williams|     M|  4000|
|Maria Anne Jones|     F|  4000|
|  Jen Mary Brown|     F|    -1|
+----------------+------+------+



In [10]:
# concat() glues the columns together with no separator at all, so the name
# parts run straight into each other (compare with concat_ws()).
df.withColumn(
    'Full Name',
    concat('firstname', 'middlename', 'lastname'),
).show()

+---------+----------+--------+----------+------+------+--------------+
|firstname|middlename|lastname|       dob|gender|salary|     Full Name|
+---------+----------+--------+----------+------+------+--------------+
|    James|          |   Smith|1991-04-01|     M|  3000|    JamesSmith|
|  Michael|      Rose|        |2000-05-19|     M|  4000|   MichaelRose|
|   Robert|          |Williams|1978-09-05|     M|  4000|RobertWilliams|
|    Maria|      Anne|   Jones|1967-12-01|     F|  4000|MariaAnneJones|
|      Jen|      Mary|   Brown|1980-02-17|     F|    -1|  JenMaryBrown|
+---------+----------+--------+----------+------+------+--------------+



In [18]:
df.createOrReplaceTempView('employee')
spark.sql("select concat_ws(' ',firstname,middlename,lastname) as FullName,gender,salary from employee").show()

+----------------+------+------+
|        FullName|gender|salary|
+----------------+------+------+
|    James  Smith|     M|  3000|
|   Michael Rose |     M|  4000|
|Robert  Williams|     M|  4000|
|Maria Anne Jones|     F|  4000|
|  Jen Mary Brown|     F|    -1|
+----------------+------+------+

