In [0]:
# Build a small demo DataFrame from in-memory rows and an explicit schema.
from pyspark.sql.types import IntegerType, StringType, StructField, StructType

# Each tuple follows the schema's field order. Some name/id fields are
# empty strings (not None), and the last row carries a salary of -1.
data2 = [
    ("James", "", "Smith", "36636", "M", 3000),
    ("Michael", "Rose", "", "40288", "M", 4000),
    ("Robert", "", "Williams", "42114", "M", 4000),
    ("Maria", "Anne", "Jones", "39192", "F", 4000),
    ("Jen", "Mary", "Brown", "", "F", -1),
]

# All fields are nullable; every field is a string except the integer salary.
# No line-continuation backslashes are needed inside the brackets.
schema = StructType(
    [
        StructField("firstname", StringType(), True),
        StructField("middlename", StringType(), True),
        StructField("lastname", StringType(), True),
        StructField("id", StringType(), True),
        StructField("gender", StringType(), True),
        StructField("salary", IntegerType(), True),
    ]
)

df = spark.createDataFrame(data=data2, schema=schema)
df.printSchema()
df.show()


root
 |-- firstname: string (nullable = true)
 |-- middlename: string (nullable = true)
 |-- lastname: string (nullable = true)
 |-- id: string (nullable = true)
 |-- gender: string (nullable = true)
 |-- salary: integer (nullable = true)

+---------+----------+--------+-----+------+------+
|firstname|middlename|lastname|   id|gender|salary|
+---------+----------+--------+-----+------+------+
|    James|          |   Smith|36636|     M|  3000|
|  Michael|      Rose|        |40288|     M|  4000|
|   Robert|          |Williams|42114|     M|  4000|
|    Maria|      Anne|   Jones|39192|     F|  4000|
|      Jen|      Mary|   Brown|     |     F|    -1|
+---------+----------+--------+-----+------+------+



In [0]:
# Select a subset of columns by passing each column name as a separate argument.
(
    df
    .select("id", "gender", "salary")
    .show()
)

+-----+------+------+
|   id|gender|salary|
+-----+------+------+
|36636|     M|  3000|
|40288|     M|  4000|
|42114|     M|  4000|
|39192|     F|  4000|
|     |     F|    -1|
+-----+------+------+



In [0]:
# Select a subset of columns by passing the column names as a single list.
(
    df
    .select(["id", "gender", "salary"])
    .show()
)

+-----+------+------+
|   id|gender|salary|
+-----+------+------+
|36636|     M|  3000|
|40288|     M|  4000|
|42114|     M|  4000|
|39192|     F|  4000|
|     |     F|    -1|
+-----+------+------+



In [0]:
# Give the DataFrame the alias "u" and select every column through it ("u.*").
(
    df.alias("u")
    .select("u.*")
    .show()
)

+---------+----------+--------+-----+------+------+
|firstname|middlename|lastname|   id|gender|salary|
+---------+----------+--------+-----+------+------+
|    James|          |   Smith|36636|     M|  3000|
|  Michael|      Rose|        |40288|     M|  4000|
|   Robert|          |Williams|42114|     M|  4000|
|    Maria|      Anne|   Jones|39192|     F|  4000|
|      Jen|      Mary|   Brown|     |     F|    -1|
+---------+----------+--------+-----+------+------+



In [0]:
# Select individual columns, each qualified with the DataFrame alias "u".
(
    df.alias("u")
    .select("u.id", "u.firstname", "u.lastname")
    .show()
)

+-----+---------+--------+
|   id|firstname|lastname|
+-----+---------+--------+
|36636|    James|   Smith|
|40288|  Michael|        |
|42114|   Robert|Williams|
|39192|    Maria|   Jones|
|     |      Jen|   Brown|
+-----+---------+--------+



In [0]:
# select() accepts Column objects and plain column-name strings in the same call:
# "id" is passed as a Column built with col(), the other two as strings.
from pyspark.sql.functions import col

(
    df
    .select(col("id"), "firstname", "lastname")
    .show()
)

+-----+---------+--------+
|   id|firstname|lastname|
+-----+---------+--------+
|36636|    James|   Smith|
|40288|  Michael|        |
|42114|   Robert|Williams|
|39192|    Maria|   Jones|
|     |      Jen|   Brown|
+-----+---------+--------+



In [0]:
# Concatenate first and last name into a new column named "Fullname".
# concat() adds no separator, so the two values are joined directly.
# NOTE(review): the wildcard import below also shadows Python builtins such as
# sum/min/max with their Spark column functions. It is kept unchanged because
# other cells may rely on names it brings in; if none do, prefer
# `from pyspark.sql.functions import concat`.
from pyspark.sql.functions import *

(
    df
    .select(
        "id",
        "firstname",
        "lastname",
        concat("firstname", "lastname").alias("Fullname"),
    )
    .show()
)

+-----+---------+--------+--------------+
|   id|firstname|lastname|      Fullname|
+-----+---------+--------+--------------+
|36636|    James|   Smith|    JamesSmith|
|40288|  Michael|        |       Michael|
|42114|   Robert|Williams|RobertWilliams|
|39192|    Maria|   Jones|    MariaJones|
|     |      Jen|   Brown|      JenBrown|
+-----+---------+--------+--------------+



In [0]:
# selectExpr() takes SQL expression strings, so the concatenation and its
# alias ("name") are written as a SQL fragment instead of function calls.
(
    df
    .selectExpr(
        "id",
        "firstname",
        "lastname",
        "concat(firstname,  lastname) as name",
    )
    .show()
)

+-----+---------+--------+--------------+
|   id|firstname|lastname|          name|
+-----+---------+--------+--------------+
|36636|    James|   Smith|    JamesSmith|
|40288|  Michael|        |       Michael|
|42114|   Robert|Williams|RobertWilliams|
|39192|    Maria|   Jones|    MariaJones|
|     |      Jen|   Brown|      JenBrown|
+-----+---------+--------+--------------+



In [0]:
# Register the DataFrame as the temporary view "user" so it can be
# referenced by name from spark.sql() queries.
df.createOrReplaceTempView(name="user")

In [0]:
# Run the name concatenation as a SQL query against the "user" temp view
# and print the result.
(
    spark.sql(""" 
select id, firstname,lastname,concat(firstname, lastname) as name
from user
""")
    .show()
)

+-----+---------+--------+--------------+
|   id|firstname|lastname|          name|
+-----+---------+--------+--------------+
|36636|    James|   Smith|    JamesSmith|
|40288|  Michael|        |       Michael|
|42114|   Robert|Williams|RobertWilliams|
|39192|    Maria|   Jones|    MariaJones|
|     |      Jen|   Brown|      JenBrown|
+-----+---------+--------+--------------+



In [0]:
# Display df once more; no visible cell above reassigns df after it is created.
df.show()