In [0]:
from pyspark.sql.types import *

myschema = StructType([
    StructField("id", IntegerType()),
    StructField("name", StringType()),
    StructField("age", StringType()),
    StructField("salary", IntegerType()),
    StructField("address", StringType()),
    StructField("gender", StringType())
])

employee_df = spark.read.format("csv")\
    .option("header", "true")\
    .schema(myschema)\
    .load("/FileStore/tables/employee_write_data.csv")

employee_df.show()
employee_df.printSchema()
employee_df.columns

+---+----------+--------+------+----------+-----------+
| id|      name|     age|salary|   address|     gender|
+---+----------+--------+------+----------+-----------+
|  1|    Manish|      26|  null|     INDIA|          m|
|  2|    Nikita|      23|  null|       USA|          f|
|  3|    Pritam|      22|  null|     INDIA|          m|
|  4|  Prantosh|      17|  null|     JAPAN|          m|
|  5|    Vikash|      31|  null|       USA|          m|
|  6|     Rahul|      55|  null|     INDIA|          m|
|  7|      Raju|      67|  null|       USA|          m|
|  8|   Praveen|      28|  null|     JAPAN|          m|
|  9|       Dev|      32|  null|     JAPAN|          m|
| 10|    Sherin|      16|  null|    RUSSIA|          f|
| 11|      Ragu|      12|  null|     INDIA|          f|
| 12|     Sweta|      43|  null|     INDIA|          f|
| 13|   Raushan|      48|  null|       USA|          m|
| 14|    Mukesh|      36|  null|    RUSSIA|          m|
| 15|   Prakash|      52|  null|     INDIA|     

In [0]:
employee_df.select("name").show()

+----------+
|      name|
+----------+
|    Manish|
|    Nikita|
|    Pritam|
|  Prantosh|
|    Vikash|
|     Rahul|
|      Raju|
|   Praveen|
|       Dev|
|    Sherin|
|      Ragu|
|     Sweta|
|   Raushan|
|    Mukesh|
|   Prakash|
+----------+



In [0]:
from pyspark.sql.functions import *

employee_df.select(col("name")).show()

+----------+
|      name|
+----------+
|    Manish|
|    Nikita|
|    Pritam|
|  Prantosh|
|    Vikash|
|     Rahul|
|      Raju|
|   Praveen|
|       Dev|
|    Sherin|
|      Ragu|
|     Sweta|
|   Raushan|
|    Mukesh|
|   Prakash|
+----------+



In [0]:
employee_df.select(col("id") + 5).show()

+--------+
|(id + 5)|
+--------+
|       6|
|       7|
|       8|
|       9|
|      10|
|      11|
|      12|
|      13|
|      14|
|      15|
|      16|
|      17|
|      18|
|      19|
|      20|
+--------+



In [0]:
employee_df.select("id", "name", "age").show()

+---+----------+--------+
| id|      name|     age|
+---+----------+--------+
|  1|    Manish|      26|
|  2|    Nikita|      23|
|  3|    Pritam|      22|
|  4|  Prantosh|      17|
|  5|    Vikash|      31|
|  6|     Rahul|      55|
|  7|      Raju|      67|
|  8|   Praveen|      28|
|  9|       Dev|      32|
| 10|    Sherin|      16|
| 11|      Ragu|      12|
| 12|     Sweta|      43|
| 13|   Raushan|      48|
| 14|    Mukesh|      36|
| 15|   Prakash|      52|
+---+----------+--------+



In [0]:
"""
employee_df["salary"] ---> Used for join
"""
employee_df.select("id", col("name"), employee_df["salary"], employee_df.address).show()

+---+----------+------+----------+
| id|      name|salary|   address|
+---+----------+------+----------+
|  1|    Manish|  null|     INDIA|
|  2|    Nikita|  null|       USA|
|  3|    Pritam|  null|     INDIA|
|  4|  Prantosh|  null|     JAPAN|
|  5|    Vikash|  null|       USA|
|  6|     Rahul|  null|     INDIA|
|  7|      Raju|  null|       USA|
|  8|   Praveen|  null|     JAPAN|
|  9|       Dev|  null|     JAPAN|
| 10|    Sherin|  null|    RUSSIA|
| 11|      Ragu|  null|     INDIA|
| 12|     Sweta|  null|     INDIA|
| 13|   Raushan|  null|       USA|
| 14|    Mukesh|  null|    RUSSIA|
| 15|   Prakash|  null|     INDIA|
+---+----------+------+----------+



In [0]:
employee_df.select(expr("id as employee_id"), expr("id + 5"), expr("concat(name,address) as name_and_address")).show()

+-----------+--------+------------------+
|employee_id|(id + 5)|  name_and_address|
+-----------+--------+------------------+
|          1|       6|   Manish    INDIA|
|          2|       7|      Nikita   USA|
|          3|       8|    Pritam   INDIA|
|          4|       9|  Prantosh   JAPAN|
|          5|      10|      Vikash   USA|
|          6|      11|     Rahul   INDIA|
|          7|      12|        Raju   USA|
|          8|      13|  Praveen    JAPAN|
|          9|      14|       Dev   JAPAN|
|         10|      15|  Sherin    RUSSIA|
|         11|      16|     Ragu    INDIA|
|         12|      17|     Sweta   INDIA|
|         13|      18|     Raushan   USA|
|         14|      19|  Mukesh    RUSSIA|
|         15|      20|   Prakash   INDIA|
+-----------+--------+------------------+



In [0]:
employee_df.createOrReplaceTempView("employee_tbl")

spark.sql("""
          select * from employee_tbl
          """).show()

+---+----------+--------+------+----------+-----------+
| id|      name|     age|salary|   address|     gender|
+---+----------+--------+------+----------+-----------+
|  1|    Manish|      26|  null|     INDIA|          m|
|  2|    Nikita|      23|  null|       USA|          f|
|  3|    Pritam|      22|  null|     INDIA|          m|
|  4|  Prantosh|      17|  null|     JAPAN|          m|
|  5|    Vikash|      31|  null|       USA|          m|
|  6|     Rahul|      55|  null|     INDIA|          m|
|  7|      Raju|      67|  null|       USA|          m|
|  8|   Praveen|      28|  null|     JAPAN|          m|
|  9|       Dev|      32|  null|     JAPAN|          m|
| 10|    Sherin|      16|  null|    RUSSIA|          f|
| 11|      Ragu|      12|  null|     INDIA|          f|
| 12|     Sweta|      43|  null|     INDIA|          f|
| 13|   Raushan|      48|  null|       USA|          m|
| 14|    Mukesh|      36|  null|    RUSSIA|          m|
| 15|   Prakash|      52|  null|     INDIA|     