<a href="https://colab.research.google.com/github/nitiksha/PySpark_code_practice/blob/main/column_rename.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
from pyspark.sql import SparkSession
# Obtain the active Spark session, or start a new one with the app name "column_rename".
spark = (
    SparkSession.builder
    .appName("column_rename")
    .getOrCreate()
)

In [3]:
# Small three-row demo frame with an "age" and a "name" column.
df = spark.createDataFrame(
    [(14, "Tom"), (23, "Alice"), (16, "Bob")],
    schema=["age", "name"],
)

# withColumnRenamed returns a new DataFrame; `df` itself still has the "name" column.
df1 = df.withColumnRenamed("name", "first_name")
df1.show()

+---+----------+
|age|first_name|
+---+----------+
| 14|       Tom|
| 23|     Alice|
| 16|       Bob|
+---+----------+



In [5]:
# Append a derived column holding twice each row's age; existing columns are kept.
doubled_age = df1["age"] * 2
df2 = df1.withColumn("age_twice", doubled_age)
df2.show()

+---+----------+---------+
|age|first_name|age_twice|
+---+----------+---------+
| 14|       Tom|       28|
| 23|     Alice|       46|
| 16|       Bob|       32|
+---+----------+---------+



In [6]:
# Rename several columns in one call by mapping each existing name to its new name.
# Note: DataFrame.withColumnsRenamed is only available from PySpark 3.4 onwards.
column_renames = {"age": "umar", "name": "naam"}
df3 = df.withColumnsRenamed(column_renames)
df3.show()

+----+-----+
|umar| naam|
+----+-----+
|  14|  Tom|
|  23|Alice|
|  16|  Bob|
+----+-----+



**Add multiple columns**

In [11]:
from pyspark.sql.functions import lit,expr

In [12]:
# Add a constant "country" column to the base frame; lit() turns the Python
# string into a Column expression. Note that this rebinds `df3`.
df3 = df.withColumn(
    "country",
    lit("india"),
)

In [13]:
# Display the frame to confirm the new "country" column was added.
df3.show()

+---+-----+-------+
|age| name|country|
+---+-----+-------+
| 14|  Tom|  india|
| 23|Alice|  india|
| 16|  Bob|  india|
+---+-----+-------+



In [16]:
# Add several constant columns in a single select(): "*" keeps every existing
# column of `df`, and each lit(...).alias(...) appends one new named column.
#
# The frame is built from the base `df` rather than from `df3` itself. The
# previous form (`df3 = df3.select("*", ...)`) was not idempotent: every
# re-run of the cell appended another pair of "continent"/"galaxy" columns.
# Starting from `df` always yields the same five columns, in the same order.
df3 = df.select(
    "*",
    lit("india").alias("country"),
    lit("asia").alias("continent"),
    lit("andromeda").alias("galaxy"),
)

df3.show()

+---+-----+-------+---------+---------+
|age| name|country|continent|   galaxy|
+---+-----+-------+---------+---------+
| 14|  Tom|  india|     asia|andromeda|
| 23|Alice|  india|     asia|andromeda|
| 16|  Bob|  india|     asia|andromeda|
+---+-----+-------+---------+---------+



In [17]:
# Alternative way to add multiple columns: chain one withColumn() call per column.
# Note that "year" is stored as the string "1999", not as a number.
df4 = (
    df3
    .withColumn("month", lit("feb"))
    .withColumn("year", lit("1999"))
)

df4.show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 14|  Tom|  india|     asia|andromeda|  feb|1999|
| 23|Alice|  india|     asia|andromeda|  feb|1999|
| 16|  Bob|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+



In [18]:
# Keep only the rows whose age is greater than 20.
is_over_20 = df4["age"] > 20
df4.filter(is_over_20).show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 23|Alice|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+



In [19]:
# where() is an alias for filter(): same predicate, same result.
df4.where(df4.age > 20).show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 23|Alice|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+



In [20]:
# Order the rows by age, youngest first (ascending is the default direction).
df4.orderBy("age", ascending=True).show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 14|  Tom|  india|     asia|andromeda|  feb|1999|
| 16|  Bob|  india|     asia|andromeda|  feb|1999|
| 23|Alice|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+



In [22]:
# sort() behaves like orderBy(); a plain column sorts in ascending order.
df4.sort(df4["age"]).show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 14|  Tom|  india|     asia|andromeda|  feb|1999|
| 16|  Bob|  india|     asia|andromeda|  feb|1999|
| 23|Alice|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+



In [23]:
from pyspark.sql.functions import asc,desc

In [24]:
# Sort by age from highest to lowest using the desc() column function.
age_descending = desc("age")
df4.sort(age_descending).show()

+---+-----+-------+---------+---------+-----+----+
|age| name|country|continent|   galaxy|month|year|
+---+-----+-------+---------+---------+-----+----+
| 23|Alice|  india|     asia|andromeda|  feb|1999|
| 16|  Bob|  india|     asia|andromeda|  feb|1999|
| 14|  Tom|  india|     asia|andromeda|  feb|1999|
+---+-----+-------+---------+---------+-----+----+

