Replace

In [6]:
from pyspark.sql import SparkSession

# Obtain the notebook's Spark entry point via the builder's getOrCreate(),
# registered under the application name "MyApp".
spark = (
    SparkSession.builder
    .appName("MyApp")
    .getOrCreate()
)


# Sample rows used throughout the notebook.
# Each tuple is (user_id, city, amount), matching the column list passed to
# createDataFrame in the next cell.
data = [
    ("U0001", "Delhi", 450),
    ("U0002", "Mumbai", 620),
    ("U0004", "Delhi", 700),
]

In [7]:


# Column names, in the same order as the fields of each tuple in `data`.
columns = ["user_id", "city", "amount"]

# Build the DataFrame from the in-memory rows; only names are supplied, so
# Spark infers the column types from the Python values.
df = spark.createDataFrame(data, schema=columns)
df.show()

+-------+------+------+
|user_id|  city|amount|
+-------+------+------+
|  U0001| Delhi|   450|
|  U0002|Mumbai|   620|
|  U0004| Delhi|   700|
+-------+------+------+



In [8]:
from pyspark.sql.functions import col

# Multiplier that adds 18% on top of the base amount.
TAX_MULTIPLIER = 1.18

# The multiplier is a Python float, so the new column is floating point;
# values such as 731.5999999999999 in the output are binary floating-point
# representation noise, not a logic error.
df = df.withColumn(
    "amount_with_tax",
    col("amount") * TAX_MULTIPLIER,
)
df.show()

+-------+------+------+-----------------+
|user_id|  city|amount|  amount_with_tax|
+-------+------+------+-----------------+
|  U0001| Delhi|   450|            531.0|
|  U0002|Mumbai|   620|731.5999999999999|
|  U0004| Delhi|   700|            826.0|
+-------+------+------+-----------------+



In [9]:
# Rename the tax-inclusive column; the values themselves are untouched.
df = df.withColumnRenamed(
    existing="amount_with_tax",
    new="total_amount",
)
df.show()

+-------+------+------+-----------------+
|user_id|  city|amount|     total_amount|
+-------+------+------+-----------------+
|  U0001| Delhi|   450|            531.0|
|  U0002|Mumbai|   620|731.5999999999999|
|  U0004| Delhi|   700|            826.0|
+-------+------+------+-----------------+



In [10]:
# Swap the value "Delhi" for "New Delhi", restricted to the `city` column
# so that no other column is considered for replacement.
df = df.replace(
    to_replace="Delhi",
    value="New Delhi",
    subset=["city"],
)
df.show()

+-------+---------+------+-----------------+
|user_id|     city|amount|     total_amount|
+-------+---------+------+-----------------+
|  U0001|New Delhi|   450|            531.0|
|  U0002|   Mumbai|   620|731.5999999999999|
|  U0004|New Delhi|   700|            826.0|
+-------+---------+------+-----------------+



In [12]:
from pyspark.sql.functions import when

# Label each row by its base amount: "High" at 500 or above, "Low" for
# everything else (the threshold is inclusive).
is_high_amount = col("amount") >= 500
df = df.withColumn(
    "amount_category",
    when(is_high_amount, "High").otherwise("Low"),
)
df.show()

+-------+---------+------+-----------------+---------------+
|user_id|     city|amount|     total_amount|amount_category|
+-------+---------+------+-----------------+---------------+
|  U0001|New Delhi|   450|            531.0|            Low|
|  U0002|   Mumbai|   620|731.5999999999999|           High|
|  U0004|New Delhi|   700|            826.0|           High|
+-------+---------+------+-----------------+---------------+

