## Introduction to PySpark Math Functions
PySpark math functions, available in the pyspark.sql.functions module, provide a range of mathematical operations for transforming numerical data in DataFrames.

In [0]:
# Column names and types for the demo transactions, written as a Spark DDL string
schema = "transaction_id INT, quantity INT, unit_price DOUBLE"

# One tuple per transaction: (transaction_id, quantity, unit_price)
data = [
    (1, 3, 20.0),
    (2, -1, 15.0),  # quantity below zero models a returned item
    (3, 2, 29.99),
    (4, 5, 10.0),
]

# Build the DataFrame from the rows and the DDL schema, then print it
df = spark.createDataFrame(data=data, schema=schema)
df.show()

+--------------+--------+----------+
|transaction_id|quantity|unit_price|
+--------------+--------+----------+
|             1|       3|      20.0|
|             2|      -1|      15.0|
|             3|       2|     29.99|
|             4|       5|      10.0|
+--------------+--------+----------+



In [0]:
# Derive total_price (quantity * unit_price) with plain column arithmetic,
# keeping the three original columns alongside it
df = df.select(
    "transaction_id",
    "quantity",
    "unit_price",
    (df["quantity"] * df["unit_price"]).alias("total_price"),
)

df.display()

transaction_id,quantity,unit_price,total_price
1,3,20.0,60.0
2,-1,15.0,-15.0
3,2,29.99,59.98
4,5,10.0,50.0


In [0]:
# Same calculation expressed with withColumn; the result is only displayed,
# not assigned back to df
df.withColumn(
    colName="total_price",
    col=df["quantity"] * df["unit_price"],
).display()

transaction_id,quantity,unit_price,total_price
1,3,20.0,60.0
2,-1,15.0,-15.0
3,2,29.99,59.98
4,5,10.0,50.0


In [0]:
from pyspark.sql.functions import abs, round

In [0]:
# abs: show total_price next to its absolute value
# (abs here is pyspark.sql.functions.abs, not the Python builtin)
df.select(["total_price", abs("total_price")]).display()

total_price,abs(total_price)
60.0,60.0
-15.0,15.0
59.98,59.98
50.0,50.0


In [0]:
# Apply a 25% discount: multiply quantity * unit_price by 0.75 (= 1 - 0.25)
# and store the result in a new total_price_with_discount column
df = df.withColumn(
    colName="total_price_with_discount",
    col=df["quantity"] * df["unit_price"] * 0.75,
)

df.display()

transaction_id,quantity,unit_price,total_price,total_price_with_discount
1,3,20.0,60.0,45.0
2,-1,15.0,-15.0,-11.25
3,2,29.99,59.98,44.985
4,5,10.0,50.0,37.5


In [0]:
# Round the discounted total to 2 decimal places
# (round here is pyspark.sql.functions.round, not the Python builtin);
# the rounded frame is only displayed, df itself is left unchanged
df.withColumn(
    colName="total_price_with_discount",
    col=round("total_price_with_discount", scale=2),
).display()

transaction_id,quantity,unit_price,total_price,total_price_with_discount
1,3,20.0,60.0,45.0
2,-1,15.0,-15.0,-11.25
3,2,29.99,59.98,44.99
4,5,10.0,50.0,37.5
