In [1]:
from pyspark.sql import SparkSession

# Start a Spark session for this notebook, or reuse the active one.
spark = (
    SparkSession.builder
    .appName('com')
    .getOrCreate()
)

# Sample course rows: one (CourseName, fee, discount) tuple per course offering.
simpleData = (
    ("Java", 4000, 5),
    ("Python", 4600, 10),
    ("Scala", 4100, 15),
    ("Scala", 4500, 15),
    ("PHP", 3000, 20),
)
columns = ["CourseName", "fee", "discount"]

# Only column names are passed as the schema; the column types are left
# for Spark to infer from the values.
df = spark.createDataFrame(simpleData, columns)
df.printSchema()
df.show(truncate=False)

root
 |-- CourseName: string (nullable = true)
 |-- fee: long (nullable = true)
 |-- discount: long (nullable = true)

+----------+----+--------+
|CourseName|fee |discount|
+----------+----+--------+
|Java      |4000|5       |
|Python    |4600|10      |
|Scala     |4100|15      |
|Scala     |4500|15      |
|PHP       |3000|20      |
+----------+----+--------+



In [46]:
# `col` and `upper` are used by the helpers below. Importing them in this cell
# keeps it runnable on a fresh kernel instead of relying on leftover state.
from pyspark.sql.functions import col, upper

data = [("Alice", 25), ("Bob", 30), ("Charlie", 22)]
columns = ["Name", "Age"]
df = spark.createDataFrame(data, columns)


# Example 1: Transform "Name" column to uppercase
def to_upper_case(df):
    """Return a new DataFrame in which the "Name" column is upper-cased.

    The input DataFrame is not modified; all other columns are carried over.
    """
    return df.withColumn("Name", upper(col("Name")))


# Example 2: Double the "Age" value
def double_age(df):
    """Return a new DataFrame in which the "Age" column is multiplied by 2.

    The input DataFrame is not modified; all other columns are carried over.
    """
    return df.withColumn("Age", col("Age") * 2)


# DataFrame.transform(func) calls func(df) and returns its result. Both helpers
# are applied to the original `df`, so the two results are independent.
df1 = df.transform(to_upper_case)
df1.show()
df2 = df.transform(double_age)
df2.show()

+-------+---+
|   Name|Age|
+-------+---+
|  ALICE| 25|
|    BOB| 30|
|CHARLIE| 22|
+-------+---+

+-------+---+
|   Name|Age|
+-------+---+
|  Alice| 50|
|    Bob| 60|
|Charlie| 44|
+-------+---+



In [47]:
# The `transform` function from pyspark.sql.functions applies a function to
# every element of an array column (not to be confused with DataFrame.transform).
from pyspark.sql.functions import transform, upper

# Each row: a comma-delimited name string plus two arrays of language names.
data = [
    ("James,,Smith", ["Java", "Scala", "C++"], ["Spark", "Java"]),
    ("Michael,Rose,", ["Spark", "Java", "C++"], ["Spark", "Java"]),
    ("Robert,,Williams", ["CSharp", "VB"], ["Spark", "Python"]),
]
df = spark.createDataFrame(data, ["Name", "Lan1", "Lan2"])
df.printSchema()
df.show()

# Upper-case each element of the "Lan1" array and expose the result as "lan1".
df.select(
    transform("Lan1", lambda language: upper(language)).alias("lan1")
).show()

root
 |-- Name: string (nullable = true)
 |-- Lan1: array (nullable = true)
 |    |-- element: string (containsNull = true)
 |-- Lan2: array (nullable = true)
 |    |-- element: string (containsNull = true)

+----------------+------------------+---------------+
|            Name|              Lan1|           Lan2|
+----------------+------------------+---------------+
|    James,,Smith|[Java, Scala, C++]|  [Spark, Java]|
|   Michael,Rose,|[Spark, Java, C++]|  [Spark, Java]|
|Robert,,Williams|      [CSharp, VB]|[Spark, Python]|
+----------------+------------------+---------------+

+------------------+
|              lan1|
+------------------+
|[JAVA, SCALA, C++]|
|[SPARK, JAVA, C++]|
|      [CSHARP, VB]|
+------------------+

