In [0]:
from pyspark import SparkConf
from pyspark.sql.session import SparkSession
from pyspark.sql.types import StructType,StructField,StringType,IntegerType
from pyspark.sql.functions import col,udf

# Local Spark session (3 worker threads) shared by every cell below.
spark = (
    SparkSession.builder
    .appName("app")
    .master("local[3]")
    .getOrCreate()
)

In [0]:
# Two-column schema: an integer id and a string name, both declared non-nullable.
schema = StructType(
    [
        StructField("user_id", IntegerType(), nullable=False),
        StructField("name", StringType(), nullable=False),
    ]
)

# Sample rows whose names have deliberately inconsistent casing.
data = [(1, "aLice"), (2, "bOB"), (3, "amIT")]

names = spark.createDataFrame(data=data, schema=schema)
names.show()

+-------+-----+
|user_id| name|
+-------+-----+
|      1|aLice|
|      2|  bOB|
|      3| amIT|
+-------+-----+



In [0]:
# Write a solution to fix the names so that only the first character is uppercase and the rest are lowercase.
# Return the result table ordered by user_id.

@udf(returnType=StringType())
def title(df_col:str):
    """Return ``df_col`` with its first character upper-cased and the rest lower-cased.

    ``str.capitalize`` is used rather than ``str.title`` because ``title``
    upper-cases the start of *every* word (``"mary ann"`` -> ``"Mary Ann"``,
    ``"o'neil"`` -> ``"O'Neil"``), whereas the task asks for only the first
    character of the whole value to be upper case.

    Args:
        df_col: The string value of one row, or ``None`` for a NULL cell.

    Returns:
        The normalised string, or ``None`` when the input is ``None``.
    """
    # A NULL cell reaches the Python function as None; pass it through
    # instead of failing with AttributeError inside the worker.
    if df_col is None:
        return None
    return df_col.capitalize()

# Replace "name" with its normalised form, then print the rows sorted by id.
names_fixed = names.withColumn("name", title(col("name")))
names_fixed.orderBy("user_id").show()

+-------+-----+
|user_id| name|
+-------+-----+
|      1|Alice|
|      2|  Bob|
|      3| Amit|
+-------+-----+



In [0]:
# Expose the DataFrame to Spark SQL under the table name "names".
names.createOrReplaceTempView("names")

# Using a Python UDF in Spark SQL.
# Note: @udf on its own only produces a UDF for the DataFrame API. SQL resolves
# functions by name, so the UDF must also be registered on the session, otherwise
# `title(name)` is an undefined function on a fresh kernel.
# No return type is given here: @udf() defaults to a string result, and
# spark.udf.register reuses the return type of an already-built UDF (passing one
# alongside a UDF is rejected).
@udf()
def title(df_col:str):
    """Return ``df_col`` with only its first character upper-cased.

    Uses ``str.capitalize`` instead of ``str.title`` so that multi-word or
    punctuated values (``"mary ann"``, ``"o'neil"``) do not get additional
    capital letters. ``None`` (a NULL cell) is returned unchanged.
    """
    if df_col is None:
        return None
    return df_col.capitalize()

# Make the UDF callable from SQL under the name used in the query below.
spark.udf.register("title", title)

spark.sql("select user_id, title(name) name from names order by 1").show()

# Pure-SQL alternative with no Python UDF: upper-case the first character and
# lower-case the remainder, built from concat, upper, lower, left, right and length.
first_letter_upper_sql = "select user_id, concat(upper(left(name,1)),lower(right(name,length(name)-1))) name from names order by 1"
spark.sql(first_letter_upper_sql).show()

+-------+-----+
|user_id| name|
+-------+-----+
|      1|Alice|
|      2|  Bob|
|      3| Amit|
+-------+-----+

+-------+-----+
|user_id| name|
+-------+-----+
|      1|Alice|
|      2|  Bob|
|      3| Amit|
+-------+-----+



In [0]:
# Stop the SparkSession created in the first cell; cells that use `spark`
# cannot be re-run after this without re-creating the session.
spark.stop()