In [1]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *
from pyspark.sql.types import *

In [2]:
# Build (or reuse, if one already exists) a single-threaded local SparkSession
# for the UDF examples in this notebook.
spark = (
    SparkSession.builder
    .master("local")
    .appName("udf_df")
    .getOrCreate()
)

In [3]:
# Sample rows: a sequence number (stored as a string) and a lower-case full name.
columns = ["Seqno", "Name"]
data = [
    ("1", "john jones"),
    ("2", "tracey smith"),
    ("3", "amy sanders"),
]

# Only column names are supplied as the schema; column types are inferred from the data.
df1 = spark.createDataFrame(data, schema=columns)

In [9]:
def convert_case(str):
    """Return ``str`` converted to title case, or ``None`` for a null input.

    Parameters
    ----------
    str : str or None
        The text to convert. The parameter name shadows the builtin ``str``;
        it is kept unchanged so existing keyword callers keep working.

    Returns
    -------
    str or None
        ``str.title()`` of the input, or ``None`` when the input is ``None``.
    """
    # Spark hands null column values to Python UDFs as None; without this
    # guard a single null row raises AttributeError and fails the whole job.
    if str is None:
        return None
    return str.title()

In [10]:
# Wrap convert_case as a Spark UDF. The function is passed directly (the
# lambda added nothing) and the return type is stated explicitly, matching
# how the same function is registered for SQL further down.
convert_udf = udf(convert_case, StringType())

In [11]:
# Keep Seqno as-is and replace Name with its title-cased version.
(
    df1
    .select(
        col("Seqno"),
        convert_udf(col("Name")).alias("Name"),
    )
    .show()
)

+-----+------------+
|Seqno|        Name|
+-----+------------+
|    1|  John Jones|
|    2|Tracey Smith|
|    3| Amy Sanders|
+-----+------------+



In [12]:
@udf(returnType=StringType())
def upper_case(str):
    """Spark UDF that upper-cases a string column value.

    The ``@udf`` decorator turns this function into a column expression
    builder, so it is applied to columns (e.g. ``upper_case(col("Name"))``)
    rather than called with plain Python strings.

    Parameters
    ----------
    str : str or None
        The cell value. The parameter name shadows the builtin ``str``; it is
        kept unchanged for compatibility.

    Returns
    -------
    str or None
        ``str.upper()`` of the input, or ``None`` when the input is ``None``.
    """
    # Null cells arrive as None; return None instead of raising AttributeError.
    if str is None:
        return None
    return str.upper()

In [13]:
# upper_case is already a UDF (it is decorated with @udf), so it must not be
# wrapped in a second udf(): the inner call would invoke a UDF wrapper from
# executor-side Python code, which is not supported. Keep the name as a plain
# alias so any code referring to upper_case_udf gets a working UDF.
upper_case_udf = upper_case

In [14]:
# Add an upper-cased copy of Name as a new column, leaving Name untouched.
(
    df1
    .withColumn("New_name", upper_case(col("Name")))
    .show()
)

+-----+------------+------------+
|Seqno|        Name|    New_name|
+-----+------------+------------+
|    1|  john jones|  JOHN JONES|
|    2|tracey smith|TRACEY SMITH|
|    3| amy sanders| AMY SANDERS|
+-----+------------+------------+



In [15]:
# Register convert_case under a SQL-visible name so it can be called from spark.sql().
spark.udf.register(
    "convert_case_udf",
    convert_case,
    StringType(),
)

<function __main__.convert_case(str)>

In [16]:
# Expose df1 to SQL queries as a temporary view (replacing any existing view of that name).
df1.createOrReplaceTempView(name="NAME_TABLE")

In [17]:
# Call the registered UDF from SQL against the temporary view.
spark.sql(
    "select SeqNo, convert_case_udf(Name) as Name from NAME_TABLE"
).show()

+-----+------------+
|SeqNo|        Name|
+-----+------------+
|    1|  John Jones|
|    2|Tracey Smith|
|    3| Amy Sanders|
+-----+------------+

