In [1]:
from pyspark.sql import SparkSession

# Start (or reuse) the Spark session used by the later cells.
spark = (
    SparkSession.builder
    .appName('SparkByExamples.com')
    .getOrCreate()
)

# Sample rows: a sequence number and a lower-case full name.
columns = ["Seqno", "Name"]
data = [
    ("1", "john jones"),
    ("2", "tracey smith"),
    ("3", "amy sanders"),
]

df = spark.createDataFrame(data, schema=columns)
df.show(truncate=False)

+-----+------------+
|Seqno|Name        |
+-----+------------+
|1    |john jones  |
|2    |tracey smith|
|3    |amy sanders |
+-----+------------+



In [4]:
def convertCase(str):
    """Capitalize the first letter of every space-separated word.

    Args:
        str: Text to convert, or None. The parameter name shadows the
            builtin ``str``; it is kept so existing callers keep working.

    Returns:
        The text with the first character of each word upper-cased and the
        remainder of each word unchanged, or None when the input is None.
        Words are split on single spaces, so runs of spaces are preserved,
        and no trailing space is appended to the result.
    """
    if str is None:
        # Propagate missing values instead of failing on None.split(...).
        return None
    # Joining the words avoids the trailing " " that per-word
    # concatenation would leave at the end of the result.
    return " ".join(word[:1].upper() + word[1:] for word in str.split(" "))

In [7]:
from pyspark.sql.functions import col, udf
from pyspark.sql.types import StringType

# Wrap convertCase as a Spark UDF that returns strings.
c_udf = udf(convertCase, returnType=StringType())

# Apply the UDF to the Name column and keep the column name unchanged.
df.select(
    col("Seqno"),
    c_udf(col("Name")).alias("Name"),
).show(truncate=False)

+-----+-------------+
|Seqno|Name         |
+-----+-------------+
|1    |John Jones   |
|2    |Tracey Smith |
|3    |Amy Sanders  |
+-----+-------------+



In [8]:
# convertUDF was not defined in any visible cell (NameError on
# Restart & Run All), so build it here from convertCase before using it.
convertUDF = udf(convertCase, StringType())
df.select(col("Seqno"), convertUDF(col("Name")).alias("Name")).show(truncate=False)

+-----+-------------+
|Seqno|Name         |
+-----+-------------+
|1    |John Jones   |
|2    |Tracey Smith |
|3    |Amy Sanders  |
+-----+-------------+



In [9]:
def upperCase(str):
    """Return the text converted to upper case.

    Args:
        str: Text to convert, or None. The parameter name shadows the
            builtin ``str``; it is kept so existing callers keep working.

    Returns:
        ``str.upper()``, or None when the input is None.
    """
    if str is None:
        # Pass missing values through instead of raising AttributeError
        # when this function is applied to a row with no name.
        return None
    return str.upper()

# Wrap upperCase as a Spark UDF that returns strings.
uc = udf(upperCase, returnType=StringType())

# Add the upper-cased name as an extra column next to the original.
df.withColumn(
    "Cureated Name",
    uc(col("Name")),
).show()

+-----+------------+-------------+
|Seqno|        Name|Cureated Name|
+-----+------------+-------------+
|    1|  john jones|   JOHN JONES|
|    2|tracey smith| TRACEY SMITH|
|    3| amy sanders|  AMY SANDERS|
+-----+------------+-------------+



In [10]:
# Make upperCase callable from Spark SQL under the name "uc".
spark.udf.register("uc", upperCase, returnType=StringType())

# Expose the DataFrame as a temporary view and query it with the UDF.
df.createOrReplaceTempView("NAME_TABLE")
spark.sql(
    "select Seqno, uc(Name) as Name from NAME_TABLE"
).show(truncate=False)

+-----+------------+
|Seqno|Name        |
+-----+------------+
|1    |JOHN JONES  |
|2    |TRACEY SMITH|
|3    |AMY SANDERS |
+-----+------------+



In [11]:
# NOTE: this rebinds the name `upperCase` from the plain function defined in
# an earlier cell to a Spark UDF object created by the decorator.
@udf(returnType=StringType())
def upperCase(str):
    """Return the text converted to upper case; None is passed through.

    Args:
        str: Text to convert, or None. The parameter name shadows the
            builtin ``str``; it is kept unchanged.

    Returns:
        ``str.upper()``, or None when the input is None.
    """
    if str is None:
        # Guard against rows with no name; None has no .upper().
        return None
    return str.upper()

# In the recorded output this replaces the existing Name column rather than
# adding a new one: only Seqno and NAME are shown.
df.withColumn("NAME", upperCase(col("Name"))).show()

+-----+------------+
|Seqno|        NAME|
+-----+------------+
|    1|  JOHN JONES|
|    2|TRACEY SMITH|
|    3| AMY SANDERS|
+-----+------------+



In [20]:
# Same sample rows as the first DataFrame plus one row whose Name is missing.
columns = ["Seqno", "Name"]
data = [
    ("1", "john jones"),
    ("2", "tracey smith"),
    ("3", "amy sanders"),
    ("4", None),
]

df2 = spark.createDataFrame(data, schema=columns)
df2.show(truncate=False)

# Register the frame as a temporary view so it can be queried with SQL.
df2.createOrReplaceTempView("NAME_TABLE2")

#spark.sql("select Seqno, uc(Name) as Name from NAME_TABLE2").show(truncate=False)
def convertC(str):
    """Capitalize the first letter of every space-separated word.

    Args:
        str: Text to convert, or None. The parameter name shadows the
            builtin ``str``; it is kept so existing callers keep working.

    Returns:
        The text with the first character of each word upper-cased and the
        remainder of each word unchanged, or None when the input is None.
        Words are split on single spaces, so runs of spaces are preserved,
        and no trailing space is appended to the result.
    """
    if str is None:
        # Propagate missing values instead of failing on None.split(...).
        return None
    # Joining the words avoids the trailing " " that per-word
    # concatenation would leave at the end of the result.
    return " ".join(word[:1].upper() + word[1:] for word in str.split(" "))
# Register a SQL-callable UDF that returns "" for a missing name and
# otherwise delegates to convertC.
spark.udf.register(
    "nullUDF",
    lambda name: "" if name is None else convertC(name),
    returnType=StringType(),
)
spark.sql("select Seqno, nullUDF(Name) NAME from NAME_TABLE2").show(truncate=False)

+-----+------------+
|Seqno|Name        |
+-----+------------+
|1    |john jones  |
|2    |tracey smith|
|3    |amy sanders |
|4    |null        |
+-----+------------+

+-----+-------------+
|Seqno|NAME         |
+-----+-------------+
|1    |John Jones   |
|2    |Tracey Smith |
|3    |Amy Sanders  |
|4    |             |
+-----+-------------+

