In [None]:
"""
Author: Matt Martin
Date: 2/7/24
Desc: Demonstrates how to register a Python UDF in Spark so that it can be called from Spark SQL.
"""

## create the spark connection/instance
from pyspark.sql import SparkSession
# Build the session with an application name and explicit driver/executor
# sizing; the parenthesized chain avoids fragile backslash continuations.
spark = (
    SparkSession.builder
    .appName("sequence")
    .config("spark.driver.memory", "2g")
    .config("spark.executor.memory", "8g")
    .config("spark.executor.instances", 10)
    .getOrCreate()
)

In [2]:
from pyspark.sql.functions import udf
from pyspark.sql.types import *

In [7]:
from typing import Optional


@udf(returnType=IntegerType())
def add_some_stuff(x, y) -> Optional[int]:
    """Add two column values, propagating SQL NULL.

    Args:
        x: First operand; ``None`` when the SQL argument is NULL.
        y: Second operand; ``None`` when the SQL argument is NULL.

    Returns:
        ``x + y``, or ``None`` (SQL NULL) if either operand is ``None``.
    """
    # A NULL argument reaches the Python function as None; without this guard
    # `None + int` raises TypeError in the worker and fails the whole query.
    if x is None or y is None:
        return None
    # NOTE(review): the declared return type is a 32-bit IntegerType; if the
    # input columns are 64-bit, large sums may not fit -- confirm whether
    # LongType would be the better declaration.
    return x + y


# Expose the UDF to Spark SQL under the name used in queries.
spark.udf.register('udf_add_some_stuff', add_some_stuff)

<pyspark.sql.udf.UserDefinedFunction at 0x106691ca0>

In [4]:
# Two sample rows, each with an `x` and a `y` value.
data = [
    dict(x=1, y=3),
    dict(x=5, y=2),
]
df = spark.createDataFrame(data)

# Publish the DataFrame as the temp view "numbers" so SQL text can query it.
df.createOrReplaceTempView("numbers")

In [8]:
# Call the registered UDF from SQL text against the "numbers" temp view.
sql = """
    select x, y, udf_add_some_stuff(x,y) as res
    from numbers
"""

# Run the query, then print the resulting rows.
query_result = spark.sql(sql)
query_result.show()

                                                                                

+---+---+---+
|  x|  y|res|
+---+---+---+
|  1|  3|  4|
|  5|  2|  7|
+---+---+---+

