In [1]:
from pyspark.sql import SparkSession
from pyspark import SparkContext
sc = SparkContext("local", "Basic extra")
spark = (SparkSession
         .builder
         .appName('Basic extra')
         .getOrCreate())

In [10]:
from pyspark.sql.types import *

schema = StructType([StructField("celsius", ArrayType(IntegerType()))])
t_list = [[35, 36, 32, 30, 40, 42, 38]], [[31, 32, 34, 55, 56]]
df = spark.createDataFrame(t_list, schema)
df.createOrReplaceTempView('tC')
df.show(truncate=False)

+----------------------------+
|celsius                     |
+----------------------------+
|[35, 36, 32, 30, 40, 42, 38]|
|[31, 32, 34, 55, 56]        |
+----------------------------+



Transform an array


In [15]:
spark.sql("""
SELECT celsius,
transform(celsius, x -> ((x * 9) div 5) + 32) as fahrenheit
FROM tC
""").show()


+--------------------+--------------------+
|             celsius|          fahrenheit|
+--------------------+--------------------+
|[35, 36, 32, 30, ...|[95, 96, 89, 86, ...|
|[31, 32, 34, 55, 56]|[87, 89, 93, 131,...|
+--------------------+--------------------+



Filter an array

In [16]:
spark.sql("""
SELECT celsius,
filter(celsius, x -> x > 38) as high
FROM tC
""").show()

+--------------------+--------+
|             celsius|    high|
+--------------------+--------+
|[35, 36, 32, 30, ...|[40, 42]|
|[31, 32, 34, 55, 56]|[55, 56]|
+--------------------+--------+



Check if exist

In [17]:
spark.sql("""
SELECT celsius,
exists (celsius, x -> x = 38) as threshold
FROM tC
""").show()

+--------------------+---------+
|             celsius|threshold|
+--------------------+---------+
|[35, 36, 32, 30, ...|     true|
|[31, 32, 34, 55, 56]|    false|
+--------------------+---------+



Reduce array to scalar


In [21]:
spark.sql("""
SELECT celsius,
aggregate (
    celsius,
    0,
    (x, acc) -> x + acc,
    acc -> (acc div size(celsius) * 9 div 5) + 32
    ) as avgFahrenheit
FROM tC
""").show()



+--------------------+-------------+
|             celsius|avgFahrenheit|
+--------------------+-------------+
|[35, 36, 32, 30, ...|           96|
|[31, 32, 34, 55, 56]|          105|
+--------------------+-------------+

