In [61]:
from pyspark.sql import SparkSession
from pyspark.sql.functions import *

spark = (SparkSession
    .builder
    .config("spark.sql.session.timeZone", "Asia/Seoul")
    .getOrCreate())

In [87]:
from pyspark.sql import Row
animal = spark.createDataFrame([("Cat", [28,30]), ("Dog", [31,28]), ("Monkey", [3,28]), ("Lion", [10,24]), ("Tiger", [6,10])], ["Name", "Ages"]).toDF("key", "value")
animal.createOrReplaceTempView("animal")
spark.sql("select * from animal").show()

+------+--------+
|   key|   value|
+------+--------+
|   Cat|[28, 30]|
|   Dog|[31, 28]|
|Monkey| [3, 28]|
|  Lion|[10, 24]|
| Tiger| [6, 10]|
+------+--------+



In [88]:
agg = animal.selectExpr("key", "EXPLODE(value) as value")
agg.printSchema()
spark.sql("select * from agg").show()

root
 |-- key: string (nullable = true)
 |-- value: long (nullable = true)

+------+-----+
|   key|value|
+------+-----+
|   Cat|   28|
|   Cat|   30|
|   Dog|   31|
|   Dog|   28|
|Monkey|    3|
|Monkey|   28|
|   Cat|   10|
|   Cat|   24|
|   Dog|    6|
|   Dog|   10|
+------+-----+



In [67]:
spark.sql("show tables").show()
spark.sql("describe agg").show()

+--------+---------+-----------+
|database|tableName|isTemporary|
+--------+---------+-----------+
|        |      agg|       true|
|        |   animal|       true|
+--------+---------+-----------+

+--------+---------+-------+
|col_name|data_type|comment|
+--------+---------+-------+
|     key|   string|   null|
|   value|   bigint|   null|
+--------+---------+-------+



In [68]:
spark.sql("select key, collect_list(value) as values from agg group by key").show()

+------+----------------+
|   key|          values|
+------+----------------+
|   Cat|[28, 30, 10, 24]|
|Monkey|         [3, 28]|
|   Dog| [31, 28, 6, 10]|
+------+----------------+



In [91]:
spark.sql("select key, collect_list(value + 1) as values from (select key, explode(value) as value from animal) as value group by key order by key asc").show()

+------+--------+
|   key|  values|
+------+--------+
|   Cat|[29, 31]|
|   Dog|[32, 29]|
|  Lion|[11, 25]|
|Monkey| [4, 29]|
| Tiger| [7, 11]|
+------+--------+



In [92]:
from pyspark.sql.types import ArrayType, FloatType, StringType, LongType

def plusOneInt(in_values):
    out_values = []
    for value in in_values:
        out_values.append(value + 1)
    return out_values

spark.udf.register("plusOneInt", plusOneInt, ArrayType(LongType()))
spark.sql("select key, plusOneInt(value) AS values FROM animal order by key asc").show()

+------+--------+
|   key|  values|
+------+--------+
|   Cat|[29, 31]|
|   Dog|[32, 29]|
|  Lion|[11, 25]|
|Monkey| [4, 29]|
| Tiger| [7, 11]|
+------+--------+



In [94]:
spark.sql("explain select key, collect_list(value + 1) as values from (select key, explode(value) as value from animal) as value group by key order by key asc").show(truncate=False)

+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|plan                                                                                                                                                                                                                                                                                                                                                                  

In [95]:
spark.sql("explain select key, plusOneInt(value) AS values FROM animal order by key asc").show(truncate=False)

+--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
|plan                                                                                                                                                                                                                                                                                                                                                    |
+-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------