[SPARK-47644][PYTHON][DOCS] Refine docstrings of try_*
### What changes were proposed in this pull request?

This PR refines the docstrings of `try_*` functions with more descriptive examples.
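
The common thread across these functions is that they return `NULL` instead of failing the query when the underlying computation errors out (overflow, division by zero, malformed input, and so on). A minimal sketch of that behavior, assuming an active `SparkSession` bound to the name `spark` as in the doctests below:

```python
from pyspark.sql import functions as sf

# try_divide returns NULL on division by zero instead of raising an error,
# which is what the refined Example 3 for try_divide demonstrates below.
spark.range(1).select(sf.try_divide(sf.lit(1), sf.lit(0))).show()
# +----------------+
# |try_divide(1, 0)|
# +----------------+
# |            NULL|
# +----------------+
```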

### Why are the changes needed?

For better API reference documentation.

### Does this PR introduce _any_ user-facing change?

Yes, it fixes user-facing documentation.

### How was this patch tested?

Manually tested. GitHub Actions should verify them.
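
As a rough sketch of such a manual check: the refined examples are plain doctests, so a single function's docstring can be exercised locally with the standard-library `doctest` module. The snippet below is illustrative only; it assumes a working PySpark install and that the examples only need a `SparkSession` bound to the name `spark` in their globals (the choices of `local[1]` and `try_divide` are arbitrary):

```python
import doctest

from pyspark.sql import SparkSession
from pyspark.sql import functions as sf

# Spin up a small local session; the doctest examples reference `spark` directly.
spark = SparkSession.builder.master("local[1]").appName("docstring-check").getOrCreate()
try:
    # Run the examples embedded in try_divide's docstring against this session.
    doctest.run_docstring_examples(sf.try_divide, {"spark": spark}, verbose=True)
finally:
    spark.stop()
```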

### Was this patch authored or co-authored using generative AI tooling?

No

Closes apache#45769 from HyukjinKwon/SPARK-47644.

Authored-by: Hyukjin Kwon <gurwls223@apache.org>
Signed-off-by: Hyukjin Kwon <gurwls223@apache.org>
HyukjinKwon authored and sweisdb committed Apr 1, 2024
1 parent dbf62af commit 77e178a
Showing 1 changed file with 195 additions and 16 deletions.
211 changes: 195 additions & 16 deletions python/pyspark/sql/functions/builtin.py
@@ -511,6 +511,22 @@ def try_add(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+-----------------+
|INTERVAL '3' YEAR|
+-----------------+

Example 5: Overflow results in NULL when ANSI mode is on

>>> import sys
>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_add(sf.lit(sys.maxsize), sf.lit(sys.maxsize))).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+-------------------------------------------------+
|try_add(9223372036854775807, 9223372036854775807)|
+-------------------------------------------------+
| NULL|
+-------------------------------------------------+
"""
return _invoke_function_over_columns("try_add", left, right)

@@ -616,6 +632,22 @@ def try_divide(left: "ColumnOrName", right: "ColumnOrName") -> Column:
| 6 months|
| 4 months|
+--------------------------------------------------+

Example 3: Division by zero results in NULL when ANSI mode is on

>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_divide(df.id, sf.lit(0))).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+-----------------+
|try_divide(id, 0)|
+-----------------+
| NULL|
+-----------------+
"""
return _invoke_function_over_columns("try_divide", left, right)

@@ -666,6 +698,22 @@ def try_multiply(left: "ColumnOrName", right: "ColumnOrName") -> Column:
| 1 years|
| 1 years 3 months|
+----------------------------------------------------+

Example 3: Overflow results in NULL when ANSI mode is on

>>> import sys
>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_multiply(sf.lit(sys.maxsize), sf.lit(sys.maxsize))).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+------------------------------------------------------+
|try_multiply(9223372036854775807, 9223372036854775807)|
+------------------------------------------------------+
| NULL|
+------------------------------------------------------+
"""
return _invoke_function_over_columns("try_multiply", left, right)

@@ -733,6 +781,22 @@ def try_subtract(left: "ColumnOrName", right: "ColumnOrName") -> Column:
+------------------+
|INTERVAL '-1' YEAR|
+------------------+

Example 5: Overflow results in NULL when ANSI mode is on

>>> import sys
>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_subtract(sf.lit(-sys.maxsize), sf.lit(sys.maxsize))).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+-------------------------------------------------------+
|try_subtract(-9223372036854775807, 9223372036854775807)|
+-------------------------------------------------------+
| NULL|
+-------------------------------------------------------+
"""
return _invoke_function_over_columns("try_subtract", left, right)

@@ -8492,13 +8556,27 @@ def to_timestamp(col: "ColumnOrName", format: Optional[str] = None) -> Column:

Examples
--------
Example 1: Convert string to a timestamp

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_timestamp(df.t).alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
>>> df.select(sf.to_timestamp(df.t).alias('dt')).show()
+-------------------+
| dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+

Example 2: Convert string to a timestamp with a format

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
>>> df.select(sf.to_timestamp(df.t, 'yyyy-MM-dd HH:mm:ss').alias('dt')).show()
+-------------------+
| dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+
"""
if format is None:
return _invoke_function_over_columns("to_timestamp", col)
@@ -8524,12 +8602,43 @@ def try_to_timestamp(col: "ColumnOrName", format: Optional["ColumnOrName"] = Non

Examples
--------
Example 1: Convert string to a timestamp

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(try_to_timestamp(df.t).alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
>>> df.select(sf.try_to_timestamp(df.t).alias('dt')).show()
+-------------------+
| dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+

Example 2: Convert string to a timestamp with a format

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([('1997-02-28 10:30:00',)], ['t'])
>>> df.select(sf.try_to_timestamp(df.t, sf.lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).show()
+-------------------+
| dt|
+-------------------+
|1997-02-28 10:30:00|
+-------------------+

Example 3: Conversion failure results in NULL when ANSI mode is on

>>> df.select(try_to_timestamp(df.t, lit('yyyy-MM-dd HH:mm:ss')).alias('dt')).collect()
[Row(dt=datetime.datetime(1997, 2, 28, 10, 30))]
>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.createDataFrame([('malformed',)], ['t'])
... df.select(sf.try_to_timestamp(df.t)).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+-------------------+
|try_to_timestamp(t)|
+-------------------+
| NULL|
+-------------------+
"""
if format is not None:
return _invoke_function_over_columns("try_to_timestamp", col, format)
@@ -11516,12 +11625,18 @@ def to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None) -> C

Examples
--------
Example 1: Convert string to a binary with encoding specified

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("abc",)], ["e"])
>>> df.select(to_binary(df.e, lit("utf-8")).alias('r')).collect()
>>> df.select(sf.to_binary(df.e, sf.lit("utf-8")).alias('r')).collect()
[Row(r=bytearray(b'abc'))]

Example 2: Convert string to a binary without encoding specified

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("414243",)], ["e"])
>>> df.select(to_binary(df.e).alias('r')).collect()
>>> df.select(sf.to_binary(df.e).alias('r')).collect()
[Row(r=bytearray(b'ABC'))]
"""
if format is not None:
@@ -12310,13 +12425,35 @@ def try_to_binary(col: "ColumnOrName", format: Optional["ColumnOrName"] = None)

Examples
--------
Example 1: Convert string to a binary with encoding specified

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("abc",)], ["e"])
>>> df.select(try_to_binary(df.e, lit("utf-8")).alias('r')).collect()
>>> df.select(sf.try_to_binary(df.e, sf.lit("utf-8")).alias('r')).collect()
[Row(r=bytearray(b'abc'))]

Example 2: Convert string to a binary without encoding specified

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("414243",)], ["e"])
>>> df.select(try_to_binary(df.e).alias('r')).collect()
>>> df.select(sf.try_to_binary(df.e).alias('r')).collect()
[Row(r=bytearray(b'ABC'))]

Example 3: Conversion failure results in NULL when ANSI mode is on

>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_to_binary(sf.lit("malformed"), sf.lit("hex"))).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+-----------------------------+
|try_to_binary(malformed, hex)|
+-----------------------------+
| NULL|
+-----------------------------+
"""
if format is not None:
return _invoke_function_over_columns("try_to_binary", col, format)
@@ -12342,9 +12479,32 @@ def try_to_number(col: "ColumnOrName", format: "ColumnOrName") -> Column:

Examples
--------
Example 1: Convert a string to a number with a format specified

>>> import pyspark.sql.functions as sf
>>> df = spark.createDataFrame([("$78.12",)], ["e"])
>>> df.select(try_to_number(df.e, lit("$99.99")).alias('r')).collect()
[Row(r=Decimal('78.12'))]
>>> df.select(sf.try_to_number(df.e, sf.lit("$99.99")).alias('r')).show()
+-----+
| r|
+-----+
|78.12|
+-----+

Example 2: Conversion failure results in NULL when ANSI mode is on

>>> import pyspark.sql.functions as sf
>>> origin = spark.conf.get("spark.sql.ansi.enabled")
>>> spark.conf.set("spark.sql.ansi.enabled", "true")
>>> try:
... df = spark.range(1)
... df.select(sf.try_to_number(sf.lit("77"), sf.lit("$99.99")).alias('r')).show()
... finally:
... spark.conf.set("spark.sql.ansi.enabled", origin)
+----+
| r|
+----+
|NULL|
+----+
"""
return _invoke_function_over_columns("try_to_number", col, format)

@@ -19977,12 +20137,31 @@ def try_reflect(*cols: "ColumnOrName") -> Column:

Examples
--------
Example 1: Reflecting a method call with arguments

>>> from pyspark.sql import functions as sf
>>> df = spark.createDataFrame([("a5cf6c42-0c85-418f-af6c-3e4e5b1328f2",)], ["a"])
>>> df.select(
... sf.try_reflect(sf.lit("java.util.UUID"), sf.lit("fromString"), df.a)
... ).collect()
[Row(try_reflect(java.util.UUID, fromString, a)='a5cf6c42-0c85-418f-af6c-3e4e5b1328f2')]
... ).show()
+------------------------------------------+
|try_reflect(java.util.UUID, fromString, a)|
+------------------------------------------+
| a5cf6c42-0c85-418...|
+------------------------------------------+

Example 2: Exception during the reflection call results in NULL

>>> from pyspark.sql import functions as sf
>>> df = spark.range(1)
>>> df.select(
... sf.try_reflect(sf.lit("scala.Predef"), sf.lit("require"), sf.lit(False))
... ).show()
+-----------------------------------------+
|try_reflect(scala.Predef, require, false)|
+-----------------------------------------+
| NULL|
+-----------------------------------------+
"""
return _invoke_function_over_seq_of_columns("try_reflect", cols)
