From aad096b1befaf5bc9cde174c1f53424b3b8a958a Mon Sep 17 00:00:00 2001
From: Gil Forsyth <gil@forsyth.dev>
Date: Thu, 15 Jun 2023 10:08:32 -0400
Subject: [PATCH] feat: package extension yamls

---
 .gitattributes                                |    1 +
 gen_proto.sh                                  |    8 +
 src/substrait/extensions/extension_types.yaml |   10 +
 .../functions_aggregate_approx.yaml           |   18 +
 .../functions_aggregate_generic.yaml          |   37 +
 .../extensions/functions_arithmetic.yaml      | 1588 +++++++++++++++++
 .../functions_arithmetic_decimal.yaml         |  151 ++
 .../extensions/functions_boolean.yaml         |  140 ++
 .../extensions/functions_comparison.yaml      |  216 +++
 .../extensions/functions_datetime.yaml        |  690 +++++++
 .../extensions/functions_logarithmic.yaml     |  147 ++
 .../extensions/functions_rounding.yaml        |  270 +++
 src/substrait/extensions/functions_set.yaml   |   27 +
 .../extensions/functions_string.yaml          | 1397 +++++++++++++++
 src/substrait/extensions/type_variations.yaml |   25 +
 src/substrait/extensions/unknown.yaml         |   66 +
 16 files changed, 4791 insertions(+)
 create mode 100644 src/substrait/extensions/extension_types.yaml
 create mode 100644 src/substrait/extensions/functions_aggregate_approx.yaml
 create mode 100644 src/substrait/extensions/functions_aggregate_generic.yaml
 create mode 100644 src/substrait/extensions/functions_arithmetic.yaml
 create mode 100644 src/substrait/extensions/functions_arithmetic_decimal.yaml
 create mode 100644 src/substrait/extensions/functions_boolean.yaml
 create mode 100644 src/substrait/extensions/functions_comparison.yaml
 create mode 100644 src/substrait/extensions/functions_datetime.yaml
 create mode 100644 src/substrait/extensions/functions_logarithmic.yaml
 create mode 100644 src/substrait/extensions/functions_rounding.yaml
 create mode 100644 src/substrait/extensions/functions_set.yaml
 create mode 100644 src/substrait/extensions/functions_string.yaml
 create mode 100644 src/substrait/extensions/type_variations.yaml
 create mode 100644 src/substrait/extensions/unknown.yaml

diff --git a/.gitattributes b/.gitattributes
index 439fb8f..7928937 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1 +1,2 @@
 src/substrait/gen/** linguist-generated=true
+src/substrait/extensions/** linguist-generated=true
diff --git a/gen_proto.sh b/gen_proto.sh
index 5d61caa..04939b1 100755
--- a/gen_proto.sh
+++ b/gen_proto.sh
@@ -7,6 +7,7 @@ submodule_dir=./third_party/substrait
 src_dir="$submodule_dir"/proto
 tmp_dir=./buf_work_dir
 dest_dir=./src/substrait/gen
+extension_dir=./src/substrait/extensions
 
 # Prefix the protobuf files with a unique configuration to prevent namespace conflicts
 # with other substrait packages. Save output to the work dir.
@@ -19,5 +20,12 @@ rm -rf "$dest_dir"
 buf generate
 protol --in-place --create-package --python-out "$dest_dir" buf
 
+# Remove the previously copied extension files so the copy below starts from a clean directory
+rm -rf "$extension_dir"
+
+# Copy the extensions directory out of the substrait submodule, then make every copied file user-readable and writable
+cp -fr "$submodule_dir"/extensions "$extension_dir"
+find "$extension_dir" -type f -exec chmod u+rw {} +
+
 # Remove the temporary work dir
 rm -rf "$tmp_dir"
diff --git a/src/substrait/extensions/extension_types.yaml b/src/substrait/extensions/extension_types.yaml
new file mode 100644
index 0000000..e03073c
--- /dev/null
+++ b/src/substrait/extensions/extension_types.yaml
@@ -0,0 +1,10 @@
+---
+types:
+  - name: point
+    structure:
+      latitude: i32
+      longitude: i32
+  - name: line
+    structure:
+      start: point
+      end: point
diff --git a/src/substrait/extensions/functions_aggregate_approx.yaml b/src/substrait/extensions/functions_aggregate_approx.yaml
new file mode 100644
index 0000000..c77caec
--- /dev/null
+++ b/src/substrait/extensions/functions_aggregate_approx.yaml
@@ -0,0 +1,18 @@
+%YAML 1.2
+---
+aggregate_functions:
+  - name: "approx_count_distinct"
+    description: >-
+      Calculates the approximate number of rows that contain distinct values of the expression argument using
+      HyperLogLog. This function provides an alternative to the COUNT (DISTINCT expression) function, which
+      returns the exact number of rows that contain distinct values of an expression. APPROX_COUNT_DISTINCT
+      processes large amounts of data significantly faster than COUNT, with negligible deviation from the exact
+      result.
+    impls:
+      - args:
+          - name: x
+            value: any
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: binary
+        return: i64
diff --git a/src/substrait/extensions/functions_aggregate_generic.yaml b/src/substrait/extensions/functions_aggregate_generic.yaml
new file mode 100644
index 0000000..4d891e9
--- /dev/null
+++ b/src/substrait/extensions/functions_aggregate_generic.yaml
@@ -0,0 +1,37 @@
+%YAML 1.2
+---
+aggregate_functions:
+  - name: "count"
+    description: Count a set of values
+    impls:
+      - args:
+          - name: x
+            value: any
+        options:
+          overflow:
+            values: [SILENT, SATURATE, ERROR]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+  - name: "count"
+    description: "Count a set of records (not field referenced)"
+    impls:
+      - options:
+          overflow:
+            values: [SILENT, SATURATE, ERROR]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+  - name: "any_value"
+    description: >
+      Selects an arbitrary value from a group of values.
+
+      If the input is empty, the function returns null.
+    impls:
+      - args:
+          - name: x
+            value: any
+        nullability: DECLARED_OUTPUT
+        return: any?
diff --git a/src/substrait/extensions/functions_arithmetic.yaml b/src/substrait/extensions/functions_arithmetic.yaml
new file mode 100644
index 0000000..61573e8
--- /dev/null
+++ b/src/substrait/extensions/functions_arithmetic.yaml
@@ -0,0 +1,1588 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: "add"
+    description: "Add two values."
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - value: i64
+          - value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "subtract"
+    description: "Subtract one value from another."
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "multiply"
+    description: "Multiply two values."
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "divide"
+    description: >
+      Divide x by y. In the case of integer division, partial values are truncated (i.e. rounded towards 0).
+      The `on_division_by_zero` option governs behavior in cases where y is 0 and x is not 0.
+      `LIMIT` means positive or negative infinity (depending on the sign of x and y).
+      If x and y are both 0 or both +/-infinity, behavior will be governed by `on_domain_error`.
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_division_by_zero:
+            values: [ LIMIT, NAN, ERROR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_division_by_zero:
+            values: [ LIMIT, NAN, ERROR ]
+        return: fp64
+  -
+    name: "negate"
+    description: "Negation of the value"
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        return: fp64
+  -
+    name: "modulus"
+    description: "Get the remainder when dividing one value by another."
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        return: i64
+  -
+    name: "power"
+    description: "Take the power with x as the base and y as exponent."
+    impls:
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        return: fp64
+  -
+    name: "sqrt"
+    description: "Square root of the value"
+    impls:
+      - args:
+          - name: x
+            value: i64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "exp"
+    description: "The mathematical constant e, raised to the power of the value."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "cos"
+    description: "Get the cosine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "sin"
+    description: "Get the sine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "tan"
+    description: "Get the tangent of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "cosh"
+    description: "Get the hyperbolic cosine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "sinh"
+    description: "Get the hyperbolic sine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "tanh"
+    description: "Get the hyperbolic tangent of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "acos"
+    description: "Get the arccosine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "asin"
+    description: "Get the arcsine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "atan"
+    description: "Get the arctangent of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "acosh"
+    description: "Get the hyperbolic arccosine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "asinh"
+    description: "Get the hyperbolic arcsine of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        return: fp64
+  -
+    name: "atanh"
+    description: "Get the hyperbolic arctangent of a value in radians."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "atan2"
+    description: "Get the arctangent of values given as x/y pairs."
+    impls:
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+        return: fp64
+  -
+    name: "abs"
+    description: >
+      Calculate the absolute value of the argument.
+
+      Integer values allow the specification of overflow behavior to handle the
+      unevenness of the twos complement, e.g. Int8 range [-128 : 127].
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i8
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i16
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        return: fp64
+  -
+    name: "sign"
+    description: >
+      Return the signedness of the argument.
+
+      Integer values return signedness with the same type as the input.
+      Possible return values are [-1, 0, 1]
+
+      Floating point values return signedness with the same type as the input.
+      Possible return values are [-1.0, -0.0, 0.0, 1.0, NaN]
+    impls:
+      - args:
+          - name: x
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        return: fp64
+  -
+    name: "factorial"
+    description: >
+      Return the factorial of a given integer input.
+
+      The factorial of 0! is 1 by convention.
+
+      Negative inputs will raise an error.
+    impls:
+      - args:
+          - value: i32
+            name: "n"
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i32
+      - args:
+          - value: i64
+            name: "n"
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: i64
+  -
+    name: "bitwise_not"
+    description: >
+      Return the bitwise NOT result for one integer input.
+
+    impls:
+      - args:
+          - name: x
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+        return: i64
+  -
+    name: "bitwise_and"
+    description: >
+      Return the bitwise AND result for two integer inputs.
+
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        return: i64
+  -
+    name: "bitwise_or"
+    description: >
+      Return the bitwise OR result for two given integer inputs.
+
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        return: i64
+  -
+    name: "bitwise_xor"
+    description: >
+      Return the bitwise XOR result for two integer inputs.
+
+    impls:
+      - args:
+          - name: x
+            value: i8
+          - name: y
+            value: i8
+        return: i8
+      - args:
+          - name: x
+            value: i16
+          - name: y
+            value: i16
+        return: i16
+      - args:
+          - name: x
+            value: i32
+          - name: y
+            value: i32
+        return: i32
+      - args:
+          - name: x
+            value: i64
+          - name: y
+            value: i64
+        return: i64
+
+aggregate_functions:
+  - name: "sum"
+    description: Sum a set of values. The sum of zero elements yields null.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: fp32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64?
+        return: fp64?
+      - args:
+          - name: x
+            value: fp64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64?
+        return: fp64?
+  - name: "sum0"
+    description: >
+      Sum a set of values. The sum of zero elements yields zero.
+
+      Null values are ignored.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64
+        return: fp64
+      - args:
+          - name: x
+            value: fp64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64
+        return: fp64
+  - name: "avg"
+    description: Average a set of values. For integral types, this truncates partial values.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<i64,i64>"
+        return: i8?
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<i64,i64>"
+        return: i16?
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<i64,i64>"
+        return: i32?
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<i64,i64>"
+        return: i64?
+      - args:
+          - name: x
+            value: fp32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<fp64,i64>"
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<fp64,i64>"
+        return: fp64?
+  - name: "min"
+    description: Returns the minimum of a set of values.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i8?
+        return: i8?
+      - args:
+          - name: x
+            value: i16
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i16?
+        return: i16?
+      - args:
+          - name: x
+            value: i32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i32?
+        return: i32?
+      - args:
+          - name: x
+            value: i64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: fp32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp32?
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64?
+        return: fp64?
+  - name: "max"
+    description: Returns the maximum of a set of values.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i8?
+        return: i8?
+      - args:
+          - name: x
+            value: i16
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i16?
+        return: i16?
+      - args:
+          - name: x
+            value: i32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i32?
+        return: i32?
+      - args:
+          - name: x
+            value: i64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: i64?
+        return: i64?
+      - args:
+          - name: x
+            value: fp32
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp32?
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: fp64?
+        return: fp64?
+  - name: "product"
+    description: Product of a set of values. Returns 1 for empty input.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: i64
+        return: i8
+      - args:
+          - name: x
+            value: i16
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: i64
+        return: i16
+      - args:
+          - name: x
+            value: i32
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: i64
+        return: i32
+      - args:
+          - name: x
+            value: i64
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: i64
+        return: i64
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: fp64
+        return: fp32
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: MIRROR
+        decomposable: MANY
+        intermediate: fp64
+        return: fp64
+  - name: "std_dev"
+    description: Calculates the standard deviation of a set of values.
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          distribution:
+            values: [ SAMPLE, POPULATION]
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          distribution:
+            values: [ SAMPLE, POPULATION]
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "variance"
+    description: Calculates variance for a set of values.
+    impls:
+      - args:
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          distribution:
+            values: [ SAMPLE, POPULATION]
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          distribution:
+            values: [ SAMPLE, POPULATION]
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "corr"
+    description: >
+      Calculates the value of Pearson's correlation coefficient between `x` and `y`.
+      If there is no input, null is returned.
+    impls:
+      - args:
+          - name: x
+            value: fp32
+          - name: y
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+          - name: y
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "mode"
+    description: >
+      Calculates mode for a set of values.
+      If there is no input, null is returned.
+    impls:
+      - args:
+          - name: x
+            value: i8
+        nullability: DECLARED_OUTPUT
+        return: i8?
+      - args:
+          - name: x
+            value: i16
+        nullability: DECLARED_OUTPUT
+        return: i16?
+      - args:
+          - name: x
+            value: i32
+        nullability: DECLARED_OUTPUT
+        return: i32?
+      - args:
+          - name: x
+            value: i64
+        nullability: DECLARED_OUTPUT
+        return: i64?
+      - args:
+          - name: x
+            value: fp32
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: x
+            value: fp64
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "median"
+    description: >
+      Calculate the median for a set of values.
+
+      Returns null if applied to zero records. For the integer implementations,
+      the rounding option determines how the median should be rounded if it ends
+      up midway between two values. For the floating point implementations,
+      it specifies the usual floating point rounding mode.
+    impls:
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: i8
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: i8?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: i16
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: i16?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: i32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: i32?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: i64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: i64?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - name: x
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        return: fp64?
+  - name: "quantile"
+    description: >
+      Calculates quantiles for a set of values.
+
+      This function will divide the aggregated values (passed via the
+      distribution argument) over N equally-sized bins, where N is passed
+      via a constant argument. It will then return the values at the
+      boundaries of these bins in list form. If the input is appropriately
+      sorted, this computes the quantiles of the distribution.
+
+      The function can optionally return the first and/or last element of
+      the input, as specified by the `boundaries` argument. If the input is
+      appropriately sorted, this will thus be the minimum and/or maximum
+      values of the distribution.
+
+      When the boundaries do not lie exactly on elements of the incoming
+      distribution, the function will interpolate between the two nearby
+      elements. If the interpolated value cannot be represented exactly,
+      the `rounding` option controls how the value should be selected or
+      computed.
+
+      The function fails and returns null in the following cases:
+        - `n` is null or less than one;
+        - any value in `distribution` is null.
+
+      The function returns an empty list if `n` equals 1 and `boundaries` is
+      set to `NEITHER`.
+
+    impls:
+      - args:
+          - name: boundaries
+            description: >
+              Which boundaries to include. For NEITHER, the output will have
+              n-1 elements, for MINIMUM and MAXIMUM it will have n elements,
+              and for BOTH it will have n+1 elements.
+            options: [ NEITHER, MINIMUM, MAXIMUM, BOTH ]
+          - name: precision
+            description: >
+              Based on required operator performance and configured optimizations
+              on saving memory bandwidth, the precision of the end result can be
+              the highest possible accuracy or an approximation.
+
+                - EXACT: provides the exact result, rounded if needed according
+                  to the rounding option.
+                - APPROXIMATE: provides only an estimate; the result must lie
+                  between the minimum and maximum values in the input
+                  (inclusive), but otherwise the accuracy is left up to the
+                  consumer.
+            options: [ EXACT, APPROXIMATE ]
+          - value: i64
+            constant: true
+            name: n
+            description: >
+              A positive integer which defines the number of quantile
+              partitions.
+          - value: any
+            name: distribution
+            description: >
+              The data for which the quantiles should be computed.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it. For floating point numbers, it specifies the IEEE
+              754 rounding mode (as it does for all other floating point
+              operations). For integer types:
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+
+              For non-numeric types, the behavior is the same as for integer
+              types, but applied to the index of the value in distribution.
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+        nullability: DECLARED_OUTPUT
+        ordered: true
+        return: LIST?<any>
+
+window_functions:
+  - name: "row_number"
+    description: "The number of the current row within its partition."
+    impls:
+      - args: []
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: i64?
+        window_type: PARTITION
+  - name: "rank"
+    description: "The rank of the current row, with gaps."
+    impls:
+      - args: []
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: i64?
+        window_type: PARTITION
+  - name: "dense_rank"
+    description: "The rank of the current row, without gaps."
+    impls:
+      - args: []
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: i64?
+        window_type: PARTITION
+  - name: "percent_rank"
+    description: "The relative rank of the current row."
+    impls:
+      - args: []
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: fp64?
+        window_type: PARTITION
+  - name: "cume_dist"
+    description: "The cumulative distribution."
+    impls:
+      - args: []
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: fp64?
+        window_type: PARTITION
+  - name: "ntile"
+    description: "Return an integer ranging from 1 to the argument value, dividing the partition as equally as possible."
+    impls:
+      - args:
+          - name: x
+            value: i32
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: i32?
+        window_type: PARTITION
+      - args:
+          - name: x
+            value: i64
+        nullability: DECLARED_OUTPUT
+        decomposable: NONE
+        return: i64?
+        window_type: PARTITION
diff --git a/src/substrait/extensions/functions_arithmetic_decimal.yaml b/src/substrait/extensions/functions_arithmetic_decimal.yaml
new file mode 100644
index 0000000..0fc4caa
--- /dev/null
+++ b/src/substrait/extensions/functions_arithmetic_decimal.yaml
@@ -0,0 +1,151 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: "add"
+    description: "Add two decimal values."
+    impls:
+      - args:
+          - name: x
+            value: decimal<P1,S1>
+          - name: y
+            value: decimal<P2,S2>
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: |-
+          init_scale = max(S1,S2)
+          init_prec = init_scale + max(P1 - S1, P2 - S2) + 1
+          min_scale = min(init_scale, 6)
+          delta = init_prec - 38
+          prec = min(init_prec, 38)
+          scale_after_borrow = max(init_scale - delta, min_scale)
+          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          DECIMAL<prec, scale>
+  -
+    name: "subtract"
+    impls:
+      - args:
+          - name: x
+            value: decimal<P1,S1>
+          - name: y
+            value: decimal<P2,S2>
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: |-
+          init_scale = max(S1,S2)
+          init_prec = init_scale + max(P1 - S1, P2 - S2) + 1
+          min_scale = min(init_scale, 6)
+          delta = init_prec - 38
+          prec = min(init_prec, 38)
+          scale_after_borrow = max(init_scale - delta, min_scale)
+          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          DECIMAL<prec, scale>
+  -
+    name: "multiply"
+    impls:
+      - args:
+          - name: x
+            value: decimal<P1,S1>
+          - name: y
+            value: decimal<P2,S2>
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: |-
+          init_scale = S1 + S2
+          init_prec = P1 + P2 + 1
+          min_scale = min(init_scale, 6)
+          delta = init_prec - 38
+          prec = min(init_prec, 38)
+          scale_after_borrow = max(init_scale - delta, min_scale)
+          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          DECIMAL<prec, scale>
+  -
+    name: "divide"
+    impls:
+      - args:
+          - name: x
+            value: decimal<P1,S1>
+          - name: y
+            value: decimal<P2,S2>
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: |-
+          init_scale = max(6, S1 + P2 + 1)
+          init_prec = P1 - S1 + P2 + init_scale
+          min_scale = min(init_scale, 6)
+          delta = init_prec - 38
+          prec = min(init_prec, 38)
+          scale_after_borrow = max(init_scale - delta, min_scale)
+          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          DECIMAL<prec, scale>
+  -
+    name: "modulus"
+    impls:
+      - args:
+          - name: x
+            value: decimal<P1,S1>
+          - name: y
+            value: decimal<P2,S2>
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        return: |-
+          init_scale = max(S1,S2)
+          init_prec = min(P1 - S1, P2 - S2) + init_scale
+          min_scale = min(init_scale, 6)
+          delta = init_prec - 38
+          prec = min(init_prec, 38)
+          scale_after_borrow = max(init_scale - delta, min_scale)
+          scale = init_prec > 38 ? scale_after_borrow : init_scale
+          DECIMAL<prec, scale>
+aggregate_functions:
+  - name: "sum"
+    description: Sum a set of values.
+    impls:
+      - args:
+          - name: x
+            value: "DECIMAL<P, S>"
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "DECIMAL?<38,S>"
+        return: "DECIMAL?<38,S>"
+  - name: "avg"
+    description: Average a set of values.
+    impls:
+      - args:
+          - name: x
+            value: "DECIMAL<P,S>"
+        options:
+          overflow:
+            values: [ SILENT, SATURATE, ERROR ]
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "STRUCT<DECIMAL<38,S>,i64>"
+        return: "DECIMAL<38,S>"
+  - name: "min"
+    description: Returns the minimum of a set of values.
+    impls:
+      - args:
+          - name: x
+            value: "DECIMAL<P, S>"
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "DECIMAL?<P, S>"
+        return: "DECIMAL?<P, S>"
+  - name: "max"
+    description: Returns the maximum of a set of values.
+    impls:
+      - args:
+          - name: x
+            value: "DECIMAL<P,S>"
+        nullability: DECLARED_OUTPUT
+        decomposable: MANY
+        intermediate: "DECIMAL?<P, S>"
+        return: "DECIMAL?<P, S>"
diff --git a/src/substrait/extensions/functions_boolean.yaml b/src/substrait/extensions/functions_boolean.yaml
new file mode 100644
index 0000000..22ae296
--- /dev/null
+++ b/src/substrait/extensions/functions_boolean.yaml
@@ -0,0 +1,140 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: or
+    description: >
+      The boolean `or` using Kleene logic.
+
+      This function behaves as follows with nulls:
+
+          true or null = true
+
+          null or true = true
+
+          false or null = null
+
+          null or false = null
+
+          null or null = null
+
+      In other words, in this context a null value really means "unknown", and
+      an unknown value `or` true is always true.
+
+      Behavior for 0 or 1 inputs is as follows:
+        or() -> false
+        or(x) -> x
+    impls:
+      - args:
+          - value: boolean?
+            name: a
+        variadic:
+          min: 0
+        return: boolean?
+  -
+    name: and
+    description: >
+      The boolean `and` using Kleene logic.
+
+      This function behaves as follows with nulls:
+
+          true and null = null
+
+          null and true = null
+
+          false and null = false
+
+          null and false = false
+
+          null and null = null
+
+      In other words, in this context a null value really means "unknown", and
+      an unknown value `and` false is always false.
+
+      Behavior for 0 or 1 inputs is as follows:
+        and() -> true
+        and(x) -> x
+    impls:
+      - args:
+          - value: boolean?
+            name: a
+        variadic:
+          min: 0
+        return: boolean?
+  -
+    name: and_not
+    description: >
+      The boolean `and` of one value and the negation of the other using Kleene logic.
+
+      This function behaves as follows with nulls:
+
+          true and not null = null
+
+          null and not false = null
+
+          false and not null = false
+
+          null and not true = false
+
+          null and not null = null
+
+      In other words, in this context a null value really means "unknown", and
+      an unknown value `and not` true is always false, as is false `and not` an
+      unknown value.
+    impls:
+      - args:
+          - value: boolean?
+            name: a
+          - value: boolean?
+            name: b
+        return: boolean?
+  -
+    name: xor
+    description: >
+      The boolean `xor` of two values using Kleene logic.
+
+      When a null is encountered in either input, a null is output.
+    impls:
+      - args:
+          - value: boolean?
+            name: a
+          - value: boolean?
+            name: b
+        return: boolean?
+  -
+    name: not
+    description: >
+      The `not` of a boolean value.
+
+      When a null is input, a null is output.
+    impls:
+      - args:
+          - value: boolean?
+            name: a
+        return: boolean?
+
+aggregate_functions:
+  -
+    name: "bool_and"
+    description: >
+      If any value in the input is false, false is returned. If the input is
+      empty or only contains nulls, null is returned. Otherwise, true is
+      returned.
+    impls:
+      - args:
+          - value: boolean
+            name: a
+        nullability: DECLARED_OUTPUT
+        return: boolean?
+  -
+    name: "bool_or"
+    description: >
+      If any value in the input is true, true is returned. If the input is
+      empty or only contains nulls, null is returned. Otherwise, false is
+      returned.
+    impls:
+      - args:
+          - value: boolean
+            name: a
+        nullability: DECLARED_OUTPUT
+        return: boolean?
diff --git a/src/substrait/extensions/functions_comparison.yaml b/src/substrait/extensions/functions_comparison.yaml
new file mode 100644
index 0000000..7d11f3c
--- /dev/null
+++ b/src/substrait/extensions/functions_comparison.yaml
@@ -0,0 +1,216 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: "not_equal"
+    description: >
+      Whether two values are not equal.
+
+      `not_equal(x, y) := (x != y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "equal"
+    description: >
+      Whether two values are equal.
+
+      `equal(x, y) := (x == y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "is_not_distinct_from"
+    description: >
+      Whether two values are equal.
+
+      This function treats `null` values as comparable, so
+
+      `is_not_distinct_from(null, null) == True`
+
+      This is in contrast to `equal`, in which `null` values do not compare.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "lt"
+    description: >
+      Less than.
+
+      `lt(x, y) := (x < y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "gt"
+    description: >
+      Greater than.
+
+      `gt(x, y) := (x > y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "lte"
+    description: >
+      Less than or equal to.
+
+      `lte(x, y) := (x <= y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "gte"
+    description: >
+      Greater than or equal to.
+
+      `gte(x, y) := (x >= y)`
+
+      If either/both of `x` and `y` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: BOOLEAN
+  -
+    name: "between"
+    description: >-
+      Whether the `expression` is greater than or equal to `low` and less than or equal to `high`.
+
+      `expression` BETWEEN `low` AND `high`
+
+      If `low`, `high`, or `expression` are `null`, `null` is returned.
+    impls:
+      - args:
+          - value: any1
+            name: expression
+            description: The expression to test for in the range defined by `low` and `high`.
+          - value: any1
+            name: low
+            description: The value to check if greater than or equal to.
+          - value: any1
+            name: high
+            description: The value to check if less than or equal to.
+        return: BOOLEAN
+  -
+    name: "is_null"
+    description: Whether a value is null. NaN is not null.
+    impls:
+      - args:
+          - value: any1
+            name: x
+        return: BOOLEAN
+        nullability: DECLARED_OUTPUT
+  -
+    name: "is_not_null"
+    description: Whether a value is not null. NaN is not null.
+    impls:
+      - args:
+          - value: any1
+            name: x
+        return: BOOLEAN
+        nullability: DECLARED_OUTPUT
+  -
+    name: "is_nan"
+    description: >
+      Whether a value is not a number.
+
+      If `x` is `null`, `null` is returned.
+    impls:
+      - args:
+          - value: fp32
+            name: x
+        return: BOOLEAN
+      - args:
+          - value: fp64
+            name: x
+        return: BOOLEAN
+  -
+    name: "is_finite"
+    description: >
+      Whether a value is finite (neither infinite nor NaN).
+
+      If `x` is `null`, `null` is returned.
+    impls:
+      - args:
+          - value: fp32
+            name: x
+        return: BOOLEAN
+      - args:
+          - value: fp64
+            name: x
+        return: BOOLEAN
+  -
+    name: "is_infinite"
+    description: >
+      Whether a value is infinite.
+
+      If `x` is `null`, `null` is returned.
+    impls:
+      - args:
+          - value: fp32
+            name: x
+        return: BOOLEAN
+      - args:
+          - value: fp64
+            name: x
+        return: BOOLEAN
+  -
+    name: "nullif"
+    description: If two values are equal, return null. Otherwise, return the first value.
+    impls:
+      - args:
+          - value: any1
+            name: x
+          - value: any1
+            name: y
+        return: any1
+  -
+    name: "coalesce"
+    description: >-
+      Evaluate arguments from left to right and return the first argument that is not null. Once
+      a non-null argument is found, the remaining arguments are not evaluated.
+
+      If all arguments are null, return null.
+    impls:
+      - args:
+          - value: any1
+        variadic:
+          min: 2
+        return: any1
diff --git a/src/substrait/extensions/functions_datetime.yaml b/src/substrait/extensions/functions_datetime.yaml
new file mode 100644
index 0000000..60e563f
--- /dev/null
+++ b/src/substrait/extensions/functions_datetime.yaml
@@ -0,0 +1,690 @@
+%YAML 1.2
+---
+scalar_functions:
+  -  # pulls one integer component out of a date/time value; every overload returns i64
+    name: extract
+    description: >-
+      Extract portion of a date/time value.
+      * YEAR Return the year.
+      * ISO_YEAR Return the ISO 8601 week-numbering year. First week of an ISO year has the majority (4 or more) of
+        its days in January.
+      * US_YEAR Return the US epidemiological year. First week of US epidemiological year has the majority (4 or more)
+        of its days in January. Last week of US epidemiological year has the year's last Wednesday in it. US
+        epidemiological week starts on Sunday.
+      * QUARTER Return the number of the quarter within the year. January 1 through March 31 map to the first quarter,
+        April 1 through June 30 map to the second quarter, etc.
+      * MONTH Return the number of the month within the year.
+      * DAY Return the number of the day within the month.
+      * DAY_OF_YEAR Return the number of the day within the year. January 1 maps to the first day, February 1 maps to
+        the thirty-second day, etc.
+      * MONDAY_DAY_OF_WEEK Return the number of the day within the week, from Monday (first day) to Sunday (seventh
+        day).
+      * SUNDAY_DAY_OF_WEEK Return the number of the day within the week, from Sunday (first day) to Saturday (seventh
+        day).
+      * MONDAY_WEEK Return the number of the week within the year. First week starts on first Monday of January.
+      * SUNDAY_WEEK Return the number of the week within the year. First week starts on first Sunday of January.
+      * ISO_WEEK Return the number of the ISO week within the ISO year. First ISO week has the majority (4 or more)
+        of its days in January. ISO week starts on Monday.
+      * US_WEEK Return the number of the US week within the US year. First US week has the majority (4 or more) of
+        its days in January. US week starts on Sunday.
+      * HOUR Return the hour (0-23).
+      * MINUTE Return the minute (0-59).
+      * SECOND Return the second (0-59).
+      * MILLISECOND Return number of milliseconds since the last full second.
+      * MICROSECOND Return number of microseconds since the last full millisecond.
+      * SUBSECOND Return number of microseconds since the last full second of the given timestamp.
+      * UNIX_TIME Return number of seconds that have elapsed since 1970-01-01 00:00:00 UTC, ignoring leap seconds.
+      * TIMEZONE_OFFSET Return number of seconds of timezone offset to UTC.
+
+      The range of values returned for QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK, SUNDAY_DAY_OF_WEEK,
+      MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK depends on whether counting starts at 1 or 0. This is governed
+      by the indexing option.
+
+      When indexing is ONE:
+      * QUARTER returns values in range 1-4
+      * MONTH returns values in range 1-12
+      * DAY returns values in range 1-31
+      * DAY_OF_YEAR returns values in range 1-366
+      * MONDAY_DAY_OF_WEEK and SUNDAY_DAY_OF_WEEK return values in range 1-7
+      * MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK return values in range 1-53
+
+      When indexing is ZERO:
+      * QUARTER returns values in range 0-3
+      * MONTH returns values in range 0-11
+      * DAY returns values in range 0-30
+      * DAY_OF_YEAR returns values in range 0-365
+      * MONDAY_DAY_OF_WEEK and SUNDAY_DAY_OF_WEEK return values in range 0-6
+      * MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, and US_WEEK return values in range 0-52
+
+      The indexing option must be specified when the component is QUARTER, MONTH, DAY, DAY_OF_YEAR,
+      MONDAY_DAY_OF_WEEK, SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, or US_WEEK. The
+      indexing option cannot be specified when the component is YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND,
+      MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME, or TIMEZONE_OFFSET.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp_tz + timezone; components that take no indexing option
+          - name: component
+            options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND,
+                       MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME, TIMEZONE_OFFSET ]
+            description: The part of the value to extract.
+          - name: x
+            value: timestamp_tz
+          - name: timezone
+            description: Timezone string from IANA tzdb.
+            value: string
+        return: i64
+      - args:  # timestamp; same component list minus TIMEZONE_OFFSET
+          - name: component
+            options: [ YEAR, ISO_YEAR, US_YEAR, HOUR, MINUTE, SECOND,
+                       MILLISECOND, MICROSECOND, SUBSECOND, UNIX_TIME ]
+            description: The part of the value to extract.
+          - name: x
+            value: timestamp
+        return: i64
+      - args:  # date; year-level components and UNIX_TIME only
+          - name: component
+            options: [ YEAR, ISO_YEAR, US_YEAR, UNIX_TIME ]
+            description: The part of the value to extract.
+          - name: x
+            value: date
+        return: i64
+      - args:  # time; hour-and-below components only
+          - name: component
+            options: [ HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND, SUBSECOND ]
+            description: The part of the value to extract.
+          - name: x
+            value: time
+        return: i64
+      - args:  # timestamp_tz + timezone; components that take the indexing option
+          - name: component
+            options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK,
+                       SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ]
+            description: The part of the value to extract.
+          - name: indexing
+            options: [ ONE, ZERO ]
+            description: Start counting from 1 or 0.
+          - name: x
+            value: timestamp_tz
+          - name: timezone
+            description: Timezone string from IANA tzdb.
+            value: string
+        return: i64
+      - args:  # timestamp; components that take the indexing option
+          - name: component
+            options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK,
+                       SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ]
+            description: The part of the value to extract.
+          - name: indexing
+            options: [ ONE, ZERO ]
+            description: Start counting from 1 or 0.
+          - name: x
+            value: timestamp
+        return: i64
+      - args:  # date; components that take the indexing option
+          - name: component
+            options: [ QUARTER, MONTH, DAY, DAY_OF_YEAR, MONDAY_DAY_OF_WEEK,
+                       SUNDAY_DAY_OF_WEEK, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK ]
+            description: The part of the value to extract.
+          - name: indexing
+            options: [ ONE, ZERO ]
+            description: Start counting from 1 or 0.
+          - name: x
+            value: date
+        return: i64
+  -  # boolean-valued counterpart of extract; IS_DST is offered only on the timestamp_tz overload
+    name: 'extract_boolean'
+    description: >-
+      Extract boolean values of a date/time value.
+      * IS_LEAP_YEAR Return true if year of the given value is a leap year and false otherwise.
+      * IS_DST Return true if DST (Daylight Savings Time) is observed at the given value
+        in the given timezone.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp overload
+          - name: 'component'
+            options: [ IS_LEAP_YEAR ]
+            description: 'The part of the value to extract.'
+          - name: 'x'
+            value: timestamp
+        return: boolean
+      - args:  # timestamp_tz overload; takes an explicit timezone
+          - name: 'component'
+            options: [ IS_LEAP_YEAR, IS_DST ]
+            description: 'The part of the value to extract.'
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+        return: boolean
+      - args:  # date overload
+          - name: 'component'
+            options: [ IS_LEAP_YEAR ]
+            description: 'The part of the value to extract.'
+          - name: 'x'
+            value: date
+        return: boolean
+  -  # date/time + interval; overloads cover timestamp, timestamp_tz and date with interval_year / interval_day
+    name: "add"
+    description: >-
+      Add an interval to a date/time type.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp + interval_year
+          - name: x
+            value: timestamp
+          - name: y
+            value: interval_year
+        return: timestamp
+      - args:  # timestamp_tz + interval_year, with an explicit timezone
+          - name: x
+            value: timestamp_tz
+          - name: y
+            value: interval_year
+          - name: timezone
+            description: Timezone string from IANA tzdb.
+            value: string
+        return: timestamp_tz
+      - args:  # date + interval_year
+          - name: x
+            value: date
+          - name: y
+            value: interval_year
+        return: timestamp  # NOTE(review): "subtract" returns date for date/interval_year - confirm timestamp is intended
+      - args:  # timestamp + interval_day
+          - name: x
+            value: timestamp
+          - name: y
+            value: interval_day
+        return: timestamp
+      - args:  # timestamp_tz + interval_day; NOTE(review): no timezone argument here, unlike the interval_year overload - confirm
+          - name: x
+            value: timestamp_tz
+          - name: y
+            value: interval_day
+        return: timestamp_tz
+      - args:  # date + interval_day
+          - name: x
+            value: date
+          - name: y
+            value: interval_day
+        return: timestamp  # NOTE(review): "subtract" returns date for date/interval_day - confirm timestamp is intended
+  -  # interval + interval; both operands and the result share one interval type
+    name: 'add_intervals'
+    description: 'Add two intervals together.'
+    impls:
+      - args:  # interval_day pair
+          - name: 'x'
+            value: interval_day
+          - name: 'y'
+            value: interval_day
+        return: interval_day
+      - args:  # interval_year pair
+          - name: 'x'
+            value: interval_year
+          - name: 'y'
+            value: interval_year
+        return: interval_year
+  -  # date/time - interval; every overload returns the type of x
+    name: 'subtract'
+    description: >-
+      Subtract an interval from a date/time type.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp - interval_year
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: interval_year
+        return: timestamp
+      - args:  # timestamp_tz - interval_year, no timezone argument
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: interval_year
+        return: timestamp_tz
+      - args:  # timestamp_tz - interval_year, with an explicit timezone
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: interval_year
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+        return: timestamp_tz
+      - args:  # date - interval_year
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: interval_year
+        return: date
+      - args:  # timestamp - interval_day
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: interval_day
+        return: timestamp
+      - args:  # timestamp_tz - interval_day
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: interval_day
+        return: timestamp_tz
+      - args:  # date - interval_day
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: interval_day
+        return: date
+  -  # ordering comparison; x and y have the same type in every overload, result is boolean
+    name: 'lte'
+    description: 'less than or equal to'
+    impls:
+      - args:  # timestamp pair
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: timestamp
+        return: boolean
+      - args:  # timestamp_tz pair
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: timestamp_tz
+        return: boolean
+      - args:  # date pair
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: date
+        return: boolean
+      - args:  # interval_day pair
+          - name: 'x'
+            value: interval_day
+          - name: 'y'
+            value: interval_day
+        return: boolean
+      - args:  # interval_year pair
+          - name: 'x'
+            value: interval_year
+          - name: 'y'
+            value: interval_year
+        return: boolean
+  -  # ordering comparison; x and y have the same type in every overload, result is boolean
+    name: 'lt'
+    description: 'less than'
+    impls:
+      - args:  # timestamp pair
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: timestamp
+        return: boolean
+      - args:  # timestamp_tz pair
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: timestamp_tz
+        return: boolean
+      - args:  # date pair
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: date
+        return: boolean
+      - args:  # interval_day pair
+          - name: 'x'
+            value: interval_day
+          - name: 'y'
+            value: interval_day
+        return: boolean
+      - args:  # interval_year pair
+          - name: 'x'
+            value: interval_year
+          - name: 'y'
+            value: interval_year
+        return: boolean
+  -  # ordering comparison; x and y have the same type in every overload, result is boolean
+    name: 'gte'
+    description: 'greater than or equal to'
+    impls:
+      - args:  # timestamp pair
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: timestamp
+        return: boolean
+      - args:  # timestamp_tz pair
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: timestamp_tz
+        return: boolean
+      - args:  # date pair
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: date
+        return: boolean
+      - args:  # interval_day pair
+          - name: 'x'
+            value: interval_day
+          - name: 'y'
+            value: interval_day
+        return: boolean
+      - args:  # interval_year pair
+          - name: 'x'
+            value: interval_year
+          - name: 'y'
+            value: interval_year
+        return: boolean
+  -  # ordering comparison; x and y have the same type in every overload, result is boolean
+    name: 'gt'
+    description: 'greater than'
+    impls:
+      - args:  # timestamp pair
+          - name: 'x'
+            value: timestamp
+          - name: 'y'
+            value: timestamp
+        return: boolean
+      - args:  # timestamp_tz pair
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'y'
+            value: timestamp_tz
+        return: boolean
+      - args:  # date pair
+          - name: 'x'
+            value: date
+          - name: 'y'
+            value: date
+        return: boolean
+      - args:  # interval_day pair
+          - name: 'x'
+            value: interval_day
+          - name: 'y'
+            value: interval_day
+        return: boolean
+      - args:  # interval_year pair
+          - name: 'x'
+            value: interval_year
+          - name: 'y'
+            value: interval_year
+        return: boolean
+  -  # local timestamp or date + timezone string -> timestamp_tz
+    name: 'assume_timezone'
+    description: >-
+      Convert local timestamp to UTC-relative timestamp_tz using given local time's timezone.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp overload
+          - name: 'x'
+            value: timestamp
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+        return: timestamp_tz
+      - args:  # date overload
+          - name: 'x'
+            value: date
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb. Returned timestamp_tz will have time set to 00:00:00.'
+            value: string
+        return: timestamp_tz
+  -  # timestamp_tz + timezone string -> timestamp
+    name: 'local_timestamp'
+    description: >-
+      Convert UTC-relative timestamp_tz to local timestamp using given local time's timezone.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # the only overload
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+        return: timestamp
+  -  # (string, format string) -> time
+    name: 'strptime_time'
+    description: >-
+      Parse string into time using provided format,
+      see https://man7.org/linux/man-pages/man3/strptime.3.html for reference.
+    impls:
+      - args:  # the only overload
+          - name: 'time_string'
+            value: string
+          - name: 'format'
+            value: string
+        return: time
+  -  # (string, format string) -> date
+    name: 'strptime_date'
+    description: >-
+      Parse string into date using provided format,
+      see https://man7.org/linux/man-pages/man3/strptime.3.html for reference.
+    impls:
+      - args:  # the only overload
+          - name: 'date_string'
+            value: string
+          - name: 'format'
+            value: string
+        return: date
+  -  # (string, format[, timezone]) -> timestamp_tz; both overloads return timestamp_tz
+    name: "strptime_timestamp"  # NOTE(review): the description states two different outcomes for "timezone in string AND as parameter" (error vs. parsed timezone wins) - confirm which applies
+    description: >-
+      Parse string into timestamp using provided format,
+      see https://man7.org/linux/man-pages/man3/strptime.3.html for reference.
+      If timezone is present in timestamp and provided as parameter an error is thrown.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is supplied as parameter and present in the parsed string the parsed timezone is used.
+      If parameter supplied timezone is invalid an error is thrown.
+    impls:
+      - args:  # overload with an explicit timezone parameter
+          - name: timestamp_string
+            value: string
+          - name: format
+            value: string
+          - name: timezone
+            description: Timezone string from IANA tzdb.
+            value: string
+        return: timestamp_tz
+      - args:  # overload without a timezone parameter
+          - name: timestamp_string
+            value: string
+          - name: format
+            value: string
+        return: timestamp_tz
+  -  # date/time value + format string -> string; only the timestamp_tz overload takes a timezone
+    name: 'strftime'
+    description: >-
+      Convert timestamp/date/time to string using provided format,
+      see https://man7.org/linux/man-pages/man3/strftime.3.html for reference.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp overload
+          - name: 'x'
+            value: timestamp
+          - name: 'format'
+            value: string
+        return: string
+      - args:  # timestamp_tz overload
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'format'
+            value: string
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+        return: string
+      - args:  # date overload
+          - name: 'x'
+            value: date
+          - name: 'format'
+            value: string
+        return: string
+      - args:  # time overload
+          - name: 'x'
+            value: time
+          - name: 'format'
+            value: string
+        return: string
+  -  # rounds x to a multiple of a unit counted from an origin value of the same type as x
+    name: 'round_temporal'
+    description: >-
+      Round a given timestamp/date/time to a multiple of a time unit. If the given timestamp is not already an
+      exact multiple from the origin in the given timezone, the resulting point is chosen as one of the
+      two nearest multiples. Which of these is chosen is governed by rounding: FLOOR means to use the earlier
+      one, CEIL means to use the later one, ROUND_TIE_DOWN means to choose the nearest and tie to the
+      earlier one if equidistant, ROUND_TIE_UP means to choose the nearest and tie to the later one if
+      equidistant.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+    impls:
+      - args:  # timestamp overload
+          - name: 'x'
+            value: timestamp
+          - name: 'rounding'
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: 'unit'
+            options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: 'multiple'
+            value: i64
+          - name: 'origin'
+            value: timestamp
+        return: timestamp
+      - args:  # timestamp_tz overload; timezone sits between multiple and origin
+          - name: 'x'
+            value: timestamp_tz
+          - name: 'rounding'
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: 'unit'
+            options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: 'multiple'
+            value: i64
+          - name: 'timezone'
+            description: 'Timezone string from IANA tzdb.'
+            value: string
+          - name: 'origin'
+            value: timestamp_tz
+        return: timestamp_tz
+      - args:  # date overload; units limited to DAY and coarser
+          - name: 'x'
+            value: date
+          - name: 'rounding'
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: 'unit'
+            options: [ YEAR, MONTH, WEEK, DAY ]
+          - name: 'multiple'
+            value: i64
+          - name: 'origin'
+            value: date
+        return: date
+      - args:  # time overload; units limited to HOUR and finer
+          - name: 'x'
+            value: time
+          - name: 'rounding'
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: 'unit'
+            options: [ HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: 'multiple'
+            value: i64
+          - name: 'origin'
+            value: time
+        return: time
+  -  # like round_temporal, but origin is an enumerated calendar unit rather than a value
+    name: "round_calendar"
+    description: >-
+      Round a given timestamp/date/time to a multiple of a time unit. If the given timestamp is not already an
+      exact multiple from the last origin unit in the given timezone, the resulting point is chosen as one of the
+      two nearest multiples. Which of these is chosen is governed by rounding: FLOOR means to use the earlier
+      one, CEIL means to use the later one, ROUND_TIE_DOWN means to choose the nearest and tie to the
+      earlier one if equidistant, ROUND_TIE_UP means to choose the nearest and tie to the later one if
+      equidistant.
+
+      Timezone strings must be as defined by IANA timezone database (https://www.iana.org/time-zones).
+      Examples: "Pacific/Marquesas", "Etc/GMT+1".
+      If timezone is invalid an error is thrown.
+
+    impls:
+      - args:  # timestamp overload
+          - name: x
+            value: timestamp
+          - name: rounding
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: unit
+            options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: origin
+            options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK,
+                       US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ]
+          - name: multiple
+            value: i64
+        return: timestamp
+      - args:  # timestamp_tz overload; adds a trailing timezone argument
+          - name: x
+            value: timestamp_tz
+          - name: rounding
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: unit
+            options: [ YEAR, MONTH, WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: origin
+            options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK,
+                       US_WEEK, DAY, HOUR, MINUTE, SECOND, MILLISECOND ]
+          - name: multiple
+            value: i64
+          - name: timezone
+            description: Timezone string from IANA tzdb.
+            value: string
+        return: timestamp_tz
+      - args:  # date overload
+          - name: x
+            value: date
+          - name: rounding
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: unit
+            options: [ YEAR, MONTH, WEEK, DAY ]
+          - name: origin
+            options: [ YEAR, MONTH, MONDAY_WEEK, SUNDAY_WEEK, ISO_WEEK, US_WEEK, DAY ]
+          - name: multiple
+            value: i64
+          - name: origin  # NOTE(review): second argument named "origin" in this overload (the enum above is the first); the timestamp overloads have no value-typed origin - confirm intended
+            value: date
+        return: date
+      - args:  # time overload
+          - name: x
+            value: time
+          - name: rounding
+            options: [ FLOOR, CEIL, ROUND_TIE_DOWN, ROUND_TIE_UP ]
+          - name: unit
+            options: [ DAY, HOUR, MINUTE, SECOND, MILLISECOND, MICROSECOND ]
+          - name: origin
+            options: [ DAY, HOUR, MINUTE, SECOND, MILLISECOND ]
+          - name: multiple
+            value: i64
+          - name: origin  # NOTE(review): second argument named "origin" in this overload (the enum above is the first); the timestamp overloads have no value-typed origin - confirm intended
+            value: time
+        return: time
diff --git a/src/substrait/extensions/functions_logarithmic.yaml b/src/substrait/extensions/functions_logarithmic.yaml
new file mode 100644
index 0000000..f4b8acc
--- /dev/null
+++ b/src/substrait/extensions/functions_logarithmic.yaml
@@ -0,0 +1,147 @@
+%YAML 1.2
+---
+scalar_functions:
+  -  # fp32 and fp64 overloads with identical option sets; result type matches the input
+    name: 'ln'
+    description: 'Natural logarithm of the value'
+    impls:
+      - args:  # fp32 overload
+          - name: 'x'
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp32
+      - args:  # fp64 overload
+          - name: 'x'
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp64
+  -  # fp32 and fp64 overloads with identical option sets; result type matches the input
+    name: 'log10'
+    description: 'Logarithm to base 10 of the value'
+    impls:
+      - args:  # fp32 overload
+          - name: 'x'
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp32
+      - args:  # fp64 overload
+          - name: 'x'
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp64
+  -  # fp32 and fp64 overloads with identical option sets; result type matches the input
+    name: 'log2'
+    description: 'Logarithm to base 2 of the value'
+    impls:
+      - args:  # fp32 overload
+          - name: 'x'
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp32
+      - args:  # fp64 overload
+          - name: 'x'
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp64
+  -  # two-argument logarithm (value, base); fp32 and fp64 overloads with identical option sets
+    name: 'logb'
+    description: >
+      Logarithm of the value with the given base
+
+      logb(x, b) => log_{b} (x)
+    impls:
+      - args:  # fp32 overload
+          - name: 'x'
+            value: fp32
+            description: 'The number `x` to compute the logarithm of'
+          - name: 'base'
+            value: fp32
+            description: 'The logarithm base `b` to use'
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp32
+      - args:  # fp64 overload
+          - name: 'x'
+            value: fp64
+            description: 'The number `x` to compute the logarithm of'
+          - name: 'base'
+            value: fp64
+            description: 'The logarithm base `b` to use'
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp64
+  -  # fp32 and fp64 overloads with identical option sets; result type matches the input
+    name: 'log1p'
+    description: >
+      Natural logarithm (base e) of 1 + x
+
+      log1p(x) => log(1+x)
+    impls:
+      - args:  # fp32 overload
+          - name: 'x'
+            value: fp32
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp32
+      - args:  # fp64 overload
+          - name: 'x'
+            value: fp64
+        options:
+          rounding:
+            values: [ TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR ]
+          on_domain_error:
+            values: [ NAN, ERROR ]
+          on_log_zero:
+            values: [ NAN, ERROR, MINUS_INFINITY ]
+        return: fp64
diff --git a/src/substrait/extensions/functions_rounding.yaml b/src/substrait/extensions/functions_rounding.yaml
new file mode 100644
index 0000000..09309f2
--- /dev/null
+++ b/src/substrait/extensions/functions_rounding.yaml
@@ -0,0 +1,270 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: ceil  # one argument; return type equals the argument type
+    description: >
+      Rounding to the ceiling of the value `x`.
+    impls:
+      - args:  # fp32 overload
+          - name: x
+            value: fp32
+        return: fp32
+      - args:  # fp64 overload
+          - name: x
+            value: fp64
+        return: fp64
+  -
+    name: floor  # one argument; return type equals the argument type
+    description: >
+      Rounding to the floor of the value `x`.
+    impls:
+      - args:  # fp32 overload
+          - name: x
+            value: fp32
+        return: fp32
+      - args:  # fp64 overload
+          - name: x
+            value: fp64
+        return: fp64
+  -
+    name: round  # overloads for i8, i16, i32, i64, fp32 and fp64; `s` is always i32
+    description: >
+      Rounding the value `x` to `s` decimal places.
+    impls:
+      - args:  # i8 overload
+          - name: x
+            value: i8
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, nothing will happen
+              since `x` is an integer value.
+
+              When `s` is a negative number, the rounding is
+              performed to the nearest multiple of `10^(-s)`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: i8?
+      - args:  # i16 overload
+          - name: x
+            value: i16
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, nothing will happen
+              since `x` is an integer value.
+
+              When `s` is a negative number, the rounding is
+              performed to the nearest multiple of `10^(-s)`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: i16?
+      - args:  # i32 overload
+          - name: x
+            value: i32
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, nothing will happen
+              since `x` is an integer value.
+
+              When `s` is a negative number, the rounding is
+              performed to the nearest multiple of `10^(-s)`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: i32?
+      - args:  # i64 overload
+          - name: x
+            value: i64
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, nothing will happen
+              since `x` is an integer value.
+
+              When `s` is a negative number, the rounding is
+              performed to the nearest multiple of `10^(-s)`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: i64?
+      - args:  # fp32 overload
+          - name: x
+            value: fp32
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, the rounding
+              is performed to a `s` number of decimal places.
+
+              When `s` is a negative number, the rounding is
+              performed to the left side of the decimal point
+              as specified by `s`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: fp32?
+      - args:  # fp64 overload
+          - name: x
+            value: fp64
+            description: >
+              Numerical expression to be rounded.
+          - name: s
+            value: i32
+            description: >
+              Number of decimal places to be rounded to.
+
+              When `s` is a positive number, the rounding
+              is performed to a `s` number of decimal places.
+
+              When `s` is a negative number, the rounding is
+              performed to the left side of the decimal point
+              as specified by `s`.
+        options:
+          rounding:
+            description: >
+              When a boundary is computed to lie somewhere between two values,
+              and this value cannot be exactly represented, this specifies how
+              to round it.
+
+                - TIE_TO_EVEN: round to nearest value; if exactly halfway, tie
+                  to the even option.
+                - TIE_AWAY_FROM_ZERO: round to nearest value; if exactly
+                  halfway, tie away from zero.
+                - TRUNCATE: always round toward zero.
+                - CEILING: always round toward positive infinity.
+                - FLOOR: always round toward negative infinity.
+                - AWAY_FROM_ZERO: round negative values with FLOOR rule, round positive values with CEILING rule
+                - TIE_DOWN: round ties with FLOOR rule
+                - TIE_UP: round ties with CEILING rule
+                - TIE_TOWARDS_ZERO: round ties with TRUNCATE rule
+                - TIE_TO_ODD: round to nearest value; if exactly halfway, tie
+                  to the odd option.
+            values: [TIE_TO_EVEN, TIE_AWAY_FROM_ZERO, TRUNCATE, CEILING, FLOOR,
+                     AWAY_FROM_ZERO, TIE_DOWN, TIE_UP, TIE_TOWARDS_ZERO, TIE_TO_ODD]
+        nullability: DECLARED_OUTPUT
+        return: fp64?
diff --git a/src/substrait/extensions/functions_set.yaml b/src/substrait/extensions/functions_set.yaml
new file mode 100644
index 0000000..ce02bf3
--- /dev/null
+++ b/src/substrait/extensions/functions_set.yaml
@@ -0,0 +1,27 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: "index_in"
+    description: >
+      Checks the membership of a value in a list of values
+
+      Returns the first 0-based index value of some input `T` if `T` is equal to
+      any element in `List<T>`.  Returns `NULL` if not found.
+
+      If `T` is `NULL`, returns `NULL`.
+
+      If `T` is `NaN`:
+        - Returns 0-based index of `NaN` in `List<T>` (default)
+        - Returns `NULL` (if `NAN_IS_NOT_NAN` is specified)
+    impls:
+      - args:
+          - name: "x"  # the value to look up
+            value: T
+          - name: "y"  # the list to search; quoted because bare `y` is a boolean under YAML 1.1 resolvers
+            value: List<T>
+        options:
+          nan_equality:
+            values: [ NAN_IS_NAN, NAN_IS_NOT_NAN ]
+        nullability: DECLARED_OUTPUT
+        return: i64?  # 64-bit integer is spelled `i64` (as in the other extension files), not `int64`
diff --git a/src/substrait/extensions/functions_string.yaml b/src/substrait/extensions/functions_string.yaml
new file mode 100644
index 0000000..11f2d18
--- /dev/null
+++ b/src/substrait/extensions/functions_string.yaml
@@ -0,0 +1,1397 @@
+%YAML 1.2
+---
+scalar_functions:
+  -
+    name: 'concat'
+    description: 'Concatenate strings.'
+    impls:
+      - args:  # varchar overload
+          - name: input
+            value: 'varchar<L1>'
+        variadic:
+          min: 1
+        return: 'varchar<L1>'
+      - args:  # string overload
+          - name: input
+            value: 'string'
+        variadic:
+          min: 1
+        return: 'string'
+  -
+    name: 'like'
+    description: >-
+      Are two strings like each other.
+
+      The `case_sensitivity` option applies to the `match` argument.
+    impls:
+      - args:  # varchar input, varchar match
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: match
+            value: 'varchar<L2>'
+            description: The string to match against the input string.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, string match
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: match
+            value: 'string'
+            description: The string to match against the input string.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+  -
+    name: substring
+    description: >-
+      Extract a substring of a specified `length` starting from position `start`.
+      A `start` value of 1 refers to the first character of the string.
+    impls:
+      - args:  # varchar overload
+          - value: "varchar<L1>"
+            name: "input"
+          - value: i32
+            name: "start"
+          - value: i32
+            name: "length"
+        return: "varchar<L1>"
+      - args:  # string overload
+          - value: "string"
+            name: "input"
+          - value: i32
+            name: "start"
+          - value: i32
+            name: "length"
+        return: "string"
+      - args:  # fixedchar overload
+          - value: "fixedchar<L1>"  # type parameter spelled `L1`, matching the other signatures
+            name: "input"
+          - value: i32
+            name: "start"
+          - value: i32
+            name: "length"
+        return: "string"
+  -
+    name: 'regexp_match_substring'
+    description: >-
+      Extract a substring that matches the given regular expression pattern. The regular expression
+      pattern should follow the International Components for Unicode implementation
+      (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the
+      pattern to be extracted is specified using the `occurrence` argument. Specifying `1` means
+      the first occurrence will be extracted, `2` means the second occurrence, and so on.
+      The `occurrence` argument should be a positive non-zero integer. The number of characters
+      from the beginning of the string to begin starting to search for pattern matches can be
+      specified using the `position` argument. Specifying `1` means to search for matches
+      starting at the first character of the input string, `2` means the second character, and so
+      on. The `position` argument should be a positive non-zero integer. The regular
+      expression capture group can be specified using the `group` argument. Specifying `0`
+      will return the substring matching the full regular expression. Specifying `1` will
+      return the substring matching only the first capture group, and so on. The `group`
+      argument should be a non-negative integer.
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines. This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+
+      Behavior is undefined if the regex fails to compile, the occurrence value is out of range,
+      the position value is out of range, or the group value is out of range.
+    impls:
+      - args:  # varchar input and pattern
+          - name: input
+            value: 'varchar<L1>'
+          - name: pattern
+            value: 'varchar<L2>'
+          - name: position
+            value: i64
+          - name: occurrence
+            value: i64
+          - name: group
+            value: i64
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+          multiline:
+            values: [MULTILINE_DISABLED, MULTILINE_ENABLED]
+          dotall:
+            values: [DOTALL_DISABLED, DOTALL_ENABLED]
+        return: 'varchar<L1>'
+      - args:  # string input and pattern
+          - name: input
+            value: 'string'
+          - name: pattern
+            value: 'string'
+          - name: position
+            value: i64
+          - name: occurrence
+            value: i64
+          - name: group
+            value: i64
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+          multiline:
+            values: [MULTILINE_DISABLED, MULTILINE_ENABLED]
+          dotall:
+            values: [DOTALL_DISABLED, DOTALL_ENABLED]
+        return: 'string'
+  -
+    name: 'regexp_match_substring_all'
+    description: >-
+      Extract all substrings that match the given regular expression pattern. This will return a
+      list of extracted strings with one value for each occurrence of a match. The regular expression
+      pattern should follow the International Components for Unicode implementation
+      (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters
+      from the beginning of the string to begin starting to search for pattern matches can be
+      specified using the `position` argument. Specifying `1` means to search for matches
+      starting at the first character of the input string, `2` means the second character, and so
+      on. The `position` argument should be a positive non-zero integer. The regular
+      expression capture group can be specified using the `group` argument. Specifying `0`
+      will return substrings matching the full regular expression. Specifying `1` will return
+      substrings matching only the first capture group, and so on. The `group` argument should
+      be a non-negative integer.
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines. This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+
+      Behavior is undefined if the regex fails to compile, the position value is out of range,
+      or the group value is out of range.
+    impls:
+      - args:  # varchar input and pattern; returns a list of varchar
+          - name: input
+            value: 'varchar<L1>'
+          - name: pattern
+            value: 'varchar<L2>'
+          - name: position
+            value: i64
+          - name: group
+            value: i64
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+          multiline:
+            values: [MULTILINE_DISABLED, MULTILINE_ENABLED]
+          dotall:
+            values: [DOTALL_DISABLED, DOTALL_ENABLED]
+        return: 'List<varchar<L1>>'
+      - args:  # string input and pattern; returns a list of string
+          - name: input
+            value: 'string'
+          - name: pattern
+            value: 'string'
+          - name: position
+            value: i64
+          - name: group
+            value: i64
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+          multiline:
+            values: [MULTILINE_DISABLED, MULTILINE_ENABLED]
+          dotall:
+            values: [DOTALL_DISABLED, DOTALL_ENABLED]
+        return: 'List<string>'
+  -
+    name: 'starts_with'
+    description: >-
+      Whether the `input` string starts with the `substring`.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:  # varchar input, varchar substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # varchar input, string substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # varchar input, fixedchar substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, string substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, varchar substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L1>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, fixedchar substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L1>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, fixedchar substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, string substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, varchar substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+  -
+    name: ends_with
+    description: >-
+      Whether the `input` string ends with the `substring`.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:  # varchar input, varchar substring
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # varchar input, string substring
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # varchar input, fixedchar substring
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "fixedchar<L2>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # string input, string substring
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # string input, varchar substring
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L1>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # string input, fixedchar substring
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "fixedchar<L1>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # fixedchar input, fixedchar substring
+          - value: "fixedchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "fixedchar<L2>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # fixedchar input, string substring
+          - value: "fixedchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+      - args:  # fixedchar input, varchar substring
+          - value: "fixedchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "BOOLEAN"
+  -
+    name: 'contains'
+    description: >-
+      Whether the `input` string contains the `substring`.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:  # varchar input, varchar substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # varchar input, string substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # varchar input, fixedchar substring
+          - name: input
+            value: 'varchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, string substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, varchar substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L1>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # string input, fixedchar substring
+          - name: input
+            value: 'string'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L1>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, fixedchar substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'fixedchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, string substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'string'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+      - args:  # fixedchar input, varchar substring
+          - name: input
+            value: 'fixedchar<L1>'
+            description: The input string.
+          - name: substring
+            value: 'varchar<L2>'
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII]
+        return: 'BOOLEAN'
+  -
+    name: strpos  # 1-based position of the first match; 0 when nothing is found; every impl returns i64
+    description: >-
+      Return the position of the first occurrence of a string in another string. The first
+      character of the string is at position 1. If no occurrence is found, 0 is returned.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"  # own length parameter, so the needle need not match the input's length
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "fixedchar<L2>"
+            name: "substring"
+            description: The substring to search for.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+  -
+    name: regexp_strpos  # args: input, pattern, position (i64), occurrence (i64); every impl returns i64
+    description: >-
+      Return the position of an occurrence of the given regular expression pattern in a
+      string. The first character of the string is at position 1. The regular expression pattern
+      should follow the International Components for Unicode implementation
+      (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The number of characters
+      from the beginning of the string to begin starting to search for pattern matches can be
+      specified using the `position` argument. Specifying `1` means to search for matches
+      starting at the first character of the input string, `2` means the second character, and so
+      on. The `position` argument should be a positive non-zero integer. Which occurrence to
+      return the position of is specified using the `occurrence` argument. Specifying `1` means
+      the position of the first occurrence will be returned, `2` means the position of the second
+      occurrence, and so on. The `occurrence` argument should be a positive non-zero integer. If
+      no occurrence is found, 0 is returned.
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines. This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+
+      Behavior is undefined if the regex fails to compile, the occurrence value is out of range, or
+      the position value is out of range.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+          - value: "varchar<L2>"
+            name: "pattern"
+          - value: i64
+            name: "position"
+          - value: i64
+            name: "occurrence"
+        options:  # the same three options are declared on both impls
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: i64
+      - args:
+          - value: "string"
+            name: "input"
+          - value: "string"
+            name: "pattern"
+          - value: i64
+            name: "position"
+          - value: i64
+            name: "occurrence"
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: i64
+  -
+    name: count_substring  # overloads: string, varchar, fixedchar; every impl returns i64
+    description: >-
+      Return the number of non-overlapping occurrences of a substring in an input string.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to count.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"  # length parameter is independent of the input's L1
+            name: "substring"
+            description: The substring to count.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "fixedchar<L2>"
+            name: "substring"
+            description: The substring to count.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: i64
+  -
+    name: regexp_count_substring  # args: input, pattern, position (i64); every impl returns i64
+    description: >-
+      Return the number of non-overlapping occurrences of a regular expression pattern in an input
+      string. The regular expression pattern should follow the International Components for
+      Unicode implementation (https://unicode-org.github.io/icu/userguide/strings/regexp.html).
+      The number of characters from the beginning of the string to begin starting to search for
+      pattern matches can be specified using the `position` argument. Specifying `1` means to
+      search for matches starting at the first character of the input string, `2` means the
+      second character, and so on. The `position` argument should be a positive non-zero integer.
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines. This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+
+      Behavior is undefined if the regex fails to compile or the position value is out of range.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+          - value: "string"
+            name: "pattern"
+          - value: i64
+            name: "position"
+        options:  # the same three options are declared on all three impls
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+          - value: "varchar<L2>"
+            name: "pattern"
+          - value: i64
+            name: "position"
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+          - value: "fixedchar<L2>"
+            name: "pattern"
+          - value: i64
+            name: "position"
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: i64
+  -
+    name: replace  # overloads: string and varchar only (no fixedchar impl is declared here)
+    description: >-
+      Replace all occurrences of the substring with the replacement string.
+
+      The `case_sensitivity` option applies to the `substring` argument.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: Input string.
+          - value: "string"
+            name: "substring"
+            description: The substring to replace.
+          - value: "string"
+            name: "replacement"
+            description: The replacement string.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: Input string.
+          - value: "varchar<L2>"
+            name: "substring"
+            description: The substring to replace.
+          - value: "varchar<L3>"
+            name: "replacement"
+            description: The replacement string.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+        return: "varchar<L1>"  # declared with the input's length parameter (L1), not L2 or L3
+  -
+    name: concat_ws  # the separator is the first argument, followed by the strings to join
+    description: Concatenate strings together separated by a separator.
+    impls:
+      - args:
+          - value: "string"
+            name: "separator"
+            description: Character to separate strings by.
+          - value: "string"
+            name: "string_arguments"
+            description: Strings to be concatenated.
+        variadic:  # declared variadic with a minimum of 1
+          min: 1
+        return: "string"
+      - args:
+          - value: "varchar<L2>"
+            name: "separator"
+            description: Character to separate strings by.
+          - value: "varchar<L1>"
+            name: "string_arguments"
+            description: Strings to be concatenated.
+        variadic:
+          min: 1
+        return: "varchar<L1>"  # uses the length parameter of `string_arguments`, not of the separator
+  -
+    name: repeat  # overloads: string and varchar; return type mirrors the `input` type
+    description: Repeat a string `count` number of times.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+          - value: i64
+            name: "count"
+        return: "string"
+      - args:
+          # Exactly two arguments (input, count), mirroring the `string` impl above.
+          - value: "varchar<L1>"
+            name: "input"
+          - value: i64
+            name: "count"
+        return: "varchar<L1>"
+  -
+    name: reverse  # one impl per string type; each returns the same type it receives
+    description: Returns the string in reverse order.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        return: "fixedchar<L1>"
+  -
+    name: replace_slice  # args: input, start (i64), length (i64), replacement; overloads: string, varchar
+    description: >-
+      Replace a slice of the input string.  A specified 'length' of characters will be deleted from
+      the input string beginning at the 'start' position and will be replaced by a new string.  A
+      start value of 1 indicates the first character of the input string. If start is negative
+      or zero, or greater than the length of the input string, a null string is returned. If 'length'
+      is negative, a null string is returned.  If 'length' is zero, inserting of the new string
+      occurs at the specified 'start' position and no characters are deleted. If 'length' is
+      greater than the input string, deletion will occur up to the last character of the input string.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: Input string.
+          - value: i64
+            name: "start"
+            description: The position in the string to start deleting/inserting characters.
+          - value: i64
+            name: "length"
+            description: The number of characters to delete from the input string.
+          - value: "string"
+            name: "replacement"
+            description: The new string to insert at the start position.
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: Input string.
+          - value: i64
+            name: "start"
+            description: The position in the string to start deleting/inserting characters.
+          - value: i64
+            name: "length"
+            description: The number of characters to delete from the input string.
+          - value: "varchar<L2>"
+            name: "replacement"
+            description: The new string to insert at the start position.
+        return: "varchar<L1>"  # declared with the input's length parameter (L1), not the replacement's
+  -
+    name: lower  # one impl per string type; each returns the same type it receives
+    description: >-
+      Transform the string to lower case characters. Implementation should follow the utf8_unicode_ci
+      collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        options:  # every impl offers the same char_set choices: UTF8 or ASCII_ONLY
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "fixedchar<L1>"
+  -
+    name: upper  # one impl per string type; each returns the same type it receives
+    description: >-
+      Transform the string to upper case characters. Implementation should follow the utf8_unicode_ci
+      collations according to the Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        options:  # every impl offers the same char_set choices: UTF8 or ASCII_ONLY
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "fixedchar<L1>"
+  -
+    name: swapcase  # one impl per string type; each returns the same type it receives
+    description: >-
+      Transform the string's lowercase characters to uppercase and uppercase characters to
+      lowercase. Implementation should follow the utf8_unicode_ci collations according to the
+      Unicode Collation Algorithm described at http://www.unicode.org/reports/tr10/.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        options:  # every impl offers the same char_set choices: UTF8 or ASCII_ONLY
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "fixedchar<L1>"
+  -
+    name: capitalize  # one impl per string type; each returns the same type it receives
+    description: >-
+      Capitalize the first character of the input string. Implementation should follow the
+      utf8_unicode_ci collations according to the Unicode Collation Algorithm described at
+      http://www.unicode.org/reports/tr10/.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        options:  # every impl offers the same char_set choices: UTF8 or ASCII_ONLY
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "fixedchar<L1>"
+  -
+    name: title  # one impl per string type; each returns the same type it receives
+    description: >-
+      Converts the input string into titlecase. Capitalize the first character of each word in the
+      input string except for articles (a, an, the). Implementation should follow the
+      utf8_unicode_ci collations according to the Unicode Collation Algorithm described at
+      http://www.unicode.org/reports/tr10/.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        options:  # every impl offers the same char_set choices: UTF8 or ASCII_ONLY
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "varchar<L1>"
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        options:
+          char_set:
+            values: [ UTF8, ASCII_ONLY ]
+        return: "fixedchar<L1>"
+  -
+    name: char_length  # overloads: string, varchar, fixedchar; every impl returns i64
+    description: >-
+      Return the number of characters in the input string.  The length includes trailing spaces.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        return: i64
+  -
+    name: bit_length  # overloads: string, varchar, fixedchar; every impl returns i64
+    description: Return the number of bits in the input string.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        return: i64
+  -
+    name: octet_length  # overloads: string, varchar, fixedchar; every impl returns i64
+    description: Return the number of bytes in the input string.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+        return: i64
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+        return: i64
+      - args:
+          - value: "fixedchar<L1>"
+            name: "input"
+        return: i64
+  -
+    name: regexp_replace  # args: input, pattern, replacement, position (i64), occurrence (i64)
+    description: >-
+      Search a string for a substring that matches a given regular expression pattern and replace
+      it with a replacement string. The regular expression pattern should follow the International
+      Components for Unicode implementation
+      (https://unicode-org.github.io/icu/userguide/strings/regexp.html). The occurrence of the
+      pattern to be replaced is specified using the `occurrence` argument. Specifying `1` means
+      only the first occurrence will be replaced, `2` means the second occurrence, and so on.
+      Specifying `0` means all occurrences will be replaced. The number of characters from the
+      beginning of the string to begin starting to search for pattern matches can be specified
+      using the `position` argument. Specifying `1` means to search for matches starting at the
+      first character of the input string, `2` means the second character, and so on. The `position`
+      argument should be a positive non-zero integer. The replacement string can capture groups
+      using numbered backreferences.
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines.  This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+
+      Behavior is undefined if the regex fails to compile, the replacement contains an illegal
+      back-reference, the occurrence value is out of range, or the position value is out of range.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "pattern"
+            description: The regular expression to search for within the input string.
+          - value: "string"
+            name: "replacement"
+            description: The replacement string.
+          - value: i64
+            name: "position"
+            description: The position to start the search.
+          - value: i64
+            name: "occurrence"
+            description: Which occurrence of the match to replace.
+        options:  # the same three options are declared on both impls
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: "string"
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"
+            name: "pattern"
+            description: The regular expression to search for within the input string.
+          - value: "varchar<L3>"
+            name: "replacement"
+            description: The replacement string.
+          - value: i64
+            name: "position"
+            description: The position to start the search.
+          - value: i64
+            name: "occurrence"
+            description: Which occurrence of the match to replace.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: "varchar<L1>"
+  -
+    name: ltrim  # overloads: varchar and string; return type mirrors the `input` type
+    description: >-
+      Remove any occurrence of the characters from the left side of the string.
+      If no characters are specified, spaces are removed.
+    impls:  # NOTE(review): description mentions omitting `characters`, but both impls require it; confirm
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "varchar<L2>"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "string"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "string"
+  -
+    name: rtrim  # overloads: varchar and string; return type mirrors the `input` type
+    description: >-
+      Remove any occurrence of the characters from the right side of the string.
+      If no characters are specified, spaces are removed.
+    impls:  # NOTE(review): description mentions omitting `characters`, but both impls require it; confirm
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "varchar<L2>"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "string"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "string"
+  -
+    name: trim  # overloads: varchar and string; return type mirrors the `input` type
+    description: >-
+      Remove any occurrence of the characters from the left and right sides of
+      the string. If no characters are specified, spaces are removed.
+    impls:  # NOTE(review): description mentions omitting `characters`, but both impls require it; confirm
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "varchar<L2>"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to remove characters from."
+          - value: "string"
+            name: "characters"
+            description: "The set of characters to remove."
+        return: "string"
+  -
+    name: lpad  # args: input, length (i32), characters; overloads: varchar and string
+    description: >-
+      Left-pad the input string with the string of 'characters' until the specified length of the
+      string has been reached. If the input string is longer than 'length', remove characters from
+      the right-side to shorten it to 'length' characters. If the string of 'characters' is longer
+      than the remaining 'length' needed to be filled, only pad until 'length' has been reached.
+      If 'characters' is not specified, the default value is a single space.
+    impls:  # NOTE(review): description gives a default for `characters`, but both impls require it; confirm
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "varchar<L2>"
+            name: "characters"
+            description: "The string of characters to use for padding."
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "string"
+            name: "characters"
+            description: "The string of characters to use for padding."
+        return: "string"
+  -
+    name: rpad  # args: input, length (i32), characters; overloads: varchar and string
+    description: >-
+      Right-pad the input string with the string of 'characters' until the specified length of the
+      string has been reached. If the input string is longer than 'length', remove characters from
+      the left-side to shorten it to 'length' characters. If the string of 'characters' is longer
+      than the remaining 'length' needed to be filled, only pad until 'length' has been reached.
+      If 'characters' is not specified, the default value is a single space.
+    impls:  # NOTE(review): description gives a default for `characters`, but both impls require it; confirm
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "varchar<L2>"
+            name: "characters"
+            description: "The string of characters to use for padding."
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "string"
+            name: "characters"
+            description: "The string of characters to use for padding."
+        return: "string"
+  -
+    name: center  # args: input, length (i32), character; overloads: varchar and string
+    description: >-
+      Center the input string by padding the sides with a single `character` until the specified
+      `length` of the string has been reached. By default, if the `length` will be reached with
+      an uneven number of padding, the extra padding will be applied to the right side.
+      The side with extra padding can be controlled with the `padding` option.
+
+      Behavior is undefined if the number of characters passed to the `character` argument is not 1.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "varchar<L2>"  # own length parameter, as lpad/rpad do for their padding argument
+            name: "character"
+            description: "The character to use for padding."
+        options:
+          padding:
+            values: [ RIGHT, LEFT ]
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+            description: "The string to pad."
+          - value: i32
+            name: "length"
+            description: "The length of the output string."
+          - value: "string"
+            name: "character"
+            description: "The character to use for padding."
+        options:
+          padding:
+            values: [ RIGHT, LEFT ]
+        return: "string"
+  -
+    name: left  # `count` is i32; overloads: varchar and string; return type mirrors `input`
+    description: Extract `count` characters starting from the left of the string.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+          - value: i32
+            name: "count"
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+          - value: i32
+            name: "count"
+        return: "string"
+  -
+    name: right  # `count` is i32; overloads: varchar and string; return type mirrors `input`
+    description: Extract `count` characters starting from the right of the string.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+          - value: i32
+            name: "count"
+        return: "varchar<L1>"
+      - args:
+          - value: "string"
+            name: "input"
+          - value: i32
+            name: "count"
+        return: "string"
+  -
+    name: string_split  # returns a List whose element type matches the `input` type
+    description: >-
+      Split a string into a list of strings, based on a specified `separator` character.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"
+            name: "separator"
+            description: A character used for splitting the string.
+        return: "List<varchar<L1>>"  # elements carry the input's length parameter (L1)
+      - args:
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "separator"
+            description: A character used for splitting the string.
+        return: "List<string>"
+  -
+    name: regexp_string_split  # returns a List whose element type matches the `input` type
+    description: >-
+      Split a string into a list of strings, based on a regular expression pattern.  The
+      substrings matched by the pattern will be used as the separators to split the input
+      string and will not be included in the resulting list. The regular expression
+      pattern should follow the International Components for Unicode implementation
+      (https://unicode-org.github.io/icu/userguide/strings/regexp.html).
+
+      The `case_sensitivity` option specifies case-sensitive or case-insensitive matching.
+      Enabling the `multiline` option will treat the input string as multiple lines. This makes
+      the `^` and `$` characters match at the beginning and end of any line, instead of just the
+      beginning and end of the input string. Enabling the `dotall` option makes the `.` character
+      match line terminator characters in a string.
+    impls:
+      - args:
+          - value: "varchar<L1>"
+            name: "input"
+            description: The input string.
+          - value: "varchar<L2>"
+            name: "pattern"
+            description: The regular expression to search for within the input string.
+        options:  # the same three options are declared on both impls
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: "List<varchar<L1>>"  # elements carry the input's length parameter (L1)
+      - args:
+          - value: "string"
+            name: "input"
+            description: The input string.
+          - value: "string"
+            name: "pattern"
+            description: The regular expression to search for within the input string.
+        options:
+          case_sensitivity:
+            values: [ CASE_SENSITIVE, CASE_INSENSITIVE, CASE_INSENSITIVE_ASCII ]
+          multiline:
+            values: [ MULTILINE_DISABLED, MULTILINE_ENABLED ]
+          dotall:
+            values: [ DOTALL_DISABLED, DOTALL_ENABLED ]
+        return: "List<string>"
+
+aggregate_functions:
+
+  -
+    name: string_agg  # single impl over `string`; returns `string`
+    description: Concatenates a column of string values with a separator.
+    impls:
+      - args:
+          - value: "string"
+            name: "input"
+            description: "Column of string values."
+          - value: "string"
+            name: "separator"
+            constant: true  # the separator argument is flagged as a constant
+            description: "Separator for concatenated strings"
+        ordered: true  # this impl is flagged as ordered
+        return: "string"
diff --git a/src/substrait/extensions/type_variations.yaml b/src/substrait/extensions/type_variations.yaml
new file mode 100644
index 0000000..f6f96d5
--- /dev/null
+++ b/src/substrait/extensions/type_variations.yaml
@@ -0,0 +1,25 @@
+%YAML 1.2
+---
+type_variations:  # each entry names a variation (`name`) of a base type (`parent`)
+  - parent: string  # variations of string
+    name: dict4
+    description: a four-byte dictionary encoded string
+    functions: INHERITS  # NOTE(review): INHERITS/SEPARATE meaning comes from the Substrait spec
+  - parent: string
+    name: bigoffset
+    description: >-
+      The arrow large string representation of strings, still restricted to the default string size defined in
+      Substrait.
+    functions: SEPARATE
+  - parent: struct  # variations of struct
+    name: avro
+    description: an avro encoded struct
+    functions: SEPARATE
+  - parent: struct
+    name: cstruct
+    description: a cstruct representation of the struct
+    functions: SEPARATE
+  - parent: struct  # NOTE(review): description formerly said "string"; confirm parent is not meant to be string
+    name: dict2
+    description: a 2-byte dictionary encoded struct.
+    functions: INHERITS
diff --git a/src/substrait/extensions/unknown.yaml b/src/substrait/extensions/unknown.yaml
new file mode 100644
index 0000000..3b0e6c1
--- /dev/null
+++ b/src/substrait/extensions/unknown.yaml
@@ -0,0 +1,66 @@
+%YAML 1.2
+---
+types:  # the custom type that every signature below refers to
+  - name: unknown
+scalar_functions:  # binary functions, each (unknown, unknown) -> unknown
+  - name: add
+    impls:
+      - args:
+          - value: unknown
+          - value: unknown
+        return: unknown
+  - name: subtract
+    impls:
+      - args:
+          - value: unknown
+          - value: unknown
+        return: unknown
+  - name: multiply
+    impls:
+      - args:
+          - value: unknown
+          - value: unknown
+        return: unknown
+  - name: divide
+    impls:
+      - args:
+          - value: unknown
+          - value: unknown
+        return: unknown
+  - name: modulus
+    impls:
+      - args:
+          - value: unknown
+          - value: unknown
+        return: unknown
+aggregate_functions:  # unary aggregates, each unknown -> unknown via an unknown intermediate
+  - name: sum
+    impls:
+      - args:
+          - value: unknown
+        intermediate: unknown
+        return: unknown
+  - name: avg
+    impls:
+      - args:
+          - value: unknown
+        intermediate: unknown
+        return: unknown
+  - name: min
+    impls:
+      - args:
+          - value: unknown
+        intermediate: unknown
+        return: unknown
+  - name: max
+    impls:
+      - args:
+          - value: unknown
+        intermediate: unknown
+        return: unknown
+  - name: count
+    impls:
+      - args:
+          - value: unknown
+        intermediate: unknown
+        return: unknown