In [1]:
!pip install snowflake-opendic==0.1.21

Collecting snowflake-opendic==0.1.21
  Downloading snowflake_opendic-0.1.21-py3-none-any.whl (10 kB)
Collecting snowflake-connector-python[pandas]>=3.13.2
  Downloading snowflake_connector_python-3.15.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (2.6 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.6/2.6 MB[0m [31m5.9 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
Collecting toml>=0.10.2
  Downloading toml-0.10.2-py2.py3-none-any.whl (16 kB)
Collecting pydantic>=2.11.3
  Downloading pydantic-2.11.4-py3-none-any.whl (443 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m443.9/443.9 kB[0m [31m9.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting typing-inspection>=0.4.0
  Downloading typing_inspection-0.4.0-py3-none-any.whl (14 kB)
Collecting pydantic-core==2.33.2
  Downloading pydantic_core-2.33.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

## Setting up pyspark-opendic

In [6]:
from snowflake_opendic.snow_opendic import snowflake_connect

def read_secret(secret_name):
    """ Get `secret_name` from docker-compose secret store"""
    secret_path = f"/run/secrets/{secret_name}"
    try:
        with open(secret_path, "r") as f:
            return f.read().strip()  # Remove any trailing newline
    except FileNotFoundError:
        print(f"Secret {secret_name} not found.")
        return None

def snowflake_init_db(conn):
    with conn.cursor() as curr:
        curr.execute("CREATE DATABASE IF NOT EXISTS OPENDIC;")
        curr.execute("use OPENDIC;")
        curr.execute("CREATE SCHEMA IF NOT EXISTS EXPERIMENT;")

ENGINEER_CLIENT_ID = read_secret("engineer_client_id")
ENGINEER_CLIENT_SECRET = read_secret("engineer_client_secret")

print("Secrets read ✔️")

config_path = f"/run/secrets/snowflake-conf"
SNOWFLAKE_CONN = snowflake_connect(config_path)
snowflake_init_db(SNOWFLAKE_CONN)

print("Snowflake conn initialized ✔️")

Secrets read ✔️
Snowflake conn initialized ✔️


In [7]:
from snowflake_opendic.catalog import OpenDicSnowflakeCatalog


POLARIS_URI= "http://polaris:8181/api"

catalog = OpenDicSnowflakeCatalog(SNOWFLAKE_CONN, POLARIS_URI, ENGINEER_CLIENT_ID, ENGINEER_CLIENT_SECRET)
print("Catalog initialized ✔️")


Connection Established | Server: America/Los_Angeles | Latency: 0.383338 ✔︎
Catalog initialized ✔️


In [8]:
catalog.sql("Show schemas")

[(datetime.datetime(2025, 5, 1, 8, 16, 55, 6000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'EXPERIMENT',
  'N',
  'N',
  'OPENDIC',
  'TRAINING_ROLE',
  '',
  '',
  '1',
  'ROLE',
  None,
  None,
  None,
  None,
  None),
 (datetime.datetime(2025, 5, 1, 8, 20, 36, 220000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'INFORMATION_SCHEMA',
  'N',
  'N',
  'OPENDIC',
  '',
  'Views describing the contents of schemas in this database',
  '',
  '1',
  '',
  None,
  None,
  None,
  None,
  None),
 (datetime.datetime(2025, 5, 1, 8, 16, 54, 621000, tzinfo=<DstTzInfo 'America/Los_Angeles' PDT-1 day, 17:00:00 DST>),
  'PUBLIC',
  'N',
  'Y',
  'OPENDIC',
  'TRAINING_ROLE',
  '',
  '',
  '1',
  'ROLE',
  None,
  None,
  None,
  None,
  None)]

## Define the schema for a andfunc object

In [30]:
catalog.sql(
    """
    DEFINE OPEN function_v2
    props {
        "args": "MAP",
        "language": "STRING",
        "def": "string",
        "comment": "string",
        "packages": "list",
        "runtime": "string",
        "client_version": "int",
        "signature": "STRING",
        "return_type": "STRING"
    }
    """
)

Unnamed: 0,udoType,properties,createdTimestamp,lastUpdatedTimestamp,version
0,function_v2,"{'return_type': 'STRING', 'created_time': 'STRING', 'entity_version': 'STRING', 'uname': 'STRING', 'def': 'STRING', 'signature': 'STRING', 'runtime': 'STRING', 'language': 'STRING', 'packages': 'STRING', 'args': 'STRING', 'last_updated_time': 'STRING', 'comment': 'STRING', 'client_version': 'STRING'}",1970-01-01T00:00Z,1970-01-01T00:00Z,


In [31]:
catalog.sql(
 """
 CREATE OPEN function_v2 baz
    props {
            "args": {
                "arg1": "int", 
                "arg2": "int"
                },
            "language": "python",
            "def": "def baz(arg1, arg2):\\n        return arg1 + arg2",
            "packages" : ["numpy", "pandas"],
            "comment": "test fun",
            "runtime": "3.12",
            "client_version": 1,
            "return_type": "int",
            "signature": "baz(arg1 str, arg2 int)"
        }
    """
)

Unnamed: 0,type-name,object-name,props,created-time-stamp,last-updated-time-stamp,entity-version
0,function_v2,baz,"{'args': {'arg1': 'int', 'arg2': 'int'}, 'return_type': 'int', 'def': 'def baz(arg1, arg2):  return arg1 + arg2', 'signature': 'baz(arg1 str, arg2 int)', 'runtime': '3.12', 'language': 'python', 'comment': 'test fun', 'packages': ['numpy', 'pandas'], 'client_version': 1}",2025-05-01T15:24:35.281060138Z,2025-05-01T15:24:35.281063263Z,1


In [24]:
catalog.sql(
    """
    SHOW OPEN function_v2
    """
)

Unnamed: 0,type,name,props,createdTimestamp,lastUpdatedTimestamp,entityVersion
0,function_v2,baz,"{'args': {'arg1': 'int', 'arg2': 'int'}, 'return_type': 'int', 'def': 'def baz(arg1, arg2):  return arg1 + arg2', 'signature': 'foo(arg1 str, arg2 int)', 'runtime': '3.12', 'language': 'python', 'comment': 'test fun', 'packages': ['numpy', 'pandas'], 'client_version': 1}",2025-05-01T15:22:31.247830094Z,2025-05-01T15:22:31.247833636Z,1


In [32]:
catalog.sql(
    """
    ADD OPEN MAPPING function_v2 PLATFORM snowflake
    SYNTAX {
CREATE OR ALTER function <name>(<args>)
    RETURNS <return_type>
    LANGUAGE <language>
    PACKAGES = (<packages>)
    runtime_version = <runtime>
    HANDLER = '<name>'
    AS 
$$
<def>
$$;
    }
    PROPS {
        "args": {
                "propType": "map",
                "format": "<key> <value>",
                "delimiter": ", "
            },
        "packages": {"propType": "list", "format": "'<item>'", "delimiter": ", "}
    }
    """
)

Unnamed: 0,typeName,platformName,syntax,objectDumpMap,createdTimestamp,lastUpdatedTimestamp,version
0,function_v2,snowflake,CREATE OR ALTER function <name>(<args>)\n RETURNS <return_type>\n LANGUAGE <language>\n PACKAGES = (<packages>)\n runtime_version = <runtime>\n HANDLER = '<name>'\n AS \n$$\n<def>\n$$;,"{'args': {'propType': 'map', 'format': '<key> <value>', 'delimiter': ', '}, 'packages': {'propType': 'list', 'format': ''<item>'', 'delimiter': ', '}}",2025-05-01T15:24:53.720662424Z,2025-05-01T15:24:53.720666049Z,1


In [33]:
# Show all mappings from <object>. Example: [snowflake,spark]
catalog.sql(
    """
    SHOW OPEN PLATFORMS FOR function_v2
    """
)

Unnamed: 0,typeName,platformName,syntax,objectDumpMap,createdTimestamp,lastUpdatedTimestamp,version
0,function_v2,snowflake,CREATE OR ALTER function <name>(<args>)\n RETURNS <return_type>\n LANGUAGE <language>\n PACKAGES = (<packages>)\n runtime_version = <runtime>\n HANDLER = '<name>'\n AS \n$$\n<def>\n$$;,"{'args': {'propType': 'map', 'format': '<key> <value>', 'delimiter': ', '}, 'packages': {'propType': 'list', 'format': ''<item>'', 'delimiter': ', '}}",2025-05-01T15:24:53.720662Z,2025-05-01T15:24:53.720666Z,1


In [15]:
# Show all mappings from <object>. Example: [snowflake,spark]
catalog.sql(
    """
    SHOW OPEN Mapping for snowflake
    """
)

Unnamed: 0,typeName,platformName,syntax,objectDumpMap,createdTimestamp,lastUpdatedTimestamp,version
0,function_v2,snowflake,CREATE OR ALTER function <name>(<args>)\n RETURNS <return_type>\n LANGUAGE <language>\n PACKAGES = (<packages>)\n runtime_version = <runtime>\n HANDLER = '<name>'\n AS \n $$\n <def>\n $$;,"{'args': {'propType': 'map', 'format': '<key> <value>', 'delimiter': ', '}, 'packages': {'propType': 'list', 'format': ''<item>'', 'delimiter': ', '}}",2025-05-01T15:20:52.939571Z,2025-05-01T15:20:52.939575Z,1


In [35]:
catalog.sql(
    """
    SYNC OPEN function_v2 for snowflake
    """
)

```json
{
    "executions": [
        {
            "sql": "CREATE OR ALTER function baz(arg1 int, arg2 int)\n    RETURNS int\n    LANGUAGE python\n    PACKAGES = ('numpy', 'pandas')\n    runtime_version = 3.12\n    HANDLER = 'baz'\n    AS \n$$\ndef baz(arg1, arg2):\n        return arg1 + arg2\n$$;",
            "status": "executed"
        }
    ]
}
```

In [17]:
catalog.sql(
    """
    SYNC OPEN OBJECTS for snowflake
    """
)

```json
{
    "executions": [
        {
            "sql": "CREATE OR ALTER function baz(arg1 int, arg2 int)\n            RETURNS int\n            LANGUAGE python\n            PACKAGES = ('numpy', 'pandas')\n            runtime_version = 3.12\n            HANDLER = 'baz'\n            AS \n        $$\n        def foo(arg1, arg2):\n\n        return arg1 + arg2\n        $$;",
            "status": "failed",
            "error": "100357 (P0000): Python Interpreter Error:\n  File \"_udf_code.py\", line 2\n    def foo(arg1, arg2):\nIndentationError: unexpected indent\n in function BAZ with handler baz"
        }
    ]
}
```

## Drop object and mapping

In [27]:
catalog.sql(
    """
    DROP OPEN function_v2
    """
)

Unnamed: 0,Deleted all objects of type
0,function_v2


In [28]:
catalog.sql(
    """
    DROP OPEN MAPPINGS for snowflake
    """
)

Unnamed: 0,Deleted all mappings for platform
0,snowflake
