In [1]:
import os

# Snowflake connection settings, pulled from the process environment.
# A variable that is not set yields None instead of raising.
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")
SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ROLE = os.getenv("SNOWFLAKE_ROLE")
SNOWFLAKE_WAREHOUSE = os.getenv("SNOWFLAKE_WAREHOUSE")
SNOWFLAKE_DATABASE = os.getenv("SNOWFLAKE_DATABASE")
SNOWFLAKE_SCHEMA = os.getenv("SNOWFLAKE_SCHEMA")

In [2]:
import logging
import sys

# Timestamp layout for log records: year-month-day hour:minute:second.
# The day field must be %d; %y is the two-digit year, which would print "24"
# in the day position for every date in 2024.
date_strftime_format = "%Y-%m-%d %H:%M:%S"

# Route INFO-and-above records to stdout so they appear in the cell output,
# each prefixed with a timestamp in the layout defined above.
logging.basicConfig(
    stream=sys.stdout,
    level=logging.INFO,
    format="%(asctime)s %(message)s",
    datefmt=date_strftime_format,
)

In [3]:
from aita.datasource.snowflake import SnowflakeDataSource

# Build the Snowflake data source from the SNOWFLAKE_* connection settings
# that the first cell reads from the environment.
sf_datasource = SnowflakeDataSource(
    user=SNOWFLAKE_USER,
    password=SNOWFLAKE_PASSWORD,
    account=SNOWFLAKE_ACCOUNT,
    warehouse=SNOWFLAKE_WAREHOUSE,
    database=SNOWFLAKE_DATABASE,
    db_schema=SNOWFLAKE_SCHEMA,
    # Honor the configured SNOWFLAKE_ROLE instead of always connecting with
    # ACCOUNTADMIN. ACCOUNTADMIN is kept only as the fallback when the
    # variable is unset, which matches the previous behavior.
    role=SNOWFLAKE_ROLE or "ACCOUNTADMIN",
)

In [4]:
# Disabled sample query: uncomment to fetch three rows from the sample orders table.
# sf_datasource.execute("select * from SNOWFLAKE_SAMPLE_DATA.TPCH_SF1.orders limit 3")

In [5]:
from databuilder.loader.generic_loader import GenericLoader

In [6]:
# Crawl the Snowflake data catalog, passing a GenericLoader instance as the loader.
catalog_loader = GenericLoader()
sf_datasource.crawl_data_catalog(loader=catalog_loader)

2024-07-24 20:53:56 Crawling Snowflake data catalog
2024-07-24 20:53:56 Creating Snowflake metadata extraction job
2024-07-24 20:53:56 Running Snowflake metadata extraction job
2024-07-24 20:53:56 Launching a job
2024-07-24 20:53:56 SQL for snowflake metadata: 
    SELECT
        lower(c.column_name) AS col_name,
        c.comment AS col_description,
        lower(c.data_type) AS col_type,
        lower(c.ordinal_position) AS col_sort_order,
        lower(c.table_catalog) AS database,
        lower(c.table_catalog) AS cluster,
        lower(c.table_schema) AS schema,
        lower(c.table_name) AS name,
        t.comment AS description,
        decode(lower(t.table_type), 'view', 'true', 'false') AS is_view
    FROM
        SNOWFLAKE_SAMPLE_DATA.INFORMATION_SCHEMA.COLUMNS AS c
    LEFT JOIN
        SNOWFLAKE_SAMPLE_DATA.INFORMATION_SCHEMA.TABLES t
            ON c.TABLE_NAME = t.TABLE_NAME
            AND c.TABLE_SCHEMA = t.TABLE_SCHEMA
    ;
    
2024-07-24 20:53:56 Snowflake Connecto

  functions.register_function("flatten", flatten)


2024-07-24 20:53:57 Number of results in first chunk: 1
2024-07-24 20:53:57 Number of results in first chunk: 1
2024-07-24 20:53:59 Number of results in first chunk: 521
2024-07-24 20:53:59 Running a task
2024-07-24 20:53:59 record: TableMetadata('snowflake', 'snowflake_sample_data', 'tpch_sf1', 'part' DescriptionMetadata('description', 'Part data as defined by TPC-H'), [ColumnMetadata('p_comment', None, 'text', '9', [])], False, [])
2024-07-24 20:53:59 record: TableMetadata('snowflake', 'snowflake_sample_data', 'tpch_sf100', 'supplier' DescriptionMetadata('description', 'Supplier data as defined by TPC-H'), [ColumnMetadata('s_nationkey', None, 'number', '4', [])], False, [])
2024-07-24 20:53:59 record: TableMetadata('snowflake', 'snowflake_sample_data', 'tpcds_sf100tcl', 'customer_address' None, [ColumnMetadata('ca_street_name', None, 'text', '4', [])], False, [])
2024-07-24 20:53:59 record: TableMetadata('snowflake', 'snowflake_sample_data', 'tpcds_sf100tcl', 'catalog_sales' None, [C