In [None]:
from snowflake.snowpark import Session
import yaml

# Alternative kept for reference: build the session from a named connection
# instead of reusing the active one (presumably for runs outside the hosted
# notebook environment — confirm before enabling).
# NOTE(review): `Root` is not imported in the visible cells; uncommenting the
# second line would also require importing it.
# session = Session.builder.config("connection_name", "myconnection").create()
# root = Root(session)

from snowflake.snowpark.context import get_active_session
# Reuse the Snowpark session that is already active; the cells below read this
# module-level `session` variable.
session = get_active_session()

In [None]:
def generate_sources_yml(session: "Session", database_name: str, schema_name: str, output_file: str = 'sources.yml'):
    """
    Generates a dbt sources.yml file from Snowflake Information Schema.

    Reads every base table and view of ``schema_name`` from
    ``{database_name}.INFORMATION_SCHEMA.TABLES`` (ordered by table name) and
    writes one dbt source whose table names are lower-cased. When the schema
    has no tables or views, a message is printed and no file is written.

    Args:
        session: Snowpark session connected to Snowflake.
        database_name: Name of the Snowflake database to query. It is
            interpolated into the query as an identifier (identifiers cannot
            be bound), so it must come from a trusted source.
        schema_name: Name of the Snowflake schema to query. Passed as a bind
            parameter and compared for equality against TABLE_SCHEMA.
        output_file: Path to the output YAML file (default: 'sources.yml').

    Returns:
        None. The result is the file written to ``output_file``.
    """
    # Tables and views of the requested schema. The schema name is bound via
    # `params` rather than formatted into the SQL text, so names containing
    # quotes cannot break (or alter) the statement.
    tables_query = f"""
    SELECT TABLE_NAME, TABLE_TYPE
    FROM {database_name}.INFORMATION_SCHEMA.TABLES
    WHERE TABLE_SCHEMA = ?
    AND TABLE_TYPE IN ('BASE TABLE', 'VIEW')
    ORDER BY TABLE_NAME
    """
    table_rows = session.sql(tables_query, params=[schema_name]).collect()

    if not table_rows:
        print(f"No tables or views found in {database_name}.{schema_name}")
        return

    # The source is named after the last underscore-separated token of the
    # schema name, e.g. SCH_BRONZE_INVENTORY -> inventory.
    source_name = schema_name.split('_')[-1].lower()

    # Build the sources.yml structure (key order is preserved on dump).
    sources_yml = {
        'version': 2,
        'sources': [
            {
                'name': source_name,
                'database': database_name.lower(),
                'schema': schema_name.lower(),
                'tables': [{'name': row['TABLE_NAME'].lower()} for row in table_rows],
            }
        ],
    }

    # Write to YAML file
    with open(output_file, 'w', encoding='utf-8') as f:
        yaml.dump(sources_yml, f, default_flow_style=False, sort_keys=False)

    print(f"sources.yml generated successfully at {output_file}")

In [None]:
database_name = "dbt_dev"  # Replace with actual database name

# Generate one sources file per bronze schema. The schema list is read from the
# same database that is handed to generate_sources_yml, so changing
# `database_name` above is enough to retarget the whole cell.
schema_df = session.table(f"{database_name}.INFORMATION_SCHEMA.SCHEMATA")
for schema in schema_df.collect():
    schema_nm = schema['SCHEMA_NAME']
    if schema_nm.startswith('SCH_BRONZE'):
        # Output file is named after the last underscore-separated token,
        # e.g. SCH_BRONZE_INVENTORY -> _inventory__sources.yml
        source_suffix = schema_nm.split('_')[-1].lower()
        generate_sources_yml(session, database_name, schema_nm, f"_{source_suffix}__sources.yml")

In [None]:
# Files created in code or from the terminal exist only for the duration of the current notebook service session.
# When the notebook service is suspended, these files are removed.
# Only files that are uploaded or created in Snowsight persist across sessions.
# Files created from code or the terminal do not appear in the left-hand pane. This is a temporary limitation.
# Files under /workspace/<hash> created by code or the terminal are typically session-scoped and can disappear
# when the notebook service suspends. To keep them, persist them to a stage or push them to Git.

import os

# Show where the notebook is running and what is currently in that directory.
print(f"Current working directory: {os.getcwd()}")

print(f"Files in directory: {os.listdir()}")
file_path = '_sales__sources.yml'
# generate_sources_yml writes nothing for a schema without tables or views, so
# the file may legitimately be absent; report that instead of raising.
if os.path.exists(file_path):
    print(f"File created at: {file_path}, size: {os.path.getsize(file_path)} bytes")
else:
    print(f"File not found: {file_path}")

In [None]:
import os
import shutil

source_file = '_inventory__sources.yml'
# Destination folder, resolved against the current working directory so the
# folder that is listed below is the same one the file is copied into.
# NOTE(review): this replaces a hard-coded, workspace-specific absolute path
# ending in .../NB/files; it assumes that path was the 'files' folder under the
# working directory — confirm.
path_dir = os.path.abspath('files')

# Without an existing directory, shutil.copy would create a plain file named
# 'files' instead of copying into a folder.
os.makedirs(path_dir, exist_ok=True)

if os.path.exists(source_file):
    shutil.copy(source_file, path_dir)
else:
    print(f"File not found, nothing copied: {source_file}")

print(f"Files in CWD directory: {os.listdir()}")

print(f"Files in files directory: {os.listdir(path_dir)}")
