In [3]:
import json
from snowflake.snowpark.session import Session

# Reading Snowflake Connection Details
snowflake_connection_cfg = json.loads(open('creds.json').read())

# Creating Snowpark Session
session = Session.builder.configs(snowflake_connection_cfg).create()

# Create a fresh & new schema
session.sql('create database snowpark_demo_db').collect()
session.sql('CREATE OR REPLACE SCHEMA snowpark_demo_db.MEMBERSHIP_MODELING_DEMO').collect()
session.use_schema('snowpark_demo_db.MEMBERSHIP_MODELING_DEMO')

Failed to execute query [queryID: 01a8db1d-0000-dcb2-0002-4d3e003985ce] create database snowpark_demo_db
002002 (42710): SQL compilation error:
Object 'SNOWPARK_DEMO_DB' already exists.


SnowparkSQLException: (1304): 01a8db1d-0000-dcb2-0002-4d3e003985ce: 002002 (42710): SQL compilation error:
Object 'SNOWPARK_DEMO_DB' already exists.

In [4]:
# Function to generate Data Creation SQL
def generate_data(table_name, num_rows):
    generate_data_sql = """
    -- Create fake data
    create or replace table {} (email string, gender string, MEMBERSHIP_STATUS string,  MEMBERSHIP_LENGTH double, AVG_SESSION_LENGTH double, TIME_ON_APP double, TIME_ON_WEBSITE double, YEARLY_SPENT double) as 
    with profiles as (
    select concat(lower(randstr(10, random())), '@', lower(randstr(5, random())), '.com') as EMAIL,
           case when uniform(1,10,random())<=6 then 'MALE'
                when uniform(1,10,random())<=9 then 'FEMALE'
                else 'UNKNOWN'
           end as GENDER,
           greatest(0, normal(45000,15000,random())) / 100 as YEARLY_SPENT,
           case when YEARLY_SPENT*normal(1, .05, random()) < 150 then 'BASIC'
                when YEARLY_SPENT*normal(1, .05, random()) < 250 then 'BRONZE'
                when YEARLY_SPENT*normal(1, .05, random()) < 350 then 'SILVER'
                when YEARLY_SPENT*normal(1, .05, random()) < 550 then 'GOLD'
                when YEARLY_SPENT*normal(1, .05, random()) < 650 then 'PLATIN'
                else 'DIAMOND'
           end as MEMBERSHIP_STATUS,
           case when YEARLY_SPENT*normal(1, .1, random()) < 150 then null
                when YEARLY_SPENT*normal(1, .1, random()) < 250 then greatest(0, normal(50,50,random())) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 350 then greatest(0, normal(300,100,random())) / 100
                when YEARLY_SPENT*normal(1, .15, random()) < 550 then greatest(0, normal(425,125,random())) / 100
                when YEARLY_SPENT*normal(1, .2, random()) < 650 then greatest(0, normal(625,135,random())) / 100
                else greatest(0, normal(850,140,random())) / 100
           end as MEMBERSHIP_LENGTH,
           case when YEARLY_SPENT*normal(1, .1, random()) < 120 then null
                when YEARLY_SPENT*normal(1, .1, random()) < 150 then greatest(0, normal(600,50,random())) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 250 then greatest(0, normal(850,75,random())) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 350 then greatest(0, normal(1450,200,random())) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 550 then greatest(0, normal(2400,300,random())) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 650 then greatest(0, normal(2700,400,random())) / 100
                else greatest(0, normal(3500,500,random())) / 100
           end as AVG_SESSION_LENGTH,
           case when YEARLY_SPENT < 150 then normal(5000,1000,random()) / 100
                when YEARLY_SPENT < 250 then normal(7300,2000,random()) / 100
                when YEARLY_SPENT < 350 then normal(9500,2000,random()) / 100
                when YEARLY_SPENT < 370 then normal(12500,3000,random()) / 100
                when YEARLY_SPENT < 550 then normal(19000,2000,random()) / 100
                when YEARLY_SPENT < 650 then normal(25000,2000,random()) / 100
                else normal(30000,2000,random()) / 100
           end as TIME_ON_APP,
           case when YEARLY_SPENT*normal(1, .1, random()) < 300 then normal(5000,1000,random()) / 100
                when YEARLY_SPENT*normal(1, .05, random()) < 500 then normal(7000,2500,random()) / 100
                when YEARLY_SPENT*normal(1, .1, random()) < 520 then normal(7000,2500,random()) / 100
                else normal(12000,1000,random()) / 100
           end as TIME_ON_WEBSITE
    from table(generator(rowcount=>{})))
    select email, gender, MEMBERSHIP_STATUS, MEMBERSHIP_LENGTH, AVG_SESSION_LENGTH, TIME_ON_APP, TIME_ON_WEBSITE, YEARLY_SPENT*normal(1, .05, random()) as YEARLY_SPENT from profiles;
    """.format(table_name, num_rows)
    return generate_data_sql

In [5]:
session.sql('alter warehouse SNOWPARK_DEMO_WH set warehouse_size=xxlarge;').collect()

# Generating sample data
# 10 thoursand rows
session.sql(generate_data('ECOMMERCE_CUSTOMERS_10K', 10000)).collect()
# 100 thoursand rows
session.sql(generate_data('ECOMMERCE_CUSTOMERS_100K', 100000)).collect()
# 1 million rows
session.sql(generate_data('ECOMMERCE_CUSTOMERS_1M', 1000000)).collect()
# 100 million rows
session.sql(generate_data('ECOMMERCE_CUSTOMERS_100M', 100000000)).collect()

session.sql('alter warehouse SNOWPARK_DEMO_WH set warehouse_size=xsmall;').collect()