In [1]:
from snowflake.snowpark import Session 
from dotenv import load_dotenv
import os
load_dotenv()

# use the env vars in comments above to set the vars below
OpenAI_API_KEY = os.getenv("OPENAI_API_KEY")
snowflake_account = os.getenv("SNOWFLAKE_ACCOUNT")
username = os.getenv("SNOWFLAKE_USER")
password = os.getenv("SNOWSQL_PWD")
warehouse = os.getenv("WAREHOUSE")
database = os.getenv("DATABASE")
schema = os.getenv("SCHEMA")
role = os.getenv("ROLE")


# print out all env vars using f-strings each on a separate line but x out password
print(f"OpenAI_API_KEY: {'x' * len(OpenAI_API_KEY)}")
print(f"snowflake_account: {snowflake_account}")
#print(f"username: {username}")
#print(f"password: {password}")
print(f"warehouse: {warehouse}")
print(f"database: {database}")
print(f"schema: {schema}")
print(f"role: {role}")




OpenAI_API_KEY: xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
snowflake_account: ie62028.ap-northeast-1.aws
warehouse: COMPUTE_WH
database: LEARNING_SQL
schema: PUBLIC
role: ACCOUNTADMIN


In [2]:
import snowflake.connector
import os

conn = snowflake.connector.connect(
    user=username,
    password=password,
    account=snowflake_account,
    warehouse=warehouse,
    database=database,
    schema=schema,
    role=role
    )



In [3]:
def execute_query(connection, query):
    '''
    return list of tuples from query
    usage:
    result = execute_query(conn, "DESCRIBE TABLE customer")
    for row in result:
        print(row)

    conn.close()
    '''
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        # Fetch all rows
        rows = cursor.fetchall()
        # Alternatively, fetch one row at a time with cursor.fetchone()
        return rows
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        cursor.close()

In [4]:
import pandas as pd

def query_to_dataframe(connection, query):
    '''
    # Usage
    df = query_to_dataframe(conn, "SELECT * FROM customer")
    print(df)
    '''
    cursor = connection.cursor()
    try:
        cursor.execute(query)
        # Get the result set's column names
        columns = [col[0] for col in cursor.description]
        rows = cursor.fetchall()
        return pd.DataFrame(rows, columns=columns)
    except Exception as e:
        print(f"An error occurred: {e}")
    finally:
        cursor.close()


In [5]:
connection_parameters = {
    "user": username,
    "password": password,
    "account": snowflake_account,
    "warehouse": warehouse,
    "database": database,
    "schema": schema,
    "role": role
    
}

In [6]:
query = """
SELECT 'here is a string' as output_string; 
"""

df = query_to_dataframe(conn, query)
print(df)

      OUTPUT_STRING
0  here is a string


In [7]:
query = """
select 'you haven''t reached the end yet' as output_string;
"""

df = query_to_dataframe(conn, query)
print(df)

                     OUTPUT_STRING
0  you haven't reached the end yet


In [8]:
query = """ 
select 'you haven''t reached the end yet' as output_string;
"""
df = query_to_dataframe(conn, query)
print(df)

                     OUTPUT_STRING
0  you haven't reached the end yet


In [9]:
query = """
select $$string with 4 single quotes ''''$$ as output_string; 
"""
df = query_to_dataframe(conn, query)
print(df)

                      OUTPUT_STRING
0  string with 4 single quotes ''''


In [10]:
query = """
show parameters like 'timez%';
"""
df = query_to_dataframe(conn, query)
print(df)


        key                value              default level description  \
0  TIMEZONE  America/Los_Angeles  America/Los_Angeles         time zone   

     type  
0  STRING  


In [11]:
query = """
alter session set TIMEZONE = 'Asia/Tokyo';
"""
df = query_to_dataframe(conn, query)
print(df)

                             status
0  Statement executed successfully.


In [12]:
'''
Snowflake is very flexible regarding date literals and can handle many common formats. For example, Snowflake allowed all of the following string literals to be inserted into a date column:

'24-OCT-2022'
'10/24/2022'
'2022-10-24'
'''
query = """
select current_date, current_time, current_timestamp;
"""
df = query_to_dataframe(conn, query)
print(df)


  CURRENT_DATE     CURRENT_TIME                CURRENT_TIMESTAMP
0   2023-12-12  19:11:49.330000 2023-12-12 19:11:49.330000+09:00


In [13]:
query = """
show parameters like 'date_outputs%'; 
"""
df = query_to_dataframe(conn, query)
print(df)

Empty DataFrame
Columns: [key, value, default, level, description, type]
Index: []


In [14]:
query1 = "alter session set DATE_OUTPUT_FORMAT = 'YYYY-MM-DD';"
query2 = "select current_date, current_time, current_timestamp;"

df1 = query_to_dataframe(conn, query1)
df2 = query_to_dataframe(conn, query2)

print(df1)
print(df2)


                             status
0  Statement executed successfully.
  CURRENT_DATE     CURRENT_TIME                CURRENT_TIMESTAMP
0   2023-12-12  19:11:49.690000 2023-12-12 19:11:49.690000+09:00


In [15]:
query = """
select true, false, true = true, true = false;
"""
df = query_to_dataframe(conn, query)
print(df)


   TRUE  FALSE  TRUE = TRUE  TRUE = FALSE
0  True  False         True         False


### variant is a swiss army knife that can hold any data type, use :: to cast to variant type

In [16]:
query = """
select 1::variant, 'abc'::variant, array_construct(1,2,3)::variant, current_date::variant;
"""
df = query_to_dataframe(conn, query)
print(df)


  1::VARIANT 'ABC'::VARIANT ARRAY_CONSTRUCT(1,2,3)::VARIANT  \
0          1          "abc"           [\n  1,\n  2,\n  3\n]   

  CURRENT_DATE::VARIANT  
0          "2023-12-12"  


In [17]:
query = """
select typeof('this is a character string'::variant);
"""
df = query_to_dataframe(conn, query)
print(df)


  TYPEOF('THIS IS A CHARACTER STRING'::VARIANT)
0                                       VARCHAR


In [18]:
query = "select typeof(false::variant);"
query2 = "select typeof(current_timestamp::variant);"

df = query_to_dataframe(conn, query)
df2 = query_to_dataframe(conn, query2)
print(df)
print(df2)

  TYPEOF(FALSE::VARIANT)
0                BOOLEAN
  TYPEOF(CURRENT_TIMESTAMP::VARIANT)
0                      TIMESTAMP_LTZ


In [19]:
# array
query = """
select [123, 'ABC', current_time] as my_array;
""" 
df = query_to_dataframe(conn, query)
#print as string, no index
print(df.to_string(index=False))


                            MY_ARRAY
[\n  123,\n  "ABC",\n  "19:11:51"\n]


In [20]:
query = """
select value from table(flatten(input=>[123, 'ABC', current_time]));
""" 
df = query_to_dataframe(conn, query)
#print as string, no index  
print(df.to_string(index=False))
print(df)

     VALUE
       123
     "ABC"
"19:11:52"
        VALUE
0         123
1       "ABC"
2  "19:11:52"


In [21]:
# object 
query = """
select {'new_years': '01/01', 'independence_day': '07/04', 'christmas': '12/25'} as my_object;
""" 
df = query_to_dataframe(conn, query)
#print as string, no index  
print(df.to_string(index=False))


                                                                            MY_OBJECT
{\n  "christmas": "12/25",\n  "independence_day": "07/04",\n  "new_years": "01/01"\n}


In [22]:
query = """ 
select key, value from table(flatten(
    {'new_years': '01/01', 
    'independence_day': '07/04', 
    'christmas': '12/25'}));
"""
df = query_to_dataframe(conn, query)
#print as string, no index
print(df.to_string(index=False))


             KEY   VALUE
       christmas "12/25"
independence_day "07/04"
       new_years "01/01"


In [23]:
# fetch key's value after flattening
query = """
select value from table(flatten(
    {'new_years': '01/01', 
    'independence_day': '07/04', 
    'christmas': '12/25'})) where key = 'independence_day';
"""
df = query_to_dataframe(conn, query)
#print(df)
#print as string, no index
print(df.to_string(index=False))

  VALUE
"07/04"


### creating tables

In [24]:
query = """ 
create table person
(first_name varchar(50), last_name varchar(50), birth_date date, eye_color varchar(50), 
occupation varchar(50), height float, weight float, favorite_color varchar(50));
"""
df = query_to_dataframe(conn, query)
print(df)


An error occurred: 002002 (42710): SQL compilation error:
Object 'PERSON' already exists.
None


In [25]:
# populate person table created above
query = """
insert into person
values ('John', 'Smith', '1980-01-01', 'brown', 'programmer', 72.0, 180.0, 'blue'),
('Jane', 'Doe', '1985-01-01', 'blue', 'data scientist', 64.0, 140.0, 'green'),
('Joe', 'Schmoe', '1970-01-01', 'brown', 'salesman', 68.0, 155.0, 'brown'),
('Jill', 'Smith', '1975-01-01', 'brown', 'manager', 66.0, 150.0, 'blue');
"""
df = query_to_dataframe(conn, query)
print(df)


   number of rows inserted
0                        4


In [26]:
# show table person
query = """
select * from person;
"""
df = query_to_dataframe(conn, query)
print(df)

  FIRST_NAME LAST_NAME  BIRTH_DATE EYE_COLOR      OCCUPATION  HEIGHT  WEIGHT  \
0       John     Smith  1980-01-01     brown      programmer    72.0   180.0   
1       Jane       Doe  1985-01-01      blue  data scientist    64.0   140.0   
2        Joe    Schmoe  1970-01-01     brown        salesman    68.0   155.0   
3       Jill     Smith  1975-01-01     brown         manager    66.0   150.0   
4       John     Smith  1980-01-01     brown      programmer    72.0   180.0   
5       Jane       Doe  1985-01-01      blue  data scientist    64.0   140.0   
6        Joe    Schmoe  1970-01-01     brown        salesman    68.0   155.0   
7       Jill     Smith  1975-01-01     brown         manager    66.0   150.0   

  FAVORITE_COLOR  
0           blue  
1          green  
2          brown  
3           blue  
4           blue  
5          green  
6          brown  
7           blue  


In [27]:
# insert single record in to person table with an explicit null for weight
query = """
insert into person
values ('Jack', 'Smith', '1990-01-01', 'brown', 'student', 72.0, null, 'blue');
"""
df = query_to_dataframe(conn, query)
print(df)



   number of rows inserted
0                        1
