In [5]:
import snowflake.connector


In [7]:
pip install python-dotenv


Note: you may need to restart the kernel to use updated packages.


In [None]:
from dotenv import load_dotenv
import os

# Load variables from a .env file into os.environ. With no arguments,
# load_dotenv() locates the file itself (NOTE(review): in a notebook this is
# presumably a search upward from the current working directory -- confirm the
# .env file is where the kernel was started or in a parent folder).
load_dotenv()

SNOWFLAKE_USER = os.getenv("SNOWFLAKE_USER")
SNOWFLAKE_PASSWORD = os.getenv("SNOWFLAKE_PASSWORD")
SNOWFLAKE_ACCOUNT = os.getenv("SNOWFLAKE_ACCOUNT")

# Fail here, with the names of the missing settings, instead of letting a None
# value reach snowflake.connector.connect() and surface as an opaque login error.
_missing_settings = [
    setting_name
    for setting_name, setting_value in (
        ("SNOWFLAKE_USER", SNOWFLAKE_USER),
        ("SNOWFLAKE_PASSWORD", SNOWFLAKE_PASSWORD),
        ("SNOWFLAKE_ACCOUNT", SNOWFLAKE_ACCOUNT),
    )
    if not setting_value
]
if _missing_settings:
    raise RuntimeError(
        "Missing Snowflake settings: "
        + ", ".join(_missing_settings)
        + ". Define them in .env or in the environment before connecting."
    )

In [None]:
import snowflake.connector

# Open the Snowflake session using the three settings read from the
# environment in the previous cell.
connection_settings = {
    "user": SNOWFLAKE_USER,
    "password": SNOWFLAKE_PASSWORD,
    "account": SNOWFLAKE_ACCOUNT,
}
conn = snowflake.connector.connect(**connection_settings)

DatabaseError: 250001 (08001): Failed to connect to DB: qbmhuza-bnb86629.snowflakecomputing.com:443. Your user account has been temporarily locked. Try again later or contact your account administrator for assistance. For more information about this error, go to https://community.snowflake.com/s/error-your-user-login-has-been-locked.

In [None]:
# Open a cursor on the connection; the following cells issue their SQL through `cs`.
cs = conn.cursor()

In [None]:
# IF NOT EXISTS: the warehouse is only created when it is missing, so
# re-running this cell does not fail on an existing warehouse.
cs.execute("CREATE WAREHOUSE IF NOT EXISTS my_first_warehouse")

<snowflake.connector.cursor.SnowflakeCursor at 0xffff52b8ffe0>

In [None]:
# IF NOT EXISTS: the database is only created when it is missing, so
# re-running this cell does not fail on an existing database.
cs.execute("CREATE DATABASE IF NOT EXISTS testdb")

<snowflake.connector.cursor.SnowflakeCursor at 0xffff52b8ffe0>

In [None]:
import os
import re
from pathlib import Path

# ---- 0) connect ----
# SECURITY: credentials must never be written into the notebook. They are read
# from the environment (populated from .env by load_dotenv() in the earlier
# cell). Any password that was previously committed in this cell should be
# treated as leaked and rotated.
required_settings = ("SNOWFLAKE_ACCOUNT", "SNOWFLAKE_USER", "SNOWFLAKE_PASSWORD")
missing_settings = [name for name in required_settings if not os.getenv(name)]
if missing_settings:
    raise RuntimeError(
        "Missing Snowflake settings: "
        + ", ".join(missing_settings)
        + ". Define them in .env (loaded by load_dotenv()) or in the environment."
    )

conn = snowflake.connector.connect(
    # No explicit host: the connector derives it from the account identifier.
    account=os.environ["SNOWFLAKE_ACCOUNT"],
    user=os.environ["SNOWFLAKE_USER"],
    password=os.environ["SNOWFLAKE_PASSWORD"],
    warehouse="COMPUTE_WH",
)
cs = conn.cursor()

# make sure the warehouse suspends when idle and wakes itself on the next query
cs.execute("ALTER WAREHOUSE COMPUTE_WH SET AUTO_SUSPEND=60 AUTO_RESUME=TRUE")

# ---- 1) set DB/Schema (use the existing TESTDB) ----
DB, SCHEMA = "TESTDB", "PUBLIC"
cs.execute(f"CREATE DATABASE IF NOT EXISTS {DB}")
cs.execute(f"CREATE SCHEMA   IF NOT EXISTS {DB}.{SCHEMA}")
cs.execute(f"USE DATABASE {DB}")
cs.execute(f"USE SCHEMA {SCHEMA}")

# ---- 2) read and run the .pgsql file from the repo ----
# The location can be overridden with SUPPLIER_CASE_SQL so the notebook is not
# tied to one machine; the default is the original path.
sql_path = Path(
    os.getenv(
        "SUPPLIER_CASE_SQL",
        "/home/jovyan/Desktop/MSBA/SQL & ETL /MGTA_464_Snowflake_Project/Data/supplier_case.pgsql",
    )
)
# Explicit exception instead of `assert`, which is skipped when Python runs with -O.
if not sql_path.exists():
    raise FileNotFoundError(f"Not found: {sql_path.resolve()}")
txt = sql_path.read_text(encoding="utf-8")

# tiny Postgres -> Snowflake cleanups
# drop psql meta commands (lines starting with a backslash)
txt = "\n".join(line for line in txt.splitlines() if not line.strip().startswith("\\"))
# NUMERIC -> NUMBER
txt = re.sub(r"\bNUMERIC\b", "NUMBER", txt, flags=re.I)
# fully-qualify the table name
txt = re.sub(r"\bsupplier_case\b", f"{DB}.{SCHEMA}.SUPPLIER_CASE", txt, flags=re.I)

# Split on semicolons that end a line and execute each statement in order.
# NOTE(review): a semicolon at the end of a line inside a string literal would
# also split here -- fine for this file as loaded, but not a general SQL parser.
stmts = [s.strip() for s in re.split(r";\s*(?=\n|$)", txt) if s.strip()]
for stmt in stmts:
    cs.execute(stmt)

# ---- 3) visualize (still only cs.execute) ----
print("Rows:", cs.execute(f"SELECT COUNT(*) FROM {DB}.{SCHEMA}.SUPPLIER_CASE").fetchone()[0])

# NOTE(review): in the saved output ValidFrom/ValidTo come back as None for every
# row, i.e. TRY_TO_DATE could not parse the stored text. Check the source format
# and pass an explicit format string if these dates are needed.
print("\nSample rows:")
for r in cs.execute(f"""
    SELECT SupplierID, SupplierName, PhoneNumber, WebsiteURL,
           TRY_TO_DATE(ValidFrom) AS ValidFrom, TRY_TO_DATE(ValidTo) AS ValidTo
    FROM {DB}.{SCHEMA}.SUPPLIER_CASE
    ORDER BY SupplierID
    LIMIT 10
""").fetchall():
    print(r)

# DESCRIBE TABLE: print the first two fields of each row (column name and type)
print("\nSchema:")
for r in cs.execute(f"DESCRIBE TABLE {DB}.{SCHEMA}.SUPPLIER_CASE").fetchall():
    print(r[0], r[1])


Rows: 13

Sample rows:
(1, 'A Datum Corporation', '(847) 555-0100', 'http://www.adatum.com', None, None)
(2, 'Contoso, Ltd.', '(360) 555-0100', 'http://www.contoso.com', None, None)
(3, 'Consolidated Messenger', '(415) 555-0100', 'http://www.consolidatedmessenger.com', None, None)
(4, 'Fabrikam, Inc.', '(203) 555-0104', 'http://www.fabrikam.com', None, None)
(5, 'Graphic Design Institute', '(406) 555-0105', 'http://www.graphicdesigninstitute.com', None, None)
(6, 'Humongous Insurance', '(423) 555-0105', 'http://www.humongousinsurance.com', None, None)
(7, 'Litware, Inc.', '(209) 555-0108', 'http://www.litwareinc.com', None, None)
(8, 'Lucerne Publishing', '(423) 555-0103', 'http://www.lucernepublishing.com', None, None)
(9, 'Nod Publishers', '(252) 555-0100', 'http://www.nodpublishers.com', None, None)
(10, 'Northwind Electric Cars', '(201) 555-0105', 'http://www.northwindelectriccars.com', None, None)

Schema:
SUPPLIERID NUMBER(38,0)
SUPPLIERNAME VARCHAR(16777216)
SUPPLIERCATEGORYID N

In [None]:
# Point the session at TESTDB.PUBLIC before building the cleaned table.
for context_statement in ("USE DATABASE TESTDB", "USE SCHEMA PUBLIC"):
    cs.execute(context_statement)

# CTAS: rebuild SUPPLIER_CASE_CLEAN from SUPPLIER_CASE, casting the ID and
# postal-code columns to INT and converting VALIDFROM / VALIDTO with TRY_TO_DATE.
supplier_case_clean_sql = """
CREATE OR REPLACE TABLE TESTDB.PUBLIC.SUPPLIER_CASE_CLEAN AS
SELECT
  CAST(SUPPLIERID               AS INT)        AS SUPPLIERID,
  SUPPLIERNAME                                   AS SUPPLIERNAME,
  CAST(SUPPLIERCATEGORYID       AS INT)        AS SUPPLIERCATEGORYID,
  CAST(PRIMARYCONTACTPERSONID   AS INT)        AS PRIMARYCONTACTPERSONID,
  CAST(ALTERNATECONTACTPERSONID AS INT)        AS ALTERNATECONTACTPERSONID,
  CAST(DELIVERYMETHODID         AS INT)        AS DELIVERYMETHODID,
  CAST(POSTALCITYID             AS INT)        AS POSTALCITYID,
  SUPPLIERREFERENCE                              AS SUPPLIERREFERENCE,
  PHONENUMBER                                   AS PHONENUMBER,
  WEBSITEURL                                    AS WEBSITEURL,
  DELIVERYADDRESSLINE1                           AS DELIVERYADDRESSLINE1,
  CAST(DELIVERYPOSTALCODE       AS INT)        AS DELIVERYPOSTALCODE,
  POSTALADDRESSLINE1                             AS POSTALADDRESSLINE1,
  CAST(POSTALPOSTALCODE         AS INT)        AS POSTALPOSTALCODE,
  CAST(LASTEDITEDBY             AS INT)        AS LASTEDITEDBY,
  TRY_TO_DATE(VALIDFROM)                        AS VALIDFROM,
  TRY_TO_DATE(VALIDTO)                          AS VALIDTO
FROM TESTDB.PUBLIC.SUPPLIER_CASE;
"""
cs.execute(supplier_case_clean_sql)


<snowflake.connector.cursor.SnowflakeCursor at 0xffff52a7d460>

In [None]:
# Row count of the cleaned table.
clean_row_count = cs.execute("SELECT COUNT(*) FROM TESTDB.PUBLIC.SUPPLIER_CASE_CLEAN").fetchone()[0]
print(clean_row_count)

# First ten suppliers by ID, one tuple per line.
preview_rows = cs.execute("""
  SELECT SUPPLIERID, SUPPLIERNAME, PHONENUMBER, WEBSITEURL, VALIDFROM, VALIDTO
  FROM TESTDB.PUBLIC.SUPPLIER_CASE_CLEAN
  ORDER BY SUPPLIERID
  LIMIT 10
""").fetchall()
for supplier_row in preview_rows:
    print(supplier_row)


13
(1, 'A Datum Corporation', '(847) 555-0100', 'http://www.adatum.com', None, None)
(2, 'Contoso, Ltd.', '(360) 555-0100', 'http://www.contoso.com', None, None)
(3, 'Consolidated Messenger', '(415) 555-0100', 'http://www.consolidatedmessenger.com', None, None)
(4, 'Fabrikam, Inc.', '(203) 555-0104', 'http://www.fabrikam.com', None, None)
(5, 'Graphic Design Institute', '(406) 555-0105', 'http://www.graphicdesigninstitute.com', None, None)
(6, 'Humongous Insurance', '(423) 555-0105', 'http://www.humongousinsurance.com', None, None)
(7, 'Litware, Inc.', '(209) 555-0108', 'http://www.litwareinc.com', None, None)
(8, 'Lucerne Publishing', '(423) 555-0103', 'http://www.lucernepublishing.com', None, None)
(9, 'Nod Publishers', '(252) 555-0100', 'http://www.nodpublishers.com', None, None)
(10, 'Northwind Electric Cars', '(201) 555-0105', 'http://www.northwindelectriccars.com', None, None)


In [None]:
# --- Add this to ETL_Snowflake.py (after your existing connection code) ---

import snowflake.connector

def use_first_existing_db(cs, candidate_names):
    """
    Return the first candidate database name that exists.

    Each candidate is looked up with SHOW DATABASES LIKE. Because `_` and `%`
    are wildcards in that pattern (so 'WEATHER__ENVIRONMENT' also matches e.g.
    'WEATHERXYENVIRONMENT'), a candidate only counts as found when one of the
    returned database names equals it, ignoring case.

    If no candidate matches exactly, falls back to
    SHOW DATABASES LIKE 'WEATHER%ENVIRONMENT%' and returns the name of the
    first database listed.

    Args:
        cs: cursor-like object providing execute(), fetchall() and description.
        candidate_names: iterable of database names, tried in order.

    Returns:
        The matching candidate (as passed in), or the name reported by the
        fallback query.

    Raises:
        RuntimeError: if neither the candidates nor the fallback pattern
            match any database.
    """

    def _listed_names(pattern):
        # Run SHOW DATABASES LIKE <pattern> and return the names it lists.
        cs.execute(f"SHOW DATABASES LIKE '{pattern}'")
        rows = cs.fetchall()
        # Locate the "name" column from the result metadata; fall back to the
        # second field, which is where the original code expected it.
        columns = [str(col[0]).lower() for col in (getattr(cs, "description", None) or ())]
        name_idx = columns.index("name") if "name" in columns else 1
        return [row[name_idx] for row in rows]

    # Try exact names first
    for db in candidate_names:
        wanted = str(db).upper()
        if any(str(name).upper() == wanted for name in _listed_names(db)):
            return db

    # Broad match as a fallback (handles custom naming)
    fallback_names = _listed_names("WEATHER%ENVIRONMENT%")
    if fallback_names:
        return fallback_names[0]

    raise RuntimeError(
        "Could not find a WEATHER...ENVIRONMENT database. "
        "Verify the Snowflake Marketplace subscription/name in the web UI."
    )

def print_table_sample(cs, fully_qualified_table, sample_rows=5):
    """
    Show a quick preview of a table on stdout.

    Prints a banner with the table name, the column names taken from the
    cursor metadata, up to `sample_rows` rows (numbered from 1), and finally
    the total row count obtained with SELECT COUNT(*).
    """
    print(f"\n=== {fully_qualified_table} ===")

    # Preview query: fetch the rows, then read the column names from the
    # cursor's description of that same result.
    cs.execute(f"SELECT * FROM {fully_qualified_table} LIMIT {sample_rows}")
    preview = cs.fetchall()
    header = ", ".join(column[0] for column in cs.description)
    print("Columns:", header)
    for position, record in enumerate(preview, start=1):
        print(f"{position:>2}: {record}")

    # Full count needs its own query.
    cs.execute(f"SELECT COUNT(*) FROM {fully_qualified_table}")
    row_total = cs.fetchone()[0]
    print(f"Total rows: {row_total}")

def print_cybersyn_weather_tables(conn, warehouse="MY_FIRST_WAREHOUSE"):
    """
    Preview the two NOAA tables of the weather Marketplace database.

    Using a cursor opened on `conn`, this:
    - selects `warehouse` with USE WAREHOUSE,
    - resolves the database name via use_first_existing_db(),
    - switches to that database and to the CYBERSYN schema,
    - calls print_table_sample() (5 rows) for each NOAA table.
    """
    # The brief names the database WEATHER__ENVIRONMENT (double underscore);
    # some accounts show WEATHER_ENVIRONMENT (single underscore) instead.
    database_candidates = ["WEATHER__ENVIRONMENT", "WEATHER_ENVIRONMENT"]
    noaa_tables = (
        "NOAA_WEATHER_METRICS_TIMESERIES",
        "NOAA_WEATHER_STATION_INDEX",
    )

    with conn.cursor() as cursor:
        # A warehouse must be active before the queries below can run.
        cursor.execute(f"USE WAREHOUSE {warehouse}")

        marketplace_db = use_first_existing_db(cursor, database_candidates)
        cursor.execute(f"USE DATABASE {marketplace_db}")

        # Schema from the brief is CYBERSYN
        cursor.execute("USE SCHEMA CYBERSYN")

        # Tables are addressed by their fully qualified, unquoted names.
        for table_name in noaa_tables:
            print_table_sample(
                cursor,
                f"{marketplace_db}.CYBERSYN.{table_name}",
                sample_rows=5,
            )

# --- Run it ---
if __name__ == "__main__":
    # Reuse the `conn` opened in an earlier cell.
    #
    # The connection is deliberately NOT closed here: the cells that follow
    # keep issuing statements through `cs`, a cursor on this same connection,
    # and would fail once it is closed. Close the connection once, at the very
    # end of the notebook (or script), and let any error from close() surface
    # instead of silencing it.
    print_cybersyn_weather_tables(conn, warehouse="MY_FIRST_WAREHOUSE")


In [None]:
## Create PO_Data and load the monthly purchase-order CSVs into it
import glob
import os

# ---- 1) find the local files BEFORE touching anything in Snowflake ----
# CREATE OR REPLACE below drops the existing table and stage, so make sure
# there is something to load first. PO_DATA_DIR overrides the default folder.
local_dir = os.getenv(
    "PO_DATA_DIR",
    "/mnt/c/Users/digsc/Downloads/Data-1/Data/Monthly PO Data",
)
pattern = os.path.join(local_dir, "*.csv")

# skip ADS entries like ':Zone.Identifier'; sort for a reproducible upload order
files = sorted(
    path for path in glob.glob(pattern) if ":" not in os.path.basename(path)
)
print("Matched CSVs:", len(files))
if not files:
    raise SystemExit(f"No CSVs matched at: {pattern}")

# ---- 2) pin the session context ----
# The table, stage and file format below use unqualified names, so they are
# created in the session's current database/schema. An earlier cell switches
# the same connection to the Marketplace weather database, so switch back
# explicitly instead of relying on whichever cell ran last.
cs.execute("USE DATABASE TESTDB")
cs.execute("USE SCHEMA PUBLIC")

# ---- 3) target table ----
# ID and quantity columns use NUMBER(38,0): the previous one-to-four digit
# precisions reject larger values (supplierid NUMBER(1,0) cannot hold the
# supplier IDs >= 10 that appear in SUPPLIER_CASE), which aborts the COPY below.
# NOTE(review): expectedunitpriceperouter keeps scale 1 as before -- confirm the
# CSV prices never carry more than one decimal place.
cs.execute("""
CREATE OR REPLACE TABLE PO_Data (
    purchaseorderid           NUMBER(38,0),
    supplierid                NUMBER(38,0),
    orderdate                 DATE,
    deliverymethodid          NUMBER(38,0),
    contactpersonid           NUMBER(38,0),
    expecteddeliverydate      DATE,
    supplierreference         VARCHAR,
    isorderfinalized          NUMBER(1,0),
    comments                  VARCHAR,
    internalcomments          VARCHAR,
    lasteditedby              NUMBER(38,0),
    lasteditedwhen            TIMESTAMP_NTZ,
    purchaseorderlineid       NUMBER(38,0),
    stockitemid               NUMBER(38,0),
    orderedouters             NUMBER(38,0),
    description               VARCHAR,
    receivedouters            NUMBER(38,0),
    packagetypeid             NUMBER(38,0),
    expectedunitpriceperouter NUMBER(38,1),
    lastreceiptdate           DATE,
    isorderlinefinalized      NUMBER(1,0),
    right_lasteditedby        NUMBER(38,0),
    right_lasteditedwhen      TIMESTAMP_NTZ
)
""")

# ---- 4) stage + file format ----
cs.execute("CREATE OR REPLACE STAGE po_data_stage")
cs.execute("""
CREATE OR REPLACE FILE FORMAT po_csv_ff
  TYPE = CSV
  FIELD_OPTIONALLY_ENCLOSED_BY = '"'
  SKIP_HEADER = 1
""")

# ---- 5) upload every CSV to the stage ----
for filepath in files:
    abs_path = os.path.abspath(filepath).replace("\\", "/")  # ensure forward slashes
    file_uri = "file:///" + abs_path.lstrip("/")             # exactly 3 slashes, no URL-encoding
    print("PUT ->", file_uri)
    # The URI is single-quoted because the folder name contains spaces.
    cs.execute(
        f"PUT '{file_uri}' @po_data_stage AUTO_COMPRESS=TRUE OVERWRITE=TRUE"
    )

# sanity check what's in the stage
cs.execute("LIST @po_data_stage")
print("Staged objects (top 10):", cs.fetchall()[:10])

# ---- 6) load into the table; any bad row aborts the whole statement ----
cs.execute("""
COPY INTO PO_Data
  FROM @po_data_stage
  FILE_FORMAT = (FORMAT_NAME = po_csv_ff)
  ON_ERROR = ABORT_STATEMENT
""")

# Print the count itself rather than the one-element result tuple.
cs.execute("SELECT COUNT(*) FROM PO_Data")
print("Row count:", cs.fetchone()[0])
