In [1]:
pip install --upgrade snowflake-connector-python

Note: you may need to restart the kernel to use updated packages.


In [2]:
import snowflake.connector

In [3]:
# ---------- CONFIG ----------

SNOWFLAKE = {
    "user": "kespann",
    "password": "SnowFlakeUCSD09",
    "account": "YS37434",  # e.g., xy12345.us-west-2
}


In [4]:
def run_sql(cs, sql):
    print("\n--- Executing ---\n" + sql)
    cs.execute(sql)

In [5]:
## STAGING FILES

# PUT … @stage/path/... → lets you load files into a subfolder within the stage.

# 41 purchase CSVs - want to stage them in a structured way by country → state → city → year → month.


In [6]:
import snowflake.connector

# --- CONFIG ---
SNOWFLAKE = {
    "user": "KESPANN",
    "password": "NewStrongPassword123!",
    "account": "sptiuvl-ys37434",  # adjust if region needed, e.g., xy12345.us-west-2
}

# --- Helper functions ---
def sf_connect():
    return snowflake.connector.connect(
        user=SNOWFLAKE["user"],
        password=SNOWFLAKE["password"],
        account=SNOWFLAKE["account"],
        warehouse="CASE_WH",
        database="CASE_DB",
        schema="CASE_SCHEMA",
        role="ACCOUNTADMIN"
    )

def run_sql(cs, sql):
    print("\n--- Executing ---\n" + sql)
    cs.execute(sql)

# # --- Test connection ---
# with sf_connect() as conn:
#     cs = conn.cursor()
#     run_sql(cs, "SELECT CURRENT_DATE;")
#     print(cs.fetchone())


In [7]:
with sf_connect() as conn:
    cs = conn.cursor()

    # warehouse + db/schema setup
    cs.execute("CREATE WAREHOUSE IF NOT EXISTS CASE_WH WAREHOUSE_SIZE = 'XSMALL' AUTO_SUSPEND = 60 AUTO_RESUME = TRUE;")
    cs.execute("CREATE DATABASE IF NOT EXISTS CASE_DB;")
    cs.execute("USE DATABASE CASE_DB;")
    cs.execute("CREATE SCHEMA IF NOT EXISTS CASE_SCHEMA;")
    cs.execute("USE SCHEMA CASE_SCHEMA;")

    # stages
    cs.execute("CREATE STAGE IF NOT EXISTS stg_purchases;")
    cs.execute("CREATE STAGE IF NOT EXISTS stg_invoices;")
    cs.execute("CREATE STAGE IF NOT EXISTS stg_supplier;")

    # file formats
    cs.execute("""
        CREATE OR REPLACE FILE FORMAT ff_csv 
        TYPE = CSV 
        FIELD_OPTIONALLY_ENCLOSED_BY = '"'
        SKIP_HEADER = 1 
        NULL_IF = ('', 'NULL')
    """)
    
    cs.execute("""
        CREATE OR REPLACE FILE FORMAT ff_xml 
        TYPE = XML 
        STRIP_OUTER_ELEMENT = TRUE
    """)


In [8]:
%pip install psycopg2-binary pandas lxml python-dotenv

Note: you may need to restart the kernel to use updated packages.


In [9]:
PATHS = {
    "purchases_dir": "/home/jovyan/Desktop/Summer_Q/SQL/SQL_FINAL_PROJECT/Data-5/MonthlyPOData",   # folder with 41 CSVs
    }

In [None]:
import os, glob

with sf_connect() as conn:
    cs = conn.cursor()

    # 1️⃣ PUT all your local CSVs into the purchases stage
    run_sql(cs, f"""
        PUT file:///{PATHS['purchases_dir']}/*.csv
        @stg_purchases/United_States/California/San_Diego/2025/01/
        AUTO_COMPRESS=TRUE;
    """)

    # 2️⃣ Create table for line-level purchases (adjust column names after preview)
    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASES_LINE (
            PURCHASEORDERNUMBER VARCHAR,
            RECEIVEDOUTERS NUMBER,
            EXPECTEDUNITPRICEPEROUTER NUMBER,
            -- add other columns you want to keep from CSVs
            DUMMYCOL VARCHAR
        );
    """)

    # 3️⃣ COPY staged CSVs into the purchases table
    run_sql(cs, """
        COPY INTO PURCHASES_LINE
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    """)

    # 4️⃣ Create aggregated table with POAmount
    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASES_AGG AS
        SELECT
            PURCHASEORDERNUMBER,
            SUM(RECEIVEDOUTERS * EXPECTEDUNITPRICEPEROUTER) AS POAMOUNT
        FROM PURCHASES_LINE
        GROUP BY PURCHASEORDERNUMBER;
    """)


In [None]:
with sf_connect() as conn:
    cs = conn.cursor()
    run_sql(cs, "SELECT COUNT(*) FROM PURCHASES_LINE;")
    print(cs.fetchone())


In [10]:
with sf_connect() as conn:
    cs = conn.cursor()

    # 1️⃣ Wide staging table - 20 columns (adjust number based on CSV header count)
    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASES_RAW (
            C1 VARCHAR, C2 VARCHAR, C3 VARCHAR, C4 VARCHAR, C5 VARCHAR,
            C6 VARCHAR, C7 VARCHAR, C8 VARCHAR, C9 VARCHAR, C10 VARCHAR,
            C11 VARCHAR, C12 VARCHAR, C13 VARCHAR, C14 VARCHAR, C15 VARCHAR,
            C16 VARCHAR, C17 VARCHAR, C18 VARCHAR, C19 VARCHAR, C20 VARCHAR
        );
    """)

    # 2️⃣ Load ALL staged CSV data into raw table
    run_sql(cs, """
        COPY INTO PURCHASES_RAW
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    """)

    # 3️⃣ Check row count
    run_sql(cs, "SELECT COUNT(*) FROM PURCHASES_RAW;")
    print("Row count in PURCHASES_RAW:", cs.fetchone())

    # 4️⃣ Peek at a few rows
    run_sql(cs, "SELECT * FROM PURCHASES_RAW LIMIT 10;")
    rows = cs.fetchall()
    for r in rows:
        print(r)



--- Executing ---

        CREATE OR REPLACE TABLE PURCHASES_RAW (
            C1 VARCHAR, C2 VARCHAR, C3 VARCHAR, C4 VARCHAR, C5 VARCHAR,
            C6 VARCHAR, C7 VARCHAR, C8 VARCHAR, C9 VARCHAR, C10 VARCHAR,
            C11 VARCHAR, C12 VARCHAR, C13 VARCHAR, C14 VARCHAR, C15 VARCHAR,
            C16 VARCHAR, C17 VARCHAR, C18 VARCHAR, C19 VARCHAR, C20 VARCHAR
        );
    

--- Executing ---

        COPY INTO PURCHASES_RAW
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    

--- Executing ---
SELECT COUNT(*) FROM PURCHASES_RAW;
Row count in PURCHASES_RAW: (0,)

--- Executing ---
SELECT * FROM PURCHASES_RAW LIMIT 10;


In [11]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        CREATE OR REPLACE FILE FORMAT ff_csv
        TYPE = CSV
        FIELD_DELIMITER = ','
        SKIP_HEADER = 1
        NULL_IF = ('', 'NULL');
    """)



--- Executing ---

        CREATE OR REPLACE FILE FORMAT ff_csv
        TYPE = CSV
        FIELD_DELIMITER = ','
        SKIP_HEADER = 1
        NULL_IF = ('', 'NULL');
    


In [12]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        COPY INTO PURCHASES_RAW
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    """)

    run_sql(cs, "SELECT COUNT(*) FROM PURCHASES_RAW;")
    print("Row count in PURCHASES_RAW:", cs.fetchone())



--- Executing ---

        COPY INTO PURCHASES_RAW
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    

--- Executing ---
SELECT COUNT(*) FROM PURCHASES_RAW;
Row count in PURCHASES_RAW: (0,)


In [None]:
## not getting the data into the 'purchases_raw' table...^

In [13]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        INSERT INTO PURCHASES_RAW
        SELECT $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
               $11, $12, $13, $14, $15, $16, $17, $18, $19, $20
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        (FILE_FORMAT => ff_csv);
    """)

    run_sql(cs, "SELECT COUNT(*) FROM PURCHASES_RAW;")
    print("Row count in PURCHASES_RAW:", cs.fetchone())



--- Executing ---

        INSERT INTO PURCHASES_RAW
        SELECT $1, $2, $3, $4, $5, $6, $7, $8, $9, $10,
               $11, $12, $13, $14, $15, $16, $17, $18, $19, $20
        FROM @stg_purchases/United_States/California/San_Diego/2025/01/
        (FILE_FORMAT => ff_csv);
    

--- Executing ---
SELECT COUNT(*) FROM PURCHASES_RAW;
Row count in PURCHASES_RAW: (261865,)


In [15]:
with sf_connect() as conn:
    cs = conn.cursor()
    run_sql(cs, "SELECT * FROM PURCHASES_RAW LIMIT 5;")
    for r in cs.fetchall():
        print(r)



--- Executing ---
SELECT * FROM PURCHASES_RAW LIMIT 5;
('106', '4', '3/1/2019', '7', '2', '3/21/2019', '293092', '1', None, None, '4', '3/4/2019 7:00', '469', '77', '92', '"""The Gu"" red shirt XML tag t-shirt (White) XXS"', '92', '6', '84', '3/4/2019')
('106', '4', '3/1/2019', '7', '2', '3/21/2019', '293092', '1', None, None, '4', '3/4/2019 7:00', '470', '78', '127', '"""The Gu"" red shirt XML tag t-shirt (White) XS"', '127', '6', '84', '3/4/2019')
('106', '4', '3/1/2019', '7', '2', '3/21/2019', '293092', '1', None, None, '4', '3/4/2019 7:00', '471', '80', '20', '"""The Gu"" red shirt XML tag t-shirt (White) M"', '20', '6', '84', '3/4/2019')
('106', '4', '3/1/2019', '7', '2', '3/21/2019', '293092', '1', None, None, '4', '3/4/2019 7:00', '472', '86', '74', '"""The Gu"" red shirt XML tag t-shirt (White) 5XL"', '74', '6', '96', '3/4/2019')
('106', '4', '3/1/2019', '7', '2', '3/21/2019', '293092', '1', None, None, '4', '3/4/2019 7:00', '473', '95', '22', '"""The Gu"" red shirt XML tag t-

### Clean the data into a proper table (PURCHASES_LINE)

In [17]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASES_LINE AS
        SELECT
            C1::VARCHAR AS PURCHASEORDERID,
            C2::VARCHAR AS PURCHASEORDERLINEID,
            TRY_TO_DATE(C3, 'MM/DD/YYYY') AS ORDERDATE,
            TRY_TO_NUMBER(C4) AS SUPPLIERID,
            TRY_TO_NUMBER(C5) AS RECEIVEDOUTERS,
            TRY_TO_NUMBER(C14) AS EXPECTEDUNITPRICEPEROUTER
        FROM PURCHASES_RAW;
    """)

    # Check row count
    run_sql(cs, "SELECT COUNT(*) FROM PURCHASES_LINE;")
    print("Rows in PURCHASES_LINE:", cs.fetchone())

    # Preview first 10 rows
    run_sql(cs, "SELECT * FROM PURCHASES_LINE LIMIT 10;")
    print(cs.fetchall())




--- Executing ---

        CREATE OR REPLACE TABLE PURCHASES_LINE AS
        SELECT
            C1::VARCHAR AS PURCHASEORDERID,
            C2::VARCHAR AS PURCHASEORDERLINEID,
            TRY_TO_DATE(C3, 'MM/DD/YYYY') AS ORDERDATE,
            TRY_TO_NUMBER(C4) AS SUPPLIERID,
            TRY_TO_NUMBER(C5) AS RECEIVEDOUTERS,
            TRY_TO_NUMBER(C14) AS EXPECTEDUNITPRICEPEROUTER
        FROM PURCHASES_RAW;
    

--- Executing ---
SELECT COUNT(*) FROM PURCHASES_LINE;
Rows in PURCHASES_LINE: (261865,)

--- Executing ---
SELECT * FROM PURCHASES_LINE LIMIT 10;
[('1', '2', datetime.date(2019, 1, 1), 9, 2, 150), ('1', '2', datetime.date(2019, 1, 1), 9, 2, 151), ('1', '2', datetime.date(2019, 1, 1), 9, 2, 152), ('10', '10', datetime.date(2019, 1, 2), 8, 2, 60), ('11', '12', datetime.date(2019, 1, 2), 7, 2, 1), ('11', '12', datetime.date(2019, 1, 2), 7, 2, 3), ('11', '12', datetime.date(2019, 1, 2), 7, 2, 15), ('12', '4', datetime.date(2019, 1, 3), 7, 2, 77), ('12', '4', datetime.date(201

### Aggregated totals into PURCHASES_AGG with the calculated field POAmount 
- (sum of ReceivedOuters × ExpectedUnitPricePerOuter per order).

In [18]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASES_AGG AS
        SELECT
            PURCHASEORDERID,
            SUM(RECEIVEDOUTERS * EXPECTEDUNITPRICEPEROUTER) AS POAMOUNT
        FROM PURCHASES_LINE
        GROUP BY PURCHASEORDERID;
    """)

    run_sql(cs, "SELECT * FROM PURCHASES_AGG LIMIT 10;")
    print(cs.fetchall())



--- Executing ---

        CREATE OR REPLACE TABLE PURCHASES_AGG AS
        SELECT
            PURCHASEORDERID,
            SUM(RECEIVEDOUTERS * EXPECTEDUNITPRICEPEROUTER) AS POAMOUNT
        FROM PURCHASES_LINE
        GROUP BY PURCHASEORDERID;
    

--- Executing ---
SELECT * FROM PURCHASES_AGG LIMIT 10;
[('114', 408), ('151', 794), ('367', 1028), ('375', 1028), ('392', 794), ('399', 1028), ('1002', 154), ('954', 1028), ('965', 1162), ('975', 1162)]


### 3: Extract and load the supplier invoice XML data.

In [20]:
# Add XML file path
PATHS["invoice_xml"] = "/home/jovyan/Desktop/Summer_Q/SQL/SQL_FINAL_PROJECT/Data-5/Supplier Transactions XML.xml"

with sf_connect() as conn:
    cs = conn.cursor()

    # 1️⃣ Stage the XML file
    run_sql(cs, f"""
        PUT 'file://{PATHS['invoice_xml']}'
        @stg_invoices
        AUTO_COMPRESS=TRUE;
    """)

    # 2️⃣ Create raw XML table
    run_sql(cs, """
        CREATE OR REPLACE TABLE INVOICES_RAW (XML VARIANT);
    """)

    # 3️⃣ Load XML into raw table
    run_sql(cs, """
        COPY INTO INVOICES_RAW
        FROM @stg_invoices
        FILE_FORMAT = (FORMAT_NAME = ff_xml)
        ON_ERROR = 'CONTINUE';
    """)

    # 4️⃣ Shred XML into relational form
    # ⚠️ Tag names below ("InvoiceNumber", "InvoiceDate", etc.) may need adjustment
    run_sql(cs, """
        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            inv.value:InvoiceNumber::string       AS INVOICENUMBER,
            TRY_TO_DATE(inv.value:InvoiceDate::string) AS INVOICEDATE,
            inv.value:PurchaseOrderNumber::string AS PURCHASEORDERNUMBER,
            TRY_TO_NUMBER(inv.value:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX
        FROM INVOICES_RAW,
             LATERAL FLATTEN(input => XML:"Invoices"."Invoice") inv;
    """)

    # 5️⃣ Preview shredded data
    run_sql(cs, "SELECT * FROM INVOICES LIMIT 10;")
    rows = cs.fetchall()
    for r in rows:
        print(r)



--- Executing ---

        PUT 'file:///home/jovyan/Desktop/Summer_Q/SQL/SQL_FINAL_PROJECT/Data-5/Supplier Transactions XML.xml'
        @stg_invoices
        AUTO_COMPRESS=TRUE;
    

--- Executing ---

        CREATE OR REPLACE TABLE INVOICES_RAW (XML VARIANT);
    

--- Executing ---

        COPY INTO INVOICES_RAW
        FROM @stg_invoices
        FILE_FORMAT = (FORMAT_NAME = ff_xml)
        ON_ERROR = 'CONTINUE';
    

--- Executing ---

        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            inv.value:InvoiceNumber::string       AS INVOICENUMBER,
            TRY_TO_DATE(inv.value:InvoiceDate::string) AS INVOICEDATE,
            inv.value:PurchaseOrderNumber::string AS PURCHASEORDERNUMBER,
            TRY_TO_NUMBER(inv.value:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX
        FROM INVOICES_RAW,
             LATERAL FLATTEN(input => XML:"Invoices"."Invoice") inv;
    

--- Executing ---
SELECT * FROM INVOICES LIMIT 10;


In [21]:
with sf_connect() as conn:
    cs = conn.cursor()

    # Look at one raw XML record to understand its structure
    run_sql(cs, "SELECT XML FROM INVOICES_RAW LIMIT 1;")
    row = cs.fetchone()
    print(row[0])



--- Executing ---
SELECT XML FROM INVOICES_RAW LIMIT 1;
<row>
  <SupplierTransactionID>134</SupplierTransactionID>
  <SupplierID>2</SupplierID>
  <TransactionTypeID>5</TransactionTypeID>
  <PurchaseOrderID>1</PurchaseOrderID>
  <PaymentMethodID>4</PaymentMethodID>
  <SupplierInvoiceNumber>7290</SupplierInvoiceNumber>
  <TransactionDate>2019-01-02</TransactionDate>
  <AmountExcludingTax>313.50</AmountExcludingTax>
  <TaxAmount>47.03</TaxAmount>
  <TransactionAmount>360.53</TransactionAmount>
  <OutstandingBalance>0.00</OutstandingBalance>
  <FinalizationDate>2019-01-07</FinalizationDate>
  <IsFinalized>1</IsFinalized>
  <LastEditedBy>4</LastEditedBy>
  <LastEditedWhen>2019-01-07 09:00:00.0000000</LastEditedWhen>
</row>


In [22]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            inv.value:SupplierTransactionID::string    AS SUPPLIERTRANSACTIONID,
            inv.value:SupplierID::string               AS SUPPLIERID,
            inv.value:PurchaseOrderID::string          AS PURCHASEORDERID,
            inv.value:SupplierInvoiceNumber::string    AS INVOICENUMBER,
            TRY_TO_DATE(inv.value:TransactionDate::string, 'YYYY-MM-DD') AS INVOICEDATE,
            TRY_TO_NUMBER(inv.value:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX,
            TRY_TO_NUMBER(inv.value:TransactionAmount::string)  AS TRANSACTIONAMOUNT
        FROM INVOICES_RAW,
             LATERAL FLATTEN(input => XML:"row") inv;
    """)

    # Preview shredded table
    run_sql(cs, "SELECT * FROM INVOICES LIMIT 10;")
    print(cs.fetchall())



--- Executing ---

        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            inv.value:SupplierTransactionID::string    AS SUPPLIERTRANSACTIONID,
            inv.value:SupplierID::string               AS SUPPLIERID,
            inv.value:PurchaseOrderID::string          AS PURCHASEORDERID,
            inv.value:SupplierInvoiceNumber::string    AS INVOICENUMBER,
            TRY_TO_DATE(inv.value:TransactionDate::string, 'YYYY-MM-DD') AS INVOICEDATE,
            TRY_TO_NUMBER(inv.value:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX,
            TRY_TO_NUMBER(inv.value:TransactionAmount::string)  AS TRANSACTIONAMOUNT
        FROM INVOICES_RAW,
             LATERAL FLATTEN(input => XML:"row") inv;
    

--- Executing ---
SELECT * FROM INVOICES LIMIT 10;
[]


In [23]:
with sf_connect() as conn:
    cs = conn.cursor()

    run_sql(cs, """
        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            XML:SupplierTransactionID::string    AS SUPPLIERTRANSACTIONID,
            XML:SupplierID::string               AS SUPPLIERID,
            XML:PurchaseOrderID::string          AS PURCHASEORDERID,
            XML:SupplierInvoiceNumber::string    AS INVOICENUMBER,
            TRY_TO_DATE(XML:TransactionDate::string, 'YYYY-MM-DD') AS INVOICEDATE,
            TRY_TO_NUMBER(XML:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX,
            TRY_TO_NUMBER(XML:TransactionAmount::string)  AS TRANSACTIONAMOUNT
        FROM INVOICES_RAW;
    """)

    # Preview
    run_sql(cs, "SELECT * FROM INVOICES LIMIT 10;")
    print(cs.fetchall())



--- Executing ---

        CREATE OR REPLACE TABLE INVOICES AS
        SELECT
            XML:SupplierTransactionID::string    AS SUPPLIERTRANSACTIONID,
            XML:SupplierID::string               AS SUPPLIERID,
            XML:PurchaseOrderID::string          AS PURCHASEORDERID,
            XML:SupplierInvoiceNumber::string    AS INVOICENUMBER,
            TRY_TO_DATE(XML:TransactionDate::string, 'YYYY-MM-DD') AS INVOICEDATE,
            TRY_TO_NUMBER(XML:AmountExcludingTax::string) AS AMOUNTEXCLUDINGTAX,
            TRY_TO_NUMBER(XML:TransactionAmount::string)  AS TRANSACTIONAMOUNT
        FROM INVOICES_RAW;
    

--- Executing ---
SELECT * FROM INVOICES LIMIT 10;
[('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '3', '5', None, 7, 9), ('0', '1', '

### Step 4

In [24]:
with sf_connect() as conn:
    cs = conn.cursor()

    # Create a materialized view (or table if MV not supported)
    run_sql(cs, """
        CREATE OR REPLACE TABLE PURCHASE_ORDERS_AND_INVOICES AS
        SELECT
            i.PURCHASEORDERID,
            i.INVOICENUMBER,
            i.INVOICEDATE,
            i.AMOUNTEXCLUDINGTAX,
            p.POAMOUNT,
            (i.AMOUNTEXCLUDINGTAX - p.POAMOUNT) AS INVOICED_VS_QUOTED
        FROM INVOICES i
        JOIN PURCHASES_AGG p
          ON i.PURCHASEORDERID = p.PURCHASEORDERID;
    """)

    # Preview result
    run_sql(cs, "SELECT * FROM PURCHASE_ORDERS_AND_INVOICES LIMIT 10;")
    rows = cs.fetchall()
    for r in rows:
        print(r)

    # Count rows
    run_sql(cs, "SELECT COUNT(*) FROM PURCHASE_ORDERS_AND_INVOICES;")
    print("Rows in PURCHASE_ORDERS_AND_INVOICES:", cs.fetchone())



--- Executing ---

        CREATE OR REPLACE TABLE PURCHASE_ORDERS_AND_INVOICES AS
        SELECT
            i.PURCHASEORDERID,
            i.INVOICENUMBER,
            i.INVOICEDATE,
            i.AMOUNTEXCLUDINGTAX,
            p.POAMOUNT,
            (i.AMOUNTEXCLUDINGTAX - p.POAMOUNT) AS INVOICED_VS_QUOTED
        FROM INVOICES i
        JOIN PURCHASES_AGG p
          ON i.PURCHASEORDERID = p.PURCHASEORDERID;
    

--- Executing ---
SELECT * FROM PURCHASE_ORDERS_AND_INVOICES LIMIT 10;
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)
('3', '5', None, 7, 1128, -1121)

--- Executing ---
SELECT COUNT(*) FROM PURCHASE_ORDERS_AND_INVOICES;
Rows in PURCHASE_ORDERS_AND_INVOICES: (2438,)


### Step 5

In [25]:
## quick check

with sf_connect() as conn:
    cs = conn.cursor()

    # Count rows in join table
    run_sql(cs, "SELECT COUNT(*) FROM PURCHASE_ORDERS_AND_INVOICES;")
    print("Row count:", cs.fetchone())

    # Preview a few results
    run_sql(cs, "SELECT * FROM PURCHASE_ORDERS_AND_INVOICES LIMIT 10;")
    print(cs.fetchall())

    # Optional: sanity check some differences
    run_sql(cs, """
        SELECT
            AVG(INVOICED_VS_QUOTED) AS avg_diff,
            MIN(INVOICED_VS_QUOTED) AS min_diff,
            MAX(INVOICED_VS_QUOTED) AS max_diff
        FROM PURCHASE_ORDERS_AND_INVOICES;
    """)
    print(cs.fetchone())



--- Executing ---
SELECT COUNT(*) FROM PURCHASE_ORDERS_AND_INVOICES;
Row count: (2438,)

--- Executing ---
SELECT * FROM PURCHASE_ORDERS_AND_INVOICES LIMIT 10;
[('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121), ('3', '5', None, 7, 1128, -1121)]

--- Executing ---

        SELECT
            AVG(INVOICED_VS_QUOTED) AS avg_diff,
            MIN(INVOICED_VS_QUOTED) AS min_diff,
            MAX(INVOICED_VS_QUOTED) AS max_diff
        FROM PURCHASE_ORDERS_AND_INVOICES;
    
(Decimal('-1121.000000'), -1121, -1121)


In [None]:
import psycopg2
import csv

PG = {
    "host": "localhost",      # or your Postgres host
    "port": 5432,             # default port
    "dbname": "your_db_name", # database where supplier_case lives
    "user": "your_pg_user",
    "password": "your_pg_password"
}

# File path for local export
PATHS["supplier_case_csv"] = "/home/jovyan/Desktop/Summer_Q/SQL/SQL_FINAL_PROJECT/Data-5/supplier_case.csv"


In [None]:
with psycopg2.connect(**PG) as pgconn:
    with pgconn.cursor() as cur, open(PATHS["supplier_case_csv"], "w", newline="", encoding="utf-8") as out:
        cur.copy_expert("COPY (SELECT * FROM supplier_case) TO STDOUT WITH CSV HEADER", out)

print("Exported supplier_case to CSV:", PATHS["supplier_case_csv"])


In [None]:
with sf_connect() as conn:
    cs = conn.cursor()

    # Stage the CSV
    run_sql(cs, f"""
        PUT 'file://{PATHS['supplier_case_csv']}'
        @stg_supplier
        AUTO_COMPRESS=TRUE;
    """)

    # Create the supplier_case table (adjust columns if needed after preview)
    run_sql(cs, """
        CREATE OR REPLACE TABLE SUPPLIER_CASE (
            SUPPLIERID VARCHAR,
            SUPPLIERNAME VARCHAR,
            POSTALPOSTALCODE VARCHAR,
            OTHERCOLUMNS VARCHAR  -- add more depending on CSV header
        );
    """)

    # Copy staged data into Snowflake
    run_sql(cs, """
        COPY INTO SUPPLIER_CASE
        FROM @stg_supplier
        FILE_FORMAT = (FORMAT_NAME = ff_csv)
        ON_ERROR = 'CONTINUE';
    """)

    # Check row count
    run_sql(cs, "SELECT COUNT(*) FROM SUPPLIER_CASE;")
    print("Rows in SUPPLIER_CASE:", cs.fetchone())

    # Preview
    run_sql(cs, "SELECT * FROM SUPPLIER_CASE LIMIT 10;")
    print(cs.fetchall())


### Step 6