## Before loading csv

In [None]:
-- Create the ECOMMERCE schema that the rest of this notebook selects with USE SCHEMA.
-- NOTE: OR REPLACE drops an existing ECOMMERCE schema together with every object inside it.
CREATE OR REPLACE SCHEMA ECOMMERCE;

In [None]:
-- CSV file format for the e-commerce export: comma-delimited, newline-terminated
-- records, one header row skipped, no field quoting, '\N' loaded as NULL.
-- Rows whose column count differs from the target table are rejected.
CREATE FILE FORMAT ECOMMERCECSVFORMAT
TYPE = CSV
COMPRESSION = 'AUTO'
FIELD_DELIMITER = ','
RECORD_DELIMITER = '\n'
SKIP_HEADER = 1
FIELD_OPTIONALLY_ENCLOSED_BY = 'NONE'
TRIM_SPACE = FALSE
-- HH24 (24-hour clock) is the documented hour element and agrees with the
-- session-level TIMESTAMP_INPUT_FORMAT set later in this notebook.
TIMESTAMP_FORMAT = 'MM/DD/YYYY HH24:MI'
ERROR_ON_COLUMN_COUNT_MISMATCH = TRUE
ESCAPE = 'NONE'
-- '\134' is the octal code for a backslash.
ESCAPE_UNENCLOSED_FIELD = '\134'
NULL_IF = ('\\N');

In [None]:
-- Landing table for the raw CSV: one row per invoice line.
-- Column order must match the column order of the file being loaded.
CREATE OR REPLACE TABLE DATA (
	INVOICENO VARCHAR(38),
	STOCKCODE VARCHAR(38),
	DESCRIPTION VARCHAR(60),
	QUANTITY NUMBER(38,0),
	INVOICEDATE TIMESTAMP,
	-- Prices need a fractional part; with scale 0 they are rounded to whole
	-- units on load. Scale 4 is a conservative choice -- confirm against the file.
	UNITPRICE NUMBER(38,4),
	CUSTOMERID VARCHAR(10),
	COUNTRY VARCHAR(20)
);

## Load with SnowSQL

Use https://docs.snowflake.com/en/user-guide/connecting.html#logging-in-using-snowsql to install SnowSQL

In [2]:
# Print the installed SnowSQL version to confirm the CLI is available.
!snowsql -v

Version: 1.2.23
[0m[0m

In [None]:
# Open a SnowSQL session: -a account identifier, -u login name,
# -w warehouse to use, -d database to use.
!snowsql -a ueysmdr-wk00690 -u sparsh -w compute_wh -d sparsh;

In [None]:
-- Make ECOMMERCE the active schema for the statements that follow.
USE SCHEMA ECOMMERCE;

In [None]:
-- Named stage whose files are parsed with ECOMMERCECSVFORMAT by default.
CREATE STAGE my_upload
  FILE_FORMAT = ECOMMERCECSVFORMAT;

-- Alternative when no named file format exists: declare the format inline.
-- CREATE STAGE my_upload FILE_FORMAT = (TYPE = CSV SKIP_HEADER = 1);

In [None]:
-- Upload the local CSV to the stage (Linux/macOS file:// path syntax),
-- letting the client compress it during upload.
PUT file://data/upload.csv @my_upload
  AUTO_COMPRESS = TRUE;

In [None]:
-- Show the stage's properties (file format options, copy options, ...).
DESC STAGE my_upload;

In [None]:
-- Dry run: parse the staged file and return its first 2 rows without loading anything.
COPY INTO DATA
FROM @my_upload
VALIDATION_MODE = 'RETURN_2_ROWS';

In [None]:
-- Load the staged file into DATA; rows that fail to parse are skipped
-- instead of aborting the whole load.
COPY INTO DATA
FROM @my_upload
ON_ERROR = CONTINUE;

In [None]:
-- Remove the staged files now that they have been loaded. COPY INTO skips
-- files it has already loaded (tracked in its load metadata) unless
-- FORCE = TRUE, so this is housekeeping that leaves the stage empty for the
-- next upload.
remove @my_upload;

In [None]:
-- List the tables in the current schema; the "rows" column of the output
-- shows whether DATA has been populated.
SHOW TABLES;

In [None]:
-- Have this session parse timestamp strings as month/day/year with a 24-hour clock.
ALTER SESSION SET
  TIMESTAMP_INPUT_FORMAT = 'MM/DD/YYYY HH24:MI';

## Split Table

In [None]:
-- Invoice-level table: the distinct (customer, country, invoice date,
-- invoice number) combinations found in DATA.
CREATE OR REPLACE TABLE INVOICES AS
SELECT DISTINCT
    CUSTOMERID,
    COUNTRY,
    INVOICEDATE,
    INVOICENO
FROM DATA;

In [None]:
-- Sanity check: the result should be 25905 rows.
SELECT COUNT(*)
FROM INVOICES;

In [None]:
-- Line-item table: every DATA row reduced to its product columns plus the
-- invoice number that links it back to INVOICES.
CREATE OR REPLACE TABLE ITEMS AS
SELECT
    STOCKCODE,
    DESCRIPTION,
    UNITPRICE,
    QUANTITY,
    INVOICENO
FROM DATA;

In [None]:
-- Sanity check: the result should be 537113 rows.
SELECT COUNT(*)
FROM ITEMS;

## Visualize

In [None]:
-- INVOICES: how many different countries appear?
SELECT
    COUNT(DISTINCT COUNTRY) AS NUMBER_COUNTRIES
FROM INVOICES;

In [None]:
-- Top 10 countries by number of distinct clients, excluding the United
-- Kingdom because it has far more clients than any other country.
SELECT
    COUNTRY,
    COUNT(DISTINCT CUSTOMERID) AS N_CLIENTS
FROM INVOICES
-- Match the UK by exact name: a 'UNITED%' prefix would also drop every other
-- country whose name starts with "United".
-- Assumes the UK is stored as 'United Kingdom' -- confirm against the data.
WHERE UPPER(COUNTRY) <> 'UNITED KINGDOM'
GROUP BY COUNTRY
-- COUNTRY breaks ties so the top 10 is deterministic.
ORDER BY N_CLIENTS DESC, COUNTRY
LIMIT 10;

In [None]:
-- Top 10 clients by number of distinct invoices.
SELECT
    CUSTOMERID,
    COUNT(DISTINCT INVOICENO) AS N_ORDERS
FROM INVOICES
-- Invoices without a customer id are not a client; left in, they would be
-- counted together as one "customer".
WHERE CUSTOMERID IS NOT NULL
-- Group by the client only: grouping by the unselected COUNTRY column would
-- split a client with invoices in several countries into several rows.
GROUP BY CUSTOMERID
-- CUSTOMERID breaks ties so the top 10 is deterministic.
ORDER BY N_ORDERS DESC, CUSTOMERID
LIMIT 10;

In [None]:
-- The 10 (stock code, description) pairs with the largest total ordered quantity.
SELECT
    STOCKCODE,
    DESCRIPTION,
    SUM(QUANTITY) AS TOTAL_QUANTITY
FROM ITEMS
GROUP BY
    STOCKCODE,
    DESCRIPTION
ORDER BY TOTAL_QUANTITY DESC
LIMIT 10;

In [None]:
-- ITEMS: on how many distinct invoices does each stock code appear?
SELECT
    STOCKCODE,
    COUNT(DISTINCT INVOICENO)
FROM ITEMS
GROUP BY STOCKCODE;

In [None]:
-- Overview of unit prices: count, average, minimum and maximum over the
-- distinct (stock code, description, unit price) combinations in ITEMS.
WITH DISTINCT_PRICES AS (
    -- SELECT DISTINCT states the deduplication directly. No ORDER BY here:
    -- the aggregates below do not depend on row order, so sorting is wasted work.
    SELECT DISTINCT
        STOCKCODE,
        DESCRIPTION,
        UNITPRICE
    FROM ITEMS
)
SELECT
    COUNT(*),
    AVG(UNITPRICE),
    MIN(UNITPRICE),
    MAX(UNITPRICE)
FROM DISTINCT_PRICES;

In [None]:
-- Identified customers (non-NULL id) with at least one invoice that
-- contains a WHITE METAL LANTERN line.
SELECT DISTINCT
    inv.CUSTOMERID
FROM INVOICES AS inv
INNER JOIN ITEMS AS itm
    ON itm.INVOICENO = inv.INVOICENO
WHERE inv.CUSTOMERID IS NOT NULL
  AND itm.DESCRIPTION = 'WHITE METAL LANTERN';

In [None]:
-- Which items generate the most revenue per country outside of the UK?
-- INVOICES is DISTINCT over (customer, country, date, invoice number), so one
-- invoice number can appear on several rows; joining ITEMS to it directly
-- would then count those item lines more than once. Reduce it to one row per
-- (invoice, country) first.
WITH INVOICE_COUNTRY AS (
    SELECT DISTINCT
        INVOICENO,
        COUNTRY
    FROM INVOICES
)
SELECT
    ITEMS.DESCRIPTION,
    -- Revenue is the sum of price * quantity per line; AVG(price) * SUM(qty)
    -- is only equal to it when every line has the same price.
    SUM(ITEMS.UNITPRICE * ITEMS.QUANTITY) AS TOTAL_REVENUE,
    INVOICE_COUNTRY.COUNTRY
FROM ITEMS
INNER JOIN INVOICE_COUNTRY
    ON ITEMS.INVOICENO = INVOICE_COUNTRY.INVOICENO
-- Exact match: a 'UNITED%' prefix would also drop other "United ..." countries.
-- Assumes the UK is stored as 'United Kingdom' -- confirm against the data.
WHERE UPPER(INVOICE_COUNTRY.COUNTRY) <> 'UNITED KINGDOM'
GROUP BY ITEMS.DESCRIPTION, INVOICE_COUNTRY.COUNTRY
ORDER BY TOTAL_REVENUE DESC, INVOICE_COUNTRY.COUNTRY, ITEMS.DESCRIPTION;

In [None]:
-- Ten best-selling items by total quantity (same query as "Most ordered items").
SELECT itm.STOCKCODE,
       itm.DESCRIPTION,
       SUM(itm.QUANTITY) AS TOTAL_QUANTITY
FROM ITEMS AS itm
GROUP BY itm.STOCKCODE, itm.DESCRIPTION
ORDER BY TOTAL_QUANTITY DESC
LIMIT 10;

## Import Task

In [None]:
-- Show the files currently sitting in the stage.
LIST @my_upload;

In [None]:
-- Empty the stage so the scheduled import starts with no leftover files.
REMOVE @my_upload;

In [None]:
-- Recreate an empty DATA landing table as the target of the scheduled import.
-- Column order must match the column order of the staged CSV.
CREATE OR REPLACE TABLE DATA (
	INVOICENO VARCHAR(38),
	STOCKCODE VARCHAR(38),
	DESCRIPTION VARCHAR(60),
	QUANTITY NUMBER(38,0),
	INVOICEDATE TIMESTAMP,
	-- Prices need a fractional part; with scale 0 they are rounded to whole
	-- units on load. Scale 4 is a conservative choice -- confirm against the file.
	UNITPRICE NUMBER(38,4),
	CUSTOMERID VARCHAR(10),
	COUNTRY VARCHAR(20)
);

In [None]:
-- Root task: every minute, load whatever is staged in @my_upload into DATA
-- using the COMPUTE_WH warehouse.
CREATE OR REPLACE TASK import_from_stage
  WAREHOUSE = COMPUTE_WH
  SCHEDULE = '1 MINUTE'
AS
  COPY INTO DATA
  FROM @my_upload;

In [None]:
-- Child task of import_from_stage: it has no schedule of its own and runs
-- after its predecessor, removing every file in @my_upload.
-- NOTE(review): a file uploaded between the COPY and this REMOVE would be
-- deleted without ever being loaded; COPY INTO ... PURGE = TRUE in the import
-- task may be a safer way to clean up -- confirm.
create or replace task clean_stage
	warehouse=COMPUTE_WH
	after import_from_stage
	as remove @my_upload
              ;

In [None]:
-- Tasks are created suspended. RESUME lets them run; SUSPEND stops them.
-- Resume the child before the root so the whole graph is enabled by the time
-- the root's schedule starts firing.
ALTER TASK clean_stage RESUME;
ALTER TASK import_from_stage RESUME;

-- Teardown. A child task cannot be altered until its parent has been
-- suspended, so suspend the root (IMPORT_FROM_STAGE) first and its child
-- second. A task has to be suspended before it is dropped.
ALTER TASK IMPORT_FROM_STAGE SUSPEND;
ALTER TASK CLEAN_STAGE SUSPEND;
DROP TASK IF EXISTS CLEAN_STAGE;
DROP TASK IF EXISTS IMPORT_FROM_STAGE;

In [None]:
-- Root task: every minute, rebuild INVOICES from the current contents of DATA.
CREATE OR REPLACE TASK SPLIT_TABLE_AUTOMATIC
  WAREHOUSE = COMPUTE_WH
  SCHEDULE = '1 MINUTE'
AS
  CREATE OR REPLACE TABLE INVOICES AS (
    SELECT DISTINCT
        CUSTOMERID,
        COUNTRY,
        INVOICEDATE,
        INVOICENO
    FROM DATA
  );

In [None]:
-- Child task: after SPLIT_TABLE_AUTOMATIC has run, rebuild ITEMS from DATA.
CREATE OR REPLACE TASK SPLIT_TABLE_AUTOMATIC_SECOND
  WAREHOUSE = COMPUTE_WH
  AFTER SPLIT_TABLE_AUTOMATIC
AS
  CREATE OR REPLACE TABLE ITEMS AS (
    SELECT
        STOCKCODE,
        DESCRIPTION,
        UNITPRICE,
        QUANTITY,
        INVOICENO
    FROM DATA
  );

In [None]:
-- Tasks are created suspended. RESUME lets them run; SUSPEND stops them.
-- Enable the graph: resume the child first, then the scheduled root, so the
-- child is already enabled when the root starts firing.
ALTER TASK SPLIT_TABLE_AUTOMATIC_SECOND RESUME;
ALTER TASK SPLIT_TABLE_AUTOMATIC RESUME;

-- Stop the graph: suspend the root first, then the child.
ALTER TASK SPLIT_TABLE_AUTOMATIC SUSPEND;
ALTER TASK SPLIT_TABLE_AUTOMATIC_SECOND SUSPEND;

## S3 Integration

Use https://docs.snowflake.com/en/user-guide/data-load-s3-config-storage-integration.html to create IAM role

In [None]:
-- Storage integration that lets external stages read from S3 by assuming the
-- given IAM role instead of embedding AWS keys in the stage definition.
-- Only the listed bucket prefix may be referenced by stages using it.
CREATE STORAGE INTEGRATION s3_storage_integration
  TYPE = EXTERNAL_STAGE
  STORAGE_PROVIDER = 'S3'
  ENABLED = TRUE
  STORAGE_AWS_ROLE_ARN = 'arn:aws:iam::684199068947:role/snowflake_role'
  STORAGE_ALLOWED_LOCATIONS = ('s3://wysde-assets/labs/lab-185-snowflake-snowsql/staging/');

In [None]:
-- Show the integration's properties, including the Snowflake-side IAM user
-- ARN and external id needed for the AWS role's trust policy.
DESC INTEGRATION s3_storage_integration;

In [None]:
-- External stage over the S3 staging prefix, authenticated through the
-- storage integration and parsed with ECOMMERCECSVFORMAT.
CREATE OR REPLACE STAGE s3stage
  URL = 's3://wysde-assets/labs/lab-185-snowflake-snowsql/staging/'
  STORAGE_INTEGRATION = s3_storage_integration
  FILE_FORMAT = ECOMMERCECSVFORMAT;

In [None]:
-- Target table for data loaded from the S3 stage; same layout as DATA.
CREATE OR REPLACE TABLE S3TABLE (
	INVOICENO VARCHAR(38),
	STOCKCODE VARCHAR(38),
	DESCRIPTION VARCHAR(60),
	QUANTITY NUMBER(38,0),
	INVOICEDATE TIMESTAMP,
	-- Prices need a fractional part; with scale 0 they are rounded to whole
	-- units on load. Scale 4 is a conservative choice -- confirm against the file.
	UNITPRICE NUMBER(38,4),
	CUSTOMERID VARCHAR(10),
	COUNTRY VARCHAR(20)
);

In [8]:
# Copy the local CSV into the S3 prefix that the s3stage external stage points at.
!aws s3 cp data/upload.csv s3://wysde-assets/labs/lab-185-snowflake-snowsql/staging/

In [None]:
-- Load every file found in the S3 stage into S3TABLE.
COPY INTO s3table
FROM @s3stage;

In [None]:
-- List the tables in the current schema to check that S3TABLE was loaded.
SHOW TABLES;

## S3 Pipe

In [None]:
-- Recreate an empty S3TABLE as the target of the Snowpipe defined below.
CREATE OR REPLACE TABLE S3TABLE (
	INVOICENO VARCHAR(38),
	STOCKCODE VARCHAR(38),
	DESCRIPTION VARCHAR(60),
	QUANTITY NUMBER(38,0),
	INVOICEDATE TIMESTAMP,
	-- Prices need a fractional part; with scale 0 they are rounded to whole
	-- units on load. Scale 4 is a conservative choice -- confirm against the file.
	UNITPRICE NUMBER(38,4),
	CUSTOMERID VARCHAR(10),
	COUNTRY VARCHAR(20)
);

In [None]:
-- List the tables in the current schema; S3TABLE should now be empty.
SHOW TABLES;

In [None]:
-- Snowpipe that copies files from the S3 stage into S3TABLE. With
-- AUTO_INGEST enabled, loads are triggered by bucket event notifications
-- rather than by explicit calls.
CREATE OR REPLACE PIPE S3_pipe
  AUTO_INGEST = TRUE
AS
  COPY INTO S3table
  FROM @s3stage;

In [None]:
-- List the pipes in the current schema; the notification_channel column holds
-- the queue ARN to configure on the S3 bucket's event notifications.
SHOW PIPES;

In [None]:
-- Return the pipe's current status as a JSON document (execution state, pending files, ...).
SELECT SYSTEM$PIPE_STATUS('s3_pipe');