In [1]:
import json
from urllib.parse import quote

import boto3

In [2]:
# Configure the ipython-sql magic, then load (or reload) the extension.
# Return query results as pandas DataFrames.
%config SqlMagic.autopandas=True
# Do not echo the connection string with each result; the URL used in this
# notebook embeds the database credentials.
%config SqlMagic.displaycon=False
# Suppress the rows-affected feedback message.
%config SqlMagic.feedback=False
# Cap the number of rows rendered for a result set.
%config SqlMagic.displaylimit=5
# Load the sql extension, or reload it when it is already loaded.
%reload_ext sql

In [3]:
def get_secret(secret_name, region_name="us-east-1"):
    """Fetch a secret from AWS Secrets Manager and decode it as JSON.

    Args:
        secret_name: Identifier passed to Secrets Manager as ``SecretId``.
        region_name: AWS region the Secrets Manager client is created for.

    Returns:
        The object obtained by parsing the secret's ``SecretString`` as JSON.
    """
    secrets_client = boto3.session.Session().client(
        service_name='secretsmanager',
        region_name=region_name,
    )
    response = secrets_client.get_secret_value(SecretId=secret_name)
    return json.loads(response['SecretString'])

In [4]:
# Pull the RDS connection settings from the "wysde" secret.
secret_vals = get_secret("wysde")

postgres_endpoint = secret_vals['RDS_POSTGRES_HOST']
postgres_user = secret_vals['RDS_POSTGRES_USERNAME']
postgres_pass = secret_vals['RDS_POSTGRES_PASSWORD']
port = secret_vals['RDS_POSTGRES_PORT']
dbname = "sparsh"

# Percent-encode the credentials before embedding them in the SQLAlchemy URL:
# characters such as "@", "/", ":" or "%" in a username or password would
# otherwise be parsed as URL delimiters and corrupt the connection string.
# safe="" encodes every reserved character, including "/".
# NOTE(review): assumes the secret stores the raw (not already percent-encoded)
# credentials -- confirm.
conn = "postgresql+psycopg2://%s:%s@%s:%s/%s" % (
    quote(postgres_user, safe=""),
    quote(postgres_pass, safe=""),
    postgres_endpoint,
    port,
    dbname,
)

In [5]:
# Connect the %sql / %%sql magics to the database; {conn} is expanded to the
# value of the Python variable conn.
%sql {conn}

## Advanced Queries

In [6]:
%%sql

-- Daily revenue per product, highest earners first within each day, ten row sample.
SELECT o.order_date,
    oi.order_item_product_id,
    round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
FROM orders AS o
    INNER JOIN order_items AS oi
        ON oi.order_item_order_id = o.order_id
GROUP BY o.order_date,
    oi.order_item_product_id
ORDER BY o.order_date,
    revenue DESC
LIMIT 10

Unnamed: 0,order_date,order_item_product_id,revenue
0,2013-07-25,1004,10799.46
1,2013-07-25,957,9599.36
2,2013-07-25,191,8499.15
3,2013-07-25,365,7558.74
4,2013-07-25,1073,6999.65
5,2013-07-25,1014,6397.44
6,2013-07-25,403,5589.57
7,2013-07-25,502,5100.0
8,2013-07-25,627,2879.28
9,2013-07-25,226,599.99


In [7]:
%%sql

-- Persist the daily revenue per product aggregation as a reusable view.
CREATE OR REPLACE VIEW daily_product_revenue_v AS
SELECT o.order_date,
    oi.order_item_product_id,
    round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
FROM orders AS o
    INNER JOIN order_items AS oi
        ON oi.order_item_order_id = o.order_id
GROUP BY o.order_date,
    oi.order_item_product_id

In [None]:
%%sql

-- Products in the five highest revenue ranks per day, COMPLETE and CLOSED orders only.
-- dense_rank keeps ties, so a day can contribute more than five rows.
WITH daily_product_revenue AS (
    SELECT o.order_date,
        oi.order_item_product_id,
        round(sum(oi.order_item_subtotal)::numeric, 2) AS revenue
    FROM orders AS o
        INNER JOIN order_items AS oi
            ON oi.order_item_order_id = o.order_id
    WHERE o.order_status IN ('COMPLETE', 'CLOSED')
    GROUP BY o.order_date,
        oi.order_item_product_id
),
ranked AS (
    SELECT dpr.*,
        dense_rank() OVER (
            PARTITION BY dpr.order_date
            ORDER BY dpr.revenue DESC
        ) AS drnk
    FROM daily_product_revenue AS dpr
)
SELECT *
FROM ranked
WHERE drnk <= 5
ORDER BY order_date,
    revenue DESC
LIMIT 20

In [8]:
%%sql

-- Orphaned order items, meaning rows whose order id has no matching order.
-- NOT EXISTS replaces the correlated NOT IN. It expresses the anti join
-- directly and cannot be affected by a NULL coming out of the subquery.
SELECT *
FROM order_items oi
WHERE NOT EXISTS (
    SELECT 1
    FROM orders o
    WHERE o.order_id = oi.order_item_order_id
)
LIMIT 10

In [9]:
%%sql

-- Rebuild the backup from scratch so that re-running this cell does not fail
-- because the table already exists.
DROP TABLE IF EXISTS orders_backup;

-- Copy of orders with order_date broken out into integer date parts.
-- extract yields the parts numerically, avoiding a format then cast round trip.
CREATE TABLE orders_backup
AS
SELECT order_id,
    extract(year FROM order_date)::int AS order_year,
    extract(month FROM order_date)::int AS order_month,
    extract(day FROM order_date)::int AS order_day_of_month,
    extract(doy FROM order_date)::int AS order_day_of_year,
    order_customer_id,
    order_status
FROM orders

In [10]:
%%sql

-- Preview ten rows of the backup table with every column listed explicitly.
SELECT order_id,
    order_year,
    order_month,
    order_day_of_month,
    order_day_of_year,
    order_customer_id,
    order_status
FROM orders_backup
LIMIT 10

Unnamed: 0,order_id,order_year,order_month,order_day_of_month,order_day_of_year,order_customer_id,order_status
0,1,2013,7,25,206,11599,CLOSED
1,2,2013,7,25,206,256,PENDING_PAYMENT
2,3,2013,7,25,206,12111,COMPLETE
3,4,2013,7,25,206,8827,CLOSED
4,5,2013,7,25,206,11318,COMPLETE
5,6,2013,7,25,206,7130,COMPLETE
6,7,2013,7,25,206,4530,COMPLETE
7,8,2013,7,25,206,2911,PROCESSING
8,9,2013,7,25,206,5657,PENDING_PAYMENT
9,10,2013,7,25,206,5648,PENDING_PAYMENT


In [11]:
%%sql

-- Clean up scratch tables. Each drop is a no-op when the table is absent.
DROP TABLE IF EXISTS order_items_empty;
DROP TABLE IF EXISTS orders_backup;
DROP TABLE IF EXISTS customers_backup;

### Advanced DML

In [12]:
%%sql

-- Start from a clean slate so the notebook can be re-run. Without the drop a
-- second run fails because the table already exists.
DROP TABLE IF EXISTS customer_order_metrics_mthly;

-- Monthly order metrics per customer. order_month holds a yyyy-MM string.
CREATE TABLE customer_order_metrics_mthly (
    customer_id INT,
    order_month CHAR(7),
    order_count INT,
    order_revenue FLOAT
)

In [13]:
%%sql

-- Enforce one row per month and customer on the monthly metrics table.
ALTER TABLE customer_order_metrics_mthly
ADD PRIMARY KEY (
    order_month,
    customer_id
)

In [14]:
%%sql

-- Preview of monthly metrics per customer, largest counts first within each month.
-- NOTE(review): count(*) counts joined order_items rows, so order_count is a
-- line item count rather than a count of distinct orders. Confirm that is intended.
SELECT o.order_customer_id,
    to_char(o.order_date, 'yyyy-MM') AS order_month,
    count(*) AS order_count,
    round(sum(oi.order_item_subtotal)::numeric, 2) AS order_revenue
FROM orders AS o
    INNER JOIN order_items AS oi
        ON oi.order_item_order_id = o.order_id
GROUP BY 1, 2
ORDER BY order_month,
    order_count DESC
LIMIT 10

Unnamed: 0,order_customer_id,order_month,order_count,order_revenue
0,5293,2013-07,10,2781.73
1,4257,2013-07,10,2059.75
2,32,2013-07,9,2009.75
3,2071,2013-07,9,1629.84
4,7473,2013-07,9,1244.9
5,7073,2013-07,9,1377.83
6,488,2013-07,9,1365.82
7,9103,2013-07,9,1587.85
8,7604,2013-07,8,1114.85
9,8177,2013-07,8,1379.85


In [15]:
%%sql

-- Seed the monthly table with counts only, leaving order_revenue NULL.
-- The target columns are listed explicitly so the statement does not depend on
-- the physical column order of the table, and ON CONFLICT DO NOTHING lets the
-- cell be re-run without a primary key violation.
INSERT INTO customer_order_metrics_mthly
    (customer_id, order_month, order_count, order_revenue)
SELECT o.order_customer_id,
    to_char(o.order_date, 'yyyy-MM') AS order_month,
    count(1) AS order_count,
    NULL
FROM orders o
    JOIN order_items oi
        ON o.order_id = oi.order_item_order_id
GROUP BY o.order_customer_id,
    to_char(o.order_date, 'yyyy-MM')
ON CONFLICT DO NOTHING

In [16]:
%%sql

-- First ten rows of the monthly metrics table in primary key order.
SELECT customer_id,
    order_month,
    order_count,
    order_revenue
FROM customer_order_metrics_mthly
ORDER BY order_month,
    customer_id
LIMIT 10

Unnamed: 0,customer_id,order_month,order_count,order_revenue
0,12,2013-07,2,
1,16,2013-07,1,
2,17,2013-07,2,
3,19,2013-07,3,
4,32,2013-07,9,
5,45,2013-07,4,
6,48,2013-07,4,
7,54,2013-07,2,
8,58,2013-07,4,
9,64,2013-07,2,


In [17]:
%%sql

-- Refresh count and revenue for the August 2013 rows from one aggregated pass.
-- UPDATE ... FROM replaces the correlated subquery that was re-evaluated for
-- every target row. Only rows with a matching aggregate are touched, so a
-- customer whose August orders have no order items is no longer overwritten
-- with NULL values.
UPDATE customer_order_metrics_mthly comd
SET order_count = agg.order_count,
    order_revenue = agg.order_revenue
FROM (
    SELECT o.order_customer_id AS customer_id,
        to_char(o.order_date, 'yyyy-MM') AS order_month,
        count(1) AS order_count,
        round(sum(oi.order_item_subtotal)::numeric, 2) AS order_revenue
    FROM orders o
        JOIN order_items oi
            ON o.order_id = oi.order_item_order_id
    WHERE to_char(o.order_date, 'yyyy-MM') = '2013-08'
    GROUP BY o.order_customer_id,
        to_char(o.order_date, 'yyyy-MM')
) agg
WHERE comd.order_month = '2013-08'
    AND comd.customer_id = agg.customer_id
    AND comd.order_month = agg.order_month

In [18]:
%%sql

-- Check the August 2013 rows. The filter pins order_month to a single value,
-- so sorting by customer_id alone gives the same ordering.
SELECT customer_id,
    order_month,
    order_count,
    order_revenue
FROM customer_order_metrics_mthly
WHERE order_month = '2013-08'
ORDER BY customer_id
LIMIT 10

Unnamed: 0,customer_id,order_month,order_count,order_revenue
0,2,2013-08,5,769.82
1,13,2013-08,5,1065.93
2,14,2013-08,3,459.97
3,18,2013-08,1,129.99
4,20,2013-08,2,739.91
5,22,2013-08,5,769.96
6,24,2013-08,2,399.91
7,25,2013-08,1,129.99
8,33,2013-08,3,929.92
9,34,2013-08,4,789.92


### Merging or Upserting Data

In [20]:
%%sql

-- Start from a clean slate so the notebook can be re-run. Without the drop a
-- second run fails because the table already exists.
DROP TABLE IF EXISTS customer_order_metrics_dly;

-- Daily order metrics per customer.
CREATE TABLE customer_order_metrics_dly (
    customer_id INT,
    order_date DATE,
    order_count INT,
    order_revenue FLOAT
)

In [21]:
%%sql

-- Enforce one row per customer and day on the daily metrics table.
ALTER TABLE customer_order_metrics_dly
ADD PRIMARY KEY (
    customer_id,
    order_date
)

In [22]:
%%sql

-- Seed the daily table with August 2013 counts only, leaving order_revenue NULL.
-- The target columns are listed explicitly so the statement does not depend on
-- the physical column order of the table, and ON CONFLICT DO NOTHING lets the
-- cell be re-run without a primary key violation.
INSERT INTO customer_order_metrics_dly
    (customer_id, order_date, order_count, order_revenue)
SELECT o.order_customer_id,
    o.order_date,
    count(1) AS order_count,
    NULL
FROM orders o
    JOIN order_items oi
        ON o.order_id = oi.order_item_order_id
WHERE o.order_date BETWEEN '2013-08-01' AND '2013-08-31'
GROUP BY o.order_customer_id,
    o.order_date
ON CONFLICT DO NOTHING

In [23]:
%%sql

-- Update step of the manual upsert for the August to October 2013 window.
-- UPDATE ... FROM aggregates the source once instead of running a correlated
-- subquery per target row. Only rows with a matching aggregate are touched, so
-- an existing row in the window without matching order items is no longer
-- overwritten with NULL values.
UPDATE customer_order_metrics_dly comd
SET order_count = agg.order_count,
    order_revenue = agg.order_revenue
FROM (
    SELECT o.order_customer_id AS customer_id,
        o.order_date,
        count(1) AS order_count,
        round(sum(oi.order_item_subtotal)::numeric, 2) AS order_revenue
    FROM orders o
        JOIN order_items oi
            ON o.order_id = oi.order_item_order_id
    WHERE o.order_date BETWEEN '2013-08-01' AND '2013-10-31'
    GROUP BY o.order_customer_id,
        o.order_date
) agg
WHERE comd.order_date BETWEEN '2013-08-01' AND '2013-10-31'
    AND comd.customer_id = agg.customer_id
    AND comd.order_date = agg.order_date

In [24]:
%%sql

-- Insert step of the manual upsert. It adds customer and day combinations in
-- the August to October 2013 window that are not yet present in the table.
-- The target columns are listed explicitly so the statement does not depend on
-- the physical column order of the table.
INSERT INTO customer_order_metrics_dly
    (customer_id, order_date, order_count, order_revenue)
SELECT o.order_customer_id AS customer_id,
    o.order_date,
    count(1) AS order_count,
    round(sum(oi.order_item_subtotal)::numeric, 2) AS order_revenue
FROM orders o
    JOIN order_items oi
        ON o.order_id = oi.order_item_order_id
WHERE o.order_date BETWEEN '2013-08-01' AND '2013-10-31'
    AND NOT EXISTS (
        SELECT 1 FROM customer_order_metrics_dly comd
        WHERE o.order_customer_id = comd.customer_id
            AND o.order_date = comd.order_date
    )
GROUP BY o.order_customer_id,
    o.order_date