In [4]:
import boto3
import json

In [5]:
%config SqlMagic.autopandas=True
%config SqlMagic.displaycon=False
%config SqlMagic.feedback=False
%config SqlMagic.displaylimit=5
%reload_ext sql

In [6]:
def get_secret(secret_name, region_name="us-east-1"):
    session = boto3.session.Session()
    client = session.client(
        service_name='secretsmanager',
        region_name=region_name)
    get_secret_value_response = client.get_secret_value(SecretId=secret_name)
    get_secret_value_response = json.loads(get_secret_value_response['SecretString'])
    return get_secret_value_response

In [7]:
secret_vals = get_secret("wysde")

postgres_endpoint = secret_vals['RDS_POSTGRES_HOST']
postgres_user = secret_vals['RDS_POSTGRES_USERNAME']
postgres_pass = secret_vals['RDS_POSTGRES_PASSWORD']
port = secret_vals['RDS_POSTGRES_PORT']
dbname = "sparsh"

conn = "postgresql+psycopg2://%s:%s@%s:%s/%s" \
% (postgres_user, postgres_pass, postgres_endpoint, port, dbname)

In [40]:
%sql {conn}

## Daily Product Revenue

In [9]:
%%sql

SELECT 
    o.order_date,
    oi.order_item_product_id,
    p.product_name,
    round(sum(oi.order_item_subtotal::numeric),2) AS product_revenue
FROM orders o
    JOIN order_items oi
        ON o.order_id = oi.order_item_order_id
    JOIN products p
        ON p.product_id = oi.order_item_product_id
WHERE 
o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY
o.order_date,
oi.order_item_product_id,
p.product_name
LIMIT 10

Unnamed: 0,order_date,order_item_product_id,product_name,product_revenue
0,2013-07-25,24,Elevation Training Mask 2.0,319.96
1,2013-07-25,93,Under Armour Men's Tech II T-Shirt,74.97
2,2013-07-25,134,Nike Women's Legend V-Neck T-Shirt,100.0
3,2013-07-25,191,Nike Men's Free 5.0+ Running Shoe,5099.49
4,2013-07-25,226,Bowflex SelectTech 1090 Dumbbells,599.99
5,2013-07-25,365,Perfect Fitness Perfect Rip Deck,3359.44
6,2013-07-25,403,Nike Men's CJ Elite 2 TD Football Cleat,1949.85
7,2013-07-25,502,Nike Men's Dri-FIT Victory Golf Polo,1650.0
8,2013-07-25,572,TYR Boys' Team Digi Jammer,119.97
9,2013-07-25,625,Nike Men's Kobe IX Elite Low Basketball Shoe,199.99


### Exercise 1 - Customer order count

Get order count per customer for the month of 2014 January.

Tables - orders and customers

Data should be sorted in descending order by count and ascending order by customer id.

Output should contain customer_id, customer_first_name, customer_last_name and customer_order_count.



In [11]:
%%sql 

SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'orders'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,orders,order_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,orders,order_date,2,,NO,timestamp without time zone,,,...,NO,,,,,,NO,NEVER,,YES
2,sparsh,public,orders,order_customer_id,3,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
3,sparsh,public,orders,order_status,4,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES


In [12]:
%%sql 
SELECT DISTINCT table_schema, table_name 
FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_schema = 'public'

Unnamed: 0,table_schema,table_name
0,public,DimCustomer
1,public,categories
2,public,DimMonth
3,public,taxi_zone_lookup
4,public,order_items
5,public,products
6,public,FactBilling
7,public,orders
8,public,customers
9,public,departments


In [13]:
%%sql 

SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'customers'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,customers,customer_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,customers,customer_fname,2,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
2,sparsh,public,customers,customer_lname,3,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
3,sparsh,public,customers,customer_email,4,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
4,sparsh,public,customers,customer_password,5,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
5,sparsh,public,customers,customer_street,6,,NO,character varying,255.0,1020.0,...,NO,,,,,,NO,NEVER,,YES
6,sparsh,public,customers,customer_city,7,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
7,sparsh,public,customers,customer_state,8,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
8,sparsh,public,customers,customer_zipcode,9,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES


In [14]:
%%sql

SELECT
    c.customer_id,
    c.customer_fname,
    c.customer_lname,
    COUNT(o.order_id) AS customer_order_count
FROM orders o
JOIN customers c
    ON o.order_customer_id = c.customer_id
WHERE
o.order_date >= '01-01-2014' AND o.order_date < '01-02-2014'
GROUP BY
    c.customer_id,
    c.customer_fname,
    c.customer_lname
ORDER BY
    customer_order_count DESC,
    customer_id ASC
LIMIT 20;

Unnamed: 0,customer_id,customer_fname,customer_lname,customer_order_count
0,3099,Brittany,Copeland,2
1,206,Mary,West,1
2,224,Mary,Hernandez,1
3,279,Anna,Smith,1
4,297,Catherine,Smith,1
5,363,Jennifer,Obrien,1
6,387,Mary,Levy,1
7,470,Mary,Cannon,1
8,492,Mary,Mckinney,1
9,505,Mary,Smith,1


### Exercise 2 – Dormant Customers

Get the customer details who have not placed any order for the month of 2014 January.

Tables – orders and customers

Data should be sorted in ascending order by customer_id

Output should contain all the fields from customers

In [15]:
%%sql

select c.* 
FROM customers c LEFT OUTER JOIN 
    (SELECT DISTINCT order_customer_id 
     FROM orders WHERE
     to_char(order_date, 'yyyy-MM') = '2014-01'
    ) o
    ON c.customer_id = o.order_customer_id
WHERE  o.order_customer_id IS NULL 
ORDER BY c.customer_id
LIMIT 10

Unnamed: 0,customer_id,customer_fname,customer_lname,customer_email,customer_password,customer_street,customer_city,customer_state,customer_zipcode
0,1,Richard,Hernandez,XXXXXXXXX,XXXXXXXXX,6303 Heather Plaza,Brownsville,TX,78521
1,2,Mary,Barrett,XXXXXXXXX,XXXXXXXXX,9526 Noble Embers Ridge,Littleton,CO,80126
2,3,Ann,Smith,XXXXXXXXX,XXXXXXXXX,3422 Blue Pioneer Bend,Caguas,PR,725
3,4,Mary,Jones,XXXXXXXXX,XXXXXXXXX,8324 Little Common,San Marcos,CA,92069
4,5,Robert,Hudson,XXXXXXXXX,XXXXXXXXX,10 Crystal River Mall,Caguas,PR,725
5,6,Mary,Smith,XXXXXXXXX,XXXXXXXXX,3151 Sleepy Quail Promenade,Passaic,NJ,7055
6,9,Mary,Perez,XXXXXXXXX,XXXXXXXXX,3616 Quaking Street,Caguas,PR,725
7,10,Melissa,Smith,XXXXXXXXX,XXXXXXXXX,8598 Harvest Beacon Plaza,Stafford,VA,22554
8,11,Mary,Huffman,XXXXXXXXX,XXXXXXXXX,3169 Stony Woods,Caguas,PR,725
9,12,Christopher,Smith,XXXXXXXXX,XXXXXXXXX,5594 Jagged Embers By-pass,San Antonio,TX,78227


### Exercise 3 - Revenue Per Customer

Get the revenue generated by each customer for the month of 2014 January

Tables - orders, order_items and customers

Data should be sorted in descending order by revenue and then ascending order by customer_id

Output should contain customer_id, customer_first_name, customer_last_name, customer_revenue.

If there are no orders placed by customer, then the corresponding revenue for a give customer should be 0.

Consider only COMPLETE and CLOSED orders

In [16]:
%%sql

SELECT
    c.customer_id,
    c.customer_fname,
    c.customer_lname,
    sum(round(oi.order_item_subtotal::numeric,2)) AS customer_revenue
FROM orders o
JOIN customers c
    ON o.order_customer_id = c.customer_id
JOIN order_items oi
    ON o.order_id = oi.order_item_order_id
WHERE
    o.order_status IN ('COMPLETE', 'CLOSED')
    AND to_char(o.order_date,'mm/yyyy') = '01/2014' 
GROUP BY
    c.customer_id,
    c.customer_fname,
    c.customer_lname
ORDER BY
    customer_revenue DESC,
    c.customer_id ASC
LIMIT 10

Unnamed: 0,customer_id,customer_fname,customer_lname,customer_revenue
0,2555,Mary,Long,2954.63
1,3465,Mary,Gardner,2929.74
2,3710,Ashley,Smith,2739.82
3,1780,Larry,Sharp,2689.65
4,986,Catherine,Hawkins,2629.9
5,9676,Theresa,Smith,2599.84
6,1847,Mary,Smith,2589.87
7,11901,Mary,Smith,2469.87
8,4618,Andrea,Smith,2429.82
9,10896,Victoria,Smith,2419.78


### Exercise 4 - Revenue Per Category

Get the revenue generated for each category for the month of 2014 January

Tables - orders, order_items, products and categories

Data should be sorted in ascending order by category_id.

Output should contain all the fields from category along with the revenue as category_revenue.

Consider only COMPLETE and CLOSED orders


In [17]:
%%sql 

SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'products'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,products,product_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,products,product_category_id,2,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
2,sparsh,public,products,product_name,3,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES
3,sparsh,public,products,product_description,4,,NO,character varying,255.0,1020.0,...,NO,,,,,,NO,NEVER,,YES
4,sparsh,public,products,product_price,5,,NO,double precision,,,...,NO,,,,,,NO,NEVER,,YES
5,sparsh,public,products,product_image,6,,NO,character varying,255.0,1020.0,...,NO,,,,,,NO,NEVER,,YES


In [18]:
%%sql 
SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'order_items'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,order_items,order_item_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,order_items,order_item_order_id,2,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
2,sparsh,public,order_items,order_item_product_id,3,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
3,sparsh,public,order_items,order_item_quantity,4,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
4,sparsh,public,order_items,order_item_subtotal,5,,NO,double precision,,,...,NO,,,,,,NO,NEVER,,YES
5,sparsh,public,order_items,order_item_product_price,6,,NO,double precision,,,...,NO,,,,,,NO,NEVER,,YES


In [19]:
%%sql 
SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'categories'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,categories,category_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,categories,category_department_id,2,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
2,sparsh,public,categories,category_name,3,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES


In [20]:
%%sql

SELECT c.*, 
    round(sum(oi.order_item_subtotal)::numeric, 2) AS category_revenue 
FROM categories c JOIN products p 
        ON c.category_id = p.product_category_id 
    JOIN order_items oi 
        ON p.product_id = oi.order_item_product_id 
    JOIN orders o
        ON oi.order_item_order_id = o.order_id
WHERE to_char(o.order_date, 'yyyy-MM') = '2014-01' 
    AND o.order_status IN ('COMPLETE', 'CLOSED')
GROUP BY c.category_id
ORDER BY c.category_id 
LIMIT 10;

Unnamed: 0,category_id,category_department_id,category_name,category_revenue
0,2,2,Soccer,1094.88
1,3,2,Baseball & Softball,3214.41
2,4,2,Basketball,1299.98
3,5,2,Lacrosse,1299.69
4,6,2,Tennis & Racquet,1124.75
5,7,2,Hockey,1433.0
6,9,3,Cardio Equipment,133156.77
7,10,3,Strength Training,3388.96
8,11,3,Fitness Accessories,1509.73
9,12,3,Boxing & MMA,3998.46


### Exercise 5 - Product Count Per Department

Get the products for each department.

Tables - departments, categories, products

Data should be sorted in ascending order by department_id

Output should contain all the fields from department and the product count as product_count

In [21]:
%%sql

SELECT * FROM information_schema.columns
WHERE table_catalog = '{dbname}'
AND table_name = 'departments'

Unnamed: 0,table_catalog,table_schema,table_name,column_name,ordinal_position,column_default,is_nullable,data_type,character_maximum_length,character_octet_length,...,is_identity,identity_generation,identity_start,identity_increment,identity_maximum,identity_minimum,identity_cycle,is_generated,generation_expression,is_updatable
0,sparsh,public,departments,department_id,1,,NO,integer,,,...,NO,,,,,,NO,NEVER,,YES
1,sparsh,public,departments,department_name,2,,NO,character varying,45.0,180.0,...,NO,,,,,,NO,NEVER,,YES


In [23]:
%%sql

SELECT 
    d.*,
    count(p.product_id) AS product_count
FROM departments d
JOIN categories c
    ON d.department_id = c.category_department_id
JOIN products p 
    ON p.product_category_id = c.category_id
GROUP BY
d.department_id
ORDER BY
d.department_id ASC
LIMIT 20

Unnamed: 0,department_id,department_name,product_count
0,2,Fitness,168
1,3,Footwear,168
2,4,Apparel,140
3,5,Golf,120
4,6,Outdoors,336
5,7,Fan Shop,149
