# AdventureWorks - Hard

In [1]:
# Prerequesites
from pyhive import hive
%load_ext sql
%sql hive://cloudera@quickstart.cloudera:10000/sqlzoo
%config SqlMagic.displaylimit = 20

 ·········


## 11.
**For every customer with a 'Main Office' in Dallas show AddressLine1 of the 'Main Office' and AddressLine1 of the 'Shipping' address - if there is no shipping address leave it blank. Use one row per customer.**

In [2]:
%%sql
WITH t AS (
    SELECT "CustomerAW"."CustomerID" FROM
      "CustomerAW" JOIN "CustomerAddress" ON (
          "CustomerAW"."CustomerID"="CustomerAddress"."CustomerID") JOIN
        "Address" ON ("CustomerAddress"."AddressID"="Address"."AddressID")
        WHERE "Address"."City"='Dallas' AND
            "CustomerAddress"."AddressType"='Main Office'
)
SELECT "CustomerAW"."CompanyName",
  MAX(CASE WHEN "AddressType"='Main Office' THEN "AddressLine1" ELSE '' END) main_office,
    MAX(CASE WHEN "AddressType"='Shipping' THEN "AddressLine1" ELSE '' END) shipping
    FROM "CustomerAW" JOIN "CustomerAddress" ON (
          "CustomerAW"."CustomerID"="CustomerAddress"."CustomerID") JOIN
        "Address" ON ("CustomerAddress"."AddressID"="Address"."AddressID")
        WHERE "CustomerAW"."CustomerID" IN (SELECT "CustomerID" FROM t)
        GROUP BY "CustomerAW"."CompanyName";

 * postgresql://postgres:***@localhost/sqlzoo
5 rows affected.


CompanyName,main_office,shipping
Elite Bikes,Po Box 8259024,9178 Jumping St.
Rental Bikes,"99828 Routh Street, Suite 825",
Third Bike Store,2500 North Stemmons Freeway,
Town Industries,P.O. Box 6256916,
Unsurpassed Bikes,Po Box 8035996,


## 12.
**For each order show the SalesOrderID and SubTotal calculated three ways:**

- **A) From the SalesOrderHeader**
- **B) Sum of OrderQty*UnitPrice**
- **C) Sum of OrderQty*ListPrice**

In [3]:
%%sql
WITH a AS (
    SELECT "SalesOrderID", "SubTotal" subtotal_a FROM "SalesOrderHeader"
), b AS (
    SELECT "SalesOrderID", SUM("OrderQty"*"UnitPrice"*(1-"UnitPriceDiscount")) subtotal_b
    FROM "SalesOrderDetail"
    GROUP BY "SalesOrderID"
), c AS (
    SELECT "SalesOrderID", SUM("OrderQty"*"ListPrice") subtotal_c
    FROM "SalesOrderDetail" JOIN "Product" ON (
        "SalesOrderDetail"."ProductID"="Product"."ProductID")
    GROUP BY "SalesOrderID"
)
SELECT a."SalesOrderID", subtotal_a, subtotal_b, subtotal_c
    FROM a JOIN b ON (a."SalesOrderID"=b."SalesOrderID") JOIN
    c ON (a."SalesOrderID"=c."SalesOrderID")

 * postgresql://postgres:***@localhost/sqlzoo
31 rows affected.


SalesOrderID,subtotal_a,subtotal_b,subtotal_c
71774,880.35,713.8,1189.66
71776,78.81,63.9,106.5
71780,38418.69,29922.81,56651.56
71782,39785.33,33319.68,55533.31
71783,83858.43,65682.7396,121625.43
71784,108561.83,89868.8795,151932.58
71796,57634.63,47848.02,79746.71
71797,78029.69,65122.7911,108986.4
71815,1141.58,926.91,1544.86
71816,3398.17,2847.37,4745.68


## 13.
**Show the best selling item by value.**

In [4]:
%%sql
SELECT "Product"."Name", SUM("OrderQty"*"UnitPrice") subtotal
    FROM "SalesOrderDetail" JOIN "Product" ON (
        "SalesOrderDetail"."ProductID"="Product"."ProductID")
    GROUP BY "Product"."ProductID"
    ORDER BY SUM("OrderQty"*"UnitPrice") DESC;

 * postgresql://postgres:***@localhost/sqlzoo
137 rows affected.


Name,subtotal
"Touring-1000 Blue, 60",37191.44
"Mountain-200 Black, 42",37178.73
"Mountain-200 Black, 38",35801.74
"Road-350-W Yellow, 48",33509.58
"Touring-1000 Yellow, 60",23745.32
"Touring-1000 Blue, 50",22887.04
"Mountain-200 Silver, 42",20879.85
"Road-350-W Yellow, 40",20411.8
"Mountain-200 Black, 46",19277.86
"Mountain-200 Silver, 46",18095.87


## 14.
**Show how many orders are in the following ranges (in $):**

```
    RANGE      Num Orders      Total Value
    0-  99
  100- 999
 1000-9999
10000-
```

In [5]:
%%sql
WITH t AS (
    -- generate the RANGE column
    SELECT "SubTotal", "SalesOrderID",
      CASE WHEN "SubTotal" BETWEEN 0 AND 99.99      THEN '    0-  99'
           WHEN "SubTotal" BETWEEN 100 AND 999.99   THEN '  100- 999'
           WHEN "SubTotal" BETWEEN 1000 AND 9999.99 THEN ' 1000-9999'
           ELSE                                          '10000-    '
      END "RANGE"
    FROM "SalesOrderHeader"
)
SELECT "RANGE", COUNT(*) "Num Orders", SUM("SubTotal") "Total Value" FROM t
    GROUP BY "RANGE"
    ORDER BY "RANGE";

 * postgresql://postgres:***@localhost/sqlzoo
4 rows affected.


RANGE,Num Orders,Total Value
0- 99,3,158.66
100- 999,5,2386.21
1000-9999,10,27561.43
10000-,14,835326.81


## 15.
**Identify the three most important cities. Show the break down of top level product category against city.**

In [6]:
%%sql
WITH t AS (
    -- the top three cities
    SELECT "Address"."City" FROM
      "Address" JOIN "SalesOrderHeader" ON (
          "SalesOrderHeader"."ShipToAddressID"="Address"."AddressID")
    GROUP BY "Address"."City"
    ORDER BY SUM("SubTotal") DESC
    LIMIT 3
)
SELECT "Address"."City", "ProductCategory".Name "Category",
    SUM("OrderQty"*"UnitPrice") amount FROM
      "Address" JOIN "SalesOrderHeader" ON (
        "SalesOrderHeader"."ShipToAddressID"="Address"."AddressID") JOIN
      "SalesOrderDetail" ON (
        "SalesOrderDetail"."SalesOrderID"="SalesOrderHeader"."SalesOrderID") JOIN
      "Product" ON ("Product"."ProductID"="SalesOrderDetail"."ProductID") JOIN
      "ProductCategory" ON ("Product"."ProductCategoryID"="ProductCategory"."ProductCategoryID")
    WHERE "Address"."City" IN (SELECT "City" FROM t)
    GROUP BY "Address"."City", "ProductCategory".name
    ORDER BY "Address"."City", "ProductCategory".name;

 * postgresql://postgres:***@localhost/sqlzoo
42 rows affected.


City,Category,amount
London,Bottom Brackets,388.73
London,Brakes,255.6
London,Chains,36.42
London,Cranksets,1773.81
London,Derailleurs,638.85
London,Gloves,88.14
London,Handlebars,292.63
London,Helmets,20.99
London,Mountain Bikes,50881.99
London,Mountain Frames,24018.8
