# AdventureWorks - Hard

In [1]:
import getpass
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
pwd = getpass.getpass()
engine = create_engine(
    'postgresql+psycopg2://postgres:%s@192.168.31.31:15432/sqlzoo' % (pwd))
pd.set_option('display.max_rows', 60)

········


In [2]:
cust_aw = pd.read_sql_table('CustomerAW', engine)
cust_addr = pd.read_sql_table('CustomerAddress', engine)
addr = pd.read_sql_table('Address', engine)
product = pd.read_sql_table('Product', engine)
order_det = pd.read_sql_table('SalesOrderDetail', engine)
order_head = pd.read_sql_table('SalesOrderHeader', engine)
prod_model = pd.read_sql_table('ProductModel', engine)
prod_model_prod = pd.read_sql_table('ProductModelProductDescription', engine)
prod_desc = pd.read_sql_table('ProductDescription', engine)
prod_cat = pd.read_sql_table('ProductCategory', engine)

## 11.
**For every customer with a 'Main Office' in Dallas show AddressLine1 of the 'Main Office' and AddressLine1 of the 'Shipping' address - if there is no shipping address leave it blank. Use one row per customer.**

In [3]:
a = (cust_aw.merge(cust_addr.loc[cust_addr['AddressType']=='Main Office'],
                   on='CustomerID')
     .merge(addr.loc[addr['City']=='Dallas'], on='AddressID')
     ['CustomerID'].drop_duplicates())
b = (cust_aw.merge(cust_addr.loc[cust_addr['CustomerID'].isin(a)], 
                   on='CustomerID')
     .merge(addr, on='AddressID'))
(b.pivot(index='CompanyName', columns='AddressType', values='AddressLine1')
 .fillna(''))

AddressType,Main Office,Shipping
CompanyName,Unnamed: 1_level_1,Unnamed: 2_level_1
Elite Bikes,Po Box 8259024,9178 Jumping St.
Rental Bikes,"99828 Routh Street, Suite 825",
Third Bike Store,2500 North Stemmons Freeway,
Town Industries,P.O. Box 6256916,
Unsurpassed Bikes,Po Box 8035996,


## 12.
**For each order show the SalesOrderID and SubTotal calculated three ways:**

- **A) From the SalesOrderHeader**
- **B) Sum of OrderQty*UnitPrice**
- **C) Sum of OrderQty*ListPrice**

In [4]:
a = order_head[['SalesOrderID', 'SubTotal']]
b = order_det.assign(SubTotal=order_det['OrderQty'] * order_det['UnitPrice'] * 
                     (1-order_det['UnitPriceDiscount']))
b = b.groupby(b['SalesOrderID'])['SubTotal'].sum().reset_index()
c = order_det.merge(product, on='ProductID')
c['SubTotal'] = c['OrderQty'] * c['ListPrice']
c = c.groupby(c['SalesOrderID'])['SubTotal'].sum().reset_index()

a.merge(b, on='SalesOrderID').merge(c, on='SalesOrderID')

Unnamed: 0,SalesOrderID,SubTotal_x,SubTotal_y,SubTotal
0,71774,880.35,713.8,1189.66
1,71776,78.81,63.9,106.5
2,71780,38418.69,29922.81,56651.56
3,71782,39785.33,33319.68,55533.31
4,71783,83858.43,65682.7396,121625.43
5,71784,108561.83,89868.8795,151932.58
6,71796,57634.63,47848.02,79746.71
7,71797,78029.69,65122.7911,108986.4
8,71815,1141.58,926.91,1544.86
9,71816,3398.17,2847.37,4745.68


## 13.
**Show the best selling item by value.**

In [5]:
a = order_det.merge(product, on='ProductID')
a['SubTotal'] = a['OrderQty'] * a['UnitPrice']
(a.groupby(['ProductID', 'Name'])['SubTotal'].sum()
 .reset_index().sort_values('SubTotal', ascending=False))

Unnamed: 0,ProductID,Name,SubTotal
109,969,"Touring-1000 Blue, 60",37191.44
20,783,"Mountain-200 Black, 42",37178.73
19,782,"Mountain-200 Black, 38",35801.74
115,976,"Road-350-W Yellow, 48",33509.58
97,957,"Touring-1000 Yellow, 60",23745.32
...,...,...,...
68,907,Rear Brakes,63.90
72,913,HL Road Seat/Saddle,63.16
47,874,"Racing Socks, M",59.29
88,947,HL Touring Handlebars,54.94


## 14.
**Show how many orders are in the following ranges (in $):**

```
    RANGE      Num Orders      Total Value
    0-  99
  100- 999
 1000-9999
10000-
```

In [6]:
t = order_head[['SubTotal', 'SalesOrderID']].assign(RANGE='10000-    ')

t.loc[t['SubTotal'].between(0, 99.99), 'RANGE'] = '    0-  99'
t.loc[t['SubTotal'].between(100, 999.99), 'RANGE'] = '  100- 999'
t.loc[t['SubTotal'].between(1000, 9999.99), 'RANGE'] = ' 1000-9999'

t.groupby('RANGE').agg({'SubTotal': ['count', 'sum']}).sort_values('RANGE')

Unnamed: 0_level_0,SubTotal,SubTotal
Unnamed: 0_level_1,count,sum
RANGE,Unnamed: 1_level_2,Unnamed: 2_level_2
0- 99,3,158.66
100- 999,5,2386.21
1000-9999,10,27561.43
10000-,14,835326.81


## 15.
**Identify the three most important cities. Show the break down of top level product category against city.**

In [7]:
top3 = (addr.merge(order_head, left_on='AddressID', right_on='ShipToAddressID'))
top3 = (top3.groupby('City')['SubTotal'].sum()
        .reset_index().sort_values('SubTotal', ascending=False)
        .iloc[:3])
a = (addr.loc[addr['City'].isin(top3['City'])]
     .merge(order_head, left_on='AddressID', right_on='ShipToAddressID')
     .merge(order_det, on='SalesOrderID')
     .merge(product, on='ProductID')
     .merge(prod_cat, on='ProductCategoryID'))
a['amount'] = a['OrderQty'] * a['UnitPrice']
(a.groupby(['City', 'name'])['amount'].sum()
 .reset_index().sort_values(['City', 'name']))

Unnamed: 0,City,name,amount
0,London,Bottom Brackets,388.73
1,London,Brakes,255.6
2,London,Chains,36.42
3,London,Cranksets,1773.81
4,London,Derailleurs,638.85
5,London,Gloves,88.14
6,London,Handlebars,292.63
7,London,Helmets,20.99
8,London,Mountain Bikes,50881.99
9,London,Mountain Frames,24018.8
