# Dressmaker - Medium

In [1]:
import getpass
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
pwd = getpass.getpass()
engine = create_engine(
    'postgresql+psycopg2://postgres:%s@192.168.31.31:15432/sqlzoo' % (pwd))
pd.set_option('display.max_rows', 60)


 ········


In [2]:
jmcust = pd.read_sql_table('jmcust', engine)
dressmaker = pd.read_sql_table('dressmaker', engine)
dress_order = pd.read_sql_table('dress_order', engine)
construction = pd.read_sql_table('construction', engine)
quantities = pd.read_sql_table('quantities', engine)
order_line = pd.read_sql_table('order_line', engine)
garment = pd.read_sql_table('garment', engine)
material = pd.read_sql_table('material', engine)

## 1.
Assuming that any garment could be made in any of the available materials, list the garments (description, fabric, colour and pattern) which are expensive to make, that is, those for which the labour costs are 80% or more of the total cost.

In [3]:
t = (garment.assign(flag=0).merge(material.assign(flag=0), on='flag'))
t.loc[t['labour_cost']/(t['labour_cost']+t['cost'])>0.8, 
     ['description', 'fabric', 'colour', 'pattern']]

Unnamed: 0,description,fabric,colour,pattern
2,Trousers,Cotton,Yellow Stripe,Woven
3,Trousers,Cotton,Green Stripe,Woven
4,Trousers,Cotton,Black Dotted,Woven
5,Trousers,Cotton,Red Stripe,Woven
6,Trousers,Polyester,Pale Yellow,Printed
7,Trousers,Cotton,Blue Stripe,Woven
8,Trousers,Cotton,Pink Check,Woven
10,Trousers,Rayon,Red/Orange,Printed
12,Trousers,Cotton,Blue Abstract,Printed
13,Trousers,Cotton,Green Abstract,Printed


## 2.
List the descriptions and the number of orders of the less popular garments, that is those for which less than the average number of orders per garment have been placed. Also print out the average number of orders per garment. When calculating the average, ignore any garments for which no orders have been made.

In [4]:
t = (garment.merge(order_line, left_on='style_no', right_on='ol_style')
     .groupby(['style_no', 'description'])
     .agg(n_orders=pd.NamedAgg(column='order_ref', aggfunc='count'))
     .reset_index())
avg = np.round(t.loc[t['n_orders']>0, 'n_orders'].mean(), 2)
t.loc[t['n_orders']<avg, ['description', 'n_orders']].assign(avg=avg)

Unnamed: 0,description,n_orders,avg
2,Shorts,5,5.17
3,Short Skirt,5,5.17
4,Sundress,5,5.17
5,Suntop,4,5.17


## 3.
Which is the most popular line, that is, the garment with the highest number of orders. Bearing in mind the fact that there may be several such garments, list the garment description(s) and number(s) of orders.

In [5]:
t = (garment.merge(order_line, left_on='style_no', right_on='ol_style')
     .groupby(['style_no', 'description'])
     .agg(n_orders=pd.NamedAgg(column='order_ref', aggfunc='count'))
     .reset_index())
t.loc[t['n_orders']==t['n_orders'].max(), ['description', 'n_orders']]

Unnamed: 0,description,n_orders
0,Trousers,6
1,Long Skirt,6


## 4.
List the descriptions, and costs of the more expensive size 8, Cotton garments which might be ordered, that is those costing more than the average (labour costs + material costs) to make.

In [6]:
t = (garment.merge(order_line, left_on='style_no', right_on='ol_style')
     .merge(material, left_on='ol_material', right_on='material_no')
     .merge(quantities, left_on=['ol_size', 'ol_style'],
            right_on=['size_q', 'style_q']))
t['tot_cost'] = (t['labour_cost'] + t['quantity'] * t['cost']).round(2)

t.loc[(t['ol_size'] == 8) & (t['fabric'].str.strip().str.lower() == 'cotton') &
      (t['tot_cost'] > t.loc[t['tot_cost']>0, 'tot_cost'].mean()),
      ['description', 'material_no', 'tot_cost']]

Unnamed: 0,description,material_no,tot_cost
26,Sundress,14,31.2


## 5.
What is the most common size ordered for each garment type? List description, size and number of orders, assuming that there could be several equally popular sizes for each type.

In [7]:
# %%sql
# WITH t AS (
#     SELECT style_no, description, ol_size, COUNT(*) n_orders,
#       RANK() OVER (PARTITION BY style_no ORDER BY COUNT(*) DESC) rank
#       FROM garment JOIN order_line ON (order_line.ol_style=garment.style_no)
#         GROUP BY style_no, ol_size
# )
# SELECT description, ol_size, n_orders
#     FROM t
#     WHERE rank=1;
t = (garment.merge(order_line, left_on='style_no', right_on='ol_style')
     .groupby(['style_no', 'description', 'ol_size'])
     .agg(n_orders=pd.NamedAgg(column='order_ref', aggfunc='count')))
(t.groupby(['style_no', 'description', 'ol_size']).tail(1).reset_index())

Unnamed: 0,style_no,description,ol_size,n_orders
0,1,Trousers,8,3
1,1,Trousers,14,1
2,1,Trousers,16,1
3,1,Trousers,18,1
4,2,Long Skirt,8,3
5,2,Long Skirt,14,1
6,2,Long Skirt,16,1
7,2,Long Skirt,18,1
8,3,Shorts,8,1
9,3,Shorts,10,1
