# Dressmaker - Hard
You may need to create views to complete these questions - but you do not have permission to create tables or views in the default schema. Your SQL commands are executed by user scott in schema gisq - you may create or drop views and tables in schema scott but not in gisq.

In [1]:
import getpass
import psycopg2
from sqlalchemy import create_engine
import pandas as pd
import numpy as np
pwd = getpass.getpass()
engine = create_engine(
    'postgresql+psycopg2://postgres:%s@192.168.31.31:15432/sqlzoo' % (pwd))
pd.set_option('display.max_rows', 60)


 ········


In [2]:
jmcust = pd.read_sql_table('jmcust', engine)
dressmaker = pd.read_sql_table('dressmaker', engine)
dress_order = pd.read_sql_table('dress_order', engine)
construction = pd.read_sql_table('construction', engine)
quantities = pd.read_sql_table('quantities', engine)
order_line = pd.read_sql_table('order_line', engine)
garment = pd.read_sql_table('garment', engine)
material = pd.read_sql_table('material', engine)

## 1.
When creating a view in scott you must specify the schema name of the sources and the destination.

In [3]:
scott = pd.DataFrame()

## 2.
It is decided to review the materials stock. How much did each material contribute to turnover in 2002?

In [4]:
t = (material.merge(order_line, left_on='material_no', right_on='ol_material')
     .merge(quantities, left_on=['ol_size', 'ol_style'], 
            right_on=['size_q', 'style_q'])
     .merge(dress_order.loc[dress_order['order_date'].dt.year==2002],
            left_on='order_ref', right_on='order_no'))
t['cost'] = t['cost'] * t['quantity']
(t.groupby(['material_no', 'fabric', 'colour', 'pattern'])
 .agg({'quantity': 'sum', 'cost': 'sum'}).reset_index())

Unnamed: 0,material_no,fabric,colour,pattern,quantity,cost
0,1,Silk,Black,Plain,4.9,34.3
1,2,Silk,Red Abstract,Printed,9.3,93.0
2,3,Cotton,Yellow Stripe,Woven,5.7,17.1
3,4,Cotton,Green Stripe,Woven,2.2,6.6
4,5,Cotton,Black Dotted,Woven,6.4,19.2
5,6,Cotton,Red Stripe,Woven,2.2,6.6
6,7,Polyester,Pale Yellow,Printed,4.3,10.965
7,8,Cotton,Blue Stripe,Woven,4.2,12.6
8,9,Cotton,Pink Check,Woven,4.6,13.8
9,10,Silk,Green Abstract,Printed,8.3,124.5


## 3.
An order for shorts has just been placed and the work is to be distributed amongst the workforce, and we wish to know how busy the shorts makers are. For each of the workers who have experience of making shorts show the number of hours work that she is currently committed to, assuming a meagre wage of £4.50 per hour

In [5]:
shorts = (dressmaker.merge(construction, left_on='d_no', right_on='maker')
          .merge(dress_order, left_on='order_ref', right_on='order_no')
          .merge(order_line, left_on='order_no', right_on='order_ref')
          .merge(garment.loc[garment['description'].str.strip().str.lower()=='shorts'], 
                 left_on='ol_style', right_on='style_no')
          ['d_no'].drop_duplicates())
(dressmaker.loc[dressmaker['d_no'].isin(shorts)]
  .merge(construction, left_on='d_no', right_on='maker')
  .merge(dress_order.loc[dress_order['completed']=='N'], 
         left_on='order_ref', right_on='order_no')
  .merge(order_line, left_on='order_no', right_on='order_ref')
  .merge(garment.assign(hrs=garment['labour_cost']/4.5), 
         left_on='ol_style', right_on='style_no')
  .merge(quantities, left_on=['ol_size', 'ol_style'],
         right_on=['size_q', 'style_q'])
  .groupby('d_name')['hrs'].sum()
  .reset_index())


Unnamed: 0,d_name,hrs
0,Miss Pins,28.166667
1,Miss Stitch,49.166667
2,Mr Needles,18.611111
3,Mr Seam,28.166667
4,Mr Taylor,18.611111
5,Ms Sew,18.611111


## 4.
"Big spender of the year" is the customer who spends the most on high value items. Identify the "Big spender of the year 2002" if the "high value" threshold is set at £30. Also who would it be if the threshold was £20 or £50?

In [7]:
t = (order_line.merge(quantities, left_on=['ol_style', 'ol_size'], 
                      right_on=['style_q', 'size_q'])
     .merge(garment, left_on='ol_style', right_on='style_no')
     .merge(material, left_on='ol_material', right_on='material_no'))
t['tot_cost'] = t['labour_cost'] + t['quantity'] * t['cost']

c = (dress_order.loc[dress_order['order_date'].dt.year==2002]
     .merge(jmcust, left_on='cust_no', right_on='c_no')
     .merge(t[['order_ref', 'line_no', 'tot_cost']], 
            left_on='order_no', right_on='order_ref'))

def find_big_spender(thres: float):
    ret = (c.loc[c['tot_cost']>=thres].groupby('c_name')['tot_cost'].sum()
           .reset_index().sort_values('tot_cost', ascending=False).iloc[:1])
    return ret[['c_name', 'tot_cost']].assign(thres=thres)

pd.concat(map(find_big_spender, [20, 30, 50]))

Unnamed: 0,c_name,tot_cost,thres
1,Mr Brass,198.54,20
4,Ms White,173.55,30
0,Mr Brass,72.0,50


## 5.
Who is the fastest at making trousers?

In [8]:
t = (dressmaker.merge(construction, left_on='d_no', right_on='maker')
     .merge(order_line, left_on=['order_ref', 'line_ref'],
            right_on=['order_ref', 'line_no'])
     .merge(garment.loc[garment['description'].str.strip().str.lower()=='trousers'], 
            left_on='ol_style', right_on='style_no'))
t['days'] = t['finish_date'] - t['start_date']
t[['d_no', 'd_name', 'days']].sort_values('days').iloc[:1]

Unnamed: 0,d_no,d_name,days
3,3,Mr Needles,3 days


## 6.
"Employee of the month" is the seamstress who completes the greatest value of clothes. Show the "employees of the month" for months in 2002.

In [9]:
t = (material.merge(order_line, left_on='material_no', right_on='ol_material')
     .merge(quantities, left_on=['ol_size', 'ol_style'], 
            right_on=['size_q', 'style_q'])
     .merge(garment, left_on='ol_style', right_on='style_no')
     .merge(construction.loc[construction['start_date'].dt.year==2002], 
            left_on=['order_ref', 'line_no'],
            right_on=['order_ref', 'line_ref'])
     .merge(dressmaker, left_on='maker', right_on='d_no'))
t['month'] = t['start_date'].dt.month
t['val'] = t['quantity'] * t['cost'] + t['labour_cost']
(t.groupby(['d_name', 'month'])['val'].sum().reset_index()
 .sort_values('val').groupby('month').tail(1)
 .sort_values('month'))

Unnamed: 0,d_name,month,val
2,Miss Stitch,1,49.0
14,Mrs Hem,2,122.25
4,Miss Stitch,3,97.2
