In [None]:
""" Aggregate CAT Inspection Scores at granularity 'Item' for the year """

import numpy as np
import pandas as pd
import math
import sqlalchemy as sa
from mysql.connector import Error
import login

# Load one averaged score per (report_serial_num, item) into `df`.
#
# The inner query maps each graded inspection item to a numeric score
# (A=100, B=60, C=40, D=20), skipping grade 'N', status 'Initial Inspection'
# and periods before '2016'.  The outer query averages those scores per
# report and item, and truncates `period` to its first four characters.
#
# The engine is built *before* the try block so that `engine` is always bound
# when the cleanup in `finally` runs; otherwise a failure in create_engine
# would surface as a NameError from `engine.dispose()` and hide the real error.
engine = sa.create_engine('mysql+mysqlconnector://{}:{}@{}/flbwmass'.format(login.username, login.password, login.mysql_host), isolation_level='READ UNCOMMITTED')
try:
    df = pd.read_sql("""
    SELECT 
        LEFT(period, 4) AS period, 
        c.massis_type_description, 
        'CAT Team' AS team, 
        wo_type, 
        item, 
        AVG(CAST(item_score AS DECIMAL(17,14))) AS item_score 
    FROM (
        SELECT 
            m.period, 
            m.contract, 
            m.wo_type, 
            report_serial_num, 
            RIGHT(LEFT(item_description, 2), 1) AS `item`, 
            COALESCE(subaspect, IF(aspect = 'SM', 'GSM', aspect)) AS `aspect`, 
            CASE grade WHEN 'A' THEN 100.0 WHEN 'B' THEN 60.0 WHEN 'C' THEN 40.0 WHEN 'D' THEN 20.0 END AS item_score 
        FROM flbwmass.cat_mass_inspection_items 
        LEFT JOIN flbwmass.cat_mass_inspections AS m USING (report_serial_num) 
        WHERE period >= '2016' and grade <> 'N' AND m.status NOT IN ('Initial Inspection') 
        ORDER BY m.period , m.contract , report_serial_num , `item` , `aspect` , `grade`) AS s 
    LEFT JOIN master.contracts AS c USING (contract) 
    GROUP BY report_serial_num, item;
    """, con=engine)
except (Error, sa.exc.SQLAlchemyError) as e:
    # SQLAlchemy re-raises driver failures as its own exception classes, so
    # catching only mysql.connector's Error would miss them.  Report the
    # problem, then re-raise: continuing would leave `df` unbound (or stale
    # from a previous run) for the cells that follow.
    print(e)
    raise
finally:
    # Release pooled connections whether or not the query succeeded.
    engine.dispose()

In [None]:
def a_uqs(series):
    """Upper-quartile score of ``series``, returned as a one-element Series.

    The non-null values are ranked from largest to smallest and the value at
    rank ``ceil(count / 4)`` is selected, where ``count`` is the number of
    non-null entries.  The result keeps the original index label of the
    selected value.
    """
    non_null = series.count()
    rank = math.ceil(non_null / 4)
    top_values = series.nlargest(rank)
    # Keep only the last (i.e. smallest) of the `rank` largest values.
    return top_values[-1:]

def a_lqs(series):
    """Lower-quartile score of ``series``, returned as a one-element Series.

    The non-null values are ranked from largest to smallest and the value at
    rank ``count - floor(count / 4) + 1`` is selected, where ``count`` is the
    number of non-null entries.  When that rank exceeds ``count`` the
    smallest value is selected, since ``nlargest`` returns at most ``count``
    values.
    """
    non_null = series.count()
    skipped = math.floor(non_null / 4)
    rank = non_null - skipped + 1
    top_values = series.nlargest(rank)
    # Keep only the last (i.e. smallest) of the values that were retained.
    return top_values[-1:]

# Summarise item scores per (period, type, team, work-order type, item) and
# reshape to long form: one row per group and statistic.
cat_items = df

# Each statistic is given as an explicit (label, function) pair, so the output
# column labels no longer depend on the order in which pandas happens to store
# the column levels.  The built-in reductions are passed by name, which is the
# form pandas recommends over the NumPy callables.
statistic = {
    'item_score': [
        ('max', 'max'),
        ('uqs', a_uqs),
        ('median', 'median'),
        ('mean', 'mean'),
        ('lqs', a_lqs),
        ('min', 'min'),
    ]
}

aggregated = cat_items.groupby(['period', 'massis_type_description', 'team', 'wo_type', 'item']).agg(statistic)
# Name the two column levels by reassignment; the `inplace` flag previously
# used on MultiIndex.set_levels is not available in pandas 2.x.
aggregated.columns = aggregated.columns.set_names(['item_score', 'statistic'])
# Move the 'statistic' column level into the rows, then flatten the index.
result = aggregated.stack(1).reset_index()