## PK Quality vs 2nd Pressing Oil Content

This report analyzes the relationship between today's 2nd pressing oil content and the previous day's receiving quality. When the previous day is a Sunday, Saturday's receiving data is used instead.

Data is from FOSS Machine and MSSQL Server.

[MSSQL Server]
1. PNET
2. SD
3. Moist

[FOSS]
1. 2nd Pressing Oil Content

In [1]:
import pandas as pd
import numpy as np
import data
from data import gsheet
from utils import config
# Shared MSSQL handle; its `cnxn` attribute is the connection that
# quality_v_oil_content passes to pd.read_sql.
jinlee = data.mssql()

In [None]:
# TODO: Reverse the direction i.e. today's oil content, yesterday's receiving

In [4]:
def quality_v_oil_content(today=None):
    """Tabulate receiving quality per line next to 2nd pressing oil content.

    Receiving data comes from the RECEIVE table (joined to MILL) through the
    module-level ``jinlee`` connection, for the day before ``today``; when
    that day is a Sunday, the Saturday before it is used instead. Oil content
    comes from the first sheet of the "foss" Google Sheet, restricted to
    samples analysed on ``today`` whose product is 'PKE Second Pressed JL'.

    Parameters
    ----------
    today : datetime-like, optional
        Day whose FOSS oil content is reported. Defaults to the current
        date/time, which reproduces the behaviour of calling the function
        with no arguments.

    Returns
    -------
    pandas.DataFrame
        Rows sorted by line, with columns ``Line``, ``Net``, ``SD``,
        ``Moisture`` (each labelled with the receiving date) and ``Oil (%)``
        (labelled with ``today``). Net is the summed (PGROSS-PTARE)/1000;
        SD and moisture are averages weighted by that net figure. Lines with
        no matching FOSS sample get '' in the oil column; a line with several
        samples appears once per sample.
    """
    today = pd.to_datetime('today') if today is None else pd.to_datetime(today)
    yesterday = today - pd.Timedelta('1 day')
    # dayofweek == 6 is Sunday; unlike strftime('%a') it does not depend on
    # the process locale.
    date = today - pd.Timedelta('2 days') if yesterday.dayofweek == 6 else yesterday

    def format_date(day):
        """Render a timestamp as YYYYMMDD, the form used in the SQL filter."""
        return day.strftime('%Y%m%d')

    def wrap(day):
        """Column-label suffix such as ' (Sat 28/09)'."""
        return ' (' + day.strftime('%a %d/%m') + ')'

    def check_quality_by_line():
        """Fetch one row per receiving ticket for ``date``."""
        print(f"Getting receiving data for {date.strftime('%a %d/%m')} ... ")

        # The interpolated value is always the digits-only output of
        # strftime on a parsed timestamp, never free text.
        sql = f"""
        SELECT TICKET, VEHICLE, MILL.SNAME, FIBRE AS LINE, 
        (PGROSS-PTARE)/1000 AS PNET, SD, MOIST
        FROM RECEIVE
        LEFT JOIN MILL ON MILL.CODENO = RECEIVE.MILL
        WHERE DATE = '{format_date(date)}'
        """
        return pd.read_sql(sql, jinlee.cnxn)

    def wap(values):
        """PNET-weighted average of ``values``.

        Weights are looked up by row label, so the same function serves each
        LINE group as well as the 'All' margin.
        """
        return np.ma.average(values, axis=0,
                             weights=quality_df.loc[values.index, 'PNET'])

    quality_df = check_quality_by_line().dropna()
    # Keep only the part of the line code before the first '-'.
    quality_df['LINE'] = quality_df['LINE'].str.split('-').str[0]

    summary = quality_df.pivot_table(
        index='LINE',
        values=['PNET', 'SD', 'MOIST'],
        # The string 'sum' avoids the FutureWarning newer pandas emits for
        # the builtin ``sum`` and yields the same result.
        aggfunc={'PNET': 'sum', 'SD': wap, 'MOIST': wap},
        margins=True
    ).round(2)

    def get_oil_content():
        """Return today's 2nd pressing oil readings indexed by line letter."""
        foss_df = gsheet("foss").sheet2df(0)

        # NOTE(review): '%H' combined with '%p' means the AM/PM marker does
        # not adjust the hour. Only the calendar date is used below, so the
        # format is left unchanged -- confirm against the sheet before
        # relying on the time of day.
        analysis_time = pd.to_datetime(foss_df['Analysis Time'],
                                       format='%d-%b-%y %H:%M:%S %p')

        print(f"Getting FOSS data for {today.strftime('%a %d/%m')} ... ")
        # A boolean mask (rather than .loc['YYYYMMDD'] on a DatetimeIndex)
        # gives an empty frame instead of a KeyError when nothing was
        # analysed today.
        is_today = analysis_time.dt.normalize() == today.normalize()
        is_second_pressing = foss_df['Product Name'] == 'PKE Second Pressed JL'
        second_pressing = foss_df.loc[is_today & is_second_pressing]

        if second_pressing.empty:
            print(f"No 2nd pressing FOSS data found for {today.strftime('%a %d/%m')}.")
            return pd.DataFrame({'Oil (%)': pd.Series(dtype=float)},
                                index=pd.Index([], dtype=object, name='LINE'))

        # Retain letter of line only i.e. ABCD: take the text after 'LINE',
        # then the text after the first '2'.
        line_letter = (second_pressing['Sample Number']
                       .str.split('LINE').str[1]
                       .str.split('2').str[1])

        return (
            second_pressing
            .assign(LINE=line_letter)[['LINE', '% Oil/WM']]
            .rename(columns={'% Oil/WM': 'Oil (%)'})
            .set_index('LINE')
        )

    oil_content = get_oil_content()

    # WB Summary left join Oil Content, matched on the line letter.
    final_df = (
        summary
        .merge(oil_content, left_index=True, right_index=True, how='left')
        .rename_axis('Line')
        .reset_index()
    )

    net_col = 'Net' + wrap(date)
    sd_col = 'SD' + wrap(date)
    moisture_col = 'Moisture' + wrap(date)
    oil_col = 'Oil (%)' + wrap(today)

    final_df = final_df.rename(columns={
        'PNET': net_col,
        'SD': sd_col,
        'MOIST': moisture_col,
        'Oil (%)': oil_col
    })

    # Select columns by name so the layout does not depend on the
    # alphabetical order pivot_table happens to produce.
    final_df = final_df[['Line', net_col, sd_col, moisture_col, oil_col]]
    # Drop the 'All' margin row added by pivot_table.
    final_df = final_df[final_df['Line'] != 'All']
    return final_df.sort_values(by='Line').fillna('')

In [5]:
# Build the report; as the cell's last expression, the returned DataFrame is
# rendered as the cell output.
quality_v_oil_content()

Getting receiving data for Sat 28/09 ... 
Connected to: foss 1Ce6EGnlmXUTyTzBv9xo3XMfGW_XnSTjzBcRKuAVLbL8 using account: googlesheets@jinlee-8.iam.gserviceaccount.com
Getting FOSS data for Mon 30/09 ... 


Unnamed: 0,Line,Net (Sat 28/09),SD (Sat 28/09),Moisture (Sat 28/09),Oil (%) (Mon 30/09)
0,A,263.06,5.93,5.05,4.99
1,B,77.81,6.43,5.84,5.06
2,C,179.52,4.44,6.01,4.89
3,D,172.74,4.92,6.62,4.8
4,S,82.06,4.36,5.26,
