In [1]:
# In Terminal, "pip install ibis-framework[duckdb] pyjanitor"
import pandas as pd
import ibis
from ibis import selectors as s
from ibis import _
# Interactive mode: ibis expressions are executed when displayed, so the
# `.head(3)` previews at the end of the cells below show actual rows.
ibis.options.interactive = True

In [2]:
from janitor import clean_names

In [3]:
# Path
from pathlib import Path

# pathlib never expands "~" on its own, so resolve the home directory here;
# every derived path is then a real absolute location that is safe to use with
# pathlib methods (exists/mkdir/glob) as well as with pandas.
path = Path("~/datasets/home-dataset/jupyterlab/ZX05").expanduser()
db_path = path / "db"          # folder holding the PL CSV read below
meta_path = path / "meta"      # folder holding the master-data CSVs
output_path = path / "output"  # folder the result CSVs are written to

In [4]:
# Read DB file: PL_2024.csv from the db folder, forcing `costctr` to be read as text
pl_csv_file = db_path / "PL_2024.csv"
df = pd.read_csv(pl_csv_file, dtype={"costctr": str})

In [5]:
# Wrap the pandas DataFrame in an ibis in-memory table so the following cells
# can transform it with ibis expressions.
pl = ibis.memtable(df)
# Preview the first 3 rows
pl.head(3)

In [6]:
# Process numeric columns
# Each amount column is divided by -1000 (sign flipped, scaled down by 1000).
# NOTE(review): this cell reassigns `pl` from itself, so running it twice
# rescales the amounts again — re-run from the memtable cell if needed.
amount_columns = ["actual", "plan", "target"]
pl = pl.mutate(s.across(amount_columns, _ / -1000))
# Difference between the rescaled actual and plan, rounded to 3 decimals
pl = pl.mutate(delta_to_plan=(_.actual - _.plan).round(3))
pl.head(3)

In [7]:
# Add volume difference: plan minus target, rounded to 3 decimals
plan_minus_target = pl.plan - pl.target
pl = pl.mutate(volume_difference=plan_minus_target.round(3))
pl.head(3)

### Read master data

In [8]:
# Read master data
def _read_master(filename, **read_csv_kwargs):
    """Read one master-data CSV from ``meta_path``.

    Every column is read as text (``dtype=str``) and the column names are
    normalised with pyjanitor's ``clean_names``. Extra keyword arguments are
    forwarded to ``pandas.read_csv``.
    """
    raw = pd.read_csv(meta_path / filename, dtype=str, **read_csv_kwargs)
    return raw.clean_names()


df_acc = _read_master("0000_TABLE_MASTER_Acc level.csv")
df_cc_general = _read_master("0000_TABLE_MASTER_Cost center_general.csv")
df_cc_hierarchy = _read_master("0000_TABLE_MASTER_Cost center_hierarchy.csv")
# Only the first three columns of the COOM file are loaded.
# NOTE(review): the COOM master is the 2023 file while the PL data is PL_2024 — confirm this is intended.
df_coom = _read_master("0004_TABLE_MASTER_COOM_2023.csv", usecols=[0, 1, 2])
df_poc = _read_master("POC.csv")

In [9]:
# Account master as an ibis in-memory table, with a 3-row preview
acc = ibis.memtable(df_acc)
acc.head(3)

In [10]:
# Cost center master (general part) as an ibis in-memory table, with a 3-row preview
cc_general = ibis.memtable(df_cc_general)
cc_general.head(3)

In [11]:
# Cost center master (hierarchy part) as an ibis in-memory table, with a 3-row preview
cc_hierarchy = ibis.memtable(df_cc_hierarchy)
cc_hierarchy.head(3)

In [12]:
# Combine both cost center tables on `cctr`; the left join keeps every row of
# cc_general even when the hierarchy table has no match.
# NOTE(review): the later join cell drops a `cctr_right` column, which presumably
# is the hierarchy-side key produced by this join — confirm against the preview.
cc = cc_general.join(cc_hierarchy, "cctr", how="left")
cc.head(3)

In [13]:
# COOM master as an ibis in-memory table, with a 3-row preview
coom = ibis.memtable(df_coom)
coom.head(3)

In [14]:
# POC table as an ibis in-memory table, with a 3-row preview
poc = ibis.memtable(df_poc)
poc.head(3)

### Data wrangling

In [15]:
# Join tables to get master data
# All joins are left joins, so PL rows are kept even when a master table has no match.
pl = (pl
    # account master, matched on the G/L account
    .join(acc, pl.gl_accounts == acc.account_no_, how="left")
    # cost center master (general + hierarchy), matched on the cost center
    .join(cc, pl.costctr == cc.cctr, how="left")
    # POC table, matched on the profit center column (`pctr`) of the cost center master
    .join(poc, cc.pctr == poc.profit_center, how="left")         
    # remove the key columns that came in with the master tables
    .drop("cctr", "account_no_", "cctr_right", "pctr")
)
# COOM is joined in a second step, on the (cost center, G/L account) pair,
# and its key columns are removed again afterwards.
pl = (pl
    .join(coom, (pl.costctr == coom.cctr) & (pl.gl_accounts == coom.account_no_), how="left")
    .drop("cctr", "account_no_")
)
# NOTE(review): assumes every master table has at most one row per join key;
# duplicate keys would multiply PL rows — confirm.
pl.head(3)

In [16]:
# Process COOM data for fix and variable costs
# Rules, first match wins:
#   1. fix_var == "Var" on account K399      -> "Var"
#   2. no COOM record matched (coom is NULL) -> "Fix"
#   3. otherwise keep the COOM value as it is
# The NULL test must use isnull(): an equality comparison against NULL is
# not a true predicate in SQL, so `coom == ibis.NA` would not label these rows
# "Fix" and they would be lost from both the fix and the var outputs.
pl = pl.mutate(
    coom = ibis.case()
    .when((pl.fix_var == "Var") & (pl.gl_accounts == "K399"), "Var")
    .when(pl.coom.isnull(), "Fix")
    .else_(pl.coom).end()
)
pl.head(3)

In [17]:
# Extract function from cost center hierarchy level 3
# `lv3` is split on "-" and the element at index 1 is kept.
lv3_tokens = pl.lv3.split("-")
pl = pl.mutate(function_2=lv3_tokens[1])
pl.head(3)


### PL fix costs

In [18]:
# Fix-cost subset of the PL: rows flagged "Fix" by the COOM step, without the
# "Assessments to COPA" accounts (remove_s90xxx_accounts).
# NOTE(review): under SQL three-valued logic, rows with a missing acc_lv6 would
# presumably be filtered out as well — confirm that is intended.
fix_rows = pl.coom == "Fix"
fix_not_copa = pl.acc_lv6 != "Assessments to COPA"
pl_fix = pl.filter(fix_rows & fix_not_copa)
pl_fix.head(3)

In [19]:
# Add account information
# Ordered (condition, ce_text) rules. The first matching rule wins, so the
# special cases have to stay ahead of the more generic ones.
# NOTE(review): "regident" in the K6620 label looks like a typo for "resident";
# it is kept unchanged because the label is written to the output file.
fix_gl = pl_fix.gl_accounts
ce_text_rules = [
    # PV Costs : special logic for Division P in 2023 (temporary)
    ((pl_fix.function == "PV") & (fix_gl == "S99116"), "12_PMME Others"),
    # PV Costs
    (pl_fix.function == "PV", "10_Product Validation / Requalification after G60"),
    (pl_fix.costctr[:2] == "58", "10_Product Validation / Requalification after G60"),
    # E01-585
    ((fix_gl == "K66270") | (fix_gl == "K66271"), "01_NSHS Allocations in PE MGK & PE FGK"),
    (fix_gl == "K66280", "02_NSHS Services in PE MGK & PE FGK"),
    # E01-299
    (pl_fix.acc_lv2 == "299 Total Labor Costs", "06_Compensation"),
    # E01-465
    (fix_gl == "K403", "08_PMME Depreciation intangible development assets"),
    (pl_fix.acc_lv1 == "345 Depreciation long life", "09_PMME Depreciation w/o intangible"),
    (pl_fix.acc_lv1 == "320 Purchased maintenance", "07_Maintenance"),
    (pl_fix.acc_lv1 == "325 Project costs", "11_Related project expenses (RPE)"),
    # E01-520 / 525
    # FSC costs changed from PMME to FG&A from FY2023
    (fix_gl == "S87564", "Assessment from FSC (CDP) to FG&A"),
    # FF Assessment from FY2023 for QMPP reorganization
    (fix_gl == "S87310", "04_Assessment from FF (520)"),
    # Normal case
    (pl_fix.acc_lv2 == "520 Assessments In", "03_Assessment from Central Functions (520)"),
    # CM specific topic from 2024
    (pl_fix.acc_lv2 == "525 Residual Costs", "03_Assessment from Central Functions (520)"),
    # E01-535
    (fix_gl == "K6626", '05_Shared equipment "K662x" accounts'),
    (fix_gl == "K6620", "12_PMME Others_US regident Q engineer"),
]

ce_text_case = ibis.case()
for rule_condition, rule_label in ce_text_rules:
    ce_text_case = ce_text_case.when(rule_condition, rule_label)

# E01-630: everything not matched above (including the S99xxx accounts for
# te-minute, tgb-minute, ast-hours) ends up in "12_PMME Others".
pl_fix = pl_fix.mutate(ce_text=ce_text_case.when(True, "12_PMME Others").end())
pl_fix.head(3)

In [20]:
# Add RACE item
# RACE item decided by function_2 alone
race_by_function = {
    "FGK": "PE production",
    "MGK": "PE materials management",
    "WVK": "PE plant administration",
    "VK": "PE distribution",
}
# ALLOC: the RACE item depends on the G/L account
race_by_alloc_account = {
    "K66271": "PE production",
    "K66270": "PE materials management",
    "K66281": "R, D & E allocation in",
    "K66283": "R, D & E allocation in",
    "K66273": "PE selling",
    "K66275": "PE communication",
    "K66278": "F, G & A expenses",
    "S87564": "F, G & A expenses",
}

fix_function_2 = pl_fix.function_2
race_case = ibis.case()
for function_code, race_label in race_by_function.items():
    race_case = race_case.when(fix_function_2 == function_code, race_label)
for alloc_account, race_label in race_by_alloc_account.items():
    race_case = race_case.when(
        (fix_function_2 == "ALLOC") & (pl_fix.gl_accounts == alloc_account),
        race_label,
    )

# Rows matching none of the rules get the literal text "NA"
pl_fix = pl_fix.mutate(race_item=race_case.when(True, "NA").end())
pl_fix.head(3)

In [21]:
# Remove unnecessary columns
fix_unused_columns = [
    "validity",
    "responsible",
    "account_description",
    "acc_lv1_by_consolidated",
    "acc_lv3",
    "acc_lv4",
    "acc_lv5",
    "acc_lv6",
]
pl_fix = pl_fix.drop(*fix_unused_columns)
pl_fix.head(3)

### PL var costs

In [22]:
# Variable-cost subset of the PL: rows flagged "Var" by the COOM step, without
# the "Assessments to COPA" accounts (remove_s90xxx_accounts).
# NOTE(review): under SQL three-valued logic, rows with a missing acc_lv6 would
# presumably be filtered out as well — confirm that is intended.
var_rows = pl.coom == "Var"
var_not_copa = pl.acc_lv6 != "Assessments to COPA"
pl_var = pl.filter(var_rows & var_not_copa)
pl_var.head(3)

In [23]:
# Add LDC / MDC
cc_starts_with_8 = pl_var.costctr[:1] == "8"  # CC that starts with "8"
var_is_fgk = pl_var.function_2 == "FGK"
ldc_mdc_case = (
    ibis.case()
    # start-up cost centers take precedence over the LDC / MDC split
    .when(cc_starts_with_8, "Start up costs")
    .when(var_is_fgk & (pl_var.acc_lv2 == "299 Total Labor Costs"), "LDC")
    .when(var_is_fgk & (pl_var.acc_lv2 == "465 Cost of materials"), "MDC")
    # anything else gets the literal text "NA"
    .when(True, "NA")
    .end()
)
pl_var = pl_var.mutate(ldc_mdc=ldc_mdc_case)

In [24]:
# Add account information
# Ordered (condition, ce_text) rules for the labor accounts (LDC); first match wins.
var_gl = pl_var.gl_accounts
var_ce_text_rules = [
    ((var_gl == "K250") | (var_gl == "K256"), "120 Premium wages"),
    (pl_var.acc_lv1 == "158 Social benefit rate wages variable", "158 SLB wages"),
    (pl_var.acc_lv2 == "299 Total Labor Costs", "115 Direct labor"),
]

var_ce_text_case = ibis.case()
for var_condition, var_label in var_ce_text_rules:
    var_ce_text_case = var_ce_text_case.when(var_condition, var_label)

# MDC (and every other row): fall back to the account level 1 text
pl_var = pl_var.mutate(ce_text=var_ce_text_case.when(True, pl_var.acc_lv1).end())
pl_var.head(3)

In [25]:
# Remove unnecessary columns
var_unused_columns = [
    "validity",
    "responsible",
    "account_description",
    "acc_lv1_by_consolidated",
    "acc_lv3",
    "acc_lv4",
    "acc_lv5",
    "acc_lv6",
]
pl_var = pl_var.drop(*var_unused_columns)
pl_var.head(3)

### Output data

In [26]:
# Output data
# pandas will not create a missing folder for to_csv, so make sure the output
# directory exists first. "~" is expanded explicitly because pathlib's mkdir
# would otherwise create a literal "~" folder in the working directory.
output_dir = output_path.expanduser()
output_dir.mkdir(parents=True, exist_ok=True)

# Execute both ibis expressions and write the results without the pandas index
pl_fix.to_pandas().to_csv(output_dir / "PL fix costs.csv", index=False)
pl_var.to_pandas().to_csv(output_dir / "PL var costs.csv", index=False)