In [1]:
# In Terminal, "pip install ibis-framework[duckdb] pyjanitor"
import pandas as pd
import ibis
from ibis import selectors as s
from ibis import _
# Interactive mode: ibis executes and renders a table expression when it is
# displayed, so the `.head(3)` previews below show data instead of an
# unevaluated expression.
ibis.options.interactive = True

In [2]:
from janitor import clean_names

In [3]:
# Project directories.
# pathlib never expands "~" by itself, so resolve it once here. Without this,
# only consumers that expand "~" on their own would find the files; anything
# else (Path.exists(), mkdir(), other readers) would look for a literal "~"
# directory relative to the working directory.
from pathlib import Path
path = Path("~/datasets/home-dataset/jupyterlab/ZX05").expanduser()
data_path = path / "data"      # raw input files
meta_path = path / "meta"      # master-data CSV files
output_path = path / "output"  # generated CSV output

In [4]:
# P&L forecast file to process, located under the data directory.
input_file_pl = data_path.joinpath("FC_2024/PL_FC2_test.dat")

In [5]:
def read_data(path):
    """Load a tab-separated P&L export into a pandas DataFrame.

    Column names are normalised with pyjanitor's ``clean_names`` and the
    ``cost_center`` column is renamed to ``text_col``.

    Parameters
    ----------
    path : str or path-like
        Location of the tab-separated input file.

    Returns
    -------
    pandas.DataFrame
        The file contents with cleaned column names.
    """
    raw = pd.read_csv(path, sep="\t")
    cleaned = raw.clean_names()
    return cleaned.rename(columns={"cost_center": "text_col"})

In [6]:
# Extract Cost center and GL accounts using regex
def extract_text(df):
    """Add ``costctr`` and ``gl_accounts`` columns parsed from ``text_col``.

    A row label that starts with a cost-center code (4-5 digits, or an
    ``IC-`` / ``CY-`` / ``DUMMY_`` prefixed code) yields ``costctr``; a label
    that starts with ``K`` or ``S`` followed by digits yields ``gl_accounts``.
    Rows without a cost-center code take the next code found further down
    the frame (backward fill); rows after the last code stay missing.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with a string column ``text_col``. It is not modified.

    Returns
    -------
    pandas.DataFrame
        A new frame with the two extra columns appended.
    """
    labels = df["text_col"]
    cost_center = labels.str.extract(
        r"(^[0-9]{4,5}|^IC-.{4,5}|^CY-.{4,5}|^DUMMY_.{3})", expand=False
    )
    gl_account = labels.str.extract(r"(^K[0-9]+|^S[0-9]+)", expand=False)
    return df.assign(
        # The ".{4,5}" patterns can capture a trailing blank, hence the strip
        # before the missing values are back-filled.
        costctr=cost_center.str.strip().bfill(),
        gl_accounts=gl_account,
    )

In [7]:
# Load the P&L input file and preview the first rows.
df = read_data(input_file_pl)
df.head(3)

Unnamed: 0,text_col,fc_1,fc_2,fc_3,fc_4,fc_5,fc_6,fc_7,fc_8,fc_9,fc_10,fc_11,fc_12,fc_tot,plan
0,K66270 Allocation Material Management,25696099,25129748,26977857,26534817,27315573,27023300,28746302,28746302,28746302,28746302,28746302,28746302,0,301080240
1,K66271 Allocation Production,189733978,185552230,199198035,195926876,201691663,199533687,179535297,179535297,179535297,179535297,179535297,179535297,0,2223104884
2,K66273 Allocation S&D: Selling,110161563,107733613,115656511,113757242,117104344,115851398,93118704,93118704,93118704,93118704,93118704,93118704,0,1290758679


In [8]:
# Add the `costctr` and `gl_accounts` columns parsed from `text_col`.
df = extract_text(df)
df.head(3)

Unnamed: 0,text_col,fc_1,fc_2,fc_3,fc_4,fc_5,fc_6,fc_7,fc_8,fc_9,fc_10,fc_11,fc_12,fc_tot,plan,costctr,gl_accounts
0,K66270 Allocation Material Management,25696099,25129748,26977857,26534817,27315573,27023300,28746302,28746302,28746302,28746302,28746302,28746302,0,301080240,IC-99LD,K66270
1,K66271 Allocation Production,189733978,185552230,199198035,195926876,201691663,199533687,179535297,179535297,179535297,179535297,179535297,179535297,0,2223104884,IC-99LD,K66271
2,K66273 Allocation S&D: Selling,110161563,107733613,115656511,113757242,117104344,115851398,93118704,93118704,93118704,93118704,93118704,93118704,0,1290758679,IC-99LD,K66273


In [9]:
# Wrap the pandas frame as an ibis table and keep only the two key columns
# plus every numeric column.
pl = ibis.memtable(df).select("costctr", "gl_accounts", s.numeric())
pl.head(3)

In [10]:
# Process numeric columns
# NOTE(review): this cell rebinds `pl`, so running it a second time without
# re-running the cells above rescales the already scaled values again.
monthly_fc = [_[f"fc_{month}"] for month in range(1, 13)]
fc_sum = monthly_fc[0]
for month_col in monthly_fc[1:]:
    fc_sum = fc_sum + month_col

# 1) full-year total of the twelve monthly forecast columns
pl = pl.mutate(fc_tot=fc_sum)
# 2) flip the sign and divide every numeric column by 1000
pl = pl.mutate(s.across(s.numeric(), _ / -1000))
# 3) deviation of the forecast total from plan
pl = pl.mutate(delta_to_plan=(_.fc_tot - _.plan).round(3))
pl.head(3)

### Read master data

In [11]:
# Read master data
def _read_master(file_name, **read_kwargs):
    """Read one master-data CSV from ``meta_path`` as strings with cleaned column names."""
    return pd.read_csv(meta_path / file_name, dtype=str, **read_kwargs).clean_names()


df_acc = _read_master("0000_TABLE_MASTER_Acc level.csv")
df_cc_general = _read_master("0000_TABLE_MASTER_Cost center_general.csv")
df_cc_hierarchy = _read_master("0000_TABLE_MASTER_Cost center_hierarchy.csv")
# Only the first three columns of the COOM table are read.
df_coom = _read_master("0004_TABLE_MASTER_COOM_2023.csv", usecols=[0, 1, 2])
df_poc = _read_master("POC.csv")

In [12]:
# Account master data as an ibis in-memory table.
acc = ibis.memtable(df_acc)
acc.head(3)

In [13]:
# General cost-center master data as an ibis in-memory table.
cc_general = ibis.memtable(df_cc_general)
cc_general.head(3)

In [14]:
# Cost-center hierarchy master data as an ibis in-memory table.
cc_hierarchy = ibis.memtable(df_cc_hierarchy)
cc_hierarchy.head(3)

In [15]:
# Combine the general cost-center attributes with the hierarchy on `cctr`.
# The left join keeps every row of `cc_general`, matched or not.
cc = cc_general.join(cc_hierarchy, "cctr", how="left")
cc.head(3)

In [16]:
# COOM master data as an ibis in-memory table.
coom = ibis.memtable(df_coom)
coom.head(3)

In [17]:
# POC master data as an ibis in-memory table.
poc = ibis.memtable(df_poc)
poc.head(3)

### Data wrangling

In [18]:
# Join tables to get master data
# Every join is a left join, so a P&L row is kept even when a master table has
# no matching key (its master-data columns are then null).
# NOTE(review): duplicate keys in any master table would multiply P&L rows --
# confirm the keys are unique.
pl = (pl
    # account master data, matched on the GL account number
    .join(acc, pl.gl_accounts == acc.account_no_, how="left")
    # cost-center master data, matched on the cost-center code
    .join(cc, pl.costctr == cc.cctr, how="left")
    # POC data, matched on the cost center's profit center
    .join(poc, cc.pctr == poc.profit_center, how="left")         
    # drop the join-key columns brought in by the master tables
    # (`cctr_right` is presumably the duplicated key from the `cc` join -- verify)
    .drop("cctr", "account_no_", "cctr_right", "pctr")
)
# Second step: COOM data is matched on cost center AND GL account together.
pl = (pl
    .join(coom, (pl.costctr == coom.cctr) & (pl.gl_accounts == coom.account_no_), how="left")
    .drop("cctr", "account_no_")
)
pl.head(3)

In [19]:
# Process COOM data for fix and variable costs
# Rules are evaluated in order; the first match wins:
#   1. GL account K399 flagged "Var" in `fix_var`  -> "Var"
#   2. `coom` is null (presumably no match in the COOM master table) -> "Fix"
#   3. anything else keeps its existing `coom` value
# The null test uses isnull() instead of `== ibis.NA`: an explicit IS NULL test
# does not depend on how the installed ibis version translates equality
# against NULL (a plain SQL `= NULL` is never true), and it avoids the
# deprecated `ibis.NA` constant.
pl = pl.mutate(
    coom = ibis.case()
    .when((pl.fix_var == "Var") & (pl.gl_accounts == "K399"), "Var")
    .when(pl.coom.isnull(), "Fix")
    .when(True, pl.coom).end()
)
pl.head(3)

In [20]:
# Extract function from cost center hierarchy level 3:
# `lv3` is split on "-" and the second piece becomes `function_2`.
lv3_parts = pl.lv3.split("-")
pl = pl.mutate(function_2=lv3_parts[1])
pl.head(3)


### PL fix costs

In [21]:
# Keep fix costs only and remove the S90xxx accounts ("Assessments to COPA").
# NOTE(review): `!=` is not true for a null `acc_lv6`, so rows without account
# master data are removed here as well -- confirm that is intended.
is_fix_cost = pl.coom == "Fix"
not_copa_assessment = pl.acc_lv6 != "Assessments to COPA"
pl_fix = pl.filter(is_fix_cost & not_copa_assessment)
pl_fix.head(3)

In [22]:
# Add account information
gl = pl_fix.gl_accounts
is_pv = pl_fix.function == "PV"

# Ordered (condition, label) pairs: the first condition that holds decides
# `ce_text`, so the sequence below must not be reordered.
ce_text_rules = [
    # PV costs: special logic for Division P in 2023 (temporary)
    (is_pv & (gl == "S99116"), "12_PMME Others"),
    # PV costs
    (is_pv, "10_Product Validation / Requalification after G60"),
    (pl_fix.costctr[:2] == "58", "10_Product Validation / Requalification after G60"),
    # E01-585
    ((gl == "K66270") | (gl == "K66271"), "01_NSHS Allocations in PE MGK & PE FGK"),
    (gl == "K66280", "02_NSHS Services in PE MGK & PE FGK"),
    # E01-299
    (pl_fix.acc_lv2 == "299 Total Labor Costs", "06_Compensation"),
    # E01-465
    (gl == "K403", "08_PMME Depreciation intangible development assets"),
    (pl_fix.acc_lv1 == "345 Depreciation long life", "09_PMME Depreciation w/o intangible"),
    (pl_fix.acc_lv1 == "320 Purchased maintenance", "07_Maintenance"),
    (pl_fix.acc_lv1 == "325 Project costs", "11_Related project expenses (RPE)"),
    # E01-520 / 525
    # FSC costs moved from PMME to FG&A from FY2023
    (gl == "S87564", "Assessment from FSC (CDP) to FG&A"),
    # FF assessment from FY2023 for the QMPP reorganization
    (gl == "S87310", "04_Assessment from FF (520)"),
    # Normal case
    (pl_fix.acc_lv2 == "520 Assessments In", "03_Assessment from Central Functions (520)"),
    # CM specific topic from 2024
    (pl_fix.acc_lv2 == "525 Residual Costs", "03_Assessment from Central Functions (520)"),
    # E01-535
    (gl == "K6626", '05_Shared equipment "K662x" accounts'),
    (gl == "K6620", "12_PMME Others_US regident Q engineer"),
    # E01-630: everything else, including the S99xxx accounts
    # (te-minute, tgb-minute, ast-hours), falls through to the default label.
    (True, "12_PMME Others"),
]

ce_text_case = ibis.case()
for condition, label in ce_text_rules:
    ce_text_case = ce_text_case.when(condition, label)

pl_fix = pl_fix.mutate(ce_text=ce_text_case.end())
pl_fix.head(3)

In [23]:
# Add RACE item
# Direct mapping from `function_2` to the RACE item.
race_by_function = [
    ("FGK", "PE production"),
    ("MGK", "PE materials management"),
    ("WVK", "PE plant administration"),
    ("VK", "PE distribution"),
]
# For `function_2` == "ALLOC" the RACE item depends on the GL account.
race_by_alloc_account = [
    ("K66271", "PE production"),
    ("K66270", "PE materials management"),
    ("K66281", "R, D & E allocation in"),
    ("K66283", "R, D & E allocation in"),
    ("K66273", "PE selling"),
    ("K66275", "PE communication"),
    ("K66278", "F, G & A expenses"),
    ("S87564", "F, G & A expenses"),
]

is_alloc = pl_fix.function_2 == "ALLOC"
race_case = ibis.case()
for function_code, race_label in race_by_function:
    race_case = race_case.when(pl_fix.function_2 == function_code, race_label)
for account, race_label in race_by_alloc_account:
    race_case = race_case.when(is_alloc & (pl_fix.gl_accounts == account), race_label)
# Anything not covered above is labelled with the literal string "NA".
race_case = race_case.when(True, "NA")

pl_fix = pl_fix.mutate(race_item=race_case.end())
pl_fix.head(3)

In [24]:
# Remove unnecessary columns
unused_columns = [
    "validity", "responsible", "account_description", "acc_lv1_by_consolidated",
    "acc_lv3", "acc_lv4", "acc_lv5", "acc_lv6",
]
pl_fix = pl_fix.drop(*unused_columns)
pl_fix.head(3)

### Output data

In [25]:
# Output data: materialise the ibis table as pandas and write it as CSV
# (without the index column) into the output directory.
fix_costs_df = pl_fix.to_pandas()
fix_costs_df.to_csv(output_path / "PL fix costs FC_tes.csv", index=False)