In [1]:
# In Terminal, "pip install ibis-framework[duckdb] pyjanitor"
import pandas as pd
import ibis
from ibis import selectors as s
from ibis import _
# Interactive mode: table expressions such as `pl.head(3)` are executed and rendered
# when they are the last line of a cell, instead of showing an unevaluated expression.
ibis.options.interactive = True

In [2]:
from janitor import clean_names

In [3]:
# Path
from pathlib import Path

# pathlib never expands "~" by itself. Resolve it once here so that every derived
# path is a real absolute location for any consumer (exists(), mkdir(), open(), ...),
# not only for readers that happen to expand "~" themselves.
path = Path("~/datasets/home-dataset/jupyterlab/ZX05").expanduser()
db_path = path / "db"          # folder the PL_<year>.csv extract is read from
meta_path = path / "meta"      # folder the master-data CSVs are read from
output_path = path / "output"  # folder the result CSV is written to

In [4]:
# Read DB file
# Only the 2024 extract is loaded. Earlier years (2020-2022) were loaded in a previous
# revision of this cell; to include them again, read each file and add the frame to
# `yearly_frames` below.
# The join keys are pinned to `str`: the master data is read with dtype=str, so the
# keys must be strings no matter what pandas would infer from the values
# (e.g. a purely numeric cost center such as 6995).
pl_2024 = pd.read_csv(
    db_path / "PL_2024.csv",
    dtype={"costctr": str, "gl_accounts": str},
)
yearly_frames = [pl_2024]
df_pl = pd.concat(yearly_frames)
# NOTE(review): the file carries an "Unnamed: 0" column (presumably a saved index) that
# flows through to the exported CSV - confirm whether it should be dropped.
df_pl

Unnamed: 0,fy,period,costctr,gl_accounts,actual,plan,target
0,2024,1,IC-99LD,K66270,27005141,27584485,27584485
1,2024,1,IC-99LD,K66271,184178530,188129602,188129602
2,2024,1,IC-99LD,K66273,113885517,116328686,116328686
3,2024,1,IC-99LD,K66275,4795603,4898472,4898472
4,2024,1,IC-99LD,K66278,188203298,192240828,192240828
...,...,...,...,...,...,...,...
12180,2024,2,6995,S90717,24664,0,0
12181,2024,2,CY-999MK,S87453,668138,744405,744405
12182,2024,2,CY-999MK,S90808,-668138,0,0
12183,2024,2,CY-599MK,S87561,5846237,6153861,6153861


In [5]:
# Hand the pandas frame to ibis as an in-memory table; the following cells build on `pl`
pl = ibis.memtable(df_pl)
pl.head(n=3)

In [6]:
# Process numeric columns
# Divide the three amount columns by -1000 (scales down by 1000 and flips the sign),
# then derive the actual-vs-plan gap from the rescaled values.
# Caution: `pl` is rebound in place, so re-running this cell rescales a second time.
amount_columns = ["actual", "plan", "target"]
pl_rescaled = pl.mutate(s.across(amount_columns, _ / -1000))
pl = pl_rescaled.mutate(delta_to_plan=(_.actual - _.plan).round(3))
pl.head(n=3)

In [7]:
# Add volume difference
# Gap between plan and target, rounded to 3 decimals
plan_minus_target = (_.plan - _.target).round(3)
pl = pl.mutate(volume_difference=plan_minus_target)
pl.head(n=3)

### Read master data

In [8]:
# Read master data
def _read_master(file_name, **read_kwargs):
    """Read one CSV from `meta_path` with every column as text and cleaned column names.

    Extra keyword arguments are passed straight through to `pd.read_csv`.
    """
    return pd.read_csv(meta_path / file_name, dtype=str, **read_kwargs).clean_names()


df_acc = _read_master("0000_TABLE_MASTER_Acc level.csv")
df_cc_general = _read_master("0000_TABLE_MASTER_Cost center_general.csv")
df_cc_hierarchy = _read_master("0000_TABLE_MASTER_Cost center_hierarchy.csv")
# Only the first three columns of the COOM file are read
df_coom = _read_master("0004_TABLE_MASTER_COOM_2023.csv", usecols=[0, 1, 2])
df_poc = _read_master("POC.csv")

In [9]:
# Account-level master data as an in-memory ibis table, with a three-row preview
acc = ibis.memtable(df_acc)
acc.head(n=3)

In [10]:
# General cost-center master data as an in-memory ibis table, with a three-row preview
cc_general = ibis.memtable(df_cc_general)
cc_general.head(n=3)

In [11]:
# Cost-center hierarchy master data as an in-memory ibis table, with a three-row preview
cc_hierarchy = ibis.memtable(df_cc_hierarchy)
cc_hierarchy.head(n=3)

In [12]:
# Combined cost-center table: every row of the general table is kept and the
# hierarchy columns are attached where `cctr` matches (left join).
cc = cc_general.join(cc_hierarchy, predicates="cctr", how="left")
cc.head(n=3)

In [13]:
# COOM master data as an in-memory ibis table, with a three-row preview
coom = ibis.memtable(df_coom)
coom.head(n=3)

In [14]:
# POC table as an in-memory ibis table, with a three-row preview
poc = ibis.memtable(df_poc)
poc.head(n=3)

### Data wrangling

In [15]:
# Join tables to get master data
# Step 1: three left joins, so every row of `pl` is kept even without a match:
#   - account master      on gl_accounts == account_no_
#   - cost-center master  on costctr == cctr
#   - POC table           on the cost center's pctr == profit_center
# The key columns brought in by the joins are dropped afterwards.
# NOTE(review): "cctr_right" is presumably the duplicate key created when `cc` was built
# from cc_general and cc_hierarchy; whether it exists depends on how the installed ibis
# version renames colliding join keys - confirm.
# NOTE(review): duplicate keys in any master table would multiply `pl` rows - verify
# the master data is unique per key.
pl = (pl
    .join(acc, pl.gl_accounts == acc.account_no_, how="left")
    .join(cc, pl.costctr == cc.cctr, how="left")
    .join(poc, cc.pctr == poc.profit_center, how="left")         
    .drop("cctr", "account_no_", "cctr_right", "pctr")
)
# Step 2: left-join the COOM table on cost center AND account, then drop its key columns.
# It runs as a separate statement on the rebound `pl`, presumably so its `cctr` /
# `account_no_` columns do not collide with the ones dropped in step 1 - confirm.
pl = (pl
    .join(coom, (pl.costctr == coom.cctr) & (pl.gl_accounts == coom.account_no_), how="left")
    .drop("cctr", "account_no_")
)
pl.head(3)

In [16]:
# Process COOM data for fix and variable costs
# Rules, first match wins:
#   1. account "K399" whose `fix_var` is "Var"                      -> "Var"
#   2. `coom` is null (presumably no match in the COOM master data) -> "Fix"
#   3. anything else keeps its existing `coom` value
# The null check uses `.isnull()`: an equality comparison against NULL is not a
# reliable null test under SQL semantics, so missing values could otherwise slip
# through rule 2 and stay null.
pl = pl.mutate(
    coom=ibis.case()
    .when((pl.fix_var == "Var") & (pl.gl_accounts == "K399"), "Var")
    .when(pl.coom.isnull(), "Fix")
    .else_(pl.coom)
    .end()
)
pl.head(3)

In [17]:
# Extract function from cost center hierarchy level 3
# `lv3` is split on "-" and the element at index 1 is stored as `function_2`
lv3_parts = _.lv3.split("-")
pl = pl.mutate(function_2=lv3_parts[1])
pl.head(n=3)


### Overtime costs

In [18]:
# Filter variable costs
# Keep only the rows whose `coom` value is "Var"; `pl` itself is left untouched
is_variable_cost = _.coom == "Var"
pl_var = pl.filter(is_variable_cost)
pl_var.head(n=3)

In [19]:
# Filter overtime_accounts
# Accounts are grouped by where they are booked; the flat list keeps the group order.
_overtime_account_groups = {
    "Variable CC": ["K250", "K256"],
    "Fix CC": ["K2501", "K2561"],
    "Central function": ["K301", "K302"],
}
overtime_accounts = [
    account
    for group_accounts in _overtime_account_groups.values()
    for account in group_accounts
]
pl_var = pl_var.filter(_.gl_accounts.isin(overtime_accounts))
pl_var.head(n=3)

In [20]:
# Add LDC / MDC
# Classification, first match wins. Rows matching nothing get the literal text "NA"
# (a string, not a null) - note that pandas reads "NA" back as missing by default.
starts_with_8 = pl_var.costctr[:1] == "8"  # CC that starts with "8"
is_fgk = pl_var.function_2 == "FGK"
is_labor_cost = pl_var.acc_lv2 == "299 Total Labor Costs"
is_material_cost = pl_var.acc_lv2 == "465 Cost of materials"

ldc_mdc_label = (
    ibis.case()
    .when(starts_with_8, "Start up costs")
    .when(is_fgk & is_labor_cost, "LDC")
    .when(is_fgk & is_material_cost, "MDC")
    .when(True, "NA")
    .end()
)
pl_var = pl_var.mutate(ldc_mdc=ldc_mdc_label)

In [21]:
# Add account information
# Label each row, first match wins; rows matching none of the rules fall back
# to their own `acc_lv1` value.
is_premium_account = pl_var.gl_accounts.isin(["K250", "K256"])
is_slb_wages = pl_var.acc_lv1 == "158 Social benefit rate wages variable"
is_labor_cost = pl_var.acc_lv2 == "299 Total Labor Costs"

ce_text_label = (
    ibis.case()
    # LDC
    .when(is_premium_account, "120 Premium wages")
    .when(is_slb_wages, "158 SLB wages")
    .when(is_labor_cost, "115 Direct labor")
    # MDC
    .when(True, pl_var.acc_lv1)
    .end()
)
pl_var = pl_var.mutate(ce_text=ce_text_label)
pl_var.head(n=3)

In [22]:
# Remove unnecessary columns
# Columns that are not needed in the exported result
columns_to_drop = (
    "validity",
    "responsible",
    "account_description",
    "acc_lv1_by_consolidated",
    "acc_lv3",
    "acc_lv4",
    "acc_lv5",
    "acc_lv6",
)
pl_var = pl_var.drop(*columns_to_drop)
pl_var.head(n=3)

### Output data

In [23]:
# Output data
# Materialise the ibis expression as a pandas frame and write it as CSV without the index.
# The target folder is resolved ("~" expanded) and created first, so the export does not
# fail on a fresh checkout where the folder does not exist yet.
# NOTE(review): the file name says 2020-2024, but only the 2024 extract is loaded - confirm.
output_dir = output_path.expanduser()
output_dir.mkdir(parents=True, exist_ok=True)
pl_var.to_pandas().to_csv(output_dir / "PL var OT costs 2020-2024 YTD.csv", index=False)