In [1]:
# Setup (run once in a terminal): pip install "ibis-framework[duckdb]" pyjanitor
# (the quotes keep shells such as zsh from treating [duckdb] as a glob pattern)
import pandas as pd
import ibis
from ibis import selectors as s  # column selectors, used below via s.across(...)
from ibis import _  # deferred-expression placeholder, used below inside mutate(...)
# Interactive mode: displaying an ibis expression (e.g. the `.head(3)` calls
# that end most cells) executes it and renders the resulting rows.
ibis.options.interactive = True

In [2]:
from janitor import clean_names

In [3]:
# Path
# Project directories. The base path starts with "~", which pathlib does not
# expand on its own; expanduser() resolves it to the home directory up front
# so every consumer (not only pandas, which expands "~" itself) sees a real path.
from pathlib import Path
path = Path("~/datasets/home-dataset/jupyterlab/ZX05").expanduser()
db_path = path / "db"          # input extract (CF_2024.csv)
meta_path = path / "meta"      # master-data lookup tables
output_path = path / "output"  # destination of the exported CSV

In [4]:
# Read DB file
# `costctr` is forced to text; it is later matched against the `cctr` key of
# the master data, which is read entirely as strings.
df = pd.read_csv(
    db_path / "CF_2024.csv",
    dtype={"costctr": str},
)

In [5]:
# Wrap the pandas frame as an in-memory ibis table and preview three rows.
cf = ibis.memtable(df)
cf.head(n=3)

In [6]:
# Process numeric columns
# Step 1: divide the three amount columns by -1000 (flips the sign and scales
#         the values down by a thousand).
# Step 2: delta_to_plan = actual - plan on the rescaled values, rounded to
#         three decimals.
# Note: `cf` is reassigned here, so re-running this cell rescales the amounts again.
cf = cf.mutate(s.across(["actual", "plan", "target"], _ / -1000))
cf = cf.mutate(delta_to_plan=(_.actual - _.plan).round(3))
cf.head(n=3)

### Read master data

In [7]:
# Read master data
# All three lookup files are loaded the same way: every column as text, with
# headers normalised by pyjanitor's clean_names() (later cells rely on the
# cleaned names, e.g. `account_no_` and `cctr`).
df_acc, df_cc_general, df_cc_hierarchy = (
    pd.read_csv(meta_path / file_name, dtype=str).clean_names()
    for file_name in (
        "0000_TABLE_MASTER_Acc level.csv",
        "0000_TABLE_MASTER_Cost center_general.csv",
        "0000_TABLE_MASTER_Cost center_hierarchy.csv",
    )
)

In [8]:
# Account master data as an ibis table; preview three rows.
acc = ibis.memtable(df_acc)
acc.head(n=3)

In [9]:
# General cost-center master data as an ibis table; preview three rows.
cc_general = ibis.memtable(df_cc_general)
cc_general.head(n=3)

In [10]:
# Cost-center hierarchy master data as an ibis table; preview three rows.
cc_hierarchy = ibis.memtable(df_cc_hierarchy)
cc_hierarchy.head(n=3)

In [11]:
# Combine both cost-center tables on their shared `cctr` key. A left join
# keeps every row of cc_general even when the hierarchy has no match.
# The later cleanup drops a `cctr_right` column — presumably the right-hand
# copy of the key produced by this join.
cc = cc_general.join(
    right=cc_hierarchy,
    predicates="cctr",
    how="left",
)
cc.head(n=3)

### Data wrangling

In [12]:
# Join tables to get master data
# Enrich each cf row with account attributes (gl_accounts -> account_no_) and
# cost-center attributes (costctr -> cctr). Both are left joins, so cf rows
# without a master-data match are kept.
# NOTE(review): if a key value occurs more than once in acc or cc, the left
# join multiplies the matching cf rows — confirm the master-data keys are unique.
cf = (cf
    .join(acc, cf.gl_accounts == acc.account_no_, how="left")
    .join(cc, cf.costctr == cc.cctr, how="left")
    # the master-data key columns are not needed once the join is done
    .drop("cctr", "account_no_", "cctr_right")
)
cf.head(3)

In [13]:
# Add account information
# Assign every row a cost-element label (`ce_text`). Branches are tested top
# to bottom and the first match wins; rows matching no branch get "NA".
# Keep the acc_lv1 rules above the acc_lv2 "465 Cost of materials" rule —
# presumably those acc_lv1 values sit underneath that acc_lv2 group, making
# "17_Other cost" the remainder bucket (verify against the account hierarchy).
# NOTE(review): labels jump from 13_ to 15_ and the last label has no numeric
# prefix — confirm this is intended.
# NOTE(review): newer ibis releases reportedly deprecate ibis.case() in favour
# of ibis.cases(); check the installed version before upgrading.
cf = cf.mutate(
    ce_text=(
        ibis.case()
        # E01-299
        .when(cf.acc_lv2 == "299 Total Labor Costs", "10_Compensation")
        # E01-465
        .when(
            cf.acc_lv1.isin(["345 Depreciation long life", "370 Rental/Leasing"]),
            "11_Depreciation & Leasing",
        )
        .when(cf.acc_lv1 == "375 Utilities", "12_Energy")
        .when(cf.acc_lv1 == "435 Fees and purchased services", "13_Fees and Purchased Services")
        .when(cf.acc_lv1 == "320 Purchased maintenance", "15_Maintenance")
        .when(
            cf.acc_lv1.isin(["430 Entertainment/Travel expense", "440 Recruitment/Training/Development"]),
            "16_Travel Training",
        )
        .when(cf.acc_lv2 == "465 Cost of materials", "17_Other cost")
        # E01-535
        .when(cf.gl_accounts == "K6620", "18_Services In / Out")
        .when(cf.gl_accounts == "K6626", "19_Transfer out")
        .when(cf.gl_accounts.isin(["K6623", "K6624", "K6625"]), "20_IT Allocation")
        # E01-520
        .when(cf.acc_lv2 == "520 Assessments In", "CF cost assessment out")
        # explicit default instead of an always-true final branch
        .else_("NA")
        .end()
    )
)
cf.head(3)

In [14]:
# Remove unnecessary columns
# Columns that are not wanted in the exported file.
columns_to_drop = (
    "validity",
    "responsible",
    "account_description",
    "acc_lv1_by_consolidated",
    "acc_lv3",
    "acc_lv4",
    "acc_lv5",
    "acc_lv6",
)
cf = cf.drop(*columns_to_drop)
cf.head(n=3)

### Output data

In [15]:
# Output data
# Execute the ibis expression into a pandas DataFrame, then write it to the
# output folder without the index column.
cf_export = cf.to_pandas()
cf_export.to_csv(output_path / "CF costs.csv", index=False)