In [1]:
 # In Terminal, "pip install ibis-framework[duckdb] pyjanitor"
import pandas as pd
import ibis
from ibis import selectors as s
from ibis import _
ibis.options.interactive = True

In [2]:
import re
from janitor import clean_names

In [3]:
# Path
from pathlib import Path
path = Path(r"C:\Users\uid98421\Vitesco Technologies\Controlling VT Korea - Documents\120. Data automation\jupyterlab\RnD")
data_path = path / "data" / "GLORIA"
meta_path = path / "meta"
output_path = path / "output"

In [4]:
# Input data
f_gloria = data_path / "202307_ICN.csv"
f_ps_db = data_path / "202307_PS_DB_display.xlsx"
f_acc_config = meta_path / "ACC_Config_2023.xlsx"
f_cc_config = meta_path / "CC_Config_2023.xlsx"
f_ps_config = meta_path / "PS_Config_2023.xlsx"
f_race_gloria = meta_path / "RACE_GLORIA.csv"
f_race = data_path / "Analysis FS Item Hierarchy for CU 698_LC.xlsx"

In [5]:
df_gloria = pd.read_csv(f_gloria, dtype={"Outlet":str, "Cost_ITEM":str, "Head_ITEM":str, }).clean_names()
df_gloria.head(3)

Unnamed: 0,year,vt,location,outlet,subdepart,la_typ,la_pos,cost_item,text_ci,costs_rnd,head_item,user_name,per,costs,obcur,heads,um,created_on
0,2024,4,ICN,7110,,,,510,,19730318,,,7,19730318,KRW,0,,
1,2024,4,ICN,7110,,L6,1.0,10,TOTAL COMPENSATION,"31,884,702-",10.0,,7,-31884702,KRW,4,,
2,2024,4,ICN,7110,,L6,4.0,15,THEREOF COMPENSATION FOR LEASING,,20.0,,7,0,KRW,0,,


In [6]:
df_ps_db = pd.read_excel(f_ps_db, sheet_name="Sheet1", dtype={"Cost item":str}).clean_names()
df_ps_db.head(3)

Unnamed: 0,project_definition,global_project_number,project_description,ba_pr,prftctr_pr,wbs_element,wbs_element_description,ra_key,coar,cocd,...,total_actual_revenues,total_actual_hours,prev_per_actual_cos,prev_per_committmen,prev_per_actual_rev,prev_per_actual_hou,eval_per_actual_cos,eval_per_committmen,eval_per_actual_rev,eval_per_actual_hou
0,DK-300001,DG-009595,HMC DRY DCT,,50803-026,DK-300001-1000,HMC DRYDCT R&D,,5668,5668,...,0,0.0,0,0,0,0.0,0,0,0,0.0
1,DK-300002,DG-013779,SIM2K-91 - 4/5AT TCU,,50803-026,DK-300002-1000,SIM2K91 R&D,,5668,5668,...,0,0.0,0,0,0,0.0,0,0,0,0.0
2,DK-300003,DG-013530,HMC 7STEP DRY-DCT,,50803-026,DK-300003-1000,HMC7STEP R&D,,5668,5668,...,0,0.0,0,0,0,0.0,0,0,0,0.0


In [7]:
df_ps_db.columns

Index(['project_definition', 'global_project_number', 'project_description',
       'ba_pr', 'prftctr_pr', 'wbs_element', 'wbs_element_description',
       'ra_key', 'coar', 'cocd', 'ba_wbs', 'prftctr_wb', 'product_gr', 'typ',
       'sub_ptype', 'cust_ctr_', 'customer', 'customer_name', 'tr_prt',
       'cl_o_settl', 'respons_', 'name_of_resp_person', 'stat', 'status_text',
       'order', 'opac', 'order_activity_description', 'type', 'cat', 'ctrl',
       'plnt', 'work_ctr', 'work_center_description', 'unit', 'cost_ctr',
       'cctc', 'cost_center_description', 'bus_area', 'prftctr_cc',
       'value_category', 'value_cat_description', 'cost_item',
       'cost_item_description', 'proj_start', 'proj_end', 'wbs_start',
       'wbs_end', 'act_start', 'act_end', 'crcy', 'um', 'total_budget',
       'total_available_budg', 'total_plan_wbs_ava', 'total_actual_costs',
       'total_committments', 'total_actual_revenues', 'total_actual_hours',
       'prev_per_actual_cos', 'prev_per_commit

In [8]:
df_acc_config = pd.read_excel(f_acc_config, dtype=str).clean_names()
df_acc_config.head(3)

Unnamed: 0,location,outlet,profit_center,subdepartment,div
0,ICN,7851,50803-04,,Div E
1,ICN,7221,50803-042,,Div E
2,ICN,7511,50803-053,,Div P


In [9]:
df_cc_config = pd.read_excel(f_cc_config, dtype=str).clean_names()
df_cc_config.head(3)

Unnamed: 0,cost_center,cc_group,cc_name,location,outlet,subdepartment,div
0,5220,,VT E CT AEE,ICN,7221,16,Div E
1,5230,,VT E CT ASW,ICN,7221,17,Div E
2,5142,,VT E CT BSC,ICN,7221,19,Div E


In [10]:
df_ps_config = pd.read_excel(f_ps_config, dtype=str).clean_names()
df_ps_config.head(3)

Unnamed: 0,location,outlet,subdepartment,profit_center
0,ICN,7220,,50803-004
1,ICN,7851,,50803-04
2,ICN,7221,,50803-042


In [11]:
df_race_gloria = pd.read_csv(f_race_gloria, dtype=str).clean_names()
df_race_gloria.head(3)

Unnamed: 0,gl_cost_item,gl_text_ci,ps_data_ci,fs_item,fs_item_text
0,10,Total Compensation,,311500600.0,"Primary R, D & E costs"
1,15,thereof Compensation for Leasing,,,
2,20,External Services on Cost Center,,311500600.0,"Primary R, D & E costs"


In [12]:
def read_race_file(path):
    df = pd.read_excel(path, sheet_name="Query", skiprows=11)

    df = df.astype({"ConsUnit": str, "Plant": str, "Outlet": str})
    df = df.rename(
        columns={
            "Unnamed: 1": "FS item description",
            "Unnamed: 3": "CU name",
            "Unnamed: 5": "Plant name",
            "Unnamed: 7": "Outlet name",
            "YTD - 1": "YTD PM",   # PM means Previous Month
        }
    )
    df = df.rename(columns=lambda x: re.sub("\nACT", "", x))
    df = clean_names(df)
    return df

In [13]:
df_race = read_race_file(f_race)
df_race.head(3)

Unnamed: 0,financial_statement_item,fs_item_description,consunit,cu_name,plant,plant_name,outlet,outlet_name,py,lfc,...,ytd_3,ytd_4,ytd_5,ytd_6,ytd_7,ytd_8,ytd_9,ytd_10,ytd_11,ytd_12
0,BALANCE SHEET,Balance Sheet IFRS,698,VT Korea,9,none,732,PL DAC,10033580.0,,...,,,,,,,,,,
1,BALANCE SHEET,Balance Sheet IFRS,698,VT Korea,9,none,781,PL ENC,-18590430.0,,...,,,,,,,,,,
2,BALANCE SHEET,Balance Sheet IFRS,698,VT Korea,9,none,7000,Holding,-3.518,,...,,,,,,,,,,


In [14]:
race = ibis.memtable(df_race)
race = (race
    .select("financial_statement_item", "fs_item_description", "plant", "outlet", "period")
    .filter(_.financial_statement_item == "312502700" or _.financial_statement_item.startswith("3115"))
    .filter(_.plant == "483")    
)
race.head(3)

In [15]:
race_gloria = ibis.memtable(df_race_gloria)
race_gloria = race_gloria.filter(_.ps_data_ci == ibis.NA)
race_gloria.head(3)

In [16]:
race_gloria = ibis.memtable(df_race_gloria)
race_gloria = race_gloria.filter(_.ps_data_ci != ibis.NA)
race_gloria.head(3)

In [17]:
ps_db = ibis.memtable(df_ps_db)
ps_db = ps_db.group_by(["prftctr_pr", "prftctr_cc", "value_category", "value_cat_description", "cost_item", "cost_item_description"]).aggregate(
    cost = _.eval_per_actual_cos.sum(),
    sales = _.eval_per_actual_rev.sum(),
)
ps_db.head(3)

In [18]:
df_ps_config = df_ps_config.drop(columns=["subdepartment"])  # only NULL values

In [19]:
ps_config = ibis.memtable(df_ps_config)
ps_config.head(3)

In [20]:
ps_db_config = ps_db.join(ps_config, ps_db.prftctr_pr == ps_config.profit_center, how="left")
ps_db_config.head(3)

In [21]:
project_specific = ps_db_config.join(race_gloria, joined.cost_item == race_gloria.ps_data_ci, how="left")
project_specific = project_specific.filter(_.ps_data_ci != ibis.NA)
project_specific.head(3)

NameError: name 'joined' is not defined

In [None]:
gloria = ibis.memtable(df_gloria)
cc_costs = gloria.join(race_gloria, gloria.cost_item == race_gloria.gl_cost_item, how="left")
cc_costs = cc_costs.filter(_.cost_item != "105")
gloria.head(3)

In [None]:
project_specific.df().to_csv(output_path + "ps.csv")
cc_costs.df().to_csv(output_path + "cc.csv")

In [None]:
joined = ps_db_config.join(race_gloria, ps_db_config.cost_item == race_gloria.ps_data_ci, how="left")
joined = joined.filter(_.cost_item == "07")
joined = joined.filter(_.prftctr_pr != _.prftctr_cc)
joined.head(3)

In [None]:
ps_db_config.head(3)