In [5]:
# Import Library
import pandas as pd
import numpy as np
from pathlib import Path

In [6]:
# Path configuration: all inputs and outputs live under one dataset root.
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
# Make sure the feature output folder (and any missing parents) exists.
FE_DIR.mkdir(parents=True, exist_ok=True)

# Load the cleaned production data, failing fast when the file is absent.
prod_file = CLEAN.joinpath("production.csv")
if prod_file.exists():
    # week_start is parsed as a datetime column at read time.
    df_prod = pd.read_csv(prod_file, parse_dates=["week_start"])
else:
    raise RuntimeError(f"[ERROR] File production.csv tidak ditemukan di {CLEAN}")

In [7]:
# Convert pit_id and stockpile into unique numeric codes.
# Each distinct value receives a 1-based integer following the order in which
# Series.unique() returns it.
unique_pits = df_prod["pit_id"].unique()
pit_map = dict(zip(unique_pits, range(1, len(unique_pits) + 1)))
df_prod["pit_id_num"] = df_prod["pit_id"].map(pit_map)

unique_stockpiles = df_prod["stockpile"].unique()
stockpile_map = dict(zip(unique_stockpiles, range(1, len(unique_stockpiles) + 1)))
df_prod["stockpile_num"] = df_prod["stockpile"].map(stockpile_map)

# Placeholder columns for Fleet & HE features (created empty in this step).
fleet_he_cols = [
    "effective_capacity_ton",
    "total_active_equipment",
    "avg_operating_hours",
    "breakdown_trucks",
    "predicted_repair_hours",
]

# Placeholder columns for Road features (created empty in this step).
road_cols = [
    "road_condition_score",
    "expected_cycle_time_min",
    "effective_truck_throughput_week",
]

# Add every placeholder as an all-NaN column, Fleet & HE first and Road after,
# so the resulting column order is unchanged.
for col in (*fleet_he_cols, *road_cols):
    df_prod[col] = np.nan

# Derived production features combined with Fleet & HE columns.
# Rows are ordered by pit and week so that each per-pit shift/rolling below
# walks through the weeks in chronological order.
# NOTE(review): lags are grouped by pit_id only; if one pit can report several
# stockpiles in the same week the lags would mix them — confirm the data grain.
df_prod = df_prod.sort_values(["pit_id", "week_start"])

# Realized tonnage relative to effective capacity.
#   capacity > 0        -> realized_ton / capacity
#   capacity <= 0       -> 0 (no usable capacity)
#   capacity is missing -> NaN, so "unknown" is not reported as a 0 ratio
capacity = df_prod["effective_capacity_ton"]
capacity_ratio = (df_prod["realized_ton"] / capacity).where(capacity > 0, 0.0)
df_prod["realized_vs_capacity_ratio"] = capacity_ratio.where(capacity.notna())

# Week-over-week relative change of effective capacity within each pit.
capacity_prev = df_prod.groupby("pit_id")["effective_capacity_ton"].shift(1)
df_prod["effective_capacity_prev"] = capacity_prev
# A previous capacity of 0 makes the relative change undefined; mask it to NaN
# instead of letting the division produce +/-inf in the feature file.
df_prod["capacity_utilization_change"] = (
    (capacity - capacity_prev) / capacity_prev.where(capacity_prev != 0)
)

# Breakdown impact: broken-down trucks weighted by predicted repair hours.
df_prod["breakdown_impact"] = df_prod["breakdown_trucks"] * df_prod["predicted_repair_hours"]

# Production lag features, computed independently for each pit.
realized_by_pit = df_prod.groupby("pit_id")["realized_ton"]
df_prod["realized_ton_lag1"] = realized_by_pit.shift(1)
df_prod["realized_ton_lag2"] = realized_by_pit.shift(2)
# Rolling mean over up to 4 rows per pit, current row included; min_periods=1
# gives a value from the first row of each pit. Dropping the pit_id index level
# restores the original row index so the assignment aligns with df_prod.
df_prod["realized_ton_roll_4w"] = (
    realized_by_pit.rolling(4, min_periods=1).mean().reset_index(level=0, drop=True)
)

# Equipment supply index: active equipment count times average operating hours.
df_prod["equipment_supply_index"] = df_prod["total_active_equipment"] * df_prod["avg_operating_hours"]

# Placeholder columns for Stockpile features (created empty in this step).
stockpile_cols = [
    "incoming_production_ton",
    "stock_change",
    "stock_coverage_weeks",
    "shortage_flag",
    "rolling_sum_inflow_4w",
    "rolling_sum_outflow_4w",
    "cumulative_deficit",
]
# Each placeholder becomes an all-NaN column, appended in list order.
for col in iter(stockpile_cols):
    df_prod[col] = np.nan


# Save the engineered production features to CSV (row index is not written).
fe_file = FE_DIR.joinpath("production_fe.csv")
df_prod.to_csv(path_or_buf=fe_file, index=False)
print(f"FE Production selesai dan disimpan di {fe_file}")

# Preview the first rows as a quick sanity check of the result.
preview = df_prod.head()
display(preview)


FE Production selesai dan disimpan di ..\all_dataset\feature_dataset\production_fe.csv


Unnamed: 0,week_start,pit_id,stockpile,target_ton,progress_ratio,realized_ton,differential,pit_id_num,stockpile_num,effective_capacity_ton,...,realized_ton_lag2,realized_ton_roll_4w,equipment_supply_index,incoming_production_ton,stock_change,stock_coverage_weeks,shortage_flag,rolling_sum_inflow_4w,rolling_sum_outflow_4w,cumulative_deficit
0,2023-01-02,PIT-1,SP-1,9860,0.889,8761,-1099,1,1,,...,,8761.0,,,,,,,,
1,2023-01-09,PIT-1,SP-1,12772,0.858,10955,-1817,1,1,,...,,9858.0,,,,,,,,
2,2023-01-16,PIT-1,SP-1,12092,0.793,9592,-2500,1,1,,...,8761.0,9769.333333,,,,,,,,
3,2023-01-23,PIT-1,SP-1,9466,0.76,7189,-2277,1,1,,...,10955.0,9124.25,,,,,,,,
4,2023-01-30,PIT-1,SP-1,13426,0.831,11162,-2264,1,1,,...,9592.0,9724.5,,,,,,,,
