In [4]:
# Import Library
import pandas as pd
import numpy as np
from pathlib import Path

In [5]:
# Directory layout: cleaned inputs are read from clean_dataset and the
# engineered features are written to feature_dataset (created if missing).
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
FE_DIR.mkdir(parents=True, exist_ok=True)

# Load the cleaned stockpile data, parsing week_start as a datetime column.
# Stop immediately with an explicit error when the input file is absent.
stock_file = CLEAN.joinpath("stockpile.csv")
if stock_file.exists():
    df_stock = pd.read_csv(stock_file, parse_dates=["week_start"])
else:
    raise RuntimeError(f"[ERROR] File stockpile.csv tidak ditemukan di {CLEAN}")

In [6]:
# Integer encoding of the ID columns: every distinct non-null label gets a
# 1-based code, numbered in order of first appearance. Rows whose label is
# null are not in the mapping and therefore map to NaN.
unique_pits = df_stock["pit_id"].dropna().unique()
pit_map = dict(zip(unique_pits, range(1, len(unique_pits) + 1)))
df_stock["pit_id_num"] = df_stock["pit_id"].map(pit_map)

unique_stockpiles = df_stock["stockpile_id"].dropna().unique()
stockpile_map = dict(zip(unique_stockpiles, range(1, len(unique_stockpiles) + 1)))
df_stock["stockpile_id_num"] = df_stock["stockpile_id"].map(stockpile_map)

# Order rows by numeric stockpile code, then by week.
# NOTE(review): the stockpile-feature step further down re-sorts on the string
# stockpile_id before its groupby/rolling, so this ordering looks superseded -
# confirm whether it is still needed.
df_stock = df_stock.sort_values(by=["stockpile_id_num", "week_start"])

# Production-side feature columns. They are only created here as all-NaN
# placeholders; no values are computed for them in this cell.
prod_cols = [
    "realized_ton",
    "target_ton",
    "progress_ratio",
    "differential",
    "realized_vs_capacity_ratio",
    "capacity_utilization_change",
    "equipment_supply_index",
    "breakdown_impact",
    "realized_ton_lag1",
    "realized_ton_lag2",
    "realized_ton_roll_4w",
]

# Road-side feature columns, likewise NaN placeholders.
road_cols = [
    "road_condition_score",
    "expected_cycle_time_min",
    "effective_truck_throughput_week",
]

# Add (or overwrite) every placeholder column in one pass, production columns
# first and road columns after them.
df_stock = df_stock.assign(**dict.fromkeys(prod_cols + road_cols, np.nan))

# Stockpile-derived features.
# Rows are ordered by stockpile and then by week so that the row-based rolling
# windows below run in time order within each stockpile.
df_stock = df_stock.sort_values(by=["stockpile_id", "week_start"])

# stock_change: stock after loading minus current stock, per row.
df_stock["stock_change"] = df_stock["stock_after_loading_ton"].sub(df_stock["current_stock_ton"])

# stock_coverage_weeks: placeholder only. Intended formula is
# stock_after_loading / avg_weekly_shipping_demand, which is not computed here.
df_stock["stock_coverage_weeks"] = np.nan

# shortage_flag: 1 when the stock after loading is below the safety-stock
# threshold, otherwise 0.
safety_stock_threshold = 5000
df_stock["shortage_flag"] = df_stock["stock_after_loading_ton"].lt(safety_stock_threshold).astype(int)

# Rolling inflow/outflow sums over the last 4 rows of each stockpile (fewer rows
# at the start of a group, because min_periods=1). The grouped result carries the
# group key as an extra outer index level; dropping it restores the original row
# labels so the assignment aligns row by row.
# NOTE(review): the window counts rows, not calendar weeks - this equals "4 weeks"
# only if each stockpile has exactly one row per week with no gaps; confirm.
rolling_sources = {
    "rolling_sum_inflow_4w": "incoming_production_ton",
    "rolling_sum_outflow_4w": "planned_loading_ton",
}
for target_col, source_col in rolling_sources.items():
    per_stockpile = df_stock.groupby("stockpile_id")[source_col]
    df_stock[target_col] = per_stockpile.rolling(4, min_periods=1).sum().droplevel(0)

# cumulative_deficit (early-warning signal): rolling outflow minus rolling inflow,
# floored at zero. Despite the name it is derived from the 4-row rolling sums, not
# from a running total.
net_outflow_4w = df_stock["rolling_sum_outflow_4w"] - df_stock["rolling_sum_inflow_4w"]
df_stock["cumulative_deficit"] = net_outflow_4w.clip(lower=0)

# Shipping-derived feature columns. They are created here as all-NaN
# placeholders; no values are computed for them in this cell.
shipping_cols = [
    "ship_required_ton",
    "stock_after_vs_demand",
    "ship_to_stock_ratio",
    "reallocation_needed_flag",
    "queued_ships_estimate",
]
df_stock = df_stock.assign(**dict.fromkeys(shipping_cols, np.nan))

# Write the engineered stockpile features to CSV (row index omitted) and
# report where the file was written.
fe_file = FE_DIR.joinpath("stockpile_fe.csv")
df_stock.to_csv(fe_file, index=False)
print(f"FE Stockpile selesai dan disimpan di {fe_file}")

# Show the first five rows as a quick visual check.
display(df_stock.head(5))


FE Stockpile selesai dan disimpan di ..\all_dataset\feature_dataset\stockpile_fe.csv


Unnamed: 0,week_start,pit_id,stockpile_id,current_stock_ton,incoming_production_ton,planned_loading_ton,stock_after_loading_ton,pit_id_num,stockpile_id_num,realized_ton,...,stock_coverage_weeks,shortage_flag,rolling_sum_inflow_4w,rolling_sum_outflow_4w,cumulative_deficit,ship_required_ton,stock_after_vs_demand,ship_to_stock_ratio,reallocation_needed_flag,queued_ships_estimate
0,2023-01-02,PIT-1,sp-1,63654,45795,20860,88589,1,1,,...,,0,45795.0,20860.0,0.0,,,,,
1,2023-01-09,PIT-1,sp-1,88589,68158,64732,92015,1,1,,...,,0,113953.0,85592.0,0.0,,,,,
2,2023-01-16,PIT-1,sp-1,92015,41284,26265,107034,1,1,,...,,0,155237.0,111857.0,0.0,,,,,
3,2023-01-23,PIT-1,sp-1,107034,46850,57194,96690,1,1,,...,,0,202087.0,169051.0,0.0,,,,,
4,2023-01-30,PIT-1,sp-1,96690,51962,67191,81461,1,1,,...,,0,208254.0,215382.0,7128.0,,,,,
