In [4]:
# Import libraries
import pandas as pd
import numpy as np
from pathlib import Path

In [5]:
# --- Paths -------------------------------------------------------------
# All datasets sit under one base folder: the cleaned input is read from
# CLEAN and FE_DIR is the folder for feature-engineered output.
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
FE_DIR.mkdir(parents=True, exist_ok=True)  # create the output folder if it is missing

# --- Load the cleaned heavy-equipment table -----------------------------
he_file = CLEAN.joinpath("heavy_equipment.csv")
if not he_file.exists():
    # Stop early with an explicit message instead of letting read_csv fail.
    raise RuntimeError(f"[ERROR] File heavy_equipment.csv tidak ditemukan di {CLEAN}")

# week_start is parsed into datetimes while reading.
df_he = pd.read_csv(he_file, parse_dates=["week_start"])

In [6]:
# Encode pit_id as a unique integer: each distinct pit gets a number starting
# at 1, assigned in the order the pits first appear in df_he.
# NOTE(review): the numbering depends on row order of the input file - confirm
# this is acceptable if pit_id_num is ever compared across datasets.
unique_pits = df_he["pit_id"].unique()
pit_map = dict(zip(unique_pits, range(1, len(unique_pits) + 1)))
df_he["pit_id_num"] = df_he["pit_id"].map(pit_map)

# Placeholder columns, grouped by topic (fleet, production, road, weather).
# Each one is added to df_he filled with NaN; nothing in this cell populates them.

# Fleet placeholder columns
fleet_cols = [
    "available_trucks",
    "breakdown_trucks",
    "utilization_pct",
    "capacity_ton",
    "effective_capacity_ton",
]

# Production placeholder columns
prod_cols = [
    "realized_ton",
    "target_ton",
    "progress_ratio",
    "differential",
    "realized_vs_capacity_ratio",
    "capacity_utilization_change",
    "equipment_supply_index",
    "breakdown_impact",
    "realized_ton_lag1",
    "realized_ton_lag2",
    "realized_ton_roll_4w",
]

# Road placeholder columns
road_cols = [
    "road_condition_score",
    "expected_cycle_time_min",
    "effective_truck_throughput_week",
]

# Weather placeholder columns
weather_cols = [
    "heavy_rain_flag",
    "wind_alert_flag",
    "weather_index",
    "weather_lag1",
    "weather_trend_4w",
    "pit_weather_index",
    "rom_weather_index",
    "jetty_weather_index",
]

# Append the placeholders in group order: fleet, production, road, weather.
for col in [*fleet_cols, *prod_cols, *road_cols, *weather_cols]:
    df_he[col] = np.nan

# Derived heavy-equipment features.
# Rows are ordered by pit and then by week so the shift/rolling steps below
# walk each pit's records in week_start order.
df_he = df_he.sort_values(by=["pit_id", "week_start"])

# Total active equipment: excavators + dozers + graders, missing counts treated as 0.
active_cols = ["excavator_active", "dozer_active", "grader_active"]
df_he["total_active_equipment"] = df_he[active_cols].fillna(0).sum(axis="columns")

# Equipment supply index (not normalised yet; may later be combined with tonnage):
# total active units multiplied by average operating hours.
df_he["equipment_supply_index_calc"] = df_he["total_active_equipment"].mul(
    df_he["avg_operating_hours"]
)

# Breakdown proxy: the number of units under maintenance
# (could be multiplied by predicted repair hours if that becomes available).
df_he["breakdown_impact_calc"] = df_he["maintenance_units"]

# Lag features for operating hours: the value one and two rows back within each pit.
hours_by_pit = df_he.groupby("pit_id")["avg_operating_hours"]
for lag_col, n_back in {"avg_operating_hours_lag1": 1, "avg_operating_hours_lag2": 2}.items():
    df_he[lag_col] = hours_by_pit.shift(n_back)

# 4-row rolling mean of fuel burn rate per pit; min_periods=1 lets the first
# rows of each pit use whatever history is available instead of yielding NaN.
fuel_roll_4w = (
    df_he.groupby("pit_id")["fuel_burn_rate_lph"]
    .rolling(window=4, min_periods=1)
    .mean()
    .droplevel(0)  # drop the pit_id level so values align back on df_he's row index
)
df_he["fuel_burn_rate_lph_roll_4w"] = fuel_roll_4w

# Write the feature table to CSV (the row index is not written).
fe_file = FE_DIR.joinpath("heavy_equipment_fe.csv")
df_he.to_csv(fe_file, index=False)
print(f"FE Heavy Equipment selesai dan disimpan di {fe_file}")

# Preview the first rows as a quick sanity check.
display(df_he.head())


FE Heavy Equipment selesai dan disimpan di ..\all_dataset\feature_dataset\heavy_equipment_fe.csv


Unnamed: 0,week_start,pit_id,excavator_active,dozer_active,grader_active,maintenance_units,avg_operating_hours,fuel_burn_rate_lph,potential_breakdown_flag,pit_id_num,...,weather_trend_4w,pit_weather_index,rom_weather_index,jetty_weather_index,total_active_equipment,equipment_supply_index_calc,breakdown_impact_calc,avg_operating_hours_lag1,avg_operating_hours_lag2,fuel_burn_rate_lph_roll_4w
0,2023-01-02,PIT-1,21,13,3,3,17.5,18.3,0,1,...,,,,,37,647.5,3,,,18.3
1,2023-01-09,PIT-1,18,8,5,9,16.7,24.0,0,1,...,,,,,31,517.7,9,17.5,,21.15
2,2023-01-16,PIT-1,22,8,5,4,16.1,23.7,0,1,...,,,,,35,563.5,4,16.7,17.5,22.0
3,2023-01-23,PIT-1,19,8,4,8,14.7,28.0,0,1,...,,,,,31,455.7,8,16.1,16.7,23.5
4,2023-01-30,PIT-1,21,10,5,8,14.4,21.7,0,1,...,,,,,36,518.4,8,14.7,16.1,24.35
