In [13]:
# Import Library
import pandas as pd
import numpy as np
from pathlib import Path

In [14]:
# Path configuration: every dataset folder is derived from one base directory.
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
# Create the output folder (and any missing parents); a no-op when it already exists.
FE_DIR.mkdir(parents=True, exist_ok=True)

# Load the cleaned fleet data, failing fast when the input file is absent.
fleet_file = CLEAN.joinpath("fleet.csv")
if fleet_file.exists():
    # week_start is parsed as a datetime column at read time.
    df_fleet = pd.read_csv(fleet_file, parse_dates=["week_start"])
else:
    raise RuntimeError(f"[ERROR] File fleet.csv tidak ditemukan di {CLEAN}")

In [15]:
# Convert pit_id to a unique numeric code: each distinct pit_id gets a 1-based integer,
# numbered in the order the values are returned by unique().
# NOTE(review): the numbering therefore depends on row order in the input file — confirm
# that any other dataset joined on pit_id_num is encoded with the same mapping.
unique_pits = df_fleet['pit_id'].unique()
pit_map = dict(zip(unique_pits, range(1, len(unique_pits) + 1)))
df_fleet['pit_id_num'] = df_fleet['pit_id'].map(pit_map)

# Convert the numeric columns to a numeric dtype.
num_cols = [
    "tonnage_last_week", "effective_capacity_ton",
    "breakdown_trucks", "predicted_repair_hours"
]

for col in num_cols:
    # Count the NaNs already present so only values lost by the coercion are reported.
    missing_before = df_fleet[col].isna().sum()
    df_fleet[col] = pd.to_numeric(df_fleet[col], errors='coerce')
    # errors='coerce' replaces unparseable values with NaN without any signal; surface the
    # count so bad input does not flow unnoticed into the derived features.
    n_coerced = df_fleet[col].isna().sum() - missing_before
    if n_coerced > 0:
        print(f"[WARN] {n_coerced} nilai non-numerik pada kolom {col} diubah menjadi NaN")

# Derived features

# realized_vs_capacity_ratio: tonnage_last_week divided by effective_capacity_ton.
# Rows whose capacity is not strictly positive (zero, negative or NaN) get 0 instead.
capacity = df_fleet["effective_capacity_ton"]
has_capacity = capacity > 0
df_fleet["realized_vs_capacity_ratio"] = np.where(
    has_capacity,
    df_fleet["tonnage_last_week"].div(capacity),
    0
)

# capacity_utilization_change: relative change of effective_capacity_ton versus the
# previous row of the same pit.
# Sort first so that shift(1) picks up the preceding week_start within each pit_id.
df_fleet = df_fleet.sort_values(by=["pit_id", "week_start"])
capacity_by_pit = df_fleet.groupby("pit_id")["effective_capacity_ton"]
df_fleet["effective_capacity_ton_prev"] = capacity_by_pit.shift(1)

prev_capacity = df_fleet["effective_capacity_ton_prev"]
capacity_delta = df_fleet["effective_capacity_ton"] - prev_capacity
# The first row of every pit has no predecessor (NaN), so it falls into the 0 branch,
# as does any row whose previous capacity is not strictly positive.
df_fleet["capacity_utilization_change"] = np.where(
    prev_capacity > 0,
    capacity_delta / prev_capacity,
    0
)

# Breakdown impact: number of broken-down trucks times the predicted repair hours.
df_fleet["breakdown_impact"] = df_fleet["breakdown_trucks"].mul(df_fleet["predicted_repair_hours"])

# Production lag features: tonnage_last_week shifted by one and two rows within each pit.
# These rely on df_fleet already being ordered by week_start inside every pit_id.
tonnage_by_pit = df_fleet.groupby("pit_id")["tonnage_last_week"]
df_fleet["realized_ton_lag1"] = tonnage_by_pit.shift(1)
df_fleet["realized_ton_lag2"] = tonnage_by_pit.shift(2)

# Rolling mean over a window of up to 4 rows per pit (current row included);
# min_periods=1 yields a value from the very first row of each pit.
# presumably one row per week, hence "4 weeks" — verify against the input data
df_fleet["realized_ton_roll_4w"] = tonnage_by_pit.transform(
    lambda weekly: weekly.rolling(window=4, min_periods=1).mean()
)

# Equipment supply index (placeholder, filled with NaN here).
df_fleet["equipment_supply_index"] = np.nan

# Placeholder columns for later integration: created here as all-NaN columns.

# Road & weather
road_weather_cols = [
    "road_condition_score",
    "expected_cycle_time_min",
    "effective_truck_throughput_week",
    "heavy_rain_flag",
    "weather_index",
]

# Production / HE
prod_cols = [
    "total_active_equipment",
    "avg_operating_hours",
    "he_supply_index",
]

# One pass over both groups; road/weather columns are added first, then production/HE.
for col in road_weather_cols + prod_cols:
    df_fleet[col] = np.nan

# Save the engineered fleet features to CSV (the DataFrame index is not written).
fe_file = FE_DIR.joinpath("fleet_fe.csv")
df_fleet.to_csv(path_or_buf=fe_file, index=False)
print(f"FE Fleet selesai dan disimpan di {fe_file}")

# Preview the first five rows.
display(df_fleet.head(n=5))

FE Fleet selesai dan disimpan di ..\all_dataset\feature_dataset\fleet_fe.csv


Unnamed: 0,week_start,pit_id,available_trucks,breakdown_trucks,utilization_pct,capacity_ton,effective_capacity_ton,tonnage_last_week,predicted_repair_hours,pit_id_num,...,realized_ton_roll_4w,equipment_supply_index,road_condition_score,expected_cycle_time_min,effective_truck_throughput_week,heavy_rain_flag,weather_index,total_active_equipment,avg_operating_hours,he_supply_index
0,2023-01-02,PIT-1,24,2,81.11,45,876.04,10000,6.299674,1,...,10000.0,,,,,,,,,
1,2023-01-09,PIT-1,28,3,91.19,45,1148.96,9266,17.89875,1,...,9633.0,,,,,,,,,
2,2023-01-16,PIT-1,18,2,72.86,50,655.77,9281,8.79788,1,...,9515.666667,,,,,,,,,
3,2023-01-23,PIT-1,31,0,81.7,50,1266.29,10128,12.701389,1,...,9668.75,,,,,,,,,
4,2023-01-30,PIT-1,20,3,90.44,45,813.96,10195,10.010415,1,...,9717.5,,,,,,,,,
