In [4]:
# Import Library
import pandas as pd
import numpy as np
from pathlib import Path

In [5]:
# PATH CONFIG
# Cleaned inputs are read from CLEAN; engineered outputs are written to FE_DIR.
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
# Create the output directory (and any missing parents) up front.
FE_DIR.mkdir(parents=True, exist_ok=True)

# LOAD CLEAN TRUCK_TO_SHIP DATA
# Fail fast with an explicit message when the cleaned CSV is absent;
# otherwise read it with week_start parsed as a datetime column.
tts_file = CLEAN.joinpath("truck_to_ship.csv")
if tts_file.exists():
    df_tts = pd.read_csv(tts_file, parse_dates=["week_start"])
else:
    raise RuntimeError(f"[ERROR] File truck_to_ship.csv tidak ditemukan di {CLEAN}")

In [6]:
# -------------------------------
# TRUCK & JETTY ID
# -------------------------------
# Each distinct non-null identifier gets a 1-based integer code, assigned in
# sorted order of the raw values. Rows whose identifier is missing map to NaN.
unique_trucks = sorted(df_tts["truck_id"].dropna().unique())
truck_map = dict(zip(unique_trucks, range(1, len(unique_trucks) + 1)))

unique_jetty = sorted(df_tts["jetty_id"].dropna().unique())
jetty_map = dict(zip(unique_jetty, range(1, len(unique_jetty) + 1)))

# Map, then coerce to a numeric dtype in one step per column.
df_tts["truck_id_num"] = pd.to_numeric(df_tts["truck_id"].map(truck_map), errors="coerce")
df_tts["jetty_id_num"] = pd.to_numeric(df_tts["jetty_id"].map(jetty_map), errors="coerce")

# -------------------------------
# FLAG COLUMNS (0/1)
# -------------------------------
# Every flag column must exist and be integer-typed: a column that is absent
# is created as all zeros, and missing values in an existing column become 0.
flag_cols = ["allocated_for_shipping", "road_flood_flag", "crossing_queue_flag"]
for col in flag_cols:
    if col in df_tts.columns:
        flag_values = df_tts[col]
    else:
        flag_values = pd.Series(0, index=df_tts.index)
    df_tts[col] = flag_values.fillna(0).astype(int)

# -------------------------------
# SUPPLY FEATURES
# -------------------------------
# Zero denominators are turned into NaN before dividing, so the ratios come out
# as missing values instead of +/-inf. The cycle-time denominator gets the same
# guard as the trip-count denominator.
trips_nonzero = df_tts["trip_count"].replace(0, np.nan)
cycle_min_nonzero = df_tts["avg_cycle_time_min"].replace(0, np.nan)

# Tonnage moved per trip.
df_tts["supply_per_trip_ton"] = df_tts["tonnage_moved_ton"] / trips_nonzero
# Tonnage moved per hour of average cycle time (the column name indicates
# minutes, hence the division by 60).
df_tts["ton_per_hour"] = df_tts["tonnage_moved_ton"] / (cycle_min_nonzero / 60)
# Trip count divided by the average cycle time.
df_tts["trip_efficiency_score"] = df_tts["trip_count"] / cycle_min_nonzero

# -------------------------------
# AGGREGATED WEEKLY PER JETTY
# -------------------------------
weekly_keys = ["week_start", "jetty_id"]
weekly_feature_cols = ["weekly_truck_supply_ton", "weekly_trips_total", "avg_cycle_time_weighted"]

# avg_cycle_time_weighted is a trip-weighted mean of the per-row cycle time:
#   sum(cycle_time * trips) / sum(trips)
# Rows with a missing cycle time carry no weight, so they cannot dilute the
# average through the denominator.
cycle_weight = df_tts["trip_count"].where(df_tts["avg_cycle_time_min"].notna())

weekly = (
    df_tts.assign(
        _cycle_weight=cycle_weight,
        _cycle_x_weight=df_tts["avg_cycle_time_min"] * cycle_weight,
    )
    .groupby(weekly_keys, as_index=False)
    .agg(
        weekly_truck_supply_ton=("tonnage_moved_ton", "sum"),
        weekly_trips_total=("trip_count", "sum"),
        _cycle_x_weight=("_cycle_x_weight", "sum"),
        _cycle_weight=("_cycle_weight", "sum"),
        _cycle_mean=("avg_cycle_time_min", "mean"),
    )
)

# When a group has zero total weight (no trips), the weighted mean is undefined;
# fall back to the plain mean of the cycle times recorded for that group.
weighted_cycle = weekly["_cycle_x_weight"] / weekly["_cycle_weight"].replace(0, np.nan)
weekly["avg_cycle_time_weighted"] = weighted_cycle.fillna(weekly["_cycle_mean"])
weekly = weekly[weekly_keys + weekly_feature_cols]

# Drop the weekly columns left by a previous execution of this cell before
# merging; otherwise a re-run would yield duplicated *_x / *_y columns.
df_tts = (
    df_tts.drop(columns=weekly_feature_cols, errors="ignore")
          .merge(weekly, on=weekly_keys, how="left")
)

# -------------------------------
# PLACEHOLDER COLUMNS
# -------------------------------
# Columns that this notebook does not populate itself. Each is created as
# all-NaN only when the frame does not already carry it.
placeholder_cols = dict.fromkeys(
    [
        # Weather
        "weekly_rain_mm",
        "heavy_rain_flag",
        "avg_wind_ms",
        "max_wind_ms",
        "wind_gust_flag",
        "jetty_weather_index",
        # Shipping demand & utilization
        "weekly_ship_demand_ton",
        "supply_alignment_ratio",
        "estimated_queue_hours",
        "truck_to_ship_utilization",
        "reassign_flag",
        # Consistency check
        "supply_vs_capacity_check",
    ],
    np.nan,
)

for col, val in placeholder_cols.items():
    # Existing columns are left untouched.
    if col in df_tts.columns:
        continue
    df_tts[col] = val

# -------------------------------
# COMPUTED PLACEHOLDERS
# -------------------------------
weekly_supply = df_tts["weekly_truck_supply_ton"]

# Truck utilization default: supply divided by itself, i.e. 1 wherever the
# weekly supply is non-zero and NaN where it is zero or missing.
df_tts["truck_to_ship_utilization"] = weekly_supply.div(weekly_supply.replace(0, np.nan))

# Supply alignment ratio = supply / demand. Missing demand is first filled
# with the weekly supply, so those rows get a ratio of 1.
demand_filled = df_tts["weekly_ship_demand_ton"].fillna(weekly_supply)
df_tts["weekly_ship_demand_ton"] = demand_filled
df_tts["supply_alignment_ratio"] = weekly_supply.div(demand_filled.replace(0, np.nan))

# Estimated queue hours: dummy value of 0 wherever nothing is recorded.
df_tts["estimated_queue_hours"] = df_tts["estimated_queue_hours"].fillna(0)

# -------------------------------
# WINDOWS & LAG FEATURES
# -------------------------------
df_tts = df_tts.sort_values(["jetty_id_num", "week_start"])

lag_keys = ["jetty_id_num", "week_start"]
lag_feature_cols = ["weekly_truck_supply_ton_lag1", "weekly_truck_supply_ton_roll4"]

# df_tts can hold several rows per (jetty, week), each repeating the same
# weekly_truck_supply_ton. Shifting or rolling over those rows would step
# between rows of the same week instead of between weeks, so the features are
# computed on a one-row-per-(jetty, week) table and merged back.
jetty_weeks = (
    df_tts[lag_keys + ["weekly_truck_supply_ton"]]
    .drop_duplicates(subset=lag_keys)
    .sort_values(lag_keys)
    .reset_index(drop=True)
)
supply_by_jetty = jetty_weeks.groupby("jetty_id_num")["weekly_truck_supply_ton"]

# lag1: supply of the previous observed week for the same jetty.
supply_lag1 = supply_by_jetty.shift(1)
# roll4: mean over the current week and up to three preceding observed weeks.
supply_roll4 = (
    supply_by_jetty.rolling(4, min_periods=1)
                   .mean()
                   .reset_index(level=0, drop=True)
)
jetty_weeks = jetty_weeks.assign(
    weekly_truck_supply_ton_lag1=supply_lag1,
    weekly_truck_supply_ton_roll4=supply_roll4,
)

# Remove columns from a previous execution so re-running the cell does not
# create *_x / *_y duplicates. A left merge keeps df_tts's sorted row order.
df_tts = (
    df_tts.drop(columns=lag_feature_cols, errors="ignore")
          .merge(jetty_weeks[lag_keys + lag_feature_cols], on=lag_keys, how="left")
)

# -------------------------------
# CONVERT ALL NUMERIC COLUMNS
# -------------------------------
# Columns expected to be numeric in the exported file. Values that cannot be
# parsed as numbers are coerced to NaN.
numeric_cols = [
    # identifiers
    "truck_id_num", "jetty_id_num",
    # raw measurements
    "trip_count", "tonnage_moved_ton", "avg_cycle_time_min",
    # per-row supply features
    "supply_per_trip_ton", "ton_per_hour", "trip_efficiency_score",
    # weekly aggregates and their lag / rolling variants
    "weekly_truck_supply_ton", "weekly_trips_total",
    "avg_cycle_time_weighted", "weekly_truck_supply_ton_lag1", "weekly_truck_supply_ton_roll4",
    # placeholders
    "supply_vs_capacity_check",
    "weekly_rain_mm", "heavy_rain_flag", "avg_wind_ms", "max_wind_ms", "wind_gust_flag", "jetty_weather_index",
    "weekly_ship_demand_ton", "supply_alignment_ratio",
    "estimated_queue_hours", "truck_to_ship_utilization", "reassign_flag",
]

# Only columns actually present in the frame are converted.
present_numeric_cols = [name for name in numeric_cols if name in df_tts.columns]
for col in present_numeric_cols:
    df_tts[col] = pd.to_numeric(df_tts[col], errors="coerce")

# -------------------------------
# SAVE FEATURE ENGINEERING FILE
# -------------------------------
# Write the engineered frame to FE_DIR without the index column.
fe_file = FE_DIR / "truck_to_ship_fe.csv"
df_tts.to_csv(fe_file, index=False)
print(f"FE Truck-to-Ship selesai dan disimpan di {fe_file}")

# Preview
print(df_tts.head())
# DataFrame.info() writes its report to stdout itself and returns None, so it
# is called directly; wrapping it in print() would append a stray "None" line.
df_tts.info()


FE Truck-to-Ship selesai dan disimpan di ..\all_dataset\feature_dataset\truck_to_ship_fe.csv
  week_start      truck_id  allocated_for_shipping  avg_cycle_time_min  \
0 2023-01-02   shiptruck_1                       1                 117   
1 2023-01-02  shiptruck_14                       1                  99   
2 2023-01-02   shiptruck_9                       1                  96   
3 2023-01-02   shiptruck_2                       1                 115   
4 2023-01-02  shiptruck_16                       1                  92   

   trip_count  tonnage_moved_ton  road_flood_flag  crossing_queue_flag  \
0          15              460.3                0                    0   
1          30              719.5                1                    0   
2          23              607.0                1                    0   
3          16              391.6                0                    0   
4          28              613.1                0                    0   

        jetty_id 

In [7]:
# Show up to ten distinct raw identifiers per ID column, then the dtype of
# every column in the engineered frame.
for id_col in ("truck_id", "jetty_id"):
    print(df_tts[id_col].unique()[:10])
print(df_tts.dtypes)


['shiptruck_1' 'shiptruck_14' 'shiptruck_9' 'shiptruck_2' 'shiptruck_16'
 'shiptruck_6' 'shiptruck_20' 'shiptruck_12' 'shiptruck_4' 'shiptruck_5']
['jetty_primary' 'jetty_secondary']
week_start                       datetime64[ns]
truck_id                                 object
allocated_for_shipping                    int64
avg_cycle_time_min                        int64
trip_count                                int64
tonnage_moved_ton                       float64
road_flood_flag                           int64
crossing_queue_flag                       int64
jetty_id                                 object
truck_id_num                              int64
jetty_id_num                              int64
supply_per_trip_ton                     float64
ton_per_hour                            float64
trip_efficiency_score                   float64
weekly_truck_supply_ton                 float64
weekly_trips_total                        int64
avg_cycle_time_weighted                 float64
w