In [8]:
# Import Library
import pandas as pd
import numpy as np
from pathlib import Path

In [9]:
# Directory layout: cleaned inputs are read from CLEAN, engineered features are written to FE_DIR.
BASE = Path("../all_dataset")
CLEAN = BASE.joinpath("clean_dataset")
FE_DIR = BASE.joinpath("feature_dataset")
FE_DIR.mkdir(parents=True, exist_ok=True)

# Load the cleaned ship schedule; abort with an explicit error when the input file is absent.
ship_file = CLEAN.joinpath("ship_schedule.csv")
if ship_file.exists():
    # Only week_start is parsed as a datetime at read time.
    df_ship = pd.read_csv(ship_file, parse_dates=["week_start"])
else:
    raise RuntimeError(f"[ERROR] File ship_schedule.csv tidak ditemukan di {CLEAN}")

In [10]:
# Convert identifiers and timestamps to numeric form.

# Replace each distinct jetty_id with a 1-based integer code, numbered in order of first appearance.
unique_jetties = df_ship['jetty_id'].unique()
jetty_map = {k: i+1 for i, k in enumerate(unique_jetties)}
df_ship['jetty_id'] = df_ship['jetty_id'].map(jetty_map)


def _to_epoch_seconds(values):
    """Convert a column of timestamps to whole seconds since 1970-01-01.

    Parameters
    ----------
    values : pandas.Series
        Raw timestamp column (typically strings read from CSV).

    Returns
    -------
    pandas.Series
        int64 when every entry parses; float64 with NaN for entries that are
        missing or cannot be parsed. A column that is already numeric is
        returned unchanged.
    """
    if pd.api.types.is_numeric_dtype(values):
        # Already numeric (e.g. this cell was re-run): pd.to_datetime would read
        # the integers as nanoseconds since the epoch and corrupt the values.
        return values

    stamps = pd.to_datetime(values, errors='coerce')
    # Use an epoch with matching tz-awareness so the subtraction is valid.
    if stamps.dt.tz is None:
        epoch = pd.Timestamp("1970-01-01")
    else:
        epoch = pd.Timestamp("1970-01-01", tz="UTC")
    # Timedelta floor-division is independent of the datetime resolution and
    # yields NaN for NaT, instead of the int64 sentinel that astype('int64')
    # would leak into the data.
    return (stamps - epoch) // pd.Timedelta(seconds=1)


# ETA & ETD as Unix epoch seconds (not datetime objects).
df_ship['eta'] = _to_epoch_seconds(df_ship['eta'])
df_ship['etd'] = _to_epoch_seconds(df_ship['etd'])


# Placeholder feature columns, grouped by the dataset they relate to.
# Every column listed here is created empty (NaN) on the ship schedule frame.

# Stockpile placeholders
stockpile_cols = [
    "stock_after_loading_ton",
    "stock_coverage_weeks",
    "shortage_flag",
    "rolling_sum_inflow_4w",
    "rolling_sum_outflow_4w",
    "cumulative_deficit",
]

# Truck-to-Ship placeholders
truck_cols = [
    "weekly_truck_supply_ton",
    "truck_to_ship_utilization",
    "supply_alignment_ratio",
    "estimated_queue_hours",
    "reassign_flag",
    "supply_alignment_ratio_lag1",
]

# Weather placeholders
weather_cols = [
    "heavy_rain_flag",
    "wind_alert_flag",
    "weather_index",
    "weather_lag1",
    "weather_trend_4w",
    "jetty_weather_index",
]

# One pass over all three groups, in the same order: stockpile, truck, weather.
for col in (*stockpile_cols, *truck_cols, *weather_cols):
    df_ship[col] = np.nan

# Derived ship-schedule features, built in one chain on the frame ordered by
# jetty and then by week.
df_ship = (
    df_ship
    .sort_values(["jetty_id", "week_start"])
    .assign(
        # ship_required_ton: taken directly from planned_load, missing values counted as 0
        ship_required_ton=lambda frame: frame["planned_load"].fillna(0),
        # stock_after_vs_demand: placeholder, to be filled when merged with stockpile data
        stock_after_vs_demand=np.nan,
        # ship_to_stock_ratio: placeholder
        ship_to_stock_ratio=np.nan,
        # reallocation_needed_flag: placeholder, initialised to 0
        reallocation_needed_flag=0,
        # queued_ships_estimate: placeholder
        queued_ships_estimate=np.nan,
        # Windows & lags -- supply_alignment_ratio_lag1 (dummy value)
        supply_alignment_ratio_lag1=np.nan,
    )
    # Keep the flag column explicitly integer-typed.
    .astype({"reallocation_needed_flag": int})
)

# Defensive dtypes: coerce every engineered feature column to a numeric dtype;
# values that cannot be interpreted as numbers become NaN.
num_cols = [
    "ship_required_ton",
    *stockpile_cols,
    *truck_cols,
    *weather_cols,
    "queued_ships_estimate",
    "stock_after_vs_demand",
    "ship_to_stock_ratio",
]
df_ship = df_ship.assign(
    **{name: pd.to_numeric(df_ship[name], errors='coerce') for name in num_cols}
)

# Write the engineered frame to the feature directory (row index omitted).
fe_file = FE_DIR.joinpath("ship_schedule_fe.csv")
df_ship.to_csv(fe_file, index=False)
print(f"FE Ship Schedule selesai dan disimpan di {fe_file}")

# Quick look at the first rows of the result.
preview = df_ship.head()
print(preview)


FE Ship Schedule selesai dan disimpan di ..\all_dataset\feature_dataset\ship_schedule_fe.csv
  week_start         eta         etd  ship_size  jetty_id  loading_rate_tph  \
0 2023-01-02  1672876800  1672956000      55000         1              2500   
1 2023-01-02  1672704000  1672797600      65000         1              2500   
2 2023-01-09  1673481600  1673632800      65000         1              2500   
3 2023-01-09  1673308800  1673395200      60000         1              2500   
4 2023-01-09  1673308800  1673388000      55000         1              2500   

   est_loading_hours  weather_delay_h  queue_delay_h  planned_load  ...  \
0               22.0                0              0         55000  ...   
1               26.0                0              0         65000  ...   
2               26.0               12              4         65000  ...   
3               24.0                0              0         60000  ...   
4               22.0                0              0     