# **Import Library**

In [2]:
import requests
import pandas as pd
import numpy as np

# **1. Weathers Raw**

In [None]:
# CONFIGURATION
START = "20230101"
END   = "20251001"

# PARAMETER NASA POWER
PARAMS = [
    "PRECTOTCORR",                 # Curah hujan (mm/day)
    "RH2M",                       # Kelembaban (%)
    "ALLSKY_SFC_SW_DWN",          # Radiasi matahari
    "T2M", "T2M_MAX", "T2M_MIN",  # Suhu
    "WS10M",                      # Angin 10 meter
    "CLOUD_AMT"                   # Cloud cover
]

# TITIK KOORDINAT
locations = {
    "pit_entrance": (-0.4560, 117.1100),
    "active_pit":   (-0.4700, 117.1300),
    "rom":          (-0.4900, 117.1700)
}

# FETCH NASA POWER
def fetch_weather(name, lat, lon):

    print(f"Downloading NASA POWER data for: {name} ({lat}, {lon})")

    url = (
        "https://power.larc.nasa.gov/api/temporal/daily/point?"
        f"start={START}&end={END}"
        f"&latitude={lat}&longitude={lon}"
        f"&parameters={','.join(PARAMS)}"
        "&format=JSON"
        "&community=AG"
    )

    r = requests.get(url)
    data = r.json()

    # VALIDATION
    if "properties" not in data or "parameter" not in data["properties"]:
        print("NASA API ERROR")
        print(data)
        raise ValueError("Invalid NASA response — cek parameter atau format.")

    params = data["properties"]["parameter"]

    first_key = list(params.keys())[0]
    dates = list(params[first_key].keys())

    # DataFrame
    df = pd.DataFrame({
        "date": pd.to_datetime(dates),
        "location": name,
        "lat": lat,
        "lon": lon,
        "rainfall_mm": list(params["PRECTOTCORR"].values()),
        "humidity_pct": list(params["RH2M"].values()),
        "solar_radiation_wm2": list(params["ALLSKY_SFC_SW_DWN"].values()),
        "temp_c": list(params["T2M"].values()),
        "temp_max_c": list(params["T2M_MAX"].values()),
        "temp_min_c": list(params["T2M_MIN"].values()),
        "wind_speed_10m_mps": list(params["WS10M"].values()),
        "cloud_cover_pct": list(params["CLOUD_AMT"].values())
    })

    return df


# GENERATE ALL
def generate_all():
    all_dfs = []

    for name, (lat, lon) in locations.items():
        df = fetch_weather(name, lat, lon)
        df.to_csv(f"weather_{name}.csv", index=False)
        print(f"saved weather_{name}.csv")
        all_dfs.append(df)

    # COMBINE
    df_all = pd.concat(all_dfs, ignore_index=True)
    df_all.to_csv("weather.csv", index=False)
    print("saved weather.csv")

generate_all()

Downloading NASA POWER data for: pit_entrance (-0.456, 117.11)
saved weather_pit_entrance.csv
Downloading NASA POWER data for: active_pit (-0.47, 117.13)
saved weather_active_pit.csv
Downloading NASA POWER data for: rom (-0.49, 117.17)
saved weather_rom.csv
saved weather.csv


# **2. Roads Raw**

In [3]:
# FIXED WEATHER COORDINATES
locations = {
    "pit_entrance": (-0.4560, 117.1100),
    "active_pit":   (-0.4700, 117.1300),
    "rom":          (-0.4900, 117.1700)
}

# ROAD SEGMENT DEFINITION
# (connecting the weather points according to the original hauling route)
road_segments = [
    # 1. Pit Entrance to Active Pit (In-Pit Road)
    {
        "road_id": "R-01",
        "segment_name": "Pit Entrance to Active Pit",
        "segment_type": "inpit",
        "start": "pit_entrance",
        "end": "active_pit",
        "surface_type": "laterite"
    },

    # 2. Active Pit to ROM (Ramp Road)
    {
        "road_id": "R-02",
        "segment_name": "Active Pit to ROM",
        "segment_type": "ramp",
        "start": "active_pit",
        "end": "rom",
        "surface_type": "gravel"
    },

    # 3. ROM to Stockpile (Main Haul Road)
    {
        "road_id": "R-03",
        "segment_name": "ROM to Stockpile",
        "segment_type": "main_haul",
        "start": "rom",
        "end": "stockpile",
        "surface_type": "hard-rock"
    },

    # 4. Stockpile to Jetty (Jetty Road)
    {
        "road_id": "R-04",
        "segment_name": "Stockpile to Jetty",
        "segment_type": "jetty_road",
        "start": "stockpile",
        "end": "jetty",
        "surface_type": "coal_ash"
    }
]

# ROAD DATAFRAME
def generate_road_dataset():

    rows = []

    for seg in road_segments:
        start_lat, start_lon = locations[seg["start"]]
        end_lat, end_lon = locations[seg["end"]]

        # SYNTHETIC VALUES
        length = round(np.random.uniform(1.0, 3.0), 2)   # 1–3 km
        slope = round(np.random.uniform(3, 12), 1)       # 3–12%
        curvature = np.random.choice(["low", "medium", "high"])
        elev_start = np.random.randint(40, 80)
        elev_end   = elev_start + np.random.randint(-5, 12)

        rows.append({
            "road_id": seg["road_id"],
            "segment_name": seg["segment_name"],
            "segment_type": seg["segment_type"],
            "length_km": length,
            "slope_pct": slope,
            "curvature_level": curvature,
            "surface_type": seg["surface_type"],
            "elevation_start_m": elev_start,
            "elevation_end_m": elev_end,
            "lat_start": start_lat,
            "lon_start": start_lon,
            "lat_end": end_lat,
            "lon_end": end_lon
        })

    return pd.DataFrame(rows)

# GENERATE
df_road = generate_road_dataset()
df_road.to_csv("road.csv", index=False)

print("Road dataset generated:")
print(df_road)

KeyError: 'stockpile'

# **3. Fleet Raw**

In [3]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"
SEED = 42

np.random.seed(SEED)

# GENERATE WEEKLY DATE RANGE
week_range = pd.date_range(start=START_DATE, end=END_DATE, freq="W-MON")

# SYNTHETIC FLEET DATA GENERATOR
data = {
    "week_start": [],
    "truck_available": [],
    "truck_breakdown": [],
    "truck_utilization_pct": [],
    "truck_capacity_ton": [],
    "effective_capacity_ton": [],
    "tonnage_moved_last_week": []
}

# Initial baseline for first week
last_week_tonnage = 10000  # arbitrary baseline starter

for week in week_range:

    # Core Inputs (User-like behaviour)
    truck_available = np.random.randint(15, 30)       # fleet available
    truck_breakdown = np.random.randint(0, 5)         # broken units
    utilization = np.random.uniform(60, 90)           # %
    capacity = np.random.choice([35, 45, 50])         # ton per truck

    # FE Calculation
    effective_capacity = truck_available * capacity * (utilization / 100)

    # tonnage moved last week
    # simulate tonnage change influenced by capacity
    tonnage_this_week = last_week_tonnage + np.random.randint(-1500, 1500)
    tonnage_this_week = max(5000, tonnage_this_week)

    # Append to dataset
    data["week_start"].append(week)
    data["truck_available"].append(truck_available)
    data["truck_breakdown"].append(truck_breakdown)
    data["truck_utilization_pct"].append(round(utilization, 2))
    data["truck_capacity_ton"].append(capacity)
    data["effective_capacity_ton"].append(round(effective_capacity, 2))
    data["tonnage_moved_last_week"].append(last_week_tonnage)

    # Update for next loop
    last_week_tonnage = tonnage_this_week

# CONVERT TO DATAFRAME
df_fleet = pd.DataFrame(data)

# SAVE TO CSV
output_path = "fleet.csv"
df_fleet.to_csv(output_path, index=False)

print("Fleet Dataset Generated:", df_fleet.shape)
df_fleet.head()


Fleet Dataset Generated: (144, 7)


Unnamed: 0,week_start,truck_available,truck_breakdown,truck_utilization_pct,truck_capacity_ton,effective_capacity_ton,tonnage_moved_last_week
0,2023-01-02,21,3,88.52,50,929.48,10000
1,2023-01-09,27,4,64.68,50,873.19,9595
2,2023-01-16,25,2,85.99,50,1074.82,9333
3,2023-01-23,19,1,81.66,45,698.19,9518
4,2023-01-30,16,3,65.5,45,471.62,10871


# **4. Heavy Equipment Raw**

In [4]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"
NUM_WEEKS = pd.date_range(start=START_DATE, end=END_DATE, freq='W-MON').shape[0]

# GENERATE HEAVY EQUIPMENT DATA
np.random.seed(42)

data = {
    "week_start": pd.date_range(start=START_DATE, end=END_DATE, freq='W-MON'),
    "total_equipment_active": np.random.randint(35, 50, size=NUM_WEEKS),
    "total_equipment_standby": np.random.randint(5, 15, size=NUM_WEEKS),
    "total_equipment_maintenance": np.random.randint(2, 10, size=NUM_WEEKS),
    "avg_operating_hours": np.round(np.random.uniform(15, 20, size=NUM_WEEKS), 1),
    "est_maintenance_hours": np.round(np.random.uniform(10, 16, size=NUM_WEEKS), 1),
    "potential_breakdown_flag": np.random.choice([0,1], size=NUM_WEEKS, p=[0.7,0.3]),
}

# DataFrame
df_heavy_equipment = pd.DataFrame(data)

# GENERATE
df_heavy_equipment.to_csv("heavy_equipment.csv", index=False)
print("\n Heavy Equipment Dataset Generated.")
print(df_heavy_equipment.head())


  week_start  total_equipment_active  total_equipment_standby  \
0 2023-01-02                      41                        7   
1 2023-01-09                      38                        7   
2 2023-01-16                      47                        5   
3 2023-01-23                      49                        9   
4 2023-01-30                      45                       14   

   total_equipment_maintenance  avg_operating_hours  est_maintenance_hours  \
0                            2                 16.8                   10.6   
1                            9                 16.3                   10.3   
2                            7                 16.5                   15.8   
3                            5                 16.6                   15.1   
4                            6                 19.2                   12.1   

   potential_breakdown_flag  
0                         1  
1                         0  
2                         1  
3                   

# **5. Production Raw**

In [6]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"

# GENERATE WEEKLY DATES
week_dates = pd.date_range(start=START_DATE, end=END_DATE, freq='W-MON')
np.random.seed(42)
n_weeks = len(week_dates)

# Random Target Tonnage
target_tonase = np.random.randint(8000, 12001, size=n_weeks)

# Progress Last Week
progress_last_week = np.random.randint(70, 101, size=n_weeks)

# Production Realized
production_realized = target_tonase * (progress_last_week / 100) * np.random.uniform(0.9, 1.1, size=n_weeks)
production_realized = np.round(production_realized, 0)

# DATAFRAME
df_production = pd.DataFrame({
    "week_start": week_dates,
    "target_tonase": target_tonase,
    "progress_last_week": progress_last_week,
    "production_realized": production_realized
})

# GENERATE
df_production.to_csv("production.csv", index=False)
print("Dataset production mingguan berhasil dibuat!")
print(df_production.head())


Dataset production mingguan berhasil dibuat!
  week_start  target_tonase  progress_last_week  production_realized
0 2023-01-02          11174                 100              10975.0
1 2023-01-09          11507                  80               8346.0
2 2023-01-16           8860                  88               7555.0
3 2023-01-23           9294                  86               8208.0
4 2023-01-30           9130                  77               7284.0


# **6. Ship Schedule Raw**

In [7]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"
WEEKS = pd.date_range(start=START_DATE, end=END_DATE, freq='W-MON')

NUM_SHIPS = 5  # number of simulation ships
JETTIES = ['Jetty_A', 'Jetty_B', 'Jetty_C']  # jetty destination
CAPACITY_RANGE = (5000, 20000)  # ship capacity in tons

# GENERATE SHIP ID
ship_ids = [f"Ship_{i+1}" for i in range(NUM_SHIPS)]

# SIMULATE DATA
data = []
np.random.seed(42)  # reproducibility

for week_start in WEEKS:
    for ship in ship_ids:
        # ETA (Estimated Time of Arrival)
        eta = week_start + pd.Timedelta(days=np.random.randint(0,5))
        # ETD (Estimated Time of Departure)
        etd = eta + pd.Timedelta(days=np.random.randint(2,6))
        # Capacity
        capacity = np.random.randint(CAPACITY_RANGE[0], CAPACITY_RANGE[1]+1)
        # Jetty destination
        jetty = np.random.choice(JETTIES)

        data.append({
            'week_start': week_start,
            'ship_id': ship,
            'eta': eta,
            'etd': etd,
            'capacity_ton': capacity,
            'jetty_id': jetty
        })

# DATAFRAME
df_ship_schedule = pd.DataFrame(data)

# GENERATE
print(df_ship_schedule.head(10))
df_ship_schedule.to_csv('ship_schedule.csv', index=False)


  week_start ship_id        eta        etd  capacity_ton jetty_id
0 2023-01-02  Ship_1 2023-01-05 2023-01-07         10390  Jetty_C
1 2023-01-02  Ship_2 2023-01-06 2023-01-08         10734  Jetty_B
2 2023-01-02  Ship_3 2023-01-04 2023-01-08          9426  Jetty_C
3 2023-01-02  Ship_4 2023-01-06 2023-01-11         16111  Jetty_C
4 2023-01-02  Ship_5 2023-01-06 2023-01-09         15583  Jetty_B
5 2023-01-09  Ship_1 2023-01-10 2023-01-15         10051  Jetty_A
6 2023-01-09  Ship_2 2023-01-09 2023-01-14          8385  Jetty_B
7 2023-01-09  Ship_3 2023-01-13 2023-01-18         16096  Jetty_A
8 2023-01-09  Ship_4 2023-01-11 2023-01-15          7558  Jetty_B
9 2023-01-09  Ship_5 2023-01-12 2023-01-17         14167  Jetty_C


# **7. Stockpile Raw**

In [9]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"
STOCKPILE_LIST = ["SP-1", "SP-2", "SP-3"]

# GENERATE WEEKLY DATE RANGE
dates = pd.date_range(start=START_DATE, end=END_DATE, freq="W-MON")

# GENERATE SYNTHETIC STOCKPILE DATA
rows = []

np.random.seed(42)

for sp in STOCKPILE_LIST:
    # initialize current stock baseline
    current_stock = np.random.randint(40000, 70000)

    for dt in dates:
        # simulate values
        incoming_prod = np.random.randint(30000, 80000)      # production arrives next week
        planned_load  = np.random.randint(20000, 70000)      # ship requirements

        # count the stock after loading
        stock_after = current_stock + incoming_prod - planned_load

        # data rows
        rows.append({
            "stockpile_id": sp,
            "date": dt,
            "current_stock_ton": current_stock,
            "incoming_production_ton": incoming_prod,
            "planned_loading_ton": planned_load,
            "stock_after_loading_ton": stock_after
        })

        # update stock
        current_stock = max(stock_after, 0)

# DATAFRAME
df_stockpile = pd.DataFrame(rows)

df_stockpile = df_stockpile.sort_values(["stockpile_id", "date"]).reset_index(drop=True)
df_stockpile.to_csv("stockpile.csv", index=False)
print("File saved as stockpile.csv")
df_stockpile.head()


File saved as stockpile.csv


Unnamed: 0,stockpile_id,date,current_stock_ton,incoming_production_ton,planned_loading_ton,stock_after_loading_ton
0,SP-1,2023-01-02,63654,45795,20860,88589
1,SP-1,2023-01-09,88589,68158,64732,92015
2,SP-1,2023-01-16,92015,41284,26265,107034
3,SP-1,2023-01-23,107034,46850,57194,96690
4,SP-1,2023-01-30,96690,51962,67191,81461


# **8. Truck to Ship**

In [10]:
# CONFIGURATION
START_DATE = "20230101"
END_DATE   = "20251001"

WEEKS = pd.date_range(start=START_DATE, end=END_DATE, freq="W-MON")

NUM_TRUCKS = 20
TRUCK_IDS = [f"ShipTruck_{i+1}" for i in range(NUM_TRUCKS)]

JETTIES = ["Jetty_A", "Jetty_B", "Jetty_C"]

np.random.seed(42)

rows = []

for week_start in WEEKS:
    for truck in TRUCK_IDS:

        # truck dedicated to shipping
        allocated = np.random.choice([0, 1], p=[0.2, 0.8])

        # cycle time per trip (minutes) - shipping is usually longer
        cycle_time = np.random.randint(60, 180)

        # trip count per week
        trips = np.random.randint(15, 45)

        # tonnage moved per week (per truck)
        tonnage = trips * np.random.randint(20, 35) * 0.9

        # road disturbance
        road_flood = np.random.choice([0, 1], p=[0.9, 0.1])
        crossing_queue = np.random.choice([0, 1], p=[0.85, 0.15])

        # choose jetty
        jetty = np.random.choice(JETTIES)

        rows.append({
            "week_start": week_start,
            "truck_id": truck,
            "allocated_for_shipping": allocated,
            "avg_cycle_time_min": cycle_time,
            "trip_count": trips,
            "tonnage_moved_ton": tonnage,
            "road_flood_flag": road_flood,
            "crossing_queue_flag": crossing_queue,
            "jetty_id": jetty
        })

df_shipping_fleet = pd.DataFrame(rows)
df_shipping_fleet.to_csv("truck_to_ship.csv", index=False)
df_shipping_fleet.head(10)


Unnamed: 0,week_start,truck_id,allocated_for_shipping,avg_cycle_time_min,trip_count,tonnage_moved_ton,road_flood_flag,crossing_queue_flag,jetty_id
0,2023-01-02,ShipTruck_1,1,152,29,783.0,0,0,Jetty_B
1,2023-01-02,ShipTruck_2,0,134,25,607.5,0,0,Jetty_C
2,2023-01-02,ShipTruck_3,0,61,38,1060.2,1,0,Jetty_A
3,2023-01-02,ShipTruck_4,1,117,36,1036.8,0,0,Jetty_C
4,2023-01-02,ShipTruck_5,1,151,42,1285.2,0,0,Jetty_C
5,2023-01-02,ShipTruck_6,1,175,39,772.2,0,0,Jetty_A
6,2023-01-02,ShipTruck_7,0,63,39,1088.1,0,0,Jetty_B
7,2023-01-02,ShipTruck_8,0,143,42,1285.2,0,0,Jetty_C
8,2023-01-02,ShipTruck_9,0,140,18,340.2,0,0,Jetty_B
9,2023-01-02,ShipTruck_10,1,63,36,1036.8,1,0,Jetty_B
