In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Saved Keras model, pickled scaler and the cleaned price workbook (Surabaya).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"

# Folder receiving the backtest table and plot; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()

# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# -----------------------------
# History (up to and including the cutoff) and backtest target (after it)
# -----------------------------
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Scale with the saved scaler, then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict in scaled space and map back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Surabaya 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Surabaya_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Surabaya Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Surabaya_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Surabaya 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 2.6390%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Surabaya_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Surabaya_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error

# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Input paths (change if needed): saved Keras model, pickled scaler and the
# cleaned price workbook (Banyuwangi).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"

# Backtest output folder; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)

def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()

# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History (up to and including the cutoff) and backtest target (after it).
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Scale with the saved scaler, then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict in scaled space and map back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Banyuwangi 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Banyuwangi_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Banyuwangi Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Banyuwangi_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Banyuwangi 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 4.1406%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Banyuwangi_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Banyuwangi_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error

# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Saved Keras model, pickled scaler and the cleaned price workbook (Blitar).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Blitar.xlsx"

# Folder receiving the backtest table and plot; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()


# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History (up to and including the cutoff) and backtest target (after it).
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Scale with the saved scaler, then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict in scaled space and map back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Blitar 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Blitar_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Blitar Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Blitar_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Blitar 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 3.4628%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Blitar_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Blitar_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error



# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Saved Keras model, pickled scaler and the cleaned price workbook (Jember).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Jember.xlsx"

# Folder receiving the backtest table and plot; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()

# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History (up to and including the cutoff) and backtest target (after it).
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Normalize the window with the saved scaler (presumably fit on the
    # training data), then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict one step in scaled space and map back to the original scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Jember 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Jember_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Jember Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Jember_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Jember 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 3.9588%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error



# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Input paths (change if needed): saved Keras model, pickled scaler and the
# cleaned price workbook (Kediri).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Kediri.xlsx"

# Backtest output folder; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()


# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History (up to and including the cutoff) and backtest target (after it).
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Scale with the saved scaler, then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict in scaled space and map back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Kediri 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Kediri_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Kediri Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Kediri_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Kediri 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 2.2605%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Input paths (change if needed): saved Keras model, pickled scaler and the
# cleaned price workbook (Madiun).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Madiun.xlsx"

# Backtest output folder; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return ``df`` indexed by date and sorted chronologically.

    When a 'Tanggal' column is present it is parsed as day-first dates, rows
    whose date cannot be parsed are dropped, and the column becomes the index.
    When the column is absent the frame is only sorted by its existing index.

    The caller's DataFrame is never modified; a new frame is returned.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deliberately not passed: pandas >= 2.0
        # deprecates it and infers a consistent format by default.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame, so the input column keeps its raw values.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # Also sort frames that arrived with a date index already in place.
    return df.sort_index()


# Fail fast, naming the missing artifact, before any expensive loading.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File data tidak ditemukan: {path_file_input}")
if not os.path.exists(path_scaler):
    raise FileNotFoundError(f"File scaler tidak ditemukan: {path_scaler}")
if not os.path.exists(path_model):
    raise FileNotFoundError(f"File model tidak ditemukan: {path_model}")

df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The data must reach into the 2024 backtest period.
if df.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data tidak mencakup periode backtest (setelah 2024-04-30).")

# Load the saved scaler and model.
# SECURITY: pickle.load can execute arbitrary code embedded in the file; only
# load scaler files that you produced yourself.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History (up to and including the cutoff) and backtest target (after it).
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Walk-forward, one-step-ahead backtest: each prediction uses the last
# WINDOW_SIZE *actual* prices, and the actual value (not the prediction) is
# appended afterwards.  The history already holds >= WINDOW_SIZE values
# (checked above) and only grows, so no per-step length check is needed.
# NOTE(review): one predict() call per row is slow; the windows could be
# predicted in a single batch if the model is not stateful -- confirm first.
hist_array = train_hist.values.tolist()
for t_date, t_value in test_period.items():
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)

    # Normalize the window with the saved scaler (presumably fit on the
    # training data), then shape the input as [1, W, 1].
    window_scaled = scaler.transform(window_vals)
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # Predict one step in scaled space and map back to the original scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)
    hist_array.append(float(t_value))


actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Madiun 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are swapped for machine epsilon so
# the division cannot hit zero.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

excel_out = os.path.join(output_dir, "Madiun_Backtest_2024_W7.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Madiun Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, "Madiun_Backtest_2024_W7.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Madiun 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 4.5632%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Madiun_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Madiun_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Backtest settings: the model is fed WINDOW_SIZE consecutive prices, and rows
# dated after CUTOFF_DATE make up the evaluation period.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

# Input paths (change if needed): saved Keras model, pickled scaler and the
# cleaned price workbook (Malang).
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Malang.xlsx"

# Backtest output folder; created when missing.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)


def ensure_price_column(df):
    """Check that ``df`` has the 'Harga (Rp)' price column.

    Returns the very same DataFrame object when the column exists and raises
    ``KeyError`` otherwise.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return a new frame indexed by its parsed 'Tanggal' dates, sorted ascending.

    When a 'Tanggal' column is present it is parsed day-first, rows whose date
    cannot be parsed are dropped, and the column becomes the index. A frame
    without that column is only sorted by its existing index.

    The caller's DataFrame is not modified.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
        # inference is now the default), so it is no longer passed.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame instead of writing into the caller's one.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # A frame that already carries a date index is still sorted.
    return df.sort_index()

# Fail fast with a clear message when a required input file is missing.
for _kind, _path in (
    ("data", path_file_input),
    ("scaler", path_scaler),
    ("model", path_model),
):
    if not os.path.exists(_path):
        raise FileNotFoundError(f"File {_kind} tidak ditemukan: {_path}")

# Load the price table, validate the price column and index it by date.
df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The backtest needs data after the cutoff. The bound is derived from
# CUTOFF_DATE so the check and its message cannot drift away from it.
if df.index.max() <= CUTOFF_DATE:
    raise ValueError(
        f"Data tidak mencakup periode backtest (setelah {CUTOFF_DATE.date()})."
    )

# Load the saved scaler and model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load scaler files that come from a trusted source.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History = prices up to and including the cutoff; test = prices from the day
# after the cutoff onwards.
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Rolling list of actual prices. It starts with at least WINDOW_SIZE values
# (validated above) and only grows, so a full window is always available.
hist_array = train_hist.values.tolist()

for t_date, t_value in test_period.items():
    # Latest WINDOW_SIZE actual values as a column vector for the scaler.
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)
    # Scale with the saved scaler (it is not refit here).
    window_scaled = scaler.transform(window_vals)
    # Model input shape: [1, WINDOW_SIZE, 1].
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # One-step prediction in scaled space, then back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)

    # Walk forward with the actual value, not the prediction.
    hist_array.append(float(t_value))

actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Malang 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are replaced by machine epsilon so the
# division is always defined.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

# The window length in the file names follows WINDOW_SIZE instead of a literal.
excel_out = os.path.join(output_dir, f"Malang_Backtest_2024_W{WINDOW_SIZE}.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Malang Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, f"Malang_Backtest_2024_W{WINDOW_SIZE}.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Malang 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 4.6465%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Malang_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Malang_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Folder that receives the backtest outputs; created if it does not exist yet.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)

# Input locations (change if needed): price table, pickled scaler, saved model.
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Probolinggo.xlsx"

# Last date kept as history, and the number of past values per model input window.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7


def ensure_price_column(df):
    """Return ``df`` unchanged after checking that it has a 'Harga (Rp)' column.

    Raises:
        KeyError: if the 'Harga (Rp)' column is absent.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return a new frame indexed by its parsed 'Tanggal' dates, sorted ascending.

    When a 'Tanggal' column is present it is parsed day-first, rows whose date
    cannot be parsed are dropped, and the column becomes the index. A frame
    without that column is only sorted by its existing index.

    The caller's DataFrame is not modified.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
        # inference is now the default), so it is no longer passed.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame instead of writing into the caller's one.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # A frame that already carries a date index is still sorted.
    return df.sort_index()

# Fail fast with a clear message when a required input file is missing.
for _kind, _path in (
    ("data", path_file_input),
    ("scaler", path_scaler),
    ("model", path_model),
):
    if not os.path.exists(_path):
        raise FileNotFoundError(f"File {_kind} tidak ditemukan: {_path}")

# Load the price table, validate the price column and index it by date.
df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The backtest needs data after the cutoff. The bound is derived from
# CUTOFF_DATE so the check and its message cannot drift away from it.
if df.index.max() <= CUTOFF_DATE:
    raise ValueError(
        f"Data tidak mencakup periode backtest (setelah {CUTOFF_DATE.date()})."
    )

# Load the saved scaler and model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load scaler files that come from a trusted source.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History = prices up to and including the cutoff; test = prices from the day
# after the cutoff onwards.
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Rolling list of actual prices. It starts with at least WINDOW_SIZE values
# (validated above) and only grows, so a full window is always available.
hist_array = train_hist.values.tolist()

for t_date, t_value in test_period.items():
    # Latest WINDOW_SIZE actual values as a column vector for the scaler.
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)
    # Scale with the saved scaler (it is not refit here).
    window_scaled = scaler.transform(window_vals)
    # Model input shape: [1, WINDOW_SIZE, 1].
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # One-step prediction in scaled space, then back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)

    # Walk forward with the actual value, not the prediction.
    hist_array.append(float(t_value))

actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Probolinggo 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are replaced by machine epsilon so the
# division is always defined.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

# The window length in the file names follows WINDOW_SIZE instead of a literal.
excel_out = os.path.join(output_dir, f"Probolinggo_Backtest_2024_W{WINDOW_SIZE}.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Probolinggo Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, f"Probolinggo_Backtest_2024_W{WINDOW_SIZE}.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Probolinggo 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 2.4025%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Probolinggo_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Probolinggo_Backtest_2024_W7.png


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error



# Folder that receives the backtest outputs; created if it does not exist yet.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)

# Input locations (change if needed): price table, pickled scaler, saved model.
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_model_W7_E114_B64_DO0.02.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_scaler_W7_E114_B64_DO0.02.pkl"
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Sumenep.xlsx"

# Last date kept as history, and the number of past values per model input window.
CUTOFF_DATE = pd.Timestamp("2024-03-14")
WINDOW_SIZE = 7

def ensure_price_column(df):
    """Return ``df`` unchanged after checking that it has a 'Harga (Rp)' column.

    Raises:
        KeyError: if the 'Harga (Rp)' column is absent.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return a new frame indexed by its parsed 'Tanggal' dates, sorted ascending.

    When a 'Tanggal' column is present it is parsed day-first, rows whose date
    cannot be parsed are dropped, and the column becomes the index. A frame
    without that column is only sorted by its existing index.

    The caller's DataFrame is not modified.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
        # inference is now the default), so it is no longer passed.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame instead of writing into the caller's one.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # A frame that already carries a date index is still sorted.
    return df.sort_index()

# Fail fast with a clear message when a required input file is missing.
for _kind, _path in (
    ("data", path_file_input),
    ("scaler", path_scaler),
    ("model", path_model),
):
    if not os.path.exists(_path):
        raise FileNotFoundError(f"File {_kind} tidak ditemukan: {_path}")

# Load the price table, validate the price column and index it by date.
df = pd.read_excel(path_file_input)
df = ensure_price_column(df)
df = prepare_series(df)

# The backtest needs data after the cutoff. The bound is derived from
# CUTOFF_DATE so the check and its message cannot drift away from it.
if df.index.max() <= CUTOFF_DATE:
    raise ValueError(
        f"Data tidak mencakup periode backtest (setelah {CUTOFF_DATE.date()})."
    )

# Load the saved scaler and model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load scaler files that come from a trusted source.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# History = prices up to and including the cutoff; test = prices from the day
# after the cutoff onwards.
series = df['Harga (Rp)'].astype(float)
train_hist = series.loc[:CUTOFF_DATE].copy()
test_period = series.loc[CUTOFF_DATE + pd.Timedelta(days=1):].copy()

if len(train_hist) < WINDOW_SIZE:
    raise ValueError(f"Histori sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")

if test_period.empty:
    raise ValueError("Tidak ada data setelah cutoff untuk backtest.")

predictions = []
actuals = []
dates = []

# Rolling list of actual prices. It starts with at least WINDOW_SIZE values
# (validated above) and only grows, so a full window is always available.
hist_array = train_hist.values.tolist()

for t_date, t_value in test_period.items():
    # Latest WINDOW_SIZE actual values as a column vector for the scaler.
    window_vals = np.array(hist_array[-WINDOW_SIZE:], dtype=float).reshape(-1, 1)
    # Scale with the saved scaler (it is not refit here).
    window_scaled = scaler.transform(window_vals)
    # Model input shape: [1, WINDOW_SIZE, 1].
    X_input = window_scaled.reshape(1, WINDOW_SIZE, 1)

    # One-step prediction in scaled space, then back to the original price scale.
    y_pred_scaled = model.predict(X_input, verbose=0)
    y_pred_real = scaler.inverse_transform(y_pred_scaled).ravel()[0]

    predictions.append(y_pred_real)
    actuals.append(float(t_value))
    dates.append(t_date)

    # Walk forward with the actual value, not the prediction.
    hist_array.append(float(t_value))

actuals_arr = np.array(actuals, dtype=float)
preds_arr = np.array(predictions, dtype=float)
mape_2024 = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

print(f"Backtest Sumenep 2024 (setelah {CUTOFF_DATE.date()}):")
print(f"- Periode uji: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
print(f"- WINDOW_SIZE: {WINDOW_SIZE}")
print(f"- MAPE 2024: {mape_2024:.4f}%")

# Save the result table to Excel.
out_df = pd.DataFrame({
    'Tanggal': dates,
    'Actual (Real)': actuals_arr,
    'Predicted (Real)': preds_arr,
    'Selisih': actuals_arr - preds_arr,
})
# Per-row percentage error; zero actuals are replaced by machine epsilon so the
# division is always defined.
y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
out_df['Error (%)'] = np.abs((actuals_arr - preds_arr) / y_safe) * 100.0

# The window length in the file names follows WINDOW_SIZE instead of a literal.
excel_out = os.path.join(output_dir, f"Sumenep_Backtest_2024_W{WINDOW_SIZE}.xlsx")
out_df.to_excel(excel_out, index=False)
print(f"✓ Hasil backtest disimpan: {excel_out}")

# Save the actual-vs-predicted plot.
plt.figure(figsize=(11, 4))
plt.plot(dates, actuals_arr, label='Actual 2024')
plt.plot(dates, preds_arr, label='Predicted 2024')
plt.title(f'Sumenep Backtest 2024 | W{WINDOW_SIZE} | MAPE={mape_2024:.4f}%')
plt.xlabel('Tanggal')
plt.ylabel('Harga (Rp)')
plt.legend()
plt.grid(True)
plt.tight_layout()
plot_out = os.path.join(output_dir, f"Sumenep_Backtest_2024_W{WINDOW_SIZE}.png")
plt.savefig(plot_out, dpi=150, bbox_inches='tight')
plt.close()
print(f"✓ Plot backtest disimpan: {plot_out}")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


Backtest Sumenep 2024 (setelah 2024-03-14):
- Periode uji: 2024-03-15 s.d. 2024-12-31 | 208 hari
- WINDOW_SIZE: 7
- MAPE 2024: 2.3627%
✓ Hasil backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Sumenep_Backtest_2024_W7.xlsx
✓ Plot backtest disimpan: E:\SKRIPSI 2025\dataset\BACKTEST\Sumenep_Backtest_2024_W7.png


: 

UJI COBA DATA 2024-2025

In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Region name used in output labels and file names.
wilayah_name = "Kediri"

# Folder that receives the backtest outputs; created if it does not exist yet.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)

# Pickled scaler and saved model.
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_model_W7_E114_B64_DO0.2.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_scaler_W7_E114_B64_DO0.2.pkl"

# Price tables: one file for the 2024 data, a separate file for 2025.
path_file_2025 = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Kediri_2025.xlsx"
path_file_2024 = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Kediri.xlsx"

# Last 2024 date kept as history, and the number of past values per input window.
CUTOFF_2024 = pd.Timestamp("2024-04-30")
WINDOW_SIZE = 7

# =========================
# UTILITAS
# =========================
def ensure_price_column(df):
    """Return ``df`` unchanged after checking that it has a 'Harga (Rp)' column.

    Raises:
        KeyError: if the 'Harga (Rp)' column is absent.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return a new frame indexed by its parsed 'Tanggal' dates, sorted ascending.

    When a 'Tanggal' column is present it is parsed day-first, rows whose date
    cannot be parsed are dropped, and the column becomes the index. A frame
    without that column is only sorted by its existing index.

    The caller's DataFrame is not modified.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
        # inference is now the default), so it is no longer passed.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame instead of writing into the caller's one.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # A frame that already carries a date index is still sorted.
    return df.sort_index()

def walk_forward_predict(series_hist, series_test, scaler, model, window_size):
    """
    One-step-ahead walk-forward forecast over ``series_test``.

    Each forecast uses the ``window_size`` actual values that precede its target
    (the history is extended with actuals, never with predictions). All input
    windows are therefore known up front and are sent to the model in a single
    batched ``model.predict`` call instead of one call per test row.
    NOTE(review): batched inference may differ from per-sample inference in the
    last floating-point digits.

    - series_hist: pandas Series of actual values preceding the test period;
      only its last ``window_size`` values are used.
    - series_test: pandas Series of actual values to forecast and compare with.
    - scaler: already-fitted scaler; only ``transform`` and ``inverse_transform``
      are called, it is never refit.
    - model: already-loaded model exposing ``predict(X, verbose=0)``; it receives
      one array of shape [n_steps, window_size, 1].
    - window_size: number of past values per input window (must be >= 1).

    Returns: (dates, actuals_arr, preds_arr) where ``dates`` is the list of
    index labels of ``series_test`` and both arrays are float arrays in the
    original (unscaled) units.

    Raises:
        ValueError: if ``window_size`` is smaller than 1.
        RuntimeError: if ``series_hist`` has fewer than ``window_size`` values.
    """
    dates = list(series_test.index)
    actuals_arr = series_test.to_numpy(dtype=float)
    n_steps = len(dates)
    if n_steps == 0:
        # Nothing to forecast: the model and scaler are not touched.
        return dates, actuals_arr, np.array([], dtype=float)
    if window_size < 1:
        raise ValueError("window_size harus >= 1.")

    hist_arr = series_hist.astype(float).to_numpy()
    if len(hist_arr) < window_size:
        raise RuntimeError("Histori tidak cukup untuk mengambil window.")

    # Seed window followed by the test actuals; the window for step i is
    # timeline[i : i + window_size], i.e. the values just before target i.
    timeline = np.concatenate([hist_arr[-window_size:], actuals_arr])
    # Scaling the whole column once gives the same values as scaling each
    # window separately, because the fitted scaler transforms rows independently.
    timeline_scaled = np.asarray(
        scaler.transform(timeline.reshape(-1, 1)), dtype=float
    ).ravel()
    window_idx = np.arange(n_steps)[:, None] + np.arange(window_size)[None, :]
    X_input = timeline_scaled[window_idx].reshape(n_steps, window_size, 1)

    # Predict in scaled space, then map back to the original scale; the first
    # output column is the forecast.
    y_pred_scaled = np.asarray(model.predict(X_input, verbose=0)).reshape(n_steps, -1)
    preds_arr = np.asarray(scaler.inverse_transform(y_pred_scaled), dtype=float)[:, 0]

    return dates, actuals_arr, preds_arr

def save_outputs(wilayah, label, dates, actuals_arr, preds_arr, output_dir, window_size, extra_title=""):
    """
    Write the backtest table (Excel) and plot (PNG), print a summary, return MAPE.

    - wilayah: region name used in file names, plot title and printed summary.
    - label: period label, e.g. "2024" or "2025".
    - dates: sequence of date-like objects (``.date()`` is called on the first
      and last entries), one per observation.
    - actuals_arr / preds_arr: NumPy arrays of actual and predicted prices on the
      original scale.
    - output_dir: folder that receives both files.
    - window_size: window length, used only for naming and labelling.
    - extra_title: optional text appended to the plot title.

    Returns: the MAPE in percent.
    """
    # MAPE on the original scale, as a percentage.
    mape_val = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

    # Per-row percentage error; zero actuals are replaced by machine epsilon so
    # the division is always defined.
    y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)
    out_df = pd.DataFrame({
        'Tanggal': dates,
        'Actual (Real)': actuals_arr,
        'Predicted (Real)': preds_arr,
        'Selisih': actuals_arr - preds_arr,
        'Error (%)': np.abs((actuals_arr - preds_arr) / y_safe) * 100.0,
    })

    excel_out = os.path.join(output_dir, f"{wilayah}_Backtest_{label}_W{window_size}.xlsx")
    plot_out = os.path.join(output_dir, f"{wilayah}_Backtest_{label}_W{window_size}.png")
    out_df.to_excel(excel_out, index=False)

    title = f'{wilayah} Backtest {label} | W{window_size} | MAPE={mape_val:.4f}%'
    if extra_title:
        title += f' | {extra_title}'

    # Explicit figure/axes instead of pyplot's implicit current figure; the
    # figure is closed even when plotting or saving fails, so it cannot leak.
    fig, ax = plt.subplots(figsize=(11, 4))
    try:
        ax.plot(dates, actuals_arr, label=f'Actual {label}')
        ax.plot(dates, preds_arr, label=f'Predicted {label}')
        ax.set_title(title)
        ax.set_xlabel('Tanggal')
        ax.set_ylabel('Harga (Rp)')
        ax.legend()
        ax.grid(True)
        fig.tight_layout()
        fig.savefig(plot_out, dpi=150, bbox_inches='tight')
    finally:
        plt.close(fig)

    print(f"[{wilayah}] Backtest {label}:")
    print(f"- Periode: {dates[0].date()} s.d. {dates[-1].date()} | {len(dates)} hari")
    print(f"- WINDOW_SIZE: {window_size}")
    print(f"- MAPE {label}: {mape_val:.4f}%")
    print(f"✓ Excel: {excel_out}")
    print(f"✓ Plot : {plot_out}")
    return mape_val

# =========================
# VALIDASI PATH & LOAD
# =========================
# Fail fast with a clear message when a required input file is missing.
for _kind, _path in (
    ("data 2024", path_file_2024),
    ("data 2025", path_file_2025),
    ("scaler", path_scaler),
    ("model", path_model),
):
    if not os.path.exists(_path):
        raise FileNotFoundError(f"File {_kind} tidak ditemukan: {_path}")

# Load both price tables, validate the price column and index them by date.
df2024 = pd.read_excel(path_file_2024)
df2024 = ensure_price_column(df2024)
df2024 = prepare_series(df2024)

df2025 = pd.read_excel(path_file_2025)
df2025 = ensure_price_column(df2025)
df2025 = prepare_series(df2025)

# Date-range checks. The 2024 bound is derived from CUTOFF_2024 (first day after
# the cutoff) so it cannot drift away from the configured cutoff.
if df2024.index.max() < CUTOFF_2024 + pd.Timedelta(days=1):
    raise ValueError("Data 2024 tidak mencakup periode uji (Mei–Des).")
if df2025.index.min() > pd.Timestamp("2025-01-01"):
    print("Peringatan: Data 2025 dimulai setelah 1 Jan 2025. Backtest akan dimulai dari tanggal pertama yang tersedia setelah memastikan window.")

# Load the saved scaler and model.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load scaler files that come from a trusted source.
with open(path_scaler, 'rb') as f:
    scaler = pickle.load(f)
model = load_model(path_model)

# =========================
# SCENARIO B — SEPARATE BACKTESTS
# =========================
series2024 = df2024['Harga (Rp)'].astype(float)
series2025 = df2025['Harga (Rp)'].astype(float)

# --- 2024 backtest: history = 2024 data up to the cutoff, test = the rest ---
hist_2024 = series2024.loc[:CUTOFF_2024].copy()
test_2024 = series2024.loc[CUTOFF_2024 + pd.Timedelta(days=1):].copy()

if len(hist_2024) < WINDOW_SIZE:
    raise ValueError(f"Histori 2024 sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")
if test_2024.empty:
    raise ValueError("Tidak ada periode uji setelah cutoff di file 2024.")

dates_2024, actuals_2024, preds_2024 = walk_forward_predict(hist_2024, test_2024, scaler, model, WINDOW_SIZE)
mape_2024 = save_outputs(wilayah_name, "2024", dates_2024, actuals_2024, preds_2024, output_dir, WINDOW_SIZE)

# --- 2025 backtest: seeded with the last WINDOW_SIZE values of the 2024 file ---
if len(series2024) < WINDOW_SIZE:
    raise ValueError(f"Data 2024 kurang dari WINDOW_SIZE={WINDOW_SIZE} untuk seed 2025.")

seed_2025_hist = series2024.iloc[-WINDOW_SIZE:].copy()

# The 2025 test period is every row available in the 2025 file.
test_2025 = series2025.copy()
if test_2025.empty:
    raise ValueError("File 2025 tidak memiliki data untuk periode uji.")

dates_2025, actuals_2025, preds_2025 = walk_forward_predict(seed_2025_hist, test_2025, scaler, model, WINDOW_SIZE)
mape_2025 = save_outputs(wilayah_name, "2025", dates_2025, actuals_2025, preds_2025, output_dir, WINDOW_SIZE, extra_title="Seed dari akhir 2024")

# =========================
# SUMMARY
# =========================
print("\n=== RINGKASAN BACKTEST ===")
print(f"- {wilayah_name} MAPE 2024: {mape_2024:.4f}%")
print(f"- {wilayah_name} MAPE 2025: {mape_2025:.4f}%")
print("Catatan:")
print("- Scaler .pkl dan model .h5 dari training digunakan apa adanya (anti-leakage).")
# The window length in the notes follows WINDOW_SIZE instead of a literal 7.
print(f"- Window = {WINDOW_SIZE} konsisten sesuai konfigurasi.")
print(f"- 2025 menggunakan seed window dari {WINDOW_SIZE} nilai akhir 2024 tanpa menggabungkan file.")
print("- Jika banyak nilai 2025 berada di luar rentang scaler training, pertimbangkan retrain dan re-fit scaler.")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)
  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


[Kediri] Backtest 2024:
- Periode: 2024-05-01 s.d. 2024-12-31 | 175 hari
- WINDOW_SIZE: 7
- MAPE 2024: 1.9644%
✓ Excel: E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2024_W7.xlsx
✓ Plot : E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2024_W7.png
[Kediri] Backtest 2025:
- Periode: 2025-01-01 s.d. 2025-11-28 | 238 hari
- WINDOW_SIZE: 7
- MAPE 2025: 2.1900%
✓ Excel: E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2025_W7.xlsx
✓ Plot : E:\SKRIPSI 2025\dataset\BACKTEST\Kediri_Backtest_2025_W7.png

=== RINGKASAN BACKTEST ===
- Kediri MAPE 2024: 1.9644%
- Kediri MAPE 2025: 2.1900%
Catatan:
- Scaler .pkl dan model .h5 dari training digunakan apa adanya (anti-leakage).
- Window = 7 konsisten sesuai konfigurasi.
- 2025 menggunakan seed window dari 7 nilai akhir 2024 tanpa menggabungkan file.
- Jika banyak nilai 2025 berada di luar rentang scaler training, pertimbangkan retrain dan re-fit scaler.


In [None]:
import os
import pickle
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from tensorflow.keras.models import load_model
from sklearn.metrics import mean_absolute_percentage_error


# Region name used in output labels and file names.
wilayah_name = "Jember"

# Folder that receives the backtest outputs; created if it does not exist yet.
output_dir = r"E:\SKRIPSI 2025\dataset\BACKTEST"
os.makedirs(output_dir, exist_ok=True)

# Pickled scaler and saved model.
path_model = r"E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_model_W7_E114_B64_DO0.2.h5"
path_scaler = r"E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_scaler_W7_E114_B64_DO0.2.pkl"

# Price tables: one file for the 2024 data, a separate file for 2025.
path_file_2025 = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Jember_2025.xlsx"
path_file_2024 = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Jember.xlsx"

# Last 2024 date kept as history, and the number of past values per input window.
CUTOFF_2024 = pd.Timestamp("2024-04-30")
WINDOW_SIZE = 7

# =========================
# UTILITAS
# =========================
def ensure_price_column(df):
    """Return ``df`` unchanged after checking that it has a 'Harga (Rp)' column.

    Raises:
        KeyError: if the 'Harga (Rp)' column is absent.
    """
    has_price = 'Harga (Rp)' in df.columns
    if has_price:
        return df
    raise KeyError("Kolom 'Harga (Rp)' tidak ditemukan.")

def prepare_series(df):
    """Return a new frame indexed by its parsed 'Tanggal' dates, sorted ascending.

    When a 'Tanggal' column is present it is parsed day-first, rows whose date
    cannot be parsed are dropped, and the column becomes the index. A frame
    without that column is only sorted by its existing index.

    The caller's DataFrame is not modified.
    """
    if 'Tanggal' in df.columns:
        # `infer_datetime_format` is deprecated since pandas 2.0 (strict format
        # inference is now the default), so it is no longer passed.
        parsed = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True)
        # assign() builds a new frame instead of writing into the caller's one.
        df = (
            df.assign(Tanggal=parsed)
            .dropna(subset=['Tanggal'])
            .sort_values('Tanggal')
            .set_index('Tanggal')
        )
    # A frame that already carries a date index is still sorted.
    return df.sort_index()

def walk_forward_predict(series_hist, series_test, scaler, model, window_size):
    """
    One-step-ahead walk-forward forecast over ``series_test``.

    Each forecast uses the ``window_size`` actual values that precede its target
    (the history is extended with actuals, never with predictions). All input
    windows are therefore known up front and are sent to the model in a single
    batched ``model.predict`` call instead of one call per test row.
    NOTE(review): batched inference may differ from per-sample inference in the
    last floating-point digits.

    - series_hist: pandas Series of actual values preceding the test period;
      only its last ``window_size`` values are used.
    - series_test: pandas Series of actual values to forecast and compare with.
    - scaler: already-fitted scaler; only ``transform`` and ``inverse_transform``
      are called, it is never refit.
    - model: already-loaded model exposing ``predict(X, verbose=0)``; it receives
      one array of shape [n_steps, window_size, 1].
    - window_size: number of past values per input window (must be >= 1).

    Returns: (dates, actuals_arr, preds_arr) where ``dates`` is the list of
    index labels of ``series_test`` and both arrays are float arrays in the
    original (unscaled) units.

    Raises:
        ValueError: if ``window_size`` is smaller than 1.
        RuntimeError: if ``series_hist`` has fewer than ``window_size`` values.
    """
    dates = list(series_test.index)
    actuals_arr = series_test.to_numpy(dtype=float)
    n_steps = len(dates)
    if n_steps == 0:
        # Nothing to forecast: the model and scaler are not touched.
        return dates, actuals_arr, np.array([], dtype=float)
    if window_size < 1:
        raise ValueError("window_size harus >= 1.")

    hist_arr = series_hist.astype(float).to_numpy()
    if len(hist_arr) < window_size:
        raise RuntimeError("Histori tidak cukup untuk mengambil window.")

    # Seed window followed by the test actuals; the window for step i is
    # timeline[i : i + window_size], i.e. the values just before target i.
    timeline = np.concatenate([hist_arr[-window_size:], actuals_arr])
    # Scaling the whole column once gives the same values as scaling each
    # window separately, because the fitted scaler transforms rows independently.
    timeline_scaled = np.asarray(
        scaler.transform(timeline.reshape(-1, 1)), dtype=float
    ).ravel()
    window_idx = np.arange(n_steps)[:, None] + np.arange(window_size)[None, :]
    X_input = timeline_scaled[window_idx].reshape(n_steps, window_size, 1)

    # Predict in scaled space, then map back to the original scale; the first
    # output column is the forecast.
    y_pred_scaled = np.asarray(model.predict(X_input, verbose=0)).reshape(n_steps, -1)
    preds_arr = np.asarray(scaler.inverse_transform(y_pred_scaled), dtype=float)[:, 0]

    return dates, actuals_arr, preds_arr

def save_outputs(wilayah, label, dates, actuals_arr, preds_arr, output_dir, window_size, extra_title=""):
    """
    Persist one backtest run: an Excel sheet, a line plot and a console summary.

    Args:
        wilayah: region name used in file names, plot title and log lines.
        label: run label, e.g. "2024" or "2025".
        dates: non-empty sequence of dates; items must provide ``.date()``.
        actuals_arr: array of actual values in original units.
        preds_arr: array of predicted values in original units.
        output_dir: directory that receives the ``.xlsx`` and ``.png`` files.
        window_size: window length, used only for labelling.
        extra_title: optional suffix appended to the plot title.

    Returns:
        The MAPE of the run, in percent.
    """
    # MAPE on the original price scale, as a percentage.
    mape_val = mean_absolute_percentage_error(actuals_arr, preds_arr) * 100.0

    # Per-row figures; zeros in the actuals are swapped for machine epsilon so
    # the percentage error never divides by zero.
    residual = actuals_arr - preds_arr
    y_safe = np.where(actuals_arr == 0, np.finfo(float).eps, actuals_arr)

    # Excel report
    out_df = pd.DataFrame({
        'Tanggal': dates,
        'Actual (Real)': actuals_arr,
        'Predicted (Real)': preds_arr,
        'Selisih': residual,
        'Error (%)': np.abs(residual / y_safe) * 100.0,
    })
    excel_out = os.path.join(output_dir, f"{wilayah}_Backtest_{label}_W{window_size}.xlsx")
    out_df.to_excel(excel_out, index=False)

    # Line plot: actuals first, predictions second.
    plt.figure(figsize=(11, 4))
    for values, legend_text in (
        (actuals_arr, f'Actual {label}'),
        (preds_arr, f'Predicted {label}'),
    ):
        plt.plot(dates, values, label=legend_text)

    title = f'{wilayah} Backtest {label} | W{window_size} | MAPE={mape_val:.4f}%'
    title = title + f' | {extra_title}' if extra_title else title
    plt.title(title)
    plt.xlabel('Tanggal')
    plt.ylabel('Harga (Rp)')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plot_out = os.path.join(output_dir, f"{wilayah}_Backtest_{label}_W{window_size}.png")
    plt.savefig(plot_out, dpi=150, bbox_inches='tight')
    plt.close()

    # Console summary
    first_day, last_day = dates[0].date(), dates[-1].date()
    print(f"[{wilayah}] Backtest {label}:")
    print(f"- Periode: {first_day} s.d. {last_day} | {len(dates)} hari")
    print(f"- WINDOW_SIZE: {window_size}")
    print(f"- MAPE {label}: {mape_val:.4f}%")
    print(f"✓ Excel: {excel_out}")
    print(f"✓ Plot : {plot_out}")
    return mape_val


# Fail fast, in this order, if any required input file is missing.
for _required_path, _missing_prefix in (
    (path_file_2024, "File data 2024 tidak ditemukan: "),
    (path_file_2025, "File data 2025 tidak ditemukan: "),
    (path_scaler, "File scaler tidak ditemukan: "),
    (path_model, "File model tidak ditemukan: "),
):
    if not os.path.exists(_required_path):
        raise FileNotFoundError(f"{_missing_prefix}{_required_path}")

# Load each yearly sheet, verify the price column, then index and sort it by date.
df2024 = prepare_series(ensure_price_column(pd.read_excel(path_file_2024)))
df2025 = prepare_series(ensure_price_column(pd.read_excel(path_file_2025)))

# Check date coverage: the 2024 file must reach the test period; a late start
# of the 2025 file only triggers a warning.
if df2024.index.max() < pd.Timestamp("2024-05-01"):
    raise ValueError("Data 2024 tidak mencakup periode uji (Mei–Des).")
if df2025.index.min() > pd.Timestamp("2025-01-01"):
    print("Peringatan: Data 2025 dimulai setelah 1 Jan 2025. Backtest akan dimulai dari tanggal pertama yang tersedia setelah memastikan window.")

# Load the trained scaler and model.
# NOTE: unpickling executes code stored in the file; only load a scaler file you trust.
with open(path_scaler, 'rb') as scaler_file:
    scaler = pickle.load(scaler_file)
model = load_model(path_model)

# =========================
# SCENARIO B — SEPARATE BACKTESTS
# =========================
# Price columns of both files as float Series (index comes from prepare_series).
series2024 = df2024['Harga (Rp)'].astype(float)
series2025 = df2025['Harga (Rp)'].astype(float)

# --- 2024 backtest (May–Dec), history = 2024 data up to the cutoff ---
# Label-based slices: history runs up to and including CUTOFF_2024, the test
# period starts one day after it.
hist_2024 = series2024.loc[:CUTOFF_2024].copy()
test_2024 = series2024.loc[CUTOFF_2024 + pd.Timedelta(days=1):].copy()

# The first prediction needs a full window of history, and there must be
# something to predict.
if len(hist_2024) < WINDOW_SIZE:
    raise ValueError(f"Histori 2024 sebelum cutoff kurang dari WINDOW_SIZE={WINDOW_SIZE}.")
if test_2024.empty:
    raise ValueError("Tidak ada periode uji setelah cutoff di file 2024.")

# Run the walk-forward backtest for 2024 and write its Excel/plot outputs.
dates_2024, actuals_2024, preds_2024 = walk_forward_predict(hist_2024, test_2024, scaler, model, WINDOW_SIZE)
mape_2024 = save_outputs(wilayah_name, "2024", dates_2024, actuals_2024, preds_2024, output_dir, WINDOW_SIZE)

# --- 2025 backtest, seeded with the tail of the 2024 series ---
# NOTE(review): this guard cannot fire here — hist_2024 is a slice of series2024
# and was already required to hold at least WINDOW_SIZE values above.
if len(series2024) < WINDOW_SIZE:
    raise ValueError(f"Data 2024 kurang dari WINDOW_SIZE={WINDOW_SIZE} untuk seed 2025.")

# Seed window: the last WINDOW_SIZE values of the 2024 series.
seed_2025_hist = series2024.iloc[-WINDOW_SIZE:].copy()

# The 2025 test period is all available 2025 data (Jan–Nov)
test_2025 = series2025.copy()
if test_2025.empty:
    raise ValueError("File 2025 tidak memiliki data untuk periode uji.")

# Run the walk-forward backtest for 2025 and write its Excel/plot outputs.
dates_2025, actuals_2025, preds_2025 = walk_forward_predict(seed_2025_hist, test_2025, scaler, model, WINDOW_SIZE)
mape_2025 = save_outputs(wilayah_name, "2025", dates_2025, actuals_2025, preds_2025, output_dir, WINDOW_SIZE, extra_title="Seed dari akhir 2024")

# =========================
# SUMMARY
# =========================
# Print both MAPE values followed by notes on how the backtest was run.
print("\n=== RINGKASAN BACKTEST ===")
print(f"- {wilayah_name} MAPE 2024: {mape_2024:.4f}%")
print(f"- {wilayah_name} MAPE 2025: {mape_2025:.4f}%")
print("Catatan:")
print("- Scaler .pkl dan model .h5 dari training digunakan apa adanya (anti-leakage).")
# The window length is taken from WINDOW_SIZE (not a literal) so these notes
# always describe the configuration that was actually used.
print(f"- Window = {WINDOW_SIZE} konsisten sesuai konfigurasi.")
print(f"- 2025 menggunakan seed window dari {WINDOW_SIZE} nilai akhir 2024 tanpa menggabungkan file.")
print("- Jika banyak nilai 2025 berada di luar rentang scaler training, pertimbangkan retrain dan re-fit scaler.")

  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)
  df['Tanggal'] = pd.to_datetime(df['Tanggal'], errors='coerce', dayfirst=True, infer_datetime_format=True)


[Jember] Backtest 2024:
- Periode: 2024-05-01 s.d. 2024-12-31 | 175 hari
- WINDOW_SIZE: 7
- MAPE 2024: 3.1953%
✓ Excel: E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2024_W7.xlsx
✓ Plot : E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2024_W7.png
[Jember] Backtest 2025:
- Periode: 2025-01-01 s.d. 2025-11-28 | 238 hari
- WINDOW_SIZE: 7
- MAPE 2025: 2.2948%
✓ Excel: E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2025_W7.xlsx
✓ Plot : E:\SKRIPSI 2025\dataset\BACKTEST\Jember_Backtest_2025_W7.png

=== RINGKASAN BACKTEST ===
- Jember MAPE 2024: 3.1953%
- Jember MAPE 2025: 2.2948%
Catatan:
- Scaler .pkl dan model .h5 dari training digunakan apa adanya (anti-leakage).
- Window = 7 konsisten sesuai konfigurasi.
- 2025 menggunakan seed window dari 7 nilai akhir 2024 tanpa menggabungkan file.
- Jika banyak nilai 2025 berada di luar rentang scaler training, pertimbangkan retrain dan re-fit scaler.
