In [None]:
# Matches the best configuration from Chapter 3
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed NumPy, Python's ``random`` and TensorFlow with the fixed seed 42."""
    seed_value = 42
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else runs in this cell.
reset_seeds()

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 90  # number of consecutive observations per input window
FIXED_EPOCH = 50  # passed to model.fit(epochs=...)
FIXED_BATCH_SIZE = 64  # passed to model.fit(batch_size=...)
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset\DATASET TRAINING"
os.makedirs(output_root, exist_ok=True)  # create the output root if it is missing

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a 2-D array into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + window_size, 0]`` as input and
    ``dataset[i + window_size, 0]`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, window_size)``
        and ``Y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when there are no samples, so callers can always read ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of `offsets` holds the indices i, i+1, ..., i+window_size-1.
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[offsets]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regressor used in this cell.

    Layers, in order: LSTM(100, returning sequences) -> Dropout(0.02) ->
    LSTM(100) -> Dropout(0.02) -> Dense(25, relu) -> Dense(1). The model is
    compiled with Adam (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Reset the TensorFlow seed right before any layer is created.
    tf.random.set_seed(42)

    net = Sequential()
    net.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(rate=0.02))
    net.add(LSTM(units=100, return_sequences=False))
    net.add(Dropout(rate=0.02))
    net.add(Dense(units=25, activation='relu'))
    net.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Fail early with a clear message if the input file or the price column is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1), the layout MinMaxScaler expects.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows are built on unscaled data only to get the sample count and the
# unscaled test targets.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows are train (+ validation)
y_test_orig = Y_full_raw[train_size:]  # test targets in the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows touch raw rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler sees exactly those rows. This range
# includes the validation tail, which is carved out of the training windows below.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), i.e. it still contains the validation tail.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Re-seed right before building so the model starts from the same RNG state on every run.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original row order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

# Summarise the loss curves. No callbacks are passed to fit(), so "best epoch"
# is reported only; the model evaluated below holds the final-epoch weights.
train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are 1-based; fall back to the training loss if no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map predictions back to the original scale before comparing with y_test_orig.
predictions_real = scaler.inverse_transform(predictions_scaled)
# sklearn returns a fraction; multiply by 100 to report a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# The folder name encodes region, window size, epochs and batch size (not the
# dropout rate), so runs that differ only in dropout share a folder under the
# same output_root.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Plot actual vs predicted on the test set
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()  # close the figure after saving it
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Replace zero targets with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),  # actual minus predicted
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary of the run
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).
[Info] Total: 953 | Train: 762 | Val: 77 | Test: 191
   -> Selesai epoch : 50
   -> Best epoch    : 50 (val_loss=0.000698)
   -> Last loss     : 0.000901 | Last val_loss : 0.000698
   -> MAPE          : 3.0860%
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\DATASET TRAINING\Banyuwangi_w90_e50_b64\Banyuwangi_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\DATASET TRAINING\Banyuwangi_w90_e50_b64\Banyuwangi_Prediksi_W90_E50_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\DATASET TRAINING\Banyuwangi_w90_e50_b64\Banyuwangi_Summary_W90_E50_B64.xlsx

Selesai untuk file ini.


In [None]:
#KONFIG BERBEDA EPOCH 114 
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed NumPy, Python's ``random`` and TensorFlow with the fixed seed 42."""
    seed_value = 42
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else runs in this cell.
reset_seeds()

# ============ FIXED CONFIG FOR THIS RUN ============
FIXED_WINDOW_SIZE = 90  # number of consecutive observations per input window
FIXED_EPOCH = 114  # passed to model.fit(epochs=...)
FIXED_BATCH_SIZE = 64  # passed to model.fit(batch_size=...)
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset"
os.makedirs(output_root, exist_ok=True)  # create the output root if it is missing

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a 2-D array into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + window_size, 0]`` as input and
    ``dataset[i + window_size, 0]`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, window_size)``
        and ``Y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when there are no samples, so callers can always read ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of `offsets` holds the indices i, i+1, ..., i+window_size-1.
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[offsets]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regressor used in this cell.

    Layers, in order: LSTM(100, returning sequences) -> Dropout(0.02) ->
    LSTM(100) -> Dropout(0.02) -> Dense(25, relu) -> Dense(1). The model is
    compiled with Adam (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Reset the TensorFlow seed right before any layer is created.
    tf.random.set_seed(42)

    net = Sequential()
    net.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(rate=0.02))
    net.add(LSTM(units=100, return_sequences=False))
    net.add(Dropout(rate=0.02))
    net.add(Dense(units=25, activation='relu'))
    net.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Fail early with a clear message if the input file or the price column is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1), the layout MinMaxScaler expects.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows are built on unscaled data only to get the sample count and the
# unscaled test targets.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows are train (+ validation)
y_test_orig = Y_full_raw[train_size:]  # test targets in the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows touch raw rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler sees exactly those rows. This range
# includes the validation tail, which is carved out of the training windows below.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), i.e. it still contains the validation tail.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Re-seed right before building so the model starts from the same RNG state on every run.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original row order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

# Summarise the loss curves. No callbacks are passed to fit(), so "best epoch"
# is reported only; the model evaluated below holds the final-epoch weights.
train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are 1-based; fall back to the training loss if no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map predictions back to the original scale before comparing with y_test_orig.
predictions_real = scaler.inverse_transform(predictions_scaled)
# sklearn returns a fraction; multiply by 100 to report a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# The folder name encodes region, window size, epochs and batch size (not the
# dropout rate), so runs that differ only in dropout share a folder under the
# same output_root.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Plot actual vs predicted on the test set
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()  # close the figure after saving it
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Replace zero targets with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),  # actual minus predicted
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary of the run
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).
[Info] Total: 953 | Train: 762 | Val: 77 | Test: 191
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000409)
   -> Last loss     : 0.000656 | Last val_loss : 0.000409
   -> MAPE          : 2.8492%
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\Banyuwangi_w90_e114_b64\Banyuwangi_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\Banyuwangi_w90_e114_b64\Banyuwangi_Prediksi_W90_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\Banyuwangi_w90_e114_b64\Banyuwangi_Summary_W90_E114_B64.xlsx

Selesai untuk file ini.


In [None]:
# Different config: epoch 114, dropout 0.2
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed NumPy, Python's ``random`` and TensorFlow with the fixed seed 42."""
    seed_value = 42
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else runs in this cell.
reset_seeds()

# ============ FIXED CONFIG FOR THIS RUN ============
FIXED_WINDOW_SIZE = 90  # number of consecutive observations per input window
FIXED_EPOCH = 114  # passed to model.fit(epochs=...)
FIXED_BATCH_SIZE = 64  # passed to model.fit(batch_size=...)
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
output_root = r"E:\SKRIPSI 2025"
os.makedirs(output_root, exist_ok=True)  # create the output root if it is missing

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a 2-D array into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + window_size, 0]`` as input and
    ``dataset[i + window_size, 0]`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, window_size)``
        and ``Y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when there are no samples, so callers can always read ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of `offsets` holds the indices i, i+1, ..., i+window_size-1.
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[offsets]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regressor used in this cell.

    Layers, in order: LSTM(100, returning sequences) -> Dropout(0.2) ->
    LSTM(100) -> Dropout(0.2) -> Dense(25, relu) -> Dense(1). The model is
    compiled with Adam (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Reset the TensorFlow seed right before any layer is created.
    tf.random.set_seed(42)

    net = Sequential()
    net.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(rate=0.2))
    net.add(LSTM(units=100, return_sequences=False))
    net.add(Dropout(rate=0.2))
    net.add(Dense(units=25, activation='relu'))
    net.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Fail early with a clear message if the input file or the price column is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1), the layout MinMaxScaler expects.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows are built on unscaled data only to get the sample count and the
# unscaled test targets.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows are train (+ validation)
y_test_orig = Y_full_raw[train_size:]  # test targets in the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows touch raw rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler sees exactly those rows. This range
# includes the validation tail, which is carved out of the training windows below.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), i.e. it still contains the validation tail.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Re-seed right before building so the model starts from the same RNG state on every run.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original row order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

# Summarise the loss curves. No callbacks are passed to fit(), so "best epoch"
# is reported only; the model evaluated below holds the final-epoch weights.
train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are 1-based; fall back to the training loss if no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map predictions back to the original scale before comparing with y_test_orig.
predictions_real = scaler.inverse_transform(predictions_scaled)
# sklearn returns a fraction; multiply by 100 to report a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# The folder name encodes region, window size, epochs and batch size (not the
# dropout rate), so runs that differ only in dropout share a folder under the
# same output_root.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Plot actual vs predicted on the test set
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()  # close the figure after saving it
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Replace zero targets with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),  # actual minus predicted
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary of the run
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).
[Info] Total: 953 | Train: 762 | Val: 77 | Test: 191
   -> Selesai epoch : 114
   -> Best epoch    : 113 (val_loss=0.000439)
   -> Last loss     : 0.000828 | Last val_loss : 0.000490
   -> MAPE          : 2.8423%
   ✓ Plot disimpan: E:\SKRIPSI 2025\Banyuwangi_w90_e114_b64\Banyuwangi_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\Banyuwangi_w90_e114_b64\Banyuwangi_Prediksi_W90_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\Banyuwangi_w90_e114_b64\Banyuwangi_Summary_W90_E114_B64.xlsx

Selesai untuk file ini.


In [None]:
# Different config: epoch 114, dropout 0.2, window size 7
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed NumPy, Python's ``random`` and TensorFlow with the fixed seed 42."""
    seed_value = 42
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else runs in this cell.
reset_seeds()

# ============ FIXED CONFIG FOR THIS RUN ============
FIXED_WINDOW_SIZE = 7  # number of consecutive observations per input window
FIXED_EPOCH = 114  # passed to model.fit(epochs=...)
FIXED_BATCH_SIZE = 64  # passed to model.fit(batch_size=...)
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
output_root = r"E:\SKRIPSI 2025"
os.makedirs(output_root, exist_ok=True)  # create the output root if it is missing

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a 2-D array into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + window_size, 0]`` as input and
    ``dataset[i + window_size, 0]`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, window_size)``
        and ``Y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when there are no samples, so callers can always read ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of `offsets` holds the indices i, i+1, ..., i+window_size-1.
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[offsets]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regressor used in this cell.

    Layers, in order: LSTM(100, returning sequences) -> Dropout(0.2) ->
    LSTM(100) -> Dropout(0.2) -> Dense(25, relu) -> Dense(1). The model is
    compiled with Adam (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Reset the TensorFlow seed right before any layer is created.
    tf.random.set_seed(42)

    net = Sequential()
    net.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(rate=0.2))
    net.add(LSTM(units=100, return_sequences=False))
    net.add(Dropout(rate=0.2))
    net.add(Dense(units=25, activation='relu'))
    net.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=0.001)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Fail early with a clear message if the input file or the price column is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1), the layout MinMaxScaler expects.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows are built on unscaled data only to get the sample count and the
# unscaled test targets.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows are train (+ validation)
y_test_orig = Y_full_raw[train_size:]  # test targets in the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows touch raw rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler sees exactly those rows. This range
# includes the validation tail, which is carved out of the training windows below.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), i.e. it still contains the validation tail.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Re-seed right before building so the model starts from the same RNG state on every run.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original row order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

# Summarise the loss curves. No callbacks are passed to fit(), so "best epoch"
# is reported only; the model evaluated below holds the final-epoch weights.
train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are 1-based; fall back to the training loss if no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map predictions back to the original scale before comparing with y_test_orig.
predictions_real = scaler.inverse_transform(predictions_scaled)
# sklearn returns a fraction; multiply by 100 to report a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# The folder name encodes region, window size, epochs and batch size (not the
# dropout rate), so runs that differ only in dropout share a folder under the
# same output_root.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Plot actual vs predicted on the test set
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()  # close the figure after saving it
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Replace zero targets with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),  # actual minus predicted
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary of the run
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000274)
   -> Last loss     : 0.000716 | Last val_loss : 0.000274
   -> MAPE          : 2.6209%
   ✓ Plot disimpan: E:\SKRIPSI 2025\Banyuwangi_w7_e114_b64\Banyuwangi_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\Banyuwangi_w7_e114_b64\Banyuwangi_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\Banyuwangi_w7_e114_b64\Banyuwangi_Summary_W7_E114_B64.xlsx

Selesai untuk file ini.


UJI COBA FINAL CODE MEMBUAT MODEL .H5
DENGAN KONFIG WINDOWSIZE 7 EPOCH 114 DROPOUT 0.2

In [2]:
#BANYUWANGI
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed NumPy, Python's ``random`` and TensorFlow with the fixed seed 42."""
    seed_value = 42
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else runs in this cell.
reset_seeds()

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # number of consecutive observations per input window
FIXED_EPOCH = 114
FIXED_BATCH_SIZE = 64
VAL_RATIO = 0.1
DROPOUT_RATE = 0.2  # rate used by both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate used in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)  # create the output root if it is missing

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a 2-D array into supervised (window, next value) pairs.

    Sample ``i`` uses ``dataset[i:i + window_size, 0]`` as input and
    ``dataset[i + window_size, 0]`` as target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape ``(n_samples, window_size)``
        and ``Y`` has shape ``(n_samples,)``, with
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when there are no samples, so callers can always read ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of `offsets` holds the indices i, i+1, ..., i+window_size-1.
    offsets = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[offsets]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample; forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, mean squared error loss).
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    lstm_units = 100
    net = Sequential()
    # Layers are created and added one at a time, in this order.
    net.add(LSTM(lstm_units, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(DROPOUT_RATE))
    net.add(LSTM(lstm_units, return_sequences=False))
    net.add(Dropout(DROPOUT_RATE))
    net.add(Dense(25, activation='relu'))
    net.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (ONE EXCEL FILE) ============
# Fail fast with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# The price column 'Harga (Rp)' is the only column used below.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1); create_sliding_window reads column 0.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows over the unscaled series provide the test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train, the remainder is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# Rows [0, train_size + window): every raw value that is an input or a target
# of one of the first train_size windows; later rows are excluded from the fit.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler.
# NOTE(review): values outside the training min/max presumably map outside
# [0, 1] (no clipping is requested here) — confirm if that matters.
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing axis of size 1: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

# Same train_size cut as on the raw windows, so X_test lines up with y_test_orig.
X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation = tail of TRAIN (the last VAL_RATIO share of the training windows)
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" below is len(X_train_all), which still contains the
# Val windows; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH FIXED CONFIG ============
# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the samples in their original (chronological) order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# 1-based epoch with the lowest validation loss (training loss if no val_loss).
# NOTE(review): this is reported only — the fit call above passes no callbacks,
# so nothing in it restores the best epoch's weights into `model`.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original price scale.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Multiplied by 100 so the metric is printed and stored as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One output folder per region and configuration (window / epoch / batch size).
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Save MODEL (.h5) and SCALER (.pkl) + metadata
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metric metadata
# NOTE(review): "train_samples" is len(X_train_all), which includes the windows
# also counted in "val_samples"; the model was fitted on len(X_train) windows.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Plot Actual vs Predicted (both on the original price scale, test portion only)
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
# Close the figure after saving it to disk.
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Zero actuals are replaced by machine epsilon so the division below is defined.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
# Absolute percentage error of each test row.
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
# 'Selisih' (difference) = actual - predicted.
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short summary (a single-row table with the configuration and metrics)
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000274)
   -> Last loss     : 0.000716 | Last val_loss : 0.000274
   -> MAPE          : 2.6209%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi_w7_e114_b64\Banyuwangi_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Banyuwangi

In [None]:
#SURABAYA
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed the NumPy, Python ``random`` and TensorFlow generators with 42."""
    seed_value = 42
    # Seeding order: NumPy, then Python's random module, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else in this cell runs.
reset_seeds()

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # number of past values in each input window
FIXED_EPOCH = 114  # epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
# Create the output root if needed; an already-existing directory is not an error.
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of ``dataset`` into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` as input and row
    ``i + window_size`` as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape (n_rows - window_size, window_size)
        and ``Y`` has shape (n_rows - window_size,). When the series is too
        short to form a single sample, ``X`` is an empty (0, window_size) array
        (still 2-D, so ``X.shape[1]`` stays valid) and ``Y`` is empty.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = series.shape[0] - window_size
    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair.
        return (np.empty((0, window_size), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1].
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample; forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, mean squared error loss).
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    lstm_units = 100
    net = Sequential()
    # Layers are created and added one at a time, in this order.
    net.add(LSTM(lstm_units, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(DROPOUT_RATE))
    net.add(LSTM(lstm_units, return_sequences=False))
    net.add(Dropout(DROPOUT_RATE))
    net.add(Dense(25, activation='relu'))
    net.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (ONE EXCEL FILE) ============
# Fail fast with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# The price column 'Harga (Rp)' is the only column used below.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1); create_sliding_window reads column 0.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows over the unscaled series provide the test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train, the remainder is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# Rows [0, train_size + window): every raw value that is an input or a target
# of one of the first train_size windows; later rows are excluded from the fit.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler.
# NOTE(review): values outside the training min/max presumably map outside
# [0, 1] (no clipping is requested here) — confirm if that matters.
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing axis of size 1: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

# Same train_size cut as on the raw windows, so X_test lines up with y_test_orig.
X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation = tail of TRAIN (the last VAL_RATIO share of the training windows)
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" below is len(X_train_all), which still contains the
# Val windows; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH FIXED CONFIG ============
# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the samples in their original (chronological) order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# 1-based epoch with the lowest validation loss (training loss if no val_loss).
# NOTE(review): this is reported only — the fit call above passes no callbacks,
# so nothing in it restores the best epoch's weights into `model`.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original price scale.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Multiplied by 100 so the metric is printed and stored as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One output folder per region and configuration (window / epoch / batch size).
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Save MODEL (.h5) and SCALER (.pkl) + metadata
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metric metadata
# NOTE(review): "train_samples" is len(X_train_all), which includes the windows
# also counted in "val_samples"; the model was fitted on len(X_train) windows.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Plot Actual vs Predicted (both on the original price scale, test portion only)
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
# Close the figure after saving it to disk.
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Zero actuals are replaced by machine epsilon so the division below is defined.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
# Absolute percentage error of each test row.
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
# 'Selisih' (difference) = actual - predicted.
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short summary (a single-row table with the configuration and metrics)
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 111 (val_loss=0.000227)
   -> Last loss     : 0.000980 | Last val_loss : 0.000441
   -> MAPE          : 3.7366%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Surabaya_w7_e114_b64\Surabaya_Su

In [3]:
#BLITAR
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed the NumPy, Python ``random`` and TensorFlow generators with 42."""
    seed_value = 42
    # Seeding order: NumPy, then Python's random module, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else in this cell runs.
reset_seeds()

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # number of past values in each input window
FIXED_EPOCH = 114  # epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Blitar.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
# Create the output root if needed; an already-existing directory is not an error.
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of ``dataset`` into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` as input and row
    ``i + window_size`` as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape (n_rows - window_size, window_size)
        and ``Y`` has shape (n_rows - window_size,). When the series is too
        short to form a single sample, ``X`` is an empty (0, window_size) array
        (still 2-D, so ``X.shape[1]`` stays valid) and ``Y`` is empty.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = series.shape[0] - window_size
    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair.
        return (np.empty((0, window_size), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1].
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    Args:
        input_shape: Shape of one input sample; forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model (Adam optimizer, mean squared error loss).
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    lstm_units = 100
    net = Sequential()
    # Layers are created and added one at a time, in this order.
    net.add(LSTM(lstm_units, return_sequences=True, input_shape=input_shape))
    net.add(Dropout(DROPOUT_RATE))
    net.add(LSTM(lstm_units, return_sequences=False))
    net.add(Dropout(DROPOUT_RATE))
    net.add(Dense(25, activation='relu'))
    net.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (ONE EXCEL FILE) ============
# Fail fast with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# The price column 'Harga (Rp)' is the only column used below.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Column vector of shape (n_rows, 1); create_sliding_window reads column 0.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windows over the unscaled series provide the test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train, the remainder is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON TRAIN ONLY (ANTI-LEAKAGE) ============
# Rows [0, train_size + window): every raw value that is an input or a target
# of one of the first train_size windows; later rows are excluded from the fit.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler.
# NOTE(review): values outside the training min/max presumably map outside
# [0, 1] (no clipping is requested here) — confirm if that matters.
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing axis of size 1: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

# Same train_size cut as on the raw windows, so X_test lines up with y_test_orig.
X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation = tail of TRAIN (the last VAL_RATIO share of the training windows)
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" below is len(X_train_all), which still contains the
# Val windows; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# ============ TRAINING WITH FIXED CONFIG ============
# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the samples in their original (chronological) order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# 1-based epoch with the lowest validation loss (training loss if no val_loss).
# NOTE(review): this is reported only — the fit call above passes no callbacks,
# so nothing in it restores the best epoch's weights into `model`.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON TEST (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original price scale.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Multiplied by 100 so the metric is printed and stored as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One output folder per region and configuration (window / epoch / batch size).
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Save MODEL (.h5) and SCALER (.pkl) + metadata
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metric metadata
# NOTE(review): "train_samples" is len(X_train_all), which includes the windows
# also counted in "val_samples"; the model was fitted on len(X_train) windows.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Plot Actual vs Predicted (both on the original price scale, test portion only)
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
# Close the figure after saving it to disk.
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel export
# Zero actuals are replaced by machine epsilon so the division below is defined.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
# Absolute percentage error of each test row.
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100
# 'Selisih' (difference) = actual - predicted.
out_df = pd.DataFrame({
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short summary (a single-row table with the configuration and metrics)
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Blitar.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 107 (val_loss=0.000592)
   -> Last loss     : 0.001126 | Last val_loss : 0.000662
   -> MAPE          : 3.3419%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Blitar_w7_e114_b64\Blitar_Summary_W7_E114_B64.xlsx

Se

In [9]:
#JEMBER
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds():
    """Re-seed the NumPy, Python ``random`` and TensorFlow generators with 42."""
    seed_value = 42
    # Seeding order: NumPy, then Python's random module, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(seed_value)

# Seed every RNG once before anything else in this cell runs.
reset_seeds()

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # number of past values in each input window
FIXED_EPOCH = 114  # epoch count — presumably passed to model.fit, as in the other region cells
FIXED_BATCH_SIZE = 64  # batch size — presumably passed to model.fit, as in the other region cells
VAL_RATIO = 0.1  # presumably the share of training windows held out for validation — confirm
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Jember.xlsx"
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
# Create the output root if needed; an already-existing directory is not an error.
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of ``dataset`` into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` as input and row
    ``i + window_size`` as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape (n_rows - window_size, window_size)
        and ``Y`` has shape (n_rows - window_size,). When the series is too
        short to form a single sample, ``X`` is an empty (0, window_size) array
        (still 2-D, so ``X.shape[1]`` stays valid) and ``Y`` is empty.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = series.shape[0] - window_size
    if n_samples <= 0:
        # Not enough rows for even one (window, target) pair.
        return (np.empty((0, window_size), dtype=series.dtype),
                np.empty((0,), dtype=series.dtype))

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1].
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    The stack is LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1), compiled with Adam and MSE loss.
    Dropout rate and learning rate are read from the module-level
    ``DROPOUT_RATE`` and ``LEARNING_RATE``.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(DROPOUT_RATE),
        LSTM(100, return_sequences=False),
        Dropout(DROPOUT_RATE),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    net = Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Check the path first so a missing file is reported with this script's own message.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# 'Harga (Rp)' is the only column this cell reads from the workbook.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Single-column 2-D array: the layout used by MinMaxScaler and create_sliding_window below.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label: the file name with "Data_Clean_" and ".xlsx" removed; used to name every output.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windowing the unscaled series gives the sample count and the test targets
# in original units, which are later compared with de-normalised predictions.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
if len(X_full_raw) == 0:
    # Without this check the failure surfaces later as an opaque shape error.
    raise ValueError(
        f"Data terlalu sedikit: {len(raw_data)} baris tidak cukup untuk window={FIXED_WINDOW_SIZE}"
    )
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows, in row order
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows read rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler is fitted on exactly that prefix.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

if min(len(X_train), len(X_val), len(X_test)) == 0:
    # Fail here with the actual sizes instead of inside model.fit / model.predict.
    raise ValueError(
        f"Pembagian data menghasilkan subset kosong "
        f"(fit={len(X_train)}, val={len(X_val)}, test={len(X_test)})"
    )

# "Train" is the whole 80% block; fit/val show how that block is divided,
# so Train + Test adds up to Total.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} (fit: {len(X_train)}, val: {len(X_val)}) | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Seeds are reset again immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the windows in their original order during fitting;
# no callbacks are passed, so training always runs for FIXED_EPOCH epochs.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based; falls back to the training loss when no val_loss was recorded.
# NOTE: this is reporting only - `model` keeps the weights of the final epoch.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON THE TEST SET (DE-NORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original scale with the fitted scaler.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Compared against the unscaled targets; * 100 expresses the score as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration holds every artifact written below.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Paths for the MODEL (.h5), the SCALER (.pkl) and the metadata file
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metrics as metadata
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    # train_samples is the whole train block, validation tail included;
    # fit_samples is the part actually passed to model.fit, so that
    # fit_samples + val_samples + test_samples equals the window total.
    "train_samples": int(len(X_train_all)),
    "fit_samples": int(len(X_train)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Line plot of actual vs predicted values over the test windows
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.grid(True)
plt.legend()
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet: actual, predicted, difference and percentage error
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero targets with machine epsilon so the division below never hits 0.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# One-row summary of the configuration and the resulting metrics
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Jember.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 113 (val_loss=0.000323)
   -> Last loss     : 0.000594 | Last val_loss : 0.000337
   -> MAPE          : 4.0159%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Jember_w7_e114_b64\Jember_Summary_W7_E114_B64.xlsx

Se

In [4]:
#KEDIRI
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed applied to all three generators. Defaults to 42,
            the value previously hard-coded here, so ``reset_seeds()``
            behaves exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

reset_seeds()  # seed all RNGs once at the top of the cell

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # window length passed to create_sliding_window
FIXED_EPOCH = 114  # number of epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # fraction of the train windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
# Input workbook for this region; it must contain a 'Harga (Rp)' column.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Kediri.xlsx"
# Root directory under which this run's output folder is created.
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn a single-column series into supervised (window, next value) pairs.

    Sample ``i`` pairs the ``window_size`` consecutive values starting at row
    ``i`` of column 0 with the value that immediately follows them.

    Args:
        dataset: 2-D array; only column 0 is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(len(dataset) - window_size, window_size)`` and ``Y`` has shape
        ``(len(dataset) - window_size,)``. When the series has no more than
        ``window_size`` rows both are empty, with ``X`` still 2-D
        (``(0, window_size)``) so callers can safely read ``X.shape[1]``.
    """
    data = np.asarray(dataset)
    n_samples = len(data) - window_size
    if n_samples <= 0:
        # Too short for even one window: return correctly shaped empties
        # rather than a 1-D array that breaks downstream reshape calls.
        return (np.empty((0, window_size), dtype=data.dtype),
                np.empty((0,), dtype=data.dtype))
    series = data[:, 0]
    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1].
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    # Copy the targets so the result never aliases the caller's array.
    return series[window_idx], series[window_size:].copy()

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    The stack is LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1), compiled with Adam and MSE loss.
    Dropout rate and learning rate are read from the module-level
    ``DROPOUT_RATE`` and ``LEARNING_RATE``.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(DROPOUT_RATE),
        LSTM(100, return_sequences=False),
        Dropout(DROPOUT_RATE),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    net = Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Check the path first so a missing file is reported with this script's own message.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# 'Harga (Rp)' is the only column this cell reads from the workbook.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Single-column 2-D array: the layout used by MinMaxScaler and create_sliding_window below.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label: the file name with "Data_Clean_" and ".xlsx" removed; used to name every output.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windowing the unscaled series gives the sample count and the test targets
# in original units, which are later compared with de-normalised predictions.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
if len(X_full_raw) == 0:
    # Without this check the failure surfaces later as an opaque shape error.
    raise ValueError(
        f"Data terlalu sedikit: {len(raw_data)} baris tidak cukup untuk window={FIXED_WINDOW_SIZE}"
    )
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows, in row order
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows read rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler is fitted on exactly that prefix.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

if min(len(X_train), len(X_val), len(X_test)) == 0:
    # Fail here with the actual sizes instead of inside model.fit / model.predict.
    raise ValueError(
        f"Pembagian data menghasilkan subset kosong "
        f"(fit={len(X_train)}, val={len(X_val)}, test={len(X_test)})"
    )

# "Train" is the whole 80% block; fit/val show how that block is divided,
# so Train + Test adds up to Total.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} (fit: {len(X_train)}, val: {len(X_val)}) | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Seeds are reset again immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the windows in their original order during fitting;
# no callbacks are passed, so training always runs for FIXED_EPOCH epochs.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based; falls back to the training loss when no val_loss was recorded.
# NOTE: this is reporting only - `model` keeps the weights of the final epoch.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON THE TEST SET (DE-NORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original scale with the fitted scaler.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Compared against the unscaled targets; * 100 expresses the score as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration holds every artifact written below.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Paths for the MODEL (.h5), the SCALER (.pkl) and the metadata file
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metrics as metadata
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    # train_samples is the whole train block, validation tail included;
    # fit_samples is the part actually passed to model.fit, so that
    # fit_samples + val_samples + test_samples equals the window total.
    "train_samples": int(len(X_train_all)),
    "fit_samples": int(len(X_train)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Line plot of actual vs predicted values over the test windows
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.grid(True)
plt.legend()
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet: actual, predicted, difference and percentage error
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero targets with machine epsilon so the division below never hits 0.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# One-row summary of the configuration and the resulting metrics
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Kediri.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 113 (val_loss=0.000207)
   -> Last loss     : 0.000798 | Last val_loss : 0.000226
   -> MAPE          : 2.2610%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Kediri_w7_e114_b64\Kediri_Summary_W7_E114_B64.xlsx

Se

In [5]:
#MADIUN
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed applied to all three generators. Defaults to 42,
            the value previously hard-coded here, so ``reset_seeds()``
            behaves exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

reset_seeds()  # seed all RNGs once at the top of the cell

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # window length passed to create_sliding_window
FIXED_EPOCH = 114  # number of epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # fraction of the train windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
# Input workbook for this region; it must contain a 'Harga (Rp)' column.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Madiun.xlsx"
# Root directory under which this run's output folder is created.
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn a single-column series into supervised (window, next value) pairs.

    Sample ``i`` pairs the ``window_size`` consecutive values starting at row
    ``i`` of column 0 with the value that immediately follows them.

    Args:
        dataset: 2-D array; only column 0 is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(len(dataset) - window_size, window_size)`` and ``Y`` has shape
        ``(len(dataset) - window_size,)``. When the series has no more than
        ``window_size`` rows both are empty, with ``X`` still 2-D
        (``(0, window_size)``) so callers can safely read ``X.shape[1]``.
    """
    data = np.asarray(dataset)
    n_samples = len(data) - window_size
    if n_samples <= 0:
        # Too short for even one window: return correctly shaped empties
        # rather than a 1-D array that breaks downstream reshape calls.
        return (np.empty((0, window_size), dtype=data.dtype),
                np.empty((0,), dtype=data.dtype))
    series = data[:, 0]
    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1].
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    # Copy the targets so the result never aliases the caller's array.
    return series[window_idx], series[window_size:].copy()

def build_model_lstm(input_shape):
    """Build and compile the stacked two-layer LSTM regressor.

    The stack is LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1), compiled with Adam and MSE loss.
    Dropout rate and learning rate are read from the module-level
    ``DROPOUT_RATE`` and ``LEARNING_RATE``.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(DROPOUT_RATE),
        LSTM(100, return_sequences=False),
        Dropout(DROPOUT_RATE),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    net = Sequential(stack)
    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    net.compile(optimizer=adam, loss='mean_squared_error')
    return net

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
# Check the path first so a missing file is reported with this script's own message.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# 'Harga (Rp)' is the only column this cell reads from the workbook.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Single-column 2-D array: the layout used by MinMaxScaler and create_sliding_window below.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label: the file name with "Data_Clean_" and ".xlsx" removed; used to name every output.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWING ON THE ORIGINAL SCALE ============
# Windowing the unscaled series gives the sample count and the test targets
# in original units, which are later compared with de-normalised predictions.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
if len(X_full_raw) == 0:
    # Without this check the failure surfaces later as an opaque shape error.
    raise ValueError(
        f"Data terlalu sedikit: {len(raw_data)} baris tidak cukup untuk window={FIXED_WINDOW_SIZE}"
    )
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows, in row order
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ FIT SCALER ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The first `train_size` windows read rows 0 .. train_size + window - 1
# (inputs plus targets), so the scaler is fitted on exactly that prefix.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

if min(len(X_train), len(X_val), len(X_test)) == 0:
    # Fail here with the actual sizes instead of inside model.fit / model.predict.
    raise ValueError(
        f"Pembagian data menghasilkan subset kosong "
        f"(fit={len(X_train)}, val={len(X_val)}, test={len(X_test)})"
    )

# "Train" is the whole 80% block; fit/val show how that block is divided,
# so Train + Test adds up to Total.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} (fit: {len(X_train)}, val: {len(X_val)}) | Test: {len(X_test)}")

# ============ TRAINING WITH THE FIXED CONFIG ============
# Seeds are reset again immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the windows in their original order during fitting;
# no callbacks are passed, so training always runs for FIXED_EPOCH epochs.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based; falls back to the training loss when no val_loss was recorded.
# NOTE: this is reporting only - `model` keeps the weights of the final epoch.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))

# ============ EVALUATION ON THE TEST SET (DE-NORMALISED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the scaled predictions back to the original scale with the fitted scaler.
predictions_real = scaler.inverse_transform(predictions_scaled)
# Compared against the unscaled targets; * 100 expresses the score as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration holds every artifact written below.
region_out_dir = os.path.join(output_root, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Paths for the MODEL (.h5), the SCALER (.pkl) and the metadata file
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler alongside the model
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Save configuration and metrics as metadata
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    # train_samples is the whole train block, validation tail included;
    # fit_samples is the part actually passed to model.fit, so that
    # fit_samples + val_samples + test_samples equals the window total.
    "train_samples": int(len(X_train_all)),
    "fit_samples": int(len(X_train)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")

# Line plot of actual vs predicted values over the test windows
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.figure(figsize=(10, 4))
plt.plot(y_test_orig, label='Actual (Real Data)')
plt.plot(predictions_real, label='Predicted (Denormalized)')
plt.xlabel('Index')
plt.ylabel('Harga')
plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
plt.grid(True)
plt.legend()
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet: actual, predicted, difference and percentage error
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero targets with machine epsilon so the division below never hits 0.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# One-row summary of the configuration and the resulting metrics
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Madiun.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000330)
   -> Last loss     : 0.000922 | Last val_loss : 0.000330
   -> MAPE          : 2.7484%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Madiun_w7_e114_b64\Madiun_Summary_W7_E114_B64.xlsx

Se

In [6]:
#MALANG
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed applied to all three generators. Defaults to 42,
            the value previously hard-coded here, so ``reset_seeds()``
            behaves exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

reset_seeds()  # seed all RNGs once at the top of the cell

# ============ FIXED CONFIG (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7  # window length passed to create_sliding_window
FIXED_EPOCH = 114  # NOTE(review): presumably the epoch count for model.fit, as in the other region cells - confirm
FIXED_BATCH_SIZE = 64  # NOTE(review): presumably the batch size for model.fit, as in the other region cells - confirm
VAL_RATIO = 0.1  # NOTE(review): presumably the validation fraction of the train windows - confirm
DROPOUT_RATE = 0.2  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate in build_model_lstm

# ============ FILE PATHS (CHANGE PER REGION) ============
# Input workbook for this region; it must contain a 'Harga (Rp)' column.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Malang.xlsx"
# Root directory for this notebook's outputs.
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn a single-column series into supervised (window, next value) pairs.

    Sample ``i`` pairs the ``window_size`` consecutive values starting at row
    ``i`` of column 0 with the value that immediately follows them.

    Args:
        dataset: 2-D array; only column 0 is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` where ``X`` has shape
        ``(len(dataset) - window_size, window_size)`` and ``Y`` has shape
        ``(len(dataset) - window_size,)``. When the series has no more than
        ``window_size`` rows both are empty, with ``X`` still 2-D
        (``(0, window_size)``) so callers can safely read ``X.shape[1]``.
    """
    data = np.asarray(dataset)
    n_samples = len(data) - window_size
    if n_samples <= 0:
        # Too short for even one window: return correctly shaped empties
        # rather than a 1-D array that breaks downstream reshape calls.
        return (np.empty((0, window_size), dtype=data.dtype),
                np.empty((0,), dtype=data.dtype))
    series = data[:, 0]
    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1].
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    # Copy the targets so the result never aliases the caller's array.
    return series[window_idx], series[window_size:].copy()

def build_model_lstm(input_shape):
    """Assemble and compile the stacked-LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before any layer is created.
    tf.random.set_seed(42)

    # Each layer is constructed and added in turn, in this exact order.
    network = Sequential()
    network.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(LSTM(units=100, return_sequences=False))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(Dense(units=25, activation='relu'))
    network.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Price series as a single-column 2-D array (the layout the scaler is fed).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Fail fast on missing prices: NaN values would otherwise be carried into the
# scaled series and the training targets without any error being raised.
if pd.isna(raw_data).any():
    raise ValueError(f"Kolom 'Harga (Rp)' mengandung nilai kosong (NaN) di file {path_file_input}")

# At least one (window, target) pair is needed; otherwise the run only fails
# further down with an unrelated-looking error.
if len(raw_data) <= FIXED_WINDOW_SIZE:
    raise ValueError(
        f"Jumlah baris ({len(raw_data)}) harus lebih besar dari window size ({FIXED_WINDOW_SIZE})"
    )

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWS ON THE ORIGINAL SCALE ============
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# First 80% of the windows are for training, the remainder for testing.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ SCALER FITTED ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The last training window targets row (train_size - 1 + FIXED_WINDOW_SIZE), so
# this slice holds exactly the rows touched by the training windows. Note that
# the validation tail is carved out of those windows and is therefore included.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# "Train" is the full 80% portion; "Val" is the part of it held out above.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# Every split must contain at least one sample for fitting and evaluation.
if min(len(X_train), len(X_val), len(X_test)) == 0:
    raise ValueError(
        f"Data terlalu sedikit: train={len(X_train)}, val={len(X_val)}, test={len(X_test)} (tidak boleh 0)"
    )

# ============ TRAINING WITH THE FIXED CONFIGURATION ============
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False: samples are fed in their original order in every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=FIXED_BATCH_SIZE,
    epochs=FIXED_EPOCH,
    shuffle=False,
    verbose=0,
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])

# Rank epochs by validation loss when it was recorded, else by training loss.
if val_loss is not None:
    last_val_loss = float(val_loss[-1])
    best_epoch = int(np.argmin(val_loss) + 1)  # +1: report epochs 1-based
    best_val = float(np.min(val_loss))
else:
    last_val_loss = None
    best_epoch = int(np.argmin(train_loss) + 1)
    best_val = float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
loss_report = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    loss_report += f" | Last val_loss : {last_val_loss:.6f}"
print(loss_report)

# ============ EVALUATION ON THE TEST SET (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the predictions back to the original price scale before scoring.
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = 100 * mean_absolute_percentage_error(
    y_true=y_test_orig.ravel(),
    y_pred=predictions_real.ravel(),
)
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration combination.
region_out_dir = os.path.join(
    output_root,
    f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}",
)
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: trained MODEL (.h5), fitted SCALER (.pkl) and run metadata (.json)
model_path = os.path.join(
    region_out_dir,
    f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5",
)
scaler_path = os.path.join(
    region_out_dir,
    f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl",
)
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Persist the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Persist the scaler so predictions can be denormalized later
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Persist configuration and metrics; values are cast to plain Python types
# so that they are JSON-serializable.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape),
}
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")

# Actual vs predicted plot (saved to disk, then closed)
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_test_orig, label='Actual (Real Data)')
ax.plot(predictions_real, label='Predicted (Denormalized)')
ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Index')
ax.set_ylabel('Harga')
ax.legend()
ax.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero actuals with machine epsilon to avoid division by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
})
excel_path = os.path.join(
    region_out_dir,
    f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(
    region_out_dir,
    f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Malang.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 105 (val_loss=0.000197)
   -> Last loss     : 0.001020 | Last val_loss : 0.000407
   -> MAPE          : 3.4254%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Malang_w7_e114_b64\Malang_Summary_W7_E114_B64.xlsx

Se

In [7]:
#PROBOLINGGO
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed applied to all three generators. Defaults to 42,
            the value that used to be hard-coded, so ``reset_seeds()`` keeps
            its previous behaviour.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Seed all generators once before anything else in this cell runs.
reset_seeds()

# ============ FIXED CONFIGURATION (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7     # rows per input window (passed to create_sliding_window)
FIXED_EPOCH = 114         # epochs passed to model.fit
FIXED_BATCH_SIZE = 64     # batch_size passed to model.fit
VAL_RATIO = 0.1           # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2        # rate of both Dropout layers; also embedded in output file names
LEARNING_RATE = 0.001     # learning rate of the Adam optimizer

# ============ FILE PATHS (CHANGE PER REGION) ============
# Input workbook; the region name is later derived from this file name.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Probolinggo.xlsx"
# Root folder under which a per-region output directory is created.
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a series into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as the
    input window and row ``i + window_size`` of column 0 as the target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays with shapes
        ``(n_samples, window_size)`` and ``(n_samples,)``, where
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when no sample can be formed, so callers can safely read
        ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    data = np.asarray(dataset)
    # An empty input may arrive as 1-D; flatten it instead of indexing a
    # column that does not exist.
    series = data.reshape(-1) if data.size == 0 else data[:, 0]

    n_samples = max(len(series) - window_size, 0)
    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # indexing with it extracts every window in one vectorised step.
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    return series[window_idx], series[window_size:window_size + n_samples]

def build_model_lstm(input_shape):
    """Assemble and compile the stacked-LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before any layer is created.
    tf.random.set_seed(42)

    # Each layer is constructed and added in turn, in this exact order.
    network = Sequential()
    network.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(LSTM(units=100, return_sequences=False))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(Dense(units=25, activation='relu'))
    network.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Price series as a single-column 2-D array (the layout the scaler is fed).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Fail fast on missing prices: NaN values would otherwise be carried into the
# scaled series and the training targets without any error being raised.
if pd.isna(raw_data).any():
    raise ValueError(f"Kolom 'Harga (Rp)' mengandung nilai kosong (NaN) di file {path_file_input}")

# At least one (window, target) pair is needed; otherwise the run only fails
# further down with an unrelated-looking error.
if len(raw_data) <= FIXED_WINDOW_SIZE:
    raise ValueError(
        f"Jumlah baris ({len(raw_data)}) harus lebih besar dari window size ({FIXED_WINDOW_SIZE})"
    )

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWS ON THE ORIGINAL SCALE ============
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# First 80% of the windows are for training, the remainder for testing.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ SCALER FITTED ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The last training window targets row (train_size - 1 + FIXED_WINDOW_SIZE), so
# this slice holds exactly the rows touched by the training windows. Note that
# the validation tail is carved out of those windows and is therefore included.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# "Train" is the full 80% portion; "Val" is the part of it held out above.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# Every split must contain at least one sample for fitting and evaluation.
if min(len(X_train), len(X_val), len(X_test)) == 0:
    raise ValueError(
        f"Data terlalu sedikit: train={len(X_train)}, val={len(X_val)}, test={len(X_test)} (tidak boleh 0)"
    )

# ============ TRAINING WITH THE FIXED CONFIGURATION ============
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False: samples are fed in their original order in every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=FIXED_BATCH_SIZE,
    epochs=FIXED_EPOCH,
    shuffle=False,
    verbose=0,
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])

# Rank epochs by validation loss when it was recorded, else by training loss.
if val_loss is not None:
    last_val_loss = float(val_loss[-1])
    best_epoch = int(np.argmin(val_loss) + 1)  # +1: report epochs 1-based
    best_val = float(np.min(val_loss))
else:
    last_val_loss = None
    best_epoch = int(np.argmin(train_loss) + 1)
    best_val = float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
loss_report = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    loss_report += f" | Last val_loss : {last_val_loss:.6f}"
print(loss_report)

# ============ EVALUATION ON THE TEST SET (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the predictions back to the original price scale before scoring.
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = 100 * mean_absolute_percentage_error(
    y_true=y_test_orig.ravel(),
    y_pred=predictions_real.ravel(),
)
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration combination.
region_out_dir = os.path.join(
    output_root,
    f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}",
)
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: trained MODEL (.h5), fitted SCALER (.pkl) and run metadata (.json)
model_path = os.path.join(
    region_out_dir,
    f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5",
)
scaler_path = os.path.join(
    region_out_dir,
    f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl",
)
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Persist the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Persist the scaler so predictions can be denormalized later
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Persist configuration and metrics; values are cast to plain Python types
# so that they are JSON-serializable.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape),
}
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")

# Actual vs predicted plot (saved to disk, then closed)
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_test_orig, label='Actual (Real Data)')
ax.plot(predictions_real, label='Predicted (Denormalized)')
ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Index')
ax.set_ylabel('Harga')
ax.legend()
ax.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero actuals with machine epsilon to avoid division by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
})
excel_path = os.path.join(
    region_out_dir,
    f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(
    region_out_dir,
    f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Probolinggo.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000451)
   -> Last loss     : 0.001199 | Last val_loss : 0.000451
   -> MAPE          : 3.4859%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Probolinggo_w7_e114_b64\Probolinggo_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL

In [8]:
#SUMENEP
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout

# ============ SEEDS ============
def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed applied to all three generators. Defaults to 42,
            the value that used to be hard-coded, so ``reset_seeds()`` keeps
            its previous behaviour.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Seed all generators once before anything else in this cell runs.
reset_seeds()

# ============ FIXED CONFIGURATION (BEST RESULT) ============
FIXED_WINDOW_SIZE = 7     # rows per input window (passed to create_sliding_window)
FIXED_EPOCH = 114         # epochs passed to model.fit
FIXED_BATCH_SIZE = 64     # batch_size passed to model.fit
VAL_RATIO = 0.1           # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.2        # rate of both Dropout layers; also embedded in output file names
LEARNING_RATE = 0.001     # learning rate of the Adam optimizer

# ============ FILE PATHS (CHANGE PER REGION) ============
# Input workbook; the region name is later derived from this file name.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Sumenep.xlsx"
# Root folder under which a per-region output directory is created.
output_root = r"E:\SKRIPSI 2025\dataset\FINAL"
os.makedirs(output_root, exist_ok=True)

# ============ UTIL ============
def create_sliding_window(dataset, window_size):
    """Turn column 0 of a series into supervised (window, next value) pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as the
    input window and row ``i + window_size`` of column 0 as the target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_cols)``; only column 0
            is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)`` of NumPy arrays with shapes
        ``(n_samples, window_size)`` and ``(n_samples,)``, where
        ``n_samples = max(n_rows - window_size, 0)``. ``X`` stays 2-D even
        when no sample can be formed, so callers can safely read
        ``X.shape[1]``.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError("window_size must be non-negative")

    data = np.asarray(dataset)
    # An empty input may arrive as 1-D; flatten it instead of indexing a
    # column that does not exist.
    series = data.reshape(-1) if data.size == 0 else data[:, 0]

    n_samples = max(len(series) - window_size, 0)
    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # indexing with it extracts every window in one vectorised step.
    window_idx = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    return series[window_idx], series[window_size:window_size + n_samples]

def build_model_lstm(input_shape):
    """Assemble and compile the stacked-LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer and mean-squared-error loss.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before any layer is created.
    tf.random.set_seed(42)

    # Each layer is constructed and added in turn, in this exact order.
    network = Sequential()
    network.add(LSTM(units=100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(LSTM(units=100, return_sequences=False))
    network.add(Dropout(rate=DROPOUT_RATE))
    network.add(Dense(units=25, activation='relu'))
    network.add(Dense(units=1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# ============ LOAD DATA (SINGLE EXCEL FILE) ============
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

# Price series as a single-column 2-D array (the layout the scaler is fed).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Fail fast on missing prices: NaN values would otherwise be carried into the
# scaled series and the training targets without any error being raised.
if pd.isna(raw_data).any():
    raise ValueError(f"Kolom 'Harga (Rp)' mengandung nilai kosong (NaN) di file {path_file_input}")

# At least one (window, target) pair is needed; otherwise the run only fails
# further down with an unrelated-looking error.
if len(raw_data) <= FIXED_WINDOW_SIZE:
    raise ValueError(
        f"Jumlah baris ({len(raw_data)}) harus lebih besar dari window size ({FIXED_WINDOW_SIZE})"
    )

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# ============ WINDOWS ON THE ORIGINAL SCALE ============
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# First 80% of the windows are for training, the remainder for testing.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  # test targets on the original scale

# ============ SCALER FITTED ON THE TRAIN SEGMENT ONLY (ANTI-LEAKAGE) ============
# The last training window targets row (train_size - 1 + FIXED_WINDOW_SIZE), so
# this slice holds exactly the rows touched by the training windows. Note that
# the validation tail is carved out of those windows and is therefore included.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

# ============ WINDOWING ON THE SCALED DATA ============
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Add a trailing feature axis: (samples, window) -> (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# "Train" is the full 80% portion; "Val" is the part of it held out above.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")

# Every split must contain at least one sample for fitting and evaluation.
if min(len(X_train), len(X_val), len(X_test)) == 0:
    raise ValueError(
        f"Data terlalu sedikit: train={len(X_train)}, val={len(X_val)}, test={len(X_test)} (tidak boleh 0)"
    )

# ============ TRAINING WITH THE FIXED CONFIGURATION ============
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False: samples are fed in their original order in every epoch.
history = model.fit(
    x=X_train,
    y=y_train,
    validation_data=(X_val, y_val),
    batch_size=FIXED_BATCH_SIZE,
    epochs=FIXED_EPOCH,
    shuffle=False,
    verbose=0,
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])

# Rank epochs by validation loss when it was recorded, else by training loss.
if val_loss is not None:
    last_val_loss = float(val_loss[-1])
    best_epoch = int(np.argmin(val_loss) + 1)  # +1: report epochs 1-based
    best_val = float(np.min(val_loss))
else:
    last_val_loss = None
    best_epoch = int(np.argmin(train_loss) + 1)
    best_val = float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
loss_report = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    loss_report += f" | Last val_loss : {last_val_loss:.6f}"
print(loss_report)

# ============ EVALUATION ON THE TEST SET (DENORMALIZED) ============
predictions_scaled = model.predict(X_test, verbose=0)
# Map the predictions back to the original price scale before scoring.
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = 100 * mean_absolute_percentage_error(
    y_true=y_test_orig.ravel(),
    y_pred=predictions_real.ravel(),
)
print(f"   -> MAPE          : {mape:.4f}%")

# ============ OUTPUT ============
# One folder per region/configuration combination.
region_out_dir = os.path.join(
    output_root,
    f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}",
)
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: trained MODEL (.h5), fitted SCALER (.pkl) and run metadata (.json)
model_path = os.path.join(
    region_out_dir,
    f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5",
)
scaler_path = os.path.join(
    region_out_dir,
    f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl",
)
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Persist the model
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Persist the scaler so predictions can be denormalized later
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Persist configuration and metrics; values are cast to plain Python types
# so that they are JSON-serializable.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape),
}
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")

# Actual vs predicted plot (saved to disk, then closed)
fig, ax = plt.subplots(figsize=(10, 4))
ax.plot(y_test_orig, label='Actual (Real Data)')
ax.plot(predictions_real, label='Predicted (Denormalized)')
ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Index')
ax.set_ylabel('Harga')
ax.legend()
ax.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat
# Replace zero actuals with machine epsilon to avoid division by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
out_df = pd.DataFrame({
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
})
excel_path = os.path.join(
    region_out_dir,
    f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short one-row summary
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(
    region_out_dir,
    f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx",
)
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Sumenep.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000221)
   -> Last loss     : 0.000738 | Last val_loss : 0.000221
   -> MAPE          : 2.0828%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_model_W7_E114_B64_DO0.2.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_scaler_W7_E114_B64_DO0.2.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_metadata.json
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_Prediksi_W7_E114_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\FINAL\Sumenep_w7_e114_b64\Sumenep_Summary_W7_E114