In [2]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with the fixed seed 42."""
    fixed_seed = 42
    # Same order as before: NumPy, then Python's random, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(fixed_seed)

# Seed every RNG once at the top of the cell, before any model code runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7    # number of consecutive past values in each input window
FIXED_EPOCH = 114        # number of epochs passed to model.fit
FIXED_BATCH_SIZE = 64    # batch size passed to model.fit
VAL_RATIO = 0.1          # trailing fraction of the training windows held out for validation
DROPOUT_RATE = 0.02      # rate used by both Dropout layers
LEARNING_RATE = 0.001    # learning rate of the Adam optimizer

# Input workbook and the root folder that receives every generated artifact.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Split column 0 of a series into overlapping input windows and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is used.
        window_size: Number of consecutive rows in each input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` where ``X[i]`` equals ``dataset[i:i + window_size, 0]``
        and ``Y[i]`` equals ``dataset[i + window_size, 0]``. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)`` with
        ``n_samples = max(n_rows - window_size, 0)``. When the series is too
        short to form a single window, ``X`` is empty but still 2-D, so callers
        that read ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]. Indexing
    # the series with it builds every window in one step and always yields a
    # 2-D result, even when n_samples is 0.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets never alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked-LSTM regressor used by this script.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, ReLU) -> Dense(1). The model is compiled with the
    Adam optimizer and mean-squared-error loss. The dropout rate and learning
    rate are read from the module-level DROPOUT_RATE and LEARNING_RATE.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer. The caller in this file passes ``(window_size, 1)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before the layers are created (presumably so the
    # initial weights are repeatable from run to run).
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    # Second LSTM returns only its last output.
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# Fail early with a clear message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'  # name of the date column

# The price column is required; everything below is derived from it.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series as a column vector of shape (n_rows, 1).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: this fixes the train/test boundary and
# keeps the test targets in original units for the metrics computed later.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows (train + validation)
y_test_orig = Y_full_raw[train_size:]


# Fit the scaler only on the rows touched by the training windows and their
# targets (the last training target is row train_size + FIXED_WINDOW_SIZE - 1),
# so no test-only value influences the scaling.
# NOTE(review): this segment also contains the validation rows.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Re-window the scaled series and add a trailing feature axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" prints len(X_train_all), which still includes the
# validation windows; the model is fitted on len(X_train) samples.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built (presumably so training starts
# from the same RNG state regardless of what ran earlier in the cell).
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# Train for a fixed number of epochs, evaluating on the held-out tail after
# each one. shuffle=False keeps the training samples in their original order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are reported 1-based (argmin index + 1). Validation loss is
# preferred; training loss is the fallback when no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict with the model as it stands after the final epoch (no weights are
# restored from best_epoch), map the outputs back to original units, and
# score them against the unscaled test targets. MAPE is expressed in percent.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and hyperparameter combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths; the model and scaler file names also encode the dropout rate.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler so predictions can be de-normalised later.
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run summary written as JSON. Values are cast to built-in int/float so that
# NumPy scalars do not break json.dump.
# NOTE(review): "train_samples" is len(X_train_all), which includes the
# validation windows counted again under "val_samples".
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curve: training loss (and validation loss, when recorded) per epoch.
plt.figure(figsize=(12, 6))

# history.history holds one value per epoch. Number the epochs 1..N so the
# x-axis agrees with the 1-based best_epoch reported elsewhere and the final
# tick at FIXED_EPOCH lands on the last data point instead of one past it.
epoch_numbers = range(1, len(history.history['loss']) + 1)

# Plot the curves.
plt.plot(epoch_numbers, history.history['loss'], label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    plt.plot(epoch_numbers, history.history['val_loss'], label='Validation Loss (Data Validasi)', linewidth=2)

# Tick every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i sits FIXED_WINDOW_SIZE rows after the start of window i, so dropping
# the first FIXED_WINDOW_SIZE dates lines the dates up with the targets.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

# Draw actual vs. predicted prices through explicit Figure/Axes handles.
pred_fig = plt.figure(figsize=(12, 6))
pred_ax = pred_fig.gca()

pred_ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
pred_ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

pred_ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
pred_ax.set_xlabel('Tanggal dan Tahun')
pred_ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate the date labels.
pred_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
pred_ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
pred_fig.autofmt_xdate()
pred_ax.legend()
pred_ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
pred_fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(pred_fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse: actual values, predictions and their difference.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Per-sample absolute percentage error; zero actuals are replaced by machine
# epsilon so the division never hits zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100

# Per-date prediction table.
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the run.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000222)
   -> Last loss     : 0.000576 | Last val_loss : 0.000222
   -> MAPE          : 2.6390%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Surabaya_w7_e114_b64\Surabaya_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Surabaya_w7_e114_b64\Surabaya_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Surabaya_w7_e114_b64\Surabaya_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Surabaya_w7_e114_b64\Surabaya_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Surabaya_w7_e114_b64\Surabaya_Plot.png

In [3]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with the fixed seed 42."""
    fixed_seed = 42
    # Same order as before: NumPy, then Python's random, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(fixed_seed)

# Seed every RNG once at the top of the cell, before any model code runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7    # number of consecutive past values in each input window
FIXED_EPOCH = 114        # number of epochs passed to model.fit
FIXED_BATCH_SIZE = 64    # batch size passed to model.fit
VAL_RATIO = 0.1          # trailing fraction of the training windows held out for validation
DROPOUT_RATE = 0.02      # rate used by both Dropout layers
LEARNING_RATE = 0.001    # learning rate of the Adam optimizer

# Input workbook and the root folder that receives every generated artifact.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Split column 0 of a series into overlapping input windows and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is used.
        window_size: Number of consecutive rows in each input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` where ``X[i]`` equals ``dataset[i:i + window_size, 0]``
        and ``Y[i]`` equals ``dataset[i + window_size, 0]``. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)`` with
        ``n_samples = max(n_rows - window_size, 0)``. When the series is too
        short to form a single window, ``X`` is empty but still 2-D, so callers
        that read ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]. Indexing
    # the series with it builds every window in one step and always yields a
    # 2-D result, even when n_samples is 0.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets never alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked-LSTM regressor used by this script.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, ReLU) -> Dense(1). The model is compiled with the
    Adam optimizer and mean-squared-error loss. The dropout rate and learning
    rate are read from the module-level DROPOUT_RATE and LEARNING_RATE.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer. The caller in this file passes ``(window_size, 1)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before the layers are created (presumably so the
    # initial weights are repeatable from run to run).
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    # Second LSTM returns only its last output.
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# Fail early with a clear message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'  # name of the date column

# The price column is required; everything below is derived from it.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series as a column vector of shape (n_rows, 1).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: this fixes the train/test boundary and
# keeps the test targets in original units for the metrics computed later.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows (train + validation)
y_test_orig = Y_full_raw[train_size:]


# Fit the scaler only on the rows touched by the training windows and their
# targets (the last training target is row train_size + FIXED_WINDOW_SIZE - 1),
# so no test-only value influences the scaling.
# NOTE(review): this segment also contains the validation rows.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Re-window the scaled series and add a trailing feature axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" prints len(X_train_all), which still includes the
# validation windows; the model is fitted on len(X_train) samples.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built (presumably so training starts
# from the same RNG state regardless of what ran earlier in the cell).
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# Train for a fixed number of epochs, evaluating on the held-out tail after
# each one. shuffle=False keeps the training samples in their original order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are reported 1-based (argmin index + 1). Validation loss is
# preferred; training loss is the fallback when no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict with the model as it stands after the final epoch (no weights are
# restored from best_epoch), map the outputs back to original units, and
# score them against the unscaled test targets. MAPE is expressed in percent.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and hyperparameter combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths; the model and scaler file names also encode the dropout rate.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler so predictions can be de-normalised later.
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run summary written as JSON. Values are cast to built-in int/float so that
# NumPy scalars do not break json.dump.
# NOTE(review): "train_samples" is len(X_train_all), which includes the
# validation windows counted again under "val_samples".
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curve: training loss (and validation loss, when recorded) per epoch.
plt.figure(figsize=(12, 6))

# history.history holds one value per epoch. Number the epochs 1..N so the
# x-axis agrees with the 1-based best_epoch reported elsewhere and the final
# tick at FIXED_EPOCH lands on the last data point instead of one past it.
epoch_numbers = range(1, len(history.history['loss']) + 1)

# Plot the curves.
plt.plot(epoch_numbers, history.history['loss'], label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    plt.plot(epoch_numbers, history.history['val_loss'], label='Validation Loss (Data Validasi)', linewidth=2)

# Tick every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i sits FIXED_WINDOW_SIZE rows after the start of window i, so dropping
# the first FIXED_WINDOW_SIZE dates lines the dates up with the targets.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

# Draw actual vs. predicted prices through explicit Figure/Axes handles.
pred_fig = plt.figure(figsize=(12, 6))
pred_ax = pred_fig.gca()

pred_ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
pred_ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

pred_ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
pred_ax.set_xlabel('Tanggal dan Tahun')
pred_ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate the date labels.
pred_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
pred_ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
pred_fig.autofmt_xdate()
pred_ax.legend()
pred_ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
pred_fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(pred_fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse: actual values, predictions and their difference.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Per-sample absolute percentage error; zero actuals are replaced by machine
# epsilon so the division never hits zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100

# Per-date prediction table.
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the run.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Banyuwangi.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 112 (val_loss=0.000267)
   -> Last loss     : 0.001090 | Last val_loss : 0.000321
   -> MAPE          : 4.1406%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Banyuwangi_w7_e114_b64\Banyuwangi_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Banyuwangi_w7_e114_b64\Banyuwangi_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Banyuwangi_w7_e114_b64\Banyuwangi_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Banyuwangi_w7_e114_b64\Banyuwangi_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Banyuwangi_w7_e114_b

In [4]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with the fixed seed 42."""
    fixed_seed = 42
    # Same order as before: NumPy, then Python's random, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(fixed_seed)

# Seed every RNG once at the top of the cell, before any model code runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7    # number of consecutive past values in each input window
FIXED_EPOCH = 114        # number of epochs passed to model.fit
FIXED_BATCH_SIZE = 64    # batch size passed to model.fit
VAL_RATIO = 0.1          # trailing fraction of the training windows held out for validation
DROPOUT_RATE = 0.02      # rate used by both Dropout layers
LEARNING_RATE = 0.001    # learning rate of the Adam optimizer

# Input workbook and the root folder that receives every generated artifact.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Blitar.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Split column 0 of a series into overlapping input windows and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is used.
        window_size: Number of consecutive rows in each input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` where ``X[i]`` equals ``dataset[i:i + window_size, 0]``
        and ``Y[i]`` equals ``dataset[i + window_size, 0]``. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)`` with
        ``n_samples = max(n_rows - window_size, 0)``. When the series is too
        short to form a single window, ``X`` is empty but still 2-D, so callers
        that read ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]. Indexing
    # the series with it builds every window in one step and always yields a
    # 2-D result, even when n_samples is 0.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets never alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked-LSTM regressor used by this script.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, ReLU) -> Dense(1). The model is compiled with the
    Adam optimizer and mean-squared-error loss. The dropout rate and learning
    rate are read from the module-level DROPOUT_RATE and LEARNING_RATE.

    Args:
        input_shape: Shape of one input sample, forwarded to the first LSTM
            layer. The caller in this file passes ``(window_size, 1)``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow right before the layers are created (presumably so the
    # initial weights are repeatable from run to run).
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    # Second LSTM returns only its last output.
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# Fail early with a clear message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'  # name of the date column

# The price column is required; everything below is derived from it.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series as a column vector of shape (n_rows, 1).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: this fixes the train/test boundary and
# keeps the test targets in original units for the metrics computed later.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
train_size = int(len(X_full_raw) * 0.8)  # first 80% of the windows (train + validation)
y_test_orig = Y_full_raw[train_size:]


# Fit the scaler only on the rows touched by the training windows and their
# targets (the last training target is row train_size + FIXED_WINDOW_SIZE - 1),
# so no test-only value influences the scaling.
# NOTE(review): this segment also contains the validation rows.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Re-window the scaled series and add a trailing feature axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE(review): "Train" prints len(X_train_all), which still includes the
# validation windows; the model is fitted on len(X_train) samples.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built (presumably so training starts
# from the same RNG state regardless of what ran earlier in the cell).
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# Train for a fixed number of epochs, evaluating on the held-out tail after
# each one. shuffle=False keeps the training samples in their original order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Epoch numbers are reported 1-based (argmin index + 1). Validation loss is
# preferred; training loss is the fallback when no val_loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict with the model as it stands after the final epoch (no weights are
# restored from best_epoch), map the outputs back to original units, and
# score them against the unscaled test targets. MAPE is expressed in percent.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and hyperparameter combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths; the model and scaler file names also encode the dropout rate.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler so predictions can be de-normalised later.
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run summary written as JSON. Values are cast to built-in int/float so that
# NumPy scalars do not break json.dump.
# NOTE(review): "train_samples" is len(X_train_all), which includes the
# validation windows counted again under "val_samples".
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curve: training loss (and validation loss, when recorded) per epoch.
plt.figure(figsize=(12, 6))

# history.history holds one value per epoch. Number the epochs 1..N so the
# x-axis agrees with the 1-based best_epoch reported elsewhere and the final
# tick at FIXED_EPOCH lands on the last data point instead of one past it.
epoch_numbers = range(1, len(history.history['loss']) + 1)

# Plot the curves.
plt.plot(epoch_numbers, history.history['loss'], label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    plt.plot(epoch_numbers, history.history['val_loss'], label='Validation Loss (Data Validasi)', linewidth=2)

# Tick every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i sits FIXED_WINDOW_SIZE rows after the start of window i, so dropping
# the first FIXED_WINDOW_SIZE dates lines the dates up with the targets.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

# Draw actual vs. predicted prices through explicit Figure/Axes handles.
pred_fig = plt.figure(figsize=(12, 6))
pred_ax = pred_fig.gca()

pred_ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
pred_ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

pred_ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
pred_ax.set_xlabel('Tanggal dan Tahun')
pred_ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate the date labels.
pred_ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
pred_ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
pred_fig.autofmt_xdate()
pred_ax.legend()
pred_ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
pred_fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(pred_fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse: actual values, predictions and their difference.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Per-sample absolute percentage error; zero actuals are replaced by machine
# epsilon so the division never hits zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100

# Per-date prediction table.
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the run.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Blitar.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000583)
   -> Last loss     : 0.000890 | Last val_loss : 0.000583
   -> MAPE          : 3.4628%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Blitar_w7_e114_b64\Blitar_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Blitar_w7_e114_b64\Blitar_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Blitar_w7_e114_b64\Blitar_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Blitar_w7_e114_b64\Blitar_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Blitar_w7_e114_b64\Blitar_Plot.png
   ✓ Prediksi disimpa

In [5]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Re-seed NumPy, Python's ``random`` module and TensorFlow with the fixed seed 42."""
    fixed_seed = 42
    # Same order as before: NumPy, then Python's random, then TensorFlow.
    for seed_fn in (np.random.seed, python_random.seed, tf.random.set_seed):
        seed_fn(fixed_seed)

# Seed every RNG once at the top of the cell, before any model code runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7    # number of consecutive past values in each input window
FIXED_EPOCH = 114        # NOTE(review): no use visible in this truncated cell; earlier cells pass it to model.fit
FIXED_BATCH_SIZE = 64    # NOTE(review): no use visible in this truncated cell; earlier cells pass it to model.fit
VAL_RATIO = 0.1          # trailing fraction of the training windows held out for validation
DROPOUT_RATE = 0.02      # rate used by both Dropout layers
LEARNING_RATE = 0.001    # learning rate of the Adam optimizer

# Input workbook and the root folder for generated artifacts.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_jember.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Split column 0 of a series into overlapping input windows and next-step targets.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_cols); only column 0 is used.
        window_size: Number of consecutive rows in each input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` where ``X[i]`` equals ``dataset[i:i + window_size, 0]``
        and ``Y[i]`` equals ``dataset[i + window_size, 0]``. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)`` with
        ``n_samples = max(n_rows - window_size, 0)``. When the series is too
        short to form a single window, ``X`` is empty but still 2-D, so callers
        that read ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]. Indexing
    # the series with it builds every window in one step and always yields a
    # 2-D result, even when n_samples is 0.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets never alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regression model.

    Layer order: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer (``LEARNING_RATE``) and mean squared error loss.

    Args:
        input_shape: Shape of a single input sample, forwarded to the first
            LSTM layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    network = Sequential()

    # First recurrent layer hands its full output sequence to the next LSTM.
    network.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(DROPOUT_RATE))

    # Second recurrent layer returns only its last output.
    network.add(LSTM(100, return_sequences=False))
    network.add(Dropout(DROPOUT_RATE))

    # Dense head ending in a single output unit.
    network.add(Dense(25, activation='relu'))
    network.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# Stop early with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# Name of the date column in the workbook.
nama_kolom_tanggal = 'Tanggal' 

# The price column is the series being modelled, so its absence is fatal.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series reshaped to a single-column 2-D array (one row per record).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled prices first so the test targets stay in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Positional split: the first 80% of the windows are train(+validation), the rest is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the raw rows touched by the training windows and
# their targets (rows [0, train_size + window)), then apply it to the whole
# series, so the test rows do not influence the fitted min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Rebuild the windows on the scaled series and add a trailing axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: the "Train" figure printed below is len(X_train_all), which still
# includes the validation tail; the model is fitted on X_train only.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original order each epoch.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based (argmin index + 1). Both values fall back to
# the training loss when no validation loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict on the test windows and map the outputs back to the original scale.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
# The metric is multiplied by 100 so it is reported as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and window/epoch/batch combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: model file, pickled scaler and JSON metadata.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the trained model.
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics. Values are cast to built-in
# int/float before being written as JSON.
# NOTE: "train_samples" is len(X_train_all), which includes the validation tail.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curves (training vs. validation) for the finished run.
plt.figure(figsize=(12, 6))

# Plot the curves.
# The history holds one value per epoch, the first entry belonging to epoch 1.
# Plot against explicit 1-based epoch numbers so the curves line up with the
# "Epoch" axis (last tick = FIXED_EPOCH) and with the 1-based best epoch
# reported by this script, instead of being shifted one epoch to the left.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus one at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Drop the first FIXED_WINDOW_SIZE dates so each remaining date lines up with
# the target of one window.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled as abbreviated month plus year.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once; both vectors feed every derived column below.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
selisih = actual_flat - predicted_flat

# Swap zero actuals for machine epsilon so the percentage division is defined.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(selisih / y_test_safe) * 100

# Per-date table of actual value, prediction, difference and percentage error.
out_df = pd.DataFrame({
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': selisih,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row overview of the run: configuration plus headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])

# Write the overview next to the other artifacts of this region.
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_jember.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000313)
   -> Last loss     : 0.000446 | Last val_loss : 0.000313
   -> MAPE          : 3.9588%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\jember_w7_e114_b64\jember_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\jember_w7_e114_b64\jember_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\jember_w7_e114_b64\jember_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\jember_w7_e114_b64\jember_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\jember_w7_e114_b64\jember_Plot.png
   ✓ Prediksi disimpa

In [6]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Seed NumPy, Python's ``random`` module and TensorFlow with 42.

    The three generators are seeded in that order; the function takes no
    arguments and returns nothing.
    """
    seed_value = 42
    for apply_seed in (np.random.seed, python_random.seed, tf.random.set_seed):
        apply_seed(seed_value)

# Seed every random generator once before anything else runs.
reset_seeds()


# Fixed hyperparameters for this run:
#   FIXED_WINDOW_SIZE - number of consecutive values in each input window
#   FIXED_EPOCH       - number of epochs passed to model.fit
#   FIXED_BATCH_SIZE  - batch size passed to model.fit
#   VAL_RATIO         - fraction of the training windows held out for validation
#   DROPOUT_RATE      - rate used by the Dropout layers
#   LEARNING_RATE     - learning rate of the Adam optimizer
FIXED_WINDOW_SIZE = 7
FIXED_EPOCH = 114
FIXED_BATCH_SIZE = 64
VAL_RATIO = 0.1
DROPOUT_RATE = 0.02
LEARNING_RATE = 0.001

# Input workbook and root folder for all generated artifacts.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_kediri.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
# Create the output root if it does not exist yet.
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Turn a single-column series into (window, next value) training pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as input
    and row ``i + window_size`` of column 0 as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns); only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` of newly allocated arrays. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)``
        with ``n_samples = max(n_rows - window_size, 0)``. When the series
        is too short to form a sample, ``X`` still has shape
        ``(0, window_size)`` so callers can read ``X.shape[1]`` safely.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    data = np.asarray(dataset)
    # An empty input has no column to select; treat it as an empty series.
    series = data[:, 0] if data.size else data.reshape(-1)
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]; indexing
    # with it gathers every window in a single vectorised step.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regression model.

    Layer order: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer (``LEARNING_RATE``) and mean squared error loss.

    Args:
        input_shape: Shape of a single input sample, forwarded to the first
            LSTM layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    network = Sequential()

    # First recurrent layer hands its full output sequence to the next LSTM.
    network.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(DROPOUT_RATE))

    # Second recurrent layer returns only its last output.
    network.add(LSTM(100, return_sequences=False))
    network.add(Dropout(DROPOUT_RATE))

    # Dense head ending in a single output unit.
    network.add(Dense(25, activation='relu'))
    network.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# Stop early with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# Name of the date column in the workbook.
nama_kolom_tanggal = 'Tanggal' 

# The price column is the series being modelled, so its absence is fatal.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series reshaped to a single-column 2-D array (one row per record).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled prices first so the test targets stay in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Positional split: the first 80% of the windows are train(+validation), the rest is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the raw rows touched by the training windows and
# their targets (rows [0, train_size + window)), then apply it to the whole
# series, so the test rows do not influence the fitted min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Rebuild the windows on the scaled series and add a trailing axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: the "Train" figure printed below is len(X_train_all), which still
# includes the validation tail; the model is fitted on X_train only.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original order each epoch.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based (argmin index + 1). Both values fall back to
# the training loss when no validation loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict on the test windows and map the outputs back to the original scale.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
# The metric is multiplied by 100 so it is reported as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and window/epoch/batch combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: model file, pickled scaler and JSON metadata.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the trained model.
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics. Values are cast to built-in
# int/float before being written as JSON.
# NOTE: "train_samples" is len(X_train_all), which includes the validation tail.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curves (training vs. validation) for the finished run.
plt.figure(figsize=(12, 6))

# Plot the curves.
# The history holds one value per epoch, the first entry belonging to epoch 1.
# Plot against explicit 1-based epoch numbers so the curves line up with the
# "Epoch" axis (last tick = FIXED_EPOCH) and with the 1-based best epoch
# reported by this script, instead of being shifted one epoch to the left.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus one at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Drop the first FIXED_WINDOW_SIZE dates so each remaining date lines up with
# the target of one window.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled as abbreviated month plus year.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once; both vectors feed every derived column below.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
selisih = actual_flat - predicted_flat

# Swap zero actuals for machine epsilon so the percentage division is defined.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(selisih / y_test_safe) * 100

# Per-date table of actual value, prediction, difference and percentage error.
out_df = pd.DataFrame({
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': selisih,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row overview of the run: configuration plus headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])

# Write the overview next to the other artifacts of this region.
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_kediri.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 111 (val_loss=0.000212)
   -> Last loss     : 0.000511 | Last val_loss : 0.000216
   -> MAPE          : 2.2605%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\kediri_w7_e114_b64\kediri_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\kediri_w7_e114_b64\kediri_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\kediri_w7_e114_b64\kediri_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\kediri_w7_e114_b64\kediri_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\kediri_w7_e114_b64\kediri_Plot.png
   ✓ Prediksi disimpa

In [10]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Seed NumPy, Python's ``random`` module and TensorFlow with 42.

    The three generators are seeded in that order; the function takes no
    arguments and returns nothing.
    """
    seed_value = 42
    for apply_seed in (np.random.seed, python_random.seed, tf.random.set_seed):
        apply_seed(seed_value)

# Seed every random generator once before anything else runs.
reset_seeds()


# Fixed hyperparameters for this run:
#   FIXED_WINDOW_SIZE - number of consecutive values in each input window
#   FIXED_EPOCH       - number of epochs passed to model.fit
#   FIXED_BATCH_SIZE  - batch size passed to model.fit
#   VAL_RATIO         - fraction of the training windows held out for validation
#   DROPOUT_RATE      - rate used by the Dropout layers
#   LEARNING_RATE     - learning rate of the Adam optimizer
FIXED_WINDOW_SIZE = 7
FIXED_EPOCH = 114
FIXED_BATCH_SIZE = 64
VAL_RATIO = 0.1
DROPOUT_RATE = 0.02
LEARNING_RATE = 0.001

# Input workbook and root folder for all generated artifacts.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Madiun.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
# Create the output root if it does not exist yet.
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Turn a single-column series into (window, next value) training pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as input
    and row ``i + window_size`` of column 0 as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns); only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` of newly allocated arrays. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)``
        with ``n_samples = max(n_rows - window_size, 0)``. When the series
        is too short to form a sample, ``X`` still has shape
        ``(0, window_size)`` so callers can read ``X.shape[1]`` safely.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    data = np.asarray(dataset)
    # An empty input has no column to select; treat it as an empty series.
    series = data[:, 0] if data.size else data.reshape(-1)
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]; indexing
    # with it gathers every window in a single vectorised step.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Build and compile the stacked LSTM regression model.

    Layer order: LSTM(100, returning sequences) -> Dropout -> LSTM(100)
    -> Dropout -> Dense(25, relu) -> Dense(1). The model is compiled with
    the Adam optimizer (``LEARNING_RATE``) and mean squared error loss.

    Args:
        input_shape: Shape of a single input sample, forwarded to the first
            LSTM layer as ``input_shape``.

    Returns:
        The compiled ``Sequential`` model.
    """
    # Re-seed TensorFlow before any layer is created.
    tf.random.set_seed(42)

    network = Sequential()

    # First recurrent layer hands its full output sequence to the next LSTM.
    network.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    network.add(Dropout(DROPOUT_RATE))

    # Second recurrent layer returns only its last output.
    network.add(LSTM(100, return_sequences=False))
    network.add(Dropout(DROPOUT_RATE))

    # Dense head ending in a single output unit.
    network.add(Dense(25, activation='relu'))
    network.add(Dense(1))

    adam = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
    network.compile(optimizer=adam, loss='mean_squared_error')
    return network

# Stop early with an explicit message when the input workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
# Name of the date column in the workbook.
nama_kolom_tanggal = 'Tanggal' 

# The price column is the series being modelled, so its absence is fatal.
if 'Harga (Rp)' not in df.columns:
    raise KeyError(f"Kolom 'Harga (Rp)' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Price series reshaped to a single-column 2-D array (one row per record).
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name with "Data_Clean_" and ".xlsx" removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled prices first so the test targets stay in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Positional split: the first 80% of the windows are train(+validation), the rest is test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the raw rows touched by the training windows and
# their targets (rows [0, train_size + window)), then apply it to the whole
# series, so the test rows do not influence the fitted min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Rebuild the windows on the scaled series and add a trailing axis:
# (samples, window) -> (samples, window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set = tail of the training windows
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: the "Train" figure printed below is len(X_train_all), which still
# includes the validation tail; the model is fitted on X_train only.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# shuffle=False keeps the training samples in their original order each epoch.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
# Best epoch is reported 1-based (argmin index + 1). Both values fall back to
# the training loss when no validation loss was recorded.
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict on the test windows and map the outputs back to the original scale.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
# The metric is multiplied by 100 so it is reported as a percentage.
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and window/epoch/batch combination.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Artifact paths: model file, pickled scaler and JSON metadata.
model_path = os.path.join(region_out_dir, f"{region_name}_model_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

# Save the trained model.
model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler
with open(scaler_path, 'wb') as f:
    pickle.dump(scaler, f)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics. Values are cast to built-in
# int/float before being written as JSON.
# NOTE: "train_samples" is len(X_train_all), which includes the validation tail.
metadata = {
    "region": region_name,
    "window_size": int(FIXED_WINDOW_SIZE),
    "epoch": int(FIXED_EPOCH),
    "batch_size": int(FIXED_BATCH_SIZE),
    "dropout_rate": float(DROPOUT_RATE),
    "learning_rate": float(LEARNING_RATE),
    "val_ratio": float(VAL_RATIO),
    "train_samples": int(len(X_train_all)),
    "val_samples": int(len(X_val)),
    "test_samples": int(len(X_test)),
    "best_epoch_val": int(best_epoch),
    "best_val_loss": float(best_val),
    "last_loss": float(last_loss),
    "last_val_loss": float(last_val_loss) if last_val_loss is not None else None,
    "mape_test_percent": float(mape)
}
with open(meta_path, 'w', encoding='utf-8') as f:
    json.dump(metadata, f, ensure_ascii=False, indent=2)
print(f"   ✓ Metadata disimpan: {meta_path}")


# Loss curves (training vs. validation) for the finished run.
plt.figure(figsize=(12, 6))

# Plot the curves.
# The history holds one value per epoch, the first entry belonging to epoch 1.
# Plot against explicit 1-based epoch numbers so the curves line up with the
# "Epoch" axis (last tick = FIXED_EPOCH) and with the 1-based best epoch
# reported by this script, instead of being shifted one epoch to the left.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus one at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure and release it.
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Drop the first FIXED_WINDOW_SIZE dates so each remaining date lines up with
# the target of one window.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Keep only the dates that belong to the test portion.
test_dates = all_dates[train_size:]

fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled as abbreviated month plus year.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once; both vectors feed every derived column below.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
selisih = actual_flat - predicted_flat

# Swap zero actuals for machine epsilon so the percentage division is defined.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(selisih / y_test_safe) * 100

# Per-date table of actual value, prediction, difference and percentage error.
out_df = pd.DataFrame({
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': selisih,
    'Error (%)': err_pct
})
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row overview of the run: configuration plus headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape)
}
summary = pd.DataFrame([summary_row])

# Write the overview next to the other artifacts of this region.
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Madiun.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 113 (val_loss=0.000370)
   -> Last loss     : 0.000936 | Last val_loss : 0.000710
   -> MAPE          : 4.5632%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Madiun_w7_e114_b64\Madiun_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Madiun_w7_e114_b64\Madiun_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Madiun_w7_e114_b64\Madiun_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Madiun_w7_e114_b64\Madiun_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Madiun_w7_e114_b64\Madiun_Plot.png
   ✓ Prediksi disimpa

In [7]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Seed NumPy, Python's ``random`` module and TensorFlow with 42.

    The three generators are seeded in that order; the function takes no
    arguments and returns nothing.
    """
    seed_value = 42
    for apply_seed in (np.random.seed, python_random.seed, tf.random.set_seed):
        apply_seed(seed_value)

# Seed every random generator once before anything else runs.
reset_seeds()


# Fixed hyperparameters for this run:
#   FIXED_WINDOW_SIZE - number of consecutive values in each input window
#   FIXED_EPOCH       - number of epochs passed to model.fit
#   FIXED_BATCH_SIZE  - batch size passed to model.fit
#   VAL_RATIO         - fraction of the training windows held out for validation
#   DROPOUT_RATE      - rate used by the Dropout layers
#   LEARNING_RATE     - learning rate of the Adam optimizer
FIXED_WINDOW_SIZE = 7
FIXED_EPOCH = 114
FIXED_BATCH_SIZE = 64
VAL_RATIO = 0.1
DROPOUT_RATE = 0.02
LEARNING_RATE = 0.001

# Input workbook and root folder for all generated artifacts.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Malang.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
# Create the output root if it does not exist yet.
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Turn a single-column series into (window, next value) training pairs.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as input
    and row ``i + window_size`` of column 0 as target.

    Args:
        dataset: 2-D array-like of shape (n_rows, n_columns); only column 0
            is read.
        window_size: Number of consecutive rows per input window (>= 0).

    Returns:
        Tuple ``(X, Y)`` of newly allocated arrays. ``X`` has shape
        ``(n_samples, window_size)`` and ``Y`` has shape ``(n_samples,)``
        with ``n_samples = max(n_rows - window_size, 0)``. When the series
        is too short to form a sample, ``X`` still has shape
        ``(0, window_size)`` so callers can read ``X.shape[1]`` safely.

    Raises:
        ValueError: If ``window_size`` is negative.
    """
    if window_size < 0:
        raise ValueError(f"window_size must be non-negative, got {window_size}")

    data = np.asarray(dataset)
    # An empty input has no column to select; treat it as an empty series.
    series = data[:, 0] if data.size else data.reshape(-1)
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index grid is [i, i + 1, ..., i + window_size - 1]; indexing
    # with it gathers every window in a single vectorised step.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Create and compile the stacked LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1). Compiled with Adam at
    LEARNING_RATE and mean-squared-error loss.

    Args:
        input_shape: Shape handed to the first LSTM layer.

    Returns:
        The compiled Sequential model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# --- Load the regional price workbook and validate its schema ---
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'

# Check every column the script relies on up front. The date column is parsed
# here and reused for the plot and Excel outputs, so a missing one should fail
# with the same descriptive error as a missing price column.
for required_col in ('Harga (Rp)', nama_kolom_tanggal):
    if required_col not in df.columns:
        raise KeyError(f"Kolom '{required_col}' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Prices as an (n, 1) column: the 2-D layout the scaler and window builder index into.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: its targets are kept as the ground truth
# (original price units) that test predictions are compared against.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train (+ validation), the rest are test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the rows covered by the training windows and their
# targets, so rows used solely by the test windows do not affect the min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler, so values
# outside the training range may map outside [0, 1].
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Append a trailing axis of size 1: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set is the tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), which still contains the validation
# tail; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built and trained.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

fit_options = dict(
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False,
)
history = model.fit(X_train, y_train, **fit_options)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])
# Use the validation curve to pick the best epoch; fall back to the training
# curve when no validation loss was recorded.
if val_loss is None:
    last_val_loss = None
    monitored_loss = train_loss
else:
    last_val_loss = float(val_loss[-1])
    monitored_loss = val_loss
best_epoch = int(np.argmin(monitored_loss) + 1)  # +1: report epochs 1-based
best_val = float(np.min(monitored_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
last_line = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    last_line += f" | Last val_loss : {last_val_loss:.6f}"
print(last_line)


# Predict on the test windows and map the outputs back to original price units
# before scoring against the unscaled targets.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape_fraction = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel())
mape = mape_fraction * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and configuration.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Shared hyperparameter suffix for the model and scaler file names.
run_tag = f"W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}"
model_path = os.path.join(region_out_dir, f"{region_name}_model_{run_tag}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_{run_tag}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler next to the model
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics, cast to plain Python numbers so they
# serialize to JSON.
metadata = dict(
    region=region_name,
    window_size=int(FIXED_WINDOW_SIZE),
    epoch=int(FIXED_EPOCH),
    batch_size=int(FIXED_BATCH_SIZE),
    dropout_rate=float(DROPOUT_RATE),
    learning_rate=float(LEARNING_RATE),
    val_ratio=float(VAL_RATIO),
    train_samples=int(len(X_train_all)),
    val_samples=int(len(X_val)),
    test_samples=int(len(X_test)),
    best_epoch_val=int(best_epoch),
    best_val_loss=float(best_val),
    last_loss=float(last_loss),
    last_val_loss=None if last_val_loss is None else float(last_val_loss),
    mape_test_percent=float(mape),
)
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")


# --- Training / validation loss curves ---
plt.figure(figsize=(12, 6))

# Plot each curve against 1-based epoch numbers. Without explicit x values
# matplotlib uses indices 0..N-1, which draws every point one epoch to the
# left of the 1-based numbering used on the tick labels and leaves the final
# tick (FIXED_EPOCH) without data.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure, then close it
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i of the windowed series sits at row i + FIXED_WINDOW_SIZE, so the
# first FIXED_WINDOW_SIZE dates have no target and are dropped.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Dates for the TEST portion only
test_dates = all_dates[train_size:]
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate labels to fit.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse for every derived column.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Replace zero actuals with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the configuration and headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Malang.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 99 (val_loss=0.000196)
   -> Last loss     : 0.000923 | Last val_loss : 0.000863
   -> MAPE          : 4.6465%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Malang_w7_e114_b64\Malang_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Malang_w7_e114_b64\Malang_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Malang_w7_e114_b64\Malang_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Malang_w7_e114_b64\Malang_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Malang_w7_e114_b64\Malang_Plot.png
   ✓ Prediksi disimpan

In [8]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Seed the Python, NumPy and TensorFlow random generators with 42."""
    seed = 42
    python_random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Seed all random generators before anything else in this cell runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7  # rows per input window passed to create_sliding_window
FIXED_EPOCH = 114  # epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.02  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate

# Input workbook for one region and the root folder that receives every output file.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Probolinggo.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Build (window, next value) samples from column 0 of a 2-D array.

    Args:
        dataset: 2-D NumPy array; only column 0 is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n, window_size)`` and ``Y`` has
        shape ``(n,)`` with ``n = len(dataset) - window_size``; ``Y[i]`` is
        the value that immediately follows window ``X[i]``. When the series
        is too short to form a single sample, ``X`` has shape
        ``(0, window_size)`` and ``Y`` has shape ``(0,)``.
    """
    n_samples = len(dataset) - window_size
    if n_samples <= 0:
        # np.array([]) would be 1-D, so callers reading X.shape[1] would
        # crash; keep the window axis even when there are no samples.
        return np.empty((0, window_size)), np.empty((0,))
    X = np.array([dataset[i:i + window_size, 0] for i in range(n_samples)])
    Y = np.array([dataset[i + window_size, 0] for i in range(n_samples)])
    return X, Y

def build_model_lstm(input_shape):
    """Create and compile the stacked LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1). Compiled with Adam at
    LEARNING_RATE and mean-squared-error loss.

    Args:
        input_shape: Shape handed to the first LSTM layer.

    Returns:
        The compiled Sequential model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# --- Load the regional price workbook and validate its schema ---
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'

# Check every column the script relies on up front. The date column is parsed
# here and reused for the plot and Excel outputs, so a missing one should fail
# with the same descriptive error as a missing price column.
for required_col in ('Harga (Rp)', nama_kolom_tanggal):
    if required_col not in df.columns:
        raise KeyError(f"Kolom '{required_col}' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Prices as an (n, 1) column: the 2-D layout the scaler and window builder index into.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: its targets are kept as the ground truth
# (original price units) that test predictions are compared against.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train (+ validation), the rest are test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the rows covered by the training windows and their
# targets, so rows used solely by the test windows do not affect the min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler, so values
# outside the training range may map outside [0, 1].
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Append a trailing axis of size 1: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set is the tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), which still contains the validation
# tail; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built and trained.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

fit_options = dict(
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False,
)
history = model.fit(X_train, y_train, **fit_options)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])
# Use the validation curve to pick the best epoch; fall back to the training
# curve when no validation loss was recorded.
if val_loss is None:
    last_val_loss = None
    monitored_loss = train_loss
else:
    last_val_loss = float(val_loss[-1])
    monitored_loss = val_loss
best_epoch = int(np.argmin(monitored_loss) + 1)  # +1: report epochs 1-based
best_val = float(np.min(monitored_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
last_line = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    last_line += f" | Last val_loss : {last_val_loss:.6f}"
print(last_line)


# Predict on the test windows and map the outputs back to original price units
# before scoring against the unscaled targets.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape_fraction = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel())
mape = mape_fraction * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and configuration.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Shared hyperparameter suffix for the model and scaler file names.
run_tag = f"W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}"
model_path = os.path.join(region_out_dir, f"{region_name}_model_{run_tag}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_{run_tag}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler next to the model
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics, cast to plain Python numbers so they
# serialize to JSON.
metadata = dict(
    region=region_name,
    window_size=int(FIXED_WINDOW_SIZE),
    epoch=int(FIXED_EPOCH),
    batch_size=int(FIXED_BATCH_SIZE),
    dropout_rate=float(DROPOUT_RATE),
    learning_rate=float(LEARNING_RATE),
    val_ratio=float(VAL_RATIO),
    train_samples=int(len(X_train_all)),
    val_samples=int(len(X_val)),
    test_samples=int(len(X_test)),
    best_epoch_val=int(best_epoch),
    best_val_loss=float(best_val),
    last_loss=float(last_loss),
    last_val_loss=None if last_val_loss is None else float(last_val_loss),
    mape_test_percent=float(mape),
)
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")


# --- Training / validation loss curves ---
plt.figure(figsize=(12, 6))

# Plot each curve against 1-based epoch numbers. Without explicit x values
# matplotlib uses indices 0..N-1, which draws every point one epoch to the
# left of the 1-based numbering used on the tick labels and leaves the final
# tick (FIXED_EPOCH) without data.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure, then close it
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i of the windowed series sits at row i + FIXED_WINDOW_SIZE, so the
# first FIXED_WINDOW_SIZE dates have no target and are dropped.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Dates for the TEST portion only
test_dates = all_dates[train_size:]
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate labels to fit.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse for every derived column.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Replace zero actuals with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the configuration and headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Probolinggo.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 114 (val_loss=0.000483)
   -> Last loss     : 0.000655 | Last val_loss : 0.000483
   -> MAPE          : 2.4025%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Probolinggo_w7_e114_b64\Probolinggo_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Probolinggo_w7_e114_b64\Probolinggo_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Probolinggo_w7_e114_b64\Probolinggo_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Probolinggo_w7_e114_b64\Probolinggo_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Probolinggo

In [9]:
#VISUALISASI LOSS DATA (REVISI FINAL)
import os
import json
import pickle
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
from sklearn.preprocessing import MinMaxScaler
import matplotlib.dates as mdates
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds():
    """Seed the Python, NumPy and TensorFlow random generators with 42."""
    seed = 42
    python_random.seed(seed)
    np.random.seed(seed)
    tf.random.set_seed(seed)

# Seed all random generators before anything else in this cell runs.
reset_seeds()


# Fixed hyperparameters for this run.
FIXED_WINDOW_SIZE = 7  # rows per input window passed to create_sliding_window
FIXED_EPOCH = 114  # epochs passed to model.fit
FIXED_BATCH_SIZE = 64  # batch size passed to model.fit
VAL_RATIO = 0.1  # share of the training windows (taken from the tail) used for validation
DROPOUT_RATE = 0.02  # rate of both Dropout layers in build_model_lstm
LEARNING_RATE = 0.001  # Adam learning rate

# Input workbook for one region and the root folder that receives every output file.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Sumenep.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA"
os.makedirs(path_output_folder, exist_ok=True)

def create_sliding_window(dataset, window_size):
    """Build (window, next value) samples from column 0 of a 2-D array.

    Args:
        dataset: 2-D NumPy array; only column 0 is read.
        window_size: Number of consecutive rows in each input window.

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n, window_size)`` and ``Y`` has
        shape ``(n,)`` with ``n = len(dataset) - window_size``; ``Y[i]`` is
        the value that immediately follows window ``X[i]``. When the series
        is too short to form a single sample, ``X`` has shape
        ``(0, window_size)`` and ``Y`` has shape ``(0,)``.
    """
    n_samples = len(dataset) - window_size
    if n_samples <= 0:
        # np.array([]) would be 1-D, so callers reading X.shape[1] would
        # crash; keep the window axis even when there are no samples.
        return np.empty((0, window_size)), np.empty((0,))
    X = np.array([dataset[i:i + window_size, 0] for i in range(n_samples)])
    Y = np.array([dataset[i + window_size, 0] for i in range(n_samples)])
    return X, Y

def build_model_lstm(input_shape):
    """Create and compile the stacked LSTM regressor.

    Layer stack: LSTM(100, returning sequences) -> Dropout -> LSTM(100) ->
    Dropout -> Dense(25, relu) -> Dense(1). Compiled with Adam at
    LEARNING_RATE and mean-squared-error loss.

    Args:
        input_shape: Shape handed to the first LSTM layer.

    Returns:
        The compiled Sequential model.
    """
    # Seed TensorFlow before any layer is created.
    tf.random.set_seed(42)
    model = Sequential()
    # First LSTM returns the full sequence so the second LSTM can consume it.
    model.add(LSTM(100, return_sequences=True, input_shape=input_shape))
    model.add(Dropout(DROPOUT_RATE))
    model.add(LSTM(100, return_sequences=False))
    model.add(Dropout(DROPOUT_RATE))
    model.add(Dense(25, activation='relu'))
    # Single linear output unit: one predicted value per window.
    model.add(Dense(1))
    model.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE),
                  loss='mean_squared_error')
    return model

# --- Load the regional price workbook and validate its schema ---
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)
nama_kolom_tanggal = 'Tanggal'

# Check every column the script relies on up front. The date column is parsed
# here and reused for the plot and Excel outputs, so a missing one should fail
# with the same descriptive error as a missing price column.
for required_col in ('Harga (Rp)', nama_kolom_tanggal):
    if required_col not in df.columns:
        raise KeyError(f"Kolom '{required_col}' tidak ditemukan di file {path_file_input}")

df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
# Prices as an (n, 1) column: the 2-D layout the scaler and window builder index into.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label = file name without the "Data_Clean_" prefix and ".xlsx" suffix.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")

# Window the unscaled series first: its targets are kept as the ground truth
# (original price units) that test predictions are compared against.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Ordered split: the first 80% of the windows are train (+ validation), the rest are test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]  


# Fit the scaler only on the rows covered by the training windows and their
# targets, so rows used solely by the test windows do not affect the min/max.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
# The whole series is transformed with the train-fitted scaler, so values
# outside the training range may map outside [0, 1].
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
# Append a trailing axis of size 1: (samples, window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation set is the tail of the TRAIN portion
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# NOTE: "Train" below is len(X_train_all), which still contains the validation
# tail; the number of windows actually fitted on is len(X_train).
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Re-seed immediately before the model is built and trained.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

fit_options = dict(
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False,
)
history = model.fit(X_train, y_train, **fit_options)

train_loss = history.history['loss']
val_loss = history.history.get('val_loss')
last_loss = float(train_loss[-1])
# Use the validation curve to pick the best epoch; fall back to the training
# curve when no validation loss was recorded.
if val_loss is None:
    last_val_loss = None
    monitored_loss = train_loss
else:
    last_val_loss = float(val_loss[-1])
    monitored_loss = val_loss
best_epoch = int(np.argmin(monitored_loss) + 1)  # +1: report epochs 1-based
best_val = float(np.min(monitored_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
last_line = f"   -> Last loss     : {last_loss:.6f}"
if last_val_loss is not None:
    last_line += f" | Last val_loss : {last_val_loss:.6f}"
print(last_line)


# Predict on the test windows and map the outputs back to original price units
# before scoring against the unscaled targets.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape_fraction = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel())
mape = mape_fraction * 100
print(f"   -> MAPE          : {mape:.4f}%")


# One output folder per region and configuration.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)

# Shared hyperparameter suffix for the model and scaler file names.
run_tag = f"W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}_DO{DROPOUT_RATE}"
model_path = os.path.join(region_out_dir, f"{region_name}_model_{run_tag}.h5")
scaler_path = os.path.join(region_out_dir, f"{region_name}_scaler_{run_tag}.pkl")
meta_path = os.path.join(region_out_dir, f"{region_name}_metadata.json")

model.save(model_path)
print(f"   ✓ Model disimpan: {model_path}")

# Save the fitted scaler next to the model
with open(scaler_path, 'wb') as scaler_file:
    pickle.dump(scaler, scaler_file)
print(f"   ✓ Scaler disimpan: {scaler_path}")

# Run configuration and headline metrics, cast to plain Python numbers so they
# serialize to JSON.
metadata = dict(
    region=region_name,
    window_size=int(FIXED_WINDOW_SIZE),
    epoch=int(FIXED_EPOCH),
    batch_size=int(FIXED_BATCH_SIZE),
    dropout_rate=float(DROPOUT_RATE),
    learning_rate=float(LEARNING_RATE),
    val_ratio=float(VAL_RATIO),
    train_samples=int(len(X_train_all)),
    val_samples=int(len(X_val)),
    test_samples=int(len(X_test)),
    best_epoch_val=int(best_epoch),
    best_val_loss=float(best_val),
    last_loss=float(last_loss),
    last_val_loss=None if last_val_loss is None else float(last_val_loss),
    mape_test_percent=float(mape),
)
with open(meta_path, 'w', encoding='utf-8') as meta_file:
    meta_file.write(json.dumps(metadata, ensure_ascii=False, indent=2))
print(f"   ✓ Metadata disimpan: {meta_path}")


# --- Training / validation loss curves ---
plt.figure(figsize=(12, 6))

# Plot each curve against 1-based epoch numbers. Without explicit x values
# matplotlib uses indices 0..N-1, which draws every point one epoch to the
# left of the 1-based numbering used on the tick labels and leaves the final
# tick (FIXED_EPOCH) without data.
loss_curve = history.history['loss']
plt.plot(range(1, len(loss_curve) + 1), loss_curve,
         label='Training Loss (Data Latih)', linewidth=2)
if 'val_loss' in history.history:
    val_curve = history.history['val_loss']
    plt.plot(range(1, len(val_curve) + 1), val_curve,
             label='Validation Loss (Data Validasi)', linewidth=2)

# Ticks every 20 epochs plus an explicit tick at the final epoch.
plt.xlim(0, FIXED_EPOCH)
ticks = list(range(0, FIXED_EPOCH, 20)) + [FIXED_EPOCH]
plt.xticks(ticks)

plt.title(f'Grafik Penurunan Error (Loss) | {region_name}\n(Semakin turun mendekati 0 = Semakin Akurat)', fontsize=14)
plt.ylabel('Tingkat Error (MSE - Skala Normalisasi 0-1)', fontsize=12)
plt.xlabel('Epoch', fontsize=12)


plt.legend(fontsize=12)
plt.grid(True, which='both', linestyle='--', alpha=0.7)

# Save the figure, then close it
loss_plot_path = os.path.join(region_out_dir, f"{region_name}_Loss_Graph_Revisi.png")
plt.savefig(loss_plot_path, dpi=300, bbox_inches='tight')
plt.close()
print(f"   ✓ Grafik Loss REVISI disimpan: {loss_plot_path}")


# Target i of the windowed series sits at row i + FIXED_WINDOW_SIZE, so the
# first FIXED_WINDOW_SIZE dates have no target and are dropped.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]

# Dates for the TEST portion only
test_dates = all_dates[train_size:]
fig, ax = plt.subplots(figsize=(12, 6))

ax.plot(test_dates, y_test_orig, label='Actual (Real Data)')
ax.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

ax.set_title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE} DO{DROPOUT_RATE}')
ax.set_xlabel('Tanggal dan Tahun')
ax.set_ylabel('Harga (Rp)')

# One major tick per month, labelled like "Jan 2024"; rotate labels to fit.
ax.xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
ax.xaxis.set_major_locator(mdates.MonthLocator(interval=1))
fig.autofmt_xdate()
ax.legend()
ax.grid(True)

plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
fig.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close(fig)
print(f"   ✓ Plot disimpan: {plot_path}")

# Flatten once and reuse for every derived column.
actual_flat = y_test_orig.flatten()
predicted_flat = predictions_real.flatten()
residual = actual_flat - predicted_flat

# Replace zero actuals with machine epsilon so the percentage error never divides by zero.
y_test_safe = np.where(actual_flat == 0, np.finfo(float).eps, actual_flat)
err_pct = np.abs(residual / y_test_safe) * 100
prediction_columns = {
    'Tanggal': test_dates,
    'Actual (Real)': actual_flat,
    'Predicted (Real)': predicted_flat,
    'Selisih': residual,
    'Error (%)': err_pct,
}
out_df = pd.DataFrame(prediction_columns)
excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")


# Single-row summary of the configuration and headline metrics.
summary_row = {
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Dropout': float(DROPOUT_RATE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': None if last_val_loss is None else float(last_val_loss),
    'MAPE (%)': float(mape),
}
summary = pd.DataFrame([summary_row])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Sumenep.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 1036 (window=7)
✓ Data ternormalisasi (0-1).
[Info] Total: 1036 | Train: 828 | Val: 83 | Test: 208
   -> Selesai epoch : 114
   -> Best epoch    : 111 (val_loss=0.000212)
   -> Last loss     : 0.000545 | Last val_loss : 0.000242
   -> MAPE          : 2.3627%
   ✓ Model disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Sumenep_w7_e114_b64\Sumenep_model_W7_E114_B64_DO0.02.h5
   ✓ Scaler disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Sumenep_w7_e114_b64\Sumenep_scaler_W7_E114_B64_DO0.02.pkl
   ✓ Metadata disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Sumenep_w7_e114_b64\Sumenep_metadata.json
   ✓ Grafik Loss REVISI disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Sumenep_w7_e114_b64\Sumenep_Loss_Graph_Revisi.png
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\VISUALISASI LOSS DATA\Sumenep_w7_e114_b64\Sumenep_Plot.png
   ✓ Predi