MULAI PADA UJI COBA SESUAI BAB 3

In [1]:
#MENCARI KONFIGURASI WINDOW SIZE TERBAIK SESUAI BAB 3
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import random as python_random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.callbacks import EarlyStopping


def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed handed to all three generators. Defaults to 42,
            so existing ``reset_seeds()`` calls behave exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Seed every RNG before any other work in this cell.
reset_seeds()

# Input workbook and the folder that receives plots and Excel exports.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3"
os.makedirs(path_output_folder, exist_ok=True)

# Stop immediately when the workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError("File tidak ditemukan!")

df = pd.read_excel(path_file_input)

# The date column is normally 'Tanggal'; 'Date' is accepted as a fallback.
nama_kolom_tanggal = 'Tanggal'
if nama_kolom_tanggal not in df.columns:
    if 'Date' not in df.columns:
        raise KeyError("Kolom Tanggal tidak ditemukan!")
    nama_kolom_tanggal = 'Date'

# Parse the dates so they can be used on the plot axis later on.
df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
print("✓ Data berhasil dimuat.")

# Prices as a single-column 2-D array.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

def create_sliding_window(dataset, window_size):
    """Build supervised (window, next value) pairs from column 0 of a series.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as its
    input and row ``i + window_size`` of column 0 as its target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``; only
            column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n_rows - window_size,
        window_size)`` and ``Y`` has shape ``(n_rows - window_size,)``.
        When the series is too short for even one sample, ``X`` has shape
        ``(0, window_size)`` so callers reading ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # one fancy-indexing call gathers every window without a Python loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Return a compiled two-layer LSTM regressor.

    The TensorFlow seed is set to 42 before any layer is created. The stack
    is LSTM(100, returning sequences) -> Dropout(0.02) -> LSTM(100) ->
    Dropout(0.02) -> Dense(25, relu) -> Dense(1), compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one sample, passed to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    tf.random.set_seed(42)

    model = Sequential()
    layer_stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.02),
        LSTM(100, return_sequences=False),
        Dropout(0.02),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    for layer in layer_stack:
        model.add(layer)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model

# Scenario A: sweep the window size while epoch count and batch size stay fixed.
scenario_windows = [30, 60, 90]
FIXED_EPOCH = 16
FIXED_BATCH_SIZE = 30

# One callback instance is passed to every fit() call of the sweep.
# With FIXED_EPOCH = 16 the patience of 100 epochs cannot run out in a run.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=100,
    restore_best_weights=True,
)

# Receives one summary dict per window size.
results = []

print("\n" + "="*50, "MULAI SKENARIO A (WINDOW BESAR)", "="*50, sep="\n")

# Scenario A sweep: build, train and evaluate one fresh model per window size,
# then save a plot and a per-row Excel sheet for that window.
for win_size in scenario_windows:
    print(f"\n[Proses] Training Window Size: {win_size} ...")
    # Re-seed at the start of every run.
    reset_seeds()

    # Windows over the unscaled prices: used for the sample count and for the
    # test targets in original units.
    X_full_raw, Y_full_raw = create_sliding_window(raw_data, win_size)
    print(f"   -> Window samples: {len(X_full_raw)} (window={win_size})")

    # Chronological split: the first 80% of the windows train, the rest test.
    train_size = int(len(X_full_raw) * 0.8)
    y_test_orig = Y_full_raw[train_size:]

    # Target i is row i + win_size of the data, so the target dates start
    # win_size rows into the date column.
    all_dates_Y = df[nama_kolom_tanggal].values[win_size:]
    test_dates = all_dates_Y[train_size:]

    # Rows 0 .. train_size + win_size - 1 are exactly the rows used by the
    # training windows and their targets; the scaler is fitted on those rows
    # only and then applied to the whole series.
    raw_train_segment = raw_data[:train_size + win_size]
    scaler = MinMaxScaler(feature_range=(0, 1))
    scaler.fit(raw_train_segment)
    scaled_data = scaler.transform(raw_data)

    # Same windowing on the scaled series; a trailing axis of size 1 is added
    # so each sample has shape (win_size, 1).
    X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, win_size)
    X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)
    X_train, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
    y_train = Y_full_scaled[:train_size]

    model = build_model_lstm((X_train.shape[1], 1))

    # shuffle=False keeps the samples in chronological order; validation_split
    # holds out a fraction of the training arrays for val_loss (per the Keras
    # docs this is the tail of the arrays).
    history = model.fit(
        X_train, y_train,
        epochs=FIXED_EPOCH,
        batch_size=FIXED_BATCH_SIZE,
        callbacks=[early_stop],
        verbose=0,
        shuffle=False,
        validation_split=0.1
    )

    # Loss summary; epochs are reported 1-based. Falls back to the training
    # loss if no val_loss was recorded.
    finished_epoch = len(history.history['loss'])
    best_epoch = np.argmin(history.history['val_loss']) + 1 if 'val_loss' in history.history else np.argmin(history.history['loss']) + 1
    best_loss = np.min(history.history['val_loss']) if 'val_loss' in history.history else np.min(history.history['loss'])
    last_loss = float(history.history['loss'][-1])
    last_val_loss = float(history.history['val_loss'][-1]) if 'val_loss' in history.history else None

    print(f"   -> Selesai epoch : {finished_epoch}")
    print(f"   -> Best epoch    : {best_epoch} (val_loss={best_loss:.6f})")
    if last_val_loss is not None:
        print(f"   -> Last loss      : {last_loss:.6f} | Last val_loss : {last_val_loss:.6f}")
    else:
        print(f"   -> Last loss      : {last_loss:.6f}")

    # Predict on the test windows and map the outputs back to original units.
    predictions_scaled = model.predict(X_test, verbose=0)
    predictions_real = scaler.inverse_transform(predictions_scaled)

    # MAPE is computed against the unscaled test targets, reported in percent.
    mape = mean_absolute_percentage_error(y_test_orig, predictions_real) * 100
    print(f"   -> MAPE Window {win_size}: {mape:.4f}%")

    results.append({
        'Window Size': win_size,
        'MAPE (%)': float(mape),
        'Best Epoch': int(best_epoch),
        'Best Loss': float(best_loss),
        'Last Loss': float(last_loss),
        'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None
    })

    # Actual vs predicted prices over the test dates.
    plt.figure(figsize=(12, 6))
    plt.plot(test_dates, y_test_orig, label='Actual (Real Data)')
    plt.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

    plt.title(f'Actual vs Predicted (Window {win_size})')
    plt.xlabel('Tanggal dan Tahun')
    plt.ylabel('Harga (Rp)')

    # Date formatting: month-based major ticks labelled as abbreviated month + year.
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    plt.gcf().autofmt_xdate()

    plt.legend()
    plt.grid(True)

    # Save the figure automatically so it can be viewed later; close it so
    # figures do not pile up across iterations.
    img_path = os.path.join(path_output_folder, f"Grafik_Window_{win_size}.png")
    plt.savefig(img_path, bbox_inches='tight')
    plt.close()
    print(f"   ✓ Grafik disimpan: {img_path}")


    # Per-row percentage error; zero targets are replaced by machine epsilon
    # in the denominator to avoid division by zero.
    y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
    err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100

    out_df = pd.DataFrame({
        'Tanggal': test_dates,
        'Actual (Real)': y_test_orig.flatten(),
        'Predicted (Real)': predictions_real.flatten(),
        'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
        'Error (%)': err_pct
    })

    out_path = os.path.join(path_output_folder, f"Prediksi_Window_{win_size}.xlsx")
    out_df.to_excel(out_path, index=False)
    print(f"   ✓ Hasil Excel disimpan: {out_path}")

# Tabulate the per-window summaries collected during the sweep.
results_df = pd.DataFrame(results)

print("\n" + "="*50)
print(results_df)

# Persist the recap next to the per-window outputs.
rekap_path = os.path.join(path_output_folder, "REKAP_MAPE_ALL_WINDOWS.xlsx")
results_df.to_excel(rekap_path, index=False)
print("="*50)

✓ Data berhasil dimuat.
✓ Jumlah baris asli (raw): 1043

MULAI SKENARIO A (WINDOW BESAR)

[Proses] Training Window Size: 30 ...
   -> Window samples: 1013 (window=30)
   -> Selesai epoch : 16
   -> Best epoch    : 16 (val_loss=0.000621)
   -> Last loss      : 0.001971 | Last val_loss : 0.000621
   -> MAPE Window 30: 4.5381%
   ✓ Grafik disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Grafik_Window_30.png
   ✓ Hasil Excel disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Prediksi_Window_30.xlsx

[Proses] Training Window Size: 60 ...
   -> Window samples: 983 (window=60)
   -> Selesai epoch : 16
   -> Best epoch    : 16 (val_loss=0.000693)
   -> Last loss      : 0.001436 | Last val_loss : 0.000693
   -> MAPE Window 60: 4.3833%
   ✓ Grafik disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Grafik_Window_60.png
   ✓ Hasil Excel disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Prediksi_Window_60.xlsx

[Proses] Training Window Size: 90 ...
   -> Window sa

NORMALISASI DATA DAN MELIHAT DATA HASIL NORMALISASI

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import os


path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"

# Abort with the offending path when the workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan di: {path_file_input}")

df = pd.read_excel(path_file_input)
print("✓ Data berhasil dimuat.")

# Every price as a single-column 2-D array.
all_data_raw = df['Harga (Rp)'].values.reshape(-1, 1)

# Fit the scaler on the first 80% of the rows only, then scale the whole
# series with those bounds.
train_size = int(len(df) * 0.8)
raw_train = all_data_raw[:train_size]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train)
scaled_data = scaler.transform(all_data_raw)

# Store the scaled prices in the DataFrame.
df['Harga_Normalized'] = scaled_data

# Side-by-side preview of the first 20 rows; a plain 0..19 counter stands in
# for the date when the 'Tanggal' column is absent.
if 'Tanggal' in df.columns:
    kolom_tanggal = df['Tanggal'].iloc[:20]
else:
    kolom_tanggal = range(20)

tabel_cek = pd.DataFrame({
    'Tanggal': kolom_tanggal,
    'Harga Asli (Rp)': df['Harga (Rp)'].iloc[:20].values,
    'Harga Normalisasi': df['Harga_Normalized'].iloc[:20].values,
})

print("\n--- Sampel Hasil Normalisasi (20 Baris Pertama) ---")
print(tabel_cek)

✓ Data berhasil dimuat.

--- Sampel Hasil Normalisasi (20 Baris Pertama) ---
      Tanggal  Harga Asli (Rp)  Harga Normalisasi
0  2021-01-01            28250           0.150000
1  2021-01-04            28250           0.150000
2  2021-01-05            28250           0.150000
3  2021-01-06            28250           0.150000
4  2021-01-07            28250           0.150000
5  2021-01-08            28250           0.150000
6  2021-01-11            27250           0.131818
7  2021-01-12            27250           0.131818
8  2021-01-13            27250           0.131818
9  2021-01-14            27750           0.140909
10 2021-01-15            27250           0.131818
11 2021-01-18            27750           0.140909
12 2021-01-19            27750           0.140909
13 2021-01-20            27250           0.131818
14 2021-01-21            26250           0.113636
15 2021-01-22            26250           0.113636
16 2021-01-25            26250           0.113636
17 2021-01-26          

In [2]:
#MENCARI KONFIGURASI EPOCH TERBAIK SESUAI BAB 3
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import random as python_random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed handed to all three generators. Defaults to 42,
            so existing ``reset_seeds()`` calls behave exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Start the cell from a known RNG state.
reset_seeds()

# Input workbook and the folder that receives plots and Excel exports.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3"
os.makedirs(path_output_folder, exist_ok=True)

# Each failure condition exits early; the success path stays unindented.
if not os.path.exists(path_file_input):
    raise FileNotFoundError("File tidak ditemukan.")

df = pd.read_excel(path_file_input)

nama_kolom_tanggal = 'Tanggal'
if nama_kolom_tanggal not in df.columns:
    raise KeyError("Kolom 'Tanggal' tidak ditemukan di Excel!")

# Parse the dates so they can be used on the plot axis later on.
df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
print("✓ Data dan Tanggal berhasil dimuat.")

# Prices as a single-column 2-D array.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Scenario B: window and batch size are fixed, the epoch count is swept.
FIXED_WINDOW_SIZE = 90
FIXED_BATCH_SIZE = 30
scenario_epochs = [16, 30, 50]
# Fraction of the training windows reserved for validation.
VAL_RATIO = 0.1


def create_sliding_window(dataset, window_size):
    """Build supervised (window, next value) pairs from column 0 of a series.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as its
    input and row ``i + window_size`` of column 0 as its target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``; only
            column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n_rows - window_size,
        window_size)`` and ``Y`` has shape ``(n_rows - window_size,)``.
        When the series is too short for even one sample, ``X`` has shape
        ``(0, window_size)`` so callers reading ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # one fancy-indexing call gathers every window without a Python loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Return a compiled two-layer LSTM regressor.

    The TensorFlow seed is set to 42 before any layer is created. The stack
    is LSTM(100, returning sequences) -> Dropout(0.02) -> LSTM(100) ->
    Dropout(0.02) -> Dense(25, relu) -> Dense(1), compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one sample, passed to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    tf.random.set_seed(42)

    model = Sequential()
    layer_stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.02),
        LSTM(100, return_sequences=False),
        Dropout(0.02),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    for layer in layer_stack:
        model.add(layer)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


# Data preparation shared by every run of the epoch sweep.

# Windows over the unscaled prices: used for the sample count and for the
# test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Jumlah sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Chronological split: the first 80% of the windows train, the rest test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]


# Target i is row i + FIXED_WINDOW_SIZE of the data, so the target dates
# start FIXED_WINDOW_SIZE rows into the date column.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]
test_dates = all_dates[train_size:]

# Rows 0 .. train_size + FIXED_WINDOW_SIZE - 1 are exactly the rows used by
# the training windows and their targets; the scaler is fitted on those only.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)

# The fitted scaler is then applied to the whole series.
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")

X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)

# Add a trailing axis of size 1 so each sample has shape (window, 1).
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)
X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]

# Validation split taken from the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

print("\n" + "="*60)
print("MULAI FASE 2: SKENARIO B (Grid Epoch: 16, 30, 50)")
print("Kondisi: Dengan Normalisasi, Tanpa EarlyStopping (epoch tetap), Val=tail train")
print("="*60)

# Receives one summary dict per epoch setting.
results = []


# Scenario B sweep: build, train and evaluate one fresh model per epoch count,
# then save a plot and a per-row Excel sheet for that setting.
for epoch_count in scenario_epochs:
    print(f"\n[Training] Epoch = {epoch_count}, Window = {FIXED_WINDOW_SIZE}")
    # Re-seed at the start of every run.
    reset_seeds()

    model = build_model_lstm((X_train.shape[1], 1))

    # No callbacks: every run trains for exactly epoch_count epochs.
    # shuffle=False keeps the samples in chronological order.
    history = model.fit(
        X_train, y_train,
        epochs=epoch_count,
        batch_size=FIXED_BATCH_SIZE,
        validation_data=(X_val, y_val),
        verbose=0,
        shuffle=False
    )


    finished_epoch = epoch_count
    train_loss = history.history['loss']
    val_loss = history.history.get('val_loss', None)

    # Loss summary; epochs are reported 1-based.
    if val_loss is not None:
        best_epoch = int(np.argmin(val_loss) + 1)
        best_loss = float(np.min(val_loss))
        last_loss = float(train_loss[-1])
        last_val_loss = float(val_loss[-1])
        print(f"   -> Selesai epoch : {finished_epoch}")
        print(f"   -> Best epoch    : {best_epoch} (val_loss={best_loss:.6f})")
    else:
        # Fallback when no validation loss was recorded: rank by training loss.
        best_epoch = int(np.argmin(train_loss) + 1)
        best_loss = float(np.min(train_loss))
        last_loss = float(train_loss[-1])
        last_val_loss = None
        print(f"   -> Selesai epoch : {finished_epoch}")


    # Predict on the test windows and map the outputs back to original units.
    predictions_scaled = model.predict(X_test, verbose=0)
    predictions_real = scaler.inverse_transform(predictions_scaled)

    # MAPE is computed against the unscaled test targets, reported in percent.
    mape = mean_absolute_percentage_error(y_test_orig, predictions_real) * 100
    print(f"   -> MAPE          : {mape:.4f}%")

    results.append({
        'Jumlah Epoch': int(epoch_count),
        'Best Epoch (val)': int(best_epoch),
        'Best Loss (val)': float(best_loss),
        'Last Loss': float(last_loss),
        'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
        'MAPE (%)': float(mape)
    })


    # Actual vs predicted prices over the test dates.
    plt.figure(figsize=(12, 6))
    plt.plot(test_dates, y_test_orig, label='Actual (Real Data)')
    plt.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

    plt.title(f'Actual vs Predicted (Epoch {epoch_count}, Window {FIXED_WINDOW_SIZE})')
    plt.xlabel('Tanggal dan Tahun')
    plt.ylabel('Harga (Rp)')

    # Date formatting: month-based major ticks labelled as abbreviated month + year.
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    plt.gcf().autofmt_xdate()

    plt.legend()
    plt.grid(True)


    # Save the figure and close it so figures do not pile up across iterations.
    img_path = os.path.join(path_output_folder, f"Grafik_Epoch_{epoch_count}_Window_{FIXED_WINDOW_SIZE}.png")
    plt.savefig(img_path, bbox_inches='tight')
    plt.close()
    print(f"   ✓ Grafik disimpan: {img_path}")


    # Per-row percentage error; zero targets are replaced by machine epsilon
    # in the denominator to avoid division by zero.
    y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
    err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100

    out_df = pd.DataFrame({
        'Tanggal': test_dates,
        'Actual (Real)': y_test_orig.flatten(),
        'Predicted (Real)': predictions_real.flatten(),
        'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
        'Error (%)': err_pct
    })
    out_path = os.path.join(path_output_folder, f"Prediksi_Epoch_{epoch_count}_Window_{FIXED_WINDOW_SIZE}.xlsx")
    out_df.to_excel(out_path, index=False)
    print(f"   ✓ Excel disimpan: {out_path}")


# Tabulate the per-epoch summaries and print them between two rules.
print("\n" + "="*60)
results_df = pd.DataFrame(results)
print(results_df, "="*60, sep="\n")

# The row with the lowest test MAPE decides the epoch setting.
idx_terbaik = results_df['MAPE (%)'].idxmin()
best_result = results_df.loc[idx_terbaik]
best_epoch_param = int(best_result['Jumlah Epoch'])
print(f"Kesimpulan: Epoch terbaik (grid) = {best_epoch_param} dengan MAPE {best_result['MAPE (%)']:.4f}%")

✓ Data dan Tanggal berhasil dimuat.
✓ Jumlah baris asli (raw): 1043
✓ Jumlah sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).

MULAI FASE 2: SKENARIO B (Grid Epoch: 16, 30, 50)
Kondisi: Dengan Normalisasi, Tanpa EarlyStopping (epoch tetap), Val=tail train

[Training] Epoch = 16, Window = 90
   -> Selesai epoch : 16
   -> Best epoch    : 14 (val_loss=0.001218)
   -> MAPE          : 4.1190%
   ✓ Grafik disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Grafik_Epoch_16_Window_90.png
   ✓ Excel disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Prediksi_Epoch_16_Window_90.xlsx

[Training] Epoch = 30, Window = 90
   -> Selesai epoch : 30
   -> Best epoch    : 30 (val_loss=0.000986)
   -> MAPE          : 3.7639%
   ✓ Grafik disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Grafik_Epoch_30_Window_90.png
   ✓ Excel disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Prediksi_Epoch_30_Window_90.xlsx

[Training] Epoch = 50, Window = 90
   -> Selesai epo

In [3]:
#MENCARI KONFIGURASI batch_sizes TERBAIK SESUAI BAB 3
import numpy as np
import pandas as pd
import tensorflow as tf
import os
import random as python_random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates 
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed handed to all three generators. Defaults to 42,
            so existing ``reset_seeds()`` calls behave exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Start the cell from a known RNG state.
reset_seeds()

# Input workbook and the folder that receives plots and Excel exports.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3"
os.makedirs(path_output_folder, exist_ok=True)

# Each failure condition exits early; the success path stays unindented.
if not os.path.exists(path_file_input):
    raise FileNotFoundError("File tidak ditemukan.")

df = pd.read_excel(path_file_input)

nama_kolom_tanggal = 'Tanggal'
if nama_kolom_tanggal not in df.columns:
    raise KeyError(f"Kolom '{nama_kolom_tanggal}' tidak ditemukan di Excel!")

# Parse the dates so they can be used on the plot axis later on.
df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])
print("✓ Data dan Tanggal berhasil dimuat.")

# Prices as a single-column 2-D array.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Scenario C: window and epoch count are fixed, the batch size is swept.
FIXED_WINDOW_SIZE = 90
FIXED_EPOCH = 50
scenario_batch_sizes = [16, 30, 64]
# Fraction of the training windows reserved for validation.
VAL_RATIO = 0.1


def create_sliding_window(dataset, window_size):
    """Build supervised (window, next value) pairs from column 0 of a series.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as its
    input and row ``i + window_size`` of column 0 as its target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``; only
            column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n_rows - window_size,
        window_size)`` and ``Y`` has shape ``(n_rows - window_size,)``.
        When the series is too short for even one sample, ``X`` has shape
        ``(0, window_size)`` so callers reading ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # one fancy-indexing call gathers every window without a Python loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Return a compiled two-layer LSTM regressor.

    The TensorFlow seed is set to 42 before any layer is created. The stack
    is LSTM(100, returning sequences) -> Dropout(0.02) -> LSTM(100) ->
    Dropout(0.02) -> Dense(25, relu) -> Dense(1), compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one sample, passed to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    tf.random.set_seed(42)

    model = Sequential()
    layer_stack = [
        # Two stacked LSTM layers, each followed by light dropout.
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.02),
        LSTM(100, return_sequences=False),
        Dropout(0.02),
        # Dense head producing a single output value.
        Dense(25, activation='relu'),
        Dense(1),
    ]
    for layer in layer_stack:
        model.add(layer)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


# Data preparation shared by every run of the batch-size sweep.

# Windows over the unscaled prices: used for the sample count and for the
# test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Jumlah sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")

# Chronological split: the first 80% of the windows train, the rest test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]


# Target i is row i + FIXED_WINDOW_SIZE of the data, so the target dates
# start FIXED_WINDOW_SIZE rows into the date column.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]
test_dates = all_dates[train_size:]


# Rows 0 .. train_size + FIXED_WINDOW_SIZE - 1 are exactly the rows used by
# the training windows and their targets; the scaler is fitted on those only.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)


# The fitted scaler is then applied to the whole series.
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Same windowing on the scaled series; a trailing axis of size 1 is added so
# each sample has shape (window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)


X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]


# Validation split taken from the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# Note: "Latih" reports X_train_all, i.e. the training windows including the
# validation tail.
print(f"\n[Info] Dataset Window Size: {FIXED_WINDOW_SIZE}")
print("----------------------------------------")
print(f"Total Window Samples: {len(X_full_scaled)} | Latih: {len(X_train_all)} | Uji: {len(X_test)}")
print("----------------------------------------")

print("\n" + "="*60)
print("MULAI FASE 2: SKENARIO C (Mencari Batch Size Terbaik)")
print(f"Konfigurasi: Window {FIXED_WINDOW_SIZE} | Epoch {FIXED_EPOCH}")
print("="*60)

# Receives one summary dict per batch size.
results = []

# Scenario C sweep: build, train and evaluate one fresh model per batch size,
# then save a plot and a per-row Excel sheet for that setting.
for batch_count in scenario_batch_sizes:
    print(f"\n[Proses] Training dengan Batch Size: {batch_count} ...")
    # Re-seed at the start of every run.
    reset_seeds()

    model = build_model_lstm((X_train.shape[1], 1))

    # No callbacks: every run trains for exactly FIXED_EPOCH epochs.
    # shuffle=False keeps the samples in chronological order.
    history = model.fit(
        X_train, y_train,
        epochs=FIXED_EPOCH,
        batch_size=batch_count,
        validation_data=(X_val, y_val),
        verbose=0,
        shuffle=False
    )

    # Loss summary; epochs are reported 1-based.
    train_loss = history.history['loss']
    val_loss = history.history.get('val_loss', None)
    last_loss = float(train_loss[-1])

    if val_loss is not None:
        last_val_loss = float(val_loss[-1])
        best_epoch = int(np.argmin(val_loss) + 1)
        best_val = float(np.min(val_loss))
        print(f"   -> Selesai epoch : {FIXED_EPOCH}")
        print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
        print(f"   -> Last loss     : {last_loss:.6f} | Last val_loss : {last_val_loss:.6f}")
    else:
        # Fallback when no validation loss was recorded: rank by training loss.
        best_epoch = int(np.argmin(train_loss) + 1)
        best_tr = float(np.min(train_loss))
        last_val_loss = None
        best_val = None
        print(f"   -> Selesai epoch : {FIXED_EPOCH}")
        print(f"   -> Best epoch    : {best_epoch} (loss={best_tr:.6f})")
        print(f"   -> Last loss     : {last_loss:.6f}")


    # Predict on the test windows and map the outputs back to original units.
    predictions_scaled = model.predict(X_test, verbose=1)
    predictions_real = scaler.inverse_transform(predictions_scaled)

    # MAPE against the original (unscaled) test data, reported in percent.
    mape = mean_absolute_percentage_error(y_test_orig, predictions_real) * 100
    print(f"   -> Selesai. Hasil MAPE: {mape:.4f}%")

    # Store the run summary.
    results.append({
        'Batch Size': batch_count,
        'Best Epoch (val)': best_epoch,
        'Best Val Loss': float(best_val) if val_loss is not None else None,
        'Last Loss': last_loss,
        'Last Val Loss': last_val_loss if val_loss is not None else None,
        'MAPE (%)': float(mape)
    })

    # Actual vs predicted prices over the test dates.
    plt.figure(figsize=(12, 6))
    plt.plot(test_dates, y_test_orig, label='Actual (Real Data)')
    plt.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

    plt.title(f'Actual vs Predicted (Batch {batch_count}, Window {FIXED_WINDOW_SIZE}, Epoch {FIXED_EPOCH})')
    plt.xlabel('Tanggal dan Tahun')
    plt.ylabel('Harga (Rp)')

    # Date formatting: month-based major ticks labelled as abbreviated month + year.
    plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
    plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
    plt.gcf().autofmt_xdate()

    plt.legend()
    plt.grid(True)

    # Save the figure and close it so figures do not pile up across iterations.
    img_path = os.path.join(path_output_folder, f"Grafik_Batch_{batch_count}_Window_{FIXED_WINDOW_SIZE}.png")
    plt.savefig(img_path, bbox_inches='tight')
    plt.close()
    print(f"   ✓ Grafik disimpan: {img_path}")


    # Per-row percentage error; zero targets are replaced by machine epsilon
    # in the denominator to avoid division by zero.
    y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
    err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100

    out_df = pd.DataFrame({
        'Tanggal': test_dates, # <--- new column
        'Actual (Real)': y_test_orig.flatten(),
        'Predicted (Real)': predictions_real.flatten(),
        'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
        'Error (%)': err_pct
    })
    out_path = os.path.join(path_output_folder, f"Prediksi_Batch_{batch_count}_Window_{FIXED_WINDOW_SIZE}_Epoch_{FIXED_EPOCH}.xlsx")
    out_df.to_excel(out_path, index=False)
    print(f"   ✓ Excel disimpan: {out_path}")


# Tabulate the per-batch summaries collected during the sweep.
print("\n" + "="*60)
results_df = pd.DataFrame(results)
print(results_df)

# The row with the lowest test MAPE decides the batch size.
idx_terbaik = results_df['MAPE (%)'].idxmin()
best_result = results_df.loc[idx_terbaik]
best_batch = int(best_result['Batch Size'])

# Persist the recap next to the per-batch outputs.
rekap_path = os.path.join(path_output_folder, "REKAP_MAPE_ALL_BATCHES.xlsx")
results_df.to_excel(rekap_path, index=False)

# Final configuration report, emitted line by line in a single print call.
kesimpulan = [
    "-" * 60,
    "KESIMPULAN FINAL (KONFIGURASI MODEL TERBAIK):",
    "1. Normalisasi : YA (MinMaxScaler; fit di segmen TRAIN, transform 100% data)",
    f"2. Window Size : {FIXED_WINDOW_SIZE}",
    f"3. Epoch       : {FIXED_EPOCH}",
    f"4. Batch Size  : {best_batch}",
    f"MAPE Terendah  : {best_result['MAPE (%)']:.4f}%",
    "="*60,
]
print("\n".join(kesimpulan))

✓ Data dan Tanggal berhasil dimuat.
✓ Jumlah baris asli (raw): 1043
✓ Jumlah sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).

[Info] Dataset Window Size: 90
----------------------------------------
Total Window Samples: 953 | Latih: 762 | Uji: 191
----------------------------------------

MULAI FASE 2: SKENARIO C (Mencari Batch Size Terbaik)
Konfigurasi: Window 90 | Epoch 50

[Proses] Training dengan Batch Size: 16 ...
   -> Selesai epoch : 50
   -> Best epoch    : 46 (val_loss=0.000737)
   -> Last loss     : 0.001218 | Last val_loss : 0.001011
   -> Selesai. Hasil MAPE: 5.5420%
   ✓ Grafik disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Grafik_Batch_16_Window_90.png
   ✓ Excel disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Prediksi_Batch_16_Window_90_Epoch_50.xlsx

[Proses] Training dengan Batch Size: 30 ...
   -> Selesai epoch : 50
   -> Best epoch    : 47 (val_loss=0.000808)
   -> Last loss     : 0.001033 | Last val_loss : 0.000959
   -> Selesai. H

In [4]:
#UJI COBA KONFIG TERBAIK DIBAB 3 
import os
import numpy as np
import pandas as pd
import tensorflow as tf
import random as python_random
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_absolute_percentage_error
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout


def reset_seeds(seed=42):
    """Seed NumPy, Python's ``random`` module and TensorFlow with one value.

    Args:
        seed: Integer seed handed to all three generators. Defaults to 42,
            so existing ``reset_seeds()`` calls behave exactly as before.
    """
    np.random.seed(seed)
    python_random.seed(seed)
    tf.random.set_seed(seed)

# Seed every RNG before any other work in this cell.
reset_seeds()

# Configuration for the single run performed by this cell.
FIXED_WINDOW_SIZE = 90  # rows per input window
FIXED_EPOCH = 50        # number of training epochs
FIXED_BATCH_SIZE = 64   # batch size passed to model.fit
VAL_RATIO = 0.1         # fraction of the training windows used for validation


# Input workbook and the folder that receives every output of this cell.
path_file_input = r"E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx"
path_output_folder = r"E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3"
# exist_ok=True: no error when the folder already exists.
os.makedirs(path_output_folder, exist_ok=True)


def create_sliding_window(dataset, window_size):
    """Build supervised (window, next value) pairs from column 0 of a series.

    Sample ``i`` uses rows ``i .. i + window_size - 1`` of column 0 as its
    input and row ``i + window_size`` of column 0 as its target.

    Args:
        dataset: 2-D array-like of shape ``(n_rows, n_columns)``; only
            column 0 is read.
        window_size: Number of consecutive rows per input window (>= 1).

    Returns:
        Tuple ``(X, Y)``. ``X`` has shape ``(n_rows - window_size,
        window_size)`` and ``Y`` has shape ``(n_rows - window_size,)``.
        When the series is too short for even one sample, ``X`` has shape
        ``(0, window_size)`` so callers reading ``X.shape[1]`` keep working.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    series = np.asarray(dataset)[:, 0]
    n_samples = max(len(series) - window_size, 0)

    # Row i of the index matrix is [i, i + 1, ..., i + window_size - 1];
    # one fancy-indexing call gathers every window without a Python loop.
    window_index = np.arange(n_samples)[:, None] + np.arange(window_size)[None, :]
    X = series[window_index]
    # Copy so the targets do not alias the caller's array.
    Y = series[window_size:window_size + n_samples].copy()
    return X, Y

def build_model_lstm(input_shape):
    """Return a compiled two-layer LSTM regressor.

    The TensorFlow seed is set to 42 before any layer is created. The stack
    is LSTM(100, returning sequences) -> Dropout(0.02) -> LSTM(100) ->
    Dropout(0.02) -> Dense(25, relu) -> Dense(1), compiled with Adam
    (learning rate 0.001) and mean-squared-error loss.

    Args:
        input_shape: Shape of one sample, passed to the first LSTM layer.

    Returns:
        The compiled ``Sequential`` model.
    """
    tf.random.set_seed(42)

    model = Sequential()
    layer_stack = [
        LSTM(100, return_sequences=True, input_shape=input_shape),
        Dropout(0.02),
        LSTM(100, return_sequences=False),
        Dropout(0.02),
        Dense(25, activation='relu'),
        Dense(1),
    ]
    for layer in layer_stack:
        model.add(layer)

    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
    model.compile(optimizer=optimizer, loss='mean_squared_error')
    return model


# Fail fast, naming the offending path, when the workbook is missing.
if not os.path.exists(path_file_input):
    raise FileNotFoundError(f"File tidak ditemukan: {path_file_input}")

df = pd.read_excel(path_file_input)


nama_kolom_tanggal = 'Tanggal'
# Validate every column this cell reads, so a missing date column is reported
# with the same explicit message as a missing price column.
for kolom_wajib in ('Harga (Rp)', nama_kolom_tanggal):
    if kolom_wajib not in df.columns:
        raise KeyError(f"Kolom '{kolom_wajib}' tidak ditemukan di file {path_file_input}")


# Parse the dates so they can be used on the plot axis later on.
df[nama_kolom_tanggal] = pd.to_datetime(df[nama_kolom_tanggal])

# Prices as a single-column 2-D array.
raw_data = df['Harga (Rp)'].values.reshape(-1, 1)
print(f"✓ Data: {path_file_input}")
print(f"✓ Jumlah baris asli (raw): {len(raw_data)}")

# Region label taken from the file name: the "Data_Clean_" prefix and the
# ".xlsx" suffix are removed.
region_name = os.path.basename(path_file_input).replace("Data_Clean_", "").replace(".xlsx", "")


# Data preparation for the single final run.

# Windows over the unscaled prices: used for the sample count and for the
# test targets in original units.
X_full_raw, Y_full_raw = create_sliding_window(raw_data, FIXED_WINDOW_SIZE)
print(f"✓ Sampel window: {len(X_full_raw)} (window={FIXED_WINDOW_SIZE})")
# Chronological split: the first 80% of the windows train, the rest test.
train_size = int(len(X_full_raw) * 0.8)
y_test_orig = Y_full_raw[train_size:]

# Rows 0 .. train_size + FIXED_WINDOW_SIZE - 1 are exactly the rows used by
# the training windows and their targets; the scaler is fitted on those only
# and then applied to the whole series.
raw_train_segment = raw_data[:train_size + FIXED_WINDOW_SIZE]
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(raw_train_segment)
scaled_data = scaler.transform(raw_data)
print("✓ Data ternormalisasi (0-1).")


# Same windowing on the scaled series; a trailing axis of size 1 is added so
# each sample has shape (window, 1).
X_full_scaled, Y_full_scaled = create_sliding_window(scaled_data, FIXED_WINDOW_SIZE)
X_full_scaled = X_full_scaled.reshape(X_full_scaled.shape[0], X_full_scaled.shape[1], 1)

X_train_all, X_test = X_full_scaled[:train_size], X_full_scaled[train_size:]
y_train_all = Y_full_scaled[:train_size]


# Validation split taken from the tail of the training windows.
val_cut = int(len(X_train_all) * (1 - VAL_RATIO))
X_train, X_val = X_train_all[:val_cut], X_train_all[val_cut:]
y_train, y_val = y_train_all[:val_cut], y_train_all[val_cut:]

# Note: "Train" reports X_train_all, i.e. the training windows including the
# validation tail.
print(f"[Info] Total: {len(X_full_scaled)} | Train: {len(X_train_all)} | Val: {len(X_val)} | Test: {len(X_test)}")


# Train the final configuration once and evaluate it on the test windows.

# Re-seed immediately before the model is built.
reset_seeds()
model = build_model_lstm((X_train.shape[1], 1))

# No callbacks: training runs for exactly FIXED_EPOCH epochs.
# shuffle=False keeps the samples in chronological order.
history = model.fit(
    X_train, y_train,
    epochs=FIXED_EPOCH,
    batch_size=FIXED_BATCH_SIZE,
    validation_data=(X_val, y_val),
    verbose=0,
    shuffle=False
)

# Loss summary; epochs are reported 1-based. Falls back to the training loss
# if no val_loss was recorded.
train_loss = history.history['loss']
val_loss = history.history.get('val_loss', None)
last_loss = float(train_loss[-1])
last_val_loss = float(val_loss[-1]) if val_loss is not None else None
best_epoch = int(np.argmin(val_loss) + 1) if val_loss is not None else int(np.argmin(train_loss) + 1)
best_val = float(np.min(val_loss)) if val_loss is not None else float(np.min(train_loss))

print(f"   -> Selesai epoch : {FIXED_EPOCH}")
print(f"   -> Best epoch    : {best_epoch} (val_loss={best_val:.6f})")
print(f"   -> Last loss     : {last_loss:.6f}" + (f" | Last val_loss : {last_val_loss:.6f}" if last_val_loss is not None else ""))


# Predict on the test windows, map the outputs back to original units, and
# compute MAPE (in percent) against the unscaled test targets.
predictions_scaled = model.predict(X_test, verbose=0)
predictions_real = scaler.inverse_transform(predictions_scaled)
mape = mean_absolute_percentage_error(y_test_orig.ravel(), predictions_real.ravel()) * 100
print(f"   -> MAPE          : {mape:.4f}%")


# Export the run: one sub-folder per region/configuration holding the plot,
# the per-row predictions and a one-row summary.
region_out_dir = os.path.join(path_output_folder, f"{region_name}_w{FIXED_WINDOW_SIZE}_e{FIXED_EPOCH}_b{FIXED_BATCH_SIZE}")
os.makedirs(region_out_dir, exist_ok=True)


# Target i is row i + FIXED_WINDOW_SIZE of the data, so the target dates
# start FIXED_WINDOW_SIZE rows into the date column.
all_dates = df[nama_kolom_tanggal].values[FIXED_WINDOW_SIZE:]
test_dates = all_dates[train_size:]
# =======================================


# Actual vs predicted prices over the test dates.
plt.figure(figsize=(12, 6))
plt.plot(test_dates, y_test_orig, label='Actual (Real Data)')
plt.plot(test_dates, predictions_real, label='Predicted (Denormalized)')

plt.title(f'{region_name} | W{FIXED_WINDOW_SIZE} E{FIXED_EPOCH} B{FIXED_BATCH_SIZE}')
plt.xlabel('Tanggal dan Tahun')
plt.ylabel('Harga (Rp)')


# Date formatting: month-based major ticks labelled as abbreviated month + year.
plt.gca().xaxis.set_major_formatter(mdates.DateFormatter('%b %Y'))
plt.gca().xaxis.set_major_locator(mdates.MonthLocator(interval=1))
plt.gcf().autofmt_xdate()


plt.legend()
plt.grid(True)
plot_path = os.path.join(region_out_dir, f"{region_name}_Plot.png")
plt.savefig(plot_path, dpi=150, bbox_inches='tight')
plt.close()
print(f"   ✓ Plot disimpan: {plot_path}")

# Per-row Excel sheet. Zero targets are replaced by machine epsilon in the
# denominator to avoid division by zero.
y_test_safe = np.where(y_test_orig.flatten() == 0, np.finfo(float).eps, y_test_orig.flatten())
err_pct = np.abs((y_test_orig.flatten() - predictions_real.flatten()) / y_test_safe) * 100

out_df = pd.DataFrame({
    'Tanggal': test_dates,
    'Actual (Real)': y_test_orig.flatten(),
    'Predicted (Real)': predictions_real.flatten(),
    'Selisih': (y_test_orig.flatten() - predictions_real.flatten()),
    'Error (%)': err_pct
})
# ==============================================

excel_path = os.path.join(region_out_dir, f"{region_name}_Prediksi_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
out_df.to_excel(excel_path, index=False)
print(f"   ✓ Prediksi disimpan: {excel_path}")

# Short summary: a single row with the configuration and its metrics.
summary = pd.DataFrame([{
    'Region': region_name,
    'Window Size': int(FIXED_WINDOW_SIZE),
    'Epoch': int(FIXED_EPOCH),
    'Batch Size': int(FIXED_BATCH_SIZE),
    'Best Epoch (val)': int(best_epoch),
    'Best Val Loss': float(best_val),
    'Last Loss': float(last_loss),
    'Last Val Loss': float(last_val_loss) if last_val_loss is not None else None,
    'MAPE (%)': float(mape)
}])
summary_path = os.path.join(region_out_dir, f"{region_name}_Summary_W{FIXED_WINDOW_SIZE}_E{FIXED_EPOCH}_B{FIXED_BATCH_SIZE}.xlsx")
summary.to_excel(summary_path, index=False)
print(f"   ✓ Ringkasan disimpan: {summary_path}")

print("\nSelesai untuk file ini.")

✓ Data: E:\Skripsi\Prediksi-bawang-merah\python\data\data clean\Data_Clean_Surabaya.xlsx
✓ Jumlah baris asli (raw): 1043
✓ Sampel window: 953 (window=90)
✓ Data ternormalisasi (0-1).
[Info] Total: 953 | Train: 762 | Val: 77 | Test: 191
   -> Selesai epoch : 50
   -> Best epoch    : 49 (val_loss=0.000944)
   -> Last loss     : 0.000899 | Last val_loss : 0.000987
   -> MAPE          : 3.3422%
   ✓ Plot disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Surabaya_w90_e50_b64\Surabaya_Plot.png
   ✓ Prediksi disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Surabaya_w90_e50_b64\Surabaya_Prediksi_W90_E50_B64.xlsx
   ✓ Ringkasan disimpan: E:\SKRIPSI 2025\dataset\UJI COBA SESUAI DI BAB 3\Surabaya_w90_e50_b64\Surabaya_Summary_W90_E50_B64.xlsx

Selesai untuk file ini.
