In [1]:
!pip install neuralforecast
!pip install torchinfo
!pip install codecarbon



In [3]:
# ==============================================================================
# Import Libraries
# ==============================================================================
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, LSTM
from neuralforecast.losses.pytorch import MAE, MSE # Impor loss lain jika perlu
from sklearn.preprocessing import MinMaxScaler
import time
import psutil # Diimpor tapi tidak digunakan di versi ini
import os
from codecarbon import EmissionsTracker
import random
import torch
import logging
import requests # Untuk membaca URL

# ==============================================================================
# Konfigurasi & Hyperparameters
# ==============================================================================
# --- Pengaturan Reproducibility ---
seed = 42
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)
    # Opsi tambahan untuk determinisme CUDA (bisa sedikit lebih lambat)
    # torch.backends.cudnn.deterministic = True
    # torch.backends.cudnn.benchmark = False

# --- Pengaturan Dataset & Preprocessing ---
# URL raw file TSF dari GitHub
DATASET_URL = "https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/5b204ef45db85a9ff4e283dd74941dbc117ad287/dataset/australian_electricity_demand_dataset.tsf"
# Indeks negara bagian yang akan digunakan (0: NSW, 1: QLD, 2: SA, 3: TAS, 4: VIC)
SELECTED_STATE_INDEX = 0
# Metode imputasi NaN ('ffill_bfill', atau bisa dikembangkan 'mean', 'median', 'interpolation', dll.)
NAN_IMPUTATION_METHOD = 'ffill_bfill'

# --- Pengaturan Model NeuralForecast ---
# Pilihan: 'NBEATS', 'NHITS', 'LSTM'
# Catatan: Parameter spesifik model mungkin perlu disesuaikan jika model diganti
MODEL_NAME = 'NHITS'
# Ukuran window input (jumlah langkah waktu historis untuk prediksi)
INPUT_WINDOW_SIZE = 48
# Horizon prediksi (jumlah langkah waktu ke depan yang ingin diprediksi)
HORIZON = 24
# Fungsi loss untuk training model (dari neuralforecast.losses.pytorch)
LOSS_FUNCTION = MAE()
# Jumlah epoch training per fold
EPOCHS = 10 # Mungkin perlu ditingkatkan untuk konvergensi yang lebih baik
# Ukuran batch untuk training
BATCH_SIZE = 32
# Ukuran validasi internal saat fit (menggunakan data terakhir dari training set fold)
VALIDATION_SIZE_FIT = HORIZON

# --- Pengaturan Cross-Validation ---
# Jumlah fold cross-validation
N_CROSSVALIDATION_FOLDS = 3
# Ukuran langkah (step) pergeseran window untuk fold berikutnya
CV_STEP_SIZE = 24 # Harus kelipatan HORIZON untuk evaluasi yang tidak tumpang tindih

# --- Pengaturan Lainnya ---
# Set environment variable untuk kompatibilitas NeuralForecast
os.environ['NIXTLA_ID_AS_COL'] = '1'
# Mengatur level log CodeCarbon agar tidak terlalu verbose
logging.getLogger("codecarbon").setLevel(logging.ERROR)
# Frekuensi data (misalnya '30min', 'H', 'D') - sesuaikan dengan dataset
DATA_FREQ = '30min'

# ==============================================================================
# Fungsi Helper
# ==============================================================================

def read_tsf_from_url(url):
    """Membaca file TSF dari URL dan mengembalikan list string data."""
    print(f"Mengunduh data dari: {url}")
    try:
        response = requests.get(url)
        response.raise_for_status() # Check jika request gagal
        lines = response.text.splitlines()
        print("Data berhasil diunduh.")

        series_data = []
        reading_data = False
        for line in lines:
            line = line.strip()
            if not line or line.startswith("#"):
                continue
            if line.startswith("@data"):
                reading_data = True
                continue
            if reading_data:
                series_data.append(line)
        print(f"Menemukan {len(series_data)} series dalam data.")
        return series_data
    except requests.exceptions.RequestException as e:
        print(f"Error mengunduh data: {e}")
        return None
    except Exception as e:
        print(f"Error memproses data: {e}")
        return None

def parse_series_data(series_data):
    """Mem-parsing data TSF mentah menjadi state names, start times, dan time series."""
    parsed_data = []
    state_names = []
    start_times = []
    print("Memulai parsing data series...")
    for i, row in enumerate(series_data):
        parts = row.split(":")
        if len(parts) >= 4:
            try:
                # Ekstraksi nama state, timestamp awal, dan data series
                state_name = parts[1]
                start_time_str = parts[2]
                # Coba parsing timestamp dengan format umum, bisa disesuaikan jika format berbeda
                start_time = pd.Timestamp(start_time_str)
                # Memisahkan nilai series, menangani kemungkinan nilai kosong
                time_series_str = parts[3].split(",")
                time_series = [float(val) for val in time_series_str if val] # Konversi ke float

                state_names.append(state_name)
                start_times.append(start_time)
                parsed_data.append(time_series)
            except ValueError as e:
                print(f"Peringatan: Skipping baris {i+1} karena error parsing: {e} - Data: {row[:100]}...")
            except Exception as e:
                print(f"Peringatan: Skipping baris {i+1} karena error tak terduga: {e} - Data: {row[:100]}...")
        else:
             print(f"Peringatan: Skipping baris {i+1} karena format tidak sesuai (kurang dari 4 bagian dipisah ':'). Data: {row[:100]}...")
    print(f"Parsing selesai. {len(parsed_data)} series berhasil diparsing.")
    return state_names, start_times, parsed_data

def select_state(states, start_times, time_series_data, index):
    """Memilih data time series untuk negara bagian berdasarkan indeks."""
    if not states:
         raise ValueError("Tidak ada data state yang berhasil diparsing.")
    if 0 <= index < len(states):
        state = states[index]
        start_time = start_times[index]
        time_series = time_series_data[index]
        print(f"Memilih state: {state} (Index: {index}) dengan {len(time_series)} titik data, mulai dari {start_time}.")
        return state, start_time, time_series
    else:
        raise ValueError(f"Indeks state tidak valid: {index}. Pilih antara 0 dan {len(states) - 1}.")

def handle_nan_values(ts, method='ffill_bfill'):
    """Menangani nilai NaN dalam time series."""
    ts_series = pd.Series(ts)
    initial_nan_count = ts_series.isna().sum()
    if initial_nan_count == 0:
        print("Tidak ada nilai NaN dalam data.")
        return ts_series.tolist()

    print(f"Menangani {initial_nan_count} nilai NaN menggunakan metode: {method}")
    if method == 'ffill_bfill':
        # Isi maju dulu, lalu isi mundur untuk menangani NaN di awal
        filled_ts = ts_series.ffill().bfill()
    # Tambahkan metode lain di sini jika perlu (misal: mean, median, interpolate)
    # elif method == 'mean':
    #     mean_val = ts_series.mean()
    #     filled_ts = ts_series.fillna(mean_val)
    else:
        print(f"Peringatan: Metode imputasi NaN '{method}' tidak dikenal. Menggunakan ffill().bfill().")
        filled_ts = ts_series.ffill().bfill()

    final_nan_count = filled_ts.isna().sum()
    if final_nan_count > 0:
        print(f"Peringatan: Masih ada {final_nan_count} nilai NaN setelah imputasi!")
    else:
        print("Semua nilai NaN berhasil ditangani.")
    return filled_ts.tolist()

def prepare_dataframe_for_neuralforecast(time_series, unique_id, start_time, freq):
    """Mempersiapkan Pandas DataFrame dalam format yang dibutuhkan NeuralForecast."""
    timestamps = pd.date_range(start=start_time, periods=len(time_series), freq=freq)
    df = pd.DataFrame({
        "ds": timestamps,
        "y": time_series
    })
    df["unique_id"] = unique_id # Nama unik untuk time series
    return df

def create_timeseries_cv_folds(data, horizon, step_size, n_crossvalidation):
    """Membagi data time series menjadi beberapa fold untuk cross-validation (sliding window)."""
    dataset_length = len(data)
    # Panjang total yang dibutuhkan untuk semua test set dan overlap step
    total_test_step_length = horizon * n_crossvalidation + step_size * (n_crossvalidation - 1)

    if total_test_step_length >= dataset_length:
         raise ValueError(f"Dataset terlalu pendek ({dataset_length}) untuk konfigurasi CV ini "
                          f"(membutuhkan setidaknya {total_test_step_length} poin untuk {n_crossvalidation} folds "
                          f"dengan horizon {horizon} dan step {step_size}). Kurangi jumlah fold atau step size.")

    # Panjang window training dihitung agar fold terakhir pas
    train_window_length = dataset_length - total_test_step_length

    print(f"Membuat {n_crossvalidation} fold CV:")
    print(f"  Panjang data total: {dataset_length}")
    print(f"  Horizon (Test set per fold): {horizon}")
    print(f"  Step size antar fold: {step_size}")
    print(f"  Panjang window training per fold: {train_window_length}")

    if train_window_length <= 0:
         raise ValueError("Panjang window training dihitung menjadi nol atau negatif. "
                          "Periksa konfigurasi horizon, step_size, dan n_crossvalidation.")

    folds = []
    for i in range(n_crossvalidation):
        start_train = i * step_size
        end_train = start_train + train_window_length
        start_test = end_train
        end_test = start_test + horizon

        # Pastikan index tidak keluar batas (meskipun perhitungan di atas harusnya mencegah ini)
        if end_test > dataset_length:
            print(f"Peringatan: Fold {i+1} melebihi panjang dataset. Ini seharusnya tidak terjadi.")
            continue # Lewati fold ini jika terjadi error perhitungan

        train_data = data[start_train:end_train]
        test_data = data[start_test:end_test]

        print(f"  Fold {i+1}: Train indices [{start_train}:{end_train}], Test indices [{start_test}:{end_test}]")
        folds.append((train_data, test_data))

    return folds

def denormalize(data_normalized, scaler):
    """Mengembalikan data yang dinormalisasi ke skala aslinya."""
    # Pastikan input adalah numpy array dengan shape (n_samples, 1)
    if isinstance(data_normalized, pd.Series):
        data_normalized = data_normalized.to_numpy()
    data_reshaped = data_normalized.reshape(-1, 1)
    data_denormalized = scaler.inverse_transform(data_reshaped)
    return data_denormalized.flatten() # Kembalikan sebagai array 1D

# --- Metrik Evaluasi Tambahan ---
def mean_absolute_percentage_error(y_true, y_pred):
    """Menghitung MAPE."""
    # Hindari pembagian dengan nol
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    mask = y_true != 0
    if np.sum(mask) == 0:
        return np.inf # Atau 0, tergantung definisi yang diinginkan jika semua true = 0
    return np.mean(np.abs((y_true[mask] - y_pred[mask]) / y_true[mask])) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    """Menghitung sMAPE."""
    y_true, y_pred = np.array(y_true), np.array(y_pred)
    numerator = np.abs(y_true - y_pred)
    denominator = (np.abs(y_true) + np.abs(y_pred)) / 2
    # Hindari pembagian dengan nol
    mask = denominator != 0
    if np.sum(mask) == 0:
         return np.inf # Atau 0
    # Perbaiki perhitungan sMAPE (kali 100 di luar mean)
    valid_smape = numerator[mask] / denominator[mask]
    return np.mean(valid_smape) * 100


# ==============================================================================
# Proses Utama
# ==============================================================================

# --- 1. Membaca dan Memparsing Data ---
raw_data = read_tsf_from_url(DATASET_URL)
if raw_data is None:
    exit("Gagal membaca data. Keluar.")

states, start_times, time_series_data = parse_series_data(raw_data)
if not states:
     exit("Tidak ada data series yang berhasil diparsing. Keluar.")

# --- 2. Memilih dan Membersihkan Data State ---
try:
    selected_state_name, dataset_start_time, ts_raw = select_state(
        states, start_times, time_series_data, SELECTED_STATE_INDEX
    )
except ValueError as e:
    print(e)
    exit()

# Handle NaN values
ts_cleaned = handle_nan_values(ts_raw, method=NAN_IMPUTATION_METHOD)

# --- 3. Normalisasi Data ---
print("Normalisasi data (MinMaxScaler ke rentang [0, 1])...")
scaler = MinMaxScaler(feature_range=(0, 1))
# Reshape diperlukan karena scaler mengharapkan input 2D
ts_normalized = scaler.fit_transform(np.array(ts_cleaned).reshape(-1, 1)).flatten()

# --- 4. Membuat Fold Cross-Validation ---
try:
    folds = create_timeseries_cv_folds(
        ts_normalized, HORIZON, CV_STEP_SIZE, N_CROSSVALIDATION_FOLDS
    )
except ValueError as e:
    print(f"Error saat membuat fold CV: {e}")
    exit()

# --- 5. Inisialisasi Model NeuralForecast ---
# *Saran*: Untuk performa terbaik, lakukan hyperparameter tuning untuk model
# (misalnya: learning rate, layer size, jumlah block, dll.) menggunakan library
# seperti Optuna atau Ray Tune. Parameter di bawah ini adalah contoh.
print(f"\nInisialisasi model: {MODEL_NAME}")
if MODEL_NAME == 'NHITS':
    # Contoh parameter NHITS, bisa disesuaikan
    model = NHITS(h=HORIZON,
                  input_size=INPUT_WINDOW_SIZE,
                  loss=LOSS_FUNCTION,
                  max_steps=EPOCHS, # max_steps di NeuralForecast mirip epoch
                  batch_size=BATCH_SIZE,
                  valid_loss=LOSS_FUNCTION, # Gunakan loss yang sama untuk validasi
                  # Tambahkan parameter NHITS lain jika perlu:
                  # n_blocks=[1, 1, 1], mlp_units=[[512, 512], [512, 512], [512, 512]],
                  # n_pool_kernel_size=[2, 2, 1], n_freq_downsample=[4, 2, 1],
                  # dropout_prob_theta=0.1, activation='ReLU',
                  )
elif MODEL_NAME == 'NBEATS':
     model = NBEATS(h=HORIZON, input_size=INPUT_WINDOW_SIZE, loss=LOSS_FUNCTION,
                    max_steps=EPOCHS, batch_size=BATCH_SIZE, valid_loss=LOSS_FUNCTION,
                    # stack_types=['T','S','G'], n_blocks=[3,3,3], mlp_units=[[512, 512]]*3,
                    )
elif MODEL_NAME == 'LSTM':
     model = LSTM(h=HORIZON, input_size=INPUT_WINDOW_SIZE, loss=LOSS_FUNCTION,
                  max_steps=EPOCHS, batch_size=BATCH_SIZE, valid_loss=LOSS_FUNCTION,
                  # encoder_hidden_size=200, encoder_n_layers=2,
                 )
else:
    raise ValueError(f"Model '{MODEL_NAME}' tidak dikenal. Pilih dari 'NBEATS', 'NHITS', 'LSTM'.")

# Membuat objek NeuralForecast
nf = NeuralForecast(models=[model], freq=DATA_FREQ)

# --- 6. Cross-Validation Training & Evaluation ---
print("\nMemulai Cross-Validation...")
overall_start_time = time.time()

all_metrics = [] # Simpan tuple (mae, mse, rmse, mape, smape) per fold
all_emissions = [] # Simpan emisi CO2 per fold
all_last_train_losses = [] # Simpan loss training terakhir per fold
all_min_val_losses = [] # Simpan minimum validation loss per fold

for i, (train_fold_norm, test_fold_norm) in enumerate(folds):
    fold_num = i + 1
    print(f"\n--- Processing Fold {fold_num}/{N_CROSSVALIDATION_FOLDS} ---")
    fold_start_time = time.time()

    # Tracker untuk emisi per fold
    fold_tracker = EmissionsTracker(measure_power_secs=1, log_level='error') # Hanya log error
    fold_tracker.start()

    # --- Persiapan Data Fold ---
    # Dapatkan timestamp yang benar untuk fold ini
    train_start_time_fold = dataset_start_time + pd.Timedelta(minutes=i * CV_STEP_SIZE * 30) # 30 min freq
    train_timestamps = pd.date_range(start=train_start_time_fold, periods=len(train_fold_norm), freq=DATA_FREQ)
    test_timestamps = pd.date_range(start=train_timestamps[-1] + pd.Timedelta(minutes=30), periods=len(test_fold_norm), freq=DATA_FREQ) # 30 min freq

    # Buat DataFrame untuk NeuralForecast (menggunakan data normalisasi)
    train_df = pd.DataFrame({"ds": train_timestamps, "y": train_fold_norm, "unique_id": selected_state_name})
    # test_df dibuat hanya untuk mendapatkan y_true nanti (tidak dilewatkan ke fit/predict)
    test_df_true = pd.DataFrame({"ds": test_timestamps, "y": test_fold_norm, "unique_id": selected_state_name})

    # --- Training Model ---
    print(f"Training model untuk fold {fold_num}...")
    # `nf.fit` akan melatih ulang model dari awal pada data fold ini
    # `val_size` menggunakan data terakhir dari `train_df` untuk validasi internal epoch
    nf.fit(df=train_df, val_size=VALIDATION_SIZE_FIT)

    # --- Prediksi ---
    print(f"Melakukan prediksi untuk horizon {HORIZON} setelah data training fold {fold_num}...")
    # `nf.predict()` akan menggunakan data terakhir yang dilihatnya (train_df) untuk membuat prediksi
    forecast_df = nf.predict().reset_index()

    # --- Evaluasi Fold ---
    print(f"Mengevaluasi fold {fold_num}...")

    # 1. Dapatkan nilai aktual (y_true) dari data test fold (masih normal)
    y_true_normalized = test_df_true['y'].to_numpy()

    # 2. Dapatkan nilai prediksi (y_pred) dari hasil forecast (juga normal)
    # Pastikan nama kolom sesuai dengan nama model (default: NBEATS, NHITS, LSTM)
    if MODEL_NAME not in forecast_df.columns:
         raise KeyError(f"Kolom prediksi '{MODEL_NAME}' tidak ditemukan di output forecast. Kolom yang tersedia: {forecast_df.columns}")
    y_pred_normalized = forecast_df[MODEL_NAME].to_numpy()

    # 3. Denormalisasi kedua nilai untuk evaluasi pada skala asli
    y_true_denorm = denormalize(y_true_normalized, scaler)
    y_pred_denorm = denormalize(y_pred_normalized, scaler)

    # 4. Hitung Metrik (menggunakan nilai denormalisasi)
    mae = mean_absolute_error(y_true_denorm, y_pred_denorm)
    mse = mean_squared_error(y_true_denorm, y_pred_denorm)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true_denorm, y_pred_denorm)
    smape = symmetric_mean_absolute_percentage_error(y_true_denorm, y_pred_denorm)

    # --- Pelacakan Loss ---
    # Ambil riwayat loss dari model yang baru saja di-fit
    current_model_history = nf.models[0]
    train_losses_fold_raw = current_model_history.train_trajectories
    val_losses_fold_raw = current_model_history.valid_trajectories

    # Ekstraksi nilai loss dengan aman (menangani kemungkinan tuple)
    last_train_loss_fold = float('nan')
    if train_losses_fold_raw:
        last_train_entry = train_losses_fold_raw[-1]
        # Jika tuple (misal: (step, loss)), ambil elemen terakhir
        if isinstance(last_train_entry, tuple) and len(last_train_entry) > 0:
            potential_loss = last_train_entry[-1]
            if isinstance(potential_loss, (int, float)): # Pastikan itu angka
                 last_train_loss_fold = float(potential_loss)
        elif isinstance(last_train_entry, (int, float)): # Jika sudah angka
            last_train_loss_fold = float(last_train_entry)

    min_val_loss_fold = float('nan')
    if val_losses_fold_raw:
        val_loss_values = []
        for entry in val_losses_fold_raw:
            if isinstance(entry, tuple) and len(entry) > 0:
                potential_loss = entry[-1]
                if isinstance(potential_loss, (int, float)):
                    val_loss_values.append(float(potential_loss))
            elif isinstance(entry, (int, float)):
                 val_loss_values.append(float(entry))
        if val_loss_values: # Pastikan ada nilai loss valid
             min_val_loss_fold = min(val_loss_values)

    all_last_train_losses.append(last_train_loss_fold)
    all_min_val_losses.append(min_val_loss_fold)

    # --- Stop Tracker Emisi ---
    # Hentikan tracker setelah semua komputasi utama selesai
    try:
        fold_emissions = fold_tracker.stop()
        if fold_emissions is None:
             print("Peringatan: Data emisi CO2 tidak tersedia untuk fold ini.")
             fold_emissions = 0.0 # Atau np.nan
        all_emissions.append(fold_emissions)
    except Exception as e:
        print(f"Error saat menghentikan CodeCarbon tracker: {e}")
        all_emissions.append(0.0) # Atau np.nan

    fold_end_time = time.time()
    fold_duration = fold_end_time - fold_start_time

    # --- Cetak Hasil Fold ---
    print(f"  Hasil Fold {fold_num}:")
    print(f"    Durasi Proses : {fold_duration:.2f} detik")
    print(f"    Metrik (Skala Asli):")
    print(f"      MAE         : {mae:.4f}")
    print(f"      MSE         : {mse:.4f}")
    print(f"      RMSE        : {rmse:.4f}")
    print(f"      MAPE        : {mape:.2f}%")
    print(f"      sMAPE       : {smape:.2f}%")
    print(f"    Loss:")
    # Gunakan format aman jika loss bisa jadi NaN
    print(f"      Training (Epoch Terakhir): {last_train_loss_fold:.6f}" if not np.isnan(last_train_loss_fold) else "      Training (Epoch Terakhir): N/A")
    print(f"      Validation (Minimum)   : {min_val_loss_fold:.6f}" if not np.isnan(min_val_loss_fold) else "      Validation (Minimum)   : N/A")
    print(f"    Estimasi Emisi CO2 : {fold_emissions:.6f} kg")

    all_metrics.append((mae, mse, rmse, mape, smape))

# --- 7. Ringkasan Keseluruhan ---
overall_end_time = time.time()
overall_duration = overall_end_time - overall_start_time

print(f"\n=============================================================")
print(f" Ringkasan Cross-Validation Selesai ({MODEL_NAME})")
print(f"=============================================================")
print(f" State Diteliti      : {selected_state_name}")
print(f" Total Waktu Proses : {overall_duration:.2f} detik")
print(f" Jumlah Fold        : {N_CROSSVALIDATION_FOLDS}")
print(f" Horizon Prediksi   : {HORIZON}")
print(f" Input Window Size  : {INPUT_WINDOW_SIZE}")

print("\n--- Metrik Rata-rata (di semua fold): ---")
if all_metrics:
    avg_metrics = np.mean(all_metrics, axis=0)
    print(f"  Rata-rata MAE   : {avg_metrics[0]:.4f}")
    print(f"  Rata-rata MSE   : {avg_metrics[1]:.4f}")
    print(f"  Rata-rata RMSE  : {avg_metrics[2]:.4f}")
    print(f"  Rata-rata MAPE  : {avg_metrics[3]:.2f}%")
    print(f"  Rata-rata sMAPE : {avg_metrics[4]:.2f}%")
else:
    print("  Tidak ada data metrik untuk dirata-ratakan.")

print("\n--- Statistik Loss (di semua fold): ---")
# Filter out NaN values before calculating stats
valid_train_losses = [loss for loss in all_last_train_losses if not np.isnan(loss)]
valid_val_losses = [loss for loss in all_min_val_losses if not np.isnan(loss)]

if valid_train_losses:
     print(f"  Training Loss (Epoch Terakhir):")
     print(f"    Rata-rata : {np.mean(valid_train_losses):.6f}")
     print(f"    Min       : {np.min(valid_train_losses):.6f}")
     print(f"    Max       : {np.max(valid_train_losses):.6f}")
else:
     print("  Tidak ada data loss training yang valid.")

if valid_val_losses:
    print(f"  Validation Loss (Minimum):")
    print(f"    Rata-rata : {np.mean(valid_val_losses):.6f}")
    print(f"    Min       : {np.min(valid_val_losses):.6f}")
    print(f"    Max       : {np.max(valid_val_losses):.6f}")
else:
    print("  Tidak ada data loss validasi yang valid.")


print("\n--- Estimasi Emisi CO2: ---")
if all_emissions:
    total_emissions = sum(all_emissions)
    # Filter out potential NaNs if any were added during error handling
    valid_emissions = [e for e in all_emissions if isinstance(e, (int, float)) and not np.isnan(e)]
    avg_emissions = np.mean(valid_emissions) if valid_emissions else 0.0
    print(f"  Total Estimasi  : {total_emissions:.6f} kg")
    print(f"  Rata-rata per Fold: {avg_emissions:.6f} kg")
else:
    print("  Tidak ada data emisi CO2.")

print("\n--- Metrik Detail per Fold: ---")
for i, metrics in enumerate(all_metrics):
    fold_emissions_val = all_emissions[i] if i < len(all_emissions) else 'N/A'
    # Format emissions safely
    fold_emissions_str = f"{fold_emissions_val:.6f} kg" if isinstance(fold_emissions_val, (int, float)) else str(fold_emissions_val)
    print(f"  Fold {i+1}: MAE={metrics[0]:.4f}, MSE={metrics[1]:.4f}, RMSE={metrics[2]:.4f}, "
          f"MAPE={metrics[3]:.2f}%, sMAPE={metrics[4]:.2f}%, CO2={fold_emissions_str}")

print("\nEksperimen Selesai.")

# --- Opsional: Plotting (Contoh: Fold Terakhir) ---
# Pastikan y_true_denorm dan y_pred_denorm tersedia dari fold terakhir
# try:
#     plt.figure(figsize=(12, 6))
#     # Ambil timestamp dari test_df_true yang dibuat di fold terakhir
#     last_fold_test_ds = test_df_true['ds']
#     # Ambil timestamp dari forecast_df yang dibuat di fold terakhir
#     last_fold_forecast_ds = forecast_df['ds']
#
#     plt.plot(last_fold_test_ds, y_true_denorm, label='Aktual (Fold Terakhir)', marker='.')
#     plt.plot(last_fold_forecast_ds, y_pred_denorm, label=f'Prediksi {MODEL_NAME} (Fold Terakhir)', marker='.', linestyle='--')
#     plt.title(f'Prediksi vs Aktual - Fold Terakhir ({selected_state_name})')
#     plt.xlabel('Timestamp')
#     plt.ylabel('Demand (Skala Asli)')
#     plt.legend()
#     plt.grid(True)
#     plt.xticks(rotation=45)
#     plt.tight_layout()
#     plt.show()
# except NameError:
#      print("\nVariabel plot (dari fold terakhir) tidak tersedia. Lewati plot.")
# except Exception as e:
#      print(f"\nError saat membuat plot: {e}")

Mengunduh data dari: https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/5b204ef45db85a9ff4e283dd74941dbc117ad287/dataset/australian_electricity_demand_dataset.tsf
Data berhasil diunduh.
Menemukan 5 series dalam data.
Memulai parsing data series...


INFO:lightning_fabric.utilities.seed:Seed set to 1


Parsing selesai. 5 series berhasil diparsing.
Memilih state: NSW (Index: 0) dengan 230736 titik data, mulai dari 2002-01-01 00:00:00+00:00.
Tidak ada nilai NaN dalam data.
Normalisasi data (MinMaxScaler ke rentang [0, 1])...
Membuat 3 fold CV:
  Panjang data total: 230736
  Horizon (Test set per fold): 24
  Step size antar fold: 24
  Panjang window training per fold: 230616
  Fold 1: Train indices [0:230616], Test indices [230616:230640]
  Fold 2: Train indices [24:230640], Test indices [230640:230664]
  Fold 3: Train indices [48:230664], Test indices [230664:230688]

Inisialisasi model: NHITS

Memulai Cross-Validation...

--- Processing Fold 1/3 ---
Training model untuk fold 1...


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Melakukan prediksi untuk horizon 24 setelah data training fold 1...


Predicting: |          | 0/? [00:00<?, ?it/s]

Mengevaluasi fold 1...
  Hasil Fold 1:
    Durasi Proses : 21.04 detik
    Metrik (Skala Asli):
      MAE         : 427.8868
      MSE         : 220293.5179
      RMSE        : 469.3544
      MAPE        : 6.29%
      sMAPE       : 6.55%
    Loss:
      Training (Epoch Terakhir): 0.062791
      Validation (Minimum)   : 0.063551
    Estimasi Emisi CO2 : 0.000063 kg

--- Processing Fold 2/3 ---
Training model untuk fold 2...


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Melakukan prediksi untuk horizon 24 setelah data training fold 2...


Predicting: |          | 0/? [00:00<?, ?it/s]

Mengevaluasi fold 2...
  Hasil Fold 2:
    Durasi Proses : 19.68 detik
    Metrik (Skala Asli):
      MAE         : 422.9582
      MSE         : 346940.6486
      RMSE        : 589.0167
      MAPE        : 8.49%
      sMAPE       : 7.77%
    Loss:
      Training (Epoch Terakhir): 0.061419
      Validation (Minimum)   : 0.045678
    Estimasi Emisi CO2 : 0.000058 kg

--- Processing Fold 3/3 ---
Training model untuk fold 3...


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Melakukan prediksi untuk horizon 24 setelah data training fold 3...


Predicting: |          | 0/? [00:00<?, ?it/s]

Mengevaluasi fold 3...
  Hasil Fold 3:
    Durasi Proses : 18.83 detik
    Metrik (Skala Asli):
      MAE         : 368.4283
      MSE         : 181560.2654
      RMSE        : 426.0989
      MAPE        : 5.39%
      sMAPE       : 5.20%
    Loss:
      Training (Epoch Terakhir): 0.068883
      Validation (Minimum)   : 0.045152
    Estimasi Emisi CO2 : 0.000056 kg

 Ringkasan Cross-Validation Selesai (NHITS)
 State Diteliti      : NSW
 Total Waktu Proses : 59.55 detik
 Jumlah Fold        : 3
 Horizon Prediksi   : 24
 Input Window Size  : 48

--- Metrik Rata-rata (di semua fold): ---
  Rata-rata MAE   : 406.4244
  Rata-rata MSE   : 249598.1440
  Rata-rata RMSE  : 494.8233
  Rata-rata MAPE  : 6.72%
  Rata-rata sMAPE : 6.50%

--- Statistik Loss (di semua fold): ---
  Training Loss (Epoch Terakhir):
    Rata-rata : 0.064364
    Min       : 0.061419
    Max       : 0.068883
  Validation Loss (Minimum):
    Rata-rata : 0.051461
    Min       : 0.045152
    Max       : 0.063551

--- Estimasi 