# In [3]:
# ==============================================================================
# KODE GABUNGAN UNTUK EKSPLORASI DATASET TESIS (VERSI FINAL + SOLAR POWER)
# ==============================================================================
# Perubahan: Dataset 7 diganti dengan Solar Power Generation.
#            Parser CSV dikembalikan ke versi generik (tanpa filter store/item).
# Perbaikan sebelumnya dipertahankan (encoding, subsetting data panjang).
#
# Harap baca komentar "Penting" di bawah sebelum menjalankan.
# Sesuaikan `selected_series_...` untuk M3, M4 Daily, M4 Hourly, Tourism jika perlu.
# !! PENTING !! Uncomment bagian Google Drive Mount jika ingin menganalisis
#                M4 Daily (Dataset 4) atau Solar Power (Dataset 7).
# !! PENTING !! Verifikasi NAMA FILE dan NAMA KOLOM ('DATE_TIME', 'DC_POWER')
#                untuk Dataset 7 di DATASET_CONFIG[7] sesuai file Anda!

# --- Instalasi Library ---
# !pip install neuralforecast statsmodels requests pandas numpy matplotlib seaborn -q
# !pip install tsfresh # Opsional, jika ingin fitur ekstraksi time series
# !pip install codecarbon # Opsional, jika ingin melacak emisi nanti (tidak digunakan di sini)
# print("Instalasi library eksternal selesai.")

# --- Import Libraries Utama ---
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.seasonal import seasonal_decompose
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf # Untuk analisis autokorelasi
from scipy.stats import describe # Statistik deskriptif lebih detail
import requests
from io import StringIO
import os
import traceback
import random
import torch # Meskipun tidak training, perlu untuk seed jika ada
import warnings

# Silence FutureWarning and UserWarning globally so library notices do not
# clutter the exploration output.
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning)

print("Library Python berhasil diimpor.")

# --- Configuration & seed ---
# Seed NumPy's and Python's global random generators for reproducibility.
seed = 42
np.random.seed(seed)
random.seed(seed)
# torch.manual_seed(seed) # Not really relevant for exploration only

# Registry of the datasets that can be explored, keyed by an integer index.
# Each entry holds:
#   'name'                 - display name, also used to build the CSV series id
#   'type'                 - 'tsf' or 'csv'; selects the parser in load_and_parse_data
#   'source'               - http(s) URL or local file path handed to load_data_from_source
#   'freq'                 - frequency string passed to pd.date_range when rebuilding timestamps
#   'parser_variant'       - TSF row layout ('australia' or 'standard'); None for CSV
#   'value_column' /
#   'time_column'          - CSV column names; None for TSF
#   'justification_points' - bullet points printed by explore_dataset
#   'series_to_select_name_hint' - human-readable hint only; not read by the code in this file
DATASET_CONFIG = {
    1: {
        'name': 'Australian Electricity Demand', 'type': 'tsf',
        'source': "https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/5b204ef45db85a9ff4e283dd74941dbc117ad287/dataset/australian_electricity_demand_dataset.tsf",
        'freq': '30min', 'parser_variant': 'australia', 'value_column': None, 'time_column': None,
        'justification_points': ["Frekuensi tinggi (30min)", "Domain energi", "Musiman ganda kuat (harian, mingguan)", "Potensi outlier minor"],
        'series_to_select_name_hint': 'T1_NSW',
    },
    2: {
        'name': 'Bike Sharing Daily', 'type': 'csv',
        'source': 'https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/e2e72b49171794fbc1f285f3eaceff32a8faa5e8/dataset/bike_sharing_dataset.csv',
        'freq': 'D', 'parser_variant': None, 'value_column': 'cnt', 'time_column': 'dteday',
        'justification_points': ["Frekuensi sedang (harian)", "Domain transportasi", "Volatilitas tinggi (pengaruh eksternal)", "Potensi outlier/perubahan level", "Benchmark permintaan harian"],
        'series_to_select_name_hint': 'Gunakan indeks 0',
    },
    3: {
        'name': 'M3 Monthly', 'type': 'tsf',
        'source': 'https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/e2e72b49171794fbc1f285f3eaceff32a8faa5e8/dataset/m3_monthly_dataset.tsf',
        'freq': 'MS', 'parser_variant': 'standard', 'value_column': None, 'time_column': None,
        'justification_points': ["Frekuensi rendah (bulanan)", "Domain Industri/Demografi (pilih seri representatif)", "Benchmark klasik bulanan", "Variasi panjang, trend, musiman tahunan"],
        'series_to_select_name_hint': 'N1005 (Contoh - verifikasi ID/Indeks!)',
    },
    4: {
        'name': 'M4 Daily', 'type': 'tsf',
        'source': '/content/drive/My Drive/S2/Thesis/loss-function-comparison/dataset/m4_daily_dataset.tsf', # Requires Google Drive to be mounted!
        'freq': 'D', 'parser_variant': 'standard', 'value_column': None, 'time_column': None,
        'justification_points': ["Frekuensi sedang (harian)", "Domain Finansial/Ekonomi (pilih seri representatif)", "Benchmark M4 Harian", "Potensi noise/volatilitas tinggi", "Pola kurang jelas/kompleks"],
        'series_to_select_name_hint': 'D412 (Contoh - verifikasi ID/Indeks!)',
    },
     5: {
        'name': 'M4 Hourly', 'type': 'tsf',
        'source': 'https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/e2e72b49171794fbc1f285f3eaceff32a8faa5e8/dataset/m4_hourly_dataset.tsf',
        'freq': 'H', 'parser_variant': 'standard', 'value_column': None, 'time_column': None,
        'justification_points': ["Frekuensi tinggi (jam)", "Domain campuran (benchmark M4)", "Kontras domain dengan Energi (NSW)", "Potensi variabilitas/noise tinggi"],
        'series_to_select_name_hint': 'H1 (Contoh - verifikasi ID/Indeks!)',
    },
    6: {
        'name': 'Tourism Monthly', 'type': 'tsf',
        'source': 'https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/e2e72b49171794fbc1f285f3eaceff32a8faa5e8/dataset/tourism_monthly_dataset.tsf',
        'freq': 'MS', 'parser_variant': 'standard', 'value_column': None, 'time_column': None,
        'justification_points': ["Frekuensi rendah (bulanan)", "Domain Pariwisata", "Benchmark Tourism Competition", "Musiman tahunan biasanya kuat", "Rentan guncangan/outlier"],
        'series_to_select_name_hint': 'T1 (Contoh - verifikasi ID/Indeks!)',
    },
    # --- DATASET 7 REPLACED ---
    7: {
        'name': 'Solar Power Generation', # New name
        'type': 'csv',
        'source': '/content/drive/My Drive/S2/Thesis/loss-function-comparison/dataset/solar_power_generation_dataset_1.csv', # New path - requires Google Drive to be mounted!
        'freq': '15min', # Frequency of the solar data (every 15 minutes)
        'parser_variant': None,
        'value_column': 'DC_POWER', # New value column - !! VERIFY THIS NAME !!
        'time_column': 'DATE_TIME',  # New time column - !! VERIFY THIS NAME !!
        'justification_points': [
            "Frekuensi tinggi (15min)",
            "Domain Energi (Terbarukan)",
            "Intermittency tinggi (nilai nol saat malam)", # Main reason for the replacement
            "Pola harian kuat (siklus diurnal)",
            "Pengaruh cuaca (noise/volatilitas)",
            "Potensi anomali sensor/outlier"
        ],
        'series_to_select_name_hint': 'Gunakan indeks 0 (CSV dibaca sbg 1 series)',
    }
    # --------------------------
}
# Imputation strategy handed to handle_nan_values by explore_dataset.
NAN_IMPUTATION_METHOD = 'ffill_bfill'
# NOTE(review): presumably read by the Nixtla libraries (neuralforecast) to keep
# unique_id as a regular column - confirm; nothing in this file reads it.
os.environ['NIXTLA_ID_AS_COL'] = '1'

print("Konfigurasi dataset (Dataset 7 diganti dgn Solar Power) dimuat.")

# --- Fungsi Helper (Loading, Parsing, Cleaning, Preparing) ---

def _download_text(url):
    """Download *url* and return its body decoded to text, or None on failure."""
    print(f"Mengunduh data dari: {url}")
    try:
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        print("Data berhasil diunduh.")
        encoding = response.encoding if response.encoding else 'utf-8'
        print(f"  Mencoba decode dengan encoding terdeteksi/default: {encoding}")
        try:
            return response.content.decode(encoding)
        except (UnicodeDecodeError, LookupError) as e_primary:
            # LookupError: the server declared an encoding name Python does not
            # know; treat it like a decode failure and use the fallback.
            print(f"  Gagal decode dengan {encoding}: {e_primary}. Mencoba iso-8859-1...")
        try:
            decoded_content = response.content.decode('iso-8859-1')
        except UnicodeDecodeError as e_latin1:
            print(f"  Gagal decode dengan iso-8859-1 juga: {e_latin1}")
            print("Error: Gagal decode konten unduhan dgn encoding yg dicoba.")
            return None
        print("  Berhasil decode dengan iso-8859-1.")
        return decoded_content
    except requests.exceptions.RequestException as e:
        print(f"Error mengunduh data: {e}")
        return None
    except Exception as e:
        # Best-effort loader: report and signal failure instead of raising.
        print(f"Error lain saat mengunduh/proses awal: {e}")
        return None


def _read_local_text(path):
    """Read the local file *path* as text, trying several encodings; None on failure."""
    print(f"Membaca data dari file lokal: {path}")
    try:
        if not os.path.exists(path):
            print(f"Error: File tdk ditemukan di {path}")
            if 'drive/My Drive' in path:
                print("Tips: Pastikan Google Drive ter-mount.")
            # A missing file is always a failure, whether or not it lives on
            # Drive; do not fall through and attempt to open it.
            return None
        last_exception = None
        for enc in ('utf-8', 'iso-8859-1', 'latin1'):
            try:
                with open(path, 'r', encoding=enc) as f:
                    content = f.read()
            except UnicodeDecodeError:
                print(f"Gagal baca dgn enc '{enc}'.")
                last_exception = f"Gagal decode dgn {enc}"
                continue
            except Exception as e:
                # Anything other than a decode error will not be cured by a
                # different encoding, so stop trying.
                print(f"Error baca file {path} dgn enc '{enc}': {e}")
                last_exception = e
                break
            print(f"Data berhasil dibaca dgn encoding '{enc}'.")
            return content
        print(f"Error: Gagal baca file dgn enc yg dicoba. Err terakhir: {last_exception}")
        return None
    except Exception as e:
        print(f"Error tak terduga baca file lokal {path}: {e}")
        return None


def load_data_from_source(source_path_or_url):
    """Return the text content of a URL or local file, with encoding fallbacks.

    Args:
        source_path_or_url: An http(s) URL (anything whose string form starts
            with 'http') or a local file path.

    Returns:
        The decoded content as a string, or None when the source cannot be
        downloaded, found, read or decoded. Failures are reported with print()
        rather than raised.
    """
    source_str = str(source_path_or_url)
    if source_str.startswith('http'):
        return _download_text(source_str)
    return _read_local_text(source_str)

def _parse_tsf_start_time(text):
    """Parse a TSF start timestamp; return None when it cannot be parsed."""
    # Try the layout used by the TSF rows this parser falls back to first, so
    # that the 'HH-MM-SS' part is never left to a generic parser's guesswork.
    try:
        return pd.to_datetime(text, format='%Y-%m-%d %H-%M-%S')
    except (ValueError, TypeError):
        pass
    try:
        return pd.Timestamp(text.replace(' ', 'T'))
    except (ValueError, TypeError, OverflowError):
        return None


def _parse_tsf_values(values_str):
    """Convert a comma-separated value string to floats ('?' or junk -> NaN)."""
    values = []
    for token in values_str.split(","):
        token = token.strip()
        if not token:
            # Empty tokens (e.g. from a trailing comma) carry no observation.
            continue
        if token == '?':
            values.append(np.nan)
            continue
        try:
            values.append(float(token))
        except ValueError:
            values.append(np.nan)
    return values


def parse_tsf_data(raw_content, parser_variant):
    """Parse raw TSF text into parallel lists of ids, start times and values.

    Lines before '@data' (comments and '@' header tags) are ignored. Each data
    row is split on ':'. With parser_variant 'australia' a row is
    ``name:state:start:values`` and the id becomes ``name_state``; with
    'standard' a row is ``name:start:values``. Rows that do not match the
    variant, whose start time cannot be parsed, or that contain no values are
    skipped and counted in a warning.

    Args:
        raw_content: Full TSF file content as a string.
        parser_variant: 'australia' or 'standard'.

    Returns:
        Tuple ``(series_ids, start_times, parsed_series)`` of equally long
        lists; missing observations ('?') are represented as NaN.
    """
    parsed_series, series_ids, start_times = [], [], []
    print(f"Memulai parsing TSF (varian: {parser_variant})...")
    header_prefixes = ("#", "@relation", "@attribute", "@frequency", "@horizon", "@missing", "@equallength")
    reading_data = False
    skipped_lines = 0
    for raw_line in raw_content.splitlines():
        line = raw_line.strip()
        if not line or line.startswith(header_prefixes):
            continue
        if line.startswith("@data"):
            reading_data = True
            continue
        if not reading_data:
            continue
        parts = line.split(":")
        try:
            if parser_variant == 'australia' and len(parts) >= 4:
                unique_id = f"{parts[0]}_{parts[1]}"
                start_time_str, values_str = parts[2], parts[3]
            elif parser_variant == 'standard' and len(parts) >= 3:
                unique_id, start_time_str, values_str = parts[0], parts[1], parts[2]
            else:
                skipped_lines += 1
                continue
            start_time = _parse_tsf_start_time(start_time_str)
            if start_time is None:
                skipped_lines += 1
                continue
            time_series = _parse_tsf_values(values_str)
            if not time_series:
                skipped_lines += 1
                continue
            parsed_series.append(time_series)
            series_ids.append(unique_id)
            start_times.append(start_time)
        except Exception:
            # Best-effort parsing: a malformed row is skipped and reported in
            # the summary below rather than aborting the whole file.
            skipped_lines += 1
    if skipped_lines > 0:
        print(f"Peringatan: Melewati {skipped_lines} baris/series saat parsing TSF.")
    print(f"Parsing TSF selesai. {len(series_ids)} series berhasil diparsing.")
    return series_ids, start_times, parsed_series

# !! FUNGSI parse_csv_data DIKEMBALIKAN KE VERSI GENERIK (TANPA FILTER STORE/ITEM) !!
def parse_csv_data(raw_content, time_col, value_col, dataset_name):
    """
    Parse raw CSV text into a single time series.

    All rows are sorted by the time column and returned as ONE series, even
    when the file appears to hold several series (a warning is printed in that
    case). Non-numeric values become NaN.

    Args:
        raw_content: CSV file content as a string.
        time_col: Name of the timestamp column.
        value_col: Name of the value column.
        dataset_name: Display name; used to build the series id.

    Returns:
        ``([unique_id], [start_time], [values])`` on success, or
        ``([], [], [])`` if anything goes wrong (the error is printed).
    """
    print(f"Memulai parsing CSV (time: '{time_col}', value: '{value_col}')...")
    try:
        frame = pd.read_csv(StringIO(raw_content))
        if time_col not in frame.columns:
            raise ValueError(f"Kolom waktu '{time_col}' TIDAK DITEMUKAN. Kolom: {frame.columns.tolist()}. Periksa config!")
        if value_col not in frame.columns:
            raise ValueError(f"Kolom nilai '{value_col}' TIDAK DITEMUKAN. Kolom: {frame.columns.tolist()}. Periksa config!")

        # Generic multi-series detection: warn only, never filter.
        known_id_names = ['store', 'item', 'ID', 'Id', 'PLANT_ID', 'SOURCE_KEY']
        potential_id_cols = [name for name in known_id_names if name in frame.columns]
        if potential_id_cols:
            try:
                num_series_in_file = len(frame[potential_id_cols].drop_duplicates())
                if num_series_in_file > 1:
                    print(f"PERINGATAN: File CSV '{dataset_name}' tampaknya berisi {num_series_in_file} kombinasi unik dari kolom {potential_id_cols}.")
                    print("            Kode ini akan memproses SEMUA baris sebagai SATU series tunggal.")
                    print("            Jika ingin analisis per series unik, filter data SEBELUM parsing atau modifikasi parser ini.")
            except Exception as e:
                print(f"Info: Gagal menghitung kombinasi unik ID {potential_id_cols}: {e}")

        frame[time_col] = pd.to_datetime(frame[time_col])
        frame[value_col] = pd.to_numeric(frame[value_col], errors='coerce')
        # Order every row by time (matters when several series are merged).
        frame = frame.sort_values(by=time_col).reset_index(drop=True)

        value_series = frame[value_col]
        first_timestamp = frame[time_col].iloc[0]
        values = value_series.tolist()
        # Generic id for the (possibly merged) series.
        unique_id = f"{dataset_name.replace(' ', '_')}_Series"

        nan_count = value_series.isna().sum()
        if nan_count > 0:
            print(f"Peringatan: {nan_count} NaN di kolom '{value_col}' stlh parsing CSV.")

        print(f"Parsing CSV selesai. 1 series ('{unique_id}') ditemukan dgn {len(values)} data.")
        return [unique_id], [first_timestamp], [values]

    except Exception as e:
        print(f"Error parsing CSV: {e}\n{traceback.format_exc()}")
        return [], [], []

def load_and_parse_data(dataset_index, config):
    """Load one configured dataset and parse it into series lists.

    Args:
        dataset_index: Key of the dataset inside *config*.
        config: Mapping of dataset index to its settings dict.

    Returns:
        ``(ids, start_times, series_list, freq, dataset_name)``, or a tuple of
        five ``None`` values when loading or parsing yields nothing.

    Raises:
        ValueError: If the index has no config entry or the dataset type is
            neither 'tsf' nor 'csv'.
    """
    cfg = config.get(dataset_index)
    if not cfg:
        raise ValueError(f"Config u/ Indeks Dataset '{dataset_index}' tdk ditemukan.")

    dataset_name = cfg['name']
    print(f"\n-- Memuat Dataset {dataset_index}: {dataset_name} --")

    raw_content = load_data_from_source(cfg['source'])
    if raw_content is None:
        print(f"Gagal muat konten u/ '{dataset_name}'. Eksplorasi dibatalkan.")
        return None, None, None, None, None

    data_freq = cfg['freq']
    dataset_type = cfg['type']
    if dataset_type == 'tsf':
        ids, starts, series_list = parse_tsf_data(raw_content, cfg['parser_variant'])
    elif dataset_type == 'csv':
        has_columns = cfg.get('time_column') and cfg.get('value_column')
        if not has_columns:
            print(f"Error: Config CSV '{dataset_name}' kurang time/value col.")
            return None, None, None, None, None
        # Delegates to the generic CSV parser (whole file as one series).
        ids, starts, series_list = parse_csv_data(raw_content, cfg['time_column'], cfg['value_column'], dataset_name)
    else:
        raise ValueError(f"Tipe dataset '{cfg['type']}' tdk dikenal u/ '{dataset_name}'.")

    if not ids:
        print(f"Tdk ada series diparsing u/ '{dataset_name}'.")
        return None, None, None, None, None
    return ids, starts, series_list, data_freq, dataset_name

def select_series(all_ids, all_start_times, all_series_data, index_or_name, dataset_name):
    """Pick one series by numeric position or by id.

    A string is matched exactly first; if that fails, a case-insensitive match
    is accepted only when it is unique.

    Args:
        all_ids: List of series ids.
        all_start_times: Start time per series, parallel to *all_ids*.
        all_series_data: Value list per series, parallel to *all_ids*.
        index_or_name: Integer position or id string of the wanted series.
        dataset_name: Display name used in messages.

    Returns:
        ``(selected_id, start_time, time_series)``; the series is ``[]`` when
        the chosen series holds no data.

    Raises:
        ValueError: No series available, index out of range, or name not
            found / ambiguous.
        TypeError: *index_or_name* is neither int nor str.
    """
    if not all_ids:
        raise ValueError(f"Tdk ada data series tersedia u/ dipilih dari '{dataset_name}'.")

    if isinstance(index_or_name, int):
        if not (0 <= index_or_name < len(all_ids)):
            raise ValueError(f"Indeks series numerik {index_or_name} tdk valid u/ '{dataset_name}'. Pilih 0-{len(all_ids) - 1}.")
        position = index_or_name
    elif isinstance(index_or_name, str):
        if index_or_name in all_ids:
            position = all_ids.index(index_or_name)
        else:
            print(f"Peringatan: Nama series persis '{index_or_name}' tdk ditemukan di ID u/ '{dataset_name}'.")
            print(f"  ID tersedia (sampai 10): {all_ids[:10]}")
            wanted = index_or_name.lower()
            candidates = []
            for pos, candidate_id in enumerate(all_ids):
                if candidate_id.lower() == wanted:
                    candidates.append(pos)
            if len(candidates) > 1:
                raise ValueError(f"Nama '{index_or_name}' (case-insensitive) cocok >1 ID. Gunakan ID unik/indeks.")
            if not candidates:
                raise ValueError(f"Nama series '{index_or_name}' (persis/case-insensitive) tdk ditemukan.")
            position = candidates[0]
            print(f"  -> Ditemukan match case-insensitive: '{all_ids[position]}'. Menggunakan ini.")
    else:
        raise TypeError(f"'index_or_name' hrs int/str. Diberikan: {type(index_or_name)}")

    selected_id = all_ids[position]
    start_time = all_start_times[position]
    time_series = all_series_data[position]
    print(f"\nMemilih series: '{selected_id}' (Index: {position}, Diminta: '{index_or_name}') dari '{dataset_name}'")
    if not time_series:
        print(f"Peringatan: Time series '{selected_id}' kosong.")
        return selected_id, start_time, []
    print(f"  -> {len(time_series)} titik data, mulai dari {start_time}.")
    return selected_id, start_time, time_series

def handle_nan_values(ts, method='ffill_bfill'):
    """Return *ts* as a list of floats with NaN values imputed.

    Args:
        ts: list, NumPy array or pandas Series of values.
        method: 'ffill_bfill', 'mean', 'median' or 'interpolate_linear'.
            Unknown names fall back to 'ffill_bfill' with a warning.

    Returns:
        List of floats. Any NaN that survives the chosen method (e.g. an
        all-NaN input) is replaced by 0. Empty input yields ``[]``.

    Raises:
        TypeError: If *ts* is not a list, ndarray or Series.
    """
    if not isinstance(ts, (list, np.ndarray, pd.Series)):
        raise TypeError("Input 'ts' hrs list/array/Series.")
    if len(ts) == 0:
        print("Warn: TS kosong sblm handle NaN.")
        return []

    values = pd.Series(ts, dtype=float)
    initial_nan_count = values.isna().sum()
    if initial_nan_count == 0:
        print("Tidak ada nilai NaN.")
        return values.tolist()

    print(f"Menangani {initial_nan_count}/{len(values)} NaN dgn metode: {method}")
    if method == 'ffill_bfill':
        filled = values.ffill().bfill()
    elif method == 'mean':
        mean_val = values.mean()
        mean_usable = pd.notna(mean_val)
        fill_value = mean_val if mean_usable else 0
        filled = values.fillna(fill_value)
        note = "" if mean_usable else " (rata2 NaN, isi 0)"
        print(f"  Imputasi mean: {fill_value:.4f}" + note)
    elif method == 'median':
        median_val = values.median()
        median_usable = pd.notna(median_val)
        fill_value = median_val if median_usable else 0
        filled = values.fillna(fill_value)
        note = "" if median_usable else " (median NaN, isi 0)"
        print(f"  Imputasi median: {fill_value:.4f}" + note)
    elif method == 'interpolate_linear':
        interpolated = values.interpolate(method='linear', limit_direction='both')
        filled = interpolated.ffill().bfill()
    else:
        print(f"Warn: Metode '{method}' tdk dikenal. Pakai 'ffill_bfill'.")
        filled = values.ffill().bfill()

    final_nan_count = filled.isna().sum()
    if final_nan_count > 0:
        # Last resort so downstream code never sees NaN.
        print(f"Warn: Masih ada {final_nan_count} NaN stlh '{method}'. Isi sisa dgn 0.")
        filled = filled.fillna(0)
    else:
        print("Semua NaN berhasil ditangani.")
    return filled.tolist()

def prepare_dataframe_for_neuralforecast(time_series, unique_id, start_time, freq):
    """Build a long-format DataFrame with columns ``ds``, ``y`` and ``unique_id``.

    Timestamps are regenerated with ``pd.date_range`` from *start_time* at
    *freq*, one per observation.

    Args:
        time_series: list or NumPy array of values (must be non-empty).
        unique_id: Identifier written to every row.
        start_time: ``pd.Timestamp`` or anything ``pd.Timestamp`` accepts.
        freq: Frequency string for ``pd.date_range``.

    Returns:
        DataFrame with ``ds`` timestamps, float ``y`` and ``unique_id``.

    Raises:
        TypeError: *time_series* is not a list or ndarray.
        ValueError: Empty series, invalid start time or missing frequency;
            errors from ``pd.date_range`` are reported and re-raised.
    """
    if not isinstance(time_series, (list, np.ndarray)):
        raise TypeError("time_series hrs list/array")
    n_points = len(time_series)
    if n_points == 0:
        raise ValueError("time_series kosong")
    if not isinstance(start_time, pd.Timestamp):
        try:
            start_time = pd.Timestamp(start_time)
        except Exception as e:
            raise ValueError(f"start_time tdk valid: {start_time} - {e}")
    if not freq:
        raise ValueError("freq tdk boleh kosong/None")

    try:
        timestamps = pd.date_range(start=start_time, periods=n_points, freq=freq)
    except (ValueError, pd.errors.OutOfBoundsDatetime, OverflowError) as e:
        print(f"Error saat membuat date_range: start={start_time}, periods={len(time_series)}, freq='{freq}'")
        print(f"Error detail: {e}")
        print("Ini mungkin terjadi jika 'periods' terlalu besar (misal, data CSV multi-series dibaca sbg satu).")
        raise

    frame = pd.DataFrame({"ds": timestamps, "y": time_series})
    frame["unique_id"] = unique_id
    frame["y"] = frame["y"].astype(float)
    return frame

# --- Fungsi Tambahan untuk Eksplorasi (plot_time_series_analysis DIMODIFIKASI) ---
def _max_points_for_detail(freq):
    """Return the maximum number of points used for decomposition and ACF/PACF."""
    # Observations per year for the sub-daily frequencies handled here.
    points_per_year = {
        '15min': 365 * 24 * 4,
        '30min': 365 * 24 * 2,
        'H': 365 * 24,
    }.get(freq)
    if points_per_year is None:
        return 5 * 365  # default, sized for daily data
    return 3 * points_per_year  # about three years of data


def _seasonal_period(freq, n_points):
    """Return the longest known seasonal period covered at least twice, else None."""
    # Candidate periods per frequency, longest first.
    candidates = {
        '15min': (96 * 7, 96),  # 96 = 24 * 4 observations per day
        '30min': (48 * 7, 48),
        'H': (24 * 7, 24),
        'D': (365, 7),
        'M': (12,),
        'MS': (12,),
    }.get(freq, ())
    for period in candidates:
        if n_points >= 2 * period:
            return period
    return None


def plot_time_series_analysis(df, series_name, freq):
    """
    Plot a series, its additive seasonal decomposition, and its ACF/PACF.

    The whole series is always plotted. Decomposition and ACF/PACF run on the
    full data unless it is longer than a frequency-dependent cap (about three
    years for '15min', '30min' and 'H'; 5 * 365 points otherwise), in which
    case only the most recent points up to that cap are analysed and plotted
    separately. Decomposition is skipped when the data does not cover more
    than two seasonal periods; ACF/PACF is skipped for very short data.

    Args:
        df: DataFrame with a DatetimeIndex and a 'y' column.
        series_name: Label used in plot titles and messages.
        freq: Frequency string ('15min', '30min', 'H', 'D', 'M', 'MS', ...).

    Returns:
        None. Problems are reported with print() instead of being raised.
    """
    if not isinstance(df.index, pd.DatetimeIndex):
        print("Error: Indeks DataFrame bukan DatetimeIndex.")
        return
    y = df['y']
    print(f"\n--- Analisis Visual Time Series: {series_name} ---")
    print("Membuat plot time series keseluruhan...")
    plt.figure(figsize=(15, 6))
    plt.plot(df.index, y, label=f'Observed ({series_name})', linewidth=0.8)
    plt.title(f'Time Series Plot (Keseluruhan): {series_name}')
    plt.xlabel('Timestamp')
    plt.ylabel('Value')
    plt.legend()
    plt.grid(True)
    plt.tight_layout()
    plt.show()

    max_points = _max_points_for_detail(freq)
    y_analysis = y
    is_subset = False
    if len(y) > max_points:
        print(f"\nINFO: Data terlalu panjang ({len(y)}). Analisis detail pd {max_points} poin terakhir.")
        y_analysis = y.tail(max_points)
        is_subset = True
        plt.figure(figsize=(15, 5))
        plt.plot(y_analysis.index, y_analysis, label=f'Subset u/ Analisis ({series_name})', linewidth=1)
        plt.title(f'Plot Subset Terakhir u/ Analisis Detail: {series_name}')
        plt.xlabel('Timestamp')
        plt.ylabel('Value')
        plt.legend()
        plt.grid(True)
        plt.tight_layout()
        plt.show()
    else:
        print("\nMelakukan analisis detail pada seluruh data.")

    data_to_analyze = y_analysis.dropna()
    if data_to_analyze.empty:
        print("Data u/ analisis detail kosong stlh dropna(). Lewati dekomp & ACF/PACF.")
        return
    len_analyze = len(data_to_analyze)
    period = _seasonal_period(freq, len_analyze)

    if period and len_analyze > period * 2:
        print(f"\nMelakukan dekomposisi (Model Aditif, Periode={period})...")
        try:
            decomposition = seasonal_decompose(data_to_analyze, model='additive', period=period, extrapolate_trend='freq')
            print("  Dekomposisi selesai. Membuat plot...")
            fig, axes = plt.subplots(4, 1, figsize=(15, 10), sharex=True)
            components = (
                (decomposition.observed, 'Observed'),
                (decomposition.trend, 'Trend'),
                (decomposition.seasonal, 'Seasonal'),
                (decomposition.resid, 'Residual'),
            )
            for ax, (component, label) in zip(axes, components):
                component.plot(ax=ax, legend=False)
                ax.set_ylabel(label)
            subset_note = " - Subset Terakhir" if is_subset else ""
            plt.suptitle(f'Dekomposisi: {series_name} (Periode: {period}){subset_note}')
            plt.xlabel('Timestamp')
            plt.tight_layout(rect=[0, 0.03, 1, 0.97])
            plt.show()
            print("\n  Statistik Deskriptif Residual:")
            resid_stats = pd.Series(decomposition.resid).dropna().describe()
            if not resid_stats.empty:
                print(resid_stats.to_string())
                plt.figure(figsize=(10, 4))
                sns.histplot(decomposition.resid.dropna(), kde=True)
                plt.title(f'Distribusi Residual: {series_name}')
                plt.xlabel('Residual Value')
                plt.show()
            else:
                print("  Tdk ada residual valid.")
        except Exception as e:
            # Exploration is best-effort: report and continue with ACF/PACF.
            print(f"  Gagal dekomposisi: {e}\n{traceback.format_exc()}")
    else:
        print(f"\n  Dekomposisi musiman dilewati (data {len_analyze} poin, perlu > {period*2 if period else 'N/A'} u/ periode {period}).")

    # More lags for 15-minute data (two days); always below half the sample.
    max_lags = 96 * 2 if freq == '15min' else 40
    lags = min(max_lags, len_analyze // 2 - 1)
    if lags > 5:
        print(f"\nMelakukan plotting ACF dan PACF (lags={lags})...")
        try:
            subset_tag = " - Subset" if is_subset else ""
            fig, axes = plt.subplots(1, 2, figsize=(15, 5))
            plot_acf(data_to_analyze, lags=lags, ax=axes[0], title=f'ACF: {series_name}{subset_tag}')
            plot_pacf(data_to_analyze, lags=lags, ax=axes[1], method='ywm', title=f'PACF: {series_name}{subset_tag}')
            axes[0].grid(True)
            axes[1].grid(True)
            plt.tight_layout()
            plt.show()
        except Exception as e:
            print(f"  Gagal plot ACF/PACF: {e}")
    else:
        print(f"\n  Plot ACF/PACF dilewati (data {len_analyze} poin terlalu pendek u/ {lags} lags).")

def display_statistics_and_distribution(df, series_name):
    """Print descriptive statistics of ``df['y']`` and plot its distribution.

    Prints describe() output, skewness, kurtosis, NaN count and the
    number/share of zero values, then shows a histogram (with KDE) and a box
    plot of the non-missing values.

    Args:
        df: DataFrame holding the series in column 'y'.
        series_name: Label used in messages and plot titles.
    """
    values = df['y']
    print(f"\n--- Analisis Statistik & Distribusi: {series_name} ---")
    print("\nStatistik Deskriptif Utama:")
    print(values.describe().to_string())

    print("\nStatistik Tambahan:")
    print(f"  Skewness: {values.skew():.4f}")
    print(f"  Kurtosis: {values.kurt():.4f}")
    nan_count = values.isna().sum()
    print(f"  Jumlah NaN (setelah imputasi): {nan_count}")
    # Count of exact zeros and their share of all observations.
    zero_count = (values == 0).sum()
    print(f"  Jumlah Nilai Nol: {zero_count} ({zero_count / len(values) * 100:.2f}%)")

    plt.figure(figsize=(12, 5))
    sns.histplot(values.dropna(), kde=True, bins=50)
    plt.title(f'Distribusi Nilai: {series_name}')
    plt.xlabel('Value')
    plt.ylabel('Frequency/Density')
    plt.grid(True, axis='y', alpha=0.5)
    plt.show()

    plt.figure(figsize=(8, 6))
    sns.boxplot(y=values.dropna())
    plt.title(f'Box Plot Nilai: {series_name}')
    plt.ylabel('Value')
    plt.grid(True, axis='y', alpha=0.5)
    plt.show()


print("Fungsi helper eksplorasi dimuat.")

# --- Google Drive mount ---
# Datasets 4 (M4 Daily) and 7 (Solar Power) are read from Google Drive, so the
# drive is mounted here and both configured files are checked for existence.
# The paths come from DATASET_CONFIG so this check always looks at exactly the
# files that will be loaded later. Outside Colab (or if mounting fails) a
# message is printed and the script continues; only datasets 4 and 7 will then
# fail to load.
# -----------------------------------------------------------------------------
try:
    from google.colab import drive
    drive.mount('/content/drive')
    print("Google Drive berhasil di-mount.")
    m4_daily_path = DATASET_CONFIG[4]['source']
    if os.path.exists(m4_daily_path): print(f"File M4 Daily ditemukan di: {m4_daily_path}")
    else: print(f"PERINGATAN: File M4 Daily TIDAK ditemukan di: {m4_daily_path}.")
    solar_path = DATASET_CONFIG[7]['source']
    if os.path.exists(solar_path): print(f"File Solar Power ditemukan di: {solar_path}")
    else: print(f"PERINGATAN: File Solar Power TIDAK ditemukan di: {solar_path}.")
except Exception as e: print(f"Gagal mount Google Drive: {e}")
# -----------------------------------------------------------------------------

# --- Fungsi Utama untuk Menjalankan Analisis per Dataset ---
def explore_dataset(dataset_index, series_index_or_name):
    """Load, clean, analyse and visualise one series of one configured dataset.

    Steps: look up the dataset in DATASET_CONFIG, load and parse it, select
    the requested series, impute NaN values with NAN_IMPUTATION_METHOD, build
    a DataFrame indexed by timestamp, then print statistics and draw the
    analysis plots. Any failure is reported with print() and ends the
    exploration early.

    Args:
        dataset_index: Key of the dataset in DATASET_CONFIG.
        series_index_or_name: Integer position or id of the series to explore.
    """
    config = DATASET_CONFIG.get(dataset_index)
    if not config:
        print(f"Error: Config u/ dataset index {dataset_index} tdk ditemukan.")
        return

    rule = '=' * 80
    print(f"\n{rule}\n Memulai Eksplorasi Dataset {dataset_index}: {config['name']} \n{rule}")
    print("Justifikasi Kunci:")
    for point in config.get('justification_points', []):
        print(f"- {point}")
    print("-" * 30)

    all_ids, all_start_times, all_series_data, data_freq, dataset_name = load_and_parse_data(dataset_index, DATASET_CONFIG)
    if all_ids is None:
        return

    dataset_type = config['type']
    if dataset_type == 'tsf' and len(all_ids) > 1:
        print(f"\nID Series TSF Tersedia (sampai 10): {all_ids[:10]}...")
    elif dataset_type == 'csv':
        print(f"\nID Series CSV Dihasilkan: {all_ids}")

    print(f"Mencoba memilih berdasarkan '{series_index_or_name}'...")
    try:
        selected_id, dataset_start_time, ts_raw = select_series(all_ids, all_start_times, all_series_data, series_index_or_name, dataset_name)
    except (ValueError, IndexError, TypeError) as e:
        print(f"Error saat memilih series: {e}")
        return
    if not ts_raw:
        print(f"Error: Series '{selected_id}' kosong stlh seleksi. Eksplorasi dibatalkan.")
        return

    print(f"\nMembersihkan data mentah (Metode: {NAN_IMPUTATION_METHOD})...")
    ts_cleaned = handle_nan_values(ts_raw, method=NAN_IMPUTATION_METHOD)
    if not ts_cleaned:
        print("Error: Data kosong stlh cleaning NaN. Eksplorasi dibatalkan.")
        return

    print("\nMempersiapkan DataFrame...")
    try:
        df = prepare_dataframe_for_neuralforecast(ts_cleaned, selected_id, dataset_start_time, data_freq)
        # The analysis helpers expect the timestamps as the index.
        df = df.set_index('ds')
        print(f"DataFrame siap ({len(df)} baris). Contoh:\n{df.head().to_string()}")
    except Exception as e:
        print(f"Error prep DataFrame: {e}")
        return

    display_statistics_and_distribution(df, selected_id)
    plot_time_series_analysis(df, selected_id, data_freq)
    print(f"\n{rule}\n Eksplorasi Selesai: {config['name']} (Series: {selected_id})\n{rule}\n\n")


print("Fungsi `explore_dataset` siap digunakan.")


# ==============================================================================
# RUN THE ANALYSIS FOR EACH DATASET
# ==============================================================================
# !! IMPORTANT: For TSF datasets (1, 3, 4, 5, 6), replace the EXAMPLE INDEX/NAME if needed.
# !! IMPORTANT: For CSV datasets (2, 7), use index 0 because the parser yields one series.
# !! IMPORTANT: Datasets 4 & 7 are read from Google Drive; if the drive is not
#               mounted they report a load error and are skipped.
# !! IMPORTANT: VERIFY THE FILE NAME & COLUMN NAMES for Dataset 7 in the config!

# --- Dataset 1: Australian Electricity Demand ---
selected_series_aus_elec = "T1_NSW"
explore_dataset(dataset_index=1, series_index_or_name=selected_series_aus_elec)

# --- Dataset 2: Bike Sharing Daily ---
selected_series_bike = 0
explore_dataset(dataset_index=2, series_index_or_name=selected_series_bike)

# --- Dataset 3: M3 Monthly ---
selected_series_m3 = 0 # <--- EXAMPLE INDEX, CHECK & REPLACE IF NEEDED
explore_dataset(dataset_index=3, series_index_or_name=selected_series_m3)

# --- Dataset 4: M4 Daily ---
# Requires Google Drive to be mounted and the path in DATASET_CONFIG[4] to be correct.
selected_series_m4_daily = 0 # <--- EXAMPLE INDEX, CHECK & REPLACE IF NEEDED
explore_dataset(dataset_index=4, series_index_or_name=selected_series_m4_daily)
# The former "Dataset 4 ... Dikomentari" message was removed: the exploration
# above is executed, so announcing it as commented out was misleading.


# --- Dataset 5: M4 Hourly ---
selected_series_m4_hourly = 0 # <--- EXAMPLE INDEX (H1?), CHECK & REPLACE IF NEEDED
explore_dataset(dataset_index=5, series_index_or_name=selected_series_m4_hourly)

# --- Dataset 6: Tourism Monthly ---
selected_series_tourism = 0 # <--- EXAMPLE INDEX (T1?), CHECK & REPLACE IF NEEDED
explore_dataset(dataset_index=6, series_index_or_name=selected_series_tourism)

# --- Dataset 7: Solar Power Generation ---
# Uses index 0 because the CSV parser yields a single series.
# !! IMPORTANT: Make sure the column names 'DATE_TIME' & 'DC_POWER' in config[7] are correct!
# !! IMPORTANT: Requires Google Drive to be mounted !!
selected_series_solar = 0
explore_dataset(dataset_index=7, series_index_or_name=selected_series_solar)


print("=== SEMUA EKSEKUSI (YANG TIDAK DIKOMENTARI) SELESAI ===")

# Output hidden; open in https://colab.research.google.com to view.