In [1]:
!pip install neuralforecast
!pip install torchinfo
!pip install codecarbon

Collecting neuralforecast
  Downloading neuralforecast-2.0.0-py3-none-any.whl.metadata (14 kB)
Collecting coreforecast>=0.0.6 (from neuralforecast)
  Downloading coreforecast-0.0.15-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.8 kB)
Collecting pytorch-lightning>=2.0.0 (from neuralforecast)
  Downloading pytorch_lightning-2.5.0.post0-py3-none-any.whl.metadata (21 kB)
Collecting ray>=2.2.0 (from ray[tune]>=2.2.0->neuralforecast)
  Downloading ray-2.40.0-cp310-cp310-manylinux2014_x86_64.whl.metadata (17 kB)
Collecting optuna (from neuralforecast)
  Downloading optuna-4.1.0-py3-none-any.whl.metadata (16 kB)
Collecting utilsforecast>=0.2.3 (from neuralforecast)
  Downloading utilsforecast-0.2.10-py3-none-any.whl.metadata (7.4 kB)
Collecting torchmetrics>=0.7.0 (from pytorch-lightning>=2.0.0->neuralforecast)
  Downloading torchmetrics-1.6.1-py3-none-any.whl.metadata (21 kB)
Collecting lightning-utilities>=0.10.0 (from pytorch-lightning>=2.0.0->neuralforecast)
  Down

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, LSTM
from neuralforecast.losses.pytorch import MAE
from sklearn.preprocessing import MinMaxScaler
import time
import psutil
import os
from codecarbon import EmissionsTracker
import random
import torch
import logging
logging.getLogger("codecarbon").setLevel(logging.ERROR)

# Set environment variable to suppress future warning
# This environment variable ensures compatibility with the NeuralForecast library by specifying the column type.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Set seed untuk reproducibility
seed = 42  # Pilih angka seed sesuai keinginan
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Inisialisasi model
window_size = 48
horizon = 24
model_name = 'NHITS'  # Pilihan: 'NBEATS', 'NHITS', 'LSTM'
loss_function = MAE()  # Fungsi loss
epochs = 10
batch_size = 32

# Membagi dataset menjadi beberapa fold untuk cross-validation
step_size = 24  # Geser 24 langkah setiap fold
n_crossvalidation = 3  # Jumlah fold cross-validation

# Fungsi untuk membaca file TSF dari URL
def read_tsf_from_url(url):
    import requests
    response = requests.get(url)
    lines = response.text.splitlines()

    series_data = []
    reading_data = False

    for line in lines:
        if line.strip() == "" or line.startswith("#"):
            continue

        if line.startswith("@data"):
            reading_data = True
            continue

        if reading_data:
            series_data.append(line.strip())

    return series_data

# Parsing time series data
def parse_series_data(series_data):
    parsed_data = []
    state_names = []
    start_times = []

    for row in series_data:
        parts = row.split(":")
        if len(parts) >= 4:
            state_names.append(parts[1])  # Extract the state name
            start_times.append(pd.Timestamp(parts[2]))  # Extract start timestamp
            time_series = list(map(float, parts[3].split(","))) if "," in parts[3] else []
            parsed_data.append(time_series)

    return state_names, start_times, parsed_data

# Fungsi untuk memilih negara bagian
def select_state(states, start_times, time_series_data, index):
    if 0 <= index < len(states):
        state = states[index]
        start_time = start_times[index]
        time_series = time_series_data[index]
        return state, start_time, time_series
    else:
        raise ValueError(f"Invalid index {index}. Please select a value between 0 and {len(states) - 1}.")

# Fungsi untuk mempersiapkan dataset menjadi format NeuralForecast
def prepare_dataset(time_series, state_name, start_time):
    timestamps = pd.date_range(start=start_time, periods=len(time_series), freq="30min")
    df = pd.DataFrame({"ds": timestamps, "y": time_series})
    df["unique_id"] = state_name
    return df

# Fungsi untuk membagi dataset menjadi beberapa folds untuk time series cross-validation
def create_timeseries_cv_folds(data, horizon, step_size, n_crossvalidation):
    """
    Membagi dataset time series menjadi beberapa fold untuk cross-validation.

    Parameters:
    - data: List atau array dari dataset time series.
    - horizon: Jumlah titik waktu yang akan diprediksi.
    - step_size: Jumlah titik waktu yang akan digeser setiap fold.
    - n_crossvalidation: Jumlah fold cross-validation.

    Returns:
    - List of tuples, dimana setiap tuple berisi (train_data, test_data).
    """
    dataset_length = len(data)
    window_length = (dataset_length - horizon) - (step_size * (n_crossvalidation - 1))

    folds = []

    for i in range(n_crossvalidation):
        start_train = i * step_size
        end_train = start_train + window_length
        start_test = end_train
        end_test = start_test + horizon

        train_data = data[start_train:end_train]
        test_data = data[start_test:end_test]

        folds.append((train_data, test_data))

    return folds

# URL raw file dari GitHub
url = "https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/5b204ef45db85a9ff4e283dd74941dbc117ad287/dataset/australian_electricity_demand_dataset.tsf"

# Read the TSF file from the URL
raw_data = read_tsf_from_url(url)
states, start_times, time_series_data = parse_series_data(raw_data)

# Pilih dataset negara bagian
selected_state_index = 0  # Ubah indeks 0-4 untuk memilih negara bagian
try:
    selected_state, dataset_start_time, ts = select_state(states, start_times, time_series_data, selected_state_index)
except ValueError as e:
    print(e)
    exit()

# Handle NaN values
ts = pd.Series(ts).ffill().bfill().tolist()

# Normalisasi data
scaler = MinMaxScaler(feature_range=(0, 1))
ts_normalized = scaler.fit_transform(np.array(ts).reshape(-1, 1)).flatten()

# Persiapkan dataset
dataset = prepare_dataset(ts_normalized, selected_state, dataset_start_time)

# Membagi dataset menjadi beberapa fold untuk cross-validation
folds = create_timeseries_cv_folds(ts_normalized, horizon, step_size, n_crossvalidation)

# Menggunakan window_size untuk pengaturan input_size model
input_size = window_size

model = NHITS(h=horizon, input_size=input_size, max_steps=epochs, batch_size=batch_size, loss=loss_function)

# Membuat objek NeuralForecast
nf = NeuralForecast(models=[model], freq='30min')

# Fungsi untuk menghitung MAPE dan sMAPE
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred))) * 100

# Cross-validation training
fit_start_time = time.time()
all_metrics = []
all_emissions = []
all_train_losses = []
all_val_losses = []

def denormalize(data, scaler):
    """Mengembalikan data ke skala aslinya."""
    return scaler.inverse_transform(data.reshape(-1, 1)).flatten()

for i, (train_fold, test_fold) in enumerate(folds):
    print(f"Processing fold {i+1}/{n_crossvalidation}")

    # Tracker untuk emisi per fold
    fold_tracker = EmissionsTracker(measure_power_secs=1)
    fold_tracker.start()

    train_timestamps = pd.date_range(start=dataset_start_time, periods=len(train_fold), freq="30min")
    test_timestamps = pd.date_range(start=train_timestamps[-1] + pd.Timedelta(minutes=30), periods=len(test_fold), freq="30min")

    train_df = pd.DataFrame({"ds": train_timestamps, "y": train_fold, "unique_id": selected_state})
    test_df = pd.DataFrame({"ds": test_timestamps, "y": test_fold, "unique_id": selected_state})

    # Train model
    nf.fit(df=train_df, val_size=horizon)

    # Track training and validation losses
    train_losses = nf.models[0].train_trajectories
    val_losses = nf.models[0].valid_trajectories

    all_train_losses.append(train_losses[-1])
    all_val_losses.append(val_losses[-1])

    forecast = nf.predict().reset_index()

    y_true = test_df['y']
    y_pred_normalized = forecast['NHITS']

    # Denormalisasi hasil prediksi
    y_pred = denormalize(y_pred_normalized.to_numpy(), scaler)

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    smape = symmetric_mean_absolute_percentage_error(y_true, y_pred)

    fold_emissions = fold_tracker.stop()
    all_emissions.append(fold_emissions)

    avg_train_loss = np.mean(all_train_losses)
    min_train_loss = np.min(all_train_losses)
    max_train_loss = np.max(all_train_losses)

    avg_val_loss = np.mean(all_val_losses)
    min_val_loss = np.min(all_val_losses)
    max_val_loss = np.max(all_val_losses)

    train_val_ratio = avg_train_loss / avg_val_loss if avg_val_loss != 0 else None

    print(f"  Fold {i+1} Metrics:")
    print(f"    Mean Absolute Error (MAE): {mae:.2f}")
    print(f"    Mean Squared Error (MSE): {mse:.2f}")
    print(f"    Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"    Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print(f"    Symmetric Mean Absolute Percentage Error (sMAPE): {smape:.2f}%")
    if fold_emissions is not None:
        print(f"    CO2 Emissions for Fold {i+1}: {fold_emissions:.4f} kg")
    else:
        print(f"    CO2 Emissions for Fold {i+1}: Emission data not available.")

    print(f"\tTrain Loss  \n\tAvg: {avg_train_loss:.4f} \n\tMin: {min_train_loss:.4f}\n\tMax: {max_train_loss:.4f}")
    print(f"\tValidation Loss \n\tAvg: {avg_val_loss:.4f}\n\tMin: {min_val_loss:.4f}\n\tMax: {max_val_loss:.4f}")
    print(f"\tPerbandingan Loss (Avg Train Loss/Avg Validation Loss) ; {train_val_ratio:.4f}")

    all_metrics.append((mae, mse, rmse, mape, smape))

fit_end_time = time.time()

# Print summary statistics
print(f"\nEvaluasi Model untuk Semua Fold:")
for i, metrics in enumerate(all_metrics):
    print(f"  Fold {i+1}:")
    print(f"    Mean Absolute Error (MAE): {metrics[0]:.2f}")
    print(f"    Mean Squared Error (MSE): {metrics[1]:.2f}")
    print(f"    Root Mean Squared Error (RMSE): {metrics[2]:.2f}")
    print(f"    Mean Absolute Percentage Error (MAPE): {metrics[3]:.2f}%")
    print(f"    Symmetric Mean Absolute Percentage Error (sMAPE): {metrics[4]:.2f}%")
    if all_emissions[i] is not None:
        print(f"    CO2 Emissions for Fold {i+1}: {all_emissions[i]:.4f} kg")
    else:
        print(f"    CO2 Emissions for Fold {i+1}: Emission data not available.")


Dask dataframe query planning is disabled because dask-expr is not installed.

You can install it with `pip install dask[dataframe]` or `conda install dask`.
This will raise in a future version.

INFO:lightning_fabric.utilities.seed:Seed set to 1
[codecarbon INFO @ 04:14:56] [setup] RAM Tracking...
[codecarbon INFO @ 04:14:56] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU



Processing fold 1/3


[codecarbon INFO @ 04:14:58] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:14:58] [setup] GPU Tracking...
[codecarbon INFO @ 04:14:58] No GPU found.
[codecarbon INFO @ 04:14:58] >>> Tracker's metadata:
[codecarbon INFO @ 04:14:58]   Platform system: Linux-6.1.85+-x86_64-with-glibc2.35
[codecarbon INFO @ 04:14:58]   Python version: 3.10.12
[codecarbon INFO @ 04:14:58]   CodeCarbon version: 2.8.2
[codecarbon INFO @ 04:14:58]   Available RAM : 12.675 GB
[codecarbon INFO @ 04:14:58]   CPU count: 2
[codecarbon INFO @ 04:14:58]   CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:14:58]   GPU count: None
[codecarbon INFO @ 04:14:58]   GPU model: None
[codecarbon INFO @ 04:14:58] Saving emissions data to file /content/emissions.csv
[codecarbon INFO @ 04:14:59] Energy consumed for RAM : 0.000001 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:14:59] Energy consumed for all CPUs : 0.000012 kWh. Total CPU Power : 42.5 W
[code

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:15:15] Energy consumed for RAM : 0.000022 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:15] Energy consumed for all CPUs : 0.000200 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:15] 0.000222 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:16] Energy consumed for RAM : 0.000023 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:16] Energy consumed for all CPUs : 0.000212 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:16] 0.000235 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:17] Energy consumed for RAM : 0.000024 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:17] Energy consumed for all CPUs : 0.000225 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:17] 0.000249 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:18] Energy consumed for RAM : 0.000026 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:18] Energy consumed for a

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:15:24] Energy consumed for RAM : 0.000033 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:24] Energy consumed for all CPUs : 0.000300 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:24] 0.000332 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:24] [setup] RAM Tracking...
[codecarbon INFO @ 04:15:24] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU



  Fold 1 Metrics:
    Mean Absolute Error (MAE): 6970.48
    Mean Squared Error (MSE): 49284263.11
    Root Mean Squared Error (RMSE): 7020.28
    Mean Absolute Percentage Error (MAPE): 1844697.61%
    Symmetric Mean Absolute Percentage Error (sMAPE): 199.98%
    CO2 Emissions for Fold 1: 0.0001 kg
	Train Loss  
	Avg: 4.5322 
	Min: 0.0645
	Max: 9.0000
	Validation Loss 
	Avg: 5.0270
	Min: 0.0540
	Max: 10.0000
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 0.9016
Processing fold 2/3


[codecarbon INFO @ 04:15:25] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:15:25] [setup] GPU Tracking...
[codecarbon INFO @ 04:15:25] No GPU found.
[codecarbon INFO @ 04:15:25] >>> Tracker's metadata:
[codecarbon INFO @ 04:15:25]   Platform system: Linux-6.1.85+-x86_64-with-glibc2.35
[codecarbon INFO @ 04:15:25]   Python version: 3.10.12
[codecarbon INFO @ 04:15:25]   CodeCarbon version: 2.8.2
[codecarbon INFO @ 04:15:25]   Available RAM : 12.675 GB
[codecarbon INFO @ 04:15:25]   CPU count: 2
[codecarbon INFO @ 04:15:25]   CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:15:25]   GPU count: None
[codecarbon INFO @ 04:15:25]   GPU model: None
[codecarbon INFO @ 04:15:25] Saving emissions data to file /content/emissions.csv
[codecarbon INFO @ 04:15:26] Energy consumed for RAM : 0.000001 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:26] Energy consumed for all CPUs : 0.000013 kWh. Total CPU Power : 42.5 W
[code

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:15:40] Energy consumed for RAM : 0.000018 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:40] Energy consumed for all CPUs : 0.000168 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:40] 0.000186 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:41] Energy consumed for RAM : 0.000020 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:41] Energy consumed for all CPUs : 0.000180 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:41] 0.000200 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:42] Energy consumed for RAM : 0.000021 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:42] Energy consumed for all CPUs : 0.000192 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:42] 0.000213 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:42] 0.003718 g.CO2eq/s mean an estimation of 117.24447367644458 kg.CO2eq/year
[codecarbon INFO @ 04:15:43] Energy consumed for

Validation: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:15:46] Energy consumed for RAM : 0.000026 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:46] Energy consumed for all CPUs : 0.000239 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:46] 0.000265 kWh of electricity used since the beginning.
INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:15:46] Energy consumed for RAM : 0.000027 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:46] Energy consumed for all CPUs : 0.000242 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:15:46] 0.000268 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:15:46] [setup] RAM Tracking...
[codecarbon INFO @ 04:15:46] [setup] CPU Tracking...
 Linux OS detected: Please ensure RAPL files exist at \sys\class\powercap\intel-rapl to measure CPU



  Fold 2 Metrics:
    Mean Absolute Error (MAE): 6514.14
    Mean Squared Error (MSE): 43667712.69
    Root Mean Squared Error (RMSE): 6608.16
    Mean Absolute Percentage Error (MAPE): 3792247.46%
    Symmetric Mean Absolute Percentage Error (sMAPE): 199.99%
    CO2 Emissions for Fold 2: 0.0001 kg
	Train Loss  
	Avg: 4.5315 
	Min: 0.0617
	Max: 9.0000
	Validation Loss 
	Avg: 5.0195
	Min: 0.0238
	Max: 10.0000
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 0.9028
Processing fold 3/3


[codecarbon INFO @ 04:15:47] CPU Model on constant consumption mode: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:15:47] [setup] GPU Tracking...
[codecarbon INFO @ 04:15:47] No GPU found.
[codecarbon INFO @ 04:15:47] >>> Tracker's metadata:
[codecarbon INFO @ 04:15:47]   Platform system: Linux-6.1.85+-x86_64-with-glibc2.35
[codecarbon INFO @ 04:15:47]   Python version: 3.10.12
[codecarbon INFO @ 04:15:47]   CodeCarbon version: 2.8.2
[codecarbon INFO @ 04:15:47]   Available RAM : 12.675 GB
[codecarbon INFO @ 04:15:47]   CPU count: 2
[codecarbon INFO @ 04:15:47]   CPU model: Intel(R) Xeon(R) CPU @ 2.20GHz
[codecarbon INFO @ 04:15:47]   GPU count: None
[codecarbon INFO @ 04:15:47]   GPU model: None
[codecarbon INFO @ 04:15:47] Saving emissions data to file /content/emissions.csv
[codecarbon INFO @ 04:15:48] Energy consumed for RAM : 0.000001 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:15:48] Energy consumed for all CPUs : 0.000012 kWh. Total CPU Power : 42.5 W
[code

Sanity Checking: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:16:01] Energy consumed for RAM : 0.000015 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:16:01] Energy consumed for all CPUs : 0.000147 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:16:01] 0.000163 kWh of electricity used since the beginning.


Training: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:16:02] Energy consumed for RAM : 0.000017 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:16:02] Energy consumed for all CPUs : 0.000159 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:16:02] 0.000176 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:16:03] Energy consumed for RAM : 0.000018 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:16:03] Energy consumed for all CPUs : 0.000171 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:16:03] 0.000189 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:16:04] Energy consumed for RAM : 0.000019 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:16:04] Energy consumed for all CPUs : 0.000183 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:16:04] 0.000203 kWh of electricity used since the beginning.
[codecarbon INFO @ 04:16:04] 0.003631 g.CO2eq/s mean an estimation of 114.52101233482912 kg.CO2eq/year
[codecarbon INFO @ 04:16:05] Energy consumed for

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[codecarbon INFO @ 04:16:14] Energy consumed for RAM : 0.000033 kWh. RAM Power : 4.753036022186279 W
[codecarbon INFO @ 04:16:14] Energy consumed for all CPUs : 0.000309 kWh. Total CPU Power : 42.5 W
[codecarbon INFO @ 04:16:14] 0.000343 kWh of electricity used since the beginning.


  Fold 3 Metrics:
    Mean Absolute Error (MAE): 6068.33
    Mean Squared Error (MSE): 37114295.95
    Root Mean Squared Error (RMSE): 6092.15
    Mean Absolute Percentage Error (MAPE): 1833979.30%
    Symmetric Mean Absolute Percentage Error (sMAPE): 199.98%
    CO2 Emissions for Fold 3: 0.0001 kg
	Train Loss  
	Avg: 4.5357 
	Min: 0.0617
	Max: 9.0000
	Validation Loss 
	Avg: 5.0166
	Min: 0.0219
	Max: 10.0000
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 0.9041

Evaluasi Model untuk Semua Fold:
  Fold 1:
    Mean Absolute Error (MAE): 6970.48
    Mean Squared Error (MSE): 49284263.11
    Root Mean Squared Error (RMSE): 7020.28
    Mean Absolute Percentage Error (MAPE): 1844697.61%
    Symmetric Mean Absolute Percentage Error (sMAPE): 199.98%
    CO2 Emissions for Fold 1: 0.0001 kg
  Fold 2:
    Mean Absolute Error (MAE): 6514.14
    Mean Squared Error (MSE): 43667712.69
    Root Mean Squared Error (RMSE): 6608.16
    Mean Absolute Percentage Error (MAPE): 3792247.46%
    Sym

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import mean_absolute_error, mean_squared_error
from neuralforecast import NeuralForecast
from neuralforecast.models import NBEATS, NHITS, LSTM
from neuralforecast.losses.pytorch import MAE
import time
import psutil
import os
from codecarbon import EmissionsTracker
import random
import torch
import logging
logging.getLogger("codecarbon").setLevel(logging.ERROR)

# Set environment variable to suppress future warning
# This environment variable ensures compatibility with the NeuralForecast library by specifying the column type.
os.environ['NIXTLA_ID_AS_COL'] = '1'

# Set seed untuk reproducibility
seed = 42  # Pilih angka seed sesuai keinginan
np.random.seed(seed)
random.seed(seed)
torch.manual_seed(seed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(seed)

# Inisialisasi model
window_size = 48
horizon = 24
model_name = 'NHITS'  # Pilihan: 'NBEATS', 'NHITS', 'LSTM'
loss_function = MAE()  # Fungsi loss
epochs = 10
batch_size = 32

# Membagi dataset menjadi beberapa fold untuk cross-validation
step_size = 24  # Geser 24 langkah setiap fold
n_crossvalidation = 3  # Jumlah fold cross-validation

# Fungsi untuk membaca file TSF dari URL
def read_tsf_from_url(url):
    import requests
    response = requests.get(url)
    lines = response.text.splitlines()

    series_data = []
    reading_data = False

    for line in lines:
        if line.strip() == "" or line.startswith("#"):
            continue

        if line.startswith("@data"):
            reading_data = True
            continue

        if reading_data:
            series_data.append(line.strip())

    return series_data

# Parsing time series data
def parse_series_data(series_data):
    parsed_data = []
    state_names = []
    start_times = []

    for row in series_data:
        parts = row.split(":")
        if len(parts) >= 4:
            state_names.append(parts[1])  # Extract the state name
            start_times.append(pd.Timestamp(parts[2]))  # Extract start timestamp
            time_series = list(map(float, parts[3].split(","))) if "," in parts[3] else []
            parsed_data.append(time_series)

    return state_names, start_times, parsed_data

# Fungsi untuk memilih negara bagian
def select_state(states, start_times, time_series_data, index):
    if 0 <= index < len(states):
        state = states[index]
        start_time = start_times[index]
        time_series = time_series_data[index]
        return state, start_time, time_series
    else:
        raise ValueError(f"Invalid index {index}. Please select a value between 0 and {len(states) - 1}.")

# Fungsi untuk mempersiapkan dataset menjadi format NeuralForecast
def prepare_dataset(time_series, state_name, start_time):
    timestamps = pd.date_range(start=start_time, periods=len(time_series), freq="30min")
    df = pd.DataFrame({"ds": timestamps, "y": time_series})
    df["unique_id"] = state_name
    return df

# Fungsi untuk membagi dataset menjadi beberapa folds untuk time series cross-validation
def create_timeseries_cv_folds(data, horizon, step_size, n_crossvalidation):
    """
    Membagi dataset time series menjadi beberapa fold untuk cross-validation.

    Parameters:
    - data: List atau array dari dataset time series.
    - horizon: Jumlah titik waktu yang akan diprediksi.
    - step_size: Jumlah titik waktu yang akan digeser setiap fold.
    - n_crossvalidation: Jumlah fold cross-validation.

    Returns:
    - List of tuples, dimana setiap tuple berisi (train_data, test_data).
    """
    dataset_length = len(data)
    window_length = (dataset_length - horizon) - (step_size * (n_crossvalidation - 1))

    folds = []

    for i in range(n_crossvalidation):
        start_train = i * step_size
        end_train = start_train + window_length
        start_test = end_train
        end_test = start_test + horizon

        train_data = data[start_train:end_train]
        test_data = data[start_test:end_test]

        folds.append((train_data, test_data))

    return folds

# URL raw file dari GitHub
url = "https://raw.githubusercontent.com/kanadakurniawan/loss-function-comparison/5b204ef45db85a9ff4e283dd74941dbc117ad287/dataset/australian_electricity_demand_dataset.tsf"

# Read the TSF file from the URL
raw_data = read_tsf_from_url(url)
states, start_times, time_series_data = parse_series_data(raw_data)

# Pilih dataset negara bagian
selected_state_index = 0  # Ubah indeks 0-4 untuk memilih negara bagian
try:
    selected_state, dataset_start_time, ts = select_state(states, start_times, time_series_data, selected_state_index)
except ValueError as e:
    print(e)
    exit()

# Handle NaN values
ts = pd.Series(ts).ffill().bfill().tolist()

# Persiapkan dataset
dataset = prepare_dataset(ts, selected_state, dataset_start_time)

# Membagi dataset menjadi beberapa fold untuk cross-validation
folds = create_timeseries_cv_folds(ts, horizon, step_size, n_crossvalidation)

# Menggunakan window_size untuk pengaturan input_size model
input_size = window_size

model = NHITS(h=horizon, input_size=input_size, max_steps=epochs, batch_size=batch_size, loss=loss_function)

# Membuat objek NeuralForecast
nf = NeuralForecast(models=[model], freq='30min')

# Fungsi untuk menghitung MAPE dan sMAPE
def mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(np.abs((y_true - y_pred) / y_true)) * 100

def symmetric_mean_absolute_percentage_error(y_true, y_pred):
    return np.mean(2 * np.abs(y_true - y_pred) / (np.abs(y_true) + np.abs(y_pred))) * 100

# Cross-validation training
fit_start_time = time.time()
all_metrics = []
all_emissions = []
all_train_losses = []
all_val_losses = []

for i, (train_fold, test_fold) in enumerate(folds):
    print(f"Processing fold {i+1}/{n_crossvalidation}")

    # Tracker untuk emisi per fold
    fold_tracker = EmissionsTracker(measure_power_secs=1)
    fold_tracker.start()

    train_timestamps = pd.date_range(start=dataset_start_time, periods=len(train_fold), freq="30min")
    test_timestamps = pd.date_range(start=train_timestamps[-1] + pd.Timedelta(minutes=30), periods=len(test_fold), freq="30min")

    train_df = pd.DataFrame({"ds": train_timestamps, "y": train_fold, "unique_id": selected_state})
    test_df = pd.DataFrame({"ds": test_timestamps, "y": test_fold, "unique_id": selected_state})

    # Train model
    nf.fit(df=train_df, val_size=horizon)

    # Track training and validation losses
    train_losses = nf.models[0].train_trajectories
    val_losses = nf.models[0].valid_trajectories

    all_train_losses.append(train_losses[-1])
    all_val_losses.append(val_losses[-1])

    forecast = nf.predict().reset_index()

    y_true = test_df['y']
    y_pred = forecast['NHITS']

    mae = mean_absolute_error(y_true, y_pred)
    mse = mean_squared_error(y_true, y_pred)
    rmse = np.sqrt(mse)
    mape = mean_absolute_percentage_error(y_true, y_pred)
    smape = symmetric_mean_absolute_percentage_error(y_true, y_pred)

    fold_emissions = fold_tracker.stop()
    all_emissions.append(fold_emissions)

    avg_train_loss = np.mean(all_train_losses)
    min_train_loss = np.min(all_train_losses)
    max_train_loss = np.max(all_train_losses)

    avg_val_loss = np.mean(all_val_losses)
    min_val_loss = np.min(all_val_losses)
    max_val_loss = np.max(all_val_losses)

    train_val_ratio = avg_train_loss / avg_val_loss if avg_val_loss != 0 else None

    print(f"  Fold {i+1} Metrics:")
    print(f"    Mean Absolute Error (MAE): {mae:.2f}")
    print(f"    Mean Squared Error (MSE): {mse:.2f}")
    print(f"    Root Mean Squared Error (RMSE): {rmse:.2f}")
    print(f"    Mean Absolute Percentage Error (MAPE): {mape:.2f}%")
    print(f"    Symmetric Mean Absolute Percentage Error (sMAPE): {smape:.2f}%")
    if fold_emissions is not None:
        print(f"    CO2 Emissions for Fold {i+1}: {fold_emissions:.4f} kg")
    else:
        print(f"    CO2 Emissions for Fold {i+1}: Emission data not available.")

    print(f"\tTrain Loss  \n\tAvg: {avg_train_loss:.4f} \n\tMin: {min_train_loss:.4f}\n\tMax: {max_train_loss:.4f}")
    print(f"\tValidation Loss \n\tAvg: {avg_val_loss:.4f}\n\tMin: {min_val_loss:.4f}\n\tMax: {max_val_loss:.4f}")
    print(f"\tPerbandingan Loss (Avg Train Loss/Avg Validation Loss) ; {train_val_ratio:.4f}")

    all_metrics.append((mae, mse, rmse, mape, smape))

fit_end_time = time.time()

# Print summary statistics
print(f"\nEvaluasi Model untuk Semua Fold:")
for i, metrics in enumerate(all_metrics):
    print(f"  Fold {i+1}:")
    print(f"    Mean Absolute Error (MAE): {metrics[0]:.2f}")
    print(f"    Mean Squared Error (MSE): {metrics[1]:.2f}")
    print(f"    Root Mean Squared Error (RMSE): {metrics[2]:.2f}")
    print(f"    Mean Absolute Percentage Error (MAPE): {metrics[3]:.2f}%")
    print(f"    Symmetric Mean Absolute Percentage Error (sMAPE): {metrics[4]:.2f}%")
    if all_emissions[i] is not None:
        print(f"    CO2 Emissions for Fold {i+1}: {all_emissions[i]:.4f} kg")
    else:
        print(f"    CO2 Emissions for Fold {i+1}: Emission data not available.")


INFO:lightning_fabric.utilities.seed:Seed set to 1
[codecarbon ERROR @ 15:12:37] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


Processing fold 1/3


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[codecarbon ERROR @ 15:12:58] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


  Fold 1 Metrics:
    Mean Absolute Error (MAE): 447.86
    Mean Squared Error (MSE): 250994.45
    Root Mean Squared Error (RMSE): 500.99
    Mean Absolute Percentage Error (MAPE): 6.45%
    Symmetric Mean Absolute Percentage Error (sMAPE): 6.44%
    CO2 Emissions for Fold 1: Emission data not available.
	Train Loss  
	Avg: 419.7021 
	Min: 9.0000
	Max: 830.4042
	Validation Loss 
	Avg: 417.1118
	Min: 10.0000
	Max: 824.2236
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 1.0062
Processing fold 2/3


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

[codecarbon ERROR @ 15:13:20] Error: Another instance of codecarbon is probably running as we find `/tmp/.codecarbon.lock`. Turn off the other instance to be able to run this one or use `allow_multiple_runs` or delete the file. Exiting.


  Fold 2 Metrics:
    Mean Absolute Error (MAE): 605.15
    Mean Squared Error (MSE): 577261.76
    Root Mean Squared Error (RMSE): 759.78
    Mean Absolute Percentage Error (MAPE): 12.75%
    Symmetric Mean Absolute Percentage Error (sMAPE): 11.52%
    CO2 Emissions for Fold 2: Emission data not available.
	Train Loss  
	Avg: 415.4601 
	Min: 9.0000
	Max: 830.4042
	Validation Loss 
	Avg: 280.2858
	Min: 10.0000
	Max: 824.2236
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 1.4823
Processing fold 3/3


INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs
INFO:pytorch_lightning.callbacks.model_summary:
  | Name         | Type          | Params | Mode 
-------------------------------------------------------
0 | loss         | MAE           | 0      | train
1 | padder_train | ConstantPad1d | 0      | train
2 | scaler       | TemporalNorm  | 0      | train
3 | blocks       | ModuleList    | 2.5 M  | train
-------------------------------------------------------
2.5 M     Trainable params
0         Non-trainable params
2.5 M     Total params
10.040    Total estimated model params size (MB)
34        Modules in train mode
0         Modules in eval mode


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

Training: |          | 0/? [00:00<?, ?it/s]

Validation: |          | 0/? [00:00<?, ?it/s]

INFO:pytorch_lightning.utilities.rank_zero:`Trainer.fit` stopped: `max_steps=10` reached.
INFO:pytorch_lightning.utilities.rank_zero:GPU available: False, used: False
INFO:pytorch_lightning.utilities.rank_zero:TPU available: False, using: 0 TPU cores
INFO:pytorch_lightning.utilities.rank_zero:HPU available: False, using: 0 HPUs


Predicting: |          | 0/? [00:00<?, ?it/s]

  Fold 3 Metrics:
    Mean Absolute Error (MAE): 573.17
    Mean Squared Error (MSE): 391125.64
    Root Mean Squared Error (RMSE): 625.40
    Mean Absolute Percentage Error (MAPE): 8.74%
    Symmetric Mean Absolute Percentage Error (sMAPE): 8.79%
    CO2 Emissions for Fold 3: Emission data not available.
	Train Loss  
	Avg: 428.7906 
	Min: 9.0000
	Max: 901.9030
	Validation Loss 
	Avg: 320.1635
	Min: 10.0000
	Max: 824.2236
	Perbandingan Loss (Avg Train Loss/Avg Validation Loss) ; 1.3393

Evaluasi Model untuk Semua Fold:
  Fold 1:
    Mean Absolute Error (MAE): 447.86
    Mean Squared Error (MSE): 250994.45
    Root Mean Squared Error (RMSE): 500.99
    Mean Absolute Percentage Error (MAPE): 6.45%
    Symmetric Mean Absolute Percentage Error (sMAPE): 6.44%
    CO2 Emissions for Fold 1: Emission data not available.
  Fold 2:
    Mean Absolute Error (MAE): 605.15
    Mean Squared Error (MSE): 577261.76
    Root Mean Squared Error (RMSE): 759.78
    Mean Absolute Percentage Error (MAPE): 1

In [None]:
for i, (train_fold, test_fold) in enumerate(folds):
    print(f"Processing fold {folds[i]}")

Output hidden; open in https://colab.research.google.com to view.

In [None]:
for i, (train_fold, test_fold) in enumerate(folds):
    print(f"Fold {i + 1}:")
    print(f"  Train data shape: {np.shape(train_fold)}")
    print(f"  Test data shape: {np.shape(test_fold)}")

Fold 1:
  Train data shape: (230664,)
  Test data shape: (24,)
Fold 2:
  Train data shape: (230664,)
  Test data shape: (24,)
Fold 3:
  Train data shape: (230664,)
  Test data shape: (24,)


In [None]:
len(ts)

230736