In [196]:
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch_geometric.nn import GCNConv
from torch_geometric.utils import dense_to_sparse
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

In [197]:
# Load data
df = pd.read_csv("harga_beras_premium.csv")
df

Unnamed: 0,Tanggal,Komoditas,Provinsi,Harga
0,17/05/2022,Beras Premium,Nusa Tenggara Barat,10667.0
1,17/05/2022,Beras Premium,Sulawesi Selatan,10918.0
2,17/05/2022,Beras Premium,Bengkulu,11475.0
3,17/05/2022,Beras Premium,Banten,11163.0
4,17/05/2022,Beras Premium,D.I Yogyakarta,11240.0
...,...,...,...,...
41643,16/05/2025,Beras Premium,Kalimantan Tengah,17439.0
41644,16/05/2025,Beras Premium,Papua Barat,18100.0
41645,16/05/2025,Beras Premium,Papua Selatan,18700.0
41646,16/05/2025,Beras Premium,Papua Tengah,19167.0


In [198]:
# Konversi kolom tanggal
df["Tanggal"] = pd.to_datetime(df["Tanggal"], dayfirst=True)
df

Unnamed: 0,Tanggal,Komoditas,Provinsi,Harga
0,2022-05-17,Beras Premium,Nusa Tenggara Barat,10667.0
1,2022-05-17,Beras Premium,Sulawesi Selatan,10918.0
2,2022-05-17,Beras Premium,Bengkulu,11475.0
3,2022-05-17,Beras Premium,Banten,11163.0
4,2022-05-17,Beras Premium,D.I Yogyakarta,11240.0
...,...,...,...,...
41643,2025-05-16,Beras Premium,Kalimantan Tengah,17439.0
41644,2025-05-16,Beras Premium,Papua Barat,18100.0
41645,2025-05-16,Beras Premium,Papua Selatan,18700.0
41646,2025-05-16,Beras Premium,Papua Tengah,19167.0


In [199]:
# Pivot: baris = tanggal, kolom = provinsi, nilai = harga
df_pivot = df.pivot(index="Tanggal", columns="Provinsi", values="Harga")
df_pivot

Provinsi,Aceh,Bali,Banten,Bengkulu,D.I Yogyakarta,DKI Jakarta,Gorontalo,Jambi,Jawa Barat,Jawa Tengah,...,Papua Tengah,Riau,Sulawesi Barat,Sulawesi Selatan,Sulawesi Tengah,Sulawesi Tenggara,Sulawesi Utara,Sumatera Barat,Sumatera Selatan,Sumatera Utara
Tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2022-05-17,11762.0,11396.0,11163.0,11475.0,11240.0,12333.0,11258.0,11909.0,11558.0,11456.0,...,,14548.0,11333.0,10918.0,12036.0,11653.0,11762.0,13320.0,11406.0,12416.0
2022-05-18,11703.0,11407.0,11186.0,11475.0,11240.0,12333.0,11092.0,11909.0,11559.0,11431.0,...,,14586.0,11333.0,10895.0,12036.0,11740.0,11762.0,13317.0,11441.0,12448.0
2022-05-19,11762.0,11407.0,11286.0,11475.0,11200.0,12333.0,11250.0,11909.0,11559.0,11384.0,...,,14685.0,11333.0,10895.0,11950.0,11575.0,11762.0,13472.0,11469.0,12474.0
2022-05-20,11811.0,11407.0,11225.0,11475.0,11200.0,12333.0,11258.0,11909.0,11561.0,11431.0,...,,14537.0,11333.0,10873.0,12036.0,11573.0,11754.0,13332.0,11441.0,12471.0
2022-05-21,11752.0,11407.0,11225.0,11475.0,11200.0,12333.0,11275.0,11909.0,11580.0,11368.0,...,,14667.0,11283.0,10873.0,12040.0,11544.0,11792.0,13312.0,11406.0,12447.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-12,14776.0,15778.0,14692.0,15468.0,14063.0,15338.0,15304.0,15110.0,14649.0,14689.0,...,18250.0,16253.0,14258.0,14446.0,15375.0,15197.0,15175.0,17002.0,14243.0,15440.0
2025-05-13,14765.0,15769.0,14750.0,15359.0,14089.0,15338.0,15365.0,15110.0,14673.0,14701.0,...,18350.0,16269.0,14628.0,14500.0,15378.0,15222.0,15274.0,16986.0,14231.0,15435.0
2025-05-14,14795.0,15778.0,14738.0,15397.0,14089.0,15338.0,15148.0,15148.0,14674.0,14666.0,...,18375.0,16245.0,14550.0,14503.0,15325.0,15180.0,15218.0,17035.0,14273.0,15444.0
2025-05-15,14797.0,15778.0,14529.0,15397.0,14038.0,15338.0,15330.0,15148.0,14610.0,14734.0,...,18917.0,16271.0,14491.0,14472.0,15461.0,15176.0,15234.0,17004.0,14382.0,15420.0


In [200]:
# Imputasi missing value: isi dengan harga hari sebelumnya (forward fill)
df_pivot = df_pivot.sort_index().fillna(method="ffill")
df_pivot.isnull().sum()

  df_pivot = df_pivot.sort_index().fillna(method="ffill")


Provinsi
Aceh                           0
Bali                           0
Banten                         0
Bengkulu                       0
D.I Yogyakarta                 0
DKI Jakarta                    0
Gorontalo                      0
Jambi                          0
Jawa Barat                     0
Jawa Tengah                    0
Jawa Timur                     0
Kalimantan Barat               0
Kalimantan Selatan             0
Kalimantan Tengah              0
Kalimantan Timur               0
Kalimantan Utara               0
Kepulauan Bangka Belitung      0
Kepulauan Riau                 0
Lampung                        0
Maluku                         0
Maluku Utara                   0
Nusa Tenggara Barat            0
Nusa Tenggara Timur            0
Papua                          0
Papua Barat                    0
Papua Barat Daya             611
Papua Pegunungan             613
Papua Selatan                612
Papua Tengah                 612
Riau                           0
S

In [201]:
# Tambah fitur 'week of year'
df_pivot["calweek"] = df_pivot.index.isocalendar().week

# Preview
df_pivot.tail()

Provinsi,Aceh,Bali,Banten,Bengkulu,D.I Yogyakarta,DKI Jakarta,Gorontalo,Jambi,Jawa Barat,Jawa Tengah,...,Riau,Sulawesi Barat,Sulawesi Selatan,Sulawesi Tengah,Sulawesi Tenggara,Sulawesi Utara,Sumatera Barat,Sumatera Selatan,Sumatera Utara,calweek
Tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2025-05-12,14776.0,15778.0,14692.0,15468.0,14063.0,15338.0,15304.0,15110.0,14649.0,14689.0,...,16253.0,14258.0,14446.0,15375.0,15197.0,15175.0,17002.0,14243.0,15440.0,20
2025-05-13,14765.0,15769.0,14750.0,15359.0,14089.0,15338.0,15365.0,15110.0,14673.0,14701.0,...,16269.0,14628.0,14500.0,15378.0,15222.0,15274.0,16986.0,14231.0,15435.0,20
2025-05-14,14795.0,15778.0,14738.0,15397.0,14089.0,15338.0,15148.0,15148.0,14674.0,14666.0,...,16245.0,14550.0,14503.0,15325.0,15180.0,15218.0,17035.0,14273.0,15444.0,20
2025-05-15,14797.0,15778.0,14529.0,15397.0,14038.0,15338.0,15330.0,15148.0,14610.0,14734.0,...,16271.0,14491.0,14472.0,15461.0,15176.0,15234.0,17004.0,14382.0,15420.0,20
2025-05-16,14695.0,15739.0,14625.0,15288.0,14000.0,15600.0,15125.0,15148.0,14650.0,14505.0,...,16131.0,14530.0,14459.0,15280.0,15198.0,15083.0,17008.0,14482.0,15468.0,20


In [202]:
# Rolling average window size
ROLLING_WINDOWS = [0, 28]

# Buat dict untuk menyimpan semua versi data
datasets = {}

# Loop setiap window
for window in ROLLING_WINDOWS:
    if window == 0:
        smoothed = df_pivot.copy()
    else:
        smoothed = df_pivot.rolling(window=window, min_periods=1).mean()
    
    # Simpan ke dict
    datasets[f"ra{window}"] = smoothed

# Fungsi pembagian data
def split_data(df, train_end="2018-12-31", val_end="2020-12-31"):
    df_train = df.loc[:train_end]
    df_val = df.loc[train_end:val_end].iloc[1:] # Exclude the last day of train
    df_test = df.loc[val_end:].iloc[1:] # Exclude the last day of val
    return df_train, df_val, df_test

# Contoh split untuk ra28
# Adjust dates to be within the data range
# Data starts from 2022-05-17 and ends 2025-05-16
train_df, val_df, test_df = split_data(datasets["ra28"], train_end="2023-12-31", val_end="2024-12-31")

# Lihat shape dan range tanggal
if not train_df.empty:
    print("Train:", train_df.shape, train_df.index[0], "→", train_df.index[-1])
else:
    print("Train: Empty DataFrame", train_df.shape)

if not val_df.empty:
    print("Val  :", val_df.shape, val_df.index[0], "→", val_df.index[-1])
else:
    print("Val  : Empty DataFrame", val_df.shape)

if not test_df.empty:
    print("Test :", test_df.shape, test_df.index[0], "→", test_df.index[-1])
else:
    print("Test : Empty DataFrame", test_df.shape)

Train: (594, 39) 2022-05-17 00:00:00 → 2023-12-31 00:00:00
Val  : (366, 39) 2024-01-01 00:00:00 → 2024-12-31 00:00:00
Test : (136, 39) 2025-01-01 00:00:00 → 2025-05-16 00:00:00


In [203]:
# Encode calweek as a cyclical feature
def encode_cyclic_feature(df, col, max_val):
    """
    Encode a cyclical feature like week of year using sine and cosine transformations
    """
    df[f"{col}_sin"] = np.sin(2 * np.pi * df[col] / max_val)
    df[f"{col}_cos"] = np.cos(2 * np.pi * df[col] / max_val)
    return df

def create_time_series_tensor(df, window_size=60, horizon=7, target_columns=None, include_calweek=True):
    """
    Create time series tensors for model input and targets
    
    Parameters:
    - df: DataFrame with features
    - window_size: Number of time steps to use as input sequence
    - horizon: How far ahead to predict
    - target_columns: Which columns to predict (defaults to all columns except calweek features)
    - include_calweek: Whether to include calweek features in input
    """
    # Select features and targets
    if target_columns is None:
        target_columns = [col for col in df.columns if not col.startswith("calweek")]
    
    # Determine which columns to use as features
    if include_calweek:
        # Use all columns
        feature_cols = df.columns
    else:
        # Exclude calweek features
        feature_cols = [col for col in df.columns if not col.startswith("calweek")]
    
    # Convert data to numpy arrays
    data = df[feature_cols].values.astype(np.float32)
    X, y = [], []

    for i in range(len(data) - window_size - horizon + 1):
        x_window = data[i:i+window_size]
        y_target = df.iloc[i+window_size+horizon-1][target_columns].values
        X.append(x_window)
        y.append(y_target)

    X = np.stack(X)  # shape: (samples, window_size, num_features)
    y = np.stack(y)  # shape: (samples, num_targets)
    return torch.tensor(X), torch.tensor(y)

# Pilih rolling version ra28
selected_df = datasets["ra28"]

# Drop baris yang masih ada NaN (jika ada sisa)
selected_df = selected_df.dropna()

# Encode calweek as cyclical features
selected_df = encode_cyclic_feature(selected_df, "calweek", 53)

# Tentukan parameter
WINDOW_SIZE = 30
HORIZON = 7
TARGET_COLUMNS = [col for col in selected_df.columns if not col.startswith("calweek")]

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_sin"] = np.sin(2 * np.pi * df[col] / max_val)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_cos"] = np.cos(2 * np.pi * df[col] / max_val)


In [204]:
print("Train df length:", len(train_df))
print("Val df length  :", len(val_df))
print("Test df length :", len(test_df))
print("Window size:", WINDOW_SIZE, "| Horizon:", HORIZON)

Train df length: 594
Val df length  : 366
Test df length : 136
Window size: 30 | Horizon: 7


In [205]:
print("Range selected_df:", selected_df.index.min(), "→", selected_df.index.max())


Range selected_df: 2024-01-20 00:00:00 → 2025-05-16 00:00:00


In [206]:
from sklearn.preprocessing import StandardScaler

# --- Scaling ---
# Simpan hanya kolom harga (tanpa 'calweek')

price_columns = df_pivot.columns.drop("calweek")
scaler = StandardScaler()  # Changed from MinMaxScaler to StandardScaler
scaled_prices = scaler.fit_transform(df_pivot[price_columns])

# Buat dataframe baru dengan hasil scaling
scaled_df = pd.DataFrame(scaled_prices, index=df_pivot.index, columns=price_columns)

# Tambahkan kembali kolom calweek
scaled_df["calweek"] = df_pivot["calweek"]

# Update selected_df
selected_df = scaled_df.dropna()

# Cek validitas tensor setelah scaling
def check_tensor_validity(name, tensor):
    print(f"🧪 Cek {name}:")
    print("  NaN:", torch.isnan(tensor).any().item())
    print("  Inf:", torch.isinf(tensor).any().item())
    print("  Min:", tensor.min().item(), "| Max:", tensor.max().item())
    print("  Mean:", tensor.mean().item(), "| Std:", tensor.std().item())  # Add mean and std check for StandardScaler

In [207]:
# Apply to our dataframe - week of year has max value of 53
selected_df = encode_cyclic_feature(selected_df, "calweek", 53)

# Show the new features
print("Original calweek and encoded features:")
selected_df[["calweek", "calweek_sin", "calweek_cos"]].head()

Original calweek and encoded features:


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_sin"] = np.sin(2 * np.pi * df[col] / max_val)
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df[f"{col}_cos"] = np.cos(2 * np.pi * df[col] / max_val)


Provinsi,calweek,calweek_sin,calweek_cos
Tanggal,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
2024-01-20,3,0.348202,0.93742
2024-01-21,3,0.348202,0.93742
2024-01-22,4,0.456629,0.889657
2024-01-23,4,0.456629,0.889657
2024-01-24,4,0.456629,0.889657


In [208]:
# ✅ Split data yang realistis berdasarkan tanggal aktual
train_df, val_df, test_df = split_data(selected_df, train_end="2024-08-31", val_end="2025-01-31")

# Konfirmasi panjang baris
print("Train df length:", len(train_df))
print("Val df length  :", len(val_df))
print("Test df length :", len(test_df))

Train df length: 225
Val df length  : 153
Test df length : 105


In [209]:
# Lanjutkan pembentukan tensor dengan calweek features
X_train, y_train = create_time_series_tensor(train_df, WINDOW_SIZE, HORIZON, TARGET_COLUMNS, include_calweek=True)
X_val, y_val = create_time_series_tensor(val_df, WINDOW_SIZE, HORIZON, TARGET_COLUMNS, include_calweek=True)
X_test, y_test = create_time_series_tensor(test_df, WINDOW_SIZE, HORIZON, TARGET_COLUMNS, include_calweek=True)

check_tensor_validity("X_train", X_train)
check_tensor_validity("y_train", y_train)

🧪 Cek X_train:
  NaN: False
  Inf: False
  Min: -2.172020673751831 | Max: 34.0
  Mean: 1.3317697048187256 | Std: 3.082226514816284
🧪 Cek y_train:
  NaN: False
  Inf: False
  Min: -1.8754154881574883 | Max: 5.20311530038103
  Mean: 0.9295126927025702 | Std: 0.5487578049725361


In [210]:
class TGCN(nn.Module):
    def __init__(self, num_nodes, num_features, hidden_dim, output_dim, edge_index, use_calweek=True):
        super(TGCN, self).__init__()
        self.edge_index = edge_index
        self.use_calweek = use_calweek
        
        # GCN for spatial dependencies
        self.gcn = GCNConv(num_features, hidden_dim)
        
        # Separate embeddings for calendar week features if used
        if self.use_calweek:
            self.calweek_embedding = nn.Linear(2, hidden_dim // 4)  # smaller dimension for calweek
            gru_input_dim = hidden_dim + (hidden_dim // 4)
        else:
            gru_input_dim = hidden_dim
            
        # GRU for temporal dependencies
        self.gru = nn.GRU(gru_input_dim, hidden_dim, batch_first=True)
        
        # Output layer
        self.fc = nn.Linear(hidden_dim, output_dim)

    def forward(self, x):
        # x: (batch_size, seq_len, num_nodes + calweek_features)
        batch_size, seq_len, features = x.shape
        
        # Extract calweek features if used
        if self.use_calweek:
            # Last 2 columns are calweek_sin and calweek_cos
            price_features = x[:, :, :-2]  # Remove calweek features
            calweek_features = x[:, :, -2:]  # Extract calweek_sin and calweek_cos
            num_nodes = price_features.shape[2]
        else:
            price_features = x
            num_nodes = features
        
        gcn_out = []
        
        # Process each sample in batch
        for b in range(batch_size):
            sample_seq = price_features[b]  # (seq_len, num_nodes)
            seq_gcn = []
            
            # Process each time step with GCN
            for t in range(seq_len):
                xt = sample_seq[t]  # (num_nodes,)
                xt = xt.unsqueeze(1)  # (num_nodes, 1)
                # Ensure xt has the same number of nodes as edge_index expects
                xt = self.gcn(xt, self.edge_index)  # (num_nodes, hidden_dim)
                xt = xt.mean(dim=0)  # Global average pooling → (hidden_dim,)
                seq_gcn.append(xt)
            
            seq_gcn = torch.stack(seq_gcn)  # (seq_len, hidden_dim)
            gcn_out.append(seq_gcn)
        
        gcn_out = torch.stack(gcn_out)  # (batch_size, seq_len, hidden_dim)
        
        # Combine with calweek features if used
        if self.use_calweek:
            # Process calweek features: (batch_size, seq_len, 2)
            calweek_embedded = self.calweek_embedding(calweek_features)  # (batch_size, seq_len, hidden_dim//4)
            combined = torch.cat([gcn_out, calweek_embedded], dim=2)  # (batch_size, seq_len, hidden_dim + hidden_dim//4)
            gru_input = combined
        else:
            gru_input = gcn_out
            
        # Process with GRU for temporal dependencies
        gru_out, _ = self.gru(gru_input)
        
        # Use the last time step for prediction
        out = self.fc(gru_out[:, -1])
        return out

In [211]:
def build_edge_index_from_correlation(df, threshold=0.5):
    # Ambil hanya kolom harga (tanpa calweek dan turunannya)
    df = df[[col for col in df.columns if not col.startswith('calweek')]]

    # Hitung korelasi antar kolom
    corr_matrix = df.corr().abs().values  # ambil nilai absolut

    # Threshold: korelasi rendah jadi 0 (tidak ada edge)
    corr_matrix[corr_matrix < threshold] = 0

    # Buat sparse edge_index dari adjacency matrix
    edge_index, edge_weight = dense_to_sparse(torch.tensor(corr_matrix, dtype=torch.float32))
    return edge_index

# Pastikan kita build graph setelah cyclical encoding diterapkan
# Gunakan data full untuk bangun graph, tapi tanpa kolom calweek
edge_index = build_edge_index_from_correlation(selected_df)

# Info graph
print("Edge index shape:", edge_index.shape)
print("Jumlah node (provinsi):", len([col for col in selected_df.columns if not col.startswith('calweek')]))
print("Jumlah edge:", edge_index.shape[1])

Edge index shape: torch.Size([2, 544])
Jumlah node (provinsi): 38
Jumlah edge: 544


In [212]:
print("X_train shape:", X_train.shape)  # (batch, seq_len, num_nodes)
print("y_train shape:", y_train.shape)  # (batch, num_targets)


X_train shape: torch.Size([189, 30, 41])
y_train shape: torch.Size([189, 38])


In [213]:
def evaluate(y_true, y_pred):
    y_true_np = y_true.detach().cpu().numpy()
    y_pred_np = y_pred.detach().cpu().numpy()

    rmse = np.sqrt(mean_squared_error(y_true_np, y_pred_np))
    mae = mean_absolute_error(y_true_np, y_pred_np)
    mape = np.mean(np.abs((y_true_np - y_pred_np) / (y_true_np + 1e-8))) * 100  # avoid zero division
    r2 = r2_score(y_true_np, y_pred_np)

    return {
        "RMSE (Rp)": rmse,
        "MAE (Rp)": mae,
        "MAPE (%)": mape,
        "R² Score": r2
    }

# Siapkan model
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Ensure dimensions are correct
num_price_features = len([col for col in selected_df.columns if not col.startswith('calweek')])
print(f"Number of price features: {num_price_features}")
output_dim_model = y_train.shape[1]  # Number of target provinces
print(f"Output dimension: {output_dim_model}")

# Check edge_index dimensions
print(f"Edge index shape: {edge_index.shape}, max index: {edge_index.max().item()}")

# Make sure edge_index doesn't exceed the number of nodes
assert edge_index.max().item() < num_price_features, "Edge index has indices larger than number of nodes!"

# ✅ Ganti num_features jadi 1 (fitur per node) dan aktifkan calweek
model = TGCN(
    num_nodes=num_price_features, 
    num_features=1, 
    hidden_dim=64, 
    output_dim=output_dim_model, 
    edge_index=edge_index,
    use_calweek=True
).to(device)

# Optimizer dan loss
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
loss_fn = nn.MSELoss()

# Cast inputs to the same dtype to avoid mixed precision issues
X_train = X_train.to(torch.float32)
y_train = y_train.to(torch.float32)
X_val = X_val.to(torch.float32)
y_val = y_val.to(torch.float32)
X_test = X_test.to(torch.float32)
y_test = y_test.to(torch.float32)

# Pindahkan data ke device
X_train, y_train = X_train.to(device), y_train.to(device)
X_val, y_val = X_val.to(device), y_val.to(device)
X_test, y_test = X_test.to(device), y_test.to(device)

# Training loop
EPOCHS = 50
for epoch in range(1, EPOCHS + 1):
    model.train()
    optimizer.zero_grad()
    out = model(X_train)
    loss = loss_fn(out, y_train)
    loss.backward()
    optimizer.step()

    if epoch % 10 == 0 or epoch == 1:
        model.eval()
        with torch.no_grad():
            val_pred = model(X_val)
            val_loss = loss_fn(val_pred, y_val)
        print(f"Epoch {epoch}: Train Loss = {loss.item():.4f}, Val Loss = {val_loss.item():.4f}")

Number of price features: 38
Output dimension: 38
Edge index shape: torch.Size([2, 544]), max index: 37
Epoch 1: Train Loss = 1.1600, Val Loss = 0.6666
Epoch 1: Train Loss = 1.1600, Val Loss = 0.6666


KeyboardInterrupt: 

In [None]:
def evaluate_with_inverse(y_true_scaled, y_pred_scaled, scaler, target_columns):
    # Convert tensor to numpy
    y_true_scaled = y_true_scaled.detach().cpu().numpy()
    y_pred_scaled = y_pred_scaled.detach().cpu().numpy()

    # Pad ke dimensi penuh agar bisa inverse_transform
    y_true_padded = np.zeros((y_true_scaled.shape[0], len(scaler.feature_names_in_)))
    y_pred_padded = np.zeros_like(y_true_padded)

    idx = [np.where(scaler.feature_names_in_ == col)[0][0] for col in target_columns]

    # Masukkan ke posisi kolom target
    y_true_padded[:, idx] = y_true_scaled
    y_pred_padded[:, idx] = y_pred_scaled

    # Inverse transform
    y_true_rupiah = scaler.inverse_transform(y_true_padded)[:, idx]
    y_pred_rupiah = scaler.inverse_transform(y_pred_padded)[:, idx]

    # Evaluasi
    rmse = np.sqrt(mean_squared_error(y_true_rupiah, y_pred_rupiah))
    mae = mean_absolute_error(y_true_rupiah, y_pred_rupiah)
    mape = np.mean(np.abs((y_true_rupiah - y_pred_rupiah) / (y_true_rupiah + 1e-8))) * 100
    r2 = r2_score(y_true_rupiah, y_pred_rupiah)

    return {
        "RMSE (Rp)": rmse,
        "MAE (Rp)": mae,
        "MAPE (%)": mape,
        "R² Score": r2
    }

# Setelah training selesai:
model.eval()
with torch.no_grad():
    y_pred = model(X_test)

results = evaluate_with_inverse(y_test, y_pred, scaler, target_columns=price_columns)

print("\n📊 Evaluasi Test Set (Skala Asli - Rupiah):")
for metric, value in results.items():
    print(f"{metric}: {value:,.2f}")



📊 Evaluasi Test Set (Skala Asli - Rupiah):
RMSE (Rp): 1,464.24
MAE (Rp): 736.57
MAPE (%): 4.17
R² Score: -82.09


In [None]:
# Cek dimensi output model vs y_train
model.eval()
with torch.no_grad():
    out_train_check = model(X_train)

print(f"Jumlah kolom (provinsi) di y_train: {y_train.shape[1]}")
print(f"Jumlah kolom (provinsi) di output model(X_train): {out_train_check.shape[1]}")

# Sanity check: output_dim_model should be the same
print(f"output_dim_model (parameter model): {output_dim_model}")

if y_train.shape[1] == out_train_check.shape[1] == output_dim_model:
    print("✅ Dimensi output model dan y_train cocok.")
else:
    print("⚠️ Peringatan: Dimensi output model dan y_train TIDAK cocok!")

Jumlah kolom (provinsi) di y_train: 38
Jumlah kolom (provinsi) di output model(X_train): 38
output_dim_model (parameter model): 38
✅ Dimensi output model dan y_train cocok.
