In [None]:
# ============================================================
# PROGRAM PREDIKSI HARGA BERAS - RANDOM FOREST VERSION
# ============================================================

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, r2_score

# =======================
# 1. Load the dataset
# =======================
df = pd.read_csv("dataset_prediksi_harga_beras_final.csv")

# =======================
# 2. One-hot encode the categorical column
# drop_first=True omits the dummy column of the first 'Kabupaten'
# category; that category is represented by all remaining dummies being
# False/0.
# =======================
df_encoded = pd.get_dummies(df, columns=['Kabupaten'], drop_first=True)

# =======================
# 3. Separate target and features
# Every column except the target is used as a feature.
# =======================
target_column = 'Rata_Rata_Harga_Beras'
y = df_encoded[target_column]
X = df_encoded.drop(columns=[target_column])

# =======================
# 4. Train/test split
# 20 % of the rows are held out; rows are shuffled with a fixed seed so
# the split is reproducible between runs.
# =======================
split_options = {"test_size": 0.2, "random_state": 42, "shuffle": True}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# =======================
# 5. Feature scaling (optional for Random Forest)
# Random Forest does not require scaled features; the step is kept anyway.
# The scaler is fitted on the training split only and then reused for the
# test split, so test-set statistics never enter the fit.
# NOTE(review): the original rationale was "more stable results" --
# confirm that scaling actually changes anything for this tree-based model.
# =======================
scaler = StandardScaler().fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# =======================
# 6. Random Forest model
# =======================
forest_params = {
    "n_estimators": 300,      # number of trees in the forest
    "max_depth": None,        # no depth limit: trees grow freely
    "min_samples_split": 2,
    "min_samples_leaf": 1,
    "random_state": 42,       # fixed seed for reproducible training
}
model = RandomForestRegressor(**forest_params)

# The model is trained on the scaled features, so any later input must be
# passed through the same fitted `scaler` before calling `model.predict`.
model.fit(X_train_scaled, y_train)

# =======================
# 7. Predict on the held-out test split
# =======================
y_pred = model.predict(X_test_scaled)

# =======================
# 8. Evaluation
# R² and MSE are computed on the test split; RMSE is the square root of
# the MSE.
# =======================
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5

print("===== HASIL RANDOM FOREST =====")
for metric_label, metric_value in (("MSE :", mse), ("RMSE:", rmse), ("R²  :", r2)):
    print(metric_label, metric_value)




===== HASIL RANDOM FOREST =====
MSE : 551651.8756808084
RMSE: 742.7327080994942
R²  : 0.7117990914760358

===== CONTOH PREDIKSI =====
Data input : [[2023 93866.34 54.59 512391.58 1.886 False False False False False False
  False False False False False False False False True False False]]
Prediksi   : 11637.706666666667
Aktual     : 12012
