In [None]:
# --- STEP 1: IMPORT LIBRARY ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error, mean_absolute_error
import warnings
import os

warnings.filterwarnings("ignore")

# --- STEP 2: READ DATA ---
file_path = 'data_pendaftar_jurusan.xlsx'
df = pd.read_excel(file_path)

# --- STEP 3: RESHAPE WIDE → LONG ---
# Every column other than 'Jurusan' becomes a (tahun, jumlah) pair.
# Values that cannot be parsed as numbers are coerced to NaN, the rows that
# contain any NaN are dropped, and only then is 'tahun' cast to int.
df_long = (
    df.melt(id_vars='Jurusan', var_name='tahun', value_name='jumlah')
    .assign(
        tahun=lambda d: pd.to_numeric(d['tahun'], errors='coerce'),
        jumlah=lambda d: pd.to_numeric(d['jumlah'], errors='coerce'),
    )
    .dropna()
    .astype({'tahun': int})
)

# --- STEP 4: INITIALISE OUTPUT ---
# Directory that receives the chart images; created if it does not exist yet.
chart_dir = 'charts_rf'
os.makedirs(chart_dir, exist_ok=True)

# Distinct values of the 'Jurusan' column, in order of first appearance.
jurusan_list = pd.unique(df_long['Jurusan'])

# Accumulator for the per-major result rows.
hasil_rf = []

# --- STEP 5: PROCESS EACH MAJOR ---
# For every major: fit a random forest on (year -> applicant count), compute
# error metrics on the historical data, predict 2024-2026, store the numbers
# in `hasil_rf` and save a chart to `chart_dir`.
for jurusan in jurusan_list:
    data_jurusan = df_long[df_long['Jurusan'] == jurusan].sort_values('tahun')
    tahun = data_jurusan['tahun'].values.reshape(-1, 1)  # 2-D feature matrix for sklearn
    jumlah = data_jurusan['jumlah'].values

    if len(jumlah) < 4:
        continue  # At least 4 data points are required to fit a model

    # Train model
    model = RandomForestRegressor(n_estimators=100, random_state=42)
    model.fit(tahun, jumlah)

    # Historical prediction (used for evaluation).
    # These are predictions on the same rows the model was fitted on, so the
    # metrics below are in-sample errors, not hold-out errors.
    pred_hist = model.predict(tahun)

    # Evaluation
    mse = mean_squared_error(jumlah, pred_hist)
    mad = mean_absolute_error(jumlah, pred_hist)

    # MAPE divides by the actual value, so it is undefined where the actual
    # count is 0. Compute it over the non-zero actuals only; if there are
    # none, report NaN instead of inf.
    ada_nilai = jumlah != 0
    if ada_nilai.any():
        galat_relatif = (jumlah[ada_nilai] - pred_hist[ada_nilai]) / jumlah[ada_nilai]
        mape = np.mean(np.abs(galat_relatif)) * 100
    else:
        mape = np.nan

    # Predict 2024, 2025, 2026
    # NOTE(review): a tree ensemble predicts from target values seen during
    # training, so for years beyond the training range it is expected to
    # return a near-constant value — confirm this is acceptable for forecasting.
    tahun_pred = np.array([[2024], [2025], [2026]])
    pred_2024, pred_2025, pred_2026 = model.predict(tahun_pred)

    # Store evaluation results
    hasil_rf.append({
        'Jurusan': jurusan,
        'Prediksi 2024': round(pred_2024, 2),
        'Prediksi 2025': round(pred_2025, 2),
        'Prediksi 2026': round(pred_2026, 2),
        'MSE': round(mse, 2),
        'MAD': round(mad, 2),
        'MAPE': round(mape, 2)
    })

    # Chart: actual series, in-sample fit and the three forecast points
    fig = plt.figure(figsize=(8, 4))
    plt.plot(tahun.flatten(), jumlah, marker='o', label='Data Aktual')
    plt.plot(tahun.flatten(), pred_hist, linestyle='--', label='Prediksi Historis')
    plt.plot([2024, 2025, 2026], [pred_2024, pred_2025, pred_2026], marker='x', color='red', label='Prediksi 2024-2026')
    plt.title(f'Prediksi Random Forest - {jurusan}')
    plt.xlabel('Tahun')
    plt.ylabel('Jumlah Pendaftar')
    plt.grid(True)
    plt.legend()
    plt.tight_layout()

    chart_path = os.path.join(chart_dir, f'{jurusan}.png')
    plt.savefig(chart_path)
    # Close this specific figure so figures do not accumulate across iterations
    plt.close(fig)

# --- STEP 6: SAVE TO EXCEL FILE ---
# Writes the evaluation table to one sheet and, for every modelled major,
# adds a sheet containing that major's chart image (if the PNG exists).
output_file = 'hasil_prediksi_RandomForest.xlsx'

try:
    import xlsxwriter
except ImportError:
    import subprocess
    import sys
    # Run pip through the current interpreter so the package is installed
    # into the environment this script is actually running in.
    subprocess.check_call([sys.executable, "-m", "pip", "install", "xlsxwriter"])
    import xlsxwriter

nama_sheet_evaluasi = 'Evaluasi Random Forest'
# Characters Excel does not allow in a worksheet name are replaced by '_'.
karakter_terlarang = str.maketrans({c: '_' for c in '[]:*?/\\'})

with pd.ExcelWriter(output_file, engine='xlsxwriter') as writer:
    df_eval = pd.DataFrame(hasil_rf)
    df_eval.to_excel(writer, index=False, sheet_name=nama_sheet_evaluasi)

    # Sheet names are compared case-insensitively; 'History' is reserved.
    nama_terpakai = {nama_sheet_evaluasi.lower(), 'history'}

    # Iterate over the result rows directly: when no major had enough data,
    # `df_eval` has no 'Jurusan' column and indexing it would raise KeyError.
    for baris in hasil_rf:
        jurusan = baris['Jurusan']

        # Build a valid sheet name: stringify (labels may be non-string),
        # replace forbidden characters, cap at Excel's 31-character limit and
        # drop leading/trailing apostrophes.
        dasar = str(jurusan).translate(karakter_terlarang)[:31].strip("'") or 'Sheet'

        # Truncation can make two majors collide; append a numeric suffix
        # until the name is unique within the workbook.
        sheet_name = dasar
        nomor = 1
        while sheet_name.lower() in nama_terpakai:
            nomor += 1
            akhiran = f'_{nomor}'
            sheet_name = dasar[:31 - len(akhiran)] + akhiran
        nama_terpakai.add(sheet_name.lower())

        chart_path = os.path.join(chart_dir, f'{jurusan}.png')
        worksheet = writer.book.add_worksheet(sheet_name)
        writer.sheets[sheet_name] = worksheet
        if os.path.exists(chart_path):
            worksheet.insert_image('B2', chart_path)

print(f"✅ File berhasil disimpan sebagai: {output_file}")


✅ File berhasil disimpan sebagai: hasil_prediksi_RandomForest.xlsx
