In [None]:
# ================= CELL: GENERATE CLUSTER -> DOSEN ID MAPPING =================
import json
import os
from collections import Counter

import numpy as np
import pandas as pd

# --- PATH CONFIGURATION ---
# Every input and output file of this cell sits directly under BASE_DIR.
BASE_DIR = r"D:\Media\Kuliah\Skripsi\ScripTI\Versi 4\Sistem_Files"

# The three paths are derived from BASE_DIR in one pass; the order of the
# file names below must match the order of the names being assigned.
CLUSTER_FILE, DOSEN_IDS_FILE, OUTPUT_MAP_JSON = (
    os.path.join(BASE_DIR, file_name)
    for file_name in (
        "proposal_clusters.csv",   # Input 1: clustering result (from the previous script)
        "proposal_dosen_ids.csv",  # Input 2: lecturer (dosen) IDs (from the new SQL export)
        "cluster_dosen_ids.json",  # Output: mapping cluster -> list of lecturer IDs
    )
)

# --- PROCESSING ---
def build_cluster_dosen_map(
    df_merged,
    top_n=3,
    cluster_col="primary_cluster",
    dosen_cols=("id_dosen_1", "id_dosen_2"),
):
    """Map each cluster to the lecturer IDs that appear most often in it.

    For every distinct value of ``cluster_col`` the lecturer IDs found in all
    ``dosen_cols`` are pooled (missing values are skipped), counted, and the
    ``top_n`` most frequent ones are kept.

    Args:
        df_merged: DataFrame holding ``cluster_col`` and every column named
            in ``dosen_cols``, one row per proposal.
        top_n: Maximum number of lecturer IDs to keep per cluster.
        cluster_col: Name of the column that holds the cluster label.
        dosen_cols: Names of the columns that hold lecturer IDs.

    Returns:
        ``{cluster_id: [dosen_id, ...]}`` with plain ``int`` keys and values
        (so the result is JSON-serialisable). IDs are ordered by descending
        frequency; ties are broken by ascending ID so the output does not
        depend on row order or on the pandas version. A cluster with no
        lecturer IDs at all maps to an empty list.

    Raises:
        KeyError: If ``cluster_col`` or one of ``dosen_cols`` is missing.
        ValueError: If a cluster label or lecturer ID cannot be converted
            to ``int``.
    """
    cluster_dosen_map = {}

    for cluster_id, group in df_merged.groupby(cluster_col):
        # Pool the IDs from every lecturer column of this cluster. A column
        # containing NaN is read as float, so IDs are normalised to int
        # before counting (55.0 and 55 are the same lecturer).
        counts = Counter()
        for col in dosen_cols:
            counts.update(int(raw_id) for raw_id in group[col].dropna().tolist())

        # Most frequent first; equal counts fall back to ascending ID.
        ranked = sorted(counts.items(), key=lambda item: (-item[1], item[0]))
        cluster_dosen_map[int(cluster_id)] = [
            dosen_id for dosen_id, _ in ranked[:top_n]
        ]

    return cluster_dosen_map


# In a notebook cell or a direct script run ``__name__`` is "__main__", so the
# steps below execute exactly as before; the guard only keeps the helper above
# importable without touching the disk.
if __name__ == "__main__":
    print("Membaca data...")
    df_clusters = pd.read_csv(CLUSTER_FILE)  # [proposal_id, primary_cluster, ...]
    df_dosen = pd.read_csv(DOSEN_IDS_FILE)   # [proposal_id, id_dosen_1, id_dosen_2]

    # Join both tables; the inner join keeps only proposals present in both.
    df_merged = pd.merge(df_clusters, df_dosen, on="proposal_id", how="inner")
    print(f"Data tergabung: {len(df_merged)} proposal.")

    # Result dictionary: { cluster_id: [dosen_id_a, dosen_id_b] }
    cluster_dosen_map = build_cluster_dosen_map(df_merged, top_n=3)

    # Save. json.dump writes the integer cluster keys as strings.
    with open(OUTPUT_MAP_JSON, "w", encoding="utf-8") as f:
        json.dump(cluster_dosen_map, f)

    print(f"✅ Mapping Cluster ke Dosen ID tersimpan: {OUTPUT_MAP_JSON}")
    # Example content: { "0": [55, 12], "1": [60] }