In [1]:
# Mount Google Drive at /content/drive so the paths used later in this
# notebook (all rooted under /content/drive) resolve.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [4]:
import pandas as pd
import json
import os
from collections import Counter
import numpy as np

# ======= PATHS =======
# All inputs and outputs live under one project folder on the mounted Drive.
base_path = "/content/drive/MyDrive/PENALARAN KOMPUTER FIX"
cases_path = f"{base_path}/cases.csv"
# NOTE: this file is read below as input (pred_df) and is also the file that
# main() writes its results to.
predictions_path = f"{base_path}/data/results/predictions.csv"
queries_path = f"{base_path}/data/eval/queries.json"
output_dir = f"{base_path}/data/results"
# Create the results directory up front; exist_ok avoids failing on re-runs.
os.makedirs(output_dir, exist_ok=True)

# ======= Load Data =======
# Module-level frames shared by the functions defined further down.
cases_df = pd.read_csv(cases_path)
pred_df = pd.read_csv(predictions_path)
with open(queries_path, "r", encoding="utf-8") as f:
    queries = json.load(f)
queries_df = pd.DataFrame(queries)

# ======= i. Ekstrak Solusi =======
def extract_solutions(top_5_case_ids, cases_df):
    """
    Look up the solution text for each retrieved case id.

    For every id the solution is the case's ``putusan`` (verdict); when that
    is missing, ``ringkasan_fakta`` (fact summary) is used instead. For cases
    whose ``jenis_perkara`` contains "narkotika", the ``pasal`` value is
    appended as extra context.

    Args:
        top_5_case_ids: comma-separated string of case ids. Any non-string
            value (e.g. the NaN pandas produces for an empty CSV cell) is
            treated as "no ids".
        cases_df: DataFrame with columns ``case_id``, ``putusan``,
            ``ringkasan_fakta``, ``jenis_perkara`` and ``pasal``.

    Returns:
        dict mapping each case id to its solution text, to
        "No solution available" when both text columns are missing, or to
        "Case not found" when the id is absent from ``cases_df``.
    """
    solutions = {}

    # A missing cell arrives as float NaN, which has no .split().
    if not isinstance(top_5_case_ids, str):
        return solutions

    for raw_id in top_5_case_ids.split(','):
        case_id = raw_id.strip()
        if not case_id:
            # Stray or trailing commas must not create a bogus "" entry.
            continue

        case_row = cases_df[cases_df['case_id'] == case_id]
        if case_row.empty:
            solutions[case_id] = "Case not found"
            continue

        # Prefer the verdict; fall back to the fact summary.
        solusi = case_row['putusan'].iloc[0]
        if pd.isna(solusi):
            solusi = case_row['ringkasan_fakta'].iloc[0]

        # Decide on the placeholder *before* any string concatenation so a
        # missing value can never hit `+=` (which raised TypeError before).
        if pd.isna(solusi):
            solutions[case_id] = "No solution available"
            continue

        # Non-string cell values (e.g. numbers) are rendered as text so the
        # concatenation below is always str + str.
        solusi = str(solusi)

        jenis_perkara = case_row['jenis_perkara'].iloc[0]
        if isinstance(jenis_perkara, str) and 'narkotika' in jenis_perkara.lower():
            solusi += f" (Terkait Narkotika: {case_row['pasal'].iloc[0]})"

        solutions[case_id] = solusi

    return solutions

# ======= ii. Algoritma Prediksi =======
def predict_solution(solutions, method='majority_vote', similarity_scores=None):
    """
    Pick one predicted solution from the solutions of the retrieved cases.

    The placeholder texts "No solution available" and "Case not found" only
    take part in the vote when *every* entry is a placeholder; otherwise they
    are ignored so that missing data cannot outvote a real solution.

    Args:
        solutions: dict ``{case_id: solution_text}``.
        method: ``'majority_vote'`` (each case counts once) or
            ``'weighted_similarity'`` (each case counts by its score).
        similarity_scores: sequence of scores aligned with the iteration
            order of ``solutions``; required for ``'weighted_similarity'``.

    Returns:
        The winning solution text, or "No solution predicted" when
        ``solutions`` is empty. Ties go to the solution seen first.

    Raises:
        ValueError: if ``method`` is unknown, or if ``similarity_scores`` is
            missing or its length differs from ``solutions`` for
            ``'weighted_similarity'``.
    """
    if not solutions:
        return "No solution predicted"

    if method == 'majority_vote':
        # One vote per retrieved case.
        weights = [1] * len(solutions)
    elif method == 'weighted_similarity':
        if similarity_scores is None or len(similarity_scores) != len(solutions):
            raise ValueError("Similarity scores tidak sesuai dengan jumlah solusi")
        weights = similarity_scores
    else:
        # Previously an unknown method fell through and returned None.
        raise ValueError(f"Unknown method: {method!r}")

    placeholders = ("No solution available", "Case not found")

    # Pair each solution with its weight first so filtering keeps them aligned.
    ballots = list(zip(solutions.values(), weights))
    real_ballots = [(text, weight) for text, weight in ballots if text not in placeholders]

    # Fall back to the placeholders only when nothing else is available.
    totals = {}
    for text, weight in (real_ballots or ballots):
        totals[text] = totals.get(text, 0) + weight

    # max() returns the first maximal item, so ties favour the earliest case.
    return max(totals.items(), key=lambda item: item[1])[0]

# ======= iii. Fungsi Utama Prediksi =======
def predict_outcome(query_id, top_5_case_ids, cases_df, method='majority_vote', similarity_scores=None):
    """
    Predict a solution for one query from its retrieved case ids.

    The case ids are resolved to solution texts with ``extract_solutions`` and
    a single solution is then chosen with ``predict_solution``.

    Returns:
        tuple ``(query_id, predicted_solution, top_5_case_ids)``; the first
        and last items are passed through unchanged.
    """
    candidate_solutions = extract_solutions(top_5_case_ids, cases_df)
    return (
        query_id,
        predict_solution(candidate_solutions, method, similarity_scores),
        top_5_case_ids,
    )

# ======= iv. Demo Manual =======
def demo_manual(cases_df, pred_df, queries_df):
    """
    Run a manual demo on up to five simulated new cases (one narcotics case).

    Each simulated case reuses the ``top_5_case_ids`` of one of the first
    rows of ``pred_df`` and is paired with a hand-written expected outcome.
    The prediction uses majority vote and is compared to the expected text
    by exact string equality.

    Args:
        cases_df: case base passed through to ``predict_outcome``.
        pred_df: DataFrame with a ``top_5_case_ids`` column.
        queries_df: accepted for interface compatibility; not read here.

    Returns:
        list of dicts with keys ``query_id``, ``predicted_solution``,
        ``true_solution``, ``match`` and ``top_5_case_ids``. The list is
        shorter than five when ``pred_df`` has fewer than five rows.
    """
    # (query_id, expected outcome) for the simulated new cases.
    expected_outcomes = [
        ("new_001", "Hukuman penjara 5 tahun"),
        ("new_002", "Hukuman penjara 3 tahun"),
        ("new_003", "Denda Rp 50 juta"),
        ("new_004", "Hukuman penjara 7 tahun"),
        ("new_005", "Hukuman penjara 4 tahun (Terkait Narkotika: Pasal 114 UU No. 35/2009)"),
    ]

    # Positional slicing never raises, so a predictions table with fewer than
    # five rows gives a shorter demo instead of an IndexError.
    retrieved = pred_df['top_5_case_ids'].iloc[:len(expected_outcomes)]

    demo_results = []
    for (query_id, true_solution), top_5_case_ids in zip(expected_outcomes, retrieved):
        # Predict with majority vote.
        _, predicted_solution, _ = predict_outcome(query_id, top_5_case_ids, cases_df, method='majority_vote')

        demo_results.append({
            'query_id': query_id,
            'predicted_solution': predicted_solution,
            'true_solution': true_solution,
            # Exact-match comparison against the expected outcome.
            'match': predicted_solution == true_solution,
            'top_5_case_ids': top_5_case_ids
        })

    return demo_results

# ======= Main Function =======
def main():
    """
    Predict a solution for every row of ``pred_df``, save the results, and
    print the manual demo.

    Reads the module-level ``pred_df``, ``cases_df``, ``queries_df`` and
    ``predictions_path``. Results are written to ``predictions_path`` with
    columns ``query_id``, ``predicted_solution`` and ``top_5_case_ids``.
    """
    # Column names in the same order predict_outcome returns its values.
    columns = ('query_id', 'predicted_solution', 'top_5_case_ids')

    prediction_rows = []
    for _, record in pred_df.iterrows():
        # Majority vote over the retrieved cases.
        outcome = predict_outcome(
            record['query_id'], record['top_5_case_ids'], cases_df, method='majority_vote'
        )
        prediction_rows.append(dict(zip(columns, outcome)))

    # Persist the predictions.
    pd.DataFrame(prediction_rows).to_csv(predictions_path, index=False)
    print(f"✅ Saved: {predictions_path}")

    # Manual demo report.
    print("\n📋 Demo Manual untuk 5 Kasus Baru:")
    report_fields = (
        ("Query ID", 'query_id'),
        ("Predicted Solution", 'predicted_solution'),
        ("True Solution", 'true_solution'),
        ("Match", 'match'),
        ("Top 5 Case IDs", 'top_5_case_ids'),
    )
    for entry in demo_manual(cases_df, pred_df, queries_df):
        for label, key in report_fields:
            print(f"{label}: {entry[key]}")
        print("-" * 50)

if __name__ == "__main__":
    main()

✅ Saved: /content/drive/MyDrive/PENALARAN KOMPUTER FIX/data/results/predictions.csv

📋 Demo Manual untuk 5 Kasus Baru:
Query ID: new_001
Predicted Solution: No solution available
True Solution: Hukuman penjara 5 tahun
Match: False
Top 5 Case IDs: zaf04c05450ae168ad29313232393536,zaf04b3abeac93b6b6df313232303132,zaf04b3b297d83949f16313232333131,zaf04c017a89dc30943a313230323438,zaf04bf54310144c9b17313033353231
--------------------------------------------------
Query ID: new_002
Predicted Solution: No solution available
True Solution: Hukuman penjara 3 tahun
Match: False
Top 5 Case IDs: zaf04b3b297d83949f16313232333131,zaf04c017a89dc30943a313230323438,zaf04c05450ae168ad29313232393536,zaf04b3abeac93b6b6df313232303132,zaf04bf54310144c9b17313033353231
--------------------------------------------------
Query ID: new_003
Predicted Solution: No solution available
True Solution: Denda Rp 50 juta
Match: False
Top 5 Case IDs: zaf04c05450ae168ad29313232393536,zaf04b3abeac93b6b6df313232303132,zaf04b