In [None]:
# Mount Google Drive at /content/drive (requires the Colab runtime). The Excel
# export later in this notebook writes to a path under this mount point.
from google.colab import drive
drive.mount('/content/drive')

import requests
from bs4 import BeautifulSoup
import pandas as pd
import re

Mounted at /content/drive


In [None]:
def get_putusan_links(limit=150, timeout=30):
    """Collect (case number, detail URL) pairs from the paginated listing.

    The listing URL is fixed to the directory path for pn-rantau-prapat,
    category narkotika-dan-psikotropika-1, year 2024. Pages are requested
    one after another (1.html, 2.html, ...) until `limit` links have been
    gathered or the listing is exhausted.

    Args:
        limit: Maximum number of links to return (default 150, the value the
            notebook originally hard-coded).
        timeout: Seconds to wait for each HTTP response before requests
            raises a timeout error instead of hanging indefinitely.

    Returns:
        A list of at most `limit` tuples ``(nomor_putusan, url)`` in listing
        order. A URL that shows up more than once is kept only the first time.

    Raises:
        requests.HTTPError: If a listing page answers with an error status
            other than 404.
    """
    base_url = "https://putusan3.mahkamahagung.go.id/direktori/index/pengadilan/pn-rantau-prapat/kategori/narkotika-dan-psikotropika-1/tahunjenis/putus/tahun/2024/page/"
    putusan_links = []
    seen_urls = set()
    page = 1

    while len(putusan_links) < limit:
        response = requests.get(base_url + str(page) + ".html", timeout=timeout)
        if response.status_code == 404:
            # Treat "page not found" as having walked past the last listing page.
            break
        response.raise_for_status()
        soup = BeautifulSoup(response.text, 'html.parser')

        added_on_page = 0
        for putusan in soup.select(".spost .entry-c a[href*='/putusan/']"):
            url = putusan['href']
            if url in seen_urls:
                continue
            seen_urls.add(url)
            putusan_links.append((putusan.get_text().strip(), url))
            added_on_page += 1

            if len(putusan_links) >= limit:
                break

        if added_on_page == 0:
            # An empty page, or one that only repeats links already seen, means
            # there is nothing more to collect. Without this check the loop
            # would never end when fewer than `limit` decisions exist.
            break

        page += 1

    return putusan_links[:limit]

In [None]:
# Leading "MENGADILI" heading, tolerant of stray whitespace between letters
# (e.g. "MENGADIL I :") and of any number of trailing colons.
_MENGADILI_PREFIX = re.compile(
    r"^\s*" + r"\s*".join("MENGADILI") + r"\s*:*\s*", re.IGNORECASE
)

# Evidence clause: everything after "Menetapkan barang bukti berupa:" up to
# (not including) the "Membebankan kepada" clause, or the end of the text.
_BARANG_BUKTI_CLAUSE = re.compile(
    r"Menetapkan barang bukti berupa\s*:?\s*(.*?)(?=\s+Membebankan kepada|$)",
    re.IGNORECASE | re.DOTALL,
)


def _split_amar(full_text):
    """Split the raw amar text into (evidence items, amar text without the evidence clause)."""
    # Collapse all whitespace runs (including newlines) so the patterns match
    # regardless of how the text was laid out in the HTML.
    text = " ".join(full_text.split())
    text = _MENGADILI_PREFIX.sub("", text, count=1).strip()

    match = _BARANG_BUKTI_CLAUSE.search(text)
    if match is None:
        return [], text

    items = [item.strip() for item in match.group(1).split(". ") if item.strip()]
    # Cut out only the evidence clause; whatever follows it (the
    # "Membebankan kepada ..." clause) stays part of the amar text.
    remainder = text[:match.start()] + " " + text[match.end():]
    return items, " ".join(remainder.split())


def crawl_putusan_detail(nomor_putusan, url, timeout=30):
    """Fetch one decision page and extract its number, evidence list and amar text.

    Args:
        nomor_putusan: Case number taken from the listing; used as a fallback
            when the page has no "Nomor" table cell with a value cell next to it.
        url: URL of the decision detail page.
        timeout: Seconds to wait for the HTTP response.

    Returns:
        A dict with the keys "Nomor Putusan", "Lembaga Peradilan",
        "Barang Bukti" (items joined with "; ") and "Amar Putusan".
        "Barang Bukti" and "Amar Putusan" are empty strings when the page has
        no "Catatan Amar" cell.
    """
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')

    lembaga_peradilan = "PN RANTAU PRAPAT"
    barang_bukti = []
    amar_putusan = ""

    nomor_label = soup.find("td", string="Nomor")
    nomor_cell = nomor_label.find_next_sibling("td") if nomor_label is not None else None
    nomor = nomor_cell.get_text(strip=True) if nomor_cell is not None else nomor_putusan

    amar_label = soup.find("td", string="Catatan Amar")
    if amar_label is not None:
        amar_cell = amar_label.find_next_sibling("td")
        full_amar_putusan = amar_cell.get_text(separator=" ", strip=True) if amar_cell is not None else ""
        barang_bukti, amar_putusan = _split_amar(full_amar_putusan)

    return {
        "Nomor Putusan": nomor,
        "Lembaga Peradilan": lembaga_peradilan,
        "Barang Bukti": "; ".join(barang_bukti).strip(),
        "Amar Putusan": amar_putusan
    }

In [None]:
# Resolve the listing first, then fetch every decision page in listing order,
# producing one record (dict) per decision.
putusan_links = get_putusan_links()

data = [
    crawl_putusan_detail(nomor_putusan, url)
    for nomor_putusan, url in putusan_links
]

In [None]:
# Turn the crawled records into a table and discard rows with missing values.
df = pd.DataFrame(data).dropna()

# Optional stricter filter, currently disabled: keep only rows whose
# "Barang Bukti" column is not blank.
# df = df[df["Barang Bukti"].str.strip() != ""]

In [None]:
# Add a 1-based running number as the first column. DataFrame.insert raises
# ValueError if the column already exists, so on a re-run of this cell the
# existing column is refreshed instead of inserted again.
nomor_urut = range(1, len(df) + 1)
if "Nomor" in df.columns:
    df["Nomor"] = nomor_urut
else:
    df.insert(0, "Nomor", nomor_urut)

# Write the table to the mounted Google Drive without the pandas index column.
output_path = '/content/drive/My Drive/Putusan_PN_Rantau_Prapat_2024.xlsx'
df.to_excel(output_path, index=False)

print("File saved to:", output_path)

File saved to: /content/drive/My Drive/Putusan_PN_Rantau_Prapat_2024.xlsx


In [None]:
# Preview the top of the table; the message states the same row count that
# head() is asked for.
print("Displaying the first 20 records:")
df.head(20)

Displaying the first 50 records:


Unnamed: 0,Nomor,Nomor Putusan,Lembaga Peradilan,Barang Bukti,Amar Putusan
0,1,727/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,1 (satu) Bungkus plastik klip transparan beris...,Menyatakan Terdakwa Ismail Alias Mail tersebut...
1,2,687/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,1 (satu) bungkus plastik bening kosong 4 (empa...,Menyatakan Terdakwa Hendri Gunawan Alias Aldi ...
2,3,784/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,2 (dua) bungkus plastik klip transparan berisi...,Menyatakan Terdakwa Ali Imran Alias Etot terse...
3,4,733/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,,Menyatakan Terdakwa Julham Alias Kuncung terse...
4,5,809/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,,Menyatakan Terdakwa Nasruddin Dalimunthe Alias...
5,6,728/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,,Menyatakan Terdakwa Roni Kartono Tambunan Alia...
6,7,757/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,1 (satu) bungkus plastik klip transparan didug...,Menyatakan Terdakwa Asnan Harahap Alias Asnan ...
7,8,825/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,3 (tiga) bungkus plastik klip transparan beris...,MENGADIL I : Menyatakan Terdakwa Tukijan alias...
8,9,590/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,,Menyatakan Terdakwa Dona Saputra Alias Dona te...
9,10,752/Pid.Sus/2024/PN Rap,PN RANTAU PRAPAT,1 (satu) unit handphone merek Samsung warna go...,Menyatakan Terdakwa Dara Qotni alias Kutni ali...
