#Part 1 – Membangun Case Base

In [1]:
import shutil
shutil.rmtree('/content/drive', ignore_errors=True)

from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install pandas requests beautifulsoup4 pdfminer.six lxml > /dev/null 2>&1

In [11]:
import argparse
import io
import os
import re
import time
import urllib
from concurrent.futures import ThreadPoolExecutor, wait
from datetime import date
import pandas as pd
import requests
from bs4 import BeautifulSoup
from pdfminer.high_level import extract_text

# Global set untuk tracking PDF yang sudah didownload
downloaded_pdfs = set()

def create_path(folder_name):
    if not os.path.exists(folder_name):
        os.makedirs(folder_name)
        print(f"Folder dibuat: {folder_name}")
    return folder_name

def load_existing_pdfs(path_pdf):
    """Load daftar PDF yang sudah ada untuk mencegah duplikasi"""
    global downloaded_pdfs
    if os.path.exists(path_pdf):
        existing_files = [f for f in os.listdir(path_pdf) if f.endswith('.pdf')]
        downloaded_pdfs.update(existing_files)
        print(f"Ditemukan {len(existing_files)} PDF yang sudah ada")

def sanitize_filename(filename):
    """Bersihkan nama file dari karakter tidak valid"""
    # Hapus karakter yang tidak valid untuk nama file
    invalid_chars = '<>:"/\\|?*'
    for char in invalid_chars:
        filename = filename.replace(char, '_')

    # Hapus spasi berlebih dan ganti dengan underscore
    filename = re.sub(r'\s+', '_', filename.strip())

    # Batasi panjang nama file (max 200 karakter)
    if len(filename) > 200:
        name_part = filename[:190]
        ext_part = filename[-10:] if '.' in filename[-10:] else '.pdf'
        filename = name_part + ext_part

    return filename

def generate_pdf_filename(nomor, tahun, tingkat_proses, original_url):
    """Generate nama file PDF yang rapi dan konsisten"""
    # Ambil nomor putusan dan bersihkan
    clean_nomor = re.sub(r'[^\w\-]', '_', str(nomor)) if nomor else "no_number"

    # Ambil tahun
    clean_tahun = str(tahun) if tahun else "unknown_year"

    # Singkat tingkat proses
    tingkat_map = {
        'Tingkat Pertama': 'TK1',
        'Tingkat Banding': 'TK2',
        'Tingkat Kasasi': 'TK3',
        'Peninjauan Kembali': 'PK'
    }
    clean_tingkat = tingkat_map.get(tingkat_proses, 'TK1')

    # Format: TAHUN_TINGKAT_NOMOR.pdf
    # Contoh: 2024_TK1_123_Pid_Sus_2024_PN_Jakarta.pdf
    filename = f"{clean_tahun}_{clean_tingkat}_{clean_nomor}.pdf"

    # Sanitize final filename
    filename = sanitize_filename(filename)

    return filename

def open_page(link):
    count = 0
    while count < 3:
        try:
            response = requests.get(link, timeout=30)
            return BeautifulSoup(response.text, "lxml")
        except Exception as e:
            count += 1
            print(f"Error membuka halaman (percobaan {count}): {e}")
            time.sleep(5)
    return None

def get_detail(soup, keyword):
    try:
        text = (
            soup.find(lambda tag: tag.name == "td" and keyword in tag.text)
            .find_next()
            .get_text()
            .strip()
        )
        return text
    except:
        return ""

def get_pdf(url, path_pdf, nomor, tahun, tingkat_proses):
    """Download PDF dengan nama file yang rapi dan deteksi duplikasi"""
    global downloaded_pdfs

    try:
        # Generate nama file yang rapi
        clean_filename = generate_pdf_filename(nomor, tahun, tingkat_proses, url)

        # Cek apakah PDF sudah pernah didownload
        if clean_filename in downloaded_pdfs:
            print(f"PDF sudah ada, skip: {clean_filename}")
            return None, clean_filename, "exists"

        # Download PDF
        full_url = f"https://putusan3.mahkamahagung.go.id{url}" if url.startswith('/') else url
        file = urllib.request.urlopen(full_url)
        file_content = file.read()

        # Simpan dengan nama yang rapi
        file_path = os.path.join(path_pdf, clean_filename)
        with open(file_path, "wb") as out_file:
            out_file.write(file_content)

        # Tambahkan ke tracking set
        downloaded_pdfs.add(clean_filename)

        print(f"PDF disimpan: {clean_filename}")
        return io.BytesIO(file_content), clean_filename, "downloaded"

    except Exception as e:
        print(f"Error download PDF: {e}")
        return None, None, "error"

def clean_text(text):
    """Bersihkan teks hasil ekstraksi PDF"""
    if not isinstance(text, str):
        return ""

    text = text.replace("M a h ka m a h A g u n g R e p u blik In d o n esia\n", "")
    text = text.replace("Disclaimer\n", "")
    text = text.replace(
        "Kepaniteraan Mahkamah Agung Republik Indonesia berusaha untuk selalu mencantumkan informasi paling kini dan akurat sebagai bentuk komitmen Mahkamah Agung untuk pelayanan publik, transparansi dan akuntabilitas\n",
        "",
    )
    text = text.replace(
        "pelaksanaan fungsi peradilan. Namun dalam hal-hal tertentu masih dimungkinkan terjadi permasalahan teknis terkait dengan akurasi dan keterkinian informasi yang kami sajikan, hal mana akan terus kami perbaiki dari waktu kewaktu.\n",
        "",
    )
    text = text.replace(
        "Dalam hal Anda menemukan inakurasi informasi yang termuat pada situs ini atau informasi yang seharusnya ada, namun belum tersedia, maka harap segera hubungi Kepaniteraan Mahkamah Agung RI melalui :\n",
        "",
    )
    text = text.replace(
        "Email : kepaniteraan@mahkamahagung.go.id    Telp : 021-384 3348 (ext.318)\n",
        "",
    )
    return text.strip()

def extract_data(link, keyword_url, path_output, path_pdf, today):
    try:
        full_link = f"https://putusan3.mahkamahagung.go.id{link}" if link.startswith('/') else link
        print(f"Mengekstrak data dari: {link}")

        soup = open_page(full_link)
        if not soup:
            print(f"Gagal membuka link: {link}")
            return

        table = soup.find("table", {"class": "table"})
        if not table:
            print(f"Tidak ditemukan table di: {link}")
            return

        judul = table.find("h2").text if table.find("h2") else ""

        # Extract semua detail
        nomor = get_detail(table, "Nomor")
        tingkat_proses = get_detail(table, "Tingkat Proses")
        klasifikasi = get_detail(table, "Klasifikasi")
        kata_kunci = get_detail(table, "Kata Kunci")
        tahun = get_detail(table, "Tahun")
        tanggal_register = get_detail(table, "Tanggal Register")
        lembaga_peradilan = get_detail(table, "Lembaga Peradilan")
        jenis_lembaga_peradilan = get_detail(table, "Jenis Lembaga Peradilan")
        hakim_ketua = get_detail(table, "Hakim Ketua")
        hakim_anggota = get_detail(table, "Hakim Anggota")
        panitera = get_detail(table, "Panitera")
        amar = get_detail(table, "Amar")
        amar_lainnya = get_detail(table, "Amar Lainnya")
        catatan_amar = get_detail(table, "Catatan Amar")
        tanggal_musyawarah = get_detail(table, "Tanggal Musyawarah")
        tanggal_dibacakan = get_detail(table, "Tanggal Dibacakan")
        kaidah = get_detail(table, "Kaidah")
        status = get_detail(table, "Status")
        abstrak = get_detail(table, "Abstrak")

        # Download PDF dengan nama yang rapi
        text_pdf = ""
        link_pdf = ""
        file_name_pdf = ""
        pdf_status = ""

        try:
            pdf_element = soup.find("a", href=re.compile(r"/pdf/"))
            if pdf_element:
                link_pdf = pdf_element["href"]
                file_pdf, file_name_pdf, pdf_status = get_pdf(
                    link_pdf, path_pdf, nomor, tahun, tingkat_proses
                )

                if file_pdf and pdf_status == "downloaded":
                    # Extract text dari PDF yang baru didownload
                    text_pdf = extract_text(file_pdf)
                    text_pdf = clean_text(text_pdf)
                elif pdf_status == "exists":
                    # Load text dari PDF yang sudah ada
                    existing_pdf_path = os.path.join(path_pdf, file_name_pdf)
                    if os.path.exists(existing_pdf_path):
                        try:
                            text_pdf = extract_text(existing_pdf_path)
                            text_pdf = clean_text(text_pdf)
                        except Exception as e:
                            print(f"Error extracting existing PDF {file_name_pdf}: {e}")
                            text_pdf = ""
            else:
                print(f"Tidak ada PDF untuk: {nomor}")

        except Exception as e:
            print(f"Error PDF: {e}")

        # Prepare data dengan nama PDF yang sudah rapi
        data = [
            judul, nomor, tingkat_proses, klasifikasi, kata_kunci, tahun,
            tanggal_register, lembaga_peradilan, jenis_lembaga_peradilan,
            hakim_ketua, hakim_anggota, panitera, amar, amar_lainnya,
            catatan_amar, tanggal_musyawarah, tanggal_dibacakan, kaidah,
            status, abstrak, full_link, link_pdf, file_name_pdf, text_pdf, pdf_status
        ]

        result = pd.DataFrame([data], columns=[
            "judul", "nomor", "tingkat_proses", "klasifikasi", "kata_kunci", "tahun",
            "tanggal_register", "lembaga_peradilan", "jenis_lembaga_peradilan",
            "hakim_ketua", "hakim_anggota", "panitera", "amar", "amar_lainnya",
            "catatan_amar", "tanggal_musyawarah", "tanggal_dibacakan", "kaidah",
            "status", "abstrak", "link", "link_pdf", "file_name_pdf", "text_pdf", "pdf_status"
        ])

        # Simpan ke CSV
        keyword_clean = "perdagangan_orang_2024"
        destination = f"{path_output}/putusan_ma_{keyword_clean}_{today}.csv"

        if not os.path.isfile(destination):
            result.to_csv(destination, header=True, index=False)
            print(f"File CSV dibuat: {destination}")
        else:
            result.to_csv(destination, mode="a", header=False, index=False)
            print(f"Data ditambahkan ke: {destination}")

    except Exception as e:
        print(f"Error extract_data: {e}")

def run_process(keyword_url, page, sort_date, path_output, path_pdf, today):
    try:
        if keyword_url.startswith("https"):
            link = f"{keyword_url}&page={page}"
        else:
            link = f"https://putusan3.mahkamahagung.go.id/search.html?q={keyword_url}&page={page}"

        if sort_date:
            link = f"{link}&obf=TANGGAL_PUTUS&obm=desc"

        print(f"\nScraping halaman {page}: {link}")
        soup = open_page(link)

        if not soup:
            print(f"Gagal membuka halaman {page}")
            return

        links = soup.find_all("a", {"href": re.compile("/direktori/putusan")})
        print(f"Ditemukan {len(links)} putusan di halaman {page}")

        for i, link_element in enumerate(links, 1):
            print(f"  [{i}/{len(links)}] Processing...")
            extract_data(link_element["href"], keyword_url, path_output, path_pdf, today)
            time.sleep(1)  # Delay untuk menghindari overload server

    except Exception as e:
        print(f"Error run_process halaman {page}: {e}")

def run_scraper(keyword=None, url=None, sort_date=True, download_pdf=True):
    if not keyword and not url:
        print("Please provide a keyword or URL")
        return

    # Buat path dan folder
    path_output = '/content/drive/MyDrive/perdagangan_orang/CSV'
    path_pdf = '/content/drive/MyDrive/perdagangan_orang/PDF'

    create_path(path_output)
    create_path(path_pdf)

    # Load PDF yang sudah ada
    load_existing_pdfs(path_pdf)

    today = date.today().strftime("%Y-%m-%d")

    link = f"https://putusan3.mahkamahagung.go.id/search.html?q={keyword}&page=1"
    if url:
        link = url

    print(f"Mengakses URL: {link}")
    soup = open_page(link)

    if not soup:
        print("Gagal membuka halaman pertama")
        return

    # Deteksi pagination dengan aman
    pagination_links = soup.find_all("a", {"class": "page-link"})

    if not pagination_links:
        print("Tidak ada pagination, kemungkinan hanya 1 halaman")
        last_page = 1
    else:
        try:
            page_numbers = []
            for link_elem in pagination_links:
                page_num = link_elem.get("data-ci-pagination-page")
                if page_num and page_num.isdigit():
                    page_numbers.append(int(page_num))
            last_page = max(page_numbers) if page_numbers else 1
        except Exception as e:
            print(f"Error deteksi pagination: {e}")
            last_page = 1

    # Cek hasil pencarian
    no_results = soup.find("div", class_="alert alert-info")
    if no_results and "tidak ditemukan" in no_results.text.lower():
        print("Tidak ada hasil ditemukan untuk pencarian ini")
        return

    print(f"Total halaman yang akan di-scrape: {last_page}")
    print(f"Estimasi data: {20 * last_page}")

    keyword_url = url if url else keyword

    # Mulai scraping
    for page in range(1, last_page + 1):
        run_process(keyword_url, page, sort_date, path_output, path_pdf, today)
        time.sleep(2)  # Delay antar halaman

    print("\n=== SCRAPING SELESAI ===")
    print(f"File CSV disimpan di: {path_output}")
    print(f"File PDF disimpan di: {path_pdf}")

    # Cek file yang tersimpan
    try:
        csv_files = [f for f in os.listdir(path_output) if f.endswith('.csv')]
        pdf_files = [f for f in os.listdir(path_pdf) if f.endswith('.pdf')]
        print(f"Jumlah file CSV: {len(csv_files)}")
        print(f"Jumlah file PDF: {len(pdf_files)}")

        # Tampilkan beberapa contoh nama PDF
        if pdf_files:
            print(f"\nContoh nama PDF yang rapi:")
            for pdf in sorted(pdf_files)[:5]:
                print(f"  - {pdf}")
            if len(pdf_files) > 5:
                print(f"  ... dan {len(pdf_files)-5} file lainnya")

    except Exception as e:
        print(f"Tidak dapat mengecek file hasil: {e}")

# Jalankan scraper
if __name__ == "__main__":
    run_scraper(url="https://putusan3.mahkamahagung.go.id/search.html?q=perdagangan%20orang&jenis_doc=putusan&cat=d92c02366ae91966e4cdbe6279fc36eb|591c725c9658703f62846dd5ad0a5443&jd=&tp=&court=&t_put=2021&t_reg=2021&t_upl=2021&t_pr=")

Folder dibuat: /content/drive/MyDrive/perdagangan_orang/CSV
Folder dibuat: /content/drive/MyDrive/perdagangan_orang/PDF
Ditemukan 0 PDF yang sudah ada
Mengakses URL: https://putusan3.mahkamahagung.go.id/search.html?q=perdagangan%20orang&jenis_doc=putusan&cat=d92c02366ae91966e4cdbe6279fc36eb|591c725c9658703f62846dd5ad0a5443&jd=&tp=&court=&t_put=2021&t_reg=2021&t_upl=2021&t_pr=
Total halaman yang akan di-scrape: 7
Estimasi data: 140

Scraping halaman 1: https://putusan3.mahkamahagung.go.id/search.html?q=perdagangan%20orang&jenis_doc=putusan&cat=d92c02366ae91966e4cdbe6279fc36eb|591c725c9658703f62846dd5ad0a5443&jd=&tp=&court=&t_put=2021&t_reg=2021&t_upl=2021&t_pr=&page=1&obf=TANGGAL_PUTUS&obm=desc
Ditemukan 23 putusan di halaman 1
  [1/23] Processing...
Mengekstrak data dari: https://putusan3.mahkamahagung.go.id/direktori/putusan/zaec6896e35fe5588514313830333031.html
Tidak ada PDF untuk: Putusan PT MATARAM Nomor 147/PID.SUS/2021/PT MTR Tanggal 29 Desember 2021 —Pembanding/Penuntut Umum : W

Konversi & Ekstraksi Teks

In [3]:
!pip install pandas requests beautifulsoup4 pdfminer.six lxml --quiet

In [4]:
import os
import pandas as pd
import re
import io
import subprocess
import logging
from pdfminer.high_level import extract_text
from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
from pdfminer.converter import TextConverter
from pdfminer.layout import LAParams
from pdfminer.pdfpage import PDFPage
from datetime import datetime

In [12]:
# PDF processing imports
try:
    from pdfminer.high_level import extract_text
    from pdfminer.pdfinterp import PDFResourceManager, PDFPageInterpreter
    from pdfminer.converter import TextConverter
    from pdfminer.layout import LAParams
    from pdfminer.pdfpage import PDFPage
    PDFMINER_AVAILABLE = True
except ImportError:
    PDFMINER_AVAILABLE = False
    print("pdfminer not available - install with: pip install pdfminer.six")

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TextExtractor:
    """Extract plain text from PDF files - NO CLEANING"""

    def __init__(self, base_dir="/content/drive/MyDrive/perdagangan_orang"):
        self.base_dir = base_dir
        self.pdf_dir = os.path.join(base_dir, "PDF")
        self.raw_text_dir = os.path.join(base_dir, "RAW_TEXT")  # Raw text output
        self.logs_dir = "/logs"

        # Create directories
        os.makedirs(self.raw_text_dir, exist_ok=True)
        os.makedirs(self.logs_dir, exist_ok=True)

        print(f"PDF input: {self.pdf_dir}")
        print(f"Raw text output: {self.raw_text_dir}")

        # Setup extraction logger
        self.setup_extraction_logger()

    def setup_extraction_logger(self):
        """Setup dedicated logger for extraction process"""
        self.extraction_logger = logging.getLogger('text_extraction')
        self.extraction_logger.setLevel(logging.INFO)

        # Remove existing handlers
        for handler in self.extraction_logger.handlers[:]:
            self.extraction_logger.removeHandler(handler)

        # Create file handler
        log_file = os.path.join(self.logs_dir, 'extraction.log')
        file_handler = logging.FileHandler(log_file, mode='a', encoding='utf-8')
        file_handler.setLevel(logging.INFO)

        # Create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        file_handler.setFormatter(formatter)
        self.extraction_logger.addHandler(file_handler)

        self.extraction_logger.info("="*60)
        self.extraction_logger.info("TEXT EXTRACTION SESSION STARTED")
        self.extraction_logger.info("="*60)

    # =================== PDF EXTRACTION METHODS ===================

    def pdf_to_text_pdfminer_basic(self, pdf_path):
        """Method 1: Extract using pdfminer basic"""
        if not PDFMINER_AVAILABLE:
            return None, "pdfminer not available"

        try:
            text = extract_text(pdf_path)
            return text, "pdfminer_basic"
        except Exception as e:
            return None, f"pdfminer_basic error: {e}"

    def pdf_to_text_pdfminer_advanced(self, pdf_path):
        """Method 2: Extract using pdfminer with layout analysis"""
        if not PDFMINER_AVAILABLE:
            return None, "pdfminer not available"

        try:
            resource_manager = PDFResourceManager()
            fake_file_handle = io.StringIO()
            converter = TextConverter(resource_manager, fake_file_handle, laparams=LAParams())
            page_interpreter = PDFPageInterpreter(resource_manager, converter)

            with open(pdf_path, 'rb') as fh:
                for page in PDFPage.get_pages(fh, caching=True, check_extractable=True):
                    page_interpreter.process_page(page)

            text = fake_file_handle.getvalue()
            fake_file_handle.close()
            converter.close()

            return text, "pdfminer_advanced"
        except Exception as e:
            return None, f"pdfminer_advanced error: {e}"

    def pdf_to_text_pdftotext(self, pdf_path):
        """Method 3: Extract using pdftotext (poppler-utils)"""
        try:
            # Check if pdftotext is available
            try:
                subprocess.run(['pdftotext', '-v'], capture_output=True, check=True)
            except (subprocess.CalledProcessError, FileNotFoundError):
                # Try to install poppler-utils
                try:
                    self.extraction_logger.info("Installing poppler-utils...")
                    subprocess.run(['apt-get', 'update'], check=True, capture_output=True)
                    subprocess.run(['apt-get', 'install', '-y', 'poppler-utils'], check=True, capture_output=True)
                    self.extraction_logger.info("poppler-utils installed successfully")
                except Exception as install_error:
                    return None, f"Failed to install poppler-utils: {install_error}"

            # Extract text using pdftotext
            result = subprocess.run(
                ['pdftotext', '-layout', pdf_path, '-'],
                capture_output=True, text=True, check=True
            )

            return result.stdout, "pdftotext"
        except subprocess.CalledProcessError as e:
            return None, f"pdftotext error: {e}"
        except Exception as e:
            return None, f"pdftotext setup error: {e}"

    def extract_from_pdf(self, pdf_path):
        """Extract text from single PDF using multiple methods"""
        filename = os.path.basename(pdf_path)
        self.extraction_logger.info(f"Extracting from PDF: {filename}")

        # Try extraction methods in order
        methods = [
            self.pdf_to_text_pdfminer_basic,
            self.pdf_to_text_pdfminer_advanced,
            self.pdf_to_text_pdftotext
        ]

        for method in methods:
            text, method_info = method(pdf_path)

            if text and len(text.strip()) > 50:  # Minimum threshold
                self.extraction_logger.info(f"Success with {method_info}: {len(text)} characters")
                return text, method_info
            elif text:
                self.extraction_logger.warning(f"{method_info} returned short text: {len(text)} chars")

        self.extraction_logger.error(f"All PDF extraction methods failed for {filename}")
        return None, "all_methods_failed"

    # =================== MAIN PROCESSING METHODS ===================

    def process_single_file(self, file_path):
        """Process single PDF file"""
        filename = os.path.basename(file_path)
        file_ext = os.path.splitext(filename)[1].lower()

        # Only process PDF files
        if file_ext != '.pdf':
            self.extraction_logger.error(f"Only PDF files supported: {filename}")
            return None

        raw_text, method_used = self.extract_from_pdf(file_path)

        if not raw_text or len(raw_text.strip()) < 50:
            self.extraction_logger.error(f"Extraction failed or insufficient text: {filename}")
            return None

        # Save raw text (NO CLEANING)
        base_name = os.path.splitext(filename)[0]
        raw_text_filename = f"raw_{base_name}.txt"
        raw_text_path = os.path.join(self.raw_text_dir, raw_text_filename)

        try:
            with open(raw_text_path, 'w', encoding='utf-8') as f:
                f.write(raw_text)

            self.extraction_logger.info(f"Raw text saved: {raw_text_filename}")
            print(f"SUCCESS: {filename} -> {raw_text_filename} ({len(raw_text)} chars)")

            return {
                'source_file': filename,
                'source_type': file_ext,
                'raw_text_file': raw_text_filename,
                'raw_text_path': raw_text_path,
                'method_used': method_used,
                'text_length': len(raw_text),
                'status': 'success'
            }

        except Exception as e:
            self.extraction_logger.error(f"Error saving raw text for {filename}: {e}")
            return None

    def process_all_pdfs(self):
        """Process all PDF files"""
        if not os.path.exists(self.pdf_dir):
            print(f"PDF directory not found: {self.pdf_dir}")
            return []

        pdf_files = [f for f in os.listdir(self.pdf_dir) if f.endswith('.pdf')]

        if not pdf_files:
            print(f"No PDF files found in {self.pdf_dir}")
            return []

        print(f"Found {len(pdf_files)} PDF files")

        results = []
        for i, pdf_file in enumerate(pdf_files, 1):
            pdf_path = os.path.join(self.pdf_dir, pdf_file)
            print(f"[{i}/{len(pdf_files)}] Processing: {pdf_file}")

            result = self.process_single_file(pdf_path)
            if result:
                results.append(result)

        return results

    def process_all_files(self):
        """Process all PDF files"""
        print("TEXT EXTRACTION STARTED")
        print("=" * 50)
        print("Tujuan: Konversi PDF -> Raw plain text")
        print("Output: Raw text files (BELUM dibersihkan)")
        print("=" * 50)

        # Process PDFs
        pdf_results = self.process_all_pdfs()

        if not pdf_results:
            print("No files processed successfully")
            return None

        # Create extraction report
        df_results = pd.DataFrame(pdf_results)
        report_path = os.path.join(self.logs_dir, 'extraction_report.csv')
        df_results.to_csv(report_path, index=False)

        # Summary
        print("\n" + "=" * 60)
        print("EXTRACTION SUMMARY")
        print("=" * 60)
        print(f"PDF files processed: {len(pdf_results)}")
        print(f"Total successful extractions: {len(pdf_results)}")
        print(f"Raw text files saved to: {self.raw_text_dir}")
        print(f"Extraction report: {report_path}")
        print(f"Extraction log: {os.path.join(self.logs_dir, 'extraction.log')}")

        return df_results

# Utility functions
def extract_single_pdf(pdf_path, output_dir="/tmp"):
    """Quick function to extract single PDF"""
    extractor = TextExtractor()
    extractor.raw_text_dir = output_dir
    return extractor.process_single_file(pdf_path)

# Main execution
def main():
    """Main function for text extraction"""
    print("ii. KONVERSI & EKSTRAKSI TEKS")
    print("=" * 50)

    # Check dependencies
    print("Checking dependencies...")
    if PDFMINER_AVAILABLE:
        print("SUCCESS: pdfminer.six available")
    else:
        print("ERROR: pdfminer.six not available - install with: pip install pdfminer.six")

    # Initialize extractor
    extractor = TextExtractor("/content/drive/MyDrive/perdagangan_orang")

    # Run extraction
    results = extractor.process_all_files()

    if results is not None:
        print(f"\nEXTRACTION COMPLETE!")
        print(f"Check raw text files in: {extractor.raw_text_dir}")
        print(f"Next step: Run text cleaning on raw files")
    else:
        print(f"\nNo files extracted. Check your PDF directory.")

if __name__ == "__main__":
    main()

INFO:text_extraction:TEXT EXTRACTION SESSION STARTED
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.pdf


ii. KONVERSI & EKSTRAKSI TEKS
Checking dependencies...
SUCCESS: pdfminer.six available
PDF input: /content/drive/MyDrive/perdagangan_orang/PDF
Raw text output: /content/drive/MyDrive/perdagangan_orang/RAW_TEXT
TEXT EXTRACTION STARTED
Tujuan: Konversi PDF -> Raw plain text
Output: Raw text files (BELUM dibersihkan)
Found 80 PDF files
[1/80] Processing: 2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.pdf


INFO:text_extraction:Success with pdfminer_basic: 55160 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN_Pli_Tanggal_16_Desember_2021__Penuntut_Umum_ANDI_HAMZAH_KUSUMAATMAJA__S_HTerdakwa_M__NOOR_Als_NUNUI_Bin_KHAIRI.pdf


SUCCESS: 2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.pdf -> raw_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.txt (55160 chars)
[2/80] Processing: 2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN_Pli_Tanggal_16_Desember_2021__Penuntut_Umum_ANDI_HAMZAH_KUSUMAATMAJA__S_HTerdakwa_M__NOOR_Als_NUNUI_Bin_KHAIRI.pdf


INFO:text_extraction:Success with pdfminer_basic: 86201 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN_Pli_Tanggal_16_Desember_2021__Penuntut_Umum_ANDI_HAMZAH_KUSUMAATMAJA__S_HTerdakwa_M__NOOR_Als_NUNUI_Bin_KHAIRI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.pdf


SUCCESS: 2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN_Pli_Tanggal_16_Desember_2021__Penuntut_Umum_ANDI_HAMZAH_KUSUMAATMAJA__S_HTerdakwa_M__NOOR_Als_NUNUI_Bin_KHAIRI.pdf -> raw_2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN_Pli_Tanggal_16_Desember_2021__Penuntut_Umum_ANDI_HAMZAH_KUSUMAATMAJA__S_HTerdakwa_M__NOOR_Als_NUNUI_Bin_KHAIRI.txt (86201 chars)
[3/80] Processing: 2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 62316 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.pdf


SUCCESS: 2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.pdf -> raw_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.txt (62316 chars)
[4/80] Processing: 2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.pdf


INFO:text_extraction:Success with pdfminer_basic: 87277 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_SAMARINDA_Nomor_240_PID_2021_PT_SMR_Tanggal_26_Nopember_2021__Pembanding_Terbanding_Terdakwa___DEVITA_ARIYANI_Als_DORA_Binti_MUSTOPA_Diwakili_Oleh___Nunung_Tri_Sulistiawa__S_H_.pdf


SUCCESS: 2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.pdf -> raw_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.txt (87277 chars)
[5/80] Processing: 2021_TK1_Putusan_PT_SAMARINDA_Nomor_240_PID_2021_PT_SMR_Tanggal_26_Nopember_2021__Pembanding_Terbanding_Terdakwa___DEVITA_ARIYANI_Als_DORA_Binti_MUSTOPA_Diwakili_Oleh___Nunung_Tri_Sulistiawa__S_H_.pdf


INFO:text_extraction:Success with pdfminer_basic: 160183 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_SAMARINDA_Nomor_240_PID_2021_PT_SMR_Tanggal_26_Nopember_2021__Pembanding_Terbanding_Terdakwa___DEVITA_ARIYANI_Als_DORA_Binti_MUSTOPA_Diwakili_Oleh___Nunung_Tri_Sulistiawa__S_H_.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.pdf


SUCCESS: 2021_TK1_Putusan_PT_SAMARINDA_Nomor_240_PID_2021_PT_SMR_Tanggal_26_Nopember_2021__Pembanding_Terbanding_Terdakwa___DEVITA_ARIYANI_Als_DORA_Binti_MUSTOPA_Diwakili_Oleh___Nunung_Tri_Sulistiawa__S_H_.pdf -> raw_2021_TK1_Putusan_PT_SAMARINDA_Nomor_240_PID_2021_PT_SMR_Tanggal_26_Nopember_2021__Pembanding_Terbanding_Terdakwa___DEVITA_ARIYANI_Als_DORA_Binti_MUSTOPA_Diwakili_Oleh___Nunung_Tri_Sulistiawa__S_H_.txt (160183 chars)
[6/80] Processing: 2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.pdf


INFO:text_extraction:Success with pdfminer_basic: 31407 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.pdf


SUCCESS: 2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.pdf -> raw_2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.txt (31407 chars)
[7/80] Processing: 2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.pdf


INFO:text_extraction:Success with pdfminer_basic: 114504 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MAKALE_Nomor_93_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_WIWIN_ALIAS_VALEN.pdf


SUCCESS: 2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.pdf -> raw_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt (114504 chars)
[8/80] Processing: 2021_TK1_Putusan_PN_MAKALE_Nomor_93_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_WIWIN_ALIAS_VALEN.pdf


INFO:text_extraction:Success with pdfminer_basic: 159083 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MAKALE_Nomor_93_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_WIWIN_ALIAS_VALEN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.pdf


SUCCESS: 2021_TK1_Putusan_PN_MAKALE_Nomor_93_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_WIWIN_ALIAS_VALEN.pdf -> raw_2021_TK1_Putusan_PN_MAKALE_Nomor_93_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_WIWIN_ALIAS_VALEN.txt (159083 chars)
[9/80] Processing: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.pdf


INFO:text_extraction:Success with pdfminer_basic: 55542 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.pdf


SUCCESS: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.pdf -> raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt (55542 chars)
[10/80] Processing: 2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.pdf


INFO:text_extraction:Success with pdfminer_basic: 50031 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TANGERANG_Nomor_1614_Pid_Sus_2021_PN_Tng_Tanggal_3_Nopember_2021__Penuntut_Umum_HADI_WIDODO__SHTerdakwa_1_SUBUR_RAHARJO_Bin_SUGITO2_AMAR_SAHIDIN_Als_ABANG_Bin_WARSITO.pdf


SUCCESS: 2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.pdf -> raw_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.txt (50031 chars)
[11/80] Processing: 2021_TK1_Putusan_PN_TANGERANG_Nomor_1614_Pid_Sus_2021_PN_Tng_Tanggal_3_Nopember_2021__Penuntut_Umum_HADI_WIDODO__SHTerdakwa_1_SUBUR_RAHARJO_Bin_SUGITO2_AMAR_SAHIDIN_Als_ABANG_Bin_WARSITO.pdf


INFO:text_extraction:Success with pdfminer_basic: 100054 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_TANGERANG_Nomor_1614_Pid_Sus_2021_PN_Tng_Tanggal_3_Nopember_2021__Penuntut_Umum_HADI_WIDODO__SHTerdakwa_1_SUBUR_RAHARJO_Bin_SUGITO2_AMAR_SAHIDIN_Als_ABANG_Bin_WARSITO.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.pdf


SUCCESS: 2021_TK1_Putusan_PN_TANGERANG_Nomor_1614_Pid_Sus_2021_PN_Tng_Tanggal_3_Nopember_2021__Penuntut_Umum_HADI_WIDODO__SHTerdakwa_1_SUBUR_RAHARJO_Bin_SUGITO2_AMAR_SAHIDIN_Als_ABANG_Bin_WARSITO.pdf -> raw_2021_TK1_Putusan_PN_TANGERANG_Nomor_1614_Pid_Sus_2021_PN_Tng_Tanggal_3_Nopember_2021__Penuntut_Umum_HADI_WIDODO__SHTerdakwa_1_SUBUR_RAHARJO_Bin_SUGITO2_AMAR_SAHIDIN_Als_ABANG_Bin_WARSITO.txt (100054 chars)
[12/80] Processing: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.pdf


INFO:text_extraction:Success with pdfminer_basic: 233739 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.pdf


SUCCESS: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.pdf -> raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.txt (233739 chars)
[13/80] Processing: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.pdf


INFO:text_extraction:Success with pdfminer_basic: 158417 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.pdf


SUCCESS: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.pdf -> raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.txt (158417 chars)
[14/80] Processing: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.pdf


INFO:text_extraction:Success with pdfminer_basic: 225258 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.pdf -> raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt (225258 chars)
[15/80] Processing: 2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 42101 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (42101 chars)
[16/80] Processing: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 30036 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3022_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (30036 chars)
[17/80] Processing: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3022_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 29829 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3022_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.pdf


SUCCESS: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3022_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3022_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (29829 chars)
[18/80] Processing: 2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.pdf


INFO:text_extraction:Success with pdfminer_basic: 542023 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TARAKAN_Nomor_213_Pid_Sus_2021_PN_Tar_Tanggal_14_Oktober_2021__Penuntut_Umum_KOMANG_NOPRIZAL_SAPUTRA__S_H_Terdakwa_MOH__YUSUF_DAENG_BETA_Als_IYUS_Bin_ABU_DAENG_BETA.pdf
INFO:text_extraction:Installing poppler-utils...


SUCCESS: 2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.pdf -> raw_2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.txt (542023 chars)
[19/80] Processing: 2021_TK1_Putusan_PN_TARAKAN_Nomor_213_Pid_Sus_2021_PN_Tar_Tanggal_14_Oktober_2021__Penuntut_Umum_KOMANG_NOPRIZAL_SAPUTRA__S_H_Terdakwa_MOH__YUSUF_DAENG_BETA_Als_IYUS_Bin_ABU_DAENG_BETA.pdf


INFO:text_extraction:poppler-utils installed successfully
ERROR:text_extraction:All PDF extraction methods failed for 2021_TK1_Putusan_PN_TARAKAN_Nomor_213_Pid_Sus_2021_PN_Tar_Tanggal_14_Oktober_2021__Penuntut_Umum_KOMANG_NOPRIZAL_SAPUTRA__S_H_Terdakwa_MOH__YUSUF_DAENG_BETA_Als_IYUS_Bin_ABU_DAENG_BETA.pdf
ERROR:text_extraction:Extraction failed or insufficient text: 2021_TK1_Putusan_PN_TARAKAN_Nomor_213_Pid_Sus_2021_PN_Tar_Tanggal_14_Oktober_2021__Penuntut_Umum_KOMANG_NOPRIZAL_SAPUTRA__S_H_Terdakwa_MOH__YUSUF_DAENG_BETA_Als_IYUS_Bin_ABU_DAENG_BETA.pdf
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.pdf


[20/80] Processing: 2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.pdf


INFO:text_extraction:Success with pdfminer_basic: 71527 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.pdf


SUCCESS: 2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.pdf -> raw_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.txt (71527 chars)
[21/80] Processing: 2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.pdf


INFO:text_extraction:Success with pdfminer_basic: 90477 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.pdf


SUCCESS: 2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.pdf -> raw_2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.txt (90477 chars)
[22/80] Processing: 2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.pdf


INFO:text_extraction:Success with pdfminer_basic: 72541 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.pdf


SUCCESS: 2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.pdf -> raw_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt (72541 chars)
[23/80] Processing: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.pdf


INFO:text_extraction:Success with pdfminer_basic: 83195 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.pdf


SUCCESS: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.pdf -> raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt (83195 chars)
[24/80] Processing: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.pdf


INFO:text_extraction:Success with pdfminer_basic: 91635 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.pdf -> raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.txt (91635 chars)
[25/80] Processing: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 64549 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (64549 chars)
[26/80] Processing: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 60440 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.pdf


SUCCESS: 2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (60440 chars)
[27/80] Processing: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 98002 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.pdf


SUCCESS: 2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.pdf -> raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.txt (98002 chars)
[28/80] Processing: 2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.pdf


INFO:text_extraction:Success with pdfminer_basic: 65137 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.pdf


SUCCESS: 2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.pdf -> raw_2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.txt (65137 chars)
[29/80] Processing: 2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.pdf


INFO:text_extraction:Success with pdfminer_basic: 48690 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.pdf


SUCCESS: 2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.pdf -> raw_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.txt (48690 chars)
[30/80] Processing: 2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.pdf


INFO:text_extraction:Success with pdfminer_basic: 281208 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.pdf


SUCCESS: 2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.pdf -> raw_2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.txt (281208 chars)
[31/80] Processing: 2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.pdf


INFO:text_extraction:Success with pdfminer_basic: 281140 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.pdf


SUCCESS: 2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.pdf -> raw_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt (281140 chars)
[32/80] Processing: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.pdf


INFO:text_extraction:Success with pdfminer_basic: 108866 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.pdf


SUCCESS: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.pdf -> raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.txt (108866 chars)
[33/80] Processing: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.pdf


INFO:text_extraction:Success with pdfminer_basic: 140931 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MANDAILING_NATAL_Nomor_87_Pid_Sus_2021_PN_Mdl_Tanggal_9_Agustus_2021__Penuntut_Umum_1_NURHAYATI_PULUNGAN__SH2_HERIYANTO_MANURUNG__SHTerdakwa_FITRIANI_ALIAS_JABUKE.pdf


SUCCESS: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.pdf -> raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt (140931 chars)
[34/80] Processing: 2021_TK1_Putusan_PN_MANDAILING_NATAL_Nomor_87_Pid_Sus_2021_PN_Mdl_Tanggal_9_Agustus_2021__Penuntut_Umum_1_NURHAYATI_PULUNGAN__SH2_HERIYANTO_MANURUNG__SHTerdakwa_FITRIANI_ALIAS_JABUKE.pdf


INFO:text_extraction:Success with pdfminer_basic: 108335 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MANDAILING_NATAL_Nomor_87_Pid_Sus_2021_PN_Mdl_Tanggal_9_Agustus_2021__Penuntut_Umum_1_NURHAYATI_PULUNGAN__SH2_HERIYANTO_MANURUNG__SHTerdakwa_FITRIANI_ALIAS_JABUKE.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.pdf


SUCCESS: 2021_TK1_Putusan_PN_MANDAILING_NATAL_Nomor_87_Pid_Sus_2021_PN_Mdl_Tanggal_9_Agustus_2021__Penuntut_Umum_1_NURHAYATI_PULUNGAN__SH2_HERIYANTO_MANURUNG__SHTerdakwa_FITRIANI_ALIAS_JABUKE.pdf -> raw_2021_TK1_Putusan_PN_MANDAILING_NATAL_Nomor_87_Pid_Sus_2021_PN_Mdl_Tanggal_9_Agustus_2021__Penuntut_Umum_1_NURHAYATI_PULUNGAN__SH2_HERIYANTO_MANURUNG__SHTerdakwa_FITRIANI_ALIAS_JABUKE.txt (108335 chars)
[35/80] Processing: 2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.pdf


INFO:text_extraction:Success with pdfminer_basic: 82104 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.pdf -> raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt (82104 chars)
[36/80] Processing: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 31365 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (31365 chars)
[37/80] Processing: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 33854 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_660_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (33854 chars)
[38/80] Processing: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_660_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 60190 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_660_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.pdf


SUCCESS: 2025_TK1_Putusan_PA_Sei_Rampah_Nomor_660_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_660_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (60190 chars)
[39/80] Processing: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.pdf


INFO:text_extraction:Success with pdfminer_basic: 138436 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.pdf


SUCCESS: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.pdf -> raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.txt (138436 chars)
[40/80] Processing: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.pdf


INFO:text_extraction:Success with pdfminer_basic: 142787 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1135_Pid_Sus_2021_PN_Sby_Tanggal_1_Juli_2021__Penuntut_Umum_SULFIKAR__SHTerdakwa_NUR_RAHMAT_KISWO_PRANGGONO_BIN_SENO_BT_PRANGGONO.pdf


SUCCESS: 2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.pdf -> raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt (142787 chars)
[41/80] Processing: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1135_Pid_Sus_2021_PN_Sby_Tanggal_1_Juli_2021__Penuntut_Umum_SULFIKAR__SHTerdakwa_NUR_RAHMAT_KISWO_PRANGGONO_BIN_SENO_BT_PRANGGONO.pdf


INFO:text_extraction:Success with pdfminer_basic: 41564 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_1135_Pid_Sus_2021_PN_Sby_Tanggal_1_Juli_2021__Penuntut_Umum_SULFIKAR__SHTerdakwa_NUR_RAHMAT_KISWO_PRANGGONO_BIN_SENO_BT_PRANGGONO.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.pdf


SUCCESS: 2021_TK1_Putusan_PN_SURABAYA_Nomor_1135_Pid_Sus_2021_PN_Sby_Tanggal_1_Juli_2021__Penuntut_Umum_SULFIKAR__SHTerdakwa_NUR_RAHMAT_KISWO_PRANGGONO_BIN_SENO_BT_PRANGGONO.pdf -> raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_1135_Pid_Sus_2021_PN_Sby_Tanggal_1_Juli_2021__Penuntut_Umum_SULFIKAR__SHTerdakwa_NUR_RAHMAT_KISWO_PRANGGONO_BIN_SENO_BT_PRANGGONO.txt (41564 chars)
[42/80] Processing: 2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.pdf


INFO:text_extraction:Success with pdfminer_basic: 53472 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.pdf


SUCCESS: 2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.pdf -> raw_2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.txt (53472 chars)
[43/80] Processing: 2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.pdf


INFO:text_extraction:Success with pdfminer_basic: 58221 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_341_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_9_Juni_2021__Penuntut_Umum_ERNI_PRAMOTI__SHTerdakwa_ARDIAN_FIRMANSYAH_BIN_IWONG_TASWAN_.pdf


SUCCESS: 2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.pdf -> raw_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.txt (58221 chars)
[44/80] Processing: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_341_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_9_Juni_2021__Penuntut_Umum_ERNI_PRAMOTI__SHTerdakwa_ARDIAN_FIRMANSYAH_BIN_IWONG_TASWAN_.pdf


INFO:text_extraction:Success with pdfminer_basic: 48657 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_341_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_9_Juni_2021__Penuntut_Umum_ERNI_PRAMOTI__SHTerdakwa_ARDIAN_FIRMANSYAH_BIN_IWONG_TASWAN_.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.pdf


SUCCESS: 2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_341_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_9_Juni_2021__Penuntut_Umum_ERNI_PRAMOTI__SHTerdakwa_ARDIAN_FIRMANSYAH_BIN_IWONG_TASWAN_.pdf -> raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_341_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_9_Juni_2021__Penuntut_Umum_ERNI_PRAMOTI__SHTerdakwa_ARDIAN_FIRMANSYAH_BIN_IWONG_TASWAN_.txt (48657 chars)
[45/80] Processing: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.pdf


INFO:text_extraction:Success with pdfminer_basic: 275734 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.pdf


SUCCESS: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.pdf -> raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.txt (275734 chars)
[46/80] Processing: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.pdf


INFO:text_extraction:Success with pdfminer_basic: 248336 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.pdf -> raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt (248336 chars)
[47/80] Processing: 2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 39985 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.pdf


SUCCESS: 2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (39985 chars)
[48/80] Processing: 2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.pdf


INFO:text_extraction:Success with pdfminer_basic: 30013 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.pdf -> raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt (30013 chars)
[49/80] Processing: 2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 52723 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.pdf


SUCCESS: 2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (52723 chars)
[50/80] Processing: 2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.pdf


INFO:text_extraction:Success with pdfminer_basic: 192520 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.pdf


SUCCESS: 2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.pdf -> raw_2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.txt (192520 chars)
[51/80] Processing: 2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.pdf


INFO:text_extraction:Success with pdfminer_basic: 46175 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_SUNIYE_Als_SOIMAH_Binti_ARIF.pdf


SUCCESS: 2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.pdf -> raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt (46175 chars)
[52/80] Processing: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_SUNIYE_Als_SOIMAH_Binti_ARIF.pdf


INFO:text_extraction:Success with pdfminer_basic: 82070 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_SUNIYE_Als_SOIMAH_Binti_ARIF.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.pdf


SUCCESS: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_SUNIYE_Als_SOIMAH_Binti_ARIF.pdf -> raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_SUNIYE_Als_SOIMAH_Binti_ARIF.txt (82070 chars)
[53/80] Processing: 2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.pdf


INFO:text_extraction:Success with pdfminer_basic: 70657 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.pdf


SUCCESS: 2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.pdf -> raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.txt (70657 chars)
[54/80] Processing: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.pdf


INFO:text_extraction:Success with pdfminer_basic: 84171 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.pdf


SUCCESS: 2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.pdf -> raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt (84171 chars)
[55/80] Processing: 2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.pdf


INFO:text_extraction:Success with pdfminer_basic: 366396 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.pdf


SUCCESS: 2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.pdf -> raw_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt (366396 chars)
[56/80] Processing: 2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.pdf


INFO:text_extraction:Success with pdfminer_basic: 95199 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_PADANG_Nomor_70_PID_SUS_2021_PT_PDG_Tanggal_27_April_2021__Pembanding_Penuntut_Umum_I___MEILYA_TRISNA__SH__MHTerbanding_Terdakwa___DIAN_EKA_PUTRA_Pgl__DIAN.pdf


SUCCESS: 2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.pdf -> raw_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.txt (95199 chars)
[57/80] Processing: 2021_TK1_Putusan_PT_PADANG_Nomor_70_PID_SUS_2021_PT_PDG_Tanggal_27_April_2021__Pembanding_Penuntut_Umum_I___MEILYA_TRISNA__SH__MHTerbanding_Terdakwa___DIAN_EKA_PUTRA_Pgl__DIAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 55305 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_PADANG_Nomor_70_PID_SUS_2021_PT_PDG_Tanggal_27_April_2021__Pembanding_Penuntut_Umum_I___MEILYA_TRISNA__SH__MHTerbanding_Terdakwa___DIAN_EKA_PUTRA_Pgl__DIAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.pdf


SUCCESS: 2021_TK1_Putusan_PT_PADANG_Nomor_70_PID_SUS_2021_PT_PDG_Tanggal_27_April_2021__Pembanding_Penuntut_Umum_I___MEILYA_TRISNA__SH__MHTerbanding_Terdakwa___DIAN_EKA_PUTRA_Pgl__DIAN.pdf -> raw_2021_TK1_Putusan_PT_PADANG_Nomor_70_PID_SUS_2021_PT_PDG_Tanggal_27_April_2021__Pembanding_Penuntut_Umum_I___MEILYA_TRISNA__SH__MHTerbanding_Terdakwa___DIAN_EKA_PUTRA_Pgl__DIAN.txt (55305 chars)
[58/80] Processing: 2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.pdf


INFO:text_extraction:Success with pdfminer_basic: 114420 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.pdf


SUCCESS: 2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.pdf -> raw_2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.txt (114420 chars)
[59/80] Processing: 2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.pdf


INFO:text_extraction:Success with pdfminer_basic: 90423 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid_Sus_2021_PN_Sag_Tanggal_22_April_2021__Penuntut_Umum_MIFA_AL_FAHMI__S_H_Terdakwa_BAKRI_Bin_CICUK_Alm.pdf


SUCCESS: 2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.pdf -> raw_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt (90423 chars)
[60/80] Processing: 2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid_Sus_2021_PN_Sag_Tanggal_22_April_2021__Penuntut_Umum_MIFA_AL_FAHMI__S_H_Terdakwa_BAKRI_Bin_CICUK_Alm.pdf


INFO:text_extraction:Success with pdfminer_basic: 59092 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid_Sus_2021_PN_Sag_Tanggal_22_April_2021__Penuntut_Umum_MIFA_AL_FAHMI__S_H_Terdakwa_BAKRI_Bin_CICUK_Alm.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.pdf


SUCCESS: 2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid_Sus_2021_PN_Sag_Tanggal_22_April_2021__Penuntut_Umum_MIFA_AL_FAHMI__S_H_Terdakwa_BAKRI_Bin_CICUK_Alm.pdf -> raw_2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid_Sus_2021_PN_Sag_Tanggal_22_April_2021__Penuntut_Umum_MIFA_AL_FAHMI__S_H_Terdakwa_BAKRI_Bin_CICUK_Alm.txt (59092 chars)
[61/80] Processing: 2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.pdf


INFO:text_extraction:Success with pdfminer_basic: 86282 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.pdf


SUCCESS: 2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.pdf -> raw_2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.txt (86282 chars)
[62/80] Processing: 2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.pdf


INFO:text_extraction:Success with pdfminer_basic: 85594 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.pdf -> raw_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt (85594 chars)
[63/80] Processing: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 16572 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1375_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (16572 chars)
[64/80] Processing: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1375_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 34973 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1375_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1375_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1375_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (34973 chars)
[65/80] Processing: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 34009 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.pdf


SUCCESS: 2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (34009 chars)
[66/80] Processing: 2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 79764 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_MARIA_YUNIANTI_JAGONG_Alias_YUNI.pdf


SUCCESS: 2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.pdf -> raw_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt (79764 chars)
[67/80] Processing: 2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_MARIA_YUNIANTI_JAGONG_Alias_YUNI.pdf


INFO:text_extraction:Success with pdfminer_basic: 67711 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_MARIA_YUNIANTI_JAGONG_Alias_YUNI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.pdf


SUCCESS: 2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_MARIA_YUNIANTI_JAGONG_Alias_YUNI.pdf -> raw_2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_MARIA_YUNIANTI_JAGONG_Alias_YUNI.txt (67711 chars)
[68/80] Processing: 2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 60340 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.pdf


SUCCESS: 2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.pdf -> raw_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt (60340 chars)
[69/80] Processing: 2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.pdf


INFO:text_extraction:Success with pdfminer_basic: 87134 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.pdf


SUCCESS: 2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.pdf -> raw_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.txt (87134 chars)
[70/80] Processing: 2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.pdf


INFO:text_extraction:Success with pdfminer_basic: 68532 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.pdf


SUCCESS: 2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.pdf -> raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.txt (68532 chars)
[71/80] Processing: 2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.pdf


INFO:text_extraction:Success with pdfminer_basic: 73863 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.pdf


SUCCESS: 2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.pdf -> raw_2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.txt (73863 chars)
[72/80] Processing: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.pdf


INFO:text_extraction:Success with pdfminer_basic: 68846 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_2_SUPRAYETNI_Binti_Alm_KASIMIN3_RAZALI_Bin_alm_SULAIMAN4_ROSMINI_BinTUBARA.pdf


SUCCESS: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.pdf -> raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt (68846 chars)
[73/80] Processing: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_2_SUPRAYETNI_Binti_Alm_KASIMIN3_RAZALI_Bin_alm_SULAIMAN4_ROSMINI_BinTUBARA.pdf


INFO:text_extraction:Success with pdfminer_basic: 70884 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_2_SUPRAYETNI_Binti_Alm_KASIMIN3_RAZALI_Bin_alm_SULAIMAN4_ROSMINI_BinTUBARA.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.pdf


SUCCESS: 2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_2_SUPRAYETNI_Binti_Alm_KASIMIN3_RAZALI_Bin_alm_SULAIMAN4_ROSMINI_BinTUBARA.pdf -> raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_2_SUPRAYETNI_Binti_Alm_KASIMIN3_RAZALI_Bin_alm_SULAIMAN4_ROSMINI_BinTUBARA.txt (70884 chars)
[74/80] Processing: 2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.pdf


INFO:text_extraction:Success with pdfminer_basic: 44772 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.pdf


SUCCESS: 2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.pdf -> raw_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt (44772 chars)
[75/80] Processing: 2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.pdf


INFO:text_extraction:Success with pdfminer_basic: 47107 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MATARAM_Nomor_12_PID_SUS_2021_PT_MTR_Tanggal_16_Februari_2021__Pembanding_Penuntut_Umum_I___SAHDI_SH_Terbanding_Terdakwa___H_HUSNUL_ANSORI_ALIAS_H_ANSORI.pdf


SUCCESS: 2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.pdf -> raw_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.txt (47107 chars)
[76/80] Processing: 2021_TK1_Putusan_PT_MATARAM_Nomor_12_PID_SUS_2021_PT_MTR_Tanggal_16_Februari_2021__Pembanding_Penuntut_Umum_I___SAHDI_SH_Terbanding_Terdakwa___H_HUSNUL_ANSORI_ALIAS_H_ANSORI.pdf


INFO:text_extraction:Success with pdfminer_basic: 63008 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_12_PID_SUS_2021_PT_MTR_Tanggal_16_Februari_2021__Pembanding_Penuntut_Umum_I___SAHDI_SH_Terbanding_Terdakwa___H_HUSNUL_ANSORI_ALIAS_H_ANSORI.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.pdf


SUCCESS: 2021_TK1_Putusan_PT_MATARAM_Nomor_12_PID_SUS_2021_PT_MTR_Tanggal_16_Februari_2021__Pembanding_Penuntut_Umum_I___SAHDI_SH_Terbanding_Terdakwa___H_HUSNUL_ANSORI_ALIAS_H_ANSORI.pdf -> raw_2021_TK1_Putusan_PT_MATARAM_Nomor_12_PID_SUS_2021_PT_MTR_Tanggal_16_Februari_2021__Pembanding_Penuntut_Umum_I___SAHDI_SH_Terbanding_Terdakwa___H_HUSNUL_ANSORI_ALIAS_H_ANSORI.txt (63008 chars)
[77/80] Processing: 2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.pdf


INFO:text_extraction:Success with pdfminer_basic: 46333 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.txt
INFO:text_extraction:Extracting from PDF: 2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.pdf


SUCCESS: 2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.pdf -> raw_2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.txt (46333 chars)
[78/80] Processing: 2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.pdf


INFO:text_extraction:Success with pdfminer_basic: 71392 characters
INFO:text_extraction:Raw text saved: raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.pdf -> raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt (71392 chars)
[79/80] Processing: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 34401 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_extraction:Extracting from PDF: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


SUCCESS: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (34401 chars)
[80/80] Processing: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf


INFO:text_extraction:Success with pdfminer_basic: 34649 characters
INFO:text_extraction:Raw text saved: raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt


SUCCESS: 2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.pdf -> raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (34649 chars)

EXTRACTION SUMMARY
PDF files processed: 79
Total successful extractions: 79
Raw text files saved to: /content/drive/MyDrive/perdagangan_orang/RAW_TEXT
Extraction report: /logs/extraction_report.csv
Extraction log: /logs/extraction.log

EXTRACTION COMPLETE!
Check raw text files in: /content/drive/MyDrive/perdagangan_orang/RAW_TEXT
Next step: Run text cleaning on raw files


Pembersihan

In [7]:
import os
import pandas as pd
import re
import logging
from datetime import date

In [14]:
import os
import re
import logging
from datetime import datetime

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TextCleaner:
    """Clean raw extracted text files"""

    def __init__(self, base_dir="/content/drive/MyDrive/perdagangan_orang"):
        self.base_dir = base_dir
        self.input_dir = os.path.join(base_dir, "RAW_TEXT")  # Input: raw text dari tahap ii
        self.output_dir = "/data/raw"  # Output: cleaned text files
        self.gdrive_output_dir = os.path.join(base_dir, "CLEANED")  # Google Drive backup
        self.gdrive_data_raw_dir = os.path.join(base_dir, "data", "raw")  # Mirror of /data/raw in gdrive
        self.logs_dir = "/logs"  # Local logs
        self.gdrive_logs_dir = os.path.join(base_dir, "logs")  # Google Drive logs mirror

        # Create directories
        os.makedirs(self.input_dir, exist_ok=True)
        os.makedirs(self.output_dir, exist_ok=True)
        os.makedirs(self.gdrive_output_dir, exist_ok=True)
        os.makedirs(self.gdrive_data_raw_dir, exist_ok=True)
        os.makedirs(self.logs_dir, exist_ok=True)
        os.makedirs(self.gdrive_logs_dir, exist_ok=True)

        print(f"Input (raw text): {self.input_dir}")
        print(f"Output 1 (data/raw): {self.output_dir}")
        print(f"Output 2 (gdrive): {self.gdrive_output_dir}")
        print(f"Output 3 (gdrive/data/raw): {self.gdrive_data_raw_dir}")
        print(f"Logs 1 (local): {self.logs_dir}")
        print(f"Logs 2 (gdrive): {self.gdrive_logs_dir}")

        # Setup cleaning logger
        self.setup_cleaning_logger()

    def setup_cleaning_logger(self):
        """Setup dedicated cleaning logger with dual output"""
        self.cleaning_logger = logging.getLogger('text_cleaning')
        self.cleaning_logger.setLevel(logging.INFO)

        # Remove existing handlers
        for handler in self.cleaning_logger.handlers[:]:
            self.cleaning_logger.removeHandler(handler)

        # Create file handlers for both locations
        log_file_local = os.path.join(self.logs_dir, 'cleaning.log')
        log_file_gdrive = os.path.join(self.gdrive_logs_dir, 'cleaning.log')

        # Local log handler
        file_handler_local = logging.FileHandler(log_file_local, mode='a', encoding='utf-8')
        file_handler_local.setLevel(logging.INFO)

        # Google Drive log handler
        file_handler_gdrive = logging.FileHandler(log_file_gdrive, mode='a', encoding='utf-8')
        file_handler_gdrive.setLevel(logging.INFO)

        # Create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )
        file_handler_local.setFormatter(formatter)
        file_handler_gdrive.setFormatter(formatter)

        # Add both handlers
        self.cleaning_logger.addHandler(file_handler_local)
        self.cleaning_logger.addHandler(file_handler_gdrive)

        self.cleaning_logger.info("="*60)
        self.cleaning_logger.info("TEXT CLEANING SESSION STARTED")
        self.cleaning_logger.info("="*60)

    # =================== 1. HAPUS HEADER/FOOTER/WATERMARK ===================

    def remove_headers_footers_watermarks(self, text):
        """Step 1: Remove headers, footers, page numbers, and watermarks"""
        if not isinstance(text, str) or not text.strip():
            return ""

        original_length = len(text)

        # MA specific headers and footers (exact patterns)
        ma_patterns = [
            # MA header with various spacing
            r'M\s*[Aa]\s*[Hh]\s*[Kk]\s*[Aa]\s*[Mm]\s*[Aa]\s*[Hh]\s*\s*[Aa]\s*[Gg]\s*[Uu]\s*[Nn]\s*[Gg]\s*\s*[Rr]\s*[Ee]\s*[Pp]\s*[Uu]\s*[Bb]\s*[Ll]\s*[Ii]\s*[Kk]\s*\s*[Ii]\s*[Nn]\s*[Dd]\s*[Oo]\s*[Nn]\s*[Ee]\s*[Ss]\s*[Ii]\s*[Aa]',

            # Disclaimer section
            r'Disclaimer\s*',
            r'Kepaniteraan Mahkamah Agung Republik Indonesia berusaha untuk selalu mencantumkan informasi paling kini dan akurat sebagai bentuk komitmen Mahkamah Agung untuk pelayanan publik, transparansi dan akuntabilitas',
            r'pelaksanaan fungsi peradilan\.\s*Namun dalam hal-hal tertentu masih dimungkinkan terjadi permasalahan teknis terkait dengan akurasi dan keterkinian informasi yang kami sajikan, hal mana akan terus kami perbaiki dari waktu kewaktu\.',
            r'Dalam hal Anda menemukan inakurasi informasi yang termuat pada situs ini atau informasi yang seharusnya ada, namun belum tersedia, maka harap segera hubungi Kepaniteraan Mahkamah Agung RI melalui\s*:',
            r'Email\s*:\s*kepaniteraan@mahkamahagung\.go\.id\s*Telp\s*:\s*021-384\s*3348\s*',
        ]

        # Page numbers and navigation
        page_patterns = [
            r'Halaman\s+\d+\s+dari\s+\d+',
            r'Page\s+\d+\s+of\s+\d+',
            r'^\s*\d+\s*$',  # Standalone numbers
            r'^\s*-\s*\d+\s*-\s*$',  # -1-, -2-, etc
            r'^\s*\d+\s*/\s*\d+\s*$',  # 1/10, 2/10, etc
        ]

        # Watermarks and document stamps
        watermark_patterns = [
            r'SALINAN PUTUSAN',
            r'COPY\s+OF\s+VERDICT',
            r'DOKUMEN\s+ELEKTRONIK',
            r'ELECTRONIC\s+DOCUMENT',
            r'^\s*CONFIDENTIAL\s*$',
            r'^\s*RAHASIA\s*$',
            r'^\s*DRAFT\s*$',
            r'FOR\s+INTERNAL\s+USE\s+ONLY',
        ]

        # Apply all header/footer/watermark removals
        all_patterns = ma_patterns + page_patterns + watermark_patterns

        for pattern in all_patterns:
            text = re.sub(pattern, '', text, flags=re.IGNORECASE | re.MULTILINE)

        removed_chars = original_length - len(text)
        self.cleaning_logger.info(f"Headers/footers/watermarks removed: {removed_chars} characters")

        return text

    # =================== 2. NORMALISASI SPASI DAN KARAKTER ===================

    def normalize_spacing_and_characters(self, text, lowercase=True, remove_punctuation=False):
        """Step 2: Normalize spacing and characters"""
        if not isinstance(text, str) or not text.strip():
            return ""

        original_length = len(text)

        # Convert to lowercase if requested
        if lowercase:
            text = text.lower()
            self.cleaning_logger.info("Text converted to lowercase")

        # Normalize line breaks and spacing
        text = re.sub(r'\r\n', '\n', text)  # Windows line endings
        text = re.sub(r'\r', '\n', text)    # Mac line endings
        text = re.sub(r'\n\s*\n\s*\n+', '\n\n', text)  # Multiple line breaks to double
        text = re.sub(r'[ \t]+', ' ', text)  # Multiple spaces/tabs to single space
        text = re.sub(r'\n[ \t]+', '\n', text)  # Remove spaces at beginning of lines
        text = re.sub(r'[ \t]+\n', '\n', text)  # Remove spaces at end of lines

        # Fix common OCR errors (conservative approach)
        ocr_fixes = {
            r'\brn\b': 'm',     # "rn" → "m" only if standalone
            r'\bvv\b': 'w',     # "vv" → "w" only if standalone
            r'\bl1\b': 'll',    # "l1" → "ll" only if standalone
            r'\b0\b': 'o',      # "0" → "o" only if standalone in text context
        }

        for pattern, replacement in ocr_fixes.items():
            text = re.sub(pattern, replacement, text)

        # Handle punctuation
        if remove_punctuation:
            # Remove punctuation but keep sentence structure
            text = re.sub(r'[^\w\s\n]', '', text)
            self.cleaning_logger.info("Punctuation removed")
        else:
            # Just normalize punctuation spacing
            text = re.sub(r'\s+([.,;:!?])', r'\1', text)  # Remove spaces before punctuation
            text = re.sub(r'([.,;:!?])\s*([.,;:!?])', r'\1\2', text)  # Fix double punctuation

        # Final spacing cleanup
        text = re.sub(r'\s+', ' ', text)  # Multiple spaces to single
        text = re.sub(r'\n\s+', '\n', text)  # Spaces after newlines
        text = re.sub(r'\s+\n', '\n', text)  # Spaces before newlines
        text = re.sub(r'\n{3,}', '\n\n', text)  # Max 2 consecutive newlines

        normalized_chars = original_length - len(text)
        self.cleaning_logger.info(f"Spacing/character normalization: {normalized_chars} characters removed")

        return text.strip()

    # =================== COMPLETE CLEANING PIPELINE ===================

    def clean_single_text(self, text, remove_punctuation=False):
        """Complete cleaning pipeline for single text"""
        if not isinstance(text, str) or not text.strip():
            return ""

        original_length = len(text)
        self.cleaning_logger.info(f"Starting cleaning. Original length: {original_length} characters")

        # Step 1: Remove headers, footers, watermarks
        text = self.remove_headers_footers_watermarks(text)
        step1_length = len(text)

        # Step 2: Normalize spacing and characters
        text = self.normalize_spacing_and_characters(text, lowercase=True, remove_punctuation=remove_punctuation)
        final_length = len(text)

        # Calculate reduction
        total_reduction = ((original_length - final_length) / original_length * 100) if original_length > 0 else 0

        self.cleaning_logger.info(f"Cleaning complete. Final length: {final_length} characters ({total_reduction:.1f}% reduction)")

        # Quality check
        if final_length < original_length * 0.1:  # Less than 10% remaining
            self.cleaning_logger.warning("Over 90% of text was removed - check if cleaning is too aggressive")

        return text

    def process_single_file(self, input_filename, remove_punctuation=False):
        """Process single raw text file"""
        input_path = os.path.join(self.input_dir, input_filename)

        if not os.path.exists(input_path):
            self.cleaning_logger.error(f"File not found: {input_path}")
            return False

        try:
            # Read raw text
            with open(input_path, 'r', encoding='utf-8') as f:
                raw_text = f.read()

            self.cleaning_logger.info(f"Processing file: {input_filename}")

            if not raw_text.strip():
                self.cleaning_logger.warning(f"Empty file: {input_filename}")
                return False

            # Clean text
            cleaned_text = self.clean_single_text(raw_text, remove_punctuation)

            if not cleaned_text.strip():
                self.cleaning_logger.error(f"Cleaning resulted in empty text: {input_filename}")
                return False

            # Generate output filename (case_XXX.txt format as specified)
            base_name = input_filename.replace('raw_', '').replace('.txt', '')
            output_filename = f"case_{base_name}.txt"
            output_path_data = os.path.join(self.output_dir, output_filename)
            output_path_gdrive = os.path.join(self.gdrive_output_dir, output_filename)
            output_path_gdrive_data = os.path.join(self.gdrive_data_raw_dir, output_filename)

            # Step 3: Save cleaned text to ALL THREE locations
            # Save to /data/raw/
            with open(output_path_data, 'w', encoding='utf-8') as f:
                f.write(cleaned_text)

            # Save to Google Drive CLEANED
            with open(output_path_gdrive, 'w', encoding='utf-8') as f:
                f.write(cleaned_text)

            # Save to Google Drive data/raw
            with open(output_path_gdrive_data, 'w', encoding='utf-8') as f:
                f.write(cleaned_text)

            self.cleaning_logger.info(f"Cleaned file saved to all three locations: {output_filename}")
            print(f"SUCCESS: {input_filename} -> {output_filename} (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)")

            return True

        except Exception as e:
            self.cleaning_logger.error(f"Error processing {input_filename}: {str(e)}")
            print(f"ERROR: {input_filename}: {str(e)}")
            return False

    def process_all_files(self, remove_punctuation=False):
        """Process all raw text files"""
        text_files = [f for f in os.listdir(self.input_dir)
                     if f.endswith(('.txt', '.TXT')) and os.path.isfile(os.path.join(self.input_dir, f))]

        if not text_files:
            print(f"No text files found in {self.input_dir}")
            return

        print(f"Found {len(text_files)} files to process")
        print(f"Remove punctuation: {'YES' if remove_punctuation else 'NO'}")
        print("="*60)

        success_count = 0
        error_count = 0

        for i, filename in enumerate(text_files, 1):
            print(f"[{i}/{len(text_files)}] {filename}")
            if self.process_single_file(filename, remove_punctuation):
                success_count += 1
            else:
                error_count += 1

        print("\n" + "="*60)
        print("CLEANING SUMMARY:")
        print(f"Success: {success_count}")
        print(f"Errors: {error_count}")
        print(f"Output 1: {self.output_dir}")
        print(f"Output 2: {self.gdrive_output_dir}")
        print(f"Output 3: {self.gdrive_data_raw_dir}")

        # Create cleaning summary
        self.create_cleaning_summary(success_count, error_count, text_files)

    def create_cleaning_summary(self, success_count, error_count, processed_files):
        """Create cleaning summary report"""
        summary_content = f"""TEXT CLEANING SUMMARY
===================
Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Total files found: {len(processed_files)}
Successfully cleaned: {success_count}
Errors: {error_count}
Success rate: {(success_count/len(processed_files)*100):.1f}%

Output directory 1: {self.output_dir}
Output directory 2: {self.gdrive_output_dir}
Output directory 3: {self.gdrive_data_raw_dir}
Log file 1: {os.path.join(self.logs_dir, 'cleaning.log')}
Log file 2: {os.path.join(self.gdrive_logs_dir, 'cleaning.log')}

Files processed:
"""

        for i, filename in enumerate(processed_files, 1):
            summary_content += f"{i:3d}. {filename}\n"

        # Save summary to both locations
        summary_path_local = os.path.join(self.logs_dir, 'cleaning_summary.txt')
        summary_path_gdrive = os.path.join(self.gdrive_logs_dir, 'cleaning_summary.txt')

        with open(summary_path_local, 'w', encoding='utf-8') as f:
            f.write(summary_content)

        with open(summary_path_gdrive, 'w', encoding='utf-8') as f:
            f.write(summary_content)

        print(f"Summary saved to: {summary_path_local}")
        print(f"Summary saved to: {summary_path_gdrive}")
        self.cleaning_logger.info(f"Summary reports created in both locations")

# Utility functions
def clean_single_text_quick(text, remove_punctuation=False):
    """Quick function to clean single text without logging"""
    cleaner = TextCleaner()
    return cleaner.clean_single_text(text, remove_punctuation=remove_punctuation)

def clean_text_from_file(input_path, output_path, remove_punctuation=False):
    """Clean text from specific file and save to specific location"""
    try:
        with open(input_path, 'r', encoding='utf-8') as f:
            raw_text = f.read()

        cleaned_text = clean_single_text_quick(raw_text, remove_punctuation)

        with open(output_path, 'w', encoding='utf-8') as f:
            f.write(cleaned_text)

        print(f"Cleaned text saved: {output_path}")
        return True

    except Exception as e:
        print(f"Error: {e}")
        return False

def main():
    """Main execution function"""
    print("iii. PEMBERSIHAN TEKS")
    print("=" * 50)
    print("Input: Raw text files hasil ekstraksi PDF")
    print("Output 1: Clean text files di /data/raw/")
    print("Output 2: Clean text files di Google Drive/CLEANED")
    print("Output 3: Clean text files di Google Drive/data/raw")
    print("Logs: /logs/ dan Google Drive/logs/")
    print("=" * 50)

    # Initialize cleaner
    cleaner = TextCleaner("/content/drive/MyDrive/perdagangan_orang")

    # Process all raw text files (keep punctuation by default)
    cleaner.process_all_files(remove_punctuation=False)

    print("\nCLEANING PROCESS COMPLETE!")
    print(f"Check output files in: /data/raw/")
    print(f"Check output files in: {cleaner.gdrive_output_dir}")
    print(f"Check output files in: {cleaner.gdrive_data_raw_dir}")
    print(f"Check logs in: /logs/cleaning.log")
    print(f"Check logs in: {cleaner.gdrive_logs_dir}/cleaning.log")

# Execute
if __name__ == "__main__":
    main()

INFO:text_cleaning:TEXT CLEANING SESSION STARTED
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.txt
INFO:text_cleaning:Starting cleaning. Original length: 55160 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 12724 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 971 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 41464 characters (24.8% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PELAIHARI_Nomor_179_Pid_Sus_2021_PN

iii. PEMBERSIHAN TEKS
Input: Raw text files hasil ekstraksi PDF
Output 1: Clean text files di /data/raw/
Output 2: Clean text files di Google Drive/CLEANED
Output 3: Clean text files di Google Drive/data/raw
Logs: /logs/ dan Google Drive/logs/
Input (raw text): /content/drive/MyDrive/perdagangan_orang/RAW_TEXT
Output 1 (data/raw): /data/raw
Output 2 (gdrive): /content/drive/MyDrive/perdagangan_orang/CLEANED
Output 3 (gdrive/data/raw): /content/drive/MyDrive/perdagangan_orang/data/raw
Logs 1 (local): /logs
Logs 2 (gdrive): /content/drive/MyDrive/perdagangan_orang/logs
Found 79 files to process
Remove punctuation: NO
[1/79] raw_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa___Herman_Saputra_Rafiudin_Alias_Herman.txt
SUCCESS: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_145_PID_SUS_2021_PT_MTR_Tanggal_20_Desember_2021__Pembanding_Penuntut_Umum___MANIK_ARTHA_ADHITAMA__SHTerbanding_Terdakwa_

INFO:text_cleaning:Spacing/character normalization: 1083 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 45902 characters (26.3% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.txt
INFO:text_cleaning:Starting cleaning. Original length: 87277 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 21683 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 843 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 64750 characters (25.8% reduct

SUCCESS: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.txt -> case_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ_DIAN_CINDRAWATI_Alias_DIAN.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[4/79] raw_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.txt
SUCCESS: raw_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__SH_Terdakwa_JEKSON_RAJAGUKGUK_Alias_JECO_Anak_dari_ALBERT_RAJAGUKGUK.txt -> case_2021_TK1_Putusan_PN_BALIKPAPAN_Nomor_412_Pid_Sus_2021_PN_Bpp_Tanggal_30_Nopember_2021__Penuntut_Umum_Ita_Wahyuning_Lestari__S

INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt
INFO:text_cleaning:Starting cleaning. Original length: 114504 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 24675 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 3055 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 86773 characters (24.2% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PA

SUCCESS: raw_2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.txt -> case_2021_TK1_Putusan_PT_PEKANBARU_Nomor_494_PID_SUS_2021_PT_PBR_Tanggal_17_Nopember_2021__Pembanding_Terbanding_Terdakwa___EKO_SUMBARA_Alias_EKO_Bin_MUHMMAD_NASIR_Alm_Diwakili_Oleh___ANDI_NUGRAHG__SH_.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[7/79] raw_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt
SUCCESS: raw_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt -> case_2021_TK1_Putusan_PN_MAKALE_Nomor_92_Pid_Sus_2021_PN_Mak_Tanggal_15_Nopember_2021__Penuntut_Umum_MARGARETHA_H__PATURU__S_H_Terdakwa_SRI_SUNARTI_alias_MAMI.txt (saved to /da

INFO:text_cleaning:Headers/footers/watermarks removed: 13455 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 315 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 41771 characters (24.8% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.txt
INFO:text_cleaning:Starting cleaning. Original length: 50031 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 12410 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalizat

SUCCESS: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt -> case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[10/79] raw_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.txt
SUCCESS: raw_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerbanding_Pembanding_Penuntut_Umum___FEDDY_HANTYO_NUG__M_H_.txt -> case_2021_TK1_Putusan_PT_MATARAM_Nomor_120_PID_SUS_2021_PT_MTR_Tanggal_8_Nopember_2021__Pembanding_Terbanding_Terdakwa___RATNI__SH_Alias_RANITerb

INFO:text_cleaning:Spacing/character normalization: 1208 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 177187 characters (24.2% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.txt
INFO:text_cleaning:Starting cleaning. Original length: 158417 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 38887 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 939 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 118590 c

SUCCESS: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.txt -> case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_214_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ISMAEL_IBRAHIM_KHALEEL__alias_ISMAILI_AJAT.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[13/79] raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.txt
SUCCESS: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_JIHANTO_NUR_RACHMAN__SHTerdakwa_1_ULFIYATI_Alias_ULFI_Binti_SUTIMAN2_MAHFUDZ_SIDDIQ_Alias_M_DAKIM.txt -> case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_213_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021_

INFO:text_cleaning:Spacing/character normalization: 1138 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 171020 characters (24.1% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt
INFO:text_cleaning:Processing file: raw_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Starting cleaning. Original length: 42101 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 11244 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 317 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 30539 characters (27.5% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations:

SUCCESS: raw_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt -> case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[15/79] raw_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
SUCCESS: raw_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_LUBUK_PAKAM_Nomor_2063_Pdt_G_2025_PA_Lpk_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[16/79] raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Jun

INFO:text_cleaning:Headers/footers/watermarks removed: 125892 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 2737 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 413393 characters (23.7% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.txt
INFO:text_cleaning:Starting cleaning. Original length: 71527 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 17195 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing

SUCCESS: raw_2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.txt -> case_2021_TK1_Putusan_PT_BANTEN_Nomor_108_PID_SUS_2021_PT_BTN_Tanggal_18_Oktober_2021__Pembanding_Penuntut_Umum___AGUSTRI_HARTONO__SH__MHTerbanding_Terdakwa___TOFIK_TRIYATNO_Bin_TASMIARJO_ALM.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[19/79] raw_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.txt
SUCCESS: raw_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR_FIHAWIANO_SH2_AHMAD_SULHAN_S_H3_Erlysa_Said__S_H_Terdakwa_MELFI_INDIRIATI_PUTRI_Als__S_BATO.txt -> case_2021_TK1_Putusan_PN_PALOPO_Nomor_114_Pid_Sus_2021_PN_Plp_Tanggal_12_Oktober_2021__Penuntut_Umum_1_YANUAR

INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt
INFO:text_cleaning:Starting cleaning. Original length: 83195 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 20187 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 628 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 62379 characters (25.0% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVI

SUCCESS: raw_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt -> case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[22/79] raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt
SUCCESS: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt -> case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_124_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_OSI_NAVITALIA_ALS_OCI_BINTI_SUNARTO.txt (saved to /data/raw, gdrive/CLEANED, and gdrive

INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Processing file: raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Starting cleaning. Original length: 60440 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 15911 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 357 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 44171 characters (26.9% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum

SUCCESS: raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1378_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[25/79] raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
SUCCESS: raw_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[26/79] raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.txt
SUCCESS: raw_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Ta

INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.txt
INFO:text_cleaning:Starting cleaning. Original length: 48690 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 11959 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 416 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 36314 characters (25.4% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agus

SUCCESS: raw_2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.txt -> case_2021_TK1_Putusan_PN_ROKAN_HILIR_Nomor_234_Pid_Sus_2021_PN_Rhl_Tanggal_6_September_2021__Penuntut_Umum_1_MARULITUA_J__SITANGGANG__SH_2_YONGKI_ARVIUS__S_H_MHTerdakwa_EKO_SUMBARA_Alias_EKO_Bin_IR_Alm.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[28/79] raw_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.txt
SUCCESS: raw_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pembanding_Penuntut_Umum___GORUT_PERTHIKA__SHTerbanding_Terdakwa_I___MAYANG_APRILLA_RAHMAYANTI_als_MAMI_APRILA_CHOI.txt -> case_2021_TK1_Putusan_PT_BANTEN_Nomor_92_PID_SUS_2021_PT_BTN_Tanggal_31_Agustus_2021__Pem

INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt
INFO:text_cleaning:Starting cleaning. Original length: 281140 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 64319 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 1140 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 215680 characters (23.3% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_

SUCCESS: raw_2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.txt -> case_2021_TK1_Putusan_PN_Ngabang_Nomor_65_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Wan_Wan_Anak_dari_Alm_Liu_Po_Fha.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[30/79] raw_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt
SUCCESS: raw_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt -> case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[31/7

INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 513 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 82929 characters (23.8% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt
INFO:text_cleaning:Starting cleaning. Original length: 140931 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 32903 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 661 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 107366 characters (23.8% reduction

SUCCESS: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.txt -> case_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_596_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_ALI_NURUDIN_ALIAS_ALI_.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[32/79] raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt
SUCCESS: raw_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt -> case_2021_TK1_Putusan_PN_JAKARTA_UTARA_Nomor_595_Pid_Sus_2021_PN_Jkt_Utr_Tanggal_16_Agustus_2021__Penuntut_Umum_DYOFA_YUDHISTIRA__SHTerdakwa_YUDHISTIRA_ARMIN_ALIAS_YUDI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)


INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt
INFO:text_cleaning:Starting cleaning. Original length: 82104 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 18691 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 530 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 62882 characters (23.4% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt
INFO:text_cleaning:Processing file: raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Starting cleaning. Origin

SUCCESS: raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt -> case_2021_TK1_Putusan_PN_BENGKULU_Nomor_227_Pid_Sus_2021_PN_Bgl_Tanggal_22_Juli_2021__Penuntut_Umum_SRI_RAHMITerdakwa_RIWANSYAH__S_Pd_Als_RIWAN_Als_CIN_Als_MAMI_Bin_YASUR_I.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[35/79] raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
SUCCESS: raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_648_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[36/79] raw_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
SUCCESS: raw_2025_TK1_Putu

INFO:text_cleaning:Spacing/character normalization: 608 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 105672 characters (23.7% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt
INFO:text_cleaning:Starting cleaning. Original length: 142787 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 30659 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 644 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 111483 characters (21.9% reduction)
INFO:text_cleaning:Cleaned file saved t

SUCCESS: raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.txt -> case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_78_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_HAFIZ_AKBAR_RITONGA__SHTerdakwa_NURSANTI_alias_BUNDA_binti_TASWIN_MOITA.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[39/79] raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt
SUCCESS: raw_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt -> case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data

INFO:text_cleaning:Spacing/character normalization: 386 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 39630 characters (25.9% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.txt
INFO:text_cleaning:Starting cleaning. Original length: 58221 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 14971 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 341 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 42908 characters (26.3% reduction)
INFO:text_

SUCCESS: raw_2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.txt -> case_2021_TK1_Putusan_PT_KUPANG_Nomor_82_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terbanding_Terdakwa_II___YOPPI_NALLETerbanding_Pembanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_HTerbaSEMUEL.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[42/79] raw_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.txt
SUCCESS: raw_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MALLAKA__S_H.txt -> case_2021_TK1_Putusan_PT_KUPANG_Nomor_77_PID_2021_PT_KPG_Tanggal_30_Juni_2021__Pembanding_Terdakwa_I___YOPPI_NALLETerbanding_Penuntut_Umum___CHRISTOFEL_H__MA

INFO:text_cleaning:Spacing/character normalization: 1248 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 213906 characters (22.4% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt
INFO:text_cleaning:Starting cleaning. Original length: 248336 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 55343 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 1122 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 191870 characters (22.7% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations

SUCCESS: raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.txt -> case_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_59_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_ADE_MULYANI__SHTerdakwa_AHMAD_MUAMAR_Alias_AMAR_bin_MUHDLOR.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[45/79] raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt
SUCCESS: raw_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt -> case_2021_TK1_Putusan_PN_MAJALENGKA_Nomor_58_Pid_Sus_2021_PN_Mjl_Tanggal_2_Juni_2021__Penuntut_Umum_DANU_TRISNAWANTO__S_H_Terdakwa_AGUNG_SUBEKTI_Bin_DURIA.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[46/79] raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_883

INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Processing file: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt
INFO:text_cleaning:Starting cleaning. Original length: 30013 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 8760 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 210 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 21042 characters (29.9% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt
INFO:text_cleaning:Processing file: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
IN

SUCCESS: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[47/79] raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt
SUCCESS: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt -> case_2025_TK1_Putusan_PA_PONOROGO_Nomor_188_Pdt_P_2025_PA_Po_Tanggal_25_Juni_2025__Pemohon_melawan_Termohon.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[48/79] raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
SUCCESS: raw_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_PONOROGO_Nomor_746_P

INFO:text_cleaning:Spacing/character normalization: 1056 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 147663 characters (23.3% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt
INFO:text_cleaning:Starting cleaning. Original length: 46175 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 12707 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 261 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 33206 characters (28.1% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2

SUCCESS: raw_2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.txt -> case_2021_TK1_Putusan_PN_DENPASAR_Nomor_211_Pid_Sus_2021_PN_Dps_Tanggal_25_Mei_2021__Penuntut_Umum_Dewi_Agustin_Adiputri__SH_MHTerdakwa_Maulana_Aldi.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[50/79] raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt
SUCCESS: raw_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt -> case_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[51/79] raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_141_P

INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt
INFO:text_cleaning:Starting cleaning. Original length: 84171 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 18980 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 931 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 64259 characters (23.7% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAH

SUCCESS: raw_2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.txt -> case_2021_TK1_Putusan_PN_BENGKULU_Nomor_87_Pid_Sus_2021_PN_Bgl_Tanggal_4_Mei_2021__Penuntut_Umum_J_HUTAGAOL_SH_MHTerdakwa_HIKMAT_DEKI_Als_DEKI_Als_MAMI_SHISI_Bin_WAHIDIN.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[53/79] raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt
SUCCESS: raw_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt -> case_2021_TK1_Putusan_PN_TENGGARONG_Nomor_142_Pid_Sus_2021_PN_Trg_Tanggal_4_Mei_2021__Penuntut_Umum_FITRI_IRA_P__SH_Terdakwa_RANI_Als_RANI_RAHAYU_Alias_DIFA_Binti_IPAR.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/d

INFO:text_cleaning:Spacing/character normalization: 2249 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 273504 characters (25.4% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.txt
INFO:text_cleaning:Starting cleaning. Original length: 95199 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 22431 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 618 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 72149 characters (24.2% reduction)
INFO:text_cleaning

SUCCESS: raw_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt -> case_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[55/79] raw_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.txt
SUCCESS: raw_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.txt -> case_2021_TK1_Putusan_PN_TANJUNG_SELOR_Nomor_19_Pid_Sus_2021_PN_Tjs_Tanggal_28_April_2021__Penuntut_Umum_DANU_BAGUS_PRATAMA__S_HTerdakwa_MUHAMAD_SAFRIANSYAH_Als_BULOT_Bin_MILI.t

INFO:text_cleaning:Spacing/character normalization: 878 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 88721 characters (22.5% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt
INFO:text_cleaning:Starting cleaning. Original length: 90423 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 20187 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 437 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 69798 characters (22.8% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2

SUCCESS: raw_2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.txt -> case_2021_TK1_Putusan_PN_SUBANG_Nomor_66_Pid_Sus_2021_PN_SNG_Tanggal_27_April_2021__Penuntut_Umum_ADITYO_ISMUTOMO__SH_Terdakwa_WANAP_als_MANAP_bin_TAKIM.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[58/79] raw_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt
SUCCESS: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt -> case_2021_TK1_Putusan_PN_AMURANG_Nomor_5_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_VICKY_FERNANDO_BAHIHI_alias_VIKI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[59/79] raw_2021_TK1_Putusan_PN_SANGGAU_Nomor_75_Pid

INFO:text_cleaning:Spacing/character normalization: 409 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 66433 characters (23.0% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt
INFO:text_cleaning:Starting cleaning. Original length: 85594 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 18691 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 403 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 66499 characters (22.3% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_20

SUCCESS: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.txt -> case_2021_TK1_Putusan_PN_AMURANG_Nomor_6_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RICKY_JUNIOR_TUMBELAKA_alias_RIKI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[61/79] raw_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt
SUCCESS: raw_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt -> case_2021_TK1_Putusan_PN_AMURANG_Nomor_7_Pid_Sus_2021_PN_Amr_Tanggal_22_April_2021__Penuntut_Umum_M__REZA_PAHLEPI__SHTerdakwa_RIJAL_SUMAMPOW_alias_JAL.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[62/79] raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1419_Pdt_G_2025

INFO:text_cleaning:Starting cleaning. Original length: 34009 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 9522 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 183 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 24303 characters (28.5% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt
INFO:text_cleaning:Starting cleaning. Original length: 79764 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 18691 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 389 characters remo

SUCCESS: raw_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_LAMONGAN_Nomor_1436_Pdt_G_2025_PA_Lmg_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[65/79] raw_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt
SUCCESS: raw_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt -> case_2021_TK1_Putusan_PN_ENDE_Nomor_11_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1_OKKY_PRASETYO_AJIE2_TERESIA_WEKO__SHTerdakwa_STEFANUS_KUASA_Alias_EFAN.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[66/79] raw_2021_TK1_Putusan_PN_ENDE_Nomor_10_Pid_Sus_2021_PN_End_Tanggal_19_April_2021__Penuntut_Umum_1

INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt
INFO:text_cleaning:Starting cleaning. Original length: 60340 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 14951 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 683 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 44705 characters (25.9% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_

SUCCESS: raw_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt -> case_2021_TK1_Putusan_PN_BANYUWANGI_Nomor_76_Pid_Sus_2021_PN_Byw_Tanggal_12_April_2021__Penuntut_Umum_1_I_KETUT_GDE_DAME_NEGARA__SH2_GANDHI_MUCHLISIN__S_H_Terdakwa_SUWITO__Als__PAK_TO_Bin_MUSIMAN.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[68/79] raw_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.txt
SUCCESS: raw_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_GOMGOMAN_H_SIMBOLON__S_H___M_H_2_GANES_ADI_KUSUMA__S_H_Terdakwa_RINDA_EVANNA_HOTMAULI_SIAIAHAAN.txt -> case_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Um

INFO:text_cleaning:Headers/footers/watermarks removed: 17178 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 643 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 56041 characters (24.1% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt
INFO:text_cleaning:Starting cleaning. Original length: 68846 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 17925 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 497 characters removed
INFO:text_cleaning:Cleaning complete. Final le

SUCCESS: raw_2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.txt -> case_2021_TK1_Putusan_PN_SUBANG_Nomor_40_Pid_Sus_2021_PN_SNG_Tanggal_17_Maret_2021__Penuntut_Umum_AZAM_AKHMAD_AKHSYA__S_H_Terdakwa_RIZAL_FIKRI_NURROHIMUDIN.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[71/79] raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt
SUCCESS: raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt -> case_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[72/79] raw_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_23_Pid_Sus_2021_PN_Ksp_Tanggal_8_

INFO:text_cleaning:Headers/footers/watermarks removed: 11959 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 598 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 32214 characters (28.0% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.txt
INFO:text_cleaning:Starting cleaning. Original length: 47107 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 11959 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 531 ch

SUCCESS: raw_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt -> case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[74/79] raw_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.txt
SUCCESS: raw_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIMAH_Als__NADIRA_Als__MBAK_MBUL.txt -> case_2021_TK1_Putusan_PT_YOGYAKARTA_Nomor_12_PID_SUS_2021_PT_YYK_Tanggal_18_Februari_2021__Pembanding_Penuntut_Umum___AGUS_KURNIAWAN_SHTerbanding_Terdakwa___SITI_FATIM

INFO:text_cleaning:Spacing/character normalization: 429 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 34692 characters (25.1% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.txt
INFO:text_cleaning:Processing file: raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt
INFO:text_cleaning:Starting cleaning. Original length: 71392 characters
INFO:text_cleaning:Headers/footers/watermarks removed: 17195 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 360 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 53836 characters (24.6% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2021_TK1_Putusan_PN_S

SUCCESS: raw_2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.txt -> case_2021_TK1_Putusan_PT_MANADO_Nomor_4_PID_2021_PT_MND_Tanggal_10_Februari_2021__Identitas_Pihak_Tidak_Dipublikasi.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[77/79] raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt
SUCCESS: raw_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt -> case_2021_TK1_Putusan_PN_SAMBAS_Nomor_7_Pid_Sus_2021_PN_Sbs_Tanggal_9_Februari_2021__Penuntut_Umum_1_Meirita_Pakpahan__S_H_2_Salomo_Saing__S_H___M_H_Terdakwa_ELISA_BINTI_NAJIR.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)
[78/79] raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tan

INFO:text_cleaning:Headers/footers/watermarks removed: 9780 characters
INFO:text_cleaning:Text converted to lowercase
INFO:text_cleaning:Spacing/character normalization: 216 characters removed
INFO:text_cleaning:Cleaning complete. Final length: 24652 characters (28.9% reduction)
INFO:text_cleaning:Cleaned file saved to all three locations: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_cleaning:Summary reports created in both locations


SUCCESS: raw_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt -> case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt (saved to /data/raw, gdrive/CLEANED, and gdrive/data/raw)

CLEANING SUMMARY:
Success: 79
Errors: 0
Output 1: /data/raw
Output 2: /content/drive/MyDrive/perdagangan_orang/CLEANED
Output 3: /content/drive/MyDrive/perdagangan_orang/data/raw
Summary saved to: /logs/cleaning_summary.txt
Summary saved to: /content/drive/MyDrive/perdagangan_orang/logs/cleaning_summary.txt

CLEANING PROCESS COMPLETE!
Check output files in: /data/raw/
Check output files in: /content/drive/MyDrive/perdagangan_orang/CLEANED
Check output files in: /content/drive/MyDrive/perdagangan_orang/data/raw
Check logs in: /logs/cleaning.log
Check logs in: /content/drive/MyDrive/perdagangan_orang/logs/cleaning.log


Validasi

In [8]:
import os
import re
import logging
import pandas as pd
from datetime import datetime

In [17]:
import os
import re
import logging
import pandas as pd
from datetime import datetime

# Setup logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

class TextValidator:
    """Validate text integrity and completeness of legal documents"""

    def __init__(self, base_dir="/content/drive/MyDrive/perdagangan_orang"):
        self.base_dir = base_dir
        self.cleaned_dir = "/data/raw"  # Input: cleaned text files
        self.gdrive_cleaned_dir = os.path.join(base_dir, "CLEANED")  # Alternative input
        self.logs_dir = "/logs"  # Local logs
        self.gdrive_logs_dir = os.path.join(base_dir, "logs")  # Google Drive logs mirror
        self.validation_dir = os.path.join(base_dir, "VALIDATION")

        # Create directories
        os.makedirs(self.validation_dir, exist_ok=True)
        os.makedirs(self.logs_dir, exist_ok=True)
        os.makedirs(self.gdrive_logs_dir, exist_ok=True)

        print(f"Input 1 (cleaned): {self.cleaned_dir}")
        print(f"Input 2 (gdrive): {self.gdrive_cleaned_dir}")
        print(f"Validation output: {self.validation_dir}")
        print(f"Logs 1 (local): {self.logs_dir}")
        print(f"Logs 2 (gdrive): {self.gdrive_logs_dir}")

        # Setup validation logger
        self.setup_validation_logger()

        # Define legal document structure requirements
        self.setup_legal_requirements()

    def setup_validation_logger(self):
        """Setup dedicated validation logger with dual output"""
        self.validation_logger = logging.getLogger('text_validation')
        self.validation_logger.setLevel(logging.INFO)

        # Remove existing handlers
        for handler in self.validation_logger.handlers[:]:
            self.validation_logger.removeHandler(handler)

        # Ensure log directories exist
        os.makedirs(self.logs_dir, exist_ok=True)
        os.makedirs(self.gdrive_logs_dir, exist_ok=True)

        # Create file handlers for both locations
        log_file_local = os.path.join(self.logs_dir, 'validation.log')
        log_file_gdrive = os.path.join(self.gdrive_logs_dir, 'validation.log')

        # Create formatter
        formatter = logging.Formatter(
            '%(asctime)s - %(levelname)s - %(message)s',
            datefmt='%Y-%m-%d %H:%M:%S'
        )

        # Local log handler
        try:
            file_handler_local = logging.FileHandler(log_file_local, mode='a', encoding='utf-8')
            file_handler_local.setLevel(logging.INFO)
            file_handler_local.setFormatter(formatter)
            self.validation_logger.addHandler(file_handler_local)
        except Exception as e:
            print(f"Warning: Could not create local validation log: {e}")

        # Google Drive log handler
        try:
            file_handler_gdrive = logging.FileHandler(log_file_gdrive, mode='a', encoding='utf-8')
            file_handler_gdrive.setLevel(logging.INFO)
            file_handler_gdrive.setFormatter(formatter)
            self.validation_logger.addHandler(file_handler_gdrive)
        except Exception as e:
            print(f"Warning: Could not create gdrive validation log: {e}")

        self.validation_logger.info("="*60)
        self.validation_logger.info("TEXT VALIDATION SESSION STARTED")
        self.validation_logger.info("="*60)

    def setup_legal_requirements(self):
        """Define requirements for legal document completeness"""

        # Essential legal document sections (case insensitive)
        self.essential_sections = [
            r'menimbang',     # "MENIMBANG" section
            r'mengingat',     # "MENGINGAT" section
            r'mengadili',     # "MENGADILI" section
        ]

        # Important legal elements
        self.important_elements = [
            r'putusan',       # "PUTUSAN"
            r'terdakwa',      # "TERDAKWA"
            r'jaksa',         # "JAKSA" or prosecutor
            r'hakim',         # "HAKIM" or judge
            r'pasal',         # "PASAL" or article reference
        ]

        # Document structure indicators
        self.structure_indicators = [
            r'nomor.*\d+',    # Case number
            r'tahun.*\d{4}',  # Year reference
            r'pengadilan',    # Court reference
        ]

        # Minimum content thresholds
        self.min_word_count = 100       # Minimum words for valid document
        self.min_char_count = 500       # Minimum characters
        self.min_sentence_count = 10    # Minimum sentences

    # =================== TEXT ANALYSIS METHODS ===================

    def analyze_text_structure(self, text):
        """Analyze text structure and content"""
        if not isinstance(text, str) or not text.strip():
            return {
                'word_count': 0,
                'char_count': 0,
                'sentence_count': 0,
                'paragraph_count': 0,
                'essential_sections_found': [],
                'important_elements_found': [],
                'structure_indicators_found': []
            }

        # Basic statistics
        words = text.split()
        sentences = re.split(r'[.!?]+', text)
        paragraphs = [p.strip() for p in text.split('\n\n') if p.strip()]

        # Find essential sections
        essential_found = []
        for section in self.essential_sections:
            if re.search(section, text, re.IGNORECASE):
                essential_found.append(section)

        # Find important elements
        elements_found = []
        for element in self.important_elements:
            if re.search(element, text, re.IGNORECASE):
                elements_found.append(element)

        # Find structure indicators
        structure_found = []
        for indicator in self.structure_indicators:
            if re.search(indicator, text, re.IGNORECASE):
                structure_found.append(indicator)

        return {
            'word_count': len(words),
            'char_count': len(text),
            'sentence_count': len([s for s in sentences if s.strip()]),
            'paragraph_count': len(paragraphs),
            'essential_sections_found': essential_found,
            'important_elements_found': elements_found,
            'structure_indicators_found': structure_found
        }

    def calculate_completeness_score(self, analysis):
        """Calculate text completeness score (0-100%)"""
        score = 0
        max_score = 100

        # 1. Essential sections (40% of score)
        essential_score = (len(analysis['essential_sections_found']) / len(self.essential_sections)) * 40
        score += min(essential_score, 40)

        # 2. Important elements (30% of score)
        elements_score = (len(analysis['important_elements_found']) / len(self.important_elements)) * 30
        score += min(elements_score, 30)

        # 3. Text length adequacy (20% of score)
        length_score = 0
        if analysis['word_count'] >= self.min_word_count:
            length_score += 10
        if analysis['char_count'] >= self.min_char_count:
            length_score += 10
        score += length_score

        # 4. Structure indicators (10% of score)
        structure_score = (len(analysis['structure_indicators_found']) / len(self.structure_indicators)) * 10
        score += min(structure_score, 10)

        return min(score, 100)  # Cap at 100%

    def validate_single_file(self, filename, source_dir):
        """Validate single text file"""
        file_path = os.path.join(source_dir, filename)

        if not os.path.exists(file_path):
            self.validation_logger.error(f"File not found: {file_path}")
            return None

        try:
            # Read file
            with open(file_path, 'r', encoding='utf-8') as f:
                text = f.read()

            self.validation_logger.info(f"Validating file: {filename}")

            # Analyze text
            analysis = self.analyze_text_structure(text)

            # Calculate completeness score
            completeness_score = self.calculate_completeness_score(analysis)

            # Determine validation status
            is_valid = completeness_score >= 80.0  # 80% threshold

            # Create validation result
            result = {
                'filename': filename,
                'source_directory': source_dir,
                'file_size_bytes': len(text.encode('utf-8')),
                'word_count': analysis['word_count'],
                'char_count': analysis['char_count'],
                'sentence_count': analysis['sentence_count'],
                'paragraph_count': analysis['paragraph_count'],
                'essential_sections_found': len(analysis['essential_sections_found']),
                'essential_sections_list': ', '.join(analysis['essential_sections_found']),
                'important_elements_found': len(analysis['important_elements_found']),
                'important_elements_list': ', '.join(analysis['important_elements_found']),
                'structure_indicators_found': len(analysis['structure_indicators_found']),
                'structure_indicators_list': ', '.join(analysis['structure_indicators_found']),
                'completeness_score': round(completeness_score, 2),
                'is_valid': is_valid,
                'validation_status': 'PASS' if is_valid else 'FAIL',
                'validation_timestamp': datetime.now().strftime('%Y-%m-%d %H:%M:%S')
            }

            # Log result
            status = "PASS" if is_valid else "FAIL"
            self.validation_logger.info(f"{filename}: {status} (Score: {completeness_score:.1f}%)")

            if not is_valid:
                self.validation_logger.warning(f"{filename}: Below 80% threshold - may be incomplete")

            print(f"{status}: {filename} (Score: {completeness_score:.1f}%)")

            return result

        except Exception as e:
            self.validation_logger.error(f"Error validating {filename}: {str(e)}")
            print(f"ERROR: {filename}: {str(e)}")
            return None

    def get_text_files(self, directory):
        """Get list of text files from directory"""
        if not os.path.exists(directory):
            return []

        return [f for f in os.listdir(directory)
                if f.endswith('.txt') and os.path.isfile(os.path.join(directory, f))]

    def validate_all_files(self):
        """Validate all cleaned text files"""
        print("iv. VALIDASI TEKS")
        print("=" * 50)
        print("Tujuan: Periksa keutuhan teks (minimal 80% isi putusan)")
        print("Threshold: 80% completeness score")
        print("=" * 50)

        # Get files from both locations
        data_raw_files = self.get_text_files(self.cleaned_dir)
        gdrive_files = self.get_text_files(self.gdrive_cleaned_dir)

        if not data_raw_files and not gdrive_files:
            print("No text files found for validation")
            return None

        # Use data/raw files if available, otherwise use gdrive
        if data_raw_files:
            files_to_validate = data_raw_files
            source_directory = self.cleaned_dir
            print(f"Using files from: {self.cleaned_dir}")
        else:
            files_to_validate = gdrive_files
            source_directory = self.gdrive_cleaned_dir
            print(f"Using files from: {self.gdrive_cleaned_dir}")

        print(f"Found {len(files_to_validate)} files to validate")
        print("=" * 60)

        # Validate each file
        results = []
        pass_count = 0
        fail_count = 0

        for i, filename in enumerate(files_to_validate, 1):
            print(f"[{i}/{len(files_to_validate)}] Validating: {filename}")
            result = self.validate_single_file(filename, source_directory)

            if result:
                results.append(result)
                if result['is_valid']:
                    pass_count += 1
                else:
                    fail_count += 1

        if not results:
            print("No files successfully validated")
            return None

        # Create validation report
        self.create_validation_report(results, pass_count, fail_count)

        return results

    def create_validation_report(self, results, pass_count, fail_count):
        """Create comprehensive validation report"""

        # Create DataFrame
        df_results = pd.DataFrame(results)

        # Save detailed CSV report
        csv_path = os.path.join(self.validation_dir, 'validation_report.csv')
        df_results.to_csv(csv_path, index=False)

        # Create summary statistics
        total_files = len(results)
        pass_rate = (pass_count / total_files * 100) if total_files > 0 else 0
        avg_score = df_results['completeness_score'].mean()
        min_score = df_results['completeness_score'].min()
        max_score = df_results['completeness_score'].max()

        # Create text summary report
        summary_content = f"""TEXT VALIDATION REPORT
=====================
Date: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}
Validation Threshold: 80% completeness

SUMMARY STATISTICS:
------------------
Total files validated: {total_files}
Files passed (≥80%): {pass_count}
Files failed (<80%): {fail_count}
Pass rate: {pass_rate:.1f}%

SCORE STATISTICS:
----------------
Average score: {avg_score:.1f}%
Minimum score: {min_score:.1f}%
Maximum score: {max_score:.1f}%

FAILED FILES (if any):
---------------------
"""

        # Add failed files details
        failed_files = df_results[df_results['is_valid'] == False]
        if not failed_files.empty:
            for _, file_info in failed_files.iterrows():
                summary_content += f"- {file_info['filename']}: {file_info['completeness_score']:.1f}% "
                summary_content += f"(Missing: "

                missing_elements = []
                if file_info['essential_sections_found'] < len(self.essential_sections):
                    missing_elements.append("essential sections")
                if file_info['important_elements_found'] < len(self.important_elements):
                    missing_elements.append("important elements")
                if file_info['word_count'] < self.min_word_count:
                    missing_elements.append("sufficient content")

                summary_content += ", ".join(missing_elements) + ")\n"
        else:
            summary_content += "No files failed validation!\n"

        summary_content += f"\nDETAILED REPORT: {csv_path}\n"
        summary_content += f"VALIDATION LOG 1: {os.path.join(self.logs_dir, 'validation.log')}\n"
        summary_content += f"VALIDATION LOG 2: {os.path.join(self.gdrive_logs_dir, 'validation.log')}\n"

        # Save summary report to both locations
        summary_path_local = os.path.join(self.validation_dir, 'validation_summary.txt')
        summary_path_gdrive = os.path.join(self.gdrive_logs_dir, 'validation_summary.txt')

        # Ensure directories exist
        os.makedirs(self.validation_dir, exist_ok=True)
        os.makedirs(self.gdrive_logs_dir, exist_ok=True)

        try:
            with open(summary_path_local, 'w', encoding='utf-8') as f:
                f.write(summary_content)
        except Exception as e:
            print(f"Warning: Could not save validation summary locally: {e}")

        try:
            with open(summary_path_gdrive, 'w', encoding='utf-8') as f:
                f.write(summary_content)
        except Exception as e:
            print(f"Warning: Could not save validation summary to gdrive: {e}")

        # Display summary
        print("\n" + "=" * 60)
        print("VALIDATION SUMMARY:")
        print("=" * 60)
        print(f"Total files: {total_files}")
        print(f"Passed (≥80%): {pass_count}")
        print(f"Failed (<80%): {fail_count}")
        print(f"Pass rate: {pass_rate:.1f}%")
        print(f"Average score: {avg_score:.1f}%")
        print(f"Score range: {min_score:.1f}% - {max_score:.1f}%")

        # Show failed files in console
        if fail_count > 0:
            print(f"\nFAILED FILES ({fail_count}):")
            print("-" * 40)
            failed_files = df_results[df_results['is_valid'] == False]
            for _, file_info in failed_files.iterrows():
                print(f"  {file_info['filename']}: {file_info['completeness_score']:.1f}%")
                issues = []
                if file_info['essential_sections_found'] < len(self.essential_sections):
                    missing_count = len(self.essential_sections) - file_info['essential_sections_found']
                    issues.append(f"{missing_count} missing essential sections")
                if file_info['word_count'] < self.min_word_count:
                    issues.append(f"too short ({file_info['word_count']} words)")
                if issues:
                    print(f"    Issues: {', '.join(issues)}")
        else:
            print("\nAll files passed validation!")

        print(f"\nREPORTS GENERATED:")
        print(f"  Detailed CSV: {csv_path}")
        print(f"  Summary TXT (validation): {summary_path_local}")
        print(f"  Summary TXT (gdrive): {summary_path_gdrive}")
        print(f"  Validation log (local): {os.path.join(self.logs_dir, 'validation.log')}")
        print(f"  Validation log (gdrive): {os.path.join(self.gdrive_logs_dir, 'validation.log')}")

        # Log summary
        self.validation_logger.info(f"Validation complete: {pass_count}/{total_files} files passed ({pass_rate:.1f}%)")
        self.validation_logger.info(f"Average completeness score: {avg_score:.1f}%")

        if fail_count > 0:
            self.validation_logger.warning(f"{fail_count} files failed validation (below 80% threshold)")

# Utility functions
def validate_single_text(text):
    """Quick function to validate single text"""
    validator = TextValidator()
    analysis = validator.analyze_text_structure(text)
    score = validator.calculate_completeness_score(analysis)
    return score, score >= 80.0

def validate_specific_file(file_path):
    """Validate specific file by path"""
    validator = TextValidator()
    filename = os.path.basename(file_path)
    directory = os.path.dirname(file_path)
    return validator.validate_single_file(filename, directory)

def main():
    """Main execution function"""
    print("iv. VALIDASI")
    print("=" * 50)
    print("1. Periksa keutuhan teks (minimal 80% isi putusan tersedia)")
    print("2. Catat log file: /logs/ dan Google Drive/logs/")
    print("=" * 50)

    # Initialize validator
    validator = TextValidator("/content/drive/MyDrive/perdagangan_orang")

    # Run validation
    results = validator.validate_all_files()

    if results:
        print("\nVALIDATION PROCESS COMPLETE!")
        print(f"Check validation reports in: {validator.validation_dir}")
        print(f"Check validation logs: /logs/validation.log")
        print(f"Check validation logs: {validator.gdrive_logs_dir}/validation.log")
    else:
        print("\nNo files found for validation.")

# Execute
if __name__ == "__main__":
    main()

INFO:text_validation:TEXT VALIDATION SESSION STARTED
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt
INFO:text_validation:case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt: PASS (Score: 100.0%)
INFO:text_validation:Validating file: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_validation:case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt: PASS (Score: 88.0%)
INFO:text_validation:Validating file: case_2024_TK1_Putusan_PN_PADANG_Nomor_170_Pdt_Sus-BPSK_2024_PN_Pdg_Tanggal_30_Desember_2024__Penggugat_PT_Mandiri_Tunas_FinanceTergugat_Ali_Muhammadsyah.txt
INFO:text_v

iv. VALIDASI
1. Periksa keutuhan teks (minimal 80% isi putusan tersedia)
2. Catat log file: /logs/ dan Google Drive/logs/
Input 1 (cleaned): /data/raw
Input 2 (gdrive): /content/drive/MyDrive/perdagangan_orang/CLEANED
Validation output: /content/drive/MyDrive/perdagangan_orang/VALIDATION
Logs 1 (local): /logs
Logs 2 (gdrive): /content/drive/MyDrive/perdagangan_orang/logs
iv. VALIDASI TEKS
Tujuan: Periksa keutuhan teks (minimal 80% isi putusan)
Threshold: 80% completeness score
Using files from: /data/raw
Found 109 files to validate
[1/109] Validating: case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt
PASS: case_2021_TK1_Putusan_PT_MANADO_Nomor_10_PID_SUS_2021_PT_MND_Tanggal_25_Februari_2021__Pembanding_Penuntut_Umum___JENNY_R_WAYONG__SHTerbanding_Terdakwa___MICHAEL_UMBOH.txt (Score: 100.0%)
[2/109] Validating: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_3026_Pdt_G_2025

INFO:text_validation:case_2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.txt: PASS (Score: 86.7%)
INFO:text_validation:Validating file: case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_validation:case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt: PASS (Score: 88.0%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt: PASS (Score: 86.7%)
INFO:text_validation:Valid

PASS: case_2021_TK1_Putusan_PN_MATARAM_Nomor_467_Pid_Sus_2021_PN_Mtr_Tanggal_30_September_2021__Penuntut_Umum_1_HENDRO_SAYEKTI_SH_2_M_BUSTANUL__ARIFIN_SH_MH_3_MOCH__TAUFIQ_ISMAIL__SHTerdakwa_PANDRI__AZ_ANDRE.txt (Score: 86.7%)
[19/109] Validating: case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
PASS: case_2025_TK1_Putusan_PA_PONOROGO_Nomor_883_Pdt_G_2025_PA_Po_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (Score: 88.0%)
[20/109] Validating: case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt
PASS: case_2021_TK1_Putusan_PN_PASANGKAYU_Nomor_79_Pid_Sus_2021_PN_Pky_Tanggal_21_Juli_2021__Penuntut_Umum_FRI_HARMOKO__SH__MHTerdakwa_RUHANI_alias_RIFKA_binti_ABD__LATIF.txt (Score: 86.7%)
[21/109] Validating: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2726_Pdt_G_2025_PA_Tgrs_Tanggal_24_Juni_2025__Penggugat_melaw

INFO:text_validation:case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.txt: PASS (Score: 80.7%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt: PASS (Score: 94.0%)
INFO:text_validation:Validating file: case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_validation:case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt: PASS (Score: 88.0%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PT_MATARAM_Nomor_140_P

PASS: case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_126_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_KARMANESA_FEBRIARI_ALS_ESA_BIN_NIAT_NGUDIANTO.txt (Score: 80.7%)
[46/109] Validating: case_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt
PASS: case_2021_TK1_Putusan_PN_KUALA_SIMPANG_Nomor_22_Pid_Sus_2021_PN_Ksp_Tanggal_8_Maret_2021__Penuntut_Umum_MARIONO__SH_MHTerdakwa_HENGKIE_BIN_EFENDI.txt (Score: 94.0%)
[47/109] Validating: case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
PASS: case_2025_TK1_Putusan_PA_Ngamprah_Nomor_1205_Pdt_G_2025_PA_Nph_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (Score: 88.0%)
[48/109] Validating: case_2021_TK1_Putusan_PT_MATARAM_Nomor_140_PID_SUS_2021_PT_MTR_Tanggal_9_Desember_2021__Pembanding_Penuntut_Umum_I___HENDRO_S_I_B__SH_Terbanding_Terdakwa___BQ

INFO:text_validation:case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_16_Pdt_Sus-BPSK_2024_PN_Llg_Tanggal_27_Juni_2024__Penggugat_PT__Bank_Rakyat_Indonesia__Persero__Tbk_Kantor_Cabang_LubuklinggauTergugat_Ummul_Hoiriah_Bi_Hutni.txt: PASS (Score: 88.0%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt: PASS (Score: 86.7%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_S

PASS: case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_16_Pdt_Sus-BPSK_2024_PN_Llg_Tanggal_27_Juni_2024__Penggugat_PT__Bank_Rakyat_Indonesia__Persero__Tbk_Kantor_Cabang_LubuklinggauTergugat_Ummul_Hoiriah_Bi_Hutni.txt (Score: 88.0%)
[59/109] Validating: case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt
PASS: case_2021_TK1_Putusan_PN_CILACAP_Nomor_197_Pid_Sus_2021_PN_Clp_Tanggal_30_September_2021__Penuntut_Umum_Santa_Novena_Christy_SHTerdakwa_GULIYAH_Binti_Alm_TEGUH_SUPARDI.txt (Score: 86.7%)
[60/109] Validating: case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS_GATEL_BIN_PARDIMAN.txt
PASS: case_2021_TK1_Putusan_PN_PURWOKERTO_Nomor_125_Pid_Sus_2021_PN_Pwt_Tanggal_7_September_2021__Penuntut_Umum_MARYANI_WIDIYASTUTITerdakwa_JEFRI_TOMS_PARDIANTO_ALS_JEFRI_ALS

INFO:text_validation:case_2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.txt: PASS (Score: 86.7%)
INFO:text_validation:Validating file: case_2024_TK1_Putusan_PN_MUARO_Nomor_4_Pdt_Sus-BPSK_2024_PN_Mrj_Tanggal_4_Maret_2024__Penggugat_PT_Bank_Tabungan_Negara__Persero_TbkTergugat_Yuli_Firda_Yesih.txt
INFO:text_validation:case_2024_TK1_Putusan_PN_MUARO_Nomor_4_Pdt_Sus-BPSK_2024_PN_Mrj_Tanggal_4_Maret_2024__Penggugat_PT_Bank_Tabungan_Negara__Persero_TbkTergugat_Yuli_Firda_Yesih.txt: FAIL (Score: 74.7%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_A

PASS: case_2021_TK1_Putusan_PN_SAMBAS_Nomor_22_Pid_Sus_2021_PN_Sbs_Tanggal_22_Maret_2021__Penuntut_Umum_1_Muhammad_Nur_Faisal_Wijaya__S_H_2_I_in_Lindayani__S_H___M_H_Terdakwa_RIKKY_OKTADO_Als_RIKI_Bin_N__Alm.txt (Score: 86.7%)
[66/109] Validating: case_2024_TK1_Putusan_PN_MUARO_Nomor_4_Pdt_Sus-BPSK_2024_PN_Mrj_Tanggal_4_Maret_2024__Penggugat_PT_Bank_Tabungan_Negara__Persero_TbkTergugat_Yuli_Firda_Yesih.txt
FAIL: case_2024_TK1_Putusan_PN_MUARO_Nomor_4_Pdt_Sus-BPSK_2024_PN_Mrj_Tanggal_4_Maret_2024__Penggugat_PT_Bank_Tabungan_Negara__Persero_TbkTergugat_Yuli_Firda_Yesih.txt (Score: 74.7%)
[67/109] Validating: case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt
PASS: case_2021_TK1_Putusan_PN_SURABAYA_Nomor_1969_Pid_Sus_2021_PN_Sby_Tanggal_8_Nopember_2021__Penuntut_Umum_DEDDY_ARISANDI__SH__MHTerdakwa_HENDRI_YULIANSYAH_BIN_ALM_BUTRI_SYAMSI.txt (Score: 86.7%)
[68/109] 

INFO:text_validation:case_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt: PASS (Score: 94.0%)
INFO:text_validation:Validating file: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
INFO:text_validation:case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt: PASS (Score: 88.0%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt: PASS (Score: 100.0%)
INFO:text_validation:Vali

PASS: case_2021_TK1_Putusan_PN_TEGAL_Nomor_22_Pid_Sus_2021_PN_Tgl_Tanggal_4_Mei_2021__Penuntut_Umum_1_Haerati__SH2_GRETA_ANASTASIA__S_H__M_H_3_Intan_Kafa_Arbina__SH_MHTerdakwa_MUAMAR_KADAFI.txt (Score: 94.0%)
[74/109] Validating: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt
PASS: case_2025_TK1_Putusan_PA_TIGARAKSA_Nomor_2898_Pdt_G_2025_PA_Tgrs_Tanggal_25_Juni_2025__Penggugat_melawan_Tergugat.txt (Score: 88.0%)
[75/109] Validating: case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt
PASS: case_2021_TK1_Putusan_PN_Ngabang_Nomor_64_Pid_Sus_2021_PN_Nba_Tanggal_30_Agustus_2021__Penuntut_Umum_Pewira_Saputra_SHTerdakwa_Susanti_Alias_Aling_Anak_Dari_Siau_Ket_Loy.txt (Score: 100.0%)
[76/109] Validating: case_2025_TK1_Putusan_MS_CALANG_Nomor_70_Pdt_P_2025_MS_Cag_Tanggal_12_Juni_2025__Pemohon_melawan_Term

INFO:text_validation:Validating file: case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_42_Pdt_Sus-BPSK_2023_PN_Llg_Tanggal_2_Januari_2024__Penggugat_PT_BCA_FinanceTergugat_Marlina.txt
INFO:text_validation:case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_42_Pdt_Sus-BPSK_2023_PN_Llg_Tanggal_2_Januari_2024__Penggugat_PT_BCA_FinanceTergugat_Marlina.txt: FAIL (Score: 74.7%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt
INFO:text_validation:case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt: PASS (Score: 100.0%)
INFO:text_validation:Validating file: case_2021_TK1_Putusan_PN_PANGKALAN_BUN_Nomor_57_Pid_Sus_2021_PN_Pbu_Tanggal_7_April_2021__Penuntut_Umum_1_G

PASS: case_2021_TK1_Putusan_PN_SURABAYA_Nomor_575_Pid_Sus_2021_PN_Sby_Tanggal_24_Mei_2021__Penuntut_Umum_DZULKIFLY_NENTO__SHTerdakwa_RICO_LINGGAR_JAYA_BIN_EDI_WAHYONO.txt (Score: 100.0%)
[93/109] Validating: case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_42_Pdt_Sus-BPSK_2023_PN_Llg_Tanggal_2_Januari_2024__Penggugat_PT_BCA_FinanceTergugat_Marlina.txt
FAIL: case_2024_TK1_Putusan_PN_LUBUK_LINGAU_Nomor_42_Pdt_Sus-BPSK_2023_PN_Llg_Tanggal_2_Januari_2024__Penggugat_PT_BCA_FinanceTergugat_Marlina.txt (Score: 74.7%)
[94/109] Validating: case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt
PASS: case_2021_TK1_Putusan_PN_INDRAMAYU_Nomor_215_Pid_Sus_2021_PN_Idm_Tanggal_21_Oktober_2021__Penuntut_Umum_1_M__ICHSAN__S_H___M_H_2_TISNA_P__WIJAYA__SHTerdakwa_1_ANDI_SOPANDI_alias_ANDI_BIN_UUM_2_DAUSTADI.txt (Score: 100.0%)
[95/109] Validating: case_2021_TK1

INFO:text_validation:Validation complete: 95/109 files passed (87.2%)
INFO:text_validation:Average completeness score: 87.9%



VALIDATION SUMMARY:
Total files: 109
Passed (≥80%): 95
Failed (<80%): 14
Pass rate: 87.2%
Average score: 87.9%
Score range: 74.7% - 100.0%

FAILED FILES (14):
----------------------------------------
  case_2025_TK1_Putusan_PA_Sei_Rampah_Nomor_655_Pdt_G_2025_PA_Srh_Tanggal_24_Juni_2025__Penggugat_melawan_Tergugat.txt: 74.7%
    Issues: 1 missing essential sections
  case_2024_TK1_Putusan_PN_MEDAN_Nomor_685_Pdt_Sus-BPSK_2024_PN_Mdn_Tanggal_9_September_2024__Penggugat_PT__Astra_Sedaya_FinanceTergugat_Amiruddin.txt: 74.7%
    Issues: 1 missing essential sections
  case_2024_TK1_Putusan_PN_LUBUK_PAKAM_Nomor_124_Pdt_Sus-BPSK_2024_PN_Lbp_Tanggal_9_Juli_2024__Penggugat_PT_PLN_PERSEROTergugat_ARMANSYAH.txt: 74.7%
    Issues: 1 missing essential sections
  case_2024_TK1_Putusan_PN_MEDAN_Nomor_514_Pdt_Sus-BPSK_2024_PN_Mdn_Tanggal_7_Agustus_2024__Penggugat_PT__EKSPRES_MANDIRI_GADAITergugat_SUTIMAN.txt: 74.7%
    Issues: 1 missing essential sections
  case_2024_TK1_Putusan_PN_INDRAMAYU_Nomor_62_P