In [None]:

import shutil, subprocess, sys, os, re, time

# ✅ Check whether the google.colab package is already loaded, i.e. we run inside Colab
in_colab = 'google.colab' in sys.modules
if in_colab:
    # Colab-only helper module; the name `files` stays undefined outside Colab.
    from google.colab import files

# ✅ Check for chromium/chromedriver/selenium and install whatever is missing
def ensure_dependencies():
    """Install the browser, driver and selenium package when they are missing.

    The system packages (``chromium-chromedriver`` / ``chromium-browser``) are
    only installed through ``apt`` when running inside Colab; outside Colab a
    missing binary is left alone. The ``selenium`` package is installed with
    pip for the current interpreter whenever it cannot be imported.

    A failed install is reported on stdout instead of being ignored, but it
    never raises: this stays a best-effort setup step.

    Returns:
        None
    """
    import importlib

    apt_needed = []
    if not shutil.which("chromedriver"):
        apt_needed.append("chromium-chromedriver")
    # Either binary name counts as an installed Chromium.
    if not (shutil.which("chromium-browser") or shutil.which("chromium")):
        apt_needed.append("chromium-browser")

    if apt_needed and in_colab:
        print(f"[*] Installing: {' '.join(apt_needed)} ...")
        result = subprocess.run(
            ["apt", "install", "-y"] + apt_needed,
            stdout=subprocess.DEVNULL,
        )
        if result.returncode != 0:
            print(f"[!] apt install failed (exit code {result.returncode})")

    try:
        import selenium  # noqa: F401 -- imported only to probe availability
    except ImportError:
        print("[*] Installing selenium...")
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "selenium"],
            stdout=subprocess.DEVNULL,
        )
        if result.returncode != 0:
            print(f"[!] pip install selenium failed (exit code {result.returncode})")
        # The package was added while the interpreter is running; drop the
        # finders' cached directory listings so a later import can see it.
        importlib.invalidate_caches()

# Run the dependency check before anything else: it may pip-install the
# selenium package that the imports below rely on.
ensure_dependencies()

# ✅ Import Selenium only after the dependency check has run
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.common.exceptions import WebDriverException, TimeoutException, NoSuchElementException

def _auto_scroll(driver, max_rounds=10, pause=1):
    """Scroll to the page bottom until its height stops growing (or max_rounds is hit)."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    for _ in range(max_rounds):
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            break
        last_height = new_height


def selenium_scraper(url):
    """Save the visible text of a web page to a text file using headless Chrome.

    The page is opened, scrolled to the bottom a few times so that lazily
    loaded content gets a chance to render, and the text of ``<body>`` is
    written to ``selenium_title_<last-url-segment>.txt`` in the current
    directory. Inside Colab the file is additionally offered for download.

    Args:
        url: Address of the page; must start with ``http://`` or ``https://``.
            Surrounding whitespace is ignored.

    Returns:
        The name of the written file, or ``None`` when the URL is invalid,
        the browser or page could not be opened, or no text could be saved.
        Errors are reported on stdout; none are raised to the caller.
    """
    if not isinstance(url, str):
        print("[!] URL tidak valid")
        return None
    url = url.strip()
    # A tuple of full scheme prefixes rejects look-alikes such as "httpfoo://".
    if not url.lower().startswith(("http://", "https://")):
        print("[!] URL tidak valid")
        return None

    # File name is derived from the last path segment; non-word runs become "_".
    title_safe = re.sub(r'\W+', '_', url.split('/')[-1] or "output")
    output_filename = f'selenium_title_{title_safe}.txt'

    options = Options()
    options.add_argument("--headless")
    options.add_argument("--no-sandbox")
    options.add_argument("--disable-dev-shm-usage")

    # Bind both names before the try block so the finally clause can always
    # use them, even when the browser itself fails to start.
    driver = None
    start = time.time()
    try:
        print("[*] Memulai browser...")
        driver = webdriver.Chrome(options=options)
        driver.set_page_load_timeout(20)

        # Restart the clock: the reported duration covers page work only.
        start = time.time()
        driver.get(url)
        time.sleep(3)

        # ✅ Auto scroll so that all content gets loaded
        _auto_scroll(driver)

        try:
            body = driver.find_element(By.TAG_NAME, "body")
            text = body.text.strip()

            if not text:
                raise ValueError("Halaman kosong atau gagal ambil konten")

            with open(output_filename, "w", encoding="utf-8") as f:
                f.write(text)

            print(f"[✓] Disimpan ke: {output_filename} ({len(text)} karakter)")

        except NoSuchElementException:
            print("[!] Elemen <body> tidak ditemukan.")
            return None
        except Exception as e:
            print(f"[!] Gagal ambil teks: {e}")
            return None

    except (WebDriverException, TimeoutException) as e:
        print(f"[!] Gagal membuka halaman: {e}")
        return None
    except Exception as e:
        print(f"[!] Error tidak terduga: {e}")
        return None
    finally:
        if driver is not None:
            try:
                driver.quit()
            except Exception as e:
                # Shutdown stays best-effort, but is no longer silent and no
                # longer swallows KeyboardInterrupt/SystemExit.
                print(f"[!] Gagal menutup browser: {e}")
        print(f"[i] Selesai dalam {round(time.time() - start, 2)} detik")

    # ✅ Auto download in Colab
    if in_colab:
        files.download(output_filename)
    return output_filename

# ✅ Ask for the URL on standard input, then scrape it
try:
    url = input("Masukkan URL: ")
except EOFError:
    # No stdin available (e.g. non-interactive run): report and exit non-zero.
    print("[!] Input tidak terbaca")
    sys.exit(1)
else:
    url = url.strip()

selenium_scraper(url)