# Untuk ambil cookie dari login akun

In [None]:
import os
import time
import json
import tkinter as tk
from tkinter import messagebox
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import undetected_chromedriver as uc

# Path of the JSON file used to persist the browser session cookies between runs.
COOKIE_FILE = "shopee_cookies.json"

def save_cookies(driver, filename):
    """Write the driver's current cookies to *filename* as JSON.

    The cookies are fetched and serialized *before* the file is opened, so a
    failure while reading them from the browser (or while serializing them)
    leaves any previously saved cookie file untouched instead of truncating it.

    Args:
        driver: Object exposing ``get_cookies()``; the returned value must be
            JSON-serializable.
        filename: Path of the file to create or overwrite.
    """
    serialized = json.dumps(driver.get_cookies())
    with open(filename, "w") as f:
        f.write(serialized)

def load_cookies(driver, filename):
    """Read the JSON cookie list stored in *filename* and add every entry to *driver*.

    Args:
        driver: Object exposing ``add_cookie(cookie)``.
        filename: Path of a JSON file containing an iterable of cookies.
    """
    with open(filename, 'r') as cookie_file:
        raw_text = cookie_file.read()
    stored_cookies = json.loads(raw_text)
    # Cookies are added one at a time, in file order, after the file is closed.
    for entry in stored_cookies:
        driver.add_cookie(entry)

def is_logged_in(driver):
    """Report whether the Shopee home page loads with its search bar present.

    Navigates *driver* to the home page and waits up to 10 seconds for an
    element with class ``shopee-searchbar-input__input``.

    NOTE(review): this only proves that the search bar rendered; presumably it
    is meant as the "login / CAPTCHA passed" signal -- confirm it does not also
    appear for anonymous sessions.

    Args:
        driver: Selenium-compatible WebDriver.

    Returns:
        True when the element appears in time; False when navigation or the
        wait raises an ordinary exception (timeout, driver error, ...).
    """
    try:
        driver.get("https://shopee.co.id/")
        WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, "shopee-searchbar-input__input"))
        )
    except Exception:
        # Best-effort probe: any ordinary failure means "not logged in".
        # Unlike a bare ``except``, KeyboardInterrupt/SystemExit still
        # propagate, so the user can abort a long polling loop.
        return False
    return True

def handle_login_and_captcha(keyword: str):
    """Open a browser, restore or obtain a Shopee session, and persist its cookies.

    Flow:
      1. Open the home page and, if ``COOKIE_FILE`` exists, load its cookies.
      2. If ``is_logged_in`` succeeds, report that the saved session is valid.
      3. Otherwise open the login page, wait for the user to confirm a manual
         login, open a search for *keyword* (intended to trigger the CAPTCHA),
         and poll ``is_logged_in`` for up to 180 seconds.
      4. On success the cookies are written to ``COOKIE_FILE``.

    The browser is always closed on exit, including when a step raises.

    Args:
        keyword: Search term used to build the search URL; it is URL-encoded.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    options = uc.ChromeOptions()
    options.add_argument("--window-size=1200,800")
    driver = uc.Chrome(options=options)

    try:
        driver.get("https://shopee.co.id/")
        time.sleep(3)

        if os.path.exists(COOKIE_FILE):
            print("🍪 Memuat cookie login...")
            load_cookies(driver, COOKIE_FILE)
            driver.refresh()
            time.sleep(5)

        if is_logged_in(driver):
            print("✅ Sudah login dari cookie.")
            messagebox.showinfo("Info", "Login masih aktif dari cookie yang tersimpan.")
            return

        print("🔐 Belum login, silakan login manual.")
        driver.get("https://shopee.co.id/buyer/login")
        messagebox.showinfo("Login", "Silakan login secara manual di browser yang muncul.\nKlik OK jika sudah selesai login.")

        # quote() escapes every reserved character (&, #, +, ...), not only spaces.
        search_url = f"https://shopee.co.id/search?keyword={quote(keyword)}"
        driver.get(search_url)

        timeout = 180
        # A monotonic clock is immune to wall-clock adjustments during the wait.
        deadline = time.monotonic() + timeout

        # Remember the last probe result so that no additional page load is
        # needed once the loop ends.
        logged_in = is_logged_in(driver)
        while not logged_in and time.monotonic() < deadline:
            print("⚠️ Belum berhasil login... mencoba ulang CAPTCHA.")
            time.sleep(6)
            driver.refresh()
            logged_in = is_logged_in(driver)

        if logged_in:
            save_cookies(driver, COOKIE_FILE)
            print("✅ Cookie disimpan.")
            messagebox.showinfo("Selesai", "Login dan CAPTCHA berhasil.\nCookie telah disimpan.")
        else:
            print("⛔ Timeout: Login atau CAPTCHA gagal.")
            messagebox.showerror("Gagal", "Login atau CAPTCHA tidak berhasil dalam waktu yang ditentukan.")
    finally:
        # Never leave an orphaned browser process behind.
        driver.quit()

# ========== GUI ==========

def start():
    """Button callback: validate the keyword field and launch the login flow.

    Surrounding whitespace is stripped so that a whitespace-only entry is
    rejected like an empty one instead of being sent as a search term.
    """
    keyword = keyword_entry.get().strip()
    if not keyword:
        messagebox.showwarning("Input Kosong", "Masukkan kata kunci pencarian untuk memicu CAPTCHA.")
        return
    handle_login_and_captcha(keyword)

# Build the single-field window that starts the login/CAPTCHA flow.
root = tk.Tk()
root.geometry("400x200")
root.title("Shopee Login + CAPTCHA")

tk.Label(
    master=root,
    text="🛒 Masukkan Kata Kunci Pencarian (untuk trigger CAPTCHA):",
).pack(pady=(15, 0))

# Read by start() when the button is pressed.
keyword_entry = tk.Entry(master=root, width=40)
keyword_entry.pack(pady=10)

tk.Button(
    master=root,
    text="🚀 Mulai Proses Login",
    command=start,
).pack(pady=20)

# Hand control to Tk's event loop.
root.mainloop()

# Scraping dengan cookie

In [None]:
import time
import json
import pandas as pd
from bs4 import BeautifulSoup
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import undetected_chromedriver as uc
import tkinter as tk
from tkinter import ttk, messagebox

def slow_scroll(driver, step=400, pause=0.3, settle=2.5, max_steps=500):
    """Scroll the page down in small increments, then wait for it to settle.

    The document height is re-read before every step, so content that is
    appended while scrolling (lazy loading) is also scrolled into view instead
    of stopping at the height measured up front.

    Args:
        driver: WebDriver exposing ``execute_script``.
        step: Pixels advanced per scroll; must be positive.
        pause: Seconds to sleep after each scroll.
        settle: Seconds to sleep once scrolling has finished.
        max_steps: Upper bound on scroll iterations, guarding against pages
            that keep growing forever.

    Raises:
        ValueError: If *step* is not positive.
    """
    if step <= 0:
        raise ValueError("step must be a positive number of pixels")

    offset = 0
    for _ in range(max_steps):
        # A missing body yields None; treat it as an empty page.
        height = driver.execute_script("return document.body.scrollHeight") or 0
        if offset >= height:
            break
        driver.execute_script(f"window.scrollTo(0, {offset});")
        time.sleep(pause)
        offset += step
    time.sleep(settle)

def load_cookies(driver, cookies_file="shopee_cookies.json"):
    """Load cookies from a JSON file into *driver*.

    Each cookie has its ``sameSite`` key removed before being added
    (presumably because ``add_cookie`` can reject some stored values --
    confirm against the Selenium version in use).

    Args:
        driver: Object exposing ``add_cookie(cookie)``.
        cookies_file: Path of the JSON file holding a list of cookie dicts.

    Raises:
        Exception: If the file cannot be read or parsed, or a cookie cannot be
            added. The original error is attached as ``__cause__``.
    """
    try:
        with open(cookies_file, "r") as f:
            cookies = json.load(f)
        for cookie in cookies:
            cookie.pop("sameSite", None)
            driver.add_cookie(cookie)
    except Exception as e:
        # Chain explicitly so the root cause stays visible in tracebacks.
        raise Exception("❌ Gagal memuat cookie: " + str(e)) from e
    else:
        print("✅ Cookie dimuat.")

def _select_text(card, selector, default=None):
    """Return the stripped text of the first match for *selector* in *card*, else *default*."""
    tag = card.select_one(selector)
    return tag.text.strip() if tag is not None else default


def _parse_product_card(card, keyword):
    """Turn one search-result card into a product dict, or None when name/price is missing."""
    name = _select_text(card, "div.line-clamp-2")
    price = _select_text(card, "span.font-medium.text-base\\/5.truncate")
    if name is None or price is None:
        return None

    missing = "Tidak ditemukan"
    return {
        "product_name": name,
        "price": price,
        "sold": _select_text(card, "div.truncate.text-shopee-black87.text-xs.min-h-4", missing),
        "location": _select_text(card, "div.flex-shrink.min-w-0.truncate.text-shopee-black54", missing),
        "rating": _select_text(card, "div.text-shopee-black87.text-xs\\/sp14.flex-none", missing),
        "kategori": keyword,
    }


def scrape_shopee(keyword: str, max_page: int, output_file: str):
    """Scrape Shopee search results for *keyword* and save them as CSV.

    Opens a browser, restores the saved cookies, walks through up to
    *max_page* result pages collecting name, price, sold count, location and
    rating for every product card, then writes the rows to *output_file*.
    Any error is reported through a message box; the browser is closed in
    every case.

    Args:
        keyword: Search term; URL-encoded into the search URL and stored in
            the ``kategori`` column of every row.
        max_page: Maximum number of result pages to visit.
        output_file: Path of the CSV file to write.
    """
    # Local import keeps this block self-contained.
    from urllib.parse import quote

    # Bound before the try so ``finally`` never hits an unbound name when the
    # browser itself fails to start.
    driver = None
    try:
        options = uc.ChromeOptions()
        options.add_argument("--window-size=1200,800")
        driver = uc.Chrome(options=options)

        driver.get("https://shopee.co.id")
        time.sleep(3)

        load_cookies(driver)

        driver.refresh()
        time.sleep(5)

        # quote() escapes every reserved character (&, #, +, ...), not only spaces.
        driver.get(f"https://shopee.co.id/search?keyword={quote(keyword)}")
        time.sleep(5)

        products = []

        for page in range(1, max_page + 1):
            print(f"\n📄 Scraping halaman {page}...")

            try:
                WebDriverWait(driver, 15).until(
                    EC.presence_of_all_elements_located((By.CLASS_NAME, "shopee-search-item-result__item"))
                )
            except Exception:
                print("❗ Produk tidak muncul, halaman mungkin gagal dimuat.")
                break

            slow_scroll(driver)
            soup = BeautifulSoup(driver.page_source, "html.parser")
            product_cards = soup.select(".shopee-search-item-result__item")
            print(f"🔎 {len(product_cards)} produk ditemukan")

            for card in product_cards:
                try:
                    product = _parse_product_card(card, keyword)
                except Exception:
                    # Best effort: one malformed card must not abort the page.
                    continue
                if product is not None:
                    products.append(product)

            if page == max_page:
                break

            # Pagination links are matched by the visible number of the next page.
            next_page = page + 1
            try:
                next_button = WebDriverWait(driver, 10).until(
                    EC.element_to_be_clickable((By.XPATH, f'//a[@class="shopee-button-no-outline" and text()="{next_page}"]'))
                )
                next_button.click()
                time.sleep(4)
            except Exception:
                print("🚫 Gagal klik halaman berikutnya.")
                break

        df = pd.DataFrame(products)
        df.to_csv(output_file, index=False)
        print(f"\n✅ {len(products)} produk disimpan ke '{output_file}'")
        messagebox.showinfo("Selesai", f"Scraping selesai!\n{len(products)} produk disimpan ke '{output_file}'")
    except Exception as e:
        messagebox.showerror("Error", str(e))
    finally:
        if driver is not None:
            driver.quit()

# ========== GUI ========== #
def start_scraping():
    """Button callback: validate the form fields and start the scraper.

    Validation rules:
      * all three fields must be non-empty after stripping whitespace;
      * the page count must be an integer of at least 1;
      * ``.csv`` is appended to the file name unless it already ends with that
        suffix (case-insensitive).
    """
    keyword = keyword_entry.get().strip()
    pages_text = pages_entry.get().strip()
    filename = filename_entry.get().strip()

    if not keyword or not pages_text or not filename:
        messagebox.showwarning("Input Kosong", "Semua kolom harus diisi.")
        return

    try:
        page_count = int(pages_text)
    except ValueError:
        messagebox.showerror("Input Salah", "Jumlah halaman harus berupa angka.")
        return

    # Zero or negative counts would otherwise silently write an empty CSV.
    if page_count < 1:
        messagebox.showerror("Input Salah", "Jumlah halaman harus minimal 1.")
        return

    # Case-insensitive so "data.CSV" does not become "data.CSV.csv".
    if not filename.lower().endswith(".csv"):
        filename += ".csv"

    scrape_shopee(keyword, page_count, filename)

# GUI setup
# Build the three-field scraper form.
root = tk.Tk()
root.geometry("400x250")
root.title("Shopee Scraper (Auto Cookie Login)")

# Search keyword.
tk.Label(master=root, text="🔍 Kata Kunci Pencarian:").pack(pady=(10, 0))
keyword_entry = tk.Entry(master=root, width=40)
keyword_entry.pack()

# Number of result pages to visit.
tk.Label(master=root, text="📄 Jumlah Halaman:").pack(pady=(10, 0))
pages_entry = tk.Entry(master=root, width=10)
pages_entry.pack()

# Output CSV file name.
tk.Label(master=root, text="💾 Nama File Output:").pack(pady=(10, 0))
filename_entry = tk.Entry(master=root, width=30)
filename_entry.pack()

ttk.Button(
    master=root,
    text="🚀 Mulai Scraping",
    command=start_scraping,
).pack(pady=20)

# Hand control to Tk's event loop.
root.mainloop()