In [None]:
!pip install playwright openpyxl
!playwright install

from playwright.async_api import async_playwright
import pandas as pd

# Define async scraping function
async def scrape_dubizzle(
    first_page: int = 1,
    last_page: int = 189,
    output_path: str = "dubizzle_used_cars.xlsx",
) -> None:
    """Scrape used-car listings from dubizzle.com.om and save them to Excel.

    Each listing page in ``first_page..last_page`` (inclusive) is opened in a
    headless Chromium browser. For every car card the title, price and link
    are recorded together with the page number. All rows are then written to
    ``output_path`` as a single sheet.

    Args:
        first_page: First listing page number to visit.
        last_page: Last listing page number to visit (inclusive).
        output_path: Destination ``.xlsx`` file.

    A page that fails to load or to be parsed is reported and skipped, so a
    single bad page does not discard the rows already collected.
    """
    # Function-scope imports keep this block self-contained.
    from urllib.parse import urljoin

    from playwright.async_api import Error as PlaywrightError

    base_url = "https://www.dubizzle.com.om"
    card_selector = "a.f689619e"  # car card
    missing = "N/A"
    data = []

    async def text_or_missing(card, selector):
        # One DOM lookup per field instead of an existence check plus an eval.
        element = await card.query_selector(selector)
        return await element.inner_text() if element else missing

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            for page_num in range(first_page, last_page + 1):
                url = (
                    f"{base_url}/vehicles/cars-for-sale/"
                    f"?filter=new_used_eq_used&page={page_num}"
                )
                page_rows = []
                try:
                    await page.goto(url)
                    # Fixed pause to give the listing cards time to render.
                    await page.wait_for_timeout(2000)
                    cards = await page.query_selector_all(card_selector)
                    for card in cards:
                        title = await text_or_missing(card, "h2")
                        price = await text_or_missing(
                            card, "div[aria-label='Price'] span"
                        )
                        href = await card.get_attribute("href")
                        # urljoin handles relative and already-absolute hrefs.
                        link = urljoin(base_url, href) if href else missing
                        page_rows.append(
                            {
                                "title": title,
                                "price": price,
                                "link": link,
                                "page": page_num,
                            }
                        )
                except PlaywrightError as exc:
                    # Includes navigation timeouts; keep going with other pages.
                    print(f"Page {page_num} failed, skipping: {exc}")
                    continue
                data.extend(page_rows)
                print(f"Page {page_num} scraped, cars: {len(cards)}")
        finally:
            # Close the browser even if an unexpected error escapes the loop.
            await browser.close()
    df = pd.DataFrame(data)
    df.to_excel(output_path, index=False)
    print(f"Scraping completed! Saved to '{output_path}'")

# Run the scraper from a notebook cell. The top-level `await` below relies on
# the notebook's already-running event loop, which is why asyncio.run() is not
# used here (it refuses to start while another loop is running).
# NOTE(review): nest_asyncio is not listed in the `pip install` line at the top
# of this cell — confirm it is available in the target environment.
import nest_asyncio
nest_asyncio.apply()  # patch asyncio so the running loop can be re-entered (nested use)
await scrape_dubizzle()  # top-level await: accepted by IPython/Jupyter, a SyntaxError in a plain .py script



