In [1]:
import asyncio
from playwright.async_api import async_playwright
from bs4 import BeautifulSoup
import pandas as pd

async def scrape_listings(base_url, max_pages=20):
    data = []
    seen_titles = set()  # to prevent duplicates

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        page = await browser.new_page()

        for page_num in range(1, max_pages + 1):
            url = base_url.replace("page=1", f"page={page_num}")
            print(f"🔎 Visiting: {url}")
            await page.goto(url)

            try:
                await page.wait_for_selector("div.ps-missions__item, div.ps-mission", timeout=15000)
            except:
                print("❌ No listings found, scraping stopped.")
                break

            html = await page.content()
            soup = BeautifulSoup(html, "html.parser")
            listings = soup.find_all("div", class_="ps-missions__item")
            print(f"➡️ {len(listings)} listings found (page {page_num})")

            if not listings:
                break

            # If the first listing title was seen before -> repetition started, stop
            first_title_el = listings[0].find("div", class_="ps-mission__title")
            first_title = first_title_el.get_text(strip=True) if first_title_el else None
            if first_title in seen_titles:
                print("♻️ Same listings started repeating, scraping stopped.")
                break

            for item in listings:
                # Title
                title_el = item.find("div", class_="ps-mission__title")
                title = title_el.get_text(strip=True) if title_el else None
                seen_titles.add(title)

                # Price
                price_value = item.find("span", class_="ps-price__value")
                price_currency = item.find("span", class_="ps-price__curency")
                if price_value or price_currency:
                    val = price_value.get_text(strip=True) if price_value else ""
                    cur = price_currency.get_text(strip=True) if price_currency else ""
                    price = f"{val} {cur}".strip()
                else:
                    price = None

                # Category
                category_el = item.find("span", class_="ps-tag")
                category = category_el.get_text(strip=True) if category_el else None

                # Date
                date_el = item.find("span", class_="cardinfo__datetime")
                date = date_el.get_text(strip=True) if date_el else None

                # Location
                loc_el = item.find("span", class_="cardinfo__location")
                location = loc_el.get_text(strip=True) if loc_el else None
                city = location.split(",")[-1].strip() if location else None

                data.append([title, category, date, price, city])

        await browser.close()

    # ✅ Save results into CSV
    df = pd.DataFrame(data, columns=["title", "category", "date", "price", "location"])
    df.to_csv("fastli_clean.csv", index=False, encoding="utf-8")
    print(f"✅ Scraping completed! {len(df)} listings found. Saved to fastli_clean.csv.")
    return df


# Usage
df = await scrape_listings("https://fastli.pl/missions?page=1", max_pages=20)
print(df.head())


🔎 Visiting: https://fastli.pl/missions?page=1
➡️ 6 listings found (page 1)
🔎 Visiting: https://fastli.pl/missions?page=2
➡️ 6 listings found (page 2)
♻️ Same listings started repeating, scraping stopped.
✅ Scraping completed! 6 listings found. Saved to fastli_clean.csv.
                     title           category              date   price  \
0  Best Mechanik in Warsaw       Rzeczoznawca  2025-09-10 11:29  219 zł   
1            Ubezpieczenia             Własne  2025-09-10 11:29      zł   
2              Nauka Boksu  Kursy i szkolenia  2025-09-10 11:29   30 zł   
3        Serwis komputerów        Inne/Własne  2025-09-10 11:29      zł   
4         Mobilny mechanik             Serwis  2025-09-10 11:29  200 zł   

  location  
0     None  
1  Rzeszów  
2    Guzów  
3  Gliwice  
4  Gliwice  
