In [9]:
import asyncio
from crawl4ai import *

async def main():
    async with AsyncWebCrawler() as crawler:
        result = await crawler.arun(
            url="https://corfo.cl/sites/cpp/regiones/coquimbo/",
        )
        return result.markdown.split('\n######')


resp = await main()


In [10]:
resp

['![Site in english](https://corfo.cl/sites/cpp/wp-content/uploads/2025/01/icono-england.png) [ Sitio en ingl√©s ](https://corfo.cl/sites/cpp/web-ingles/)\n![Cont√°ctanos](https://corfo.cl/sites/cpp/wp-content/uploads/2025/01/icono-dialogo.png) [ Cont√°ctanos ](https://corfo.cl/sites/cpp/contacto/)\n  * [ ![Instagram Corfo](https://corfo.cl/sites/cpp/wp-content/uploads/2025/01/icono-instagram.png) ](https://www.instagram.com/corfochile)\n  * [ ![Facebook de Corfo](https://corfo.cl/sites/cpp/wp-content/uploads/2025/01/icono-facebook.png) ](http://www.facebook.com/CorfoChile)\n  * [ ![Youtube](https://corfo.cl/sites/cpp/wp-content/uploads/2025/02/icono-youtube-1.png) ](http://www.youtube.com/corfotv)\n  * [ ![](https://corfo.cl/sites/cpp/wp-content/uploads/2025/02/icono-twitter-1.png) ](http://www.twitter.com/corfo)\n  * [ ![](https://corfo.cl/sites/cpp/wp-content/uploads/2025/02/icono-linkedin-1.png) ](https://www.linkedin.com/company/corfo/)\n  * [ ![](https://corfo.cl/sites/cpp/wp-con

In [11]:
import asyncio
import pandas as pd
from playwright.async_api import async_playwright

URL = "https://corfo.cl/sites/cpp/regiones/coquimbo/"

async def scrape_page(page):
    """Extrae todas las tarjetas visibles en la p√°gina actual"""
    await page.wait_for_timeout(2500)  # dejar que cargue JS

    # Espera a que aparezcan las tarjetas
    try:
        await page.wait_for_selector("div", timeout=10000)
    except:
        print("‚ö†Ô∏è No se encontraron tarjetas en esta p√°gina")
        return []

    cards = await page.query_selector_all("div")
    data = []
    print(cards)
    for card in cards:
        print(card)
        title_el = await card.query_selector("h3 a")
        title = (await title_el.inner_text()).strip() if title_el else ""
        link = (await title_el.get_attribute("href")) if title_el else ""

        estado_el = await card.query_selector("span.estado")
        estado = (await estado_el.inner_text()).strip() if estado_el else ""

        fechas_el = await card.query_selector_all("div.fechas p")
        apertura = fechas_el[0].inner_text() if len(fechas_el) >= 1 else ""
        cierre = fechas_el[1].inner_text() if len(fechas_el) >= 2 else ""

        territorio_el = await card.query_selector("div.territorio p")
        territorio = (await territorio_el.inner_text()).strip() if territorio_el else ""

        resumen_el = await card.query_selector("p")
        resumen = (await resumen_el.inner_text()).strip() if resumen_el else ""

        if title and link:
            data.append({
                "titulo": title,
                "enlace": link,
                "estado": estado,
                "apertura": apertura,
                "cierre": cierre,
                "territorio": territorio,
                "resumen": resumen
            })
    return data

async def main():
    all_data = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False, slow_mo=300)  # ver navegador
        page = await browser.new_page()

        await page.goto(URL)
        page_num = 1

        while True:
            print(f"\nüåê Scrapeando p√°gina {page_num}")
            rows = await scrape_page(page)
            print(page)
            if not rows:
                break
            all_data.extend(rows)

            # Intentar click en "Siguiente"
            try:
                next_btn = await page.query_selector("a.ui-paginator-next")
                if not next_btn:
                    break
                disabled = await next_btn.get_attribute("aria-disabled")
                if disabled == "true":
                    break
                await next_btn.click()
                await page.wait_for_timeout(2000)  # dejar cargar nuevas tarjetas
                page_num += 1
            except:
                break

        await browser.close()

    # Guardar CSV
    df = pd.DataFrame(all_data).drop_duplicates(subset=["titulo", "enlace"]).reset_index(drop=True)
    df.to_csv("corfo_convocatorias.csv", index=False, encoding="utf-8-sig")
    print(df)
    print(f"\n‚úÖ Guardado en corfo_convocatorias.csv ({len(df)} filas)")








await main()




üåê Scrapeando p√°gina 1
‚ö†Ô∏è No se encontraron tarjetas en esta p√°gina
<Page url='https://corfo.cl/sites/cpp/regiones/coquimbo/'>
Empty DataFrame
Columns: []
Index: []

‚úÖ Guardado en corfo_convocatorias.csv (0 filas)
