In [5]:
import requests
import re
import html
from bs4 import BeautifulSoup
from tqdm import tqdm
import pandas as pd
import time
import os
import random
import json

In [6]:
# Base parameters for web scraping
# Root of the gob.mx site; prepended to relative article links found in the listing.
BASE_URL = "https://www.gob.mx"
# Archive listing URL. It deliberately ends with "page=" so that the page
# number can be appended directly to build the URL of a given listing page.
ARCHIVE_URL = f"{BASE_URL}/presidencia/es/archivo/articulos?filter_origin=archive&idiom=es&order=DESC&page="

In [7]:
def get_articles_from_page(page_num, timeout=20):
    """
    Extract clean titles, URLs, and dates from one gob.mx archive listing page.

    The article cards are not plain HTML in the response: they are embedded as
    JavaScript string literals passed to ``$('#prensa').append('...')``. Each
    fragment is pulled out with a regex, un-escaped, and parsed as HTML.

    Args:
        page_num (int): Page number to scrape (appended to ARCHIVE_URL).
        timeout (float): Seconds to wait for the server before requests gives
            up and raises. Defaults to 20.

    Returns:
        list of dict: Each dict contains 'title', 'url', and 'date' keys. A
        field is None when the matching element is missing from the card.
        The list is empty when the page contains no embedded fragments.

    Raises:
        requests.exceptions.RequestException: On timeouts, connection
            failures, or a non-success HTTP status (via raise_for_status).
    """
    url = f"{ARCHIVE_URL}{page_num}"
    # requests has no default timeout; without one a stalled connection would
    # block forever and the caller's retry logic would never get a chance to run.
    r = requests.get(url, timeout=timeout)
    r.raise_for_status()
    text = r.text

    # Extract JS-embedded HTML fragments
    fragments = re.findall(r"\$\('#prensa'\)\.append\('(.+?)'\);", text, flags=re.DOTALL)

    articles_out = []

    for frag in fragments:
        # Step 1: Decode HTML entities (e.g. &lt;, &quot;)
        frag_clean = html.unescape(frag)
        # Step 2: Replace escaped quotes \" -> " (and \' -> ')
        frag_clean = frag_clean.replace('\\"', '"').replace("\\'", "'")
        # Step 3: Drop literal "\n" sequences, then any stray backslashes that break tags
        frag_clean = frag_clean.replace("\\n", "").replace("\\", "")
        # Step 4: Parse
        soup = BeautifulSoup(frag_clean, "html.parser")

        # Extract all article cards
        for art in soup.find_all("article"):
            title_el = art.find("h2")
            link_el = art.find("a", class_="small-link")
            date_el = art.find("time")

            title = title_el.get_text(strip=True) if title_el else None
            # NOTE(review): get_text(strip=True) joins every text node inside
            # <time> with no separator. A previously saved CSV shows a
            # publication-date label fused to the end of the date string, so
            # downstream date parsing has to strip it - confirm against the markup.
            date = date_el.get_text(strip=True) if date_el else None

            # hrefs may carry a trailing ?idiom=es query; it is kept as-is.
            if link_el and link_el.has_attr("href"):
                # Remove quote characters left around the value (presumably
                # residue of the un-escaping above).
                href = link_el["href"].strip('"')
                # Relative links are made absolute against the site root.
                if href.startswith("/"):
                    href = BASE_URL + href
            else:
                href = None

            # Keep the card if it yielded at least a title or a link.
            if title or href:
                articles_out.append({
                    "title": title,
                    "url": href,
                    "date": date
                })
    return articles_out

In [8]:
def scrape_all_articles(max_pages=50, retries=3):
    """
    Scrape article metadata from the paginated archive and save it to CSV.

    Pages are requested in order starting at 1. Each page is attempted up to
    ``retries`` times; a page that still fails is skipped. Pagination stops at
    the first page that is fetched successfully but contains no articles.
    Progress is written to ``../data/raw/article_metadata.csv`` after every
    5th page and once more at the end.

    Args:
        max_pages (int): Maximum number of pages to scrape.
        retries (int): Maximum number of attempts per page.

    Returns:
        pd.DataFrame: DataFrame containing all scraped articles.
    """
    # Accumulates the article dicts from every page
    all_data = []
    output_dir = os.path.join("..", "data", "raw")
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, "article_metadata.csv")

    # Loop through all the pages that are paginated in the site
    for page in tqdm(range(1, max_pages + 1)):
        page_data = []
        success = False
        for attempt in range(retries):
            try:
                # Get all the article cards from a single page
                page_data = get_articles_from_page(page)
                success = True
                break
            except requests.exceptions.HTTPError as e:
                print(f"HTTP error on page {page}: {e}. Retrying ({attempt+1}/{retries})...")
            except Exception as e:
                print(f"Other error on page {page}: {e}. Retrying...")
            # Wait before the next attempt; pointless after the last one
            if attempt < retries - 1:
                time.sleep(random.uniform(5, 10))

        # Every attempt failed: give up on this page only
        if not success:
            print(f"Failed to fetch page {page} after {retries} retries. Skipping...")
            continue

        # An empty page marks the end of the archive, so stop paginating
        # instead of requesting (and sleeping for) every remaining page.
        if not page_data:
            print(f"No more data after page {page}.")
            break

        all_data.extend(page_data)

        # Random polite delay between pages
        time.sleep(random.uniform(2, 5))

        # Save progress every 5 pages
        if page % 5 == 0:
            pd.DataFrame(all_data).to_csv(output_path, index=False)
            print(f"Checkpoint saved at page {page}")

    # Final save
    df = pd.DataFrame(all_data)
    df.to_csv(output_path, index=False)
    print(f"\nSaved {len(df)} articles to {output_path}")

    return df

In [None]:
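# Uncomment to re-scrape the archive. The output below is from a previous run
# (59 pages took about 7 minutes).
#scrape_all_articles(max_pages=59)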

  8%|‚ñä         | 5/59 [00:18<03:29,  3.88s/it]

üíæ Checkpoint saved at page 5


 17%|‚ñà‚ñã        | 10/59 [01:37<18:15, 22.36s/it]

üíæ Checkpoint saved at page 10


 25%|‚ñà‚ñà‚ñå       | 15/59 [02:59<18:27, 25.17s/it]

üíæ Checkpoint saved at page 15


 34%|‚ñà‚ñà‚ñà‚ñç      | 20/59 [03:16<04:34,  7.04s/it]

üíæ Checkpoint saved at page 20


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 25/59 [03:37<02:44,  4.85s/it]

üíæ Checkpoint saved at page 25


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 30/59 [03:54<01:43,  3.58s/it]

üíæ Checkpoint saved at page 30


 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ    | 35/59 [04:18<01:48,  4.53s/it]

üíæ Checkpoint saved at page 35


 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 40/59 [05:38<04:07, 13.01s/it]

üíæ Checkpoint saved at page 40


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 45/59 [05:56<01:08,  4.90s/it]

üíæ Checkpoint saved at page 45


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 50/59 [06:16<00:36,  4.00s/it]

üíæ Checkpoint saved at page 50


 93%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé| 55/59 [06:37<00:15,  3.92s/it]

üíæ Checkpoint saved at page 55


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 59/59 [06:55<00:00,  7.04s/it]


‚úÖ Saved 531 articles to ../data/raw/article_metadata.csv





Unnamed: 0,title,url,date
0,Versi√≥n estenogr√°fica.¬†Mensaje de la President...,https://www.gob.mx/presidencia/es/articulos/ve...,"jueves, 16 de octubre de 2025Fecha de publicaci√≥n"
1,Versi√≥n estenogr√°fica. Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"jueves, 16 de octubre de 2025Fecha de publicaci√≥n"
2,Versi√≥n estenogr√°fica. Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"mi√©rcoles, 15 de octubre de 2025Fecha de publi..."
3,Versi√≥n estenogr√°fica. Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"martes, 14 de octubre de 2025Fecha de publicaci√≥n"
4,Versi√≥n estenogr√°fica. Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"lunes, 13 de octubre de 2025Fecha de publicaci√≥n"
...,...,...,...
526,Versi√≥n estenogr√°fica. Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"jueves, 03 de octubre de 2024Fecha de publicaci√≥n"
527,Versi√≥n estenogr√°fica.¬†Conferencia de prensa d...,https://www.gob.mx/presidencia/es/articulos/ve...,"mi√©rcoles, 02 de octubre de 2024Fecha de publi..."
528,Versi√≥n estenogr√°fica. Toma de protesta ante e...,https://www.gob.mx/presidencia/es/articulos/ve...,"martes, 01 de octubre de 2024Fecha de publicaci√≥n"
529,Versi√≥n estenogr√°fica. Mensaje de la president...,https://www.gob.mx/presidencia/es/articulos/ve...,"martes, 01 de octubre de 2024Fecha de publicaci√≥n"


In [9]:
def get_transcript_structured(url):
    """
    Extract a structured transcript from a single article page on gob.mx.

    Single-attempt version: the page is requested once and any network or
    HTTP failure propagates to the caller. The paragraphs inside the
    ``<div class="article-body">`` element are returned as speech segments.

    NOTE: a later cell in this notebook defines a function with the same name
    (adding retry logic); once that cell runs it replaces this definition.

    Args:
        url (str): The full URL of the article page to scrape.

    Returns:
        list[dict]: One dictionary per non-empty paragraph, with keys:
            - "speaker" (str or None): Text of the paragraph's first
              ``<strong>`` element, else None.
            - "text" (str): The full paragraph text (it still includes the
              ``<strong>`` text), pieces joined with single spaces.
        An empty list is returned when the page has no article body.

    Raises:
        requests.exceptions.RequestException: On timeouts, connection
            failures, or a non-success HTTP status.
    """
    # Request and parse content.
    # The "ñ" is written as an escape so the source line stays ASCII; the value
    # itself must remain Latin-1 encodable to be sent as an HTTP header.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; Ma\u00f1anerasScraper/1.0)"}
    response = requests.get(url, headers=headers, timeout=15)
    response.raise_for_status()
    soup = BeautifulSoup(response.text, "html.parser")

    # Find only the text body
    content_div = soup.find("div", class_="article-body")
    if not content_div:
        return []

    # The body is a sequence of <p> elements, one per speech segment
    entries = []
    for p in content_div.find_all("p"):
        text = p.get_text(" ", strip=True)
        # Skip spacer paragraphs, matching the retrying variant of this function
        if not text:
            continue
        # Get speaker (if present)
        strong = p.find("strong")
        speaker = strong.get_text(strip=True) if strong else None
        entries.append({"speaker": speaker, "text": text})

    return entries


In [None]:
def get_transcript_structured(url, retries=3, backoff_factor=2):
    """
    Extract structured transcript with speaker & text, with retry logic.

    Same extraction as the single-attempt function of the same name defined
    earlier in the notebook (which this definition replaces), plus handling of
    failures: timeouts, connection errors and transient HTTP statuses
    (429 and 5xx) are retried with exponential backoff; any other HTTP error
    (e.g. 404) or unexpected exception gives up immediately.

    Args:
        url (str): Article URL.
        retries (int): Max number of attempts.
        backoff_factor (int): Multiplier for exponential backoff; the wait
            before the next attempt is ``backoff_factor * 2**attempt`` seconds
            plus up to one second of random jitter.

    Returns:
        list[dict]: [{'speaker': ..., 'text': ...}, ...] with one entry per
        non-empty paragraph. An empty list is returned when the page has no
        article body or when the page could not be fetched, so callers cannot
        tell those two cases apart from the return value alone.
    """
    # The "ñ" is written as an escape so the source line stays ASCII; the value
    # itself must remain Latin-1 encodable to be sent as an HTTP header.
    headers = {"User-Agent": "Mozilla/5.0 (compatible; Ma\u00f1anerasScraper/1.0)"}
    # Statuses that usually signal a temporary server-side condition
    transient_statuses = {429, 500, 502, 503, 504}

    for attempt in range(retries):
        try:
            r = requests.get(url, headers=headers, timeout=20)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, "html.parser")

            content = soup.find("div", class_="article-body")
            if not content:
                print(f"No <div class='article-body'> found for {url}")
                return []

            entries = []
            for p in content.find_all("p"):
                strong = p.find("strong")
                speaker = strong.get_text(strip=True) if strong else None
                text = p.get_text(" ", strip=True)
                # Skip spacer paragraphs that carry no text
                if text:
                    entries.append({"speaker": speaker, "text": text})

            return entries  # success — exit function

        except (requests.exceptions.Timeout, requests.exceptions.ConnectionError):
            reason = "Timeout or connection error"

        except requests.exceptions.HTTPError as e:
            # e.response can be None, hence getattr instead of attribute access
            status = getattr(e.response, "status_code", None)
            if status not in transient_statuses:
                print(f"HTTP error {e} on {url} — skipping")
                return []
            reason = f"HTTP error {status}"

        except Exception as e:
            print(f"Unexpected error for {url}: {e}")
            return []

        # Only retryable failures reach this point: back off, then try again
        wait = backoff_factor * (2 ** attempt) + random.uniform(0, 1)
        print(f"{reason} on {url} — retry {attempt+1}/{retries} after {wait:.1f}s")
        time.sleep(wait)

    print(f"Failed after {retries} retries: {url}")
    return []


In [None]:
# Metadata written by scrape_all_articles (columns used here: url, date, title)
df_meta = pd.read_csv("../data/raw/article_metadata.csv")

checkpoint_path = "../data/raw/article_transcripts_checkpoint.json"
final_path = "../data/processed/article_transcripts.json"
# Create the output folders up front: a missing directory should surface
# before the long scrape starts, not at the final save.
for path in (checkpoint_path, final_path):
    os.makedirs(os.path.dirname(path), exist_ok=True)

# --- Scrape all articles ---
data = []

for _, row in tqdm(df_meta.iterrows(), total=len(df_meta)):
    url = row["url"]
    # Rows without a link cannot be fetched
    if pd.isna(url):
        continue

    # Returns [] when the page could not be fetched or has no article body
    transcript_structured = get_transcript_structured(url)
    data.append({
        "date": row["date"],
        "title": row["title"],
        "url": url,
        "transcript": transcript_structured
    })

    # Random polite delay between requests
    time.sleep(random.uniform(2, 5))

    # Checkpoint every 10 collected articles. Counting collected articles
    # (not the row index) keeps the reported number accurate and means a
    # skipped row can never cause a checkpoint to be missed.
    if len(data) % 10 == 0:
        with open(checkpoint_path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        print(f"💾 Checkpoint saved ({len(data)} articles)")

# final save
with open(final_path, "w", encoding="utf-8") as f:
    json.dump(data, f, ensure_ascii=False, indent=2)

print(f"Saved {len(data)} full transcripts.")

# Failed fetches are stored with an empty transcript; make them visible
n_empty = sum(1 for article in data if not article["transcript"])
if n_empty:
    print(f"{n_empty} of them have an empty transcript (fetch failed or no article body).")

  2%|‚ñè         | 11/531 [00:42<38:15,  4.41s/it]

üíæ Checkpoint saved (10 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-06-de-octubre-de-2025?idiom=es ‚Äî retry 1/3 after 2.8s


  3%|‚ñé         | 14/531 [01:16<1:02:45,  7.28s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-02-de-octubre-de-2025?idiom=es ‚Äî retry 1/3 after 2.3s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-02-de-octubre-de-2025?idiom=es ‚Äî retry 2/3 after 4.8s


  3%|‚ñé         | 17/531 [02:16<1:43:55, 12.13s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-supervision-tras-lluvias-en-oriente-de-la-zona-metropolitana-del-valle-de-mexico?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-supervision-tras-lluvias-en-oriente-de-la-zona-metropolitana-del-valle-de-mexico?idiom=es ‚Äî skipping


  3%|‚ñé         | 18/531 [02:19<1:19:51,  9.34s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-30-de-septiembre-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-30-de-septiembre-de-2025?idiom=es ‚Äî skipping


  4%|‚ñç         | 21/531 [02:29<46:30,  5.47s/it]  

üíæ Checkpoint saved (20 articles)


  4%|‚ñç         | 22/531 [02:34<43:22,  5.11s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-la-transformacion-avanza-en-nayarit?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-la-transformacion-avanza-en-nayarit?idiom=es ‚Äî skipping


  5%|‚ñç         | 24/531 [02:42<40:37,  4.81s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-la-transformacion-avanza-en-baja-california-sur?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-la-transformacion-avanza-en-baja-california-sur?idiom=es ‚Äî skipping


  6%|‚ñå         | 31/531 [03:13<35:45,  4.29s/it]

üíæ Checkpoint saved (30 articles)


  8%|‚ñä         | 41/531 [03:51<34:54,  4.27s/it]

üíæ Checkpoint saved (40 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-18-de-septiembre-de-2025?idiom=es ‚Äî retry 1/3 after 2.4s


 10%|‚ñâ         | 51/531 [04:51<30:51,  3.86s/it]  

üíæ Checkpoint saved (50 articles)


 11%|‚ñà‚ñè        | 61/531 [05:32<33:04,  4.22s/it]

üíæ Checkpoint saved (60 articles)


 12%|‚ñà‚ñè        | 65/531 [05:51<36:01,  4.64s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-la-transformacion-avanza-en-aguascalientes?idiom=es ‚Äî retry 1/3 after 2.9s


 13%|‚ñà‚ñé        | 71/531 [06:39<41:48,  5.45s/it]  

üíæ Checkpoint saved (70 articles)


 14%|‚ñà‚ñé        | 73/531 [06:48<37:21,  4.89s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-primer-informe-de-gobierno-de-la-presidenta-claudia-sheinbaum-pardo-palacio-nacional?idiom=es ‚Äî retry 1/3 after 2.1s


 15%|‚ñà‚ñå        | 81/531 [07:46<35:33,  4.74s/it]  

üíæ Checkpoint saved (80 articles)


 16%|‚ñà‚ñå        | 84/531 [07:59<32:40,  4.39s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-plan-de-justicia-para-el-pueblo-amuzgo-tlacoachistlahuaca-guerrero?idiom=es ‚Äî retry 1/3 after 2.6s


 16%|‚ñà‚ñã        | 87/531 [08:35<56:37,  7.65s/it]  

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-hospital-regional-de-alta-especialidad-issste-primera-etapa-acapulco-guerrero?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-hospital-regional-de-alta-especialidad-issste-primera-etapa-acapulco-guerrero?idiom=es ‚Äî skipping


 17%|‚ñà‚ñã        | 88/531 [08:38<46:54,  6.35s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-22-de-agosto-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-22-de-agosto-de-2025?idiom=es ‚Äî skipping


 17%|‚ñà‚ñã        | 89/531 [08:43<44:12,  6.00s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-21-de-agosto-de-2025?idiom=es ‚Äî retry 1/3 after 2.6s


 17%|‚ñà‚ñã        | 91/531 [09:13<1:11:00,  9.68s/it]

üíæ Checkpoint saved (90 articles)


 19%|‚ñà‚ñâ        | 101/531 [09:54<32:46,  4.57s/it] 

üíæ Checkpoint saved (100 articles)


 21%|‚ñà‚ñà        | 111/531 [10:36<29:05,  4.15s/it]

üíæ Checkpoint saved (110 articles)


 21%|‚ñà‚ñà        | 112/531 [10:39<27:10,  3.89s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-faispiam-presupuesto-directo-para-pueblos-indigenas-y-afromexicanos-amealco-queretaro?idiom=es ‚Äî retry 1/3 after 2.1s


 23%|‚ñà‚ñà‚ñé       | 121/531 [11:38<28:26,  4.16s/it]  

üíæ Checkpoint saved (120 articles)


 24%|‚ñà‚ñà‚ñç       | 129/531 [12:17<31:28,  4.70s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-salud-casa-por-casa-acapulco-guerrero?idiom=es ‚Äî retry 1/3 after 3.0s


 25%|‚ñà‚ñà‚ñç       | 131/531 [12:47<59:15,  8.89s/it]  

üíæ Checkpoint saved (130 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-ampliacion-del-area-de-hemodialisis-hospital-de-alta-especialidad-imss-bienestar-centro-tabasco?idiom=es ‚Äî retry 1/3 after 3.0s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-ampliacion-del-area-de-hemodialisis-hospital-de-alta-especialidad-imss-bienestar-centro-tabasco?idiom=es ‚Äî retry 2/3 after 4.7s


 26%|‚ñà‚ñà‚ñå       | 136/531 [13:55<54:16,  8.24s/it]  

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-mando-unico-en-el-oriente-del-estado-de-mexico-nezahualcoyotl-estado-de-mexico?idiom=es ‚Äî retry 1/3 after 2.5s


 27%|‚ñà‚ñà‚ñã       | 141/531 [14:39<40:35,  6.25s/it]  

üíæ Checkpoint saved (140 articles)


 28%|‚ñà‚ñà‚ñä       | 151/531 [15:27<30:02,  4.74s/it]

üíæ Checkpoint saved (150 articles)


 30%|‚ñà‚ñà‚ñâ       | 159/531 [15:54<22:56,  3.70s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-03-de-julio-de-2025?idiom=es ‚Äî retry 1/3 after 2.0s


 30%|‚ñà‚ñà‚ñà       | 161/531 [16:24<52:47,  8.56s/it]  

üíæ Checkpoint saved (160 articles)


 31%|‚ñà‚ñà‚ñà       | 163/531 [16:33<39:30,  6.44s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-6-aniversario-de-la-guardia-nacional-ciudad-de-mexico?idiom=es ‚Äî retry 1/3 after 2.7s


 31%|‚ñà‚ñà‚ñà‚ñè      | 167/531 [17:14<44:30,  7.34s/it]  

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-salud-casa-por-casa-aquila-michoacan?idiom=es ‚Äî retry 1/3 after 2.5s


 32%|‚ñà‚ñà‚ñà‚ñè      | 171/531 [17:51<41:22,  6.90s/it]  

üíæ Checkpoint saved (170 articles)


 33%|‚ñà‚ñà‚ñà‚ñé      | 174/531 [18:02<28:33,  4.80s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-24-de-junio-de-2025?idiom=es ‚Äî retry 1/3 after 2.2s


 34%|‚ñà‚ñà‚ñà‚ñç      | 181/531 [18:53<26:19,  4.51s/it]  

üíæ Checkpoint saved (180 articles)


 35%|‚ñà‚ñà‚ñà‚ñå      | 187/531 [19:19<25:25,  4.43s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-faispiam-presupuesto-directo-para-pueblos-indigenas-y-afromexicanos?idiom=es ‚Äî retry 1/3 after 2.2s


 36%|‚ñà‚ñà‚ñà‚ñå      | 191/531 [19:57<37:02,  6.54s/it]  

üíæ Checkpoint saved (190 articles)


 37%|‚ñà‚ñà‚ñà‚ñã      | 199/531 [20:33<26:19,  4.76s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-asamblea-del-faispiam-en-coatetelco-morelos?idiom=es ‚Äî retry 1/3 after 2.1s


 38%|‚ñà‚ñà‚ñà‚ñä      | 201/531 [21:04<50:30,  9.18s/it]  

üíæ Checkpoint saved (200 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-del-centro-libre-para-las-mujeres-temixco-morelos?idiom=es ‚Äî retry 1/3 after 2.8s


 40%|‚ñà‚ñà‚ñà‚ñâ      | 210/531 [22:04<23:10,  4.33s/it]  

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-mensaje-de-la-presidenta-de-mexico-claudia-sheinbaum-pardo?idiom=es ‚Äî retry 1/3 after 2.5s
‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-mensaje-de-la-presidenta-de-mexico-claudia-sheinbaum-pardo?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-mensaje-de-la-presidenta-de-mexico-claudia-sheinbaum-pardo?idiom=es ‚Äî skipping


 40%|‚ñà‚ñà‚ñà‚ñâ      | 211/531 [22:31<59:10, 11.10s/it]

üíæ Checkpoint saved (210 articles)
‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-30-de-mayo-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-30-de-mayo-de-2025?idiom=es ‚Äî skipping


 40%|‚ñà‚ñà‚ñà‚ñà      | 215/531 [22:47<29:31,  5.61s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-26-de-mayo-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-26-de-mayo-de-2025?idiom=es ‚Äî skipping


 41%|‚ñà‚ñà‚ñà‚ñà      | 218/531 [23:00<25:14,  4.84s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-proyectos-prioritarios-de-san-luis-potosi-villa-hidalgo-san-luis-potosi?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-proyectos-prioritarios-de-san-luis-potosi-villa-hidalgo-san-luis-potosi?idiom=es ‚Äî skipping


 42%|‚ñà‚ñà‚ñà‚ñà‚ñè     | 221/531 [23:13<23:55,  4.63s/it]

üíæ Checkpoint saved (220 articles)


 43%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 229/531 [23:47<19:09,  3.81s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-encuentro-con-familiares-de-mineros-de-pasta-de-conchos?idiom=es ‚Äî retry 1/3 after 2.8s


 44%|‚ñà‚ñà‚ñà‚ñà‚ñé     | 231/531 [24:17<42:50,  8.57s/it]

üíæ Checkpoint saved (230 articles)


 44%|‚ñà‚ñà‚ñà‚ñà‚ñç     | 236/531 [24:41<26:20,  5.36s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-12-de-mayo-de-2025?idiom=es ‚Äî retry 1/3 after 2.4s


 45%|‚ñà‚ñà‚ñà‚ñà‚ñå     | 241/531 [25:26<31:50,  6.59s/it]

üíæ Checkpoint saved (240 articles)


 47%|‚ñà‚ñà‚ñà‚ñà‚ñã     | 251/531 [26:12<21:45,  4.66s/it]

üíæ Checkpoint saved (250 articles)


 48%|‚ñà‚ñà‚ñà‚ñà‚ñä     | 256/531 [26:35<20:27,  4.46s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-inicio-de-la-construccion-del-tren-mexico-queretaro-pedro-escobedo-queretaro?idiom=es ‚Äî retry 1/3 after 2.5s


 49%|‚ñà‚ñà‚ñà‚ñà‚ñâ     | 261/531 [27:24<31:02,  6.90s/it]

üíæ Checkpoint saved (260 articles)


 51%|‚ñà‚ñà‚ñà‚ñà‚ñà     | 271/531 [28:08<17:48,  4.11s/it]

üíæ Checkpoint saved (270 articles)


 53%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé    | 281/531 [28:48<16:24,  3.94s/it]

üíæ Checkpoint saved (280 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-7-de-abril-de-2025?idiom=es ‚Äî retry 1/3 after 2.3s


 55%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç    | 291/531 [29:51<15:57,  3.99s/it]

üíæ Checkpoint saved (290 articles)


 56%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå    | 295/531 [30:08<16:26,  4.18s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-28-de-marzo-de-2025?idiom=es ‚Äî retry 1/3 after 2.4s


 57%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã    | 301/531 [30:59<23:08,  6.04s/it]

üíæ Checkpoint saved (300 articles)


 58%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 308/531 [31:30<15:05,  4.06s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-81-asamblea-anual-a-camara-de-la-industria-de-transformacion-nuevo-leon-caintra?idiom=es ‚Äî retry 1/3 after 2.6s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-81-asamblea-anual-a-camara-de-la-industria-de-transformacion-nuevo-leon-caintra?idiom=es ‚Äî retry 2/3 after 4.6s


 59%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä    | 311/531 [32:31<41:28, 11.31s/it]  

üíæ Checkpoint saved (310 articles)


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 319/531 [33:09<17:02,  4.83s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-mundial-de-mujeres-parlamentarias?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-mundial-de-mujeres-parlamentarias?idiom=es ‚Äî skipping


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 320/531 [33:13<16:57,  4.82s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-14-de-marzo-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-14-de-marzo-de-2025?idiom=es ‚Äî skipping


 60%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 321/531 [33:16<14:43,  4.21s/it]

üíæ Checkpoint saved (320 articles)


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 323/531 [33:27<16:29,  4.76s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-11-de-marzo-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-11-de-marzo-de-2025?idiom=es ‚Äî skipping


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 324/531 [33:31<16:22,  4.74s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-10-de-marzo-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-10-de-marzo-de-2025?idiom=es ‚Äî skipping


 61%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà    | 325/531 [33:35<15:05,  4.40s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-asamblea-informativa-en-el-zocalo-de-la-ciudad-de-mexico?idiom=es ‚Äî retry 1/3 after 2.3s


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 327/531 [34:06<30:23,  8.94s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-7-de-marzo-de-2025?idiom=es ‚Äî retry 1/3 after 2.9s


 62%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè   | 331/531 [34:45<25:08,  7.54s/it]

üíæ Checkpoint saved (330 articles)


 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 334/531 [35:00<18:49,  5.73s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-programas-para-el-bienestar-391785?idiom=es ‚Äî retry 1/3 after 2.7s


 63%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 337/531 [35:34<25:23,  7.85s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-28-de-febrero-de-2025?idiom=es ‚Äî retry 1/3 after 3.0s


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé   | 338/531 [36:02<44:27, 13.82s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-27-de-febrero-de-2025?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-27-de-febrero-de-2025?idiom=es ‚Äî skipping


 64%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç   | 341/531 [36:13<23:09,  7.32s/it]

üíæ Checkpoint saved (340 articles)


 66%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå   | 351/531 [36:58<14:16,  4.76s/it]

üíæ Checkpoint saved (350 articles)


 68%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 361/531 [37:42<14:04,  4.97s/it]

üíæ Checkpoint saved (360 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-110-aniversario-de-la-fuerza-aerea-mexicana-inauguracion-de-la-base-aerea-militar-numero-20?idiom=es ‚Äî retry 1/3 after 2.1s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-110-aniversario-de-la-fuerza-aerea-mexicana-inauguracion-de-la-base-aerea-militar-numero-20?idiom=es ‚Äî retry 2/3 after 4.4s


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä   | 364/531 [38:42<31:40, 11.38s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-fertilizantes-para-el-bienestar?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-fertilizantes-para-el-bienestar?idiom=es ‚Äî skipping


 69%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 366/531 [38:48<19:36,  7.13s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-clinica-de-medicina-familiar-de-especialidades-issste?idiom=es ‚Äî retry 1/3 after 2.3s


 70%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ   | 371/531 [39:31<15:52,  5.95s/it]

üíæ Checkpoint saved (370 articles)


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 380/531 [40:07<10:52,  4.32s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-entrega-de-tarjetas-del-programa-de-mejoramiento-de-vivienda-para-el-bienestar?idiom=es ‚Äî retry 1/3 after 2.0s


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 381/531 [40:34<28:22, 11.35s/it]

üíæ Checkpoint saved (380 articles)


 72%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè  | 382/531 [40:40<23:45,  9.57s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-entrega-de-tarjetas-vivienda-para-el-bienestar-389303?idiom=es ‚Äî retry 1/3 after 3.0s


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé  | 391/531 [41:40<10:21,  4.44s/it]

üíæ Checkpoint saved (390 articles)


 74%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç  | 393/531 [41:48<09:28,  4.12s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-23-de-enero-de-2025?idiom=es ‚Äî retry 1/3 after 2.7s


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå  | 401/531 [42:42<09:17,  4.29s/it]

üíæ Checkpoint saved (400 articles)


 76%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 405/531 [43:00<09:09,  4.36s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-16-de-enero-de-2025?idiom=es ‚Äî retry 1/3 after 2.5s


 77%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã  | 411/531 [43:45<09:45,  4.88s/it]

üíæ Checkpoint saved (410 articles)


 78%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä  | 416/531 [44:05<07:15,  3.79s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-reapertura-del-segundo-piso-del-museo-nacional-de-antropologia-grandeza-y-diversidad-cultural-de-mexico?idiom=es ‚Äî retry 1/3 after 2.1s


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 420/531 [44:42<11:11,  6.05s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-entrega-de-tarjetas-de-la-pension-mujeres-bienestar?idiom=es ‚Äî retry 1/3 after 2.8s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-entrega-de-tarjetas-de-la-pension-mujeres-bienestar?idiom=es ‚Äî retry 2/3 after 4.4s


 79%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ  | 421/531 [45:34<36:23, 19.85s/it]

üíæ Checkpoint saved (420 articles)


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 426/531 [45:56<12:39,  7.23s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-programas-para-el-bienestar-386975?idiom=es ‚Äî retry 1/3 after 2.4s


 80%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 427/531 [46:23<23:00, 13.28s/it]

‚ö†Ô∏è HTTP error 404 Client Error: Not Found for url: https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-de-la-autopista-jala-puerto-vallarta?idiom=es on https://www.gob.mx/presidencia/es/articulos/version-estenografica-inauguracion-de-la-autopista-jala-puerto-vallarta?idiom=es ‚Äî skipping


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 429/531 [46:30<14:14,  8.38s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-26-de-diciembre-de-2024?idiom=es ‚Äî retry 1/3 after 2.9s


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 430/531 [46:59<24:14, 14.40s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-23-de-diciembre-de-2024?idiom=es ‚Äî retry 1/3 after 2.7s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-23-de-diciembre-de-2024?idiom=es ‚Äî retry 2/3 after 4.2s


 81%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà  | 431/531 [47:52<43:25, 26.05s/it]

üíæ Checkpoint saved (430 articles)


 83%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñé | 441/531 [48:33<07:01,  4.68s/it]

üíæ Checkpoint saved (440 articles)


 85%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç | 451/531 [49:18<06:24,  4.81s/it]

üíæ Checkpoint saved (450 articles)


 87%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã | 461/531 [49:55<04:21,  3.74s/it]

üíæ Checkpoint saved (460 articles)


 88%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 469/531 [50:28<04:35,  4.44s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-199-aniversario-de-la-consolidacion-de-la-independencia-en-el-mar?idiom=es ‚Äî retry 1/3 after 2.8s


 89%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä | 471/531 [51:00<09:21,  9.35s/it]

üíæ Checkpoint saved (470 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-21-de-noviembre-de-2024?idiom=es ‚Äî retry 1/3 after 2.1s


 91%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà | 481/531 [52:05<04:13,  5.07s/it]

üíæ Checkpoint saved (480 articles)


 92%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñè| 491/531 [52:41<02:16,  3.40s/it]

üíæ Checkpoint saved (490 articles)


 94%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñç| 501/531 [53:25<02:00,  4.03s/it]

üíæ Checkpoint saved (500 articles)


 96%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñå| 511/531 [54:07<01:27,  4.37s/it]

üíæ Checkpoint saved (510 articles)
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-15-de-octubre-de-2024?idiom=es ‚Äî retry 1/3 after 2.6s


 97%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñã| 515/531 [54:49<01:52,  7.05s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-11-de-octubre-de-2024?idiom=es ‚Äî retry 1/3 after 2.3s
‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-conferencia-de-prensa-de-la-presidenta-claudia-sheinbaum-pardo-del-11-de-octubre-de-2024?idiom=es ‚Äî retry 2/3 after 4.7s


 98%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñä| 521/531 [56:03<01:13,  7.37s/it]

üíæ Checkpoint saved (520 articles)


 99%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñâ| 528/531 [56:29<00:13,  4.39s/it]

‚è≥ Timeout or connection error on https://www.gob.mx/presidencia/es/articulos/version-estenografica-toma-de-protesta-ante-el-pueblo-de-mexico?idiom=es ‚Äî retry 1/3 after 2.1s


100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 531/531 [57:05<00:00,  6.45s/it]

üíæ Checkpoint saved (530 articles)





‚úÖ Saved 531 full transcripts.
