In [1]:
# Setup cell: installs the Python packages and the Google Chrome browser.
# The steps below are order-dependent: the signing key and apt source must be
# in place before `apt-get update`, which must run before the install.

# Install Selenium, BeautifulSoup, Pandas and Webdriver Manager
!pip install selenium beautifulsoup4 pandas webdriver-manager

# --- INSTALLING GOOGLE CHROME IN COLAB ---
# 1. Download Google's Linux signing key and store it (de-armored) as an apt keyring
!wget -q -O - https://dl.google.com/linux/linux_signing_key.pub | sudo gpg --dearmor -o /usr/share/keyrings/google-chrome-archive-keyring.gpg

# 2. Add the Google Chrome repository to the apt sources, pinned to the keyring above
!echo "deb [arch=amd64 signed-by=/usr/share/keyrings/google-chrome-archive-keyring.gpg] http://dl.google.com/linux/chrome/deb/ stable main" | sudo tee /etc/apt/sources.list.d/google-chrome.list

# 3. Refresh the apt package lists so the new repository is included
!sudo apt-get update

# 4. Install the stable Google Chrome package
!sudo apt-get install -y google-chrome-stable

# Optional: check which Chrome version was installed
!google-chrome --version

print("\n--- Instalación de Chrome y dependencias completada ---")

Collecting selenium
  Downloading selenium-4.34.2-py3-none-any.whl.metadata (7.5 kB)
Collecting webdriver-manager
  Downloading webdriver_manager-4.0.2-py2.py3-none-any.whl.metadata (12 kB)
Collecting urllib3~=2.5.0 (from urllib3[socks]~=2.5.0->selenium)
  Downloading urllib3-2.5.0-py3-none-any.whl.metadata (6.5 kB)
Collecting trio~=0.30.0 (from selenium)
  Downloading trio-0.30.0-py3-none-any.whl.metadata (8.5 kB)
Collecting trio-websocket~=0.12.2 (from selenium)
  Downloading trio_websocket-0.12.2-py3-none-any.whl.metadata (5.1 kB)
Collecting python-dotenv (from webdriver-manager)
  Downloading python_dotenv-1.1.1-py3-none-any.whl.metadata (24 kB)
Collecting outcome (from trio~=0.30.0->selenium)
  Downloading outcome-1.3.0.post0-py2.py3-none-any.whl.metadata (2.6 kB)
Collecting wsproto>=0.14 (from trio-websocket~=0.12.2->selenium)
  Downloading wsproto-1.2.0-py3-none-any.whl.metadata (5.6 kB)
Downloading selenium-4.34.2-py3-none-any.whl (9.4 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━

In [12]:
# --- STEP 1: INSTALL THE REQUIRED LIBRARIES IN GOOGLE COLAB ---
# IMPORTANT: run this cell at the start of your notebook.
# Both stdout and stderr are redirected to /dev/null, so the installation
# messages are hidden -- note that this also hides any pip error output.
!pip install selenium webdriver-manager > /dev/null 2>&1

# --- STEP 2: IMPORT THE LIBRARIES ---
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from bs4 import BeautifulSoup
import time
import pandas as pd
from webdriver_manager.chrome import ChromeDriverManager

# Importar para mostrar HTML en Colab
from IPython.display import display, HTML

# CSS class carried by every plan card <div> on the page.
_PLAN_CARD_CLASS = 'cA1PEBodyCardWrap'


def _build_chrome_options():
    """Return the ChromeOptions used to run Chrome headlessly inside Colab."""
    options = webdriver.ChromeOptions()
    for flag in (
        '--headless',               # Runs Chrome without a visible window
        '--no-sandbox',             # Necessary for Linux environments like Colab
        '--disable-dev-shm-usage',  # Prevents memory issues in some environments
        '--window-size=1920,1080',  # Common resolution for better element loading
        '--log-level=3',            # Suppresses most Chrome browser log messages
    ):
        options.add_argument(flag)
    return options


def _parse_price(price_str):
    """Convert the card's price attribute to float, or 'N/A' if it is not numeric."""
    try:
        return float(price_str)
    except (TypeError, ValueError):
        return 'N/A'


def _extract_gigas(plan_element):
    """Return the data allowance text with a ' GB' suffix, or 'N/A' if the card has none."""
    gigas_tag = plan_element.find('span', class_='number')
    if not gigas_tag:
        return 'N/A'
    gigas_text = gigas_tag.get_text(strip=True)
    # Only append the unit when the page text does not already end with it.
    if gigas_text.upper().endswith('GB'):
        return gigas_text
    return gigas_text + ' GB'


def _is_max_ilimitado_promo(plan_element):
    """Return True when the card's promo box advertises 'Gigas, Minutos y SMS'."""
    promo_div = plan_element.find('div', class_='cardPePromo')
    if not promo_div:
        return False
    promo_text_span = promo_div.find(
        'span', string=lambda text: text and 'Gigas, Minutos y SMS' in text
    )
    return promo_text_span is not None


def _extract_unlimited_apps(plan_element):
    """Return a comma-separated list of app names derived from 'cIco-rs-*' icon classes."""
    apps_list = []
    app_icon_tags = plan_element.find_all('i', class_=lambda x: x and 'cIco-rs-' in x)
    for icon_tag in app_icon_tags:
        for cls in icon_tag.get('class', []):
            if 'cIco-rs-' in cls:
                # The app name is whatever follows the icon prefix, e.g. 'whatsapp'.
                apps_list.append(cls.replace('cIco-rs-', '').capitalize())
    return ", ".join(apps_list) if apps_list else 'N/A'


def _extract_calls_sms(plan_element):
    """Return the <dd> text paired with the 'Llamadas y SMS' <dt> label, or 'N/A'."""
    label_span = plan_element.find(
        'span', string=lambda text: text and 'Llamadas y SMS' in text.strip()
    )
    if not label_span:
        return 'N/A'
    dt_parent = label_span.find_parent('dt')
    if not dt_parent:
        return 'N/A'
    dd_element = dt_parent.find_next_sibling('dd')
    if not dd_element:
        return 'N/A'
    return dd_element.get_text(strip=True)


def _parse_plan_cards(plan_elements):
    """
    Turn plan card elements into row dictionaries, in page order.

    Cards whose (name, price) pair was already seen are skipped, so each plan
    appears once even if the page renders it several times.
    """
    plans_data = []
    processed_plans = set()  # (name, price) tuples already emitted

    for plan_element in plan_elements:
        name = plan_element.get('data-badge', 'N/A')
        price = _parse_price(plan_element.get('data-price', 'N/A'))

        plan_key = (name, price)
        if plan_key in processed_plans:
            continue
        processed_plans.add(plan_key)

        gigas = _extract_gigas(plan_element)

        if _is_max_ilimitado_promo(plan_element):
            # "Max Ilimitado" promo cards get fixed benefit texts instead of
            # the per-card icon list and calls/SMS entry.
            calls_sms = "Ilimitadas"
            unlimited_apps = "Incluidas en Todo Ilimitado"
        else:
            unlimited_apps = _extract_unlimited_apps(plan_element)
            calls_sms = _extract_calls_sms(plan_element)

        plans_data.append({
            'Nombre del Plan': name,
            'Precio (S/)': price,
            'Gigas': gigas,
            'Apps Ilimitadas': unlimited_apps,
            'Llamadas y SMS': calls_sms
        })

    return plans_data


def extract_claro_plans_colab(url="https://www.claro.com.pe/personas/movil/postpago/",
                              wait_timeout=20,
                              settle_seconds=5):
    """
    Extracts post-paid plan information from Claro Peru's website.

    Selenium loads the page (its content is rendered dynamically) and
    BeautifulSoup parses the resulting HTML.

    Args:
        url: Page to scrape. Defaults to the Claro Peru post-paid plans page.
        wait_timeout: Maximum seconds to wait for the first plan card to be
            present in the DOM.
        settle_seconds: Extra fixed delay after the first card appears, to let
            the remaining content finish rendering.

    Returns:
        A list of dictionaries (one per unique plan) sorted by ascending price;
        plans whose price could not be parsed ('N/A') are placed last.
        An empty list is returned when no plan cards are found or when any
        exception occurs during extraction; in both cases a message is printed.
    """
    options = _build_chrome_options()
    driver = None

    try:
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=options)
        driver.get(url)

        # --- WAIT FOR DYNAMIC CONTENT TO LOAD ---
        wait = WebDriverWait(driver, wait_timeout)
        wait.until(EC.presence_of_element_located((By.CLASS_NAME, _PLAN_CARD_CLASS)))
        time.sleep(settle_seconds)  # Give extra time for full content rendering

        soup = BeautifulSoup(driver.page_source, 'html.parser')
        plan_elements = soup.find_all('div', class_=_PLAN_CARD_CLASS)

        if not plan_elements:
            print("ERROR: No plan elements found on the page. The HTML structure might have changed.")
            return []

        plans_data = _parse_plan_cards(plan_elements)

    except Exception as e:
        # Deliberate best-effort: report the failure and return an empty result
        # instead of raising into the notebook.
        print(f"An unexpected error occurred during extraction: {e}")
        return []
    finally:
        # Always shut the browser down, including on the early returns above.
        if driver:
            driver.quit()

    # Non-numeric prices ('N/A') sort after every real price.
    sorted_plans = sorted(
        plans_data,
        key=lambda x: x['Precio (S/)'] if isinstance(x['Precio (S/)'], (int, float)) else float('inf')
    )
    return sorted_plans

if __name__ == "__main__":
    # Scrape the plans and render them as a styled HTML table in the cell output.
    plans = extract_claro_plans_colab()

    # An empty list means extraction failed and the function already printed
    # the reason, so nothing further is displayed in that case.
    if plans:
        df = pd.DataFrame(plans)

        # Custom CSS for the HTML table
        html_style = """
        <style>
            body { font-family: Arial, sans-serif; margin: 20px; background-color: #f4f4f4; color: #333; }
            h1 { color: #E4002B; text-align: center; margin-bottom: 20px; } /* Claro red */
            table {
                width: 100%;
                border-collapse: collapse;
                margin-top: 20px;
                box-shadow: 0 4px 12px rgba(0,0,0,0.15); /* More pronounced shadow */
                background-color: #fff;
                border-radius: 8px; /* Rounded corners for the table */
                overflow: hidden; /* Ensures rounded corners apply to content */
            }
            th, td {
                padding: 15px 20px; /* More padding */
                text-align: left;
                border-bottom: 1px solid #eee; /* Lighter border */
            }
            th {
                background-color: #E4002B; /* Claro red for headers */
                color: white;
                text-transform: uppercase;
                font-size: 0.95em;
                letter-spacing: 0.5px;
            }
            tr:nth-child(even) {
                background-color: #f8f8f8; /* Slightly different shade for even rows */
            }
            tr:hover {
                background-color: #f0f0f0; /* Subtle hover effect */
            }
            /* Style for the header line */
            .header-line {
                font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
                font-size: 1.2em;
                font-weight: bold;
                color: #555;
                text-align: center;
                margin-bottom: 25px;
                padding-bottom: 10px;
                border-bottom: 2px solid #ddd;
            }
        </style>
        """

        # Generate the HTML table from the DataFrame.
        # The cell values are plain text scraped from a third-party page, so
        # they are HTML-escaped: any '<', '>' or '&' in the scraped text is
        # shown literally instead of being interpreted as markup.
        html_table = df.to_html(index=False, escape=True, classes='claro-plans-table')

        # Combine all parts into a full HTML document
        full_html_output = f"""
        <!DOCTYPE html>
        <html lang="es">
        <head>
            <meta charset="UTF-8">
            <meta name="viewport" content="width=device-width, initial-scale=1.0">
            <title>Planes y Beneficios Claro Perú</title>
            {html_style}
        </head>
        <body>
            <div class="header-line">--- Planes y Beneficios (ordenados por precio, sin duplicados) ---</div>
            {html_table}
        </body>
        </html>
        """

        # Display the HTML directly in the Colab output cell
        display(HTML(full_html_output))

Nombre del Plan,Precio (S/),Gigas,Apps Ilimitadas,Llamadas y SMS
max29.90,29.9,10 GB,"Facebook, Instagram, Threads, Whatsapp",Ilimitadas
max39.90,39.9,25 GB,"Facebook, Instagram, Threads, Whatsapp",Ilimitadas
max49.90,49.9,45 GB,"Facebook, Instagram, Messenger, Threads, Whatsapp",Ilimitadas
max55.90,55.9,75 GB,"Facebook, Instagram, Messenger, Threads, Whatsapp",Ilimitadas
maxilimitado69.90,69.9,110 GB,Incluidas en Todo Ilimitado,Ilimitadas
maxilimitado79.90,79.9,125 GB,Incluidas en Todo Ilimitado,Ilimitadas
maxilimitado95.90,95.9,135 GB,Incluidas en Todo Ilimitado,Ilimitadas
maxilimitado109.90,109.9,160 GB,Incluidas en Todo Ilimitado,Ilimitadas
maxilimitado159.90,159.9,175 GB,Incluidas en Todo Ilimitado,Ilimitadas
maxilimitado189.90,189.9,185 GB,Incluidas en Todo Ilimitado,Ilimitadas
