In [1]:
import pandas as pd

In [2]:
!pip install lxml

Collecting lxml
  Downloading lxml-6.0.2-cp310-cp310-manylinux_2_26_x86_64.manylinux_2_28_x86_64.whl (5.3 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m5.3/5.3 MB[0m [31m27.1 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: lxml
Successfully installed lxml-6.0.2


In [None]:
import requests
from io import StringIO

# Portuguese Wikipedia article whose HTML tables are scraped in the cells below.
url = 'https://pt.wikipedia.org/wiki/Ensino_superior_no_Brasil'

# Request headers sent with every fetch; the User-Agent imitates a desktop
# browser instead of the default client identifier.
headers = {}
headers['User-Agent'] = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'

In [None]:
# Method 1: download the page with requests (sending the custom headers) and
# hand the HTML text to pandas, which returns one DataFrame per <table> found.

# Bind the result up front so that, if the request or the parsing fails, later
# cells see an empty list instead of a NameError or a stale value left over
# from a previous run of this cell.
lista = []

try:
    # requests has no default timeout; without one a stalled connection would
    # block this cell forever.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()  # turn 4xx/5xx responses into an exception

    # Wrap the text in StringIO: read_html expects a file-like object or a
    # URL/path, not a literal HTML string.
    lista = pd.read_html(StringIO(response.text))
    print(f"Sucesso! Encontradas {len(lista)} tabelas")

except requests.exceptions.RequestException as e:
    # Network-level or HTTP-status failures.
    print(f"Erro na requisição: {e}")
except Exception as e:
    # Anything raised while parsing (e.g. no tables in the document).
    print(f"Erro ao processar HTML: {e}")

HTTPError: HTTP Error 403: Forbidden

In [None]:
# Method 2: Beautiful Soup fallback, for when method 1 does not work.
# Requires the beautifulsoup4 package (install it with: %pip install beautifulsoup4).
#
# Each <table> element is located with Beautiful Soup and converted to a
# DataFrame individually, so one malformed table does not abort the others.

# Bind the result before the try so it always exists after this cell runs,
# even when the import or the request fails.
lista_bs = []

try:
    # Imported here on purpose: a missing package is reported with an install
    # hint by the ImportError handler below.
    from bs4 import BeautifulSoup

    # timeout avoids hanging forever; raise_for_status prevents an HTTP error
    # page (e.g. 403) from being parsed as if it were the article.
    response = requests.get(url, headers=headers, timeout=30)
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    # Find every table element in the document.
    tables = soup.find_all('table')
    print(f"Encontradas {len(tables)} tabelas com Beautiful Soup")

    # Convert each table's HTML into a DataFrame with pd.read_html.
    for i, table in enumerate(tables):
        try:
            # StringIO wrapper: passing literal HTML to read_html is deprecated.
            df = pd.read_html(StringIO(str(table)))[0]
            lista_bs.append(df)
            print(f"Tabela {i+1}: {df.shape}")
        except Exception as e:
            # Report and skip tables pandas cannot parse.
            print(f"Erro na tabela {i+1}: {e}")

except ImportError:
    print("Beautiful Soup não está instalado. Execute: !pip install beautifulsoup4")
except requests.exceptions.RequestException as e:
    # Same message as method 1 for network/HTTP failures.
    print(f"Erro na requisição: {e}")
except Exception as e:
    print(f"Erro com Beautiful Soup: {e}")