In [18]:
import requests
from bs4 import BeautifulSoup
import pandas as pd

url = "https://pt.wikipedia.org/wiki/Lista_de_unidades_federativas_do_Brasil_por_%C3%8Dndice_de_Desenvolvimento_Humano"


def _parse_idh(cell_text):
    """Convert the text of an IDH table cell into a float.

    Only the first whitespace-separated token is used, so anything that
    follows the number inside the cell is ignored, and a decimal comma is
    replaced by a dot before conversion.

    Returns:
        The parsed value, or None when the cell is empty or its first token
        is not a valid number.
    """
    tokens = cell_text.split()
    if not tokens:
        # An empty cell used to raise IndexError on `.split()[0]`.
        return None
    try:
        return float(tokens[0].replace(",", "."))
    except ValueError:
        return None


# A timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() reports HTTP errors here instead of letting an error page
# be parsed as if it were the article.
response = requests.get(url, timeout=30)
response.raise_for_status()
soup = BeautifulSoup(response.content, 'html.parser')

# Only the first table carrying the "wikitable" class is read.
table = soup.find('table', {'class': 'wikitable'})
if table is None:
    raise ValueError(f"No table with class 'wikitable' found at {url}")

data = []

# The first <tr> is skipped (assumed to be the header row).
for row in table.find_all('tr')[1:]:
    cols = row.find_all('td')
    if len(cols) < 5:
        # Rows with fewer than five data cells are ignored.
        continue

    posicao = cols[0].text.strip()

    # Take all the text of the cell, ignoring image tags.
    estado = cols[2].get_text(strip=True)

    # Both IDH-M columns go through the same parser so they are cleaned
    # consistently.
    idh_2021 = _parse_idh(cols[3].text)
    idh_2020 = _parse_idh(cols[4].text)

    data.append([posicao, estado, idh_2021, idh_2020])

df_idh = pd.DataFrame(data, columns=["Posição", "Estado", "IDH-M 2021", "IDH-M 2020"])
df_sorted = df_idh.sort_values(by="IDH-M 2021", ascending=False)

print(df_sorted.head())

  Posição            Estado  IDH-M 2021  IDH-M 2020
0       1  Distrito Federal       0.814       0.829
1       2         São Paulo       0.806       0.823
2       3    Santa Catarina       0.792       0.803
3       4      Minas Gerais       0.774       0.789
4       5    Espírito Santo       0.771       0.792


In [19]:
# Lookup table from the full name of each of the 27 federative units listed
# here to its two-letter UF abbreviation.
estados_uf = dict(
    [
        ("Acre", "AC"),
        ("Alagoas", "AL"),
        ("Amapá", "AP"),
        ("Amazonas", "AM"),
        ("Bahia", "BA"),
        ("Ceará", "CE"),
        ("Distrito Federal", "DF"),
        ("Espírito Santo", "ES"),
        ("Goiás", "GO"),
        ("Maranhão", "MA"),
        ("Mato Grosso", "MT"),
        ("Mato Grosso do Sul", "MS"),
        ("Minas Gerais", "MG"),
        ("Pará", "PA"),
        ("Paraíba", "PB"),
        ("Paraná", "PR"),
        ("Pernambuco", "PE"),
        ("Piauí", "PI"),
        ("Rio de Janeiro", "RJ"),
        ("Rio Grande do Norte", "RN"),
        ("Rio Grande do Sul", "RS"),
        ("Rondônia", "RO"),
        ("Roraima", "RR"),
        ("Santa Catarina", "SC"),
        ("São Paulo", "SP"),
        ("Sergipe", "SE"),
        ("Tocantins", "TO"),
    ]
)


In [20]:
# Add a "UF" column by looking up each row's "Estado" value in estados_uf.
# Series.map with a dict leaves NaN for any name that is not a key, so a
# scraped name that differs from the dict keys shows up as a missing UF.
df_idh["UF"] = df_idh["Estado"].map(estados_uf)

In [21]:
# Reorder the columns so that the UF code sits right after the position.
df_idh = df_idh.loc[
    :,
    ["Posição", "UF", "Estado", "IDH-M 2021", "IDH-M 2020"],
]

In [22]:
# Show the reordered table as the cell's output.
df_idh

Unnamed: 0,Posição,UF,Estado,IDH-M 2021,IDH-M 2020
0,1,DF,Distrito Federal,0.814,0.829
1,2,SP,São Paulo,0.806,0.823
2,3,SC,Santa Catarina,0.792,0.803
3,4,MG,Minas Gerais,0.774,0.789
4,5,ES,Espírito Santo,0.771,0.792
5,6,RS,Rio Grande do Sul,0.771,0.78
6,7,PR,Paraná,0.769,0.787
7,8,RJ,Rio de Janeiro,0.762,0.785
8,9,MS,Mato Grosso do Sul,0.742,0.76
9,10,GO,Goiás,0.737,0.758


In [25]:
# Write the table to disk. The file is tab-separated despite its .csv
# extension, and the row index is written as well (to_csv's default), so
# readers of this file need to pass sep="\t" and account for the index column.
df_idh.to_csv("./idh_table.csv", sep="\t")