In [55]:
# Import necessary libraries

import re
import unicodedata
import warnings

import lxml
import pandas as pd
import requests
from bs4 import BeautifulSoup as bs
from bs4 import XMLParsedAsHTMLWarning

# Register an "ignore" filter for bs4's XMLParsedAsHTMLWarning so that warning
# category is not shown in cell output for the rest of the session.
warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)

In [56]:
# ANBIMA page listing the 2025 national holidays
url = "https://www.anbima.com.br/feriados/fer_nacionais/2025.asp"

# Send an HTTP GET request; the timeout (seconds) keeps the cell from hanging
# forever if the server never answers.
response = requests.get(url, timeout=30)

# Stop here on anything other than HTTP 200. Merely printing the failure would
# leave `soup` undefined (fresh kernel) or stale (re-run), and the parsing
# cell below would then fail confusingly or silently use old data.
if response.status_code != 200:
    raise RuntimeError(f"Failed to fetch the URL. Status code: {response.status_code}")

# Parse the HTML content with the lxml parser
soup = bs(response.content, "lxml")
print("HTML content fetched successfully!")

HTML content fetched successfully!


In [57]:
# Locate the holidays table (the <table> carrying class "interna")
table = soup.find("table", {"class": "interna"})
if table is None:
    # soup.find returns None when nothing matches; fail with a clear message
    # instead of an AttributeError on the next line.
    raise ValueError('No <table class="interna"> found in the fetched page; the page layout may have changed.')

# Every <tr> of the table: the first one holds the headers, the rest hold data
rows = table.find_all("tr")
if not rows:
    raise ValueError('The <table class="interna"> contains no rows; cannot build the holidays DataFrame.')

# Header labels are the <td class="tabela"> cells of the first row
headers = [header.text.strip() for header in rows[0].find_all("td", class_="tabela")]

# One list of stripped cell texts per remaining row
data = [[col.text.strip() for col in row.find_all("td")] for row in rows[1:]]

# Create a DataFrame
df_feriados_anbima_2025 = pd.DataFrame(data, columns=headers)

In [58]:
# Remove accents/diacritics from a single value (meant to be mapped over a DataFrame)
def remove_special_characters(value):
    """Return ``value`` with diacritical marks stripped from its letters.

    Works for both lowercase and uppercase letters (e.g. ``"ç" -> "c"``,
    ``"Á" -> "A"``, ``"ã" -> "a"``).

    Parameters
    ----------
    value : Any
        The value to clean. Only ``str`` instances are transformed.

    Returns
    -------
    Any
        A new string without combining marks when ``value`` is a ``str``;
        otherwise ``value`` itself, unchanged (dates, numbers, NaN, None, ...).
    """
    if not isinstance(value, str):
        return value

    # NFD splits each accented letter into its base letter followed by
    # combining marks; dropping the marks leaves the plain base letter.
    decomposed = unicodedata.normalize("NFD", value)
    stripped = "".join(ch for ch in decomposed if not unicodedata.combining(ch))

    # Recompose so characters that were decomposed but carried no combining
    # marks return to their usual composed form.
    return unicodedata.normalize("NFC", stripped)

In [None]:
# Parse the 'Data' column into datetimes, overwriting the column in place.
df_feriados_anbima_2025['Data'] = pd.to_datetime(
    df_feriados_anbima_2025['Data'],
    dayfirst=True,     # ambiguous dates are read with the day first
    errors='coerce',   # values that cannot be parsed become NaT instead of raising
)

In [60]:
# Bare expression as the last line of the cell: shows the holidays DataFrame as the cell output.
df_feriados_anbima_2025

Unnamed: 0,Data,Dia da Semana,Feriado
0,2025-01-01,quarta-feira,Confraternização Universal
1,2025-03-03,segunda-feira,Carnaval
2,2025-03-04,terça-feira,Carnaval
3,2025-04-18,sexta-feira,Paixão de Cristo
4,2025-04-21,segunda-feira,Tiradentes
5,2025-05-01,quinta-feira,Dia do Trabalho
6,2025-06-19,quinta-feira,Corpus Christi
7,2025-09-07,domingo,Independência do Brasil
8,2025-10-12,domingo,Nossa Sr.a Aparecida - Padroeira do Brasil
9,2025-11-02,domingo,Finados


In [61]:
# Portuguese weekday names indexed by pandas' dayofweek (Monday=0 ... Sunday=6).
# A fixed lookup is used instead of dt.day_name(locale=...), which needs the
# pt_BR locale to be installed on the host and can hand back mis-encoded names.
WEEKDAY_NAMES_PT_BR = [
    "segunda-feira",
    "terça-feira",
    "quarta-feira",
    "quinta-feira",
    "sexta-feira",
    "sábado",
    "domingo",
]

# Generate a DataFrame with one row per calendar day of 2025
all_days_2025 = pd.date_range(start="2025-01-01", end="2025-12-31", freq="D").to_frame(index=False, name="Data")

# Add the 'Dia da Semana' column (lowercase Portuguese weekday name)
all_days_2025["Dia da Semana"] = all_days_2025["Data"].dt.dayofweek.map(dict(enumerate(WEEKDAY_NAMES_PT_BR)))

In [62]:
# Left-join the holiday names onto the full-year calendar by 'Data': every day
# of all_days_2025 is kept, and days with no match get a missing 'Feriado'.
calendar_2025 = pd.merge(
    all_days_2025,
    df_feriados_anbima_2025[['Data', 'Feriado']],
    how="left",
    on="Data",
)

In [63]:
# Apply remove_special_characters element-wise to every cell of the calendar
# (non-string cells come back unchanged) and rebind calendar_2025 to the result.
calendar_2025 = calendar_2025.map(remove_special_characters)

In [64]:
# Bare expression as the last line of the cell: shows the merged 2025 calendar as the cell output.
calendar_2025

Unnamed: 0,Data,Dia da Semana,Feriado
0,2025-01-01,quarta-feira,Confraternizacao Universal
1,2025-01-02,quinta-feira,
2,2025-01-03,sexta-feira,
3,2025-01-04,sabado,
4,2025-01-05,domingo,
...,...,...,...
360,2025-12-27,sabado,
361,2025-12-28,domingo,
362,2025-12-29,segunda-feira,
363,2025-12-30,terca-feira,
