Aprende a consumir APIs REST desde Python

In [2]:
import requests
from bs4 import BeautifulSoup
import json

# The previously identified redirect URL is a more stable target.
# Using the last observed redirect target directly.
SIATA_TARGET_URL = "https://www.siata.gov.co/siata_nuevo"

print(f"Attempting to fetch SIATA target URL: {SIATA_TARGET_URL} with increased timeout.")

siata_homepage_soup = None
try:
    # Directly attempt to fetch the redirected URL, allowing further redirects, with a longer timeout
    response = requests.get(SIATA_TARGET_URL, allow_redirects=True, timeout=15) # Increased timeout to 15 seconds
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print("Successfully fetched SIATA homepage (or its redirected version).")
    siata_homepage_soup = BeautifulSoup(response.text, 'html.parser')

    print("\n--- Analyzing SIATA Homepage Content ---")

    # 1. Examine script tags for embedded JSON data or data URLs
    script_data_sources = []
    for script in siata_homepage_soup.find_all('script'):
        if script.string and ('json' in script.string.lower() or 'data' in script.string.lower()): # Broad search
            # This is a very basic heuristic. More advanced parsing would be needed to extract actual JSON.
            script_data_sources.append(f"Potential data in script (first 100 chars): {script.string[:100]}...")

    # 2. Look for iframe tags and extract their src attributes
    iframe_sources = []
    for iframe in siata_homepage_soup.find_all('iframe'):
        if 'src' in iframe.attrs:
            iframe_sources.append(iframe['src'])

    # 3. Identify any other HTML elements that seem to contain or dynamically load meteorological data.
    potential_data_divs = []
    for div in siata_homepage_soup.find_all('div', class_=lambda x: x and ('map' in x or 'data' in x)):
        if div.get('id') or div.get('class'):
            potential_data_divs.append(f"Div with id/class: {div.get('id', '')}/{div.get('class', '')} (first 50 chars of content): {div.text[:50]}...")

    # Print a summary of findings
    print("\n--- Summary of Findings ---")
    if script_data_sources:
        print("Potential data sources found in <script> tags:")
        for src in script_data_sources:
            print(f"- {src}")
    else:
        print("No obvious embedded JSON or data structures found in <script> tags.")

    if iframe_sources:
        print("\nPotential data sources found in <iframe> src attributes:")
        for src in iframe_sources:
            print(f"- {src}")
    else:
        print("No <iframe> tags with src attributes found.")

    if potential_data_divs:
        print("\nPotential data loading elements (divs) found:")
        for div_info in potential_data_divs:
            print(f"- {div_info}")
    else:
        print("No specific data loading divs identified.")

except requests.exceptions.RequestException as e:
    print(f"Error fetching SIATA homepage: {e}")
except Exception as e:
    print(f"An unexpected error occurred during parsing: {e}")

Attempting to fetch SIATA target URL: https://www.siata.gov.co/siata_nuevo with increased timeout.
Successfully fetched SIATA homepage (or its redirected version).

--- Analyzing SIATA Homepage Content ---

--- Summary of Findings ---
Potential data sources found in <script> tags:
- Potential data in script (first 100 chars): 
  window.dataLayer = window.dataLayer || [];
  function gtag(){dataLayer.push(arguments);}
  gtag('...
No <iframe> tags with src attributes found.

Potential data loading elements (divs) found:
- Div with id/class: /['mapa_detalle_panel'] (first 50 chars of content): 


 Capas 


 Informaci√≥n 








...
- Div with id/class: /['tab-content', 'navbar-left', 'mapa_detalle_tabs_content', 'small', 'active_panel', 'font_size'] (first 50 chars of content): 




...
- Div with id/class: mapa_div/['panel_left_map'] (first 50 chars of content):  ...


In [3]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import json

# 1. Define the URL for SIATA's operational data section
SIATA_OPERACIONAL_URL = "https://www.siata.gov.co/operacional/"

print(f"Attempting to fetch data from SIATA operational URL: {SIATA_OPERACIONAL_URL}")

siata_operacional_soup = None
siata_downloaded_data = None # Variable to store raw file content if downloaded

try:
    # 2. Make an HTTP GET request to the URL, allowing redirects and including a timeout
    response = requests.get(SIATA_OPERACIONAL_URL, allow_redirects=True, timeout=20)
    # 3. Implement robust error handling
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print(f"Successfully fetched SIATA operational page from {SIATA_OPERACIONAL_URL}.")

    # 4. Parse the HTML content of the response
    siata_operacional_soup = BeautifulSoup(response.text, 'html.parser')
    print(f"Title of the page: {siata_operacional_soup.title.string if siata_operacional_soup.title else 'No title found'}")
    print("HTML content parsed successfully.")

    # 5. Inspect the parsed HTML to identify potential links to downloadable data files
    # This is a placeholder for detailed inspection. We'll look for common download link patterns.
    download_links = []
    for link in siata_operacional_soup.find_all('a', href=True):
        href = link['href']
        # Look for common file extensions or keywords indicating data downloads
        if any(ext in href for ext in ['.csv', '.xlsx', '.json', '.zip', '.txt', 'download', 'export', 'data']):
            # Construct absolute URL if it's relative
            if not href.startswith('http'):
                # Simple join for relative paths, more complex logic needed for all cases
                absolute_href = requests.compat.urljoin(SIATA_OPERACIONAL_URL, href)
            else:
                absolute_href = href
            download_links.append(absolute_href)

    if download_links:
        print("\nPotential downloadable data links found:")
        for dl_link in download_links:
            print(f"- {dl_link}")
            # 6. For simplicity, attempt to download the first identified relevant file for now
            # In a real scenario, we might need to filter more specifically or present options.
            if siata_downloaded_data is None: # Only download the first one for initial check
                print(f"Attempting to download data from: {dl_link}")
                data_file_response = requests.get(dl_link, allow_redirects=True, timeout=20)
                data_file_response.raise_for_status()
                siata_downloaded_data = data_file_response.content # Store raw bytes
                print(f"Successfully downloaded data from {dl_link}. Size: {len(siata_downloaded_data)} bytes.")
                # Decide if we want to break after first download or list all and then pick.
                # For this step, let's download the first and stop.
                # If the content type is text/json, we might try to decode it.
                if 'json' in data_file_response.headers.get('Content-Type', ''):
                    try:
                        siata_downloaded_data = data_file_response.json() # Try parsing as JSON
                        print("Downloaded data appears to be JSON.")
                    except json.JSONDecodeError:
                        print("Downloaded data is not valid JSON, storing as raw content.")
                elif 'text' in data_file_response.headers.get('Content-Type', '') or 'csv' in data_file_response.headers.get('Content-Type', ''):
                    try:
                        siata_downloaded_data = data_file_response.text # Store as text for potential CSV/text parsing
                        print("Downloaded data appears to be text/CSV.")
                    except Exception as parse_e:
                        print(f"Error parsing downloaded text: {parse_e}")
                break # Only download the first for now
    else:
        # 7. If no direct downloadable file link is found
        print("\nNo direct downloadable data links (.csv, .xlsx, .json, etc.) found on the page.")
        print("Manual inspection of the page or network requests might be necessary to locate the data source.")

except requests.exceptions.Timeout:
    print(f"Error: Request to {SIATA_OPERACIONAL_URL} timed out after 20 seconds.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from SIATA operational URL: {e}")
except Exception as e:
    print(f"An unexpected error occurred during processing: {e}")

if siata_downloaded_data is not None:
    print("SIATA data successfully downloaded and stored in 'siata_downloaded_data'.")
else:
    print("Failed to download SIATA data or no direct download links found.")


Attempting to fetch data from SIATA operational URL: https://www.siata.gov.co/operacional/
Successfully fetched SIATA operational page from https://www.siata.gov.co/operacional/.
Title of the page: Listing folder
HTML content parsed successfully.

No direct downloadable data links (.csv, .xlsx, .json, etc.) found on the page.
Manual inspection of the page or network requests might be necessary to locate the data source.
Failed to download SIATA data or no direct download links found.


In [4]:
import requests
from urllib.parse import urlparse

# Assuming siata_operacional_soup is available from the previous step
# And SIATA_OPERACIONAL_URL is also available

if siata_operacional_soup is None:
    print("Error: siata_operacional_soup is not available. Please ensure the previous step ran successfully.")
else:
    print("Analyzing SIATA operational page for links...")

    data_files = []
    subdirectories = []
    other_links = []

    # Common data file extensions and keywords
    data_extensions = ['.csv', '.xlsx', '.json', '.zip', '.txt', '.xml', '.kmz', '.tgz', '.gz']
    data_keywords = ['data', 'export', 'archivo'] # in file name part

    for link in siata_operacional_soup.find_all('a', href=True):
        href = link['href']
        absolute_href = requests.compat.urljoin(SIATA_OPERACIONAL_URL, href)

        # Extract the path part of the URL to check for extensions/keywords
        parsed_url = urlparse(absolute_href)
        path = parsed_url.path.lower()

        # 3. Categorize links
        if path.endswith('/'):
            subdirectories.append(absolute_href)
        elif any(path.endswith(ext) for ext in data_extensions) or any(keyword in path for keyword in data_keywords):
            data_files.append(absolute_href)
        else:
            other_links.append(absolute_href)

    print("\n--- Link Analysis Summary ---")

    if data_files:
        print(f"Found {len(data_files)} potential Data Files:")
        for df_link in sorted(list(set(data_files))): # Use set to avoid duplicates and sort for readability
            print(f"- {df_link}")
    else:
        print("No obvious data files found.")

    if subdirectories:
        print(f"\nFound {len(subdirectories)} potential Subdirectories:")
        for sub_link in sorted(list(set(subdirectories))): # Use set to avoid duplicates and sort for readability
            print(f"- {sub_link}")
    else:
        print("No obvious subdirectories found.")

    # Optionally, print other links for full transparency
    # if other_links:
    #     print(f"\nFound {len(other_links)} other links (not categorized as data files or subdirectories):")
    #     for other_link_item in sorted(list(set(other_links))):
    #         print(f"- {other_link_item}")

    # Store the categorized links for potential future steps if needed
    siata_found_data_files = list(set(data_files))
    siata_found_subdirectories = list(set(subdirectories))

    print("\nLink categorization complete.")

Analyzing SIATA operational page for links...

--- Link Analysis Summary ---
No obvious data files found.

Found 32 potential Subdirectories:
- http://www.siata.gov.co/
- https://www.siata.gov.co/
- https://www.siata.gov.co/operacional/#
- https://www.siata.gov.co/operacional/?C=M;O=A
- https://www.siata.gov.co/operacional/?C=N;O=A
- https://www.siata.gov.co/operacional/?C=S;O=A
- https://www.siata.gov.co/operacional/CicloAnual/
- https://www.siata.gov.co/operacional/Meteorologia/
- https://www.siata.gov.co/operacional/WRF/
- https://www.siata.gov.co/operacional/enso/
- https://www.siata.gov.co/operacional/mapas/
- https://www.siata.gov.co/operacional/monitoreo/
- https://www.siata.gov.co/operacional/prcSirena/
- https://www.siata.gov.co/operacional/radar/
- https://www.siata.gov.co/operacional/radiometro/
- https://www.siata.gov.co/operacional/seriesdetiempo/
- https://www.siata.gov.co/operacional/seriesdetiempo_prueba/

Link categorization complete.


In [5]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin

# 1. Define the URL for SIATA's Meteorological subdirectory
SIATA_METEOROLOGIA_URL = "https://www.siata.gov.co/operacional/Meteorologia/"

print(f"Attempting to fetch data from SIATA Meteorological URL: {SIATA_METEOROLOGIA_URL}")

siata_meteorologia_soup = None

try:
    # 2. Make an HTTP GET request to the URL, allowing redirects and including a timeout
    response = requests.get(SIATA_METEOROLOGIA_URL, allow_redirects=True, timeout=20)
    # 3. Implement robust error handling
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print(f"Successfully fetched SIATA Meteorological page from {SIATA_METEOROLOGIA_URL}.")

    # 4. Parse the HTML content of the response
    siata_meteorologia_soup = BeautifulSoup(response.text, 'html.parser')
    print(f"Title of the page: {siata_meteorologia_soup.title.string if siata_meteorologia_soup.title else 'No title found'}")
    print("HTML content parsed successfully.")

    # 5. Extract all links found on the page.
    data_files = []
    subdirectories = []

    # Common data file extensions and keywords
    data_extensions = ['.csv', '.xlsx', '.json', '.zip', '.txt', '.xml', '.kmz', '.tgz', '.gz']
    data_keywords = ['data', 'export', 'archivo'] # in file name part

    for link in siata_meteorologia_soup.find_all('a', href=True):
        href = link['href']
        absolute_href = urljoin(SIATA_METEOROLOGIA_URL, href)

        # Extract the path part of the URL to check for extensions/keywords
        parsed_url = urlparse(absolute_href)
        path = parsed_url.path.lower()

        # 6. Categorize the extracted links
        if path.endswith('/') and absolute_href != SIATA_METEOROLOGIA_URL and absolute_href != SIATA_METEOROLOGIA_URL + '../': # Exclude current dir and parent dir
            subdirectories.append(absolute_href)
        elif any(path.endswith(ext) for ext in data_extensions) or any(keyword in path for keyword in data_keywords):
            data_files.append(absolute_href)

    # 7. Print a summary of the findings
    print("\n--- Link Analysis Summary for Meteorologia Directory ---")

    if data_files:
        print(f"Found {len(data_files)} potential Data Files:")
        for df_link in sorted(list(set(data_files))): # Use set to avoid duplicates and sort for readability
            print(f"- {df_link}")
    else:
        print("No obvious data files found in this directory.")

    if subdirectories:
        print(f"\nFound {len(subdirectories)} potential Subdirectories:")
        for sub_link in sorted(list(set(subdirectories))): # Use set to avoid duplicates and sort for readability
            print(f"- {sub_link}")
    else:
        print("No obvious subdirectories found in this directory.")

except requests.exceptions.Timeout:
    print(f"Error: Request to {SIATA_METEOROLOGIA_URL} timed out after 20 seconds.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from SIATA Meteorological URL: {e}")
except Exception as e:
    print(f"An unexpected error occurred during processing: {e}")

Attempting to fetch data from SIATA Meteorological URL: https://www.siata.gov.co/operacional/Meteorologia/
Successfully fetched SIATA Meteorological page from https://www.siata.gov.co/operacional/Meteorologia/.
Title of the page: Listing folder
HTML content parsed successfully.

--- Link Analysis Summary for Meteorologia Directory ---
No obvious data files found in this directory.

Found 12 potential Subdirectories:
- http://www.siata.gov.co/
- https://www.siata.gov.co/
- https://www.siata.gov.co/operacional/
- https://www.siata.gov.co/operacional/Meteorologia/#
- https://www.siata.gov.co/operacional/Meteorologia/?C=M;O=A
- https://www.siata.gov.co/operacional/Meteorologia/?C=N;O=A
- https://www.siata.gov.co/operacional/Meteorologia/?C=S;O=A
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/


In [6]:
import requests
from bs4 import BeautifulSoup
from urllib.parse import urlparse, urljoin

# 1. Define the URL for SIATA's Accumulative Precipitation subdirectory
SIATA_ACUMPRECIPITACION_URL = "https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/"

print(f"Attempting to fetch data from SIATA Accumulative Precipitation URL: {SIATA_ACUMPRECIPITACION_URL}")

siata_acumprecipitacion_soup = None

try:
    # 2. Make an HTTP GET request to the URL, allowing redirects and including a timeout
    response = requests.get(SIATA_ACUMPRECIPITACION_URL, allow_redirects=True, timeout=20)
    # 3. Implement robust error handling
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print(f"Successfully fetched SIATA Accumulative Precipitation page from {SIATA_ACUMPRECIPITACION_URL}.")

    # 4. Parse the HTML content of the response
    siata_acumprecipitacion_soup = BeautifulSoup(response.text, 'html.parser')
    print(f"Title of the page: {siata_acumprecipitacion_soup.title.string if siata_acumprecipitacion_soup.title else 'No title found'}")
    print("HTML content parsed successfully.")

    # 5. Extract all links found on the page.
    data_files = []
    subdirectories = []

    # Common data file extensions and keywords
    data_extensions = ['.csv', '.xlsx', '.json', '.zip', '.txt', '.xml', '.kmz', '.tgz', '.gz']
    data_keywords = ['data', 'export', 'archivo'] # in file name part

    for link in siata_acumprecipitacion_soup.find_all('a', href=True):
        href = link['href']
        absolute_href = urljoin(SIATA_ACUMPRECIPITACION_URL, href)

        # Extract the path part of the URL to check for extensions/keywords
        parsed_url = urlparse(absolute_href)
        path = parsed_url.path.lower()

        # 6. Categorize the extracted links
        if path.endswith('/') and absolute_href != SIATA_ACUMPRECIPITACION_URL and absolute_href != SIATA_ACUMPRECIPITACION_URL + '../': # Exclude current dir and parent dir
            subdirectories.append(absolute_href)
        elif any(path.endswith(ext) for ext in data_extensions) or any(keyword in path for keyword in data_keywords):
            data_files.append(absolute_href)

    # 7. Print a summary of the findings
    print("\n--- Link Analysis Summary for AcumPrecipitacion Directory ---")

    if data_files:
        print(f"Found {len(data_files)} potential Data Files:")
        for df_link in sorted(list(set(data_files))): # Use set to avoid duplicates and sort for readability
            print(f"- {df_link}")
    else:
        print("No obvious data files found in this directory.")

    if subdirectories:
        print(f"\nFound {len(subdirectories)} potential Subdirectories:")
        for sub_link in sorted(list(set(subdirectories))): # Use set to avoid duplicates and sort for readability
            print(f"- {sub_link}")
    else:
        print("No obvious subdirectories found in this directory.")

except requests.exceptions.Timeout:
    print(f"Error: Request to {SIATA_ACUMPRECIPITACION_URL} timed out after 20 seconds.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching data from SIATA Accumulative Precipitation URL: {e}")
except Exception as e:
    print(f"An unexpected error occurred during processing: {e}")

Attempting to fetch data from SIATA Accumulative Precipitation URL: https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/
Successfully fetched SIATA Accumulative Precipitation page from https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/.
Title of the page: Listing folder
HTML content parsed successfully.

--- Link Analysis Summary for AcumPrecipitacion Directory ---
Found 22 potential Data Files:
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Abril2025.txt
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Agosto2025.txt
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Diciembre2025.txt
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Febrero2025.txt
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Julio2025.txt
- https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Juni

In [7]:
import requests
import pandas as pd
import io

# URL of a specific data file identified in the previous step
# Choosing DatosPacum_Noviembre2025.txt as an example
SIATA_DATA_FILE_URL = "https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Noviembre2025.txt"

print(f"Attempting to download data file from: {SIATA_DATA_FILE_URL}")

siata_precipitacion_data = None

try:
    # Make an HTTP GET request to download the file
    response = requests.get(SIATA_DATA_FILE_URL, allow_redirects=True, timeout=30)
    response.raise_for_status() # Raise an HTTPError for bad responses (4xx or 5xx)

    print(f"Successfully downloaded data from {SIATA_DATA_FILE_URL}.")

    # The content is text, likely tab-separated based on typical SIATA files or simple text
    # Let's try to read it into a pandas DataFrame, first as a raw string to inspect.
    raw_content = response.text
    print("\n--- Sample of raw file content (first 500 characters) ---")
    print(raw_content[:500])
    print("-----------------------------------------------------------")

    # Attempt to parse as CSV/TXT, assuming tab or space separated for now
    # Given the output format, it's highly likely to be tab-separated or fixed-width.
    # Let's try reading with pandas, inferring delimiter or using common ones.
    try:
        # Using io.StringIO to treat the string content as a file
        siata_precipitacion_data = pd.read_csv(io.StringIO(raw_content), sep='\t', encoding='utf-8')
        print("\nSuccessfully parsed data into a DataFrame (assuming tab-separated).")
        print("First 5 rows of SIATA precipitation DataFrame:")
        print(siata_precipitacion_data.head())
        print("\nDataFrame Info:")
        siata_precipitacion_data.info()
    except Exception as parse_error:
        print(f"Error parsing data into DataFrame: {parse_error}")
        print("Attempting to read with space as delimiter.")
        try:
            siata_precipitacion_data = pd.read_csv(io.StringIO(raw_content), sep=' ', skipinitialspace=True, encoding='utf-8')
            print("\nSuccessfully parsed data into a DataFrame (assuming space-separated).")
            print("First 5 rows of SIATA precipitation DataFrame:")
            print(siata_precipitacion_data.head())
            print("\nDataFrame Info:")
            siata_precipitacion_data.info()
        except Exception as second_parse_error:
            print(f"Error parsing data with space delimiter: {second_parse_error}")
            print("Could not parse data into DataFrame. Raw content stored as string.")
            siata_precipitacion_data = raw_content # Fallback to raw string if parsing fails

except requests.exceptions.Timeout:
    print(f"Error: Request to {SIATA_DATA_FILE_URL} timed out after 30 seconds.")
except requests.exceptions.RequestException as e:
    print(f"Error fetching data file from SIATA: {e}")
except Exception as e:
    print(f"An unexpected error occurred during processing: {e}")

if siata_precipitacion_data is not None and isinstance(siata_precipitacion_data, pd.DataFrame):
    print("SIATA precipitation data successfully loaded into 'siata_precipitacion_data' DataFrame.")
elif siata_precipitacion_data is not None:
    print("SIATA precipitation data downloaded but not parsed into DataFrame. Stored as raw content.")
else:
    print("Failed to download or process SIATA precipitation data.")


Attempting to download data file from: https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Noviembre2025.txt
Successfully downloaded data from https://www.siata.gov.co/operacional/Meteorologia/AcumPrecipitacion/DatosPacum_Noviembre2025.txt.

--- Sample of raw file content (first 500 characters) ---
Fecha actualizacion: 2025/12/01 00:01
Estacion,Nombre,Municipio,Barrio,Climatologia mes,Acumulado Mes (mm),Porcentaje Mes
66, I.E San Andres (Sede El Socorro), Girardota, NA, 120.730, 284.734, 235.844
420, Pueblo Viejo - Pluviometro, La Estrella, NULL, 172.720, 382.016, 221.176
278, Vereda Potrerito - Pluviometro, Barbosa, NA, 229.210, 429.260, 187.278
127, I.E. Manuel Jose Sierra - Sede la Holanda, Girardota, NA, 146.710, 267.716, 182.480
62, Gimnasio Cantabria, La Estrella, NA, 208.720, 370.07
-----------------------------------------------------------

Successfully parsed data into a DataFrame (assuming tab-separated).
First 5 rows of SIATA precipitation DataFra

In [8]:
import io
import pandas as pd

# Assuming raw_content is available from the previous step
# It contains the entire text content of the downloaded file.

if raw_content:
    print("Attempting to re-parse SIATA precipitation data with correct delimiter and skipping header.")
    try:
        # Use io.StringIO to treat the string content as a file
        # Skip the first row (metadata) using skiprows=1
        # Use comma as the delimiter
        siata_precipitacion_data_cleaned = pd.read_csv(
            io.StringIO(raw_content),
            sep=',',
            skiprows=1, # Skip the "Fecha actualizacion" line
            encoding='utf-8',
            # We might have issues with extra spaces or inconsistent delimiters, let's refine this if needed
            # For now, assuming standard CSV format after skipping the first line.
            on_bad_lines='skip' # Skip lines that have too many fields
        )
        print("\nSuccessfully parsed data into a DataFrame with correct delimiter and skipped header.")
        print("First 5 rows of cleaned SIATA precipitation DataFrame:")
        print(siata_precipitacion_data_cleaned.head())
        print("\nDataFrame Info:")
        siata_precipitacion_data_cleaned.info()

        siata_precipitacion_data = siata_precipitacion_data_cleaned # Update the main variable

    except Exception as parse_error:
        print(f"Error re-parsing data into DataFrame: {parse_error}")
        print("Could not parse data into DataFrame after cleaning attempt. Raw content stored as string.")
        # Fallback to raw string if parsing fails even after cleaning attempts
        siata_precipitacion_data = raw_content
else:
    print("No raw content available for re-parsing.")

if siata_precipitacion_data is not None and isinstance(siata_precipitacion_data, pd.DataFrame):
    print("SIATA precipitation data successfully loaded into 'siata_precipitacion_data' DataFrame.")
elif siata_precipitacion_data is not None:
    print("SIATA precipitation data downloaded but not parsed into DataFrame. Stored as raw content.")
else:
    print("Failed to download or process SIATA precipitation data.")

Attempting to re-parse SIATA precipitation data with correct delimiter and skipping header.

Successfully parsed data into a DataFrame with correct delimiter and skipped header.
First 5 rows of cleaned SIATA precipitation DataFrame:
   Estacion                                      Nombre     Municipio Barrio  \
0        66            I.E San Andres (Sede El Socorro)     Girardota     NA   
1       420                  Pueblo Viejo - Pluviometro   La Estrella   NULL   
2       278              Vereda Potrerito - Pluviometro       Barbosa     NA   
3       127   I.E. Manuel Jose Sierra - Sede la Holanda     Girardota     NA   
4        62                          Gimnasio Cantabria   La Estrella     NA   

   Climatologia mes  Acumulado Mes (mm)  Porcentaje Mes  
0            120.73             284.734         235.844  
1            172.72             382.016         221.176  
2            229.21             429.260         187.278  
3            146.71             267.716         182.48