# Cityhall Data

This notebook downloads weather data from the Navarcles City Hall website (www.navarcles.cat).  
The data are published as monthly .TXT files with daily resolution, available from 2008 onwards (for example, meteo_navarcles_202301.TXT).

The .TXT files are not all stored under the same path on the server, for example:

- https://www.navarcles.cat/media/repository/meteo_navarcles_202303.TXT
- https://www.navarcles.cat/media/repository/publicacions/meteo/2023/meteo_navarcles_202301.TXT

In [1]:
import os
import requests
from bs4 import BeautifulSoup
from urllib.parse import urljoin

In [2]:
# Landing page of the climatological summaries; it is fetched first and
# scanned for links to the per-year summary pages.
BASE_URL = "https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics"
# Repository folder under which some of the monthly .TXT files are published
# (others sit directly under /media/repository/). The visible cells do not
# reference this constant; they take file URLs from the links found on each
# yearly page instead.
TXT_BASE_URL = "https://www.navarcles.cat/media/repository/publicacions/meteo/"

In [3]:
# Local folder (relative to the working directory) that receives the
# downloaded .TXT files; it is created up front so later writes cannot fail
# on a missing directory, and an already-existing folder is left untouched.
SAVE_DIR = os.path.join(os.pardir, "data", "navarcles_cityhall_data")
os.makedirs(SAVE_DIR, exist_ok=True)

In [4]:
# Discover the per-year summary pages linked from the landing page.
# Result: year_links maps a year string (e.g. "2024") to the absolute URL of
# that year's summary page.

# A timeout keeps the cell from hanging forever if the server never answers.
response = requests.get(BASE_URL, timeout=30)
if response.status_code != 200:
    # Raise instead of calling exit(): exit() is an interactive-shell helper
    # and, inside a notebook, ends the kernel session. An exception stops
    # this cell with a clear message and leaves the session usable.
    raise RuntimeError(
        f"Failed to fetch the main page (HTTP {response.status_code})."
    )

# parse the main page
soup = BeautifulSoup(response.text, "html.parser")

# {year: link}
year_links = {}
for link in soup.find_all("a", href=True):
    href = link["href"]
    if "resums-climatologics-" in href and href.endswith(".html"):
        # The year is the last dash-separated token before the extension,
        # e.g. ".../resums-climatologics-2024.html" -> "2024".
        year = href.split("-")[-1].split(".")[0]
        # Skip matching pages whose trailing token is not a year, so the
        # mapping only ever contains real year keys.
        if year.isdigit():
            year_links[year] = urljoin(BASE_URL, href)

In [5]:
year_links

{'2025': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2025.html',
 '2024': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2024.html',
 '2023': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2023.html',
 '2022': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2022.html',
 '2021': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2021.html',
 '2020': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2020.html',
 '2019': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatologics-2019.html',
 '2018': 'https://www.navarcles.cat/el-municipi/informacio-meteorologica/resums-climatologics/resums-climatolog

In [59]:
# Download every monthly .TXT file linked from each yearly summary page into
# SAVE_DIR. Existing files with the same name are overwritten.

# Descriptions of everything that could not be fetched, so the final message
# reflects what actually happened instead of always claiming success.
failures = []

# iterate over each yearly summary page
for year, year_link in year_links.items():
    print(f"Fetching data for {year}...")

    # The timeout prevents one unresponsive request from stalling the run;
    # a network error on one year must not abort the remaining years.
    try:
        year_response = requests.get(year_link, timeout=30)
    except requests.RequestException as err:
        print(f"Failed to fetch year {year}: {err}")
        failures.append(f"year page {year}")
        continue
    if year_response.status_code != 200:
        print(f"Failed to fetch year {year}.")
        failures.append(f"year page {year}")
        continue

    # Parse yearly summary page
    year_soup = BeautifulSoup(year_response.text, "html.parser")

    # Find all possible links to .txt files. The extension check is
    # case-insensitive because the site mixes ".TXT" and ".txt".
    txt_links = []
    for file_link in year_soup.find_all("a", href=True):
        href = file_link["href"]
        if "media/repository/" in href and href.lower().endswith(".txt"):
            full_url = urljoin("https://www.navarcles.cat", href)
            txt_links.append(full_url)
    print(f"{year} has {len(txt_links)} files")

    for txt_link in txt_links:
        file_name = os.path.basename(txt_link)
        file_path = os.path.join(SAVE_DIR, file_name)

        # Download file
        try:
            file_response = requests.get(txt_link, timeout=30)
        except requests.RequestException as err:
            print(f"Failed to download {file_name}: {err}")
            failures.append(file_name)
            continue
        if file_response.status_code == 200:
            # Write raw bytes so the file is stored exactly as served.
            with open(file_path, "wb") as f:
                f.write(file_response.content)
            print(f"Downloaded: {file_name}")
        else:
            print(f"Failed to download {file_name}")
            failures.append(file_name)

if failures:
    print(f"Finished with {len(failures)} failure(s): {', '.join(failures)}")
else:
    print("All files downloaded successfully.")

Fetching data for 2025...
2025 has 1 files
Downloaded: meteo_navarcles_202501.TXT
Fetching data for 2024...
2024 has 12 files
Downloaded: meteo_navarcles_202401.TXT
Downloaded: meteo_navarcles_202402.TXT
Downloaded: meteo_navarcles_202403.TXT
Downloaded: meteo_navarcles_202404.TXT
Downloaded: meteo_navarcles_202405.TXT
Downloaded: meteo_navarcles_202406.TXT
Downloaded: meteo_navarcles_202407.TXT
Downloaded: meteo_navarcles_202408.TXT
Downloaded: meteo_navarcles_202409.TXT
Downloaded: meteo_navarcles_202410.TXT
Downloaded: meteo_navarcles_202411.TXT
Downloaded: meteo_navarcles_202412.TXT
Fetching data for 2023...
2023 has 12 files
Downloaded: meteo_navarcles_202301.TXT
Downloaded: meteo_navarcles_202302.TXT
Downloaded: meteo_navarcles_202303.TXT
Downloaded: meteo_navarcles_202304.txt
Downloaded: meteo_navarcles_202305.TXT
Downloaded: meteo_navarcles-202306.TXT
Downloaded: meteo_navarcles_202307.TXT
Downloaded: meteo_navarcles_202308.TXT
Downloaded: meteo_navarcles_202309.TXT
Downloaded:

In [7]:
# Download the monthly .TXT files of a single year (useful for refreshing the
# current year without re-downloading the whole archive).
year = '2025'
year_link = year_links[year]
print(f"Fetching data for {year}...")
# The timeout prevents the cell from hanging on an unresponsive server.
year_response = requests.get(year_link, timeout=30)

txt_links = []
# Names of everything that could not be fetched; drives the final message.
failures = []

if year_response.status_code != 200:
    # Do not parse an error page: record the failure and leave txt_links
    # empty so the download loop below has nothing to do.
    print(f"Failed to fetch year {year}.")
    failures.append(f"year page {year}")
else:
    # Parse yearly summary page
    year_soup = BeautifulSoup(year_response.text, "html.parser")

    # Find all possible links to .txt files. The extension check is
    # case-insensitive because the site mixes ".TXT" and ".txt".
    for file_link in year_soup.find_all("a", href=True):
        href = file_link["href"]
        if "media/repository/" in href and href.lower().endswith(".txt"):
            full_url = urljoin("https://www.navarcles.cat", href)
            txt_links.append(full_url)
    print(f"{year} has {len(txt_links)} files")

for txt_link in txt_links:
    file_name = os.path.basename(txt_link)
    file_path = os.path.join(SAVE_DIR, file_name)

    # Download file
    try:
        file_response = requests.get(txt_link, timeout=30)
    except requests.RequestException as err:
        print(f"Failed to download {file_name}: {err}")
        failures.append(file_name)
        continue
    if file_response.status_code == 200:
        # Write raw bytes so the file is stored exactly as served.
        with open(file_path, "wb") as f:
            f.write(file_response.content)
        print(f"Downloaded: {file_name}")
    else:
        print(f"Failed to download {file_name}")
        failures.append(file_name)

if failures:
    print(f"Finished with {len(failures)} failure(s): {', '.join(failures)}")
else:
    print("All files downloaded successfully.")

Fetching data for 2025...
2025 has 4 files
Downloaded: meteo_navarcles_202501.TXT
Downloaded: meteo_navarcles_202504.TXT
Downloaded: meteo_navarcles_202502.TXT
Downloaded: meteo_navarcles_202503.TXT
All files downloaded successfully.
