In [1]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas



In [9]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Open-Meteo client: responses are cached (expire_after=3600) and requests
# are retried on error (retries=5, backoff_factor=0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=3600)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Air-quality endpoint and the query: one location, six hourly pollen
# variables, fixed date range.
url = "https://air-quality-api.open-meteo.com/v1/air-quality"
params = dict(
    latitude=46.0679,
    longitude=11.1211,
    hourly=[
        "alder_pollen", "birch_pollen", "grass_pollen",
        "mugwort_pollen", "olive_pollen", "ragweed_pollen"
    ],
    start_date="2024-01-01",
    end_date="2024-09-30",
)

# Issue the request
responses = openmeteo.weather_api(url, params=params)

# Only the first entry of the response list is used here; iterate over
# `responses` to handle several locations or models.
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Hourly values are addressed by position, so the unpacking order below
# must match the order of params["hourly"].
hourly = response.Hourly()
(
    hourly_alder_pollen,
    hourly_birch_pollen,
    hourly_grass_pollen,
    hourly_mugwort_pollen,
    hourly_olive_pollen,
    hourly_ragweed_pollen,
) = (hourly.Variables(position).ValuesAsNumpy() for position in range(6))

# Column mapping for the output table: a UTC time axis rebuilt from the
# response's start, end and interval (end excluded), then one column per
# pollen variable.
hourly_data = {
    "time": pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left"
    ),
    "alder_pollen (grains/m³)": hourly_alder_pollen,
    "birch_pollen (grains/m³)": hourly_birch_pollen,
    "grass_pollen (grains/m³)": hourly_grass_pollen,
    "mugwort_pollen (grains/m³)": hourly_mugwort_pollen,
    "olive_pollen (grains/m³)": hourly_olive_pollen,
    "ragweed_pollen (grains/m³)": hourly_ragweed_pollen,
}

# Build the table and render 'time' as a minute-resolution ISO-like string
hourly_dataframe = pd.DataFrame(data=hourly_data).assign(
    time=lambda frame: pd.to_datetime(frame['time']).dt.strftime('%Y-%m-%dT%H:%M')
)

# Write the result to disk
output_file = "hourly_pollen_data_updated.csv"
hourly_dataframe.to_csv(output_file, index=False)
print(f"Data saved to {output_file}")


Coordinates 46.099998474121094°N 11.10000228881836°E
Elevation 204.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
Data saved to hourly_pollen_data_updated.csv


In [10]:
# Read the freshly downloaded hourly data and the Trento pollen dataset
hourly_pollen_data = pd.read_csv('hourly_pollen_data_updated.csv')
trento_polline_data = pd.read_csv('previsioni/trento_polline_2024.csv')

# Show a heading followed by the first 30 rows of each table
previews = (
    ("First 30 rows of hourly_pollen_data_updated.csv:", hourly_pollen_data),
    ("\nFirst 30 rows of trento_polline_2024.csv:", trento_polline_data),
)
for heading, frame in previews:
    print(heading)
    print(frame.head(30))

First 30 rows of hourly_pollen_data_updated.csv:
                time  alder_pollen (grains/m³)  birch_pollen (grains/m³)  \
0   2024-01-01T00:00                       0.0                       0.0   
1   2024-01-01T01:00                       0.0                       0.0   
2   2024-01-01T02:00                       0.0                       0.0   
3   2024-01-01T03:00                       0.0                       0.0   
4   2024-01-01T04:00                       0.0                       0.0   
5   2024-01-01T05:00                       0.0                       0.0   
6   2024-01-01T06:00                       0.0                       0.0   
7   2024-01-01T07:00                       0.0                       0.0   
8   2024-01-01T08:00                       0.0                       0.0   
9   2024-01-01T09:00                       0.0                       0.0   
10  2024-01-01T10:00                       0.0                       0.0   
11  2024-01-01T11:00                   

In [6]:
import openmeteo_requests
import requests_cache
import pandas as pd
import json
from shapely.geometry import shape
import time
from retry_requests import retry
import os
import re

# Setup the Open-Meteo API client with cache and retry on error.
# expire_after=-1 is passed so cached responses are kept without an expiry
# (per the requests-cache documentation), which suits repeated runs over a
# fixed historical date range.
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Load the GeoJSON file with municipalities.
# The encoding is given explicitly: the file contains non-ASCII names
# (e.g. "Südtirol"), and without it open() falls back to the platform's
# default encoding, which is not UTF-8 everywhere.
municipalities_path = 'limits_IT_municipalities.geojson'
with open(municipalities_path, 'r', encoding='utf-8') as file:
    geojson_data = json.load(file)

# Keep only municipalities of the Trentino-Alto Adige region and represent
# each one by the centroid of its geometry (x -> longitude, y -> latitude).
municipality_data = []
for feature in geojson_data["features"]:
    properties = feature["properties"]

    # Skip every feature that belongs to another region
    if properties.get("reg_name") != "Trentino-Alto Adige/Südtirol":
        continue

    centroid = shape(feature["geometry"]).centroid
    municipality_data.append({
        "name": properties["name"],
        "latitude": centroid.y,
        "longitude": centroid.x
    })

# Function to sanitize file names
def sanitize_filename(name):
    """Return `name` with every character unsuitable for a file name replaced by '_'.

    Word characters, hyphens, underscores, dots and spaces are kept as they
    are; any other character (for example '/') becomes an underscore.
    """
    unsafe_characters = re.compile(r'[^\w\-_\. ]')
    return unsafe_characters.sub('_', name)

# Date range and variables for the API request
start_date = "2022-01-01"
end_date = "2024-05-30"
hourly_variables = [
    "alder_pollen", "birch_pollen", "grass_pollen",
    "mugwort_pollen", "olive_pollen", "ragweed_pollen"
]

# Loop-invariant values, computed once instead of on every iteration.
# CSV column headers are derived from the requested variable names,
# e.g. "alder_pollen" -> "Alder pollen (grains/m³)".
pollen_columns = [
    f"{var.replace('_', ' ').capitalize()} (grains/m³)" for var in hourly_variables
]
output_dir = "PollenTrentinoAltoAdige2024"
os.makedirs(output_dir, exist_ok=True)

# Walk the municipalities with an explicit index so that a rate-limited
# request can be retried: the index only advances after a successful save
# or after an error that is not the minutely rate limit.
current_index = 0  # Start from the first municipality
while current_index < len(municipality_data):
    municipality = municipality_data[current_index]
    latitude = municipality["latitude"]
    longitude = municipality["longitude"]
    name = municipality["name"]
    sanitized_name = sanitize_filename(name)  # Safe to embed in a file name

    print(f"Processing pollen data for municipality: {name} ({latitude}, {longitude})...")

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables
    }

    try:
        # Make the API call
        responses = openmeteo.weather_api("https://air-quality-api.open-meteo.com/v1/air-quality", params=params)

        # A single location is requested, so only the first response is read
        response = responses[0]
        hourly = response.Hourly()
        hourly_data = {
            # UTC time axis rebuilt from start, end and interval (end excluded)
            "time": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read by their position in `hourly_variables`
        for i, column_name in enumerate(pollen_columns):
            hourly_data[column_name] = hourly.Variables(i).ValuesAsNumpy()

        # Convert to DataFrame
        df = pd.DataFrame(data=hourly_data)

        # The 'time' column is already datetime-typed (it comes from
        # pd.date_range), so it can be formatted directly.
        df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M')

        # Save the transformed dataset
        output_file = os.path.join(output_dir, f"{sanitized_name}_pollen.csv")
        df.to_csv(output_file, index=False)
        print(f"Pollen data for {name} saved to {output_file}")

        current_index += 1  # Move to the next municipality on success

    except Exception as e:
        error_message = str(e)
        print(f"Error for {name} ({latitude}, {longitude}): {error_message}")
        if "Minutely API request limit exceeded" in error_message:
            # Wait time: 60 s by default (also when the message says
            # "one minute"); if the message mentions "minute" otherwise,
            # use "<N> minute(s)" when a number is present, else 2 minutes.
            retry_wait_time = 60
            if "one minute" not in error_message and "minute" in error_message:
                minute_counts = re.findall(r'(\d+)\s+minute', error_message)
                retry_wait_time = int(minute_counts[0]) * 60 if minute_counts else 120

            print(f"Waiting for {retry_wait_time} seconds before retrying...")
            time.sleep(retry_wait_time)
            # current_index is left unchanged: the same municipality is retried
        else:
            # NOTE(review): every error whose message does not contain the
            # minutely-limit text lands here and the municipality is skipped;
            # confirm whether other rate-limit messages should wait instead.
            print(f"Skipping {name} due to unhandled error: {error_message}")
            current_index += 1


Processing pollen data for municipality: Aldino/Aldein (46.359502055842874, 11.392560462182377)...
Pollen data for Aldino/Aldein saved to PollenTrentinoAltoAdige2024/Aldino_Aldein_pollen.csv
Processing pollen data for municipality: Andriano/Andrian (46.518480075940644, 11.232025774529575)...
Pollen data for Andriano/Andrian saved to PollenTrentinoAltoAdige2024/Andriano_Andrian_pollen.csv
Processing pollen data for municipality: Anterivo/Altrei (46.27956889820043, 11.368050808976268)...
Pollen data for Anterivo/Altrei saved to PollenTrentinoAltoAdige2024/Anterivo_Altrei_pollen.csv
Processing pollen data for municipality: Appiano sulla strada del vino/Eppan an der Weinstraße (46.467482496744346, 11.255986244457276)...
Pollen data for Appiano sulla strada del vino/Eppan an der Weinstraße saved to PollenTrentinoAltoAdige2024/Appiano sulla strada del vino_Eppan an der Weinstraße_pollen.csv
Processing pollen data for municipality: Avelengo/Hafling (46.659199348784504, 11.250164803634044)...


The data gathered using the GeoJSON file were not enough to build a solid model, so, using ChatGPT and other online sources, I collected the coordinates of the municipalities of each valley and stored them in the file 'Merged_Valleys_and_Municipalities.csv'. Although this file may not contain every municipality of each valley in the Trentino-Alto Adige region, nor every valley, it is enough to build a structured model that covers a good number of municipalities and the major valleys of the region; it will then be leveraged to gather the data and build the prediction models.

In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
import json
import time
from retry_requests import retry
import os
import re

# Open-Meteo client backed by a cache (expire_after=-1) and with retries
# on error (retries=5, backoff_factor=0.2).
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Read the municipality table and trim stray whitespace from its headers
municipalities_path = 'TrentinoAltoAdigeZoneMapping/Final/Merged_Valleys_and_Municipalities.csv'
data = pd.read_csv(municipalities_path).rename(columns=str.strip)

# One record per CSV row: the normalized municipality name and its coordinates
municipality_data = [
    {"name": municipality_name, "latitude": lat, "longitude": lon}
    for municipality_name, lat, lon in zip(
        data["normalized_municipality"], data["latitude"], data["longitude"]
    )
]

# Function to sanitize file names
def sanitize_filename(name):
    """Return `name` with every character unsuitable for a file name replaced by '_'.

    Word characters, hyphens, underscores, dots and spaces are kept as they
    are; any other character (for example '/') becomes an underscore.
    """
    unsafe_characters = re.compile(r'[^\w\-_\. ]')
    return unsafe_characters.sub('_', name)

# Date range and variables for the API request
start_date = "2022-01-01"
end_date = "2024-05-30"
hourly_variables = [
    "alder_pollen", "birch_pollen", "grass_pollen",
    "mugwort_pollen", "olive_pollen", "ragweed_pollen"
]

# Loop-invariant values, computed once instead of on every iteration.
# CSV column headers are derived from the requested variable names,
# e.g. "alder_pollen" -> "Alder pollen (grains/m³)".
pollen_columns = [
    f"{var.replace('_', ' ').capitalize()} (grains/m³)" for var in hourly_variables
]
output_dir = "PollenTrentinoAltoAdige2024"
os.makedirs(output_dir, exist_ok=True)

# Walk the municipalities with an explicit index so that a rate-limited
# request can be retried: the index only advances after a successful save
# or after an error that is not the minutely rate limit.
current_index = 0  # Start from the first municipality
while current_index < len(municipality_data):
    municipality = municipality_data[current_index]
    latitude = municipality["latitude"]
    longitude = municipality["longitude"]
    name = municipality["name"]
    sanitized_name = sanitize_filename(name)  # Safe to embed in a file name

    print(f"Processing pollen data for municipality: {name} ({latitude}, {longitude})...")

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables
    }

    try:
        # Make the API call
        responses = openmeteo.weather_api("https://air-quality-api.open-meteo.com/v1/air-quality", params=params)

        # A single location is requested, so only the first response is read
        response = responses[0]
        hourly = response.Hourly()
        hourly_data = {
            # UTC time axis rebuilt from start, end and interval (end excluded)
            "time": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read by their position in `hourly_variables`
        for i, column_name in enumerate(pollen_columns):
            hourly_data[column_name] = hourly.Variables(i).ValuesAsNumpy()

        # Convert to DataFrame
        df = pd.DataFrame(data=hourly_data)

        # The 'time' column is already datetime-typed (it comes from
        # pd.date_range), so it can be formatted directly.
        df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M')

        # Save the transformed dataset
        output_file = os.path.join(output_dir, f"{sanitized_name}_pollen.csv")
        df.to_csv(output_file, index=False)
        print(f"Pollen data for {name} saved to {output_file}")

        current_index += 1  # Move to the next municipality on success

    except Exception as e:
        error_message = str(e)
        print(f"Error for {name} ({latitude}, {longitude}): {error_message}")
        if "Minutely API request limit exceeded" in error_message:
            # Wait time: 60 s by default (also when the message says
            # "one minute"); if the message mentions "minute" otherwise,
            # use "<N> minute(s)" when a number is present, else 2 minutes.
            retry_wait_time = 60
            if "one minute" not in error_message and "minute" in error_message:
                minute_counts = re.findall(r'(\d+)\s+minute', error_message)
                retry_wait_time = int(minute_counts[0]) * 60 if minute_counts else 120

            print(f"Waiting for {retry_wait_time} seconds before retrying...")
            time.sleep(retry_wait_time)
            # current_index is left unchanged: the same municipality is retried
        else:
            # NOTE(review): every error whose message does not contain the
            # minutely-limit text lands here and the municipality is skipped;
            # confirm whether other rate-limit messages should wait instead.
            print(f"Skipping {name} due to unhandled error: {error_message}")
            current_index += 1


Processing pollen data for municipality: abtei (46.58223389504704, 11.920465829514228)...
Pollen data for abtei saved to PollenTrentinoAltoAdige2024/abtei_pollen.csv
Processing pollen data for municipality: ahrntal (46.98816745720802, 11.94342284351528)...
Pollen data for ahrntal saved to PollenTrentinoAltoAdige2024/ahrntal_pollen.csv
Processing pollen data for municipality: ala (45.7475502221716, 11.039857342716282)...
Pollen data for ala saved to PollenTrentinoAltoAdige2024/ala_pollen.csv
Processing pollen data for municipality: albiano (46.14157819769435, 11.1847034020592)...
Pollen data for albiano saved to PollenTrentinoAltoAdige2024/albiano_pollen.csv
Processing pollen data for municipality: aldein (46.359502055842874, 11.392560462182376)...
Pollen data for aldein saved to PollenTrentinoAltoAdige2024/aldein_pollen.csv
Processing pollen data for municipality: aldeno (45.9712530318918, 11.092583738847312)...
Pollen data for aldeno saved to PollenTrentinoAltoAdige2024/aldeno_pollen.

In [3]:
import os

# Count the CSV files produced by the download cells.
# The folder is given relative to the working directory (the same relative
# name the download loop writes to) rather than as an absolute path tied to
# one user's machine, so the cell also works on other checkouts.
folder_path = 'PollenTrentinoAltoAdige2024'
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
csv_count = len(csv_files)

print(f"Number of .csv files in the folder: {csv_count}")

Number of .csv files in the folder: 464
