In [1]:
!pip install openmeteo-requests
!pip install requests-cache retry-requests numpy pandas



Testing the API call code for a selected pool of municipalities

In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# List of locations with their coordinates (latitude, longitude)
locations = [
    {"name": "Trento", "latitude": 46.0679, "longitude": 11.1211},
    {"name": "Bolzano", "latitude": 46.4983, "longitude": 11.3548},
    {"name": "Rovereto", "latitude": 45.8903, "longitude": 11.0372}
]

# Date range and variables for the API request
start_date = "2024-01-01"
end_date = "2024-10-30"
hourly_variables = [
    "temperature_2m", "relative_humidity_2m", "precipitation", "rain",
    "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high",
    "wind_speed_10m", "soil_temperature_0_to_7cm"
]

# Rounding rules for formatting and approximating the variables.
# Keys are the final (renamed) CSV column names; values are decimal places.
rounding_rules = {
    'temperature_2m (°C)': 1,
    'relative_humidity_2m (%)': 0,
    'precipitation (mm)': 2,
    'rain (mm)': 2,
    'cloud_cover (%)': 0,
    'cloud_cover_low (%)': 0,
    'cloud_cover_mid (%)': 0,
    'cloud_cover_high (%)': 0,
    'wind_speed_10m (km/h)': 1,
    'soil_temperature_0_to_7cm (°C)': 1
}

# Maps the raw column names to the desired CSV headers.
# Loop-invariant, so it is defined once instead of on every iteration.
rename_mapping = {
    'date': 'time',
    'temperature_2m': 'temperature_2m (°C)',
    'relative_humidity_2m': 'relative_humidity_2m (%)',
    'precipitation': 'precipitation (mm)',
    'rain': 'rain (mm)',
    'cloud_cover': 'cloud_cover (%)',
    'cloud_cover_low': 'cloud_cover_low (%)',
    'cloud_cover_mid': 'cloud_cover_mid (%)',
    'cloud_cover_high': 'cloud_cover_high (%)',
    'wind_speed_10m': 'wind_speed_10m (km/h)',
    'soil_temperature_0_to_7cm': 'soil_temperature_0_to_7cm (°C)'
}

# Loop through each location and retrieve weather data
for location in locations:
    print(f"Processing weather data for {location['name']}...")

    params = {
        "latitude": location["latitude"],
        "longitude": location["longitude"],
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables
    }

    responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)

    # Process response for the current location
    response = responses[0]
    hourly = response.Hourly()

    # Rebuild the hourly time axis from the start/end/interval in the response
    hourly_data = {
        "date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )
    }
    # Variables are read back by position, in the order they were requested
    for i, var in enumerate(hourly_variables):
        hourly_data[var] = hourly.Variables(i).ValuesAsNumpy()

    # Convert to DataFrame and rename columns to match the desired format
    df = pd.DataFrame(data=hourly_data).rename(columns=rename_mapping)

    # Format the 'time' column (already a datetime column at this point)
    df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M')

    # Apply rounding rules
    for column, decimals in rounding_rules.items():
        if column not in df.columns:
            continue
        rounded = df[column].round(decimals)
        if decimals == 0:
            # Nullable Int64 instead of plain int: a missing (NaN) value would
            # make astype(int) raise; here it becomes <NA> and is written as
            # an empty CSV field.
            df[column] = rounded.astype("Int64")
        else:
            df[column] = rounded

    # Save the transformed dataset
    output_file = f"{location['name']}_weather.csv"
    df.to_csv(output_file, index=False)
    print(f"Weather data for {location['name']} saved to {output_file}")


Processing weather data for Trento...
Weather data for Trento saved to Trento_weather.csv
Processing weather data for Bolzano...
Weather data for Bolzano saved to Bolzano_weather.csv
Processing weather data for Rovereto...
Weather data for Rovereto saved to Rovereto_weather.csv


In [9]:
!pip install geopandas
import folium
import geopandas as gpd

# Carica il file GeoJSON delle regioni italiane
# Sostituisci 'italy_regions.geojson' con il percorso al tuo file GeoJSON
gdf = gpd.read_file('/Users/davidegiordani/Desktop/BGTFinalCrack/limits_IT_regions.geojson')

# Filtra per la regione Trentino-Alto Adige
trentino_alto_adige = gdf[gdf['reg_name'] == 'Trentino-Alto Adige/Südtirol']

# Ottieni il centroide per centrare la mappa
centroid = trentino_alto_adige.geometry.centroid.iloc[0]
map_center = [centroid.y, centroid.x]

# Crea la mappa centrata sul Trentino-Alto Adige
m = folium.Map(location=map_center, zoom_start=8)

# Aggiungi il contorno della regione alla mappa
folium.GeoJson(
    trentino_alto_adige,
    name='Trentino-Alto Adige',
    style_function=lambda feature: {
        'fillColor': 'blue',
        'color': 'blue',
        'weight': 2,
        'fillOpacity': 0.1,
    }
).add_to(m)

# Aggiungi i controlli per i livelli
folium.LayerControl().add_to(m)

# Salva la mappa come file HTML
m.save('trentino_alto_adige_map.html')
print("Mappa salvata come 'trentino_alto_adige_map.html'")


Mappa salvata come 'trentino_alto_adige_map.html'



  centroid = trentino_alto_adige.geometry.centroid.iloc[0]


In [15]:
import openmeteo_requests
import requests_cache
import pandas as pd
import json
from shapely.geometry import shape
import time
from retry_requests import retry
import os
import re

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Load the GeoJSON file with municipalities.
# The encoding is given explicitly: the region name filtered on below contains
# a non-ASCII character ('ü'), and with a non-UTF-8 platform default it would
# be mis-decoded, making the filter silently match nothing.
municipalities_path = 'limits_IT_municipalities.geojson'
with open(municipalities_path, 'r', encoding='utf-8') as file:
    geojson_data = json.load(file)

# Extract relevant municipalities in Trentino-Alto Adige and calculate centroids
municipality_data = []
for feature in geojson_data["features"]:
    properties = feature["properties"]
    geometry = feature["geometry"]

    # Keep only municipalities in the Trentino-Alto Adige region
    if properties.get("reg_name") != "Trentino-Alto Adige/Südtirol":
        continue

    # A feature without geometry has no centroid to query the API with
    if geometry is None:
        print(f"Skipping {properties.get('name')}: feature has no geometry")
        continue

    centroid = shape(geometry).centroid
    municipality_data.append({
        "name": properties["name"],
        "latitude": centroid.y,
        "longitude": centroid.x
    })

# Function to sanitize file names
def sanitize_filename(name):
    """Return `name` made safe for use as a file name.

    Every character that is not a word character, hyphen, underscore, dot or
    space is replaced by an underscore; all other characters are kept as-is.
    """
    unsafe_chars = re.compile(r'[^\w\-_\. ]')
    return unsafe_chars.sub('_', name)

# Date range and variables for the API request
start_date = "2022-01-01"
end_date = "2024-05-30"
hourly_variables = [
    "temperature_2m", "relative_humidity_2m", "precipitation", "rain",
    "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high",
    "wind_speed_10m", "soil_temperature_0_to_7cm"
]

# Rounding rules for formatting and approximating the variables.
# Keys are the final (renamed) CSV column names; values are decimal places.
rounding_rules = {
    'temperature_2m (°C)': 1,
    'relative_humidity_2m (%)': 0,
    'precipitation (mm)': 2,
    'rain (mm)': 2,
    'cloud_cover (%)': 0,
    'cloud_cover_low (%)': 0,
    'cloud_cover_mid (%)': 0,
    'cloud_cover_high (%)': 0,
    'wind_speed_10m (km/h)': 1,
    'soil_temperature_0_to_7cm (°C)': 1
}

# Maps the raw column names to the desired CSV headers.
# Loop-invariant, so it is defined once instead of on every iteration.
rename_mapping = {
    'date': 'time',
    'temperature_2m': 'temperature_2m (°C)',
    'relative_humidity_2m': 'relative_humidity_2m (%)',
    'precipitation': 'precipitation (mm)',
    'rain': 'rain (mm)',
    'cloud_cover': 'cloud_cover (%)',
    'cloud_cover_low': 'cloud_cover_low (%)',
    'cloud_cover_mid': 'cloud_cover_mid (%)',
    'cloud_cover_high': 'cloud_cover_high (%)',
    'wind_speed_10m': 'wind_speed_10m (km/h)',
    'soil_temperature_0_to_7cm': 'soil_temperature_0_to_7cm (°C)'
}

# Ensure the output directory exists (once, before the loop)
output_dir = "WeatherTrentinoAltoAdige2024"
os.makedirs(output_dir, exist_ok=True)

# Upper bound on consecutive rate-limit waits for one municipality, so a
# rate limit that never clears cannot keep the loop waiting forever.
max_rate_limit_retries = 5

# Start processing
current_index = 0  # Start from the first municipality
rate_limit_retries = 0  # Consecutive rate-limit waits for the current municipality
while current_index < len(municipality_data):
    municipality = municipality_data[current_index]
    latitude = municipality["latitude"]
    longitude = municipality["longitude"]
    name = municipality["name"]
    sanitized_name = sanitize_filename(name)  # Sanitize municipality name

    print(f"Processing weather data for municipality: {name} ({latitude}, {longitude})...")

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables
    }

    try:
        # Make the API call
        responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)

        # Process the response
        response = responses[0]
        hourly = response.Hourly()

        # Rebuild the hourly time axis from the start/end/interval in the response
        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read back by position, in the order they were requested
        for i, var in enumerate(hourly_variables):
            hourly_data[var] = hourly.Variables(i).ValuesAsNumpy()

        # Convert to DataFrame and rename columns to match the desired format
        df = pd.DataFrame(data=hourly_data).rename(columns=rename_mapping)

        # Format the 'time' column (already a datetime column at this point)
        df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M')

        # Apply rounding rules
        for column, decimals in rounding_rules.items():
            if column not in df.columns:
                continue
            rounded = df[column].round(decimals)
            if decimals == 0:
                # Nullable Int64 instead of plain int: a missing (NaN) value
                # would make astype(int) raise, and the broad except below
                # would then drop the whole municipality. With Int64 the gap
                # becomes <NA> and is written as an empty CSV field.
                df[column] = rounded.astype("Int64")
            else:
                df[column] = rounded

        # Save the transformed dataset
        output_file = os.path.join(output_dir, f"{sanitized_name}_weather.csv")
        df.to_csv(output_file, index=False)
        print(f"Weather data for {name} saved to {output_file}")

        current_index += 1  # Move to the next municipality on success
        rate_limit_retries = 0

    except Exception as e:
        error_message = str(e)
        print(f"Error for {name} ({latitude}, {longitude}): {error_message}")

        is_rate_limited = "API request limit exceeded" in error_message
        is_minutely_limit = "Minutely API request limit exceeded" in error_message

        if is_minutely_limit and rate_limit_retries < max_rate_limit_retries:
            rate_limit_retries += 1

            # Extract suggested wait time; default to 60 seconds
            retry_wait_time = 60
            if "one minute" not in error_message and "minute" in error_message:
                minutes = re.search(r'(\d+)\s+minute', error_message)
                # Fallback to 2 minutes when no number of minutes is given
                retry_wait_time = int(minutes.group(1)) * 60 if minutes else 120

            print(f"Waiting for {retry_wait_time} seconds before retrying...")
            time.sleep(retry_wait_time)
            # Retry the same municipality (current_index is left unchanged)
        elif is_rate_limited:
            # Either the minutely limit did not clear after the allowed number
            # of waits, or a different request limit was hit. Skipping ahead
            # would only fail for every remaining municipality, so stop here.
            print(f"Stopping at {name}: API request limit still in effect. Re-run this cell later to continue.")
            break
        else:
            # Skip municipality on other errors
            print(f"Skipping {name} due to unhandled error: {error_message}")
            current_index += 1
            rate_limit_retries = 0


Processing weather data for municipality: Aldino/Aldein (46.359502055842874, 11.392560462182377)...
Weather data for Aldino/Aldein saved to WeatherTrentinoAltoAdige2024/Aldino_Aldein_weather.csv
Processing weather data for municipality: Andriano/Andrian (46.518480075940644, 11.232025774529575)...
Weather data for Andriano/Andrian saved to WeatherTrentinoAltoAdige2024/Andriano_Andrian_weather.csv
Processing weather data for municipality: Anterivo/Altrei (46.27956889820043, 11.368050808976268)...
Weather data for Anterivo/Altrei saved to WeatherTrentinoAltoAdige2024/Anterivo_Altrei_weather.csv
Processing weather data for municipality: Appiano sulla strada del vino/Eppan an der Weinstraße (46.467482496744346, 11.255986244457276)...
Weather data for Appiano sulla strada del vino/Eppan an der Weinstraße saved to WeatherTrentinoAltoAdige2024/Appiano sulla strada del vino_Eppan an der Weinstraße_weather.csv
Processing weather data for municipality: Avelengo/Hafling (46.659199348784504, 11.250

In [16]:
import os

# Count the CSV files produced by the download cell.
# The folder is referenced relative to the notebook's working directory, the
# same way the download cell writes it, instead of through a user-specific
# absolute path that only exists on one machine.
folder_path = 'WeatherTrentinoAltoAdige2024'
csv_files = [f for f in os.listdir(folder_path) if f.endswith('.csv')]
csv_count = len(csv_files)

print(f"Number of .csv files in the folder: {csv_count}")

Number of .csv files in the folder: 249


The data gathered using the GeoJSON file were not enough to build a solid model, so, using ChatGPT and other online sources, I fetched the coordinates of the municipalities of each valley and presented them in the file called 'Merged_Valleys_and_Municipalities.csv'. Although this file may not contain all the municipalities of each valley of the Trentino-Alto Adige region, nor all of its valleys, it is enough to have a structured model with a good number of municipalities and the major valleys of the region, which will then be leveraged to gather the data and build the prediction models.

In [None]:
import openmeteo_requests
import requests_cache
import pandas as pd
import json
from shapely.geometry import shape
import time
from retry_requests import retry
import os
import re

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Load the CSV file with municipalities
municipalities_path = 'TrentinoAltoAdigeZoneMapping/Final/Merged_Valleys_and_Municipalities.csv'
data = pd.read_csv(municipalities_path)
data.columns = data.columns.str.strip()  # Strip any whitespace from column names

# Extract relevant municipalities and their coordinates.
# - Rows with a missing name or coordinate are dropped: a missing name cannot
#   be turned into a file name and a missing coordinate cannot be queried.
# - When a municipality name occurs more than once, only the last row is
#   kept: the output file name is derived from the name alone, so earlier
#   rows would be downloaded only to be overwritten by the last one.
required_columns = ["normalized_municipality", "latitude", "longitude"]
municipalities = (
    data
    .dropna(subset=required_columns)
    .drop_duplicates(subset="normalized_municipality", keep="last")
)
municipality_data = (
    municipalities[required_columns]
    .rename(columns={"normalized_municipality": "name"})
    .to_dict("records")
)

# Function to sanitize file names
def sanitize_filename(name):
    """Replace characters that are unsuitable for a file name with '_'.

    Word characters, hyphens, underscores, dots and spaces are preserved;
    anything else becomes a single underscore per character.
    """
    disallowed = r'[^\w\-_\. ]'
    cleaned = re.sub(disallowed, '_', name)
    return cleaned

# Date range and variables for the API request
start_date = "2022-01-01"
end_date = "2024-05-30"
hourly_variables = [
    "temperature_2m", "relative_humidity_2m", "precipitation", "rain",
    "cloud_cover", "cloud_cover_low", "cloud_cover_mid", "cloud_cover_high",
    "wind_speed_10m", "soil_temperature_0_to_7cm"
]

# Rounding rules for formatting and approximating the variables.
# Keys are the final (renamed) CSV column names; values are decimal places.
rounding_rules = {
    'temperature_2m (°C)': 1,
    'relative_humidity_2m (%)': 0,
    'precipitation (mm)': 2,
    'rain (mm)': 2,
    'cloud_cover (%)': 0,
    'cloud_cover_low (%)': 0,
    'cloud_cover_mid (%)': 0,
    'cloud_cover_high (%)': 0,
    'wind_speed_10m (km/h)': 1,
    'soil_temperature_0_to_7cm (°C)': 1
}

# Maps the raw column names to the desired CSV headers.
# Loop-invariant, so it is defined once instead of on every iteration.
rename_mapping = {
    'date': 'time',
    'temperature_2m': 'temperature_2m (°C)',
    'relative_humidity_2m': 'relative_humidity_2m (%)',
    'precipitation': 'precipitation (mm)',
    'rain': 'rain (mm)',
    'cloud_cover': 'cloud_cover (%)',
    'cloud_cover_low': 'cloud_cover_low (%)',
    'cloud_cover_mid': 'cloud_cover_mid (%)',
    'cloud_cover_high': 'cloud_cover_high (%)',
    'wind_speed_10m': 'wind_speed_10m (km/h)',
    'soil_temperature_0_to_7cm': 'soil_temperature_0_to_7cm (°C)'
}

# Ensure the output directory exists (once, before the loop)
output_dir = "WeatherTrentinoAltoAdige2024"
os.makedirs(output_dir, exist_ok=True)

# Upper bound on consecutive rate-limit waits for one municipality, so a
# rate limit that never clears cannot keep the loop waiting forever.
max_rate_limit_retries = 5

# Start processing
current_index = 0  # Start from the first municipality
rate_limit_retries = 0  # Consecutive rate-limit waits for the current municipality
while current_index < len(municipality_data):
    municipality = municipality_data[current_index]
    latitude = municipality["latitude"]
    longitude = municipality["longitude"]
    name = municipality["name"]
    sanitized_name = sanitize_filename(name)  # Sanitize municipality name

    print(f"Processing weather data for municipality: {name} ({latitude}, {longitude})...")

    params = {
        "latitude": latitude,
        "longitude": longitude,
        "start_date": start_date,
        "end_date": end_date,
        "hourly": hourly_variables
    }

    try:
        # Make the API call
        responses = openmeteo.weather_api("https://archive-api.open-meteo.com/v1/archive", params=params)

        # Process the response
        response = responses[0]
        hourly = response.Hourly()

        # Rebuild the hourly time axis from the start/end/interval in the response
        hourly_data = {
            "date": pd.date_range(
                start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                freq=pd.Timedelta(seconds=hourly.Interval()),
                inclusive="left"
            )
        }
        # Variables are read back by position, in the order they were requested
        for i, var in enumerate(hourly_variables):
            hourly_data[var] = hourly.Variables(i).ValuesAsNumpy()

        # Convert to DataFrame and rename columns to match the desired format
        df = pd.DataFrame(data=hourly_data).rename(columns=rename_mapping)

        # Format the 'time' column (already a datetime column at this point)
        df['time'] = df['time'].dt.strftime('%Y-%m-%dT%H:%M')

        # Apply rounding rules
        for column, decimals in rounding_rules.items():
            if column not in df.columns:
                continue
            rounded = df[column].round(decimals)
            if decimals == 0:
                # Nullable Int64 instead of plain int: a missing (NaN) value
                # would make astype(int) raise, and the broad except below
                # would then drop the whole municipality. With Int64 the gap
                # becomes <NA> and is written as an empty CSV field.
                df[column] = rounded.astype("Int64")
            else:
                df[column] = rounded

        # Save the transformed dataset
        output_file = os.path.join(output_dir, f"{sanitized_name}_weather.csv")
        df.to_csv(output_file, index=False)
        print(f"Weather data for {name} saved to {output_file}")

        current_index += 1  # Move to the next municipality on success
        rate_limit_retries = 0

    except Exception as e:
        error_message = str(e)
        print(f"Error for {name} ({latitude}, {longitude}): {error_message}")

        is_rate_limited = "API request limit exceeded" in error_message
        is_minutely_limit = "Minutely API request limit exceeded" in error_message

        if is_minutely_limit and rate_limit_retries < max_rate_limit_retries:
            rate_limit_retries += 1

            # Extract suggested wait time; default to 60 seconds
            retry_wait_time = 60
            if "one minute" not in error_message and "minute" in error_message:
                minutes = re.search(r'(\d+)\s+minute', error_message)
                # Fallback to 2 minutes when no number of minutes is given
                retry_wait_time = int(minutes.group(1)) * 60 if minutes else 120

            print(f"Waiting for {retry_wait_time} seconds before retrying...")
            time.sleep(retry_wait_time)
            # Retry the same municipality (current_index is left unchanged)
        elif is_rate_limited:
            # Either the minutely limit did not clear after the allowed number
            # of waits, or a different request limit was hit. Skipping ahead
            # would only fail for every remaining municipality, so stop here.
            print(f"Stopping at {name}: API request limit still in effect. Re-run this cell later to continue.")
            break
        else:
            # Skip municipality on other errors
            print(f"Skipping {name} due to unhandled error: {error_message}")
            current_index += 1
            rate_limit_retries = 0


Processing weather data for municipality: abtei (46.58223389504704, 11.920465829514228)...
Weather data for abtei saved to WeatherTrentinoAltoAdige2024/abtei_weather.csv
Processing weather data for municipality: ahrntal (46.98816745720802, 11.94342284351528)...
Weather data for ahrntal saved to WeatherTrentinoAltoAdige2024/ahrntal_weather.csv
Processing weather data for municipality: ala (45.7475502221716, 11.039857342716282)...
Weather data for ala saved to WeatherTrentinoAltoAdige2024/ala_weather.csv
Processing weather data for municipality: albiano (46.14157819769435, 11.1847034020592)...
Weather data for albiano saved to WeatherTrentinoAltoAdige2024/albiano_weather.csv
Processing weather data for municipality: aldein (46.359502055842874, 11.392560462182376)...
Weather data for aldein saved to WeatherTrentinoAltoAdige2024/aldein_weather.csv
Processing weather data for municipality: aldeno (45.9712530318918, 11.092583738847312)...
Weather data for aldeno saved to WeatherTrentinoAltoA