In [20]:
import pandas as pd
import numpy as np
from shapely.wkt import loads as load_wkt
import geopandas as gpd
import pandas as pd
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [21]:
def generate_weather_fact(zipcodes, start_date="2023-12-01 00:00:00", end_date="2024-12-31 23:00:00"):
    """
    Generate a weather fact DataFrame for given ZIP codes within a specified date range.

    Args:
        zipcodes (DataFrame): A DataFrame containing ZIP codes and their geometries.
        start_date (str): The start date for the weather data retrieval in "YYYY-MM-DD HH:MM:SS" format.
        end_date (str): The end date for the weather data retrieval in "YYYY-MM-DD HH:MM:SS" format.

    Returns:
        DataFrame: A DataFrame containing weather facts for the specified ZIP codes and date range.
    """
    
    # Convert 'the_geom' to geometry and calculate centroids
    zipcodes['geometry'] = zipcodes['the_geom'].apply(load_wkt) 
    gdf = gpd.GeoDataFrame(zipcodes, geometry='geometry')
    gdf['centroid'] = gdf['geometry'].centroid

    gdf['centroid_latitude'] = gdf['centroid'].y
    gdf['centroid_longitude'] = gdf['centroid'].x

    # Extract unique locations
    unique_locations = gdf[['ZIPCODE','centroid_latitude', 'centroid_longitude']].drop_duplicates()

    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"

    # List to store all dataframes
    dfs = []

    # Iterate over DataFrame rows
    for _, row in unique_locations.iterrows():
        params = {
            "latitude": row['centroid_latitude'],
            "longitude": row['centroid_longitude'],
            "start_date": start_date,
            "end_date": end_date,
            "hourly": [
                "temperature_2m", "relative_humidity_2m", "precipitation", "rain", 
                "snowfall", "windspeed_10m", "winddirection_10m"
            ],
            "timezone": "auto"
        }
        
        try:
            responses = openmeteo.weather_api(url, params=params)
            # Process first location
            response = responses[0]

            # Use indices based on the API documentation
            hourly = response.Hourly()
            hourly_temperature_2m = hourly.Variables(0).ValuesAsNumpy()
            hourly_relative_humidity_2m = hourly.Variables(1).ValuesAsNumpy()
            hourly_precipitation = hourly.Variables(2).ValuesAsNumpy()
            hourly_rain = hourly.Variables(3).ValuesAsNumpy()
            hourly_snowfall = hourly.Variables(4).ValuesAsNumpy()
            hourly_windspeed_10m = hourly.Variables(5).ValuesAsNumpy()
            hourly_winddirection_10m = hourly.Variables(6).ValuesAsNumpy()

            hourly_data = {
                "date": pd.date_range(
                    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                    freq=pd.Timedelta(seconds=hourly.Interval()),
                    inclusive="left"
                ),
                "temperature_2m": hourly_temperature_2m,
                "relative_humidity_2m": hourly_relative_humidity_2m,
                "precipitation": hourly_precipitation,
                "rain": hourly_rain,
                "snowfall": hourly_snowfall,
                "windspeed_10m": hourly_windspeed_10m,
                "winddirection_10m": hourly_winddirection_10m
            }

            hourly_dataframe = pd.DataFrame(data=hourly_data)
            hourly_dataframe['ZIPCODE'] = row['ZIPCODE']  # Add ZIP code to the dataframe
            hourly_dataframe['Latitude'] = row['centroid_latitude']  # Add Latitude to the dataframe
            hourly_dataframe['Longitude'] = row['centroid_longitude']  # Add Longitude to the dataframe

            dfs.append(hourly_dataframe)
        
        except Exception as e:
            print("An error occurred:", e)
            print("Response content:", responses)

    # Concatenate all dataframes
    result = pd.concat(dfs, ignore_index=True)

    # Generate unique keys
    result['LocationAreaKey'] = (
        result['Longitude'].astype(str).str.replace('.', '', regex=False).str.replace('-', '', regex=False) +
        result['Latitude'].astype(str).str.replace('.', '', regex=False).str.replace('-', '', regex=False)
    )
    result['WeatherKey'] = (result['LocationAreaKey'].astype(str) + '_' + result['date'].astype(str)).apply(hash)
    result['DateHourKey'] = result['date'].dt.strftime('%Y%m%d%H')

    # Prepare the WeatherFact DataFrame
    WeatherFact = result.drop(columns=['date', 'Latitude', 'Longitude', 'ZIPCODE'])
    WeatherFact = WeatherFact.rename(columns={
        "temperature_2m": "Temperature",
        "relative_humidity_2m": "Humidity",
        "precipitation": "Precipitation",
        "rain": "Rain",
        "snowfall": "Snow",
        "windspeed_10m": "WindSpeed",
        "winddirection_10m": "WindDirection"
    })

    return WeatherFact


In [22]:
WeatherFact = generate_weather_fact(zipcodes, start_date="2023-01-01", end_date="2023-12-31")

An error occurred: {'error': True, 'reason': 'Minutely API request limit exceeded. Please try again in one minute.'}
Response content: [<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse object at 0x1485957b0>]
An error occurred: {'reason': 'Minutely API request limit exceeded. Please try again in one minute.', 'error': True}
Response content: [<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse object at 0x1485957b0>]
An error occurred: {'error': True, 'reason': 'Minutely API request limit exceeded. Please try again in one minute.'}
Response content: [<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse object at 0x1485957b0>]
An error occurred: {'error': True, 'reason': 'Minutely API request limit exceeded. Please try again in one minute.'}
Response content: [<openmeteo_sdk.WeatherApiResponse.WeatherApiResponse object at 0x1485957b0>]
An error occurred: {'reason': 'Minutely API request limit exceeded. Please try again in one minute.', 'error': True}
Response content: [<openmeteo_s

In [23]:
WeatherFact

Unnamed: 0,Temperature,Humidity,Precipitation,Rain,Snow,WindSpeed,WindDirection,LocationAreaKey,WeatherKey,DateHourKey
0,11.6570,99.670319,0.1,0.1,0.0,10.182337,278.130005,771429374349402238966626952792325,-4110213040754109933,2023010104
1,11.0070,99.668633,0.0,0.0,0.0,11.659777,278.880585,771429374349402238966626952792325,6748581266405893265,2023010105
2,10.4570,100.000000,0.0,0.0,0.0,13.207634,287.447266,771429374349402238966626952792325,-4226237838032818393,2023010106
3,10.8070,94.804932,0.0,0.0,0.0,15.596767,288.853241,771429374349402238966626952792325,3748766154720126854,2023010107
4,10.2570,95.421638,0.0,0.0,0.0,15.827721,287.198639,771429374349402238966626952792325,-6325941415555862677,2023010108
...,...,...,...,...,...,...,...,...,...,...
289075,4.7325,70.553703,0.0,0.0,0.0,7.928178,177.397491,77054860919072573908859082555049,-2635160836565701956,2023123123
289076,4.5825,70.267616,0.0,0.0,0.0,8.287822,182.489502,77054860919072573908859082555049,9154666140927325096,2024010100
289077,3.6325,75.108467,0.1,0.1,0.0,7.208994,177.137650,77054860919072573908859082555049,-8133445374556158272,2024010101
289078,3.2325,76.418213,0.1,0.1,0.0,6.034700,162.645889,77054860919072573908859082555049,7401791067473064808,2024010102


In [25]:
WeatherFact['DateHourKey'].nunique()

8760

In [26]:
WeatherFact.to_csv('../data/dwh/WeatherFact.csv', index=False)