In [1]:
import pandas as pd
import numpy as np
from shapely.wkt import loads as load_wkt
import geopandas as gpd
import pandas as pd
import openmeteo_requests
import requests_cache
from retry_requests import retry

In [10]:
def generate_weather_fact(zipcodes, start_date="2023-12-01 00:00:00", end_date="2024-12-31 23:00:00"):
    """
    Build an hourly weather fact table for the centroid of every ZIP code area.

    For each unique (ZIPCODE, centroid) combination in `zipcodes`, hourly history
    is requested from the Open-Meteo archive API and the per-location results are
    stacked into one frame. A location whose request or parsing fails is reported
    on stdout and skipped; the remaining locations are still returned.

    Args:
        zipcodes (DataFrame): Must provide a 'ZIPCODE' column and a 'the_geom'
            column holding WKT geometry strings. The frame is not modified.
        start_date (str): First day to fetch. Anything `pd.to_datetime` can parse
            is accepted (e.g. "YYYY-MM-DD" or "YYYY-MM-DD HH:MM:SS"); only the
            calendar date is sent to the API, a time-of-day part is ignored.
        end_date (str): Last day to fetch, same accepted formats as `start_date`.

    Returns:
        DataFrame: One row per location and hour with the columns Temperature,
        Humidity, Precipitation, Rain, Snow, WindSpeed, WindDirection,
        LocationAreaKey, WeatherKey and DateHourKey. The frame is empty (but still
        has these columns) when `zipcodes` is empty or no location could be fetched.
    """
    # Open-Meteo hourly variable -> output column name. Insertion order matters:
    # it is the order the variables are requested in and therefore the index
    # used to read each one back from the response.
    hourly_variables = {
        "temperature_2m": "Temperature",
        "relative_humidity_2m": "Humidity",
        "precipitation": "Precipitation",
        "rain": "Rain",
        "snowfall": "Snow",
        "windspeed_10m": "WindSpeed",
        "winddirection_10m": "WindDirection",
    }
    fact_columns = [*hourly_variables.values(), "LocationAreaKey", "WeatherKey", "DateHourKey"]

    # Nothing to fetch: return an empty, correctly shaped frame instead of
    # letting pd.concat fail on an empty list further down.
    if zipcodes.empty:
        return pd.DataFrame(columns=fact_columns)

    # The archive endpoint takes plain calendar dates, so strip any time part.
    api_start_date = pd.to_datetime(start_date).strftime("%Y-%m-%d")
    api_end_date = pd.to_datetime(end_date).strftime("%Y-%m-%d")

    # Parse the WKT on a copy (assign returns a new frame) so the caller's
    # DataFrame does not silently gain a 'geometry' column.
    gdf = gpd.GeoDataFrame(
        zipcodes.assign(geometry=zipcodes['the_geom'].apply(load_wkt)),
        geometry='geometry',
    )
    centroids = gdf['geometry'].centroid

    # One API call per distinct (ZIP code, centroid) combination.
    unique_locations = pd.DataFrame({
        'ZIPCODE': zipcodes['ZIPCODE'],
        'centroid_latitude': centroids.y,
        'centroid_longitude': centroids.x,
    }).drop_duplicates()

    # Setup the Open-Meteo API client with cache and retry on error
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive"

    # Per-location hourly frames, concatenated once after the loop.
    dfs = []

    # itertuples keeps each column's dtype (iterrows would upcast the whole row).
    for zipcode, latitude, longitude in unique_locations.itertuples(index=False, name=None):
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": api_start_date,
            "end_date": api_end_date,
            "hourly": list(hourly_variables),
            "timezone": "auto"
        }

        # Reset per location so the error report below never hits an unbound
        # name or shows the previous location's response.
        responses = None
        try:
            responses = openmeteo.weather_api(url, params=params)
            # A single coordinate pair was requested, so there is one response.
            response = responses[0]
            hourly = response.Hourly()

            hourly_data = {
                "date": pd.date_range(
                    start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
                    end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
                    freq=pd.Timedelta(seconds=hourly.Interval()),
                    inclusive="left"
                ),
            }
            # Variables come back in the order they were requested.
            for index, variable in enumerate(hourly_variables):
                hourly_data[variable] = hourly.Variables(index).ValuesAsNumpy()

            hourly_dataframe = pd.DataFrame(data=hourly_data)
            hourly_dataframe['ZIPCODE'] = zipcode
            hourly_dataframe['Latitude'] = latitude
            hourly_dataframe['Longitude'] = longitude

            dfs.append(hourly_dataframe)

        except Exception as e:
            # Best effort: report the failure and continue with the next location.
            print("An error occurred:", e)
            print("Response content:", responses)

    # Every location failed: same empty result as for empty input.
    if not dfs:
        return pd.DataFrame(columns=fact_columns)

    result = pd.concat(dfs, ignore_index=True)

    # Generate keys.
    # NOTE(review): the built-in hash() of a str is salted per Python process
    # (PYTHONHASHSEED), so these keys differ between kernel restarts. Kept as-is
    # because other tables may derive matching keys the same way; switch all of
    # them to a stable digest (e.g. hashlib) together.
    result['LocationAreaKey'] = (result['Longitude'].astype(str) + '_' + result['Latitude'].astype(str)).apply(lambda x: str(abs(hash(x))))
    result['WeatherKey'] = (result['LocationAreaKey'].astype(str) + '_' + result['date'].astype(str)).apply(hash)
    # NOTE(review): 'date' holds UTC timestamps while the request uses
    # timezone="auto", so DateHourKey is a UTC hour - confirm that this is what
    # the date/hour dimension expects.
    result['DateHourKey'] = result['date'].dt.strftime('%Y%m%d%H')

    # Keep only measures and keys, exposed under their fact-table column names.
    WeatherFact = (
        result
        .drop(columns=['date', 'Latitude', 'Longitude', 'ZIPCODE'])
        .rename(columns=hourly_variables)
    )

    return WeatherFact


In [11]:
# Trial run restricted to December 2023.
test = generate_weather_fact(
    zipcodes,
    start_date="2023-12-01",
    end_date="2023-12-31",
)

In [12]:
# Show the trial-run result (the last expression of a cell is rendered as its output).
test

Unnamed: 0,Temperature,Humidity,Precipitation,Rain,Snow,WindSpeed,WindDirection,LocationAreaKey,WeatherKey,DateHourKey
0,5.9570,65.628998,0.0,0.0,0.0,14.003029,197.969238,2062769713336722239,6949385139550194757,2023120104
1,5.8070,65.120544,0.0,0.0,0.0,13.849477,207.897171,2062769713336722239,-7289328588338018900,2023120105
2,5.5070,65.766518,0.0,0.0,0.0,13.217443,209.357651,2062769713336722239,6308546202805324250,2023120106
3,5.1570,67.384460,0.0,0.0,0.0,13.089354,211.504211,2062769713336722239,2691564809957061568,2023120107
4,4.8070,69.298523,0.0,0.0,0.0,10.691453,224.999893,2062769713336722239,5491964793461110569,2023120108
...,...,...,...,...,...,...,...,...,...,...
72163,4.8225,72.122971,0.3,0.3,0.0,8.647496,177.614105,6800480371838403178,-7917348130470600036,2023123123
72164,4.0225,80.515984,0.4,0.4,0.0,4.735060,171.253922,6800480371838403178,-8660619253573069313,2024010100
72165,3.3725,81.889183,0.0,0.0,0.0,7.145796,139.085709,6800480371838403178,97986016428884860,2024010101
72166,2.7225,86.052048,0.0,0.0,0.0,5.904439,142.431335,6800480371838403178,-1549819841685360674,2024010102
