In [1]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
import time
from datetime import datetime, timedelta


In [2]:
# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

In [3]:
# Function to get weather data for a given date range
def get_weather_data(start_date, end_date):
    url = "https://archive-api.open-meteo.com/v1/archive"
    params = {
        "latitude": 40.7834,
        "longitude": -73.9663,
        "start_date": start_date,
        "end_date": end_date,
        "daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "temperature_2m_mean", "daylight_duration", "rain_sum", "snowfall_sum", "wind_speed_10m_max"],
        "temperature_unit": "fahrenheit",
        "wind_speed_unit": "mph",
        "precipitation_unit": "inch",
        "timezone": "America/New_York"
    }
    responses = openmeteo.weather_api(url, params=params)
    return responses

In [4]:
# Function to process the response and convert it into a DataFrame
def process_response(response):
    daily = response.Daily()
    daily_data = {
        "date": pd.date_range(
            start=pd.to_datetime(daily.Time(), unit="s", utc=True),
            end=pd.to_datetime(daily.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=daily.Interval()),
            inclusive="left"
        ),
        "weather_code": daily.Variables(0).ValuesAsNumpy(),
        "temperature_2m_max": daily.Variables(1).ValuesAsNumpy(),
        "temperature_2m_min": daily.Variables(2).ValuesAsNumpy(),
        "temperature_2m_mean": daily.Variables(3).ValuesAsNumpy(),
        "daylight_duration": daily.Variables(4).ValuesAsNumpy(),
        "rain_sum": daily.Variables(5).ValuesAsNumpy(),
        "snowfall_sum": daily.Variables(6).ValuesAsNumpy(),
        "wind_speed_10m_max": daily.Variables(7).ValuesAsNumpy(),
    }
    return pd.DataFrame(data=daily_data)

In [5]:
# Main function to fetch and aggregate data
def fetch_weather_data(start_date, end_date, chunk_size_days=1):
    current_date = start_date
    all_data = []

    while current_date < end_date:
        next_date = current_date + timedelta(days=chunk_size_days)
        if next_date > end_date:
            next_date = end_date

        # Fetch data for the current chunk
        responses = get_weather_data(current_date.strftime('%Y-%m-%d'), next_date.strftime('%Y-%m-%d'))

        # Process each response
        for response in responses:
            df_chunk = process_response(response)
            all_data.append(df_chunk)

        # Respect API rate limits
        time.sleep(0.75)  # Wait for 1 second between requests

        # Move to the next date chunk
        current_date = next_date

    # Concatenate all chunks into a single DataFrame
    full_dataframe = pd.concat(all_data, ignore_index=True)
    return full_dataframe

In [6]:
# Define the date range
start_date = datetime.strptime("2010-01-01", '%Y-%m-%d')
end_date = datetime.strptime("2024-04-30", '%Y-%m-%d')

# Fetch the weather data
weather_data = fetch_weather_data(start_date, end_date)

# Print the resulting DataFrame
print(weather_data)

                           date  weather_code  temperature_2m_max  \
0     2010-01-01 04:00:00+00:00          73.0           41.337502   
1     2010-01-02 04:00:00+00:00          71.0           30.177500   
2     2010-01-02 04:00:00+00:00          71.0           30.177500   
3     2010-01-03 04:00:00+00:00          71.0           22.977501   
4     2010-01-03 04:00:00+00:00          71.0           22.977501   
...                         ...           ...                 ...   
10461 2024-04-28 04:00:00+00:00          51.0           72.770897   
10462 2024-04-28 04:00:00+00:00          51.0           72.770897   
10463 2024-04-29 04:00:00+00:00          63.0           82.400902   
10464 2024-04-29 04:00:00+00:00          63.0           82.400902   
10465 2024-04-30 04:00:00+00:00          61.0           71.150902   

       temperature_2m_min  temperature_2m_mean  daylight_duration  rain_sum  \
0               24.417500            32.266247       33514.382812  0.011811   
1            

In [9]:
weather_data.to_csv('Data/NYC_weather_data.csv')