In [191]:
import numpy as np
from timezonefinder import TimezoneFinder
import pandas as pd
from datetime import datetime, timedelta
from meteostat import Point, Hourly

def get_first_sunday_of_november(year):
    """Returns midnight on the first Sunday of November in the given year."""
    november_first = datetime(year, 11, 1)
    # weekday() numbers Monday as 0 and Sunday as 6; the modulo gives an
    # offset of 0 when November 1st is itself a Sunday.
    days_until_sunday = (6 - november_first.weekday()) % 7
    return november_first + timedelta(days=days_until_sunday)

def get_timezone(lat, long):
    """
    Looks up the time zone for a coordinate.

    Parameters:
    - lat (float): Latitude of the point.
    - long (float): Longitude of the point.

    Returns:
    - The value of TimezoneFinder.timezone_at for the point (presumably a tz
      database name such as 'America/New_York'; confirm against the
      timezonefinder docs, including what is returned when no zone matches).
    """
    # Constructing a TimezoneFinder is the expensive part of a lookup and the
    # callers in this notebook look up the same point once per race day, so
    # the finder is created on first use and then reused.
    finder = getattr(get_timezone, '_finder', None)
    if finder is None:
        finder = TimezoneFinder()
        get_timezone._finder = finder
    return finder.timezone_at(lng=long, lat=lat)

def convert_utc_to_timezone(df, utc_column, lat, long):
    """
    Adds local-time columns derived from a UTC timestamp column.

    Parameters:
    - df (DataFrame): Input frame; it is copied, not modified.
    - utc_column (str): Name of the column holding the UTC timestamps.
    - lat (float): Latitude used to look up the time zone.
    - long (float): Longitude used to look up the time zone.

    Returns:
    - DataFrame: Copy of df in which utc_column is parsed as UTC-aware
      datetimes, plus 'local_datetime', 'local_date' and 'local_time'.
    """
    tz_name = get_timezone(lat, long)

    # Work on a copy so the caller's frame is untouched and pandas does not
    # emit SettingWithCopyWarning.
    result = df.copy()

    utc_stamps = pd.to_datetime(result[utc_column], utc=True)
    local_stamps = utc_stamps.dt.tz_convert(tz_name)

    result[utc_column] = utc_stamps
    result['local_datetime'] = local_stamps
    result['local_date'] = local_stamps.dt.date
    result['local_time'] = local_stamps.dt.time
    return result

def celsius_to_fahrenheit(celsius):
    """
    Turns a Celsius reading into its Fahrenheit equivalent.

    Parameters:
    - celsius (float or int): Temperature in Celsius.

    Returns:
    - float: Temperature in Fahrenheit.
    """
    # Scale by 9/5 first (multiply, then divide), then shift by the 32-degree offset.
    scaled = celsius * 9 / 5
    return scaled + 32

def get_marathon_weather_data(latitude, longitude, race_time, date_list, marathon_name):
    """
    Collects hourly weather at race start and four hours later for each race day.

    Parameters:
    - latitude (float): Latitude of the race location.
    - longitude (float): Longitude of the race location.
    - race_time (int): Local hour of day at which the race starts; it replaces
      the hour of each entry in date_list.
    - date_list (iterable of datetime): Race days.
    - marathon_name (str): Label written to the 'marathon_name' column of the
      summary frame.

    Returns:
    - tuple (weather_pivot, weather_agg):
      weather_pivot has one row per 'local_date' with each of temp, rhum, dwpt,
      prcp, wspd and local_time split into '<name>_start' and
      '<name>_start_plus_4' columns (temp and dwpt converted to Fahrenheit).
      weather_agg holds the mean, min and max of the weather columns of
      weather_pivot (statistic name in 'index') plus 'marathon_name'.

    Raises:
    - ValueError: If date_list is empty.

    Note: the pivot is keyed on local_date, so if race_time + 4 crosses
    midnight the later observation lands on its own row.
    """
    observation_columns = ['temp', 'rhum', 'dwpt', 'prcp', 'wspd', 'local_date', 'local_time']
    location = Point(latitude, longitude)
    all_data = []

    for day in date_list:
        # The fetched 'time' column is treated as UTC below, so a window of
        # exactly one UTC calendar day can miss the local race hours for
        # locations far from UTC. Pad a day on each side; the exact
        # local date/time filter further down discards the extra rows.
        data = Hourly(location, day - timedelta(days=1), day + timedelta(days=2)).fetch()

        # Move the timestamp index into a 'time' column and derive local time.
        data = convert_utc_to_timezone(data.reset_index(), 'time', latitude, longitude)

        race_start = day.replace(hour=race_time)
        race_start_plus_4 = race_start + timedelta(hours=4)

        at_start = (
            (data['local_date'] == race_start.date())
            & (data['local_time'] == race_start.time())
        )
        at_plus_4 = (
            (data['local_date'] == race_start_plus_4.date())
            & (data['local_time'] == race_start_plus_4.time())
        )
        selected = at_start | at_plus_4

        data_filter = data.loc[selected, observation_columns].copy()
        data_filter['temp'] = celsius_to_fahrenheit(data_filter['temp'])
        data_filter['dwpt'] = celsius_to_fahrenheit(data_filter['dwpt'])

        # Label each row while this day's masks are still at hand instead of
        # relying on a loop variable after the loop has finished.
        data_filter['suffix'] = np.where(at_start[selected], '_start', '_start_plus_4')

        all_data.append(data_filter)

    if not all_data:
        raise ValueError("date_list must contain at least one date")

    # Concatenate all dataframes into one
    weather_data = pd.concat(all_data)

    # Pivot so each local_date has one column per (measurement, suffix) pair
    weather_pivot = weather_data.set_index(['local_date', 'suffix']).unstack('suffix')

    # Flatten multi-level columns, e.g. ('temp', '_start') -> 'temp_start'
    weather_pivot.columns = [f"{col[0]}{col[1]}" for col in weather_pivot.columns]

    # Reset the index for a clean result
    weather_pivot = weather_pivot.reset_index()

    # Summarise the weather columns (mean, min and max) and add the marathon name
    weather_agg = (
        weather_pivot
        .drop(columns=['local_time_start', 'local_time_start_plus_4', 'local_date'])
        .agg(['mean', 'min', 'max'])
        .reset_index()
    )
    weather_agg['marathon_name'] = marathon_name

    return weather_pivot, weather_agg

In [192]:
# First Sunday of November for each of the 20 years before the current one.
# The current year comes from datetime, which the import cell provides; the
# bare name `date` is not imported there, so date.today() raises NameError
# when the notebook is run on a fresh kernel.
current_year = datetime.today().year
first_sundays_nov = [
    get_first_sunday_of_november(year)
    for year in range(current_year - 20, current_year)
]

In [193]:
# NYC Marathon: New York City coordinates, race starting at hour 9 local time.
nyc, nyc_agg = get_marathon_weather_data(
    latitude=40.7128,
    longitude=-74.0060,
    race_time=9,
    date_list=first_sundays_nov,
    marathon_name='NYC',
)

