In [129]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry
from pyrate_limiter import Duration, Rate, Limiter, BucketFullException, LimiterDelayException
import requests

In [130]:
# Execute the config_psql notebook inside this kernel so its names become available here.
# NOTE(review): `connection` and `engine`, used by later cells, are not defined in any
# visible cell of this notebook - presumably they are created by config_psql.ipynb; confirm.
%run config_psql.ipynb

In [131]:
# Display settings: passing None removes pandas' cap on how many columns / rows are
# shown when a DataFrame is rendered, so frames are displayed in full.

pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', None)

In [132]:
def connect_open_meteo_api():
    """Build the Open-Meteo client together with the HTTP sessions backing it.

    Returns
    -------
    tuple
        ``(cache_session, retry_session, openmeteo)``:

        * ``cache_session`` - a ``requests_cache.CachedSession`` stored under
          ``.cache`` and created with ``expire_after=-1`` (in requests_cache,
          -1 means cached responses never expire).
        * ``retry_session`` - the cached session wrapped by ``retry`` with
          5 retries and a backoff factor of 0.2.
        * ``openmeteo`` - the ``openmeteo_requests.Client`` that sends its
          requests through the retrying session.
    """
    cached = requests_cache.CachedSession('.cache', expire_after=-1)
    retrying = retry(cached, retries=5, backoff_factor=0.2)
    client = openmeteo_requests.Client(session=retrying)
    return cached, retrying, client

In [133]:
def rate_limiter():
    """Create the limiter that throttles calls to the Open-Meteo API.

    Three limits are enforced together (per minute, per hour, per day). Each
    one sits below the API quota (600 / 5000 / 10000 requests) by a safety
    factor of about 1.1, so the client stays under the quota.

    The hourly limit used to be the raw quota (5000) even though its comment
    announced ``5000/1.1``; it now applies the same margin as the other two.

    Returns
    -------
    Limiter
        A ``pyrate_limiter.Limiter`` built from the three rates with
        ``max_delay=61000`` (milliseconds in pyrate_limiter, i.e. just over
        the one-minute window), the longest a caller may be delayed while
        waiting for capacity.
    """
    minute_rate = Rate(540, Duration.MINUTE)  # a little under 600 / 1.1 (~545) per minute
    hourly_rate = Rate(4545, Duration.HOUR)   # 5000 / 1.1 per hour
    daily_rate = Rate(9090, Duration.DAY)     # 10000 / 1.1 per day

    # Rates are listed from the shortest interval to the longest.
    rates = [minute_rate, hourly_rate, daily_rate]
    limiter = Limiter(rates, max_delay=61000)

    return limiter

In [134]:
def response_to_df(df_weather, responses, match_id):
    """Append the hourly weather of one API answer to ``df_weather``.

    Only the first element of ``responses`` is read. Its hourly variables are
    fetched by position, so they must have been requested in exactly the order
    listed in ``variable_names`` below.

    Parameters
    ----------
    df_weather : pandas.DataFrame
        Frame accumulated so far (may be empty); it is not modified.
    responses : sequence
        Responses returned by the Open-Meteo client; ``responses[0]`` is used.
    match_id
        Identifier stored in the ``match_id`` column of every new row.

    Returns
    -------
    pandas.DataFrame
        ``df_weather`` followed by one row per hourly step with the columns
        ``date``, the eleven weather variables, ``match_id``,
        ``local_timezone`` ("<timezone> <abbreviation>") and ``diff_timezone``
        (UTC offset in hours). The index is not reset here.
    """
    # Position i in this tuple corresponds to hourly.Variables(i).
    variable_names = (
        "temperature_2m",
        "relative_humidity_2m",
        "dew_point_2m",
        "apparent_temperature",
        "precipitation",
        "rain",
        "surface_pressure",
        "cloud_cover",
        "wind_speed_10m",
        "soil_temperature_0_to_7cm",
        "soil_moisture_0_to_7cm",
    )

    first = responses[0]
    hourly = first.Hourly()
    series = [hourly.Variables(position).ValuesAsNumpy() for position in range(len(variable_names))]

    # Time axis: from Time() up to (but excluding) TimeEnd(), one step every Interval() seconds.
    timestamps = pd.date_range(
        start=pd.to_datetime(hourly.Time(), unit="s"),
        end=pd.to_datetime(hourly.TimeEnd(), unit="s"),
        freq=pd.Timedelta(seconds=hourly.Interval()),
        inclusive="left",
    )

    columns = {"date": timestamps}
    for name, values in zip(variable_names, series):
        columns[name] = values

    timezone_label = first.Timezone().decode("utf-8") + " " + first.TimezoneAbbreviation().decode("utf-8")
    offset_hours = first.UtcOffsetSeconds() / 3600

    new_rows = pd.DataFrame(data=columns).assign(
        match_id=match_id,
        local_timezone=timezone_label,
        diff_timezone=offset_hours,
    )
    return pd.concat([df_weather, new_rows])

In [135]:
# Matches still missing weather data: the LEFT JOIN on dwh.weather combined with
# `w.match_id IS NULL` keeps only matches without any weather row, and grounds lacking
# coordinates are excluded. The dates are cast to text in SQL, so they arrive as strings.
# Each fetched row is (match_id, start_date, end_date, latitude, longitude).
stmt = """
SELECT 
	m.match_id,
	cast(m.start_date as text) as start_date,
	cast(m.end_date as text) as end_date,
	g.latitude,
	g.longitude
FROM dwh.match m
LEFT JOIN dwh.weather w ON m.match_id = w.match_id
JOIN dwh.ground g ON m.ground_id = g.ground_id
WHERE w.match_id IS NULL and g.latitude IS NOT NULL AND g.longitude IS NOT NULL;
"""
# NOTE(review): `connection` is not defined in the visible cells - presumably it is
# created by config_psql.ipynb; confirm. The cursor is left open after the fetch.
cursor = connection.cursor()
cursor.execute(stmt)
rows = cursor.fetchall()

In [136]:
# Fetch hourly archive weather for the pending matches and gather it in df_weather.
# df_weather is only created when there is at least one pending match.
if len(rows) > 0:
    cache_session, retry_session, openmeteo = connect_open_meteo_api()
    limiter = rate_limiter()

    url = "https://archive-api.open-meteo.com/v1/archive"

    # Order matters: response_to_df reads the hourly variables by position.
    hourly_variables = ["temperature_2m", "relative_humidity_2m", "dew_point_2m", "apparent_temperature", "precipitation", "rain", "surface_pressure",
                        "cloud_cover", "wind_speed_10m", "soil_temperature_0_to_7cm", "soil_moisture_0_to_7cm"]

    # One frame per match, concatenated once after the loop instead of re-copying
    # the growing frame (and resetting its index) on every iteration.
    match_frames = []

    # NOTE(review): only the first 10 pending matches are processed per run - confirm
    # this cap is intentional and not a leftover from testing.
    for request, row in enumerate(rows[:10]):
        match_id, start_date, end_date, latitude, longitude = row
        params = {
            "latitude": latitude,
            "longitude": longitude,
            "start_date": start_date,
            "end_date": end_date,
            "hourly": list(hourly_variables),
            "timezone": "auto"}
        # Take a slot from the limiter before calling the API.
        limiter.try_acquire(request)
        responses = openmeteo.weather_api(url, params=params)
        match_frames.append(response_to_df(pd.DataFrame(), responses, match_id))

    # ignore_index yields the same 0..n-1 index the per-iteration reset_index produced.
    df_weather = pd.concat(match_frames, ignore_index=True)

In [None]:
# Append the collected weather rows to dwh.weather.
# df_weather only exists when the query returned pending matches, so the write is
# skipped (instead of failing with NameError) when there was nothing to fetch.
if len(rows) > 0:
    count_rows = df_weather.to_sql('weather', schema='dwh', con=engine, if_exists='append', method='multi', index=False)
else:
    count_rows = 0