In [2]:
import openmeteo_requests
import requests_cache
import pandas as pd
from retry_requests import retry

def get_weather_data(is_historical: bool):
    """
    Fetch weather data from the Open-Meteo API for a fixed location.

    The location (latitude -10, longitude -55) and the timezone
    ("America/Sao_Paulo") are hard-coded in the request parameters.

    In historical mode the archive endpoint is queried for hourly values
    between 2020-01-01 and 2024-12-31; the result is written to
    ``./data/historical_data.csv`` (``;``-separated, UTF-8), printed and
    returned. Otherwise the forecast endpoint is queried and the current
    values are printed and returned.

    :param is_historical: True to download the historical hourly archive,
        False to fetch the current conditions.
    :return: A ``pandas.DataFrame`` with a ``date`` column plus one column per
        hourly variable when ``is_historical`` is true; otherwise a ``dict``
        mapping each current variable name to its value, plus a ``"time"``
        entry holding the value of ``current.Time()``.
    """
    # Local import: only needed to make sure the CSV output directory exists.
    from pathlib import Path

    def _as_text(value):
        # The client returns these fields as bytes (the notebook output shows
        # b'America/Sao_Paulo'); decode them so the printed line is readable.
        return value.decode("utf-8") if isinstance(value, bytes) else str(value)

    # Cached HTTP session wrapped with retries. Historical responses use
    # expire_after=-1, current ones are cached for 3600 seconds.
    cache_session = requests_cache.CachedSession('.cache', expire_after=-1 if is_historical else 3600)
    retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
    openmeteo = openmeteo_requests.Client(session=retry_session)

    url = "https://archive-api.open-meteo.com/v1/archive" if is_historical else "https://api.open-meteo.com/v1/forecast"

    params = {
        "latitude": -10,
        "longitude": -55,
        "timezone": "America/Sao_Paulo"
    }

    if is_historical:
        params.update({
            "start_date": "2020-01-01",
            "end_date": "2024-12-31",
            "hourly": ["temperature_2m", "relative_humidity_2m", "apparent_temperature", "precipitation", "rain", "weather_code", "cloud_cover", "wind_direction_10m", "wind_speed_10m", "is_day"]
        })
    else:
        params.update({
            # NOTE(review): the hourly series is requested here but never read
            # below; kept so the request stays identical to the original.
            "hourly": "temperature_2m",
            "current": ["temperature_2m", "relative_humidity_2m", "apparent_temperature", "is_day", "precipitation", "rain", "weather_code", "cloud_cover", "wind_direction_10m", "wind_speed_10m"]
        })

    responses = openmeteo.weather_api(url, params=params)
    # Only one location is requested, so only the first response is used.
    response = responses[0]

    print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
    print(f"Elevation {response.Elevation()} m asl")
    print(f"Timezone {_as_text(response.Timezone())} {_as_text(response.TimezoneAbbreviation())}")
    print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

    if is_historical:
        hourly = response.Hourly()
        # Build the timestamp index from the first/last epoch seconds and the
        # sampling interval; inclusive="left" leaves out the end timestamp.
        hourly_data = {"date": pd.date_range(
            start=pd.to_datetime(hourly.Time(), unit="s", utc=True),
            end=pd.to_datetime(hourly.TimeEnd(), unit="s", utc=True),
            freq=pd.Timedelta(seconds=hourly.Interval()),
            inclusive="left"
        )}

        # Variables are read by position, so this relies on the response
        # keeping the same order as the requested "hourly" list.
        for i, var in enumerate(params["hourly"]):
            hourly_data[var] = hourly.Variables(i).ValuesAsNumpy()

        hourly_dataframe = pd.DataFrame(data=hourly_data)

        # Create ./data if needed so to_csv does not fail on a fresh checkout.
        output_path = Path("./data/historical_data.csv")
        output_path.parent.mkdir(parents=True, exist_ok=True)
        hourly_dataframe.to_csv(output_path, sep=";", index=False, encoding="utf-8")
        print(hourly_dataframe)
        return hourly_dataframe

    current = response.Current()
    # Same positional convention as above, applied to the "current" list.
    current_data = {var: current.Variables(i).Value() for i, var in enumerate(params["current"])}
    current_data["time"] = current.Time()

    print(current_data)  # This is where the data could be forwarded to Apache Flink instead
    return current_data
        
if __name__ == "__main__":
    # Ask the user (prompt is in Portuguese) whether historical data is
    # wanted; only an answer of "s" (case-insensitive, surrounding whitespace
    # ignored) selects historical mode, anything else means current data.
    answer = input("Você quer dados históricos? (s/n): ")
    is_historical = answer.strip().lower() == "s"
    get_weather_data(is_historical)


Você quer dados históricos? (s/n):  s


Coordinates -10.017574310302734°N -54.929962158203125°E
Elevation 420.0 m asl
Timezone b'America/Sao_Paulo'b'GMT-3'
Timezone difference to GMT+0 -10800 s
                           date  temperature_2m  relative_humidity_2m  \
0     2020-01-01 03:00:00+00:00       23.074499             95.843521   
1     2020-01-01 04:00:00+00:00       23.074499             95.843521   
2     2020-01-01 05:00:00+00:00       22.374500             97.593452   
3     2020-01-01 06:00:00+00:00       21.824499             98.785309   
4     2020-01-01 07:00:00+00:00       21.674500             99.390327   
...                         ...             ...                   ...   
43843 2024-12-31 22:00:00+00:00       25.424500             79.040604   
43844 2024-12-31 23:00:00+00:00       24.374500             86.496567   
43845 2025-01-01 00:00:00+00:00       23.874500             89.403587   
43846 2025-01-01 01:00:00+00:00       23.574499             96.733345   
43847 2025-01-01 02:00:00+00:00       22.77