In [1]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Setup the Open-Meteo API client with cache and retry on error
cache_session = requests_cache.CachedSession('.cache', expire_after = 3600)
retry_session = retry(cache_session, retries = 5, backoff_factor = 0.2)
openmeteo = openmeteo_requests.Client(session = retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": 40.7128,
	"longitude": -74.006,
	"daily": ["weather_code", "temperature_2m_max", "temperature_2m_min", "apparent_temperature_max", "apparent_temperature_min", "sunrise", "sunset", "daylight_duration", "sunshine_duration", "uv_index_max", "uv_index_clear_sky_max", "precipitation_sum", "rain_sum", "showers_sum", "snowfall_sum", "precipitation_hours", "precipitation_probability_max", "wind_speed_10m_max", "wind_gusts_10m_max", "wind_direction_10m_dominant", "shortwave_radiation_sum", "et0_fao_evapotranspiration"],
	"timezone": "America/New_York",
	"forecast_days": 1
}
responses = openmeteo.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_weather_code = daily.Variables(0).ValuesAsNumpy()
daily_temperature_2m_max = daily.Variables(1).ValuesAsNumpy()
daily_temperature_2m_min = daily.Variables(2).ValuesAsNumpy()
daily_apparent_temperature_max = daily.Variables(3).ValuesAsNumpy()
daily_apparent_temperature_min = daily.Variables(4).ValuesAsNumpy()
daily_sunrise = daily.Variables(5).ValuesAsNumpy()
daily_sunset = daily.Variables(6).ValuesAsNumpy()
daily_daylight_duration = daily.Variables(7).ValuesAsNumpy()
daily_sunshine_duration = daily.Variables(8).ValuesAsNumpy()
daily_uv_index_max = daily.Variables(9).ValuesAsNumpy()
daily_uv_index_clear_sky_max = daily.Variables(10).ValuesAsNumpy()
daily_precipitation_sum = daily.Variables(11).ValuesAsNumpy()
daily_rain_sum = daily.Variables(12).ValuesAsNumpy()
daily_showers_sum = daily.Variables(13).ValuesAsNumpy()
daily_snowfall_sum = daily.Variables(14).ValuesAsNumpy()
daily_precipitation_hours = daily.Variables(15).ValuesAsNumpy()
daily_precipitation_probability_max = daily.Variables(16).ValuesAsNumpy()
daily_wind_speed_10m_max = daily.Variables(17).ValuesAsNumpy()
daily_wind_gusts_10m_max = daily.Variables(18).ValuesAsNumpy()
daily_wind_direction_10m_dominant = daily.Variables(19).ValuesAsNumpy()
daily_shortwave_radiation_sum = daily.Variables(20).ValuesAsNumpy()
daily_et0_fao_evapotranspiration = daily.Variables(21).ValuesAsNumpy()

daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["weather_code"] = daily_weather_code
daily_data["temperature_2m_max"] = daily_temperature_2m_max
daily_data["temperature_2m_min"] = daily_temperature_2m_min
daily_data["apparent_temperature_max"] = daily_apparent_temperature_max
daily_data["apparent_temperature_min"] = daily_apparent_temperature_min
daily_data["sunrise"] = daily_sunrise
daily_data["sunset"] = daily_sunset
daily_data["daylight_duration"] = daily_daylight_duration
daily_data["sunshine_duration"] = daily_sunshine_duration
daily_data["uv_index_max"] = daily_uv_index_max
daily_data["uv_index_clear_sky_max"] = daily_uv_index_clear_sky_max
daily_data["precipitation_sum"] = daily_precipitation_sum
daily_data["rain_sum"] = daily_rain_sum
daily_data["showers_sum"] = daily_showers_sum
daily_data["snowfall_sum"] = daily_snowfall_sum
daily_data["precipitation_hours"] = daily_precipitation_hours
daily_data["precipitation_probability_max"] = daily_precipitation_probability_max
daily_data["wind_speed_10m_max"] = daily_wind_speed_10m_max
daily_data["wind_gusts_10m_max"] = daily_wind_gusts_10m_max
daily_data["wind_direction_10m_dominant"] = daily_wind_direction_10m_dominant
daily_data["shortwave_radiation_sum"] = daily_shortwave_radiation_sum
daily_data["et0_fao_evapotranspiration"] = daily_et0_fao_evapotranspiration

daily_dataframe = pd.DataFrame(data = daily_data)
print(daily_dataframe)

Coordinates 40.71033477783203°N -73.99308776855469°E
Elevation 32.0 m asl
Timezone b'America/New_York' b'EDT'
Timezone difference to GMT+0 -14400 s
                       date  weather_code  temperature_2m_max  \
0 2024-08-09 04:00:00+00:00          65.0              26.431   

   temperature_2m_min  apparent_temperature_max  apparent_temperature_min  \
0              21.031                 28.211838                 23.654509   

   sunrise  sunset  daylight_duration  sunshine_duration  ...   rain_sum  \
0        0       0       50425.789062        5097.374023  ...  39.599998   

   showers_sum  snowfall_sum  precipitation_hours  \
0          0.0           0.0                  4.0   

   precipitation_probability_max  wind_speed_10m_max  wind_gusts_10m_max  \
0                          100.0           35.583591           79.559998   

   wind_direction_10m_dominant  shortwave_radiation_sum  \
0                   149.095215                    10.57   

   et0_fao_evapotranspiration  
0 

In [2]:
daily_dataframe

Unnamed: 0,date,weather_code,temperature_2m_max,temperature_2m_min,apparent_temperature_max,apparent_temperature_min,sunrise,sunset,daylight_duration,sunshine_duration,...,rain_sum,showers_sum,snowfall_sum,precipitation_hours,precipitation_probability_max,wind_speed_10m_max,wind_gusts_10m_max,wind_direction_10m_dominant,shortwave_radiation_sum,et0_fao_evapotranspiration
0,2024-08-09 04:00:00+00:00,65.0,26.431,21.031,28.211838,23.654509,0,0,50425.789062,5097.374023,...,39.599998,0.0,0.0,4.0,100.0,35.583591,79.559998,149.095215,10.57,2.119573


In [3]:
daily_dataframe.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 23 columns):
 #   Column                         Non-Null Count  Dtype              
---  ------                         --------------  -----              
 0   date                           1 non-null      datetime64[ns, UTC]
 1   weather_code                   1 non-null      float32            
 2   temperature_2m_max             1 non-null      float32            
 3   temperature_2m_min             1 non-null      float32            
 4   apparent_temperature_max       1 non-null      float32            
 5   apparent_temperature_min       1 non-null      float32            
 6   sunrise                        1 non-null      int64              
 7   sunset                         1 non-null      int64              
 8   daylight_duration              1 non-null      float32            
 9   sunshine_duration              1 non-null      float32            
 10  uv_index_max                  

In [4]:
# Extraer solo la fecha (sin la hora) para agrupar por día
daily_dataframe['date'] = daily_dataframe['date'].dt.date

In [5]:
# Seleccionar solo las columnas deseadas
df_weather_filtered = daily_dataframe[['date', 'weather_code', 'temperature_2m_max', 'temperature_2m_min', 'rain_sum', 'snowfall_sum', 'precipitation_hours', 'wind_speed_10m_max']]

# Mostrar el DataFrame resultante
print(df_weather_filtered)

         date  weather_code  temperature_2m_max  temperature_2m_min  \
0  2024-08-09          65.0              26.431              21.031   

    rain_sum  snowfall_sum  precipitation_hours  wind_speed_10m_max  
0  39.599998           0.0                  4.0           35.583591  


In [8]:
df_weather_filtered.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1 entries, 0 to 0
Data columns (total 8 columns):
 #   Column               Non-Null Count  Dtype  
---  ------               --------------  -----  
 0   date                 1 non-null      object 
 1   weather_code         1 non-null      float32
 2   temperature_2m_max   1 non-null      float32
 3   temperature_2m_min   1 non-null      float32
 4   rain_sum             1 non-null      float32
 5   snowfall_sum         1 non-null      float32
 6   precipitation_hours  1 non-null      float32
 7   wind_speed_10m_max   1 non-null      float32
dtypes: float32(7), object(1)
memory usage: 168.0+ bytes


In [10]:
df_weather_filtered.loc[:, 'date'] = pd.to_datetime(df_weather_filtered['date'])

In [12]:
df_weather_filtered.to_parquet('dataset_clima_hoy.parquet', index=False)