# Gathering Weather Data Using the Open-Meteo API

In [10]:

from pathlib import Path

import openmeteo_requests
import pandas as pd
import requests_cache
from retry_requests import retry

In [29]:
# Build the Open-Meteo client on top of a caching session that retries failed requests.
# The cache is named ".cache"; expire_after=-1 means entries are kept without expiry
# (per requests-cache), so re-running the notebook reuses the stored response.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Every weather variable needed downstream must appear in "daily".
# Their order matters: the response is read back by position in a later cell.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=40.7143,
    longitude=-74.006,
    start_date="2014-01-01",
    end_date="2025-04-30",
    daily=[
        "temperature_2m_mean",
        "apparent_temperature_mean",
        "precipitation_sum",
        "rain_sum",
        "snowfall_sum",
        "precipitation_hours",
        "wind_speed_10m_max",
        "wind_gusts_10m_max",
        "cloud_cover_mean",
        "relative_humidity_2m_mean",
        "daylight_duration",
        "visibility_min",
        "visibility_max",
    ],
    timezone="America/New_York",
)
responses = openmeteo.weather_api(url, params=params)

In [30]:

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]

# Timezone() and TimezoneAbbreviation() are returned as bytes: printing them directly
# produced b'America/New_York'b'GMT-4' with no separator. Decode them to text first;
# non-bytes values (e.g. None) are passed through unchanged.
timezone_name, timezone_abbreviation = (
    value.decode("utf-8") if isinstance(value, bytes) else value
    for value in (response.Timezone(), response.TimezoneAbbreviation())
)

print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {timezone_name} {timezone_abbreviation}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

Coordinates 40.738136291503906°N -74.04254150390625°E
Elevation 51.0 m asl
Timezone b'America/New_York'b'GMT-4'
Timezone difference to GMT+0 -14400 s


In [31]:

# Process daily data. Variables come back in the order they were requested, so the
# requested name list in params["daily"] is used as the single source of truth for
# mapping response positions to column names (no hand-maintained indices).
daily = response.Daily()

requested_daily = params["daily"]
# A single variable may be given as a bare string; normalise to a list of names.
daily_variable_names = (
    [requested_daily] if isinstance(requested_daily, str) else list(requested_daily)
)

# Fail loudly if the response does not line up with the request, instead of
# silently attaching values to the wrong column names.
returned_variable_count = daily.VariablesLength()
if returned_variable_count != len(daily_variable_names):
    raise ValueError(
        f"Requested {len(daily_variable_names)} daily variables "
        f"but the response contains {returned_variable_count}"
    )

# One timestamp per interval, from Time() (inclusive) to TimeEnd() (exclusive).
# NOTE(review): the recorded output shows 04:00 UTC for January dates while the
# reported UTC offset is -14400 s, so these look like local-midnight instants under
# one fixed offset rather than DST-aware dates -- confirm before joining on calendar date.
daily_data = {"date": pd.date_range(
    start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
    end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
    freq = pd.Timedelta(seconds = daily.Interval()),
    inclusive = "left"
)}

# Column order matches the request order, after the leading "date" column.
for position, variable_name in enumerate(daily_variable_names):
    daily_data[variable_name] = daily.Variables(position).ValuesAsNumpy()

daily_dataframe = pd.DataFrame(data = daily_data)
daily_dataframe

Unnamed: 0,date,temperature_2m_mean,apparent_temperature_mean,precipitation_sum,rain_sum,snowfall_sum,precipitation_hours,wind_speed_10m_max,wind_gusts_10m_max,cloud_cover_mean,relative_humidity_2m_mean,daylight_duration,visibility_min,visibility_max
0,2014-01-01 04:00:00+00:00,-2.518584,-7.199565,0.000000,0.000000,0.00,0.0,15.038350,29.519999,44.375000,51.803543,33542.425781,,
1,2014-01-02 04:00:00+00:00,-2.631083,-8.301644,3.900000,0.000000,2.94,8.0,23.132626,46.799999,99.500000,75.030983,33587.007812,,
2,2014-01-03 04:00:00+00:00,-12.143584,-18.867376,10.100000,0.000000,7.14,11.0,28.766228,54.360001,54.250000,68.411957,33635.183594,,
3,2014-01-04 04:00:00+00:00,-13.076917,-17.824211,0.000000,0.000000,0.00,0.0,11.983188,17.639999,28.041666,69.838005,33686.910156,,
4,2014-01-05 04:00:00+00:00,-3.895666,-7.867000,0.100000,0.100000,0.00,1.0,15.192682,28.440001,71.750000,86.027748,33742.355469,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4133,2025-04-26 04:00:00+00:00,18.251663,17.817745,13.200001,13.200001,0.00,18.0,21.396542,48.239998,93.791664,81.701622,49591.636719,,
4134,2025-04-27 04:00:00+00:00,11.524585,6.710212,0.000000,0.000000,0.00,0.0,30.356560,77.039993,51.958332,53.213390,49737.101562,,
4135,2025-04-28 04:00:00+00:00,15.799582,13.341233,0.000000,0.000000,0.00,0.0,12.678185,32.760002,8.708333,43.262264,49881.386719,,
4136,2025-04-29 04:00:00+00:00,19.270414,17.037094,0.000000,0.000000,0.00,0.0,19.334715,45.000000,56.416668,46.893066,50024.351562,,


In [32]:
# Count missing values per column to spot variables that came back empty
nan_counts = daily_dataframe.isnull().sum(axis = 0)
print(nan_counts)

date                            0
temperature_2m_mean             0
apparent_temperature_mean       0
precipitation_sum               0
rain_sum                        0
snowfall_sum                    0
precipitation_hours             0
wind_speed_10m_max              0
wind_gusts_10m_max              0
cloud_cover_mean                0
relative_humidity_2m_mean       0
daylight_duration               0
visibility_min               4138
visibility_max               4138
dtype: int64


In [33]:
# Both visibility columns are entirely NaN (see the NaN counts above), so drop them.
# Rebinding the name instead of using inplace=True, together with errors="ignore",
# keeps this cell idempotent: re-running it after the columns are already gone no
# longer raises KeyError.
daily_dataframe = daily_dataframe.drop(
    columns = ["visibility_min", "visibility_max"],
    errors = "ignore",
)

In [34]:
# Check for gaps: build the full daily range between the first and last timestamp
# and keep whatever is not present in the data (an empty index means no gaps).
observed_dates = daily_dataframe["date"]
expected_dates = pd.date_range(start = observed_dates.min(), end = observed_dates.max())
missing_dates = expected_dates.difference(observed_dates)
missing_dates

DatetimeIndex([], dtype='datetime64[ns, UTC]', freq='D')

In [35]:
# Write the cleaned daily weather table to CSV without the row index.
# The target directory is created first so the export also works on a fresh
# checkout where "data/" does not exist yet.
output_path = Path("data/weather_data.csv")
output_path.parent.mkdir(parents = True, exist_ok = True)
daily_dataframe.to_csv(output_path, index = False)