In [1]:
from helpers.load_coords import load_city_coords

# Load the coordinates for the four cities once, then bind one name per city.
# Later in this notebook miami_coords is indexed with "lat" and "long".
city_coords = load_city_coords()
miami_coords, nyc_coords, chicago_coords, austin_coords = city_coords

# Data Sources
## Open-Meteo
URL: https://open-meteo.com/en/docs/historical-weather-api/

Historical data is available back to 1940.

## Meteostat
URL: https://dev.meteostat.net/

Historical data range: not yet confirmed (TODO: check the documentation).

## NOAA
URL: https://www.ncdc.noaa.gov/cdo-web/

Historical data range: not yet confirmed (TODO: check the documentation).


## NASA Power API
URL: https://power.larc.nasa.gov/docs/v1/

Historical data range: not yet confirmed (TODO: check the documentation).

## Fetching Miami daily maximum temperature from Open-Meteo

In [3]:
import openmeteo_requests

import requests_cache
import pandas as pd
from retry_requests import retry

# Build the Open-Meteo client on top of an on-disk response cache ('.cache') wrapped in
# automatic retries. expire_after = -1 keeps cached responses indefinitely.
cache_session = requests_cache.CachedSession(".cache", expire_after=-1)
retry_session = retry(cache_session, retries=5, backoff_factor=0.2)
openmeteo = openmeteo_requests.Client(session=retry_session)

# Historical archive request: a single daily variable (maximum 2 m temperature, requested in
# Fahrenheit) for the Miami coordinates. Variables come back in the order they are requested,
# so the index passed to daily.Variables(...) below must match the "daily" entry here.
# NOTE(review): no "timezone" is requested and the recorded run reports a UTC offset of 0 s,
# so days are presumably delimited in GMT rather than Miami local time - confirm this is intended.
url = "https://archive-api.open-meteo.com/v1/archive"
params = dict(
    latitude=miami_coords["lat"],
    longitude=miami_coords["long"],
    start_date="1940-01-01",
    end_date="2024-03-04",
    daily="temperature_2m_max",
    temperature_unit="fahrenheit",
)
responses = openmeteo.weather_api(url, params=params)

# A single location was requested, so only the first response is inspected.
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Daily block: variable 0 is the only one requested above (temperature_2m_max).
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()

# Rebuild the date axis from the response's start/end epoch seconds and step size.
# inclusive="left" excludes the end timestamp itself.
day_start = pd.to_datetime(daily.Time(), unit="s", utc=True)
day_end = pd.to_datetime(daily.TimeEnd(), unit="s", utc=True)
day_step = pd.Timedelta(seconds=daily.Interval())
daily_data = {
    "date": pd.date_range(start=day_start, end=day_end, freq=day_step, inclusive="left"),
    "temperature_2m_max": daily_temperature_2m_max,
}

daily_dataframe = pd.DataFrame(daily_data)
print(daily_dataframe)

Coordinates 25.764497756958008°N -80.39215087890625°E
Elevation 3.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
                           date  temperature_2m_max
0     1940-01-01 00:00:00+00:00           70.285103
1     1940-01-02 00:00:00+00:00           73.255096
2     1940-01-03 00:00:00+00:00           71.275101
3     1940-01-04 00:00:00+00:00           76.225098
4     1940-01-05 00:00:00+00:00           77.575104
...                         ...                 ...
30740 2024-02-29 00:00:00+00:00           80.940201
30741 2024-03-01 00:00:00+00:00           78.510201
30742 2024-03-02 00:00:00+00:00           79.590195
30743 2024-03-03 00:00:00+00:00           82.650200
30744 2024-03-04 00:00:00+00:00           81.930199

[30745 rows x 2 columns]


In [4]:
# Fetch the most recent past day from the forecast endpoint.
#
# The answer to this request changes every day while its URL and parameters stay the same.
# The shared `openmeteo` client caches responses with expire_after = -1 (never expires), so
# re-running this cell on a later date would silently return the first run's cached day.
# Use a dedicated client whose cache lives in its own file and expires after one hour.
forecast_cache_session = requests_cache.CachedSession('.cache_forecast', expire_after = 3600)
forecast_retry_session = retry(forecast_cache_session, retries = 5, backoff_factor = 0.2)
openmeteo_forecast = openmeteo_requests.Client(session = forecast_retry_session)

# Make sure all required weather variables are listed here
# The order of variables in hourly or daily is important to assign them correctly below
url = "https://api.open-meteo.com/v1/forecast"
params = {
	"latitude": miami_coords["lat"],
	"longitude": miami_coords["long"],
	"daily": "temperature_2m_max",
	"temperature_unit": "fahrenheit",
	# One past day and no forecast days: the response covers the previous day only.
	"past_days": 1,
	"forecast_days": 0
}
responses = openmeteo_forecast.weather_api(url, params=params)

# Process first location. Add a for-loop for multiple locations or weather models
response = responses[0]
print(f"Coordinates {response.Latitude()}°N {response.Longitude()}°E")
print(f"Elevation {response.Elevation()} m asl")
print(f"Timezone {response.Timezone()} {response.TimezoneAbbreviation()}")
print(f"Timezone difference to GMT+0 {response.UtcOffsetSeconds()} s")

# Process daily data. The order of variables needs to be the same as requested.
daily = response.Daily()
daily_temperature_2m_max = daily.Variables(0).ValuesAsNumpy()

# Rebuild the date axis from the response's start/end epoch seconds and step size;
# inclusive = "left" excludes the end timestamp itself.
daily_data = {"date": pd.date_range(
	start = pd.to_datetime(daily.Time(), unit = "s", utc = True),
	end = pd.to_datetime(daily.TimeEnd(), unit = "s", utc = True),
	freq = pd.Timedelta(seconds = daily.Interval()),
	inclusive = "left"
)}
daily_data["temperature_2m_max"] = daily_temperature_2m_max
print(daily_data)

Coordinates 25.750526428222656°N -80.38168334960938°E
Elevation 3.0 m asl
Timezone None None
Timezone difference to GMT+0 0 s
{'date': DatetimeIndex(['2024-03-05 00:00:00+00:00'], dtype='datetime64[ns, UTC]', freq='D'), 'temperature_2m_max': array([83.7302], dtype=float32)}


In [9]:
# Append the freshly fetched day(s) in daily_data to the historical frame.
# Duplicate dates are dropped (the most recently appended value wins) so that re-running this
# cell, or an overlap between the two fetched ranges, cannot store the same day twice.
daily_dataframe = (
    pd.concat([daily_dataframe, pd.DataFrame(data=daily_data)], ignore_index=True)
    .drop_duplicates(subset="date", keep="last")
    .reset_index(drop=True)
)
print(daily_dataframe)

                           date  temperature_2m_max
0     1940-01-01 00:00:00+00:00           70.285103
1     1940-01-02 00:00:00+00:00           73.255096
2     1940-01-03 00:00:00+00:00           71.275101
3     1940-01-04 00:00:00+00:00           76.225098
4     1940-01-05 00:00:00+00:00           77.575104
...                         ...                 ...
30741 2024-03-01 00:00:00+00:00           78.510201
30742 2024-03-02 00:00:00+00:00           79.590195
30743 2024-03-03 00:00:00+00:00           82.650200
30744 2024-03-04 00:00:00+00:00           81.930199
30745 2024-03-05 00:00:00+00:00           83.730202

[30746 rows x 2 columns]


In [10]:
# Write the combined Miami series to CSV; index=False leaves the row index out of the file.
output_csv_path = "../data/miami_weather.csv"
daily_dataframe.to_csv(output_csv_path, index=False)