In [2]:
import pandas as pd
import numpy as np 

In [3]:
from meteostat import Stations

# (latitude, longitude) of the point treated as the center of the Bay Area.
bay_area_center = (37.6965, -122.1158)

# Build the station query around that point in a single chained expression.
stations = Stations().nearby(*bay_area_center)

# Materialize the query as a DataFrame; 1000 is the limit passed to fetch().
wc_stations = stations.fetch(1000)

In [4]:
# Preview the first rows. Leaving the frame as the cell's last expression lets
# the notebook render it as a table instead of print()'s wrapped plain text.
wc_stations.head()

                                             name country region    wmo  icao  \
id                                                                              
72585                      Hayward / Russell City      US     CA  72585  KHWD   
72493        Metro Oakland International  Airport      US     CA  72493  KOAK   
74506                   Alameda Naval Air Station      US     CA  74506  KNGZ   
KSQL0  San Carlos / Silver Penny Mobile Home Park      US     CA   <NA>  KSQL   
72494                       San Francisco Airport      US     CA  72494  KSFO   

       latitude  longitude  elevation             timezone hourly_start  \
id                                                                        
72585   37.6589  -122.1218       16.0  America/Los_Angeles   2000-01-01   
72493   37.7167  -122.2333        2.0  America/Los_Angeles   1943-01-01   
74506   37.7833  -122.3167        4.0  America/Los_Angeles   1973-01-01   
KSQL0   37.5119  -122.2495        2.0  America/Los_Angele

In [5]:
# Bounding box (decimal degrees) used to keep only Bay Area stations.
# Both ends of each range are inclusive.
BAY_AREA_LON_RANGE = (-122.6445, -121.5871)
BAY_AREA_LAT_RANGE = (37.1897, 38.2033)

# Boolean masks, one per axis. Series.between is inclusive on both ends by
# default, so this is the same test as `>= low` & `<= high`.
in_lon_range = wc_stations["longitude"].between(*BAY_AREA_LON_RANGE)
in_lat_range = wc_stations["latitude"].between(*BAY_AREA_LAT_RANGE)

# Keep stations inside the box on both axes; .copy() detaches the result from
# wc_stations so it can be modified independently.
bayarea_station = wc_stations[in_lon_range & in_lat_range].copy()

In [6]:
# Re-wrap the filtered stations under the name the following cells use.
bayarea_stations_df = pd.DataFrame(bayarea_station)

# Show each selected station's name next to its `hourly_start` value. A bare
# last expression is rendered as a table by the notebook, unlike print().
bayarea_stations_df[["name", "hourly_start"]]

                                              name hourly_start
id                                                             
72585                       Hayward / Russell City   2000-01-01
72493         Metro Oakland International  Airport   1943-01-01
74506                    Alameda Naval Air Station   1973-01-01
KSQL0   San Carlos / Silver Penny Mobile Home Park   2006-01-01
72494                        San Francisco Airport   1973-01-01
KLVK0                  Livermore / East Pleasanton   2000-01-01
KPAO0          Palo Alto / Runnymeade (Historical)   2006-01-01
74509                                Moffett Field   1973-01-01
BFY6K                       Concord Buchanan Field   2022-04-23
KHAF0  Half Moon Bay / El Granada Mobile Home Park   2009-07-17
KSJC0       San Jose / Santa Clara Trailer Village   1973-01-01
KC830                                        Byron   2020-01-14
KRHV0                         San Jose / Alum Rock   2006-01-01
KDVO0                             Novato

In [7]:
# Map station id (the frame's index) -> station name. The column is selected
# by label rather than by position so a change in column order cannot
# silently swap in a different column.
bayarea_stations_dict = bayarea_stations_df["name"].to_dict()

# Per-station metadata, still indexed by station id, with a `station_` prefix
# on every column. Column selection and rename() each return a new frame, so
# bayarea_stations_df itself is never modified.
stations_info_df = bayarea_stations_df[
    ["name", "latitude", "longitude", "elevation"]
].rename(
    columns={
        "name": "station_name",
        "latitude": "station_lat",
        "longitude": "station_lon",
        "elevation": "station_elevation",
    }
)

In [None]:
from datetime import datetime
from meteostat import Hourly
import warnings

# NOTE(review): this silences *every* warning for the rest of the kernel
# session, not just for this cell. Kept as-is to preserve existing behavior;
# consider narrowing it to the specific category that is noisy.
warnings.filterwarnings("ignore")


# Time window requested from Hourly for every station.
start = datetime(2000, 1, 1)
end = datetime(2025, 12, 23, 23, 59)

# Not read anywhere in this cell; retained in case a later cell relies on it.
station_name = bayarea_stations_df.reset_index().to_dict()

# Final column order: timestamp, station metadata, then the hourly measurements.
all_stations_columns = ["time", "station_name", "station_lat", "station_lon", "station_elevation", 'temp', 'dwpt', 'rhum', 'prcp', 'snow', 'wdir', 'wspd', 'wpgt', 'pres',
       'tsun', 'coco']

# Collect one frame per station and concatenate once after the loop. Calling
# pd.concat inside the loop re-copies everything gathered so far on each
# iteration, and seeding it with an empty frame can distort result dtypes.
station_frames = []

# stations_info_df is indexed by station id, so each row carries the metadata
# (station_name / station_lat / station_lon / station_elevation) for that id.
for station_id, station_info in stations_info_df.iterrows():

    # reset_index() turns the fetched frame's index into a regular column
    # (the "time" column selected below).
    meteo_data = Hourly(station_id, start, end).fetch().reset_index()

    # Nothing returned for this station in the window: skip it.
    if meteo_data.empty:
        continue

    # Attach metadata by station id rather than merging on station_name:
    # names are not guaranteed unique, and a duplicated name would multiply
    # every hourly row in the merge.
    meteo_data = meteo_data.assign(**station_info.to_dict())

    station_frames.append(meteo_data[all_stations_columns])

if station_frames:
    # Each station keeps its own 0..n-1 row labels, as before.
    all_stations = pd.concat(station_frames, axis=0)
else:
    # No station produced data: still expose an empty frame with the schema.
    all_stations = pd.DataFrame(columns=all_stations_columns)

                 time  temp  dwpt  rhum  prcp  snow   wdir  wspd  wpgt  \
0 2000-01-01 01:00:00  11.0   5.9  71.0  <NA>  <NA>  260.0  13.0  <NA>   
1 2000-01-01 02:00:00  11.0   6.9  76.0  <NA>  <NA>  250.0  13.0  <NA>   
2 2000-01-01 03:00:00  11.0   5.9  71.0  <NA>  <NA>  250.0  14.8  <NA>   
3 2000-01-01 04:00:00  10.0   6.0  76.0  <NA>  <NA>  260.0  20.5  <NA>   
4 2000-01-01 05:00:00  10.0   6.0  76.0  <NA>  <NA>  220.0  11.2  <NA>   

     pres  tsun  coco  
0  1018.2  <NA>  <NA>  
1  1018.5  <NA>  <NA>  
2  1018.4  <NA>  <NA>  
3  1018.8  <NA>  <NA>  
4  1019.2  <NA>  <NA>  
                 time  temp  dwpt  rhum  prcp  snow   wdir  wspd  wpgt  \
0 2004-01-01 01:00:00  11.0   8.0  82.0  <NA>  <NA>  220.0   7.6  <NA>   
1 2004-01-01 02:00:00  11.0   8.0  82.0  <NA>  <NA>  250.0   9.4  <NA>   
2 2004-01-01 03:00:00  11.0   8.0  82.0  <NA>  <NA>    0.0   0.0  <NA>   
3 2004-01-01 04:00:00  11.0   8.0  82.0  <NA>  <NA>  230.0  11.2  <NA>   
4 2004-01-01 05:00:00  11.0   8.0  82.0  