In [57]:
import pandas as pd
import requests
import time
from datetime import datetime

# Maps each California county name to one representative (latitude, longitude)
# point in decimal degrees; the points are described as approximate county-seat
# locations. These coordinates are what gets sent to the weather API, so each
# county's wind series is only as representative as its single point here.
# NOTE(review): several coordinate values repeat verbatim across different
# counties (e.g. longitude -120.5432 for Amador, Calaveras and Lassen;
# longitude -122.3917 for Glenn, Shasta and Siskiyou; latitude 39.0916 for
# Lake and Placer) — verify against an authoritative source before relying
# on them.
CA_COUNTY_COORDS = {
    'Alameda': (37.6017, -121.7195),
    'Alpine': (38.5976, -119.8186),
    'Amador': (38.3468, -120.5432),
    'Butte': (39.7285, -121.8375),
    'Calaveras': (38.1913, -120.5432),
    'Colusa': (39.2141, -122.2297),
    'Contra Costa': (37.9161, -121.9511),
    'Del Norte': (41.7448, -124.1328),
    'El Dorado': (38.7296, -120.8039),
    'Fresno': (36.7378, -119.7871),
    'Glenn': (39.5982, -122.3917),
    'Humboldt': (40.7450, -123.8695),
    'Imperial': (32.7947, -115.5630),
    'Inyo': (36.8032, -118.0614),
    'Kern': (35.3733, -119.0187),
    'Kings': (36.0803, -119.8551),
    'Lake': (39.0916, -122.7611),
    'Lassen': (40.6587, -120.5432),
    'Los Angeles': (34.0522, -118.2437),
    'Madera': (37.0553, -119.7010),
    'Marin': (38.0834, -122.7633),
    'Mariposa': (37.4849, -119.9663),
    'Mendocino': (39.3080, -123.7995),
    'Merced': (37.3022, -120.4830),
    'Modoc': (41.5868, -120.7432),
    'Mono': (38.0750, -119.0500),
    'Monterey': (36.6002, -121.8947),
    'Napa': (38.2975, -122.2869),
    'Nevada': (39.3616, -121.0161),
    'Orange': (33.7175, -117.8311),
    'Placer': (39.0916, -120.8039),
    'Plumas': (39.9568, -120.8349),
    'Riverside': (33.9533, -117.3962),
    'Sacramento': (38.5816, -121.4944),
    'San Benito': (36.6077, -121.0360),
    'San Bernardino': (34.1083, -117.2898),
    'San Diego': (32.7157, -117.1611),
    'San Francisco': (37.7749, -122.4194),
    'San Joaquin': (37.9577, -121.2908),
    'San Luis Obispo': (35.2828, -120.6596),
    'San Mateo': (37.5630, -122.3255),
    'Santa Barbara': (34.4208, -119.6982),
    'Santa Clara': (37.3541, -121.9552),
    'Santa Cruz': (36.9741, -122.0308),
    'Shasta': (40.5865, -122.3917),
    'Sierra': (39.5796, -120.5276),
    'Siskiyou': (41.5868, -122.3917),
    'Solano': (38.2494, -121.9018),
    'Sonoma': (38.2913, -122.4580),
    'Stanislaus': (37.6391, -120.9969),
    'Sutter': (39.0294, -121.6169),
    'Tehama': (40.0249, -122.1958),
    'Trinity': (40.6587, -123.1331),
    'Tulare': (36.2077, -119.3473),
    'Tuolumne': (37.9577, -120.2407),
    'Ventura': (34.3705, -119.1391),
    'Yolo': (38.6785, -121.7681),
    'Yuba': (39.1404, -121.6169)
}



In [59]:
def _daily_payload_to_records(county_name, daily):
    """Convert Open-Meteo's column-oriented ``daily`` block into one dict per day.

    A wind variable that is absent from ``daily`` is filled with ``None`` for
    every date. Raises ``KeyError`` if ``daily`` has no ``'time'`` entry.
    """
    dates = daily['time']
    filler = [None] * len(dates)
    wind_mean = daily.get('wind_speed_10m_mean', filler)
    wind_max = daily.get('wind_speed_10m_max', filler)
    wind_min = daily.get('wind_speed_10m_min', filler)

    return [
        {
            'county_name': county_name,
            'date': date,
            'wind_mean': w_mean,
            'wind_max': w_max,
            'wind_min': w_min,
        }
        for date, w_mean, w_max, w_min in zip(dates, wind_mean, wind_max, wind_min)
    ]


def fetch_wind_data_for_county(county_name, lat, lon, start_date, end_date,
                               max_retries=5, request_delay=1):
    """
    Fetch daily wind speed data from the Open-Meteo archive API for one county.

    Args:
        county_name: Label copied into every returned record.
        lat, lon: Coordinates sent as the ``latitude`` / ``longitude`` query
            parameters.
        start_date, end_date: Date strings sent as-is to the API
            (callers in this file pass ``YYYY-MM-DD``).
        max_retries: Maximum number of request attempts. Only HTTP 429
            responses trigger another attempt.
        request_delay: Seconds to pause before every attempt, to throttle
            the request rate.

    Returns:
        A list of dicts with keys ``county_name``, ``date``, ``wind_mean``,
        ``wind_max`` and ``wind_min`` (one per day), or an empty list on any
        failure. Failures are reported with ``print`` and never raised.
    """
    url = "https://archive-api.open-meteo.com/v1/archive"

    params = {
        'latitude': lat,
        'longitude': lon,
        'start_date': start_date,
        'end_date': end_date,
        'daily': 'wind_speed_10m_mean,wind_speed_10m_max,wind_speed_10m_min',
        'timezone': 'America/Los_Angeles'
    }

    for attempt in range(max_retries):
        try:
            time.sleep(request_delay)  # Throttle: pause before every request
            response = requests.get(url, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()

            if 'daily' not in data:
                print(f"No data for {county_name}")
                return []

            return _daily_payload_to_records(county_name, data['daily'])

        except requests.exceptions.HTTPError as e:
            # e.response can be None, so never dereference it blindly.
            status_code = getattr(e.response, 'status_code', None)
            if status_code != 429:
                print(f"HTTP Error for {county_name}: {e}")
                return []

            # Rate limited. Give up immediately on the last attempt instead of
            # sleeping through a backoff that no further request would follow.
            if attempt == max_retries - 1:
                print(f"Failed after {max_retries} attempts for {county_name}")
                return []

            wait_time = (2 ** attempt) * 2  # Exponential backoff: 2, 4, 8, 16, ... seconds
            print(f"Rate limited for {county_name}, waiting {wait_time} seconds (attempt {attempt + 1}/{max_retries})")
            time.sleep(wait_time)
        except Exception as e:
            # Best-effort: any other failure (network, JSON, payload shape)
            # is reported and yields no data for this county.
            print(f"Error fetching data for {county_name}: {e}")
            return []

    return []




In [61]:
def fetch_california_wind_data(start_year=2020, end_year=2025):
    """
    Fetch daily wind data for every county in CA_COUNTY_COORDS.

    Args:
        start_year: First year to request; the range starts on January 1.
        end_year: Last year to request; the range ends on December 31 of this
            year, or today if that is earlier (the archive only serves
            historical dates). Years are assumed to be four-digit.

    Returns:
        A DataFrame with one row per county and day (columns ``county_name``,
        ``date`` as datetime, ``wind_mean``, ``wind_max``, ``wind_min``).
        An empty DataFrame is returned when nothing could be retrieved or the
        requested range is empty. Counties that returned no data are listed
        with ``print``.
    """
    start_date = f'{start_year}-01-01'
    today = datetime.now().strftime('%Y-%m-%d')
    # ISO dates with four-digit years compare correctly as strings, so min()
    # picks the earlier of "end of end_year" and "today".
    end_date = min(f'{end_year}-12-31', today)

    if start_date > end_date:
        # Nothing to request; avoid one doomed API call per county.
        print(f"No data: requested range {start_date} to {end_date} is empty")
        return pd.DataFrame()

    all_data = []
    failed_counties = []

    total = len(CA_COUNTY_COORDS)

    for i, (county_name, (lat, lon)) in enumerate(CA_COUNTY_COORDS.items(), 1):
        print(f"{i}/{total} : {county_name} county")

        county_data = fetch_wind_data_for_county(county_name, lat, lon, start_date, end_date)

        if county_data:
            all_data.extend(county_data)
            print(f"Collected {len(county_data)} records \n")
        else:
            failed_counties.append(county_name)
            print("No data retrieved \n")

    if not all_data:
        print("No data")
        return pd.DataFrame()

    df = pd.DataFrame(all_data)
    df['date'] = pd.to_datetime(df['date'])

    # Report coverage against the actual number of configured counties.
    print(f"{df['county_name'].nunique()}/{total}")

    if failed_counties:
        print(f"{len(failed_counties)} counties failed:")
        for name in failed_counties:
            print(f" - {name}")

    return df

In [63]:
def create_monthly_summary(df_weather):
    """Average the daily wind columns per county and calendar month.

    Expects a datetime ``date`` column plus ``county_name``, ``wind_mean``,
    ``wind_max`` and ``wind_min``. The input frame is left untouched; the
    result has one row per (county_name, year, month), sorted by those keys.
    """
    group_keys = ['county_name', 'year', 'month']
    wind_columns = ('wind_mean', 'wind_max', 'wind_min')

    # assign() returns a new frame, so the caller's data is not modified
    daily = df_weather.assign(
        year=df_weather['date'].dt.year,
        month=df_weather['date'].dt.month,
    )

    monthly = daily.groupby(group_keys).agg({column: 'mean' for column in wind_columns})
    monthly = monthly.reset_index()

    return monthly.sort_values(group_keys).reset_index(drop=True)

def create_seasonal_summary(df_monthly_summary):
    """Average the monthly wind columns per county and meteorological season.

    Months are bucketed as Winter (Dec-Feb), Spring (Mar-May), Summer
    (Jun-Aug) and Fall (Sep-Nov), pooled across all years. The returned
    ``season`` column is an ordered categorical, so rows sort
    Winter, Spring, Summer, Fall within each county.
    """
    seasons_in_order = ['Winter', 'Spring', 'Summer', 'Fall']
    months_by_season = {
        'Winter': (12, 1, 2),
        'Spring': (3, 4, 5),
        'Summer': (6, 7, 8),
        'Fall': (9, 10, 11),
    }
    # Invert the table into a month -> season lookup
    month_to_season = {
        month: season
        for season, months in months_by_season.items()
        for month in months
    }

    monthly = df_monthly_summary.assign(
        season=df_monthly_summary['month'].map(month_to_season)
    )

    wind_columns = ('wind_mean', 'wind_max', 'wind_min')
    seasonal = (
        monthly
        .groupby(['county_name', 'season'])
        .agg({column: 'mean' for column in wind_columns})
        .reset_index()
    )

    # Ordered categorical makes the sort follow the calendar, not the alphabet
    seasonal['season'] = pd.Categorical(
        seasonal['season'], categories=seasons_in_order, ordered=True
    )

    return seasonal.sort_values(['county_name', 'season']).reset_index(drop=True)



In [65]:
# Driver cell: download the daily wind series for every county, roll it up
# to monthly and then seasonal averages, and write the seasonal table to CSV.
print("wind data ---- Open Meteo..")

df_wind = fetch_california_wind_data(start_year=2020, end_year=2025)

if df_wind.empty:
    print("No data")
else:
    # Daily -> monthly averages (kept in memory only)
    df_monthly = create_monthly_summary(df_wind)

    # Monthly -> seasonal averages; only this table is written to disk
    df_seasonal = create_seasonal_summary(df_monthly)
    df_seasonal.to_csv('seasonal_summary_wind_full.csv', index=False)

    print("summary saved to 'seasonal_summary_wind_full.csv'")

wind data ---- Open Meteo..
1/58 : Alameda county
Collected 2172 records 

2/58 : Alpine county
Collected 2172 records 

3/58 : Amador county
Collected 2172 records 

4/58 : Butte county
Collected 2172 records 

5/58 : Calaveras county
Collected 2172 records 

6/58 : Colusa county
Collected 2172 records 

7/58 : Contra Costa county
Collected 2172 records 

8/58 : Del Norte county
Collected 2172 records 

9/58 : El Dorado county
Collected 2172 records 

10/58 : Fresno county
Collected 2172 records 

11/58 : Glenn county
Collected 2172 records 

12/58 : Humboldt county
Collected 2172 records 

13/58 : Imperial county
Collected 2172 records 

14/58 : Inyo county
Rate limited for Inyo, waiting 2 seconds (attempt 1/5)
Rate limited for Inyo, waiting 4 seconds (attempt 2/5)
Rate limited for Inyo, waiting 8 seconds (attempt 3/5)
Rate limited for Inyo, waiting 16 seconds (attempt 4/5)
Collected 2172 records 

15/58 : Kern county
Collected 2172 records 

16/58 : Kings county
Collected 2172 recor