In [None]:
# Step 5: Test multiple ski resort locations
#
# For each resort: look up its /points record, follow the 12-hour forecast URL
# from that record, and keep the first forecast period as a one-row summary.

# Imported in this cell so `time.sleep` below does not depend on another cell
# having imported the module (or on the name `time` not having been rebound).
import time

ski_resorts = {
    'Sunday River, ME': (44.4667, -70.8500),
    'Sugarloaf, ME': (45.0317, -70.3139),
    'Killington, VT': (43.6042, -72.8220),
    'Stowe, VT': (44.5303, -72.7817),
    'Loon Mountain, NH': (44.0370, -71.6212),
    'Bretton Woods, NH': (44.2589, -71.4394)
}

forecast_summary = []

for resort_name, (lat, lon) in ski_resorts.items():
    print(f"Fetching forecast for {resort_name}...", end=' ')

    # Get grid info. Resort-specific names are used on purpose: the single-site
    # cells read `points_data` and `forecast_url`, and this loop must not
    # silently repoint them at whichever resort happened to be fetched last.
    resort_points_url = f"https://api.weather.gov/points/{lat},{lon}"
    points_response = requests.get(resort_points_url, headers=headers)

    if points_response.status_code == 200:
        resort_properties = points_response.json()['properties']

        # Get forecast
        forecast_response = requests.get(resort_properties['forecast'], headers=headers)

        if forecast_response.status_code == 200:
            resort_periods = forecast_response.json()['properties']['periods']

            if resort_periods:
                first_period = resort_periods[0]

                forecast_summary.append({
                    'resort': resort_name,
                    'lat': lat,
                    'lon': lon,
                    'grid_id': resort_properties['gridId'],
                    'current_period': first_period['name'],
                    'temperature': first_period['temperature'],
                    'wind': first_period['windSpeed'],
                    'forecast': first_period['shortForecast']
                })
                print("✓")
            else:
                # Indexing [0] on an empty list would abort the whole loop
                print("✗ (forecast error: no periods returned)")
        else:
            print(f"✗ (forecast error: {forecast_response.status_code})")
    else:
        print(f"✗ (points error: {points_response.status_code})")

    time.sleep(0.5)  # Be nice to the API

print(f"\n\nSuccessfully fetched forecasts for {len(forecast_summary)}/{len(ski_resorts)} resorts\n")

df_resorts = pd.DataFrame(forecast_summary)
display(df_resorts)

In [None]:
# Step 4: Get grid forecast data (raw numerical data)
#
# Fetches the grid-data endpoint, lists every layer it exposes together with
# its value count and unit, then prints the leading temperature entries.
if not points_data:
    print("Skipping - points_data not available")
else:
    response = requests.get(forecast_grid_url, headers=headers)

    if response.status_code != 200:
        print(f"Error fetching grid data: {response.status_code}")
        grid_data = None
    else:
        grid_data = response.json()
        properties = grid_data['properties']

        print("Available Grid Data Layers:")
        print("-" * 50)

        # A layer is any dict-valued property that has a 'values' entry
        data_keys = [
            name
            for name, candidate in properties.items()
            if isinstance(candidate, dict) and 'values' in candidate
        ]

        for name in sorted(data_keys):
            layer = properties[name]
            print(f"  {name}: {len(layer['values'])} values ({layer.get('uom', 'N/A')})")

        print(f"\n\nTotal data layers: {len(data_keys)}")

        # Example: show the first 12 temperature entries as returned
        if 'temperature' in properties:
            temp_values = properties['temperature']['values']
            print(f"\n\nTemperature forecast ({len(temp_values)} periods):")
            for temp in temp_values[:12]:
                print(f"  {temp}")

In [None]:
# Step 3: Get hourly forecast
#
# Fetches the hourly endpoint, prints a one-line summary for each of the next
# 24 periods, and loads all periods into `df_hourly` with parsed start times.
if points_data:
    response = requests.get(forecast_hourly_url, headers=headers)

    if response.status_code == 200:
        hourly_data = response.json()
        hourly_periods = hourly_data['properties']['periods']

        print(f"Hourly Forecast ({len(hourly_periods)} hours available)\n")

        # Show next 24 hours
        print("Next 24 hours:")
        for period in hourly_periods[:24]:
            # Deliberately not called `time`: notebook cells share one namespace,
            # so that name would replace the `time` module and break every
            # later `time.sleep(...)` call.
            hour_label = pd.to_datetime(period['startTime']).strftime('%a %I%p')
            print(f"{hour_label}: {period['temperature']}°F - {period['shortForecast']}")

        # Convert to DataFrame
        df_hourly = pd.DataFrame(hourly_periods)
        df_hourly['startTime'] = pd.to_datetime(df_hourly['startTime'])

        print(f"\n\nHourly DataFrame shape: {df_hourly.shape}")
        display(df_hourly[['startTime', 'temperature', 'windSpeed', 'shortForecast']].head(24))
    else:
        print(f"Error fetching hourly forecast: {response.status_code}")
        hourly_data = None
else:
    print("Skipping - points_data not available")

In [None]:
# Step 2: Get standard 7-day forecast (12-hour periods)
#
# Fetches the standard forecast endpoint, prints the first ten periods as short
# text blocks, and loads every period into `df_forecast`.
if not points_data:
    print("Skipping - points_data not available")
else:
    response = requests.get(forecast_url, headers=headers)

    if response.status_code != 200:
        print(f"Error fetching forecast: {response.status_code}")
        forecast_data = None
    else:
        forecast_data = response.json()
        periods = forecast_data['properties']['periods']

        print(f"7-Day Forecast for Sunday River ({len(periods)} periods):\n")

        # First 10 periods (~5 days), one indented block per period
        for entry in periods[:10]:
            print(f"{entry['name']}:")
            print(f"  Temp: {entry['temperature']}°{entry['temperatureUnit']}")
            print(f"  Wind: {entry['windSpeed']} {entry['windDirection']}")
            print(f"  Forecast: {entry['shortForecast']}")
            print()

        # Tabular form for easier analysis; only a few columns are displayed
        df_forecast = pd.DataFrame(periods)
        print(f"\nForecast DataFrame shape: {df_forecast.shape}")
        display(
            df_forecast[
                ['name', 'temperature', 'temperatureUnit', 'windSpeed', 'shortForecast']
            ].head(10)
        )

In [None]:
# Step 1: Get grid coordinates and forecast endpoints from /points
#
# On success this cell defines `points_data`, the grid identifiers and the three
# forecast URLs; on failure it prints the response body and sets
# `points_data` to None.
points_url = f"https://api.weather.gov/points/{test_lat},{test_lon}"
response = requests.get(points_url, headers=headers)

print(f"Status: {response.status_code}")

if response.status_code != 200:
    print(f"Error: {response.text}")
    points_data = None
else:
    points_data = response.json()
    properties = points_data['properties']

    # Grid cell that the API assigns to the test coordinates
    grid_id, grid_x, grid_y = (properties[field] for field in ('gridId', 'gridX', 'gridY'))

    # Endpoint URLs for the standard, hourly and raw grid forecasts
    forecast_url = properties['forecast']
    forecast_hourly_url = properties['forecastHourly']
    forecast_grid_url = properties['forecastGridData']

    print(f"\n✓ Grid Location: {grid_id} ({grid_x}, {grid_y})")
    print(f"\nForecast URLs:")
    print(f"  Standard (12h): {forecast_url}")
    print(f"  Hourly: {forecast_hourly_url}")
    print(f"  Grid Data: {forecast_grid_url}")

In [None]:
# Request headers sent with every weather.gov call; the API requires a
# User-Agent header.
headers = {
    "User-Agent": "(portfolio-weather-app, nate@example.com)",
    "Accept": "application/geo+json",
}

# Test coordinates: Sunday River, ME (44.4667, -70.8500)
test_lat, test_lon = 44.4667, -70.8500

print(f"Testing with Sunday River coordinates: {test_lat}, {test_lon}")

## 2. Explore weather.gov API for Real-Time & Forecasts

Testing the weather.gov API (different from CDO) for current conditions and 7-day forecasts.

**Key differences:**
- No API token required (just User-Agent header)
- Real-time + 7-day forecast data
- Grid-based system (2.5km resolution)
- Free with generous rate limits

# NOAA Data Service API Exploration

Testing the NOAA Climate Data Online (CDO) Web Services API v2 to get historical weather data for ski resorts.

**Goal:** Find weather stations near major ski resorts and pull snowfall/precipitation data.

In [None]:
import requests
import pandas as pd
import json
import dotenv
import os
dotenv.load_dotenv()

In [None]:
# Token sent with the CDO requests; None when NOAA_API_TOKEN is not set
noaa_api_token = os.getenv('NOAA_API_TOKEN')

In [None]:
# Confirm the token was loaded without echoing the secret itself: cell output
# is stored in the notebook file and would be shared or committed with it.
"NOAA_API_TOKEN is set" if noaa_api_token else "NOAA_API_TOKEN is missing - check the environment / .env file"

## Explore some weather stations

In [None]:
# Page through every CDO station inside the New England bounding box and
# collect the records into `df_stations`.
import time  # back-off between retries of a failed page

all_stations = []
offset = 0
limit = 1000
max_retries = 5  # consecutive failures tolerated for a single page
failed_attempts = 0

# New England bounding box: minLat,minLon,maxLat,maxLon
# CT (south) to ME (north), VT (west) to ME (east) with buffer
ne_extent = "40.5,-74,48,-66"

while True:
    response = requests.get(
        "https://www.ncei.noaa.gov/cdo-web/api/v2/stations",
        headers={'token': noaa_api_token},
        params={'limit': limit, 'offset': offset, 'extent': ne_extent}
    )

    data = None
    if response.status_code == 200:
        try:
            data = response.json()
        except ValueError:
            # json.JSONDecodeError is a ValueError subclass, so this covers
            # every "body is not valid JSON" failure.
            data = None

    if data is None:
        # Retry the SAME page (offset untouched), but only a bounded number of
        # times and with a growing pause, instead of looping forever.
        failed_attempts += 1
        print(response.content)
        if failed_attempts >= max_retries:
            raise RuntimeError(
                f"Giving up at offset {offset} after {failed_attempts} failed attempts "
                f"(last HTTP status: {response.status_code})"
            )
        time.sleep(failed_attempts)
        continue

    failed_attempts = 0
    stations = data.get('results', [])
    if not stations:
        break  # an empty page marks the end of the result set

    all_stations.extend(stations)
    print(f"Fetched {len(all_stations)}", end='\r')

    # Advance exactly one page per successful request; advancing twice would
    # skip every other page of results.
    offset += limit

df_stations = pd.DataFrame(all_stations)
df_stations

In [None]:
# Derive a state code from station names that end in "<STATE> US".
# The pattern only recognises ME, NH and VT; other names yield a missing value.
df_stations['state'] = df_stations['name'].str.extract(r'(ME|NH|VT)\s+US$', expand=False)

# Keep the stations whose name matched one of those states
df_ne_stations = df_stations.loc[df_stations['state'].notna()]
print(f"New England stations: {len(df_ne_stations):,}")

# Narrow further to station ids containing 'GHCND'
is_ghcnd = df_ne_stations['id'].str.contains('GHCND')
df_ne_stations = df_ne_stations.loc[is_ghcnd]
print(f"\nFiltered to {len(df_ne_stations):,} GHCND stations in New England")

# Station counts per state, ordered by state code
print("\nStations by state:")
state_counts = df_ne_stations['state'].value_counts().sort_index()
print(state_counts)

print("\n" + "="*60)
print("Sample New England stations:")
display(df_ne_stations.head(20))

# Persist the filtered table so it can be explored outside the notebook
df_ne_stations.to_csv('ne_weather_stations.csv', index=False)
print(f"\nSaved {len(df_ne_stations)} New England stations to 'ne_weather_stations.csv'")


In [None]:
# Test with Sunday River ZIP code (Newry, ME 04261)
#
# Requests one week of GHCND records for that ZIP and previews up to three.
response = requests.get(
    "https://www.ncdc.noaa.gov/cdo-web/api/v2/data",
    headers={'token': noaa_api_token},
    params=dict(
        datasetid='GHCND',
        locationid='ZIP:04261',
        startdate='2023-01-01',
        enddate='2023-01-07',
        limit=100,
    ),
)

print(f"Status: {response.status_code}")

if response.status_code != 200:
    print(f"Error: {response.text}")
else:
    data = response.json()
    records = data.get('results', [])
    print(f"Records: {len(records)}")
    # Pretty-print at most the first three records
    if records:
        print(json.dumps(records[:3], indent=2))

## 1. See how many GHCND stations have data
Let's start by testing a simple query to see what the data looks like.

In [None]:
import time

# Probe each station in df_ne_stations for at least one GHCND record dated
# 2025-11-01 to 2025-11-30, and keep the ids that return something.
stations_with_data = []
total_stations = len(df_ne_stations)

for station_id in df_ne_stations['id']:
    response = requests.get(
        "https://www.ncdc.noaa.gov/cdo-web/api/v2/data",
        headers={'token': noaa_api_token},
        params={
            'datasetid': 'GHCND',
            'stationid': station_id,
            'startdate': '2025-11-01',
            'enddate': '2025-11-30',
            'limit': 1
        }
    )

    # The body is only parsed when the request itself succeeded
    has_records = response.status_code == 200 and response.json().get('results')
    if has_records:
        stations_with_data.append(station_id)
        print(f"✓ {len(stations_with_data)}/{total_stations}", end='\r')
        print(station_id)

    # Pause between requests
    time.sleep(0.21)

print(f"\n\nFound {len(stations_with_data)} stations with data")
df_active = df_ne_stations[df_ne_stations['id'].isin(stations_with_data)]
df_active

In [None]:
# Save the stations that returned data (the DataFrame index is written too).
# NOTE(review): the file name says 2023, but the visible cells that build
# df_active probe 2025-11 or 2021-01 - confirm the intended name.
df_active.to_csv(path_or_buf="active_stations_2023.csv")

In [None]:
import time

# Select every station whose id starts with 'GHCND:' (missing ids count as no)
ghcnd_mask = df_stations['id'].str.startswith('GHCND:', na=False)
ghcnd_stations = df_stations[ghcnd_mask]
print(f"Testing {len(ghcnd_stations)} GHCND stations...")

# Probe each of them for at least one record dated 2021-01-01 to 2021-01-31
stations_with_data = []

for station_id in ghcnd_stations['id']:
    response = requests.get(
        "https://www.ncdc.noaa.gov/cdo-web/api/v2/data",
        headers={'token': noaa_api_token},
        params=dict(
            datasetid='GHCND',
            stationid=station_id,
            startdate='2021-01-01',
            enddate='2021-01-31',
            limit=1,
        ),
    )

    if response.status_code == 200:
        data = response.json()
        if data.get('results'):
            stations_with_data.append(station_id)
            print(f"✓ {station_id} ({len(stations_with_data)}/{len(ghcnd_stations)})", end='\r')

    time.sleep(0.21)  # ~5 req/sec

print(f"\n\nFound {len(stations_with_data)} stations with data")
df_active = ghcnd_stations[ghcnd_stations['id'].isin(stations_with_data)]
df_active

In [None]:
# List available datasets
#
# Shows the datasets returned by the CDO API as a table. On a failed request
# the response body is printed and an empty table is shown instead.
response = requests.get(
    "https://www.ncdc.noaa.gov/cdo-web/api/v2/datasets",
    headers={'token': noaa_api_token}
)

print(f"Status: {response.status_code}")
if response.status_code == 200:
    datasets = response.json()
else:
    # Reset explicitly so the table below can never be built from a stale
    # `datasets` left over from an earlier run (or fail with a NameError).
    print(f"Error: {response.text}")
    datasets = None

pd.DataFrame(datasets.get('results', []) if datasets else [])