In [1]:

import pandas as pd
import requests
import time
import math

# Load your data
df = pd.read_csv('test.csv')

# Load NOAA API token from file
with open('API_token.txt', 'r') as token_file:
    NOAA_TOKEN = token_file.read().strip()

print("NOAA Token loaded successfully: " + NOAA_TOKEN)

BASE_URL = "https://www.ncei.noaa.gov/cdo-web/api/v2/data"
HEADERS = {"token": NOAA_TOKEN}


NOAA Token loaded successfully: xoMBEjEaGDOQpPljQjueOOokNPPpmFqN


In [2]:

def parse_response_for_errors(response):
    if response.status_code != 200:
        print("Error detected in response:")
        print(f"Status Code: {response.status_code}")
        print(f"URL: {response.url}")
        print(f"Response Text: {response.text}")
        try:
            error_details = response.json()
            print("Error Details:", error_details)
        except ValueError:
            print("Response is not in JSON format.")
        return True
    return False

def make_request(url, params, retries=5):
    for attempt in range(retries):
        response = requests.get(url, headers=HEADERS, params=params)
        if response.status_code == 503:
            print(f"503 Service Unavailable. Retrying in {2 ** attempt} seconds...")
            time.sleep(2 ** attempt)
            continue
        return response
    print("Failed after retries. Server is unavailable.")
    return None




In [3]:


def calculate_distance(lat1, lon1, lat2, lon2):
    # Calculate the distance between two points using the Haversine formula
    R = 6371  # Radius of the Earth in kilometers
    dlat = math.radians(lat2 - lat1)
    dlon = math.radians(lon2 - lon1)
    a = math.sin(dlat / 2)**2 + math.cos(math.radians(lat1)) * math.cos(math.radians(lat2)) * math.sin(dlon / 2)**2
    c = 2 * math.atan2(math.sqrt(a), math.sqrt(1 - a))
    return R * c

def fetch_weather_data(lat, lon, start_date, end_date, datasetid="GHCND", datatypeid="TAVG", limit=1000, retries=5):
    stations_url = "https://www.ncei.noaa.gov/cdo-web/api/v2/stations"
    params = {
        "datasetid": datasetid,
        "limit": 100,  # Fetch multiple stations to find the closest one
        "latitude": lat,
        "longitude": lon,
        "sortfield": "datacoverage"  # Sort by data coverage
    }
    print("Fetching station data with parameters:", params)
    response = make_request(stations_url, params, retries=retries)
    if not response or parse_response_for_errors(response):
        print("Failed to fetch station data.")
        print("Response:", response.text if response else "No response")
        return None

    stations = response.json().get('results')
    if not stations:
        print("No stations found near:", lat, lon)
        return None

    # Find the closest station manually
    closest_station = min(stations, key=lambda station: calculate_distance(lat, lon, station['latitude'], station['longitude']))
    station_id = closest_station['id']
    print("Closest Station ID:", station_id)
    print("Closest Station Name:", closest_station['name'])

    # Continue fetching weather data for the closest station...
    all_results = []
    offset = 1
    while True:
        data_params = {
            "datasetid": datasetid,
            "datatypeid": datatypeid,
            "stationid": station_id,
            "startdate": start_date,
            "enddate": end_date,
            "limit": limit,
            "units": "metric",
            "offset": offset
        }
        response = make_request(BASE_URL, data_params, retries=retries)
        if not response or parse_response_for_errors(response):
            print("Failed to fetch weather data. Check your parameters or API limits.")
            return None

        results = response.json().get("results", [])
        all_results.extend(results)

        if len(results) < limit:
            break
        offset += limit
    print(f"Fetched {len(all_results)} records for station {station_id} from {start_date} to {end_date}.")
    return all_results

In [4]:

# Sample query to test the function
sample_lat = 43.0731
sample_lon = -89.4012  # Madison, WI
sample_start_date = "2023-01-01"
sample_end_date = "2023-01-31"

# Fetch weather data for the sample query
sample_weather_data = fetch_weather_data(
    lat=sample_lat,
    lon=sample_lon,
    start_date=sample_start_date,
    end_date=sample_end_date
)

print("Sample Weather Data:", sample_weather_data)


Fetching station data with parameters: {'datasetid': 'GHCND', 'limit': 100, 'latitude': 43.0731, 'longitude': -89.4012, 'sortfield': 'datacoverage'}
Closest Station ID: GHCND:USC00470519
Closest Station Name: BARABOO RIVER AT WEST BARABOO, WI US
Fetched 0 records for station GHCND:USC00470519 from 2023-01-01 to 2023-01-31.
Sample Weather Data: []
