In [1]:
import math
import os
import re
import time

import pandas as pd
import requests

In [2]:
# The NOAA NCEI (CDO) web API requires a personal access token. Request one at
# https://www.ncdc.noaa.gov/cdo-web/token and expose it to the notebook through
# the NOAA_API_TOKEN environment variable, so the secret is never stored in the
# notebook itself.
API_TOKEN = os.environ.get("NOAA_API_TOKEN", "")
if not API_TOKEN:
    # Warn here instead of raising so the remaining cells can still be defined;
    # lookups made without a token are expected to fail.
    print("NOAA_API_TOKEN is not set; NOAA API requests are expected to fail.")

# Request headers sent with every API call; the token travels in the `token` header.
HEADERS = {
    "token": API_TOKEN
}

def get_noaa_station_id(lat, lon, search_radius=0.1, limit=5, timeout=30):
    """Look up the GHCND weather station closest to a coordinate.

    Queries the NOAA CDO `stations` endpoint for stations inside a square
    bounding box centred on (lat, lon) and returns the ID of the returned
    station that lies closest to that point.

    Args:
        lat: Latitude of the target point, in decimal degrees.
        lon: Longitude of the target point, in decimal degrees.
        search_radius: Half-width of the bounding box, in degrees.
        limit: Maximum number of stations requested from the API. Only the
            returned stations are compared, so raise this in dense areas.
        timeout: Seconds to wait for the HTTP request before giving up.

    Returns:
        The station ID string exactly as returned by the API, or None when
        the request fails or no station is found.
    """
    # Bounding box: latitude/longitude +/- search_radius degrees.
    min_lat, max_lat = lat - search_radius, lat + search_radius
    min_lon, max_lon = lon - search_radius, lon + search_radius

    stations_url = "https://www.ncdc.noaa.gov/cdo-web/api/v2/stations"
    params = {
        "extent": f"{min_lat},{min_lon},{max_lat},{max_lon}",  # search area
        "datasetid": "GHCND",  # restrict to the GHCND dataset
        "limit": limit,  # cap on the number of stations returned
    }

    # Network failures are reported and turned into None so that one bad
    # request does not abort a long batch of lookups.
    try:
        response = requests.get(
            stations_url, headers=HEADERS, params=params, timeout=timeout
        )
    except requests.RequestException as exc:
        print("请求 NOAA API 失败:", exc)
        return None

    if response.status_code != 200:
        print("请求 NOAA API 失败:", response.status_code, response.text)
        return None

    try:
        data = response.json()
    except ValueError:
        # A 200 response whose body is not valid JSON.
        print("请求 NOAA API 失败:", response.status_code, response.text)
        return None

    stations = data.get("results") if isinstance(data, dict) else None
    if not stations:
        # Covers both a missing "results" key and an empty result list.
        print("没有找到合适的气象站")
        return None

    # One degree of longitude shrinks with latitude; scale it so that the
    # comparison approximates real distance.
    lon_scale = math.cos(math.radians(lat))

    def _squared_distance(station):
        """Approximate squared distance (in degrees) from the target point."""
        try:
            d_lat = float(station["latitude"]) - lat
            d_lon = (float(station["longitude"]) - lon) * lon_scale
        except (KeyError, TypeError, ValueError):
            # Stations without usable coordinates sort last.
            return float("inf")
        return d_lat * d_lat + d_lon * d_lon

    # min() keeps the first element on ties, so if no station carries
    # coordinates this falls back to the first result.
    nearest = min(stations, key=_squared_distance)
    print('Succeeded!')
    return nearest['id']

In [15]:
# Base name of the phenometrics export; the CSV is read from a relative path.
country = 'USA-NPN_individual_phenometrics_data'

# Load the export, switch to the short column names used by the later cells,
# then lower-case the values that came from the `State` column.
df = pd.read_csv(f'{country}.csv').rename(
    columns={'State': 'location', 'Latitude': 'lat', 'Longitude': 'long'}
)
df['location'] = df['location'].str.lower()

In [19]:
# Inspect the column labels of `df`.
df.columns

Index(['Site_ID', 'lat', 'long', 'Elevation_in_Meters', 'location',
       'Species_ID', 'Genus', 'Species', 'Common_Name', 'Kingdom',
       'Individual_ID', 'Phenophase_ID', 'Phenophase_Description',
       'First_Yes_Year', 'First_Yes_Month', 'First_Yes_Day', 'First_Yes_DOY',
       'First_Yes_Julian_Date', 'NumDays_Since_Prior_No', 'Last_Yes_Year',
       'Last_Yes_Month', 'Last_Yes_Day', 'Last_Yes_DOY',
       'Last_Yes_Julian_Date', 'NumDays_Until_Next_No', 'AGDD', 'AGDD_in_F',
       'Tmax_Winter', 'Tmax_Spring', 'Tmax_Summer', 'Tmax_Fall', 'Tmin_Winter',
       'Tmin_Spring', 'Tmin_Summer', 'Tmin_Fall', 'Prcp_Winter', 'Prcp_Spring',
       'Prcp_Summer', 'Prcp_Fall', 'Accum_Prcp', 'Daylength'],
      dtype='object')

In [17]:
# Display five randomly selected rows of `df` as a quick sanity check.
df.sample(5)

Unnamed: 0,Site_ID,lat,long,Elevation_in_Meters,location,Species_ID,Genus,Species,Common_Name,Kingdom,...,Tmin_Winter,Tmin_Spring,Tmin_Summer,Tmin_Fall,Prcp_Winter,Prcp_Spring,Prcp_Summer,Prcp_Fall,Accum_Prcp,Daylength
91,28923,38.986008,-76.938278,26,md,228,Prunus,yedoensis,Yoshino cherry,Plantae,...,-1.79,7.47,20.32,10.41,169.0,364.0,498.0,180.0,204.0,44928
44,8901,35.590618,-83.470062,1794,tn,1189,Prunus,pensylvanica,pin cherry,Plantae,...,-2.66,3.16,11.47,5.85,512.0,671.0,510.0,219.0,834.0,50112
107,29916,38.908012,-77.072189,41,dc,227,Prunus,serrulata,Japanese flowering cherry,Plantae,...,-2.07,7.17,19.93,10.0,168.83,434.13,559.51,171.05,209.87,46295
89,28752,38.985867,-76.947266,51,md,228,Prunus,yedoensis,Yoshino cherry,Plantae,...,-1.98,7.24,20.03,10.15,172.0,370.0,508.0,181.0,208.0,45274
129,31254,38.891289,-77.03009,3,dc,227,Prunus,serrulata,Japanese flowering cherry,Plantae,...,-1.02,8.78,20.17,11.31,341.0,315.0,383.0,545.0,340.0,48038


In [21]:
# Keep only the location label and coordinates, dropping rows that repeat the
# same (location, lat, long) combination. The original row index is retained.
df = df.loc[:, ['location', 'lat', 'long']].drop_duplicates()
df.head()

Unnamed: 0,location,lat,long
0,nc,35.926178,-78.692719
2,nc,35.875225,-78.737076
3,dc,38.886761,-77.041016
6,tn,35.879211,-84.172203
7,ca,37.799515,-122.412834


In [27]:
# Show the (rows, columns) shape of `df`.
df.shape

(89, 3)

In [53]:
# Display the current contents of `station_dict`.
# NOTE(review): no assignment to `station_dict` is visible above this cell, so
# on a fresh kernel this line would raise NameError — confirm where it is set.
station_dict

{}

In [65]:
# Display the current contents of `station_list`.
# NOTE(review): `station_list` is first assigned in the cell below, so running
# the notebook top to bottom on a fresh kernel would raise NameError here.
station_list

[]

In [61]:
# Start from an empty list; the loop in the next cell appends one record per
# successful station lookup.
station_list = []

In [67]:
# Collect one {'lat', 'long', 'STATION'} record per site coordinate in `station_list`.
#
# Coordinates that already have a record are skipped, so re-running this cell
# (for example after a transient API error) only queries the sites that are
# still missing and does not append duplicates.
fetched_coords = {(rec['lat'], rec['long']) for rec in station_list}

for _, row in df.iterrows():
    # Target coordinates for this site.
    latitude = row['lat']
    longitude = row['long']

    if (latitude, longitude) in fetched_coords:
        print(f"The ID of {row['lat']} has already been obtained.")
    else:
        print(f"Obtaining {row['location']}'s station ID....")
        # Look up the weather station ID for this coordinate.
        station = get_noaa_station_id(latitude, longitude)
        if not station:
            print(f"The ID of {row['location']} is none.")
        else:
            # Use a fresh local record so the global `station_dict` mapping
            # is left untouched.
            station_list.append({
                'lat': latitude,
                'long': longitude,
                'STATION': station.replace('GHCND:', ''),
            })
            fetched_coords.add((latitude, longitude))
        # Pause between API calls to stay within the service's rate limits.
        time.sleep(2)

Obtaining nc's station ID....
Succeeded!
Obtaining nc's station ID....
Succeeded!
Obtaining dc's station ID....
Succeeded!
Obtaining tn's station ID....
Succeeded!
Obtaining ca's station ID....
Succeeded!
Obtaining va's station ID....
Succeeded!
Obtaining ny's station ID....
Succeeded!
Obtaining tn's station ID....
Succeeded!
Obtaining nc's station ID....
Succeeded!
Obtaining co's station ID....
Succeeded!
Obtaining ny's station ID....
Succeeded!
Obtaining in's station ID....
请求 NOAA API 失败: 503 <!DOCTYPE HTML PUBLIC "-//IETF//DTD HTML 2.0//EN">
<html><head>
<title>503 Service Unavailable</title>
</head><body>
<h1>Service Unavailable</h1>
<p>The server is temporarily unable to service your
request due to maintenance downtime or capacity
problems. Please try again later.</p>
<p>Additionally, a 503 Service Unavailable
error was encountered while trying to use an ErrorDocument to handle the request.</p>
</body></html>

The ID of in is none.
Obtaining ma's station ID....
Succeeded!
Obtaini

In [29]:
# Build a {location: station ID} mapping, querying the API once per location
# using the coordinates of the first row seen for that location.
#
# Start from an empty mapping so the cell does not depend on leftover kernel
# state; re-running the cell therefore repeats every lookup.
station_dict = {}

for _, row in df.iterrows():
    location = row['location']
    if location in station_dict:
        print(f"The ID of {location} has already been obtained.")
        continue

    print(f"Obtaining {location}'s station ID....")
    # Look up the weather station ID for this row's coordinates.
    station = get_noaa_station_id(row['lat'], row['long'])
    if not station:
        # Not stored, so a later row for the same location retries the lookup.
        print(f"The ID of {location} is none.")
    else:
        # Store the ID without the dataset prefix.
        station_dict[location] = station.replace('GHCND:', '')
    # Pause between API calls to stay within the service's rate limits.
    time.sleep(2)

Obtaining nc's station ID....
Succeeded!
The ID of nc has already been obtained.
Obtaining dc's station ID....
Succeeded!
Obtaining tn's station ID....
Succeeded!
Obtaining ca's station ID....
Succeeded!
Obtaining va's station ID....
Succeeded!
Obtaining ny's station ID....
Succeeded!
The ID of tn has already been obtained.
The ID of nc has already been obtained.
Obtaining co's station ID....
Succeeded!
The ID of ny has already been obtained.
Obtaining in's station ID....
Succeeded!
Obtaining ma's station ID....
Succeeded!
Obtaining mn's station ID....
Succeeded!
The ID of nc has already been obtained.
Obtaining mo's station ID....
Succeeded!
The ID of ny has already been obtained.
The ID of mn has already been obtained.
The ID of mn has already been obtained.
Obtaining md's station ID....
Succeeded!
The ID of md has already been obtained.
The ID of md has already been obtained.
The ID of md has already been obtained.
The ID of md has already been obtained.
Obtaining or's station ID...

In [43]:
# Wrap `station_dict` in a one-element list, which yields a single-row frame
# whose columns are the dict's keys.
# NOTE(review): `station_df` is rebuilt from `station_dict.items()` in the
# following cell, which replaces this value — confirm this cell is still needed.
station_df = pd.DataFrame([station_dict])

In [173]:
# Turn the mapping into a two-column frame: one row per (key, value) pair of
# `station_dict`, with the key in `location` and the value in `STATION`.
station_df = pd.DataFrame(list(station_dict.items()), columns=['location', 'STATION'])

In [177]:
# Write the station table to CSV without the index column.
# NOTE(review): the file name says "korea", while the data loaded earlier in this
# notebook is 'USA-NPN_individual_phenometrics_data' — confirm the intended
# output path before running, so an existing Korea file is not overwritten.
station_df.to_csv('korea_station_id.csv', index=False)

In [175]:
# Display the station table.
station_df

Unnamed: 0,location,STATION
0,sokcho,KSW00043254
1,chuncheon,KSM00047101
2,gangneung,KSM00047105
3,seoul,KSM00047108
4,incheon,KS000047112
5,wonju,KSW00043222
6,ulleungdo,KS000047115
7,suwon,KSW00043206
8,daejeon,KSM00047133
9,pohang,KSM00047138
