# 使用前须知
使用 nmc_met_io 读取天擎数据库之前, 需要先在配置文件中设置服务器地址和用户信息(若用不到某项服务, 则相应项无需配置).

在系统用户目录下("C:\Users\用户名"(windows)或"/home/用户名/"(Linux)), 建立文件夹".nmcdev"(若Windows下无法直接创建, 在命令窗口中输入mkdir .nmcdev创建)
在".nmcdev"中创建文本文件"config.ini", 内容模板为:

```
# 用于nmc_met_io读取大数据云, MICAPS服务器等的配置文件.
# 若用不到某个服务器, 则不设置或删除该段落即可.
# 注意设置IP地址时, 不要加http等前缀信息.

# 当读取CMADaaS, MICAPS Cassandra等数据时, 采取了本地文件缓存机制, 以便加快
# 文件的读取速度和减少数据库访问次数(设置函数参数cache=False可以不用缓存机制).
# CACHE_DIR, 缓存目录, 若没有设置, 默认为 /user_home/.nmcdev/cache
# CACHE_DAYS, 缓存天数, 若没有设置, 默认为7, 即7天之前的缓存数据将被删除
[CACHE]
# CACHE_DIR = ~
CACHE_DAYS = 0

# CMADaaS大数据云平台配置:
#     DNS为IP地址, PORT为端口
#     USER_ID和PASSWORD分别为用户名和密码
#     serviceNodeId为服务节点名称(一般为 NMIC_MUSIC_CMADAAS)
[CMADaaS]
DNS = 101.35.88.42
PORT = 8000
USER_ID = heywhale
PASSWORD = DayDayUp
serviceNodeId = NMIC_MUSIC_CMADAAS

```

In [1]:
import os
import sys
from datetime import datetime,timedelta
import json
import math
import numpy as np
import pandas as pd
import dateutil.parser
import dateutil.rrule
import nmc_met_io
import nmc_met_io.retrieve_cmadaas
import nmc_met_io.retrieve_cmadaas_history
import warnings
warnings.filterwarnings('ignore')

# 地面观测数据集

常用的站点观测数据集有:

- SURF_CHN_MUL_MIN, 中国地面分钟数据
- SURF_CHN_MUL_HOR, 中国地面逐小时资料
- SURF_CHN_MUL_HOR_N, 中国地面逐小时(国家站)
- SURF_CHN_MUL_DAY, 中国地面日值数据
- SURF_CHN_MUL_DAY_N, 中国地面日值数据(国家站)

以上数据集在模拟天擎中只实现了以下接口:

- getSurfEleInRegionByTime(按时间、地区检索地面要素数据)
- getSurfEleInRectByTime(按时间、经纬度范围检索地面数据要素)
- getSurfEleByTimeAndStaID(按时间、站号检索地面数据要素)
- getSurfEleByTimeRangeAndStaID(按时间段、站号检索地面数据要素)


# 读取单站观测数据

In [2]:
# Query parameters for the single-station, time-range retrieval below.
sta_ids = "53691"
data_code = "SURF_CHN_MUL_HOR_N"  # China surface hourly data (national stations)
# Bracketed start,end pair of 14-digit timestamps
# (looks like YYYYMMDDHHMMSS -- confirm against the API documentation).
time_range = "[20211101000000,20211130230000]"
# Comma-separated element names; split across lines only for readability.
elements = (
    "Station_Id_C,Lat,Lon,Alti,"
    "Year,Mon,Day,Hour,"
    "PRS_sea,TEM,DPT,RHU,"
    "PRE_1h,PRE_12h,PRE_24h"
)

In [3]:
# Request the selected elements for the given station IDs within time_range;
# the result is kept in `data` so the next cell can display it.
data = nmc_met_io.retrieve_cmadaas.cmadaas_obs_by_time_range_and_id(
    time_range, data_code=data_code, elements=elements, sta_ids=sta_ids
)

In [4]:
data

Unnamed: 0,Station_Id_C,Lat,Lon,Alti,Year,Mon,Day,Hour,PRS_sea,TEM,DPT,RHU,PRE_1h,PRE_12h,PRE_24h
0,53691,38.15,114.57,0.0,2021,11,1,0,0.0,-1.6,-0.8,0.0,-0.2,0.8,-0.7
1,53691,38.15,114.57,0.0,2021,11,1,1,-1.1,0.7,-1.5,0.8,-0.8,-0.1,0.4
2,53691,38.15,114.57,0.0,2021,11,1,2,0.3,-1.0,-0.3,-0.7,-0.5,1.8,0.8
3,53691,38.15,114.57,0.0,2021,11,1,3,1.1,-1.9,0.0,0.3,-1.1,0.6,0.8
4,53691,38.15,114.57,0.0,2021,11,1,4,-0.1,-0.6,1.1,0.2,2.0,0.3,0.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
715,53691,38.15,114.57,0.0,2021,11,30,19,1.6,0.0,0.3,1.1,1.5,-0.8,0.6
716,53691,38.15,114.57,0.0,2021,11,30,20,0.4,-0.3,-0.9,0.3,0.1,0.6,1.2
717,53691,38.15,114.57,0.0,2021,11,30,21,1.3,0.3,1.1,-0.4,0.7,-0.5,0.8
718,53691,38.15,114.57,0.0,2021,11,30,22,0.9,-0.5,-0.2,0.6,0.3,0.4,-0.4


如果需要读取较长历史时段的观测记录, 由于大数据云平台存在下载数量限制, 可使用 retrieve_cmadaas_history 模块中的函数逐年下载并拼接.

In [5]:
# Query parameters for the multi-year, single-station hourly retrieval below.
sta_ids = "53691"
data_code = "SURF_CHN_MUL_HOR_N"  # hourly observation data
years = np.arange(2000, 2011)  # 2000 through 2010 inclusive (step defaults to 1)
# Comma-separated element names; split across lines only for readability.
elements = (
    "Station_Id_d,Datetime,Lat,Lon,Alti,"
    "TEM,DPT,RHU,"
    "PRE_1h,PRE_6h,PRE_24h"
)

In [6]:
# Fetch the station's history for every entry in `years`; per the note above
# this helper downloads year by year and concatenates the pieces into `data`.
data = nmc_met_io.retrieve_cmadaas_history.get_hist_obs_id(
    years=years,
    data_code=data_code,
    elements=elements,
    sta_ids=sta_ids,
)

Years: 100%|███████████████████████████████████████████████████████████████████████████| 11/11 [00:06<00:00,  1.79it/s]


In [7]:
data

Unnamed: 0,Station_Id_d,Datetime,Lat,Lon,Alti,TEM,DPT,RHU,PRE_1h,PRE_6h,PRE_24h
0,-1.6,2000-01-01 00:00:00,38.15,114.57,0.0,0.5,-0.1,-2.8,-0.2,0.7,0.4
1,-0.9,2000-01-01 01:00:00,38.15,114.57,0.0,-0.5,-0.9,0.1,-1.0,-0.2,-0.9
2,-0.9,2000-01-01 02:00:00,38.15,114.57,0.0,0.8,-1.1,-0.1,0.2,-0.5,-0.3
3,0.9,2000-01-01 03:00:00,38.15,114.57,0.0,0.1,0.8,-0.2,0.6,-2.3,0.7
4,0.0,2000-01-01 04:00:00,38.15,114.57,0.0,1.8,1.5,-0.8,0.0,1.0,-1.4
...,...,...,...,...,...,...,...,...,...,...,...
7915,0.5,2010-01-30 19:00:00,38.15,114.57,0.0,-0.5,1.1,-1.8,0.7,-0.2,1.3
7916,0.5,2010-01-30 20:00:00,38.15,114.57,0.0,0.7,-0.2,1.3,0.6,0.4,-0.6
7917,1.7,2010-01-30 21:00:00,38.15,114.57,0.0,-1.7,-0.9,-1.7,-0.6,1.5,-1.3
7918,0.6,2010-01-30 22:00:00,38.15,114.57,0.0,0.5,-1.5,-1.4,0.0,0.9,1.2


# 读取单站的逐日观测数据

In [8]:
# Query parameters for the multi-year, single-station daily retrieval below.
sta_ids = "53691"
data_code = "SURF_CHN_MUL_DAY"  # China surface daily-value data (not hourly)
years = np.arange(2000, 2011)  # 2000 through 2010 inclusive (step defaults to 1)
# Comma-separated element names; split across lines only for readability.
elements = (
    "Station_Id_d,Datetime,Lat,Lon,Alti,"
    "TEM_Avg,TEM_Max,TEM_Min,"
    "PRE_Time_2020,PRE_Time_0808,"
    "WIN_S_2mi_Avg,WIN_S_Max"
)

In [9]:
# Fetch the station's daily records for every entry in `years` and keep the
# combined result in `data` so the next cell can display it.
data = nmc_met_io.retrieve_cmadaas_history.get_hist_obs_id(
    years=years,
    data_code=data_code,
    elements=elements,
    sta_ids=sta_ids,
)

Years: 100%|███████████████████████████████████████████████████████████████████████████| 11/11 [00:01<00:00,  7.15it/s]


In [10]:
data

Unnamed: 0,Station_Id_d,Datetime,Lat,Lon,Alti,TEM_Avg,TEM_Max,TEM_Min,PRE_Time_2020,PRE_Time_0808,WIN_S_2mi_Avg,WIN_S_Max
0,0.6,2000-01-01,38.15,114.57,0.0,0.8,-2.1,0.2,-0.2,0.8,-0.9,0.9
1,0.9,2000-01-02,38.15,114.57,0.0,-0.6,1.0,1.3,0.1,-2.5,0.2,-0.5
2,0.3,2000-01-03,38.15,114.57,0.0,1.8,0.8,-0.4,1.3,-0.5,-0.3,-0.4
3,-0.3,2000-01-04,38.15,114.57,0.0,0.7,0.1,0.0,-0.8,0.6,-0.8,0.9
4,0.6,2000-01-05,38.15,114.57,0.0,-0.3,0.3,0.1,-0.6,-0.8,0.4,1.1
...,...,...,...,...,...,...,...,...,...,...,...,...
1095,0.6,2010-04-06,38.15,114.57,0.0,-1.0,0.6,-1.3,-1.5,-0.1,0.4,-1.8
1096,-1.1,2010-04-07,38.15,114.57,0.0,0.3,0.8,-1.6,0.4,-0.1,-2.1,0.8
1097,0.8,2010-04-08,38.15,114.57,0.0,0.0,0.0,-0.8,1.0,-0.3,-0.4,0.1
1098,-1.3,2010-04-09,38.15,114.57,0.0,-1.3,-1.4,-1.7,0.3,-1.1,1.0,-1.0
