-
Notifications
You must be signed in to change notification settings - Fork 985
/
midc.py
269 lines (238 loc) · 9.35 KB
/
midc.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
"""Functions to read NREL MIDC data.
"""
import io
import requests
import pandas as pd
# MIDC_VARIABLE_MAP maps some variables of interest at each MIDC site to their
# pvlib counterparts. The mapping dictionary for a site can be found by looking
# up the Site's id in the dictionary. It is not a comprehensive list, and may
# not be the best fit for your application, but should serve as a base for
# creating your own mappings.
#
# In particular, these mappings coincide with the raw ddata files.
# All site's field list can be found at:
# https://midcdmz.nrel.gov/apps/daily.pl?site=<SITE ID>&live=1
# Where id is the key found in this dictionary
MIDC_VARIABLE_MAP = {
'BMS': {
'Global CMP22 (vent/cor) [W/m^2]': 'ghi',
'Direct CHP1-1 [W/m^2]': 'dni_chp1',
# NIP was mapped to dni for pvlib<=0.10.5
'Direct NIP [W/m^2]': 'dni_nip',
'Diffuse CM22-1 (vent/cor) [W/m^2]': 'dhi',
'Avg Wind Speed @ 6ft [m/s]': 'wind_speed',
'Tower Dry Bulb Temp [deg C]': 'temp_air',
'Tower RH [%]': 'relative_humidity'},
'UOSMRL': {
'Global CMP22 [W/m^2]': 'ghi',
'Direct CHP1 [W/m^2]': 'dni_chp1',
'Diffuse [W/m^2]': 'dhi',
# NIP was mapped to dni for pvlib<=0.10.5
'Direct NIP [W/m^2]': 'dni_nip',
# Schenk was mapped to dhi for pvlib<=0.10.5
# 'Diffuse Schenk [W/m^2]': 'dhi',
'Air Temperature [deg C]': 'temp_air',
'Relative Humidity [%]': 'relative_humidity',
'Avg Wind Speed @ 10m [m/s]': 'wind_speed'},
'HSU': {
'Global Horiz [W/m^2]': 'ghi',
'Direct Normal (calc) [W/m^2]': 'dni',
'Diffuse Horiz (band_corr) [W/m^2]': 'dhi'},
'UTPASRL': {
'Global Horizontal [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horizontal [W/m^2]': 'dhi',
'CHP1 Temp [deg C]': 'temp_air'},
'UAT': {
'Global Horiz (platform) [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horiz [W/m^2]': 'dhi',
'Air Temperature [deg C]': 'temp_air',
'Rel Humidity [%]': 'relative_humidity',
'Avg Wind Speed @ 3m [m/s]': 'wind_speed'},
'STAC': {
'Global Horizontal [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horizontal [W/m^2]': 'dhi',
'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
'Air Temperature [deg C]': 'temp_air',
'Rel Humidity [%]': 'relative_humidity'},
'UNLV': {
'Global Horiz [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horiz (calc) [W/m^2]': 'dhi',
'Dry Bulb Temp [deg C]': 'temp_air',
'Avg Wind Speed @ 30ft [m/s]': 'wind_speed'},
'ORNL': {
'Global Horizontal [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horizontal [W/m^2]': 'dhi',
'Air Temperature [deg C]': 'temp_air',
'Rel Humidity [%]': 'relative_humidity',
'Avg Wind Speed @ 42ft [m/s]': 'wind_speed'},
'NELHA': {
'Global Horizontal [W/m^2]': 'ghi',
'Air Temperature [W/m^2]': 'temp_air',
'Avg Wind Speed @ 10m [m/s]': 'wind_speed',
'Rel Humidity [%]': 'relative_humidity'},
'ULL': {
'Global Horizontal [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horizontal [W/m^2]': 'dhi',
'Air Temperature [deg C]': 'temp_air',
'Rel Humidity [%]': 'relative_humidity',
'Avg Wind Speed @ 3m [m/s]': 'wind_speed'},
'NWTC': {
'Global Horizontal [W/m^2]': 'ghi',
'Direct Normal [W/m^2]': 'dni',
'Diffuse Horizontal [W/m^2]': 'dhi',
# PSP instrument was removed Feb. 2021
# PSP was mapped to ghi for pvlib<=0.10.5
# 'Global PSP [W/m^2]': 'ghi',
'Temperature @ 2m [deg C]': 'temp_air',
'Avg Wind Speed @ 2m [m/s]': 'wind_speed',
'Relative Humidity [%]': 'relative_humidity'},
}
# Maps problematic timezones to 'Etc/GMT' for parsing.
TZ_MAP = {
'PST': 'Etc/GMT+8',
'CST': 'Etc/GMT+6',
}
def _format_index(data):
"""Create DatetimeIndex for the Dataframe localized to the timezone provided
as the label of the second (time) column.
Parameters
----------
data: Dataframe
Must contain 'DATE (MM/DD/YYYY)' column, second column must be labeled
with the timezone and contain times in 'HH:MM' format.
Returns
-------
data: Dataframe
Dataframe with DatetimeIndex localized to the provided timezone.
"""
tz_raw = data.columns[1]
timezone = TZ_MAP.get(tz_raw, tz_raw)
datetime = data['DATE (MM/DD/YYYY)'] + data[tz_raw]
datetime = pd.to_datetime(datetime, format='%m/%d/%Y%H:%M')
data = data.set_index(datetime)
data = data.tz_localize(timezone)
return data
def _format_index_raw(data):
"""Create DatetimeIndex for the Dataframe localized to the timezone provided
as the label of the third column.
Parameters
----------
data: Dataframe
Must contain columns 'Year' and 'DOY'. Timezone must be found as the
label of the third (time) column.
Returns
-------
data: Dataframe
The data with a Datetime index localized to the provided timezone.
"""
tz_raw = data.columns[3]
timezone = TZ_MAP.get(tz_raw, tz_raw)
year = data.Year.apply(str)
jday = data.DOY.apply(lambda x: '{:03d}'.format(x))
time = data[tz_raw].apply(lambda x: '{:04d}'.format(x))
index = pd.to_datetime(year + jday + time, format="%Y%j%H%M")
data = data.set_index(index)
data = data.tz_localize(timezone)
return data
def read_midc(filename, variable_map={}, raw_data=False, **kwargs):
"""Read in National Renewable Energy Laboratory Measurement and
Instrumentation Data Center weather data. The MIDC is described in [1]_.
Parameters
----------
filename: string or file-like object
Filename, url, or file-like object of data to read.
variable_map: dictionary
Dictionary for mapping MIDC field names to pvlib names. Used to rename
the columns of the resulting DataFrame. Does not map names by default.
See Notes for an example.
raw_data: boolean
Set to true to use format_index_raw to correctly format the date/time
columns of MIDC raw data files.
kwargs : dict
Additional keyword arguments to pass to `pandas.read_csv`
Returns
-------
data: Dataframe
A dataframe with DatetimeIndex localized to the provided timezone.
Notes
-----
The `variable_map` argument should map fields from MIDC data to pvlib
names.
E.g. if a MIDC file contains the variable 'Global Horizontal [W/m^2]',
passing the dictionary below will rename the column to 'ghi' in
the returned Dataframe.
{'Global Horizontal [W/m^2]': 'ghi'}
See the MIDC_VARIABLE_MAP for collection of mappings by site.
For a full list of pvlib variable names see the
:ref:`variables_style_rules`.
Be sure to check the units for the variables you will use on the
`MIDC site <https://midcdmz.nrel.gov/>`_.
References
----------
.. [1] NREL: Measurement and Instrumentation Data Center
`https://midcdmz.nrel.gov/ <https://midcdmz.nrel.gov/>`_
"""
data = pd.read_csv(filename, **kwargs)
if raw_data:
data = _format_index_raw(data)
else:
data = _format_index(data)
data = data.rename(columns=variable_map)
return data
def read_midc_raw_data_from_nrel(site, start, end, variable_map={},
timeout=30):
"""Request and read MIDC data directly from the raw data api.
Parameters
----------
site: string
The MIDC station id.
start: datetime-like
Start date for requested data.
end: datetime-like
End date for requested data.
variable_map: dict
A dictionary mapping MIDC field names to pvlib names. Used to
rename columns of the resulting DataFrame. See Notes of
:py:func:`pvlib.iotools.read_midc` for example.
timeout : float, default 30
Number of seconds to wait to connect/read from the API before
failing.
Returns
-------
data:
Dataframe with DatetimeIndex localized to the station location.
Raises
------
requests.HTTPError
For any error in retrieving the CSV file from the MIDC API
requests.Timeout
If data is not received in within ``timeout`` seconds
Notes
-----
Requests spanning an instrumentation change will yield an error. See the
MIDC raw data api page
`here <https://midcdmz.nrel.gov/apps/data_api_doc.pl?_idtextlist>`_
for more details and considerations.
"""
args = {'site': site,
'begin': pd.to_datetime(start).strftime('%Y%m%d'),
'end': pd.to_datetime(end).strftime('%Y%m%d')}
url = 'https://midcdmz.nrel.gov/apps/data_api.pl'
# NOTE: just use requests.get(url, params=args) to build querystring
# number of header columns and data columns do not always match,
# so first parse the header to determine the number of data columns
# to parse
csv_request = requests.get(url, timeout=timeout, params=args)
csv_request.raise_for_status()
raw_csv = io.StringIO(csv_request.text)
first_row = pd.read_csv(raw_csv, nrows=0)
col_length = len(first_row.columns)
raw_csv.seek(0)
return read_midc(raw_csv, variable_map=variable_map, raw_data=True,
usecols=range(col_length))