In [12]:
from netCDF4 import Dataset
import datetime

In [13]:
# Name of the NetCDF metadata file this notebook writes and later reopens
filePath = 'ghcnd_daily_by_year_meta_data.nc'

In [14]:
# Open the target file for writing as NETCDF4; clobber=True is passed so an
# existing file with the same name gets replaced instead of raising an error.
nc_file = Dataset(
    filePath,
    mode="w",
    clobber=True,
    format="NETCDF4",
)

In [15]:
# Sanity check right after creation: the summary should list no dimensions,
# variables or groups yet.
print(nc_file)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    dimensions(sizes): 
    variables(dimensions): 
    groups: 


In [16]:
# Global (file-level) attributes, applied in the order they are listed here.
creation_stamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
global_attributes = (
    ('title', 'GHCN Daily Metadata by Year'),
    ('institution', 'National Centers for Environmental Information (NCEI)'),
    ('source', 'Global Historical Climatology Network (GHCN) Daily'),
    ('history', f'Created {creation_stamp}'),
    ('references', 'More information: https://www.ncei.noaa.gov/products/land-based-station/global-historical-climatology-network-daily  The Data can be found here: https://www.ncei.noaa.gov/pub/data/ghcn/daily/by_year/'),
    ('comment', 'This dataset follows the FAIR principles for scientific data management and stewardship.'),
    ('identifier', '10.7289/V5D21VHZ'),
    ('documentation', 'https://www.ncei.noaa.gov/pub/data/ghcn/daily/readme.txt'),
)
for attr_name, attr_text in global_attributes:
    # setattr() is the same operation as `nc_file.<attr_name> = attr_text`
    setattr(nc_file, attr_name, attr_text)

# Variables
# TODO: decide whether STATION_ID should get a fixed-length character dimension,
# e.g. nc_file.createDimension('STATION_LENGTH', 11) passed as
# ('STATION_LENGTH',); until that is settled it is created without dimensions.
station_id = nc_file.createVariable('STATION_ID', 'str')
station_id_attributes = (
    ('long_name', 'Station Identification Code'),
    ('description', '11 character station identification code'),
    ('comment', 'The corresponding station codes can be found here: https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt'),
)
for attr_name, attr_text in station_id_attributes:
    setattr(station_id, attr_name, attr_text)

# DATE variable: full observation date. The notebook-level name stays `year`
# even though the stored value is a complete YYYYMMDD date.
year = nc_file.createVariable('DATE', 'str')
for attr_name, attr_text in (
    ('long_name', 'Date of the observation'),
    ('description', '8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986)'),
):
    setattr(year, attr_name, attr_text)

# ELEMENT variable: code naming which quantity a record reports.
element = nc_file.createVariable('ELEMENT', 'str')
element.long_name = 'Element Type'
# The literal must open with exactly three quotes; a fourth quote becomes the
# first character of the stored text and shows up as a stray apostrophe in the
# attribute. The leading newline matches the other multi-line descriptions.
element.description = '''
Core elements:
PRCP = Precipitation (tenths of mm)
SNOW = Snowfall (mm)
SNWD = Snow depth (mm)
TMAX = Maximum temperature (tenths of degrees C)
TMIN = Minimum temperature (tenths of degrees C)
Additional elements: ACMC, ACMH, ACSC, ACSH, ADPT, ASLP, ASTP, AWBT, AWDR, AWND, DAEV, DAPR, DASF, DATN, DATX, DAWM, DWPR, EVAP, FMTM, FRGB, FRGT, FRTH, GAHT, MDEV, MDPR, MDSF, MDTN, MDTX, MDWM, MNPN, MXPN, PGTM, PSUN, RHAV, RHMN, RHMX, SN*#, SX*#, TAXN, TAVG, THIC, TOBS, TSUN, WDF1, WDF2, WDF5, WDFG, WDFI, WDFM, WDMV, WESD, WESF, WSF1, WSF2, WSF5, WSFG, WSFI, WSFM, WT**, WV**
'''
element.comment = 'Core and additional elements as described in the GHCN Daily documentation file'

# VALUE variable: 32-bit integer measurement; its unit depends on the element.
value = nc_file.createVariable('VALUE', 'i4')
value_attributes = (
    ('long_name', 'Parameter Value'),
    ('units', 'variable-specific units'),
    ('missing_value', -9999),
    ('description', 'Value of the Parameter'),
    ('comment', 'Units can be found in the documentation file for each parameter'),
)
for attr_name, attr_content in value_attributes:
    # setattr() is used on purpose: it is the same operation as plain
    # attribute assignment, so missing_value is handled exactly as before.
    setattr(value, attr_name, attr_content)

# Legend for the measurement flag codes, stored verbatim as the description.
mflag_legend = '''
Measurement flags:
Blank = no measurement information applicable
B     = precipitation total formed from two 12-hour totals
D     = precipitation total formed from four six-hour totals
H     = represents highest or lowest hourly temperature (TMAX or TMIN) or the average of hourly values (TAVG)
K     = converted from knots 
L     = temperature appears to be lagged with respect to reported hour of observation 
O     = converted from oktas 
P     = identified as "missing presumed zero" in DSI 3200 and 3206
T     = trace of precipitation, snowfall, or snow depth
W     = converted from 16-point WBAN code (for wind direction)
'''

# MFLAG variable: one-letter code qualifying how the value was measured.
mflag = nc_file.createVariable('MFLAG', 'str')
mflag.long_name = 'Measurement Flag'
mflag.description = mflag_legend

# QFLAG variable: one-letter code naming the quality-assurance check a value
# failed (blank when it passed all checks).
qflag = nc_file.createVariable('QFLAG', 'str')
# Each record in this file carries its own full DATE, so the flag belongs to
# that single observation; the "First Day of the Month" wording comes from the
# monthly fixed-width layout and does not apply here. Kept parallel to
# MFLAG's plain 'Measurement Flag'.
qflag.long_name = 'Quality Flag'
qflag.description = '''
Quality flags:
Blank = did not fail any quality assurance check
D     = failed duplicate check
G     = failed gap check
I     = failed internal consistency check
K     = failed streak/frequent-value check
L     = failed check on length of multiday period 
M     = failed megaconsistency check
N     = failed naught check
O     = failed climatological outlier check
R     = failed lagged range check
S     = failed spatial consistency check
T     = failed temporal consistency check
W     = temperature too warm for snow
X     = failed bounds check
Z     = flagged as a result of an official Datzilla investigation
'''

# SFLAG variable: one-character code naming the data source of a value.
sflag = nc_file.createVariable('SFLAG', 'str')
# Each record in this file carries its own full DATE, so the flag belongs to
# that single observation; the "First Day of the Month" wording comes from the
# monthly fixed-width layout and does not apply here. Kept parallel to
# MFLAG's plain 'Measurement Flag'.
sflag.long_name = 'Source Flag'
# The legend text (including its spelling) is kept as quoted from the upstream
# GHCN Daily documentation.
sflag.description = '''
Source flags:
Blank = No source (i.e., data value missing)
0     = U.S. Cooperative Summary of the Day (NCDC DSI-3200)
6     = CDMP Cooperative Summary of the Day (NCDC DSI-3206)
7     = U.S. Cooperative Summary of the Day -- Transmitted via WxCoder3 (NCDC DSI-3207)
A     = U.S. Automated Surface Observing System (ASOS) real-time data (since January 1, 2006)
a     = Australian data from the Australian Bureau of Meteorology
B     = U.S. ASOS data for October 2000-December 2005 (NCDC DSI-3211)
b     = Belarus update
C     = Environment Canada
D     = Short time delay US National Weather Service CF6 daily summaries provided by the High Plains Regional Climate Center
E     = European Climate Assessment and Dataset (Klein Tank et al., 2002)
F     = U.S. Fort data 
G     = Official Global Climate Observing System (GCOS) or other government-supplied data
H     = High Plains Regional Climate Center real-time data
I     = International collection (non U.S. data received through personal contacts)
K     = U.S. Cooperative Summary of the Day data digitized from paper observer forms (from 2011 to present)
M     = Monthly METAR Extract (additional ASOS data)
f     = Data provided courtesy of the Fiji Met Service
m     = Data from the Mexican National Water Commission (Comision National del Agua -- CONAGUA)
N     = Community Collaborative Rain, Hail,and Snow (CoCoRaHS)
Q     = Data from several African countries that had been "quarantined", that is, withheld from public release until permission was granted from the respective meteorological services
R     = NCEI Reference Network Database (Climate Reference Network and Regional Climate Reference Network)
r     = All-Russian Research Institute of Hydrometeorological Information-World Data Center
S     = Global Summary of the Day (NCDC DSI-9618)
s     = China Meteorological Administration/National Meteorological Information Center/ Climatic Data Center (http://cdc.cma.gov.cn)
T     = SNOwpack TELemtry (SNOTEL) data obtained from the U.S. Department of Agriculture's Natural Resources Conservation Service
U     = Remote Automatic Weather Station (RAWS) data obtained from the Western Regional Climate Center
u     = Ukraine update	   
W     = WBAN/ASOS Summary of the Day from NCDC's Integrated Surface Data (ISD).  
X     = U.S. First-Order Summary of the Day (NCDC DSI-3210)
Z     = Datzilla official additions or replacements 
z     = Uzbekistan update

When data are available for the same time from more than one source, the highest priority source is chosen according to the following priority order (from highest to lowest):
Z,R,D,0,6,C,X,W,K,7,F,B,M,f,m,r,E,z,u,b,s,a,G,Q,I,A,N,T,U,H,S
'''

In [17]:
# Inspect the dataset again now that global attributes and variables exist.
# NOTE(review): the saved output of this cell lists a SAMPLE_ID dimension that
# no visible cell creates -- presumably left over from a cell that was run and
# later removed. Confirm with Restart Kernel -> Run All.
print(nc_file)

<class 'netCDF4._netCDF4.Dataset'>
root group (NETCDF4 data model, file format HDF5):
    title: GHCN Daily Metadata by Year
    institution: National Centers for Environmental Information (NCEI)
    source: Global Historical Climatology Network (GHCN) Daily
    history: Created 2024-07-30 00:30:10
    references: More information: https://www.ncei.noaa.gov/products/land-based-station/global-historical-climatology-network-daily  The Data can be found here: https://www.ncei.noaa.gov/pub/data/ghcn/daily/by_year/
    comment: This dataset follows the FAIR principles for scientific data management and stewardship.
    identifier: 10.7289/V5D21VHZ
    documentation: https://www.ncei.noaa.gov/pub/data/ghcn/daily/readme.txt
    dimensions(sizes): SAMPLE_ID(0)
    variables(dimensions): <class 'str'> STATION_ID(), <class 'str'> DATE(), <class 'str'> ELEMENT(), int32 VALUE(), <class 'str'> MFLAG(), <class 'str'> QFLAG(), <class 'str'> SFLAG()
    groups: 


In [18]:
# Close the dataset; the following cell reopens the same path read-only to
# inspect what was stored.
nc_file.close()

In [22]:

# Reopen the file read-only and list its variables with their attributes.
fileTest = Dataset(
    filePath,
    mode='r',
    format='NETCDF4',
)
print(fileTest.variables)

{'STATION_ID': <class 'netCDF4._netCDF4.Variable'>
vlen STATION_ID()
    long_name: Station Identification Code
    description: 11 character station identification code
    comment: The corresponding station codes can be found here: https://www.ncei.noaa.gov/pub/data/ghcn/daily/ghcnd-stations.txt
vlen data type: <class 'str'>
unlimited dimensions: 
current shape = (), 'DATE': <class 'netCDF4._netCDF4.Variable'>
vlen DATE()
    long_name: Date of the observation
    description: 8 character date in YYYYMMDD format (e.g. 19860529 = May 29, 1986)
vlen data type: <class 'str'>
unlimited dimensions: 
current shape = (), 'ELEMENT': <class 'netCDF4._netCDF4.Variable'>
vlen ELEMENT()
    long_name: Element Type
    description: '
Core elements:
PRCP = Precipitation (tenths of mm)
SNOW = Snowfall (mm)
SNWD = Snow depth (mm)
TMAX = Maximum temperature (tenths of degrees C)
TMIN = Minimum temperature (tenths of degrees C)
Additional elements: ACMC, ACMH, ACSC, ACSH, ADPT, ASLP, ASTP, AWBT, AWDR,