/
hdf5IO.py
165 lines (123 loc) · 4.57 KB
/
hdf5IO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# TODO: HDF5 wrappers for dict forms.
# Integration with IO.py file IO.
# See also PEMtk.fit._io for similar functionality/prototypes.
# And dev in http://jake/jupyter/user/paul/doc/tree/code-share/jupyter-shared/PEMtk_dev_2022/io_dev/xarray_dict_IO_tests_210622-Jake.ipynb
# Imports
import os
# import re
# import numpy as np
# import pandas as pd
# from io import StringIO
import xarray as xr
from pathlib import Path
from ast import literal_eval
try:
import h5py
h5Flag = True
except ImportError as e:
if e.msg != "No module named 'h5py'":
raise
print('* h5py not found, HDF5 export not available. ')
h5Flag = False
from epsproc.util.misc import deconstructDims, reconstructDims
from epsproc.util.xrIO import splitComplex, combineComplex, sanitizeAttrsNetCDF
from epsproc.util.io import setTimeStampedFileName
def writeXarrayToHDF5(data, fileName = None, filePath = None, dataName = None, appendFlag = True):
    """
    Write an Xarray DataArray, or a dictionary, to a group in an HDF5 file.

    Parameters
    ----------
    data : xr.DataArray or dict
        DataArray inputs are converted with ``deconstructDims(data, returnType = 'dict')``
        before writing. Dictionary inputs are written as passed.

    fileName : str, optional, default = None
        Output file name. If None, a name is generated with
        ``setTimeStampedFileName(ext='h5')``.

    filePath : str or Path, optional, default = None
        Output directory. If None, the current working directory is used.

    dataName : str, optional, default = None
        Name of the HDF5 group created to hold the data. If None, 'xr' is used.

    appendFlag : bool, optional, default = True
        If True and the file already exists, it is opened in append mode ('a').
        Otherwise the file is opened in write mode ('w'), replacing any existing file.

    Returns
    -------
    Path or None
        Full path of the file written, or None if h5py is not installed.

    Raises
    ------
    TypeError
        If ``data`` is neither an Xarray DataArray nor a dict.

    Notes
    -----
    The item with key 'data' is written as-is; every other item is written as
    ``str(value)``, which avoids issues with tuples, empty items and nested dicts.

    Appending to a file which already contains a group called ``dataName`` is not
    handled here: ``create_group`` fails in that case (h5py raises ValueError at the
    time of writing). The file is still closed cleanly.

    TODO: test with DataSets.
    TODO: try and get dataName from input data structure.
    See also ep.util.xrIO.sanitizeAttrsNetCDF() - may want to implement that instead
    of forcing items to str.

    """

    if not h5Flag:
        print('*** Install h5py for HDF5 export. ')
        return None

    if filePath is None:
        filePath = Path().absolute()

    # Create file name if None
    if fileName is None:
        fileName = setTimeStampedFileName(ext='h5')

    # Name group
    if dataName is None:
        dataName = 'xr'

    # Convert to safe dictionary format BEFORE touching the file, so that a failed
    # conversion or unsupported input cannot leave a truncated file or empty group.
    if isinstance(data, xr.DataArray):
        dataDict = deconstructDims(data, returnType = 'dict')
    elif isinstance(data, dict):
        dataDict = data
    else:
        raise TypeError(f"Cannot write data of type {type(data).__name__} to HDF5, expected Xarray DataArray or dict.")

    # Open file, append if existing
    fOut = Path(filePath, fileName)
    fileMode = 'a' if (fOut.is_file() and appendFlag) else 'w'

    # Context manager ensures the file is closed even if a write fails.
    with h5py.File(fOut.as_posix(), fileMode) as hf:
        dict_group = hf.create_group(dataName)

        # Force all items to str except data
        for k, v in dataDict.items():
            if k == 'data':
                dict_group[k] = v
            else:
                dict_group[k] = str(v)

    return fOut
def _decodeHDF5Item(v, evalStrings = True):
    """Convert one value read from HDF5: decode bytes to str, then optionally parse it as a Python literal."""

    if isinstance(v, bytes):
        try:
            v = v.decode("utf-8")
        except UnicodeDecodeError:
            # Not UTF-8 text, pass through as read.
            return v

    # Only strings can hold a stringified item, other types (e.g. arrays) pass straight through.
    if evalStrings and isinstance(v, str):
        try:
            return literal_eval(v)
        except (ValueError, SyntaxError, TypeError, MemoryError, RecursionError):
            # Not a Python literal (e.g. a plain name), keep the decoded text.
            pass

    return v


def readXarrayFromHDF5(fileName, filePath = None, dataName = None, evalStrings = True):
    """
    Read an Xarray or dictionary from HDF5.

    Parameters
    ----------
    fileName : str or Path
        Name of the HDF5 file to read.

    filePath : str or Path, optional, default = None
        Directory containing the file. If None, the current working directory is used.

    dataName : str, optional, default = None
        Name of the HDF5 group to read. If None, 'xr' is used.

    evalStrings : bool, optional, default = True
        If True, string items are parsed with ``ast.literal_eval`` to recover Python
        objects. Strings which are not valid Python literals are returned as text.

    Returns
    -------
    tuple (dict, Xarray)
        The dictionary of items read, and the object rebuilt from it by
        ``reconstructDims``, if the rebuild succeeds.

    dict
        The dictionary only, if ``reconstructDims`` fails (a message is printed).

    None
        If h5py is not installed.

    Notes
    -----
    Currently assumes a single object per group.
    Only literals are evaluated (``literal_eval``), not arbitrary expressions.

    TODO: better handling of coords & attrs. Currently only works with string eval, which is hacky.
    TODO: try and get dataName from input data structure, or default to reading all?
    TODO: make the rebuild optional and add restack() routine?

    """

    if not h5Flag:
        print('*** Install h5py for HDF5 read. ')
        return None

    if filePath is None:
        filePath = Path().absolute()

    # Name group
    if dataName is None:
        dataName = 'xr'

    fIn = Path(filePath, fileName)

    # Context manager ensures the file is closed even if the group is missing or a read fails.
    dict_new = {}
    with h5py.File(fIn.as_posix(), 'r') as hf:
        dict_group_load = hf[dataName]

        for k in dict_group_load:
            # [()] gets the data (not just the dataset object),
            # see https://docs.h5py.org/en/stable/high/dataset.html#reading-writing-data
            dict_new[k] = _decodeHDF5Item(dict_group_load[k][()], evalStrings)

    # Rebuild
    try:
        xrFromDict = reconstructDims(dict_new)
    except Exception:
        print(f'*** Failed to rebuild Xarray from {fIn}, returning dict only.')
        return dict_new

    return (dict_new, xrFromDict)