/
h5_merger.py
3033 lines (2502 loc) · 129 KB
/
h5_merger.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
"""
h5parm merger script for merging h5parms containing phase and/or amplitude solutions for calibrating LOFAR observations.
This script is a standalone script, such that it is easy to copy-paste it to other pipelines.
For examples to run this script from the command line, please have a look at:
https://github.com/jurjen93/lofar_helpers/blob/master/h5_merger_examples.md
It is a work in progress, so for any bug reports or suggestions for improvements:
please contact jurjendejong@strw.leidenuniv.nl (or find me on LOFAR slack pages)
"""
__author__ = "Jurjen de Jong (jurjendejong@strw.leidenuniv.nl)"
from casacore import tables as ct
from collections import OrderedDict
from glob import glob
from losoto.h5parm import h5parm
from losoto.lib_operations import reorderAxes
from numpy import zeros, ones, round, unique, array_equal, append, where, isfinite, complex128, expand_dims, \
pi, array, all, exp, angle, sort, sum, finfo, take, diff, equal, take, transpose, cumsum, insert, abs, asarray, newaxis, argmin
import os
import re
from scipy.interpolate import interp1d
import sys
import tables
import warnings
from argparse import ArgumentParser
from astropy.coordinates import SkyCoord
from astropy import units as u
warnings.filterwarnings('ignore')
__all__ = ['merge_h5', 'output_check', 'move_source_in_sourcetable', 'h5_check']
diagdiag_math = """
Diagonal times Diagonal
-------------------------------------------------
/Axx 0 \ /Bxx 0 \ /Axx*Bxx 0 \
| | X | | = | |
\ 0 Ayy/ \ 0 Byy/ \ 0 Ayy*Byy/
-------------------------------------------------
"""
diagfull_math = """
Diagonal times Fulljones
-------------------------------------------------
/Axx 0 \ /Bxx Bxy\ /Axx*Bxx Axx*Bxy\
| | X | | = | |
\ 0 Ayy/ \Byx Byy/ \Ayy*Byx Ayy*Byy/
-------------------------------------------------
"""
fulldiag_math = """
Fulljones times Diagonal
-------------------------------------------------
/Axx Axy \ /Bxx 0 \ /Axx*Bxx Axy*Byy\
| | X | | = | |
\Ayx Ayy/ \ 0 Byy/ \Ayx*Bxx Ayy*Byy/
-------------------------------------------------
"""
doublefulljones_math = """
Fulljones times Fulljones
-------------------------------------------------
/Axx Axy \ /Bxx Bxy \ /Axx*Bxx + Axy*Byx Axy*Byy + Axx*Bxy\
| | X | | = | |
\Ayx Ayy/ \ Byx Byy/ \Ayx*Bxx + Ayy*Byx Ayy*Byy + Ayx*Bxy/
-------------------------------------------------
"""
lin2circ_math = """
Convert linear polarization to circular polarization
-----------------------------
RR = XX - iXY + iYX + YY
RL = XX + iXY + iYX - YY
LR = XX - iXY - iYX - YY
LL = XX + iXY - iYX + YY
-----------------------------
"""
circ2lin_math = """
Convert circular polarization to linear polarization
-----------------------------
XX = RR + RL + LR + LL
XY = iRR - iRL + iLR - iLL
YX = -iRR - iRL + iLR + iLL
YY = RR - RL - LR + LL
-----------------------------
"""
debug_message = 'Please contact jurjendejong@strw.leidenuniv.nl.'
def remove_numbers(inp):
"""
Remove numbers from string (keep only letters)
:param inp: string input
"""
return "".join(re.findall("[a-zA-z]+", inp))
def make_utf8(inp):
"""
Convert input to utf8 instead of bytes
:param inp: string input
"""
try:
inp = inp.decode('utf8')
return inp
except (UnicodeDecodeError, AttributeError):
return inp
def overwrite_table(T, solset, table, values, title=None):
"""
Create table for given solset, opened with the package tables.
Best to use for antenna or source table.
:param T: Table opeend with tables
:param solset: solution set of the table (f.ex. sol000)
:param table: table name (f.ex. antenna or source)
:param values: new values
:param title: title of new table
"""
try:
T.root
except:
sys.exit('ERROR: Create table failed. Given table is not opened with the package "tables" (https://pypi.org/project/tables/).')
if 'sol' not in solset:
print('WARNING: Usual input have sol*** as solset name.')
ss = T.root._f_get_child(solset)
ss._f_get_child(table)._f_remove()
if table == 'source':
values = array(values, dtype=[('name', 'S128'), ('dir', '<f4', (2,))])
title = 'Source names and directions'
elif table == 'antenna':
title = 'Antenna names and positions'
values = array(values, dtype=[('name', 'S16'), ('position', '<f4', (3,))])
else:
try: # check if numpy structure
values.shape
except:
values = array(values)
T.create_table(ss, table, values, title=title)
return
def copy_antennas_from_MS_to_h5(MS, h5, solset):
"""
Copy the antennas from an MS to an h5 file
:param MS: measurement set
:param h5: h5 file
:param solset: solution set name
"""
# Read MS antennas
t = ct.table(MS + "::ANTENNA", ack=False)
new_antlist = t.getcol('NAME')
new_antpos = t.getcol('POSITION')
antennas_ms = list(zip(new_antlist, new_antpos))
t.close()
# Open H5 antenna table
T = tables.open_file(h5, 'r+')
ss = T.root._f_get_child(solset)
ants_h5 = [a.decode('utf8') if type(a) != str else a for a in
T.root._f_get_child(solset)._f_get_child(list(ss._v_groups.keys())[0]).ant[:]]
# Write antenna table
if ants_h5 == new_antlist:
overwrite_table(T, solset, 'antenna', antennas_ms, title=None)
else:
new_antennas = list(zip(ants_h5, [[0., 0., 0.]] * len(ants_h5)))
for n, ant in enumerate(antennas_ms):
ant_ms = make_utf8(ant[0])
# add antenna from ms if not in H5
if ant_ms in list(ants_h5):
new_antennas[ants_h5.index(ant_ms)] = ant
ss.antenna._f_remove()
T.create_table(ss, 'antenna', array(new_antennas, dtype=[('name', 'S16'), ('position', '<f4', (3,))]),
title='Antenna names and positions')
T.close()
return
def find_closest_indices(arr1, arr2):
"""
Index mapping between two arrays where each index in arr1 corresponds to the index of the closest value in arr2.
Parameters:
arr1 (np.array): The first array.
arr2 (np.array): The second array, where we find the closest value to each element of arr1.
Returns:
np.array: An array of indices from arr2, corresponding to each element in arr1.
"""
# Convert lists to NumPy arrays if not already
arr1 = asarray(arr1)
arr2 = asarray(arr2)
# Calculate the absolute differences between each element of arr1 and all elements in arr2
# The resulting matrix will have shape (len(arr1), len(arr2))
diff_matrix = abs(arr1[:, newaxis] - arr2)
# Find the index of the minimum value in arr2 for each element in arr1
# np.argmin will return the indices of the closest values along axis 1 (across columns)
closest_indices = argmin(diff_matrix, axis=1)
return closest_indices
def take_numpy_axis(numpy_array, axis, idx):
"""
Take specific axis from multidimensional array
:param numpy_array: numpy array
:param axis: axis
:param idx: indices
:return numpy array
"""
if axis == 0:
return numpy_array[idx, ...]
elif axis == 1:
return numpy_array[:, idx, ...]
elif axis == 2:
return numpy_array[:, :, idx, ...]
elif axis == 3:
return numpy_array[:, :, :, idx, ...]
elif axis == 4:
return numpy_array[:, :, :, :, idx, ...]
else:
sys.exit('ERROR: typical index number does not exceed 4, but you asked for ' + str(axis) +
'\nOr this script is outdated or you made a mistake.')
def has_integer(input):
"""
Check if string has integer
:param input: input string
:return: return boolean (True/False) if integer in string
"""
try:
for s in str(input):
if s.isdigit():
return True
return False
except: # dangerous but ok for now ;-)
return False
def coordinate_distance(c1, c2):
"""
Find distance between sources
:param c1: first coordinate
:param c2: second coordinate
"""
if sys.version_info.major == 2:
print("WARNING: --min_distance function does not work in Python 2")
return 9999
if abs(c1[0])<2*pi and abs(c1[1])<pi/2 and abs(c2[0])<2*pi and abs(c2[1])<pi/2:
c1 = SkyCoord(c1[0], c1[1], unit='radian', frame='icrs') # your coords
c2 = SkyCoord(c2[0], c2[1], unit='radian', frame='icrs')
else:
c1 = SkyCoord(c1[0], c1[1], unit='degree', frame='icrs') # your coords
c2 = SkyCoord(c2[0], c2[1], unit='degree', frame='icrs')
return c1.separation(c2).to(u.degree).value
class MergeH5:
"""Merge multiple h5 tables"""
def __init__(self, h5_out, h5_tables=None, ms_files=None, h5_time_freq=None, convert_tec=True,
merge_all_in_one=False, solset='sol000', filtered_dir=None, no_antenna_crash=None,
freq_concat=None, time_concat=None):
"""
:param h5_out: name of merged output h5 table
:param h5_tables: h5 tables to merge, can be both list and string
:param ms_files: read time and frequency from measurement set
:param h5_time_freq: read time and frequency from h5
:param convert_tec: convert TEC to phase or not
:param merge_all_in_one: merge all in one direction
:param solset: solset name
:param filtered_dir: directions to filter (needs to be list with indices)
:param no_antenna_crash: do not crash if antenna tables are not the same between h5s
:param freq_concat: merging tables with different frequencies
"""
# output name
self.h5name_out = h5_out
# for now this is standard sol000, might change in future version
self.solset = solset
# read in ms files
if type(ms_files) == list:
self.ms = ms_files
elif type(ms_files) == str:
self.ms = glob(ms_files)
else:
self.ms = []
# read in h5 solution files
if type(h5_tables) == list:
self.h5_tables = h5_tables
elif type(h5_tables) == str:
self.h5_tables = glob(h5_tables)
else:
print('No h5 table given. Use all h5 tables in current folder.')
self.h5_tables = tuple(glob('*.h5'))
# get time and freq axis
if type(h5_time_freq) == bool and h5_time_freq == True:
if len(self.ms) > 0:
print('Ignore MS for time and freq axis, as --h5_time_freq is given.')
self.ax_time = array([])
self.ax_freq = array([])
for h5_name in self.h5_tables:
h5 = tables.open_file(h5_name)
for solset in h5.root._v_groups.keys():
ss = h5.root._f_get_child(solset)
for soltab in ss._v_groups.keys():
st = ss._f_get_child(soltab)
axes = make_utf8(st.val.attrs['AXES']).split(',')
if 'time' in axes:
time = st._f_get_child('time')[:]
self.ax_time = sort(unique(append(self.ax_time, time)))
else:
print('No time axes in ' + h5_name + '/' + solset + '/' + soltab)
if 'freq' in axes:
freq = st._f_get_child('freq')[:]
self.ax_freq = sort(unique(append(self.ax_freq, freq)))
else:
print('No freq axes in ' + h5_name + '/' + solset + '/' + soltab)
h5.close()
elif type(h5_time_freq) == str:
if len(self.ms) > 0:
print('Ignore MS for time and freq axis, as --h5_time_freq is given.')
print('Take the time and freq from the following h5 solution file:\n' + h5_time_freq)
T = tables.open_file(h5_time_freq)
self.ax_time = T.root.sol000.phase000.time[:]
self.ax_freq = T.root.sol000.phase000.freq[:]
T.close()
# use ms files for available information
elif len(self.ms) > 0:
print('Take the time and freq from the following measurement sets:\n' + '\n'.join(self.ms))
self.ax_time = array([])
self.ax_freq = array([])
for m in self.ms:
t = ct.taql('SELECT CHAN_FREQ, CHAN_WIDTH FROM ' + m + '::SPECTRAL_WINDOW')
self.ax_freq = append(self.ax_freq, t.getcol('CHAN_FREQ')[0])
t.close()
t = ct.table(m)
self.ax_time = append(self.ax_time, t.getcol('TIME'))
t.close()
self.ax_time = array(sorted(unique(self.ax_time)))
self.ax_freq = array(sorted(unique(self.ax_freq)))
# if no ms files, use the longest time and frequency resolution from h5 tables
else:
print(
'No MS or h5 file given for time/freq axis.\nUse frequency and time axis by combining all input h5 tables.')
self.ax_time = array([])
self.ax_freq = array([])
for h5_name in self.h5_tables:
h5 = tables.open_file(h5_name)
for solset in h5.root._v_groups.keys():
ss = h5.root._f_get_child(solset)
for soltab in ss._v_groups.keys():
st = ss._f_get_child(soltab)
axes = make_utf8(st.val.attrs['AXES']).split(',')
if 'time' in axes:
time = st._f_get_child('time')[:]
self.ax_time = sort(unique(append(self.ax_time, time)))
else:
print('No time axes in ' + h5_name + '/' + solset + '/' + soltab)
if 'freq' in axes:
freq = st._f_get_child('freq')[:]
self.ax_freq = sort(unique(append(self.ax_freq, freq)))
else:
print('No freq axes in ' + h5_name + '/' + solset + '/' + soltab)
h5.close()
# get polarization output axis and check number of error and tec tables in merge list
self.polarizations, polarizations = [], []
self.doublefulljones = False
self.tecnum, self.errornum = 0, 0 # to average in case of multiple tables
for n, h5_name in enumerate(self.h5_tables):
h5 = tables.open_file(h5_name)
if 'phase000' in h5.root.sol000._v_children.keys() \
and 'pol' in make_utf8(h5.root.sol000.phase000.val.attrs["AXES"]).split(','):
polarizations = h5.root.sol000.phase000.pol[:]
# having two fulljones solution files to merge --> we will use a matrix multiplication for this type of merge
if len(polarizations) == len(self.polarizations) == 4:
self.doublefulljones = True
self.fulljones_phases = OrderedDict()
self.fulljones_amplitudes = OrderedDict()
# take largest polarization list/array
if len(polarizations) > len(self.polarizations):
if type(self.polarizations) == list:
self.polarizations = polarizations
else:
self.polarizations = polarizations.copy()
if 'tec000' in h5.root.sol000._v_children.keys():
self.tecnum += 1
if 'error000' in h5.root.sol000._v_children.keys():
self.errornum += 1
h5.close()
# check if fulljones
if len(self.polarizations) == 4:
self.fulljones = True
elif len(self.polarizations) > 4 or len(self.polarizations) == 3:
sys.exit('Output solutions cannot have ' + str(len(self.polarizations)) + ' solutions')
else:
self.fulljones = False
print('Output polarization:\n' + str(self.polarizations))
self.poldim = len(self.polarizations)
# validation checks
if len(self.ax_freq) == 0:
sys.exit('ERROR: Cannot read frequency axis from input MS set or input H5.')
if len(self.ax_time) == 0:
sys.exit('ERROR: Cannot read time axis from input MS or input H5.')
if not self.have_same_antennas and not no_antenna_crash:
sys.exit('ERROR: Antenna tables are not the same')
# convert tec to phase?
self.convert_tec = convert_tec
self.merge_all_in_one = merge_all_in_one
if filtered_dir:
self.filtered_dir = filtered_dir
else:
self.filtered_dir = None
# possible solution axis in order used for our merging script
self.solaxnames = ['pol', 'dir', 'ant', 'freq', 'time']
# directions in an ordered dictionary
self.directions = OrderedDict()
if len(self.directions) > 1 and self.doublefulljones:
sys.exit("ERROR: Merging not compatitable with multiple directions and double fuljones merge") # TODO: update
self.freq_concat = freq_concat
self.time_concat = time_concat
@property
def have_same_antennas(self):
"""
Compare antenna tables with each other.
These should be the same.
:return: boolean if antennas are the same (True/False).
"""
for h5_name1 in self.h5_tables:
H_ref = tables.open_file(h5_name1)
for solset1 in H_ref.root._v_groups.keys():
ss1 = H_ref.root._f_get_child(solset1)
antennas_ref = ss1.antenna[:]
for soltab1 in ss1._v_groups.keys():
if (len(antennas_ref['name']) != len(ss1._f_get_child(soltab1).ant[:])) or \
(not all(antennas_ref['name'] == ss1._f_get_child(soltab1).ant[:])):
message = '\n'.join(['\nMismatch in antenna tables in ' + h5_name1,
'Antennas from ' + '/'.join([solset1, 'antenna']),
str(antennas_ref['name']),
'Antennas from ' + '/'.join([solset1, soltab1, 'ant']),
ss1._f_get_child(soltab1).ant[:]])
print(message)
H_ref.close()
return False
for soltab2 in ss1._v_groups.keys():
if (len(ss1._f_get_child(soltab1).ant[:]) !=
len(ss1._f_get_child(soltab2).ant[:])) or \
(not all(ss1._f_get_child(soltab1).ant[:] ==
ss1._f_get_child(soltab2).ant[:])):
message = '\n'.join(['\nMismatch in antenna tables in ' + h5_name1,
'Antennas from ' + '/'.join([solset1, soltab1, 'ant']),
ss1._f_get_child(soltab1).ant[:],
'Antennas from ' + '/'.join([solset1, soltab2, 'ant']),
ss1._f_get_child(soltab2).ant[:]])
print(message)
H_ref.close()
return False
for h5_name2 in self.h5_tables:
H = tables.open_file(h5_name2)
for solset2 in H.root._v_groups.keys():
ss2 = H.root._f_get_child(solset2)
antennas = ss2.antenna[:]
if (len(antennas_ref['name']) != len(antennas['name'])) \
or (not all(antennas_ref['name'] == antennas['name'])):
message = '\n'.join(
['\nMismatch between antenna tables from ' + h5_name1 + ' and ' + h5_name2,
'Antennas from ' + h5_name1 + ' and ',
str(antennas_ref['name']),
'Antennas from ' + h5_name2 + ':',
str(antennas['name'])])
print(message)
H.close()
H_ref.close()
return False
H.close()
H_ref.close()
return True
def concat(self, values, soltab, axes, ax_name):
"""
Concat instead of interpolation
:param values: values (phase or amplitude)
:param soltab: amplitude or phase soltab
:param axes: h5 axes (to be concatted)
:param ax_name: over time or freq
:return: concattenated
"""
if ax_name == 'freq':
axes_new = self.ax_freq
elif ax_name == 'time':
axes_new = self.ax_time
else:
sys.exit("ERROR: Should not arrive here")
shape = list(values.shape)
shape[self.axes_current.index(ax_name)] = len(axes_new)
values_tmp = zeros(shape)
if 'pol' in self.axes_current and 'amplitude' in soltab:
if self.axes_current.index('pol') == 0:
values_tmp[-1, ...] = 1
values_tmp[0, ...] = 1
elif self.axes_current.index('pol') == 1:
values_tmp[:, -1, ...] = 1
values_tmp[:, 0, ...] = 1
elif self.axes_current.index('pol') == 2:
values_tmp[:, :, -1, ...] = 1
values_tmp[:, :, 0, ...] = 1
elif self.axes_current.index('pol') == 3:
values_tmp[:, :, :, -1, ...] = 1
values_tmp[:, :, :, 0, ...] = 1
elif self.axes_current.index('pol') == 4:
values_tmp[:, :, :, :, -1, ...] = 1
values_tmp[:, :, :, :, 0, ...] = 1
idx = find_closest_indices(axes, axes_new)
if self.axes_current.index(ax_name) == 0:
values_tmp[idx, ...] = values[:]
elif self.axes_current.index(ax_name) == 1:
values_tmp[:, idx, ...] = values[:]
elif self.axes_current.index(ax_name) == 2:
values_tmp[:, :, idx, ...] = values[:]
elif self.axes_current.index(ax_name) == 3:
values_tmp[:, :, :, idx, ...] = values[:]
elif self.axes_current.index(ax_name) == 4:
values_tmp[:, :, :, :, idx, ...] = values[:]
return values_tmp
def _unpack_h5(self, st, solset, soltab):
"""
Unpack, check, and reorder the values from the h5 table to merge.
:param st: solution table
:param solset: solset name
:param soltab: soltab name
:return: values, time axis, frequency axis
"""
# Check if there is a polarization axes
if 'pol' in st.getAxesNames():
print("polarization is in {solset}/{soltab}".format(solset=solset, soltab=soltab))
else:
print("polarization is not in {solset}/{soltab}".format(solset=solset, soltab=soltab))
# Get time axes
time_axes = st.getAxisValues('time')
# Get freq axes
if 'freq' in st.getAxesNames():
freq_axes = st.getAxisValues('freq')
else:
freq_axes = self.ax_freq
# Do checks
if self.ax_time[0] > time_axes[-1] or time_axes[0] > self.ax_time[-1]:
sys.exit("ERROR: Time axes of h5 and MS are not overlapping.\n"
"SUGGESTION: add --h5_time_freq=true if you want to use the input h5 files to construct the time and freq axis.")
if self.ax_freq[0] > freq_axes[-1] or freq_axes[0] > self.ax_freq[-1] and not self.freq_concat:
sys.exit("ERROR: Frequency axes of h5 and MS are not overlapping.\n"
"SUGGESTION: add --h5_time_freq=true if you want to use the input h5 files to construct the time and freq axis.")
if float(soltab[-3:]) > 0:
sys.exit("ERROR: {soltab} does not end on 000".format(soltab=soltab))
for av in self.axes_final:
if av in st.getAxesNames() and st.getAxisLen(av) == 0:
print("No {av} in {solset}/{soltab}".format(av=av, solset=solset, soltab=soltab))
if len(st.getAxesNames()) != len(st.getValues()[0].shape):
sys.exit('ERROR: Axes ({axlen}) and Value dimensions ({vallen}) are not equal'.format(
axlen=len(st.getAxesNames()), vallen=len(st.getValues()[0].shape)))
print('Value shape before --> {values}'.format(values=st.getValues()[0].shape))
# Reorder and add dir
axes_current = [an for an in self.solaxnames if an in st.getAxesNames()]
if 'dir' in st.getAxesNames():
values = reorderAxes(st.getValues()[0], st.getAxesNames(), axes_current)
else:
print('Add dir axis.')
origin_values = st.getValues()[0]
if 'dir' not in axes_current:
if 'pol' not in axes_current:
axes_current.insert(0, 'dir')
else:
axes_current.insert(1, 'dir')
values = reorderAxes(origin_values.reshape(origin_values.shape + (1,)), st.getAxesNames() + ['dir'],
axes_current)
# current axes for reordering of axes
self.axes_current = [an for an in self.solaxnames if an in st.getAxesNames()]
if 'dir' not in self.axes_current:
if 'pol' in self.axes_current:
self.axes_current.insert(1, 'dir')
else:
self.axes_current.insert(0, 'dir')
# remove invalid values
values = self.remove_invalid_values(st.getType(), values, self.axes_current)
if remove_numbers(st.getType()) == 'tec':
# add frequencies
if 'freq' not in st.getAxesNames():
ax = self.axes_final.index('freq') - len(self.axes_final)
values = expand_dims(values, axis=ax)
self.axes_current.insert(-1, 'freq')
valuestmp = values
for _ in range(len(self.ax_freq) - 1):
values = append(values, valuestmp, axis=-2)
if self.convert_tec:
# convert tec to phase
shape = [1 for _ in range(values.ndim)]
shape[self.axes_current.index('freq')] = -1
values = self.tecphase_conver(values, self.ax_freq.reshape(shape))
soltab = 'phase'
# expand pol dimensions
if self.fulljones:
if 'pol' not in self.axes_current:
values = self._expand_poldim(values, 4, remove_numbers(soltab), False)
self.axes_current.insert(0, 'pol')
elif st.getAxisLen('pol') != 4:
values = self._expand_poldim(values, 4, remove_numbers(soltab), True)
elif 'pol' in self.axes_current and 'pol' in self.axes_final:
if st.getAxisLen('pol') > self.phases.shape[0]:
self.phases = self._expand_poldim(self.phases, st.getAxisLen('pol'), remove_numbers(soltab), True)
elif len(self.polarizations) > st.getAxisLen('pol'):
values = self._expand_poldim(values, len(self.polarizations), remove_numbers(soltab), True)
elif 'pol' not in self.axes_current and 'pol' in self.axes_final:
values = self._expand_poldim(values, len(self.polarizations), remove_numbers(soltab), False)
self.axes_current.insert(0, 'pol')
elif 'pol' in self.axes_current and 'pol' not in self.axes_final:
self.phases = self._expand_poldim(self.phases, st.getAxisLen('pol'), remove_numbers(soltab), False)
self.axes_final.insert(0, 'pol')
elif 'pol' not in self.axes_current and 'pol' not in self.axes_final and len(
self.polarizations) > 0:
if len(self.phases.shape) != 5:
self.phases = self._expand_poldim(self.phases, len(self.polarizations), remove_numbers(soltab), False)
if 'pol' not in self.axes_final:
self.axes_final.insert(0, 'pol')
values = self._expand_poldim(values, len(self.polarizations), remove_numbers(soltab), False)
self.axes_final.insert(0, 'pol')
# time interpolation
if self.time_concat:
values = self.concat(values, st.getType(), time_axes, 'time')
else:
values = self._interp_along_axis(values, time_axes, self.ax_time,
self.axes_current.index('time'))
# freq interpolation
if self.freq_concat:
values = self.concat(values, st.getType(), freq_axes, 'freq')
elif remove_numbers(st.getType()) != 'tec' and remove_numbers(st.getType()) != 'error':
values = self._interp_along_axis(values, freq_axes, self.ax_freq,
self.axes_current.index('freq'))
return values
def _sort_soltabs(self, soltabs):
"""
Sort solution tables.
This is important to run the steps and add directions according to our algorithm.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Dont touch this part if you dont have to. ;-)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
:param soltabs: solutions tables
:return: sorted phase, tec, amplitude, rotation, error tables
"""
soltabs = set(soltabs)
if self.convert_tec:
tp_phasetec = [li for li in soltabs if 'tec' in li or 'phase' in li]
tp_amplitude = [li for li in soltabs if 'amplitude' in li]
tp_rotation = [li for li in soltabs if 'rotation' in li]
tp_error = [li for li in soltabs if 'error' in li]
return [sorted(tp_amplitude, key=lambda x: float(x[-3:])),
sorted(tp_rotation, key=lambda x: float(x[-3:])),
sorted(sorted(tp_phasetec), key=lambda x: float(x[-3:])),
sorted(tp_error, key=lambda x: float(x[-3:]))]
else:
tp_phase = [li for li in soltabs if 'phase' in li]
tp_tec = [li for li in soltabs if 'tec' in li]
tp_amplitude = [li for li in soltabs if 'amplitude' in li]
tp_rotation = [li for li in soltabs if 'rotation' in li]
tp_error = [li for li in soltabs if 'error' in li]
return [sorted(tp_phase, key=lambda x: float(x[-3:])),
sorted(tp_tec, key=lambda x: float(x[-3:])),
sorted(tp_amplitude, key=lambda x: float(x[-3:])),
sorted(tp_rotation, key=lambda x: float(x[-3:])),
sorted(tp_error, key=lambda x: float(x[-3:]))]
def get_allkeys(self):
"""
Get all solution sets, solutions tables, and ax names in lists.
This returns an order that is optimized for this code.
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
Dont touch this part if you dont have to. ;-)
%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%%
"""
self.all_soltabs, self.all_solsets, self.all_axes, self.ant = [], [], [], []
for h5_name in self.h5_tables:
h5 = h5parm(h5_name)
for solset in h5.getSolsetNames():
self.all_solsets += [solset]
ss = h5.getSolset(solset)
for n, soltab in enumerate(ss.getSoltabNames()):
self.all_soltabs += [soltab]
st = ss.getSoltab(soltab)
self.all_axes += ['/'.join([solset, soltab, an]) for an in st.getAxesNames()]
if n == 0:
self.ant = st.getAxisValues('ant') # check if same for all h5
elif list(self.ant) != list(st.getAxisValues('ant')):
sys.exit('ERROR: antennas not the same.')
h5.close()
self.all_soltabs = self._sort_soltabs(self.all_soltabs)
self.all_solsets = set(self.all_solsets)
self.all_axes = set(self.all_axes)
return self
def _make_template_values(self, soltab):
"""
Make template numpy array to fill up during merging.
:param soltab: solution table name
:param st: solution table itself
"""
num_dir = max(len(self.directions), 1)
if 'amplitude' in soltab and len(self.polarizations) > 0:
self.amplitudes = ones(
(max(len(self.polarizations), 1), num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
if len(self.polarizations) == 4:
self.amplitudes[1, ...] = 0.
self.amplitudes[2, ...] = 0.
else:
self.amplitudes = ones((num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
if 'phase' in soltab and len(self.polarizations) > 0:
self.phases = zeros(
(max(len(self.polarizations), 1), num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
elif 'rotation' in soltab:
self.phases = zeros((num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
else:
self.phases = zeros((num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
if 'tec' in soltab and not self.convert_tec and len(self.polarizations) > 0:
self.tec = zeros(
(max(len(self.polarizations), 1), num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
elif 'tec' in soltab and not self.convert_tec:
self.tec = zeros((num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
if 'error' in soltab and len(self.polarizations) > 0:
self.error = zeros(
(max(len(self.polarizations), 1), num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
elif 'error' in soltab:
self.error = zeros((num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
if self.doublefulljones:
self.fullgains = ones((4, num_dir, len(self.ant), len(self.ax_freq), len(self.ax_time)))
self.n = len(self.directions) # direction number reset
return self
@staticmethod
def tecphase_conver(tec, freqs):
"""
convert tec to phase --> See Equation 1 from Sweijen et al. 2022
:param tec: TEC
:param freqs: frequencies
:return: tec phase converted values
"""
return -8.44797245e9 * tec / freqs
@staticmethod
def _interp_along_axis(x, interp_from, interp_to, axis, fill_value='extrapolate'):
"""
Interpolate along axis.
:param x: frequency or time axis. Must be equal to the length of interp_from.
:param interp_from: interpolate from this axis.
:param interp_to: interpolate to this axis
:param axis: interpolation axis
:return: the interpolated result
"""
# axis with length 1
if len(interp_from) == 1:
new_vals = x
for _ in range(len(interp_to) - 1):
new_vals = append(new_vals, x, axis=axis)
else:
interp_vals = interp1d(interp_from, x, axis=axis, kind='nearest', fill_value=fill_value, bounds_error=False)
new_vals = interp_vals(interp_to)
return new_vals
def get_model_h5(self, solset, soltab):
"""
Get model (clean) h5 table.
:param solset: solution set name (sol000)
:param soltab: solution table name
"""
if '000' in soltab:
# make template
for h5_name in self.h5_tables:
h5_to_merge = h5parm(h5_name)
if solset not in h5_to_merge.getSolsetNames():
h5_to_merge.close()
sys.exit('ERROR ' + solset + ' does not exist in ' + h5_name)
else:
ss = h5_to_merge.getSolset(solset)
if soltab not in ss.getSoltabNames():
h5_to_merge.close()
continue # use other h5 table with soltab
else:
st = ss.getSoltab(soltab)
if not self.convert_tec or (self.convert_tec and 'tec' not in soltab):
self._make_template_values(soltab)
self.axes_final = [an for an in self.solaxnames if an in st.getAxesNames()]
if len(self.polarizations) > 1 and 'pol' not in st.getAxesNames():
self.axes_final.insert(0, 'pol')
elif 'tec' in soltab and self.convert_tec:
for st_group in self.all_soltabs:
if soltab in st_group and ('phase000' not in st_group and 'phase{n}'.format(n=soltab[-3:])):
self._make_template_values(soltab)
self.axes_final = [an for an in self.solaxnames if an in st.getAxesNames()]
if len(self.polarizations) > 1 and 'pol' not in st.getAxesNames():
self.axes_final = ['pol'] + self.axes_final
# add dir if missing
if 'dir' not in self.axes_final:
if 'pol' in self.axes_final:
self.axes_final.insert(1, 'dir')
else:
self.axes_final.insert(0, 'dir')
h5_to_merge.close()
break
return self
@staticmethod
def get_number_of_directions(st):
"""
Get number of directions in solution table
:param st: solution table
:return: number of directions
"""
if 'dir' in st.getAxesNames():
dir_index = st.getAxesNames().index('dir')
return st.getValues()[0].shape[dir_index]
else:
return 1
def add_direction(self, source):
"""
Add direction to dictionary
:param source: source direction
"""
self.directions.update(source)
self.directions = OrderedDict(sorted(self.directions.items()))
return self
@staticmethod
def remove_invalid_values(soltab, values, axlist):
"""
Correct invalid values in tables (infinite, nan).
Only finite values allowed.
:param soltab: solution table name
:param values: numpy array values
:param axlist: list with axes
:return: new values
"""
if 'phase' in soltab or 'tec' in soltab:
values[~isfinite(values)] = 0.
elif 'amplitude' in soltab:
if 'pol' in axlist:
if axlist.index('pol') == 0:
values[(~isfinite(values))] = 0.
values[0, ...][(~isfinite(values[0, ...])) | (values[0, ...] == 0.)] = 1.
values[-1, ...][(~isfinite(values[-1, ...])) | (values[-1, ...] == 0.)] = 1.
elif axlist.index('pol') - len(axlist) == -1:
values[(~isfinite(values))] = 0.
values[..., 0][(~isfinite(values[..., 0])) | (values[..., 0] == 0.)] = 1.
values[..., -1][(~isfinite(values[..., -1])) | (values[..., -1] == 0.)] = 1.
else:
sys.exit('ERROR: polarization found at unexpected axis ' + str(axlist))
else:
values[(~isfinite(values)) | (values == 0.)] = 1.
return values
@staticmethod
def _expand_poldim(values, dim_pol, type, haspol):
"""
Add extra polarization dimensions
:param values: values which need to get a polarization
:param dim_pol: number of dimensions
:param type: phase or amplitude
:param haspol: has polarization in input values