forked from astropy/astropy
/
core.py
1848 lines (1522 loc) · 64 KB
/
core.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
# Licensed under a 3-clause BSD style license - see LICENSE.rst
"""An extensible ASCII table reader and writer.
core.py:
Core base classes and functions for reading and writing tables.
:Copyright: Smithsonian Astrophysical Observatory (2010)
:Author: Tom Aldcroft (aldcroft@head.cfa.harvard.edu)
"""
import copy
import csv
import fnmatch
import functools
import inspect
import itertools
import operator
import os
import re
import warnings
from collections import OrderedDict
from contextlib import suppress
from io import StringIO
import numpy
from astropy.table import Table
from astropy.utils.data import get_readable_fileobj
from astropy.utils.exceptions import AstropyWarning
from . import connect
from .docs import READ_DOCSTRING, WRITE_DOCSTRING
# Global dictionary mapping format arg to the corresponding Reader class
FORMAT_CLASSES = {}
# Similar dictionary for fast readers
FAST_CLASSES = {}
def _check_multidim_table(table, max_ndim):
"""Check that ``table`` has only columns with ndim <= ``max_ndim``.
Currently ECSV is the only built-in format that supports output of arbitrary
N-d columns, but HTML supports 2-d.
"""
# No limit?
if max_ndim is None:
return
# Check for N-d columns
nd_names = [col.info.name for col in table.itercols() if len(col.shape) > max_ndim]
if nd_names:
raise ValueError(
f"column(s) with dimension > {max_ndim} "
"cannot be be written with this format, try using 'ecsv' "
"(Enhanced CSV) format"
)
class CsvWriter:
"""
Internal class to replace the csv writer ``writerow`` and ``writerows``
functions so that in the case of ``delimiter=' '`` and
``quoting=csv.QUOTE_MINIMAL``, the output field value is quoted for empty
fields (when value == '').
This changes the API slightly in that the writerow() and writerows()
methods return the output written string instead of the length of
that string.
Examples
--------
>>> from astropy.io.ascii.core import CsvWriter
>>> writer = CsvWriter(delimiter=' ')
>>> print(writer.writerow(['hello', '', 'world']))
hello "" world
"""
# Random 16-character string that gets injected instead of any
# empty fields and is then replaced post-write with doubled-quotechar.
# Created with:
# ''.join(random.choice(string.printable[:90]) for _ in range(16))
replace_sentinel = "2b=48Av%0-V3p>bX"
def __init__(self, csvfile=None, **kwargs):
self.csvfile = csvfile
# Temporary StringIO for catching the real csv.writer() object output
self.temp_out = StringIO()
self.writer = csv.writer(self.temp_out, **kwargs)
dialect = self.writer.dialect
self.quotechar2 = dialect.quotechar * 2
self.quote_empty = (dialect.quoting == csv.QUOTE_MINIMAL) and (
dialect.delimiter == " "
)
def writerow(self, values):
"""
Similar to csv.writer.writerow but with the custom quoting behavior.
Returns the written string instead of the length of that string.
"""
has_empty = False
# If QUOTE_MINIMAL and space-delimited then replace empty fields with
# the sentinel value.
if self.quote_empty:
for i, value in enumerate(values):
if value == "":
has_empty = True
values[i] = self.replace_sentinel
return self._writerow(self.writer.writerow, values, has_empty)
def writerows(self, values_list):
"""
Similar to csv.writer.writerows but with the custom quoting behavior.
Returns the written string instead of the length of that string.
"""
has_empty = False
# If QUOTE_MINIMAL and space-delimited then replace empty fields with
# the sentinel value.
if self.quote_empty:
for values in values_list:
for i, value in enumerate(values):
if value == "":
has_empty = True
values[i] = self.replace_sentinel
return self._writerow(self.writer.writerows, values_list, has_empty)
def _writerow(self, writerow_func, values, has_empty):
"""
Call ``writerow_func`` (either writerow or writerows) with ``values``.
If it has empty fields that have been replaced then change those
sentinel strings back to quoted empty strings, e.g. ``""``.
"""
# Clear the temporary StringIO buffer that self.writer writes into and
# then call the real csv.writer().writerow or writerows with values.
self.temp_out.seek(0)
self.temp_out.truncate()
writerow_func(values)
row_string = self.temp_out.getvalue()
if self.quote_empty and has_empty:
row_string = re.sub(self.replace_sentinel, self.quotechar2, row_string)
# self.csvfile is defined then write the output. In practice the pure
# Python writer calls with csvfile=None, while the fast writer calls with
# a file-like object.
if self.csvfile:
self.csvfile.write(row_string)
return row_string
class MaskedConstant(numpy.ma.core.MaskedConstant):
"""A trivial extension of numpy.ma.masked.
We want to be able to put the generic term ``masked`` into a dictionary.
The constant ``numpy.ma.masked`` is not hashable (see
https://github.com/numpy/numpy/issues/4660), so we need to extend it
here with a hash value.
See https://github.com/numpy/numpy/issues/11021 for rationale for
__copy__ and __deepcopy__ methods.
"""
def __hash__(self):
"""All instances of this class shall have the same hash."""
# Any large number will do.
return 1234567890
def __copy__(self):
"""This is a singleton so just return self."""
return self
def __deepcopy__(self, memo):
return self
masked = MaskedConstant()
class InconsistentTableError(ValueError):
"""
Indicates that an input table is inconsistent in some way.
The default behavior of ``BaseReader`` is to throw an instance of
this class if a data row doesn't match the header.
"""
class OptionalTableImportError(ImportError):
"""
Indicates that a dependency for table reading is not present.
An instance of this class is raised whenever an optional reader
with certain required dependencies cannot operate because of
an ImportError.
"""
class ParameterError(NotImplementedError):
"""
Indicates that a reader cannot handle a passed parameter.
The C-based fast readers in ``io.ascii`` raise an instance of
this error class upon encountering a parameter that the
C engine cannot handle.
"""
class FastOptionsError(NotImplementedError):
"""
Indicates that one of the specified options for fast
reading is invalid.
"""
class NoType:
"""
Superclass for ``StrType`` and ``NumType`` classes.
This class is the default type of ``Column`` and provides a base
class for other data types.
"""
class StrType(NoType):
"""
Indicates that a column consists of text data.
"""
class NumType(NoType):
"""
Indicates that a column consists of numerical data.
"""
class FloatType(NumType):
"""
Describes floating-point data.
"""
class BoolType(NoType):
"""
Describes boolean data.
"""
class IntType(NumType):
"""
Describes integer data.
"""
class AllType(StrType, FloatType, IntType):
"""
Subclass of all other data types.
This type is returned by ``convert_numpy`` if the given numpy
type does not match ``StrType``, ``FloatType``, or ``IntType``.
"""
class Column:
"""Table column.
The key attributes of a Column object are:
* **name** : column name
* **type** : column type (NoType, StrType, NumType, FloatType, IntType)
* **dtype** : numpy dtype (optional, overrides **type** if set)
* **str_vals** : list of column values as strings
* **fill_values** : dict of fill values
* **shape** : list of element shape (default [] => scalar)
* **data** : list of converted column values
* **subtype** : actual datatype for columns serialized with JSON
"""
def __init__(self, name):
self.name = name
self.type = NoType # Generic type (Int, Float, Str etc)
self.dtype = None # Numpy dtype if available
self.str_vals = []
self.fill_values = {}
self.shape = []
self.subtype = None
class BaseInputter:
"""
Get the lines from the table input and return a list of lines.
"""
encoding = None
"""Encoding used to read the file"""
def get_lines(self, table, newline=None):
"""Get the lines from the ``table`` input.
The input table can be one of:
* File name
* String (newline separated) with all header and data lines (must have at least 2 lines)
* File-like object with read() method
* List of strings
Parameters
----------
table : str, file-like, list
Can be either a file name, string (newline separated) with all header and data
lines (must have at least 2 lines), a file-like object with a
``read()`` method, or a list of strings.
newline :
Line separator. If `None` use OS default from ``splitlines()``.
Returns
-------
lines : list
List of lines
"""
try:
if hasattr(table, "read") or (
"\n" not in table + "" and "\r" not in table + ""
):
with get_readable_fileobj(table, encoding=self.encoding) as fileobj:
table = fileobj.read()
if newline is None:
lines = table.splitlines()
else:
lines = table.split(newline)
except TypeError:
try:
# See if table supports indexing, slicing, and iteration
table[0]
table[0:1]
iter(table)
if len(table) > 1:
lines = table
else:
# treat single entry as if string had been passed directly
if newline is None:
lines = table[0].splitlines()
else:
lines = table[0].split(newline)
except TypeError:
raise TypeError(
'Input "table" must be a string (filename or data) or an iterable'
)
return self.process_lines(lines)
def process_lines(self, lines):
"""Process lines for subsequent use. In the default case do nothing.
This routine is not generally intended for removing comment lines or
stripping whitespace. These are done (if needed) in the header and
data line processing.
Override this method if something more has to be done to convert raw
input lines to the table rows. For example the
ContinuationLinesInputter derived class accounts for continuation
characters if a row is split into lines.
"""
return lines
class BaseSplitter:
"""
Base splitter that uses python's split method to do the work.
This does not handle quoted values. A key feature is the formulation of
__call__ as a generator that returns a list of the split line values at
each iteration.
There are two methods that are intended to be overridden, first
``process_line()`` to do pre-processing on each input line before splitting
and ``process_val()`` to do post-processing on each split string value. By
default these apply the string ``strip()`` function. These can be set to
another function via the instance attribute or be disabled entirely, for
example::
reader.header.splitter.process_val = lambda x: x.lstrip()
reader.data.splitter.process_val = None
"""
delimiter = None
""" one-character string used to separate fields """
def process_line(self, line):
"""Remove whitespace at the beginning or end of line. This is especially useful for
whitespace-delimited files to prevent spurious columns at the beginning or end.
"""
return line.strip()
def process_val(self, val):
"""Remove whitespace at the beginning or end of value."""
return val.strip()
def __call__(self, lines):
if self.process_line:
lines = (self.process_line(x) for x in lines)
for line in lines:
vals = line.split(self.delimiter)
if self.process_val:
yield [self.process_val(x) for x in vals]
else:
yield vals
def join(self, vals):
if self.delimiter is None:
delimiter = " "
else:
delimiter = self.delimiter
return delimiter.join(str(x) for x in vals)
class DefaultSplitter(BaseSplitter):
"""Default class to split strings into columns using python csv. The class
attributes are taken from the csv Dialect class.
Typical usage::
# lines = ..
splitter = ascii.DefaultSplitter()
for col_vals in splitter(lines):
for col_val in col_vals:
...
"""
delimiter = " "
""" one-character string used to separate fields. """
quotechar = '"'
""" control how instances of *quotechar* in a field are quoted """
doublequote = True
""" character to remove special meaning from following character """
escapechar = None
""" one-character stringto quote fields containing special characters """
quoting = csv.QUOTE_MINIMAL
""" control when quotes are recognized by the reader """
skipinitialspace = True
""" ignore whitespace immediately following the delimiter """
csv_writer = None
csv_writer_out = StringIO()
def process_line(self, line):
"""Remove whitespace at the beginning or end of line. This is especially useful for
whitespace-delimited files to prevent spurious columns at the beginning or end.
If splitting on whitespace then replace unquoted tabs with space first.
"""
if self.delimiter == r"\s":
line = _replace_tab_with_space(line, self.escapechar, self.quotechar)
return line.strip() + "\n"
def process_val(self, val):
"""Remove whitespace at the beginning or end of value."""
return val.strip(" \t")
def __call__(self, lines):
"""Return an iterator over the table ``lines``, where each iterator output
is a list of the split line values.
Parameters
----------
lines : list
List of table lines
Yields
------
line : list of str
Each line's split values.
"""
if self.process_line:
lines = [self.process_line(x) for x in lines]
delimiter = " " if self.delimiter == r"\s" else self.delimiter
csv_reader = csv.reader(
lines,
delimiter=delimiter,
doublequote=self.doublequote,
escapechar=self.escapechar,
quotechar=self.quotechar,
quoting=self.quoting,
skipinitialspace=self.skipinitialspace,
)
for vals in csv_reader:
if self.process_val:
yield [self.process_val(x) for x in vals]
else:
yield vals
def join(self, vals):
delimiter = " " if self.delimiter is None else str(self.delimiter)
if self.csv_writer is None:
self.csv_writer = CsvWriter(
delimiter=delimiter,
doublequote=self.doublequote,
escapechar=self.escapechar,
quotechar=self.quotechar,
quoting=self.quoting,
)
if self.process_val:
vals = [self.process_val(x) for x in vals]
out = self.csv_writer.writerow(vals).rstrip("\r\n")
return out
def _replace_tab_with_space(line, escapechar, quotechar):
"""Replace tabs with spaces in given string, preserving quoted substrings.
Parameters
----------
line : str
String containing tabs to be replaced with spaces.
escapechar : str
Character in ``line`` used to escape special characters.
quotechar : str
Character in ``line`` indicating the start/end of a substring.
Returns
-------
line : str
A copy of ``line`` with tabs replaced by spaces, preserving quoted substrings.
"""
newline = []
in_quote = False
lastchar = "NONE"
for char in line:
if char == quotechar and lastchar != escapechar:
in_quote = not in_quote
if char == "\t" and not in_quote:
char = " "
lastchar = char
newline.append(char)
return "".join(newline)
def _get_line_index(line_or_func, lines):
"""Return the appropriate line index, depending on ``line_or_func`` which
can be either a function, a positive or negative int, or None.
"""
if callable(line_or_func):
return line_or_func(lines)
elif line_or_func:
if line_or_func >= 0:
return line_or_func
else:
n_lines = sum(1 for line in lines)
return n_lines + line_or_func
else:
return line_or_func
class BaseHeader:
"""
Base table header reader.
"""
auto_format = "col{}"
""" format string for auto-generating column names """
start_line = None
""" None, int, or a function of ``lines`` that returns None or int """
comment = None
""" regular expression for comment lines """
splitter_class = DefaultSplitter
""" Splitter class for splitting data lines into columns """
names = None
""" list of names corresponding to each data column """
write_comment = False
write_spacer_lines = ["ASCII_TABLE_WRITE_SPACER_LINE"]
def __init__(self):
self.splitter = self.splitter_class()
def _set_cols_from_names(self):
self.cols = [Column(name=x) for x in self.names]
def update_meta(self, lines, meta):
"""
Extract any table-level metadata, e.g. keywords, comments, column metadata, from
the table ``lines`` and update the OrderedDict ``meta`` in place. This base
method extracts comment lines and stores them in ``meta`` for output.
"""
if self.comment:
re_comment = re.compile(self.comment)
comment_lines = [x for x in lines if re_comment.match(x)]
else:
comment_lines = []
comment_lines = [
re.sub("^" + self.comment, "", x).strip() for x in comment_lines
]
if comment_lines:
meta.setdefault("table", {})["comments"] = comment_lines
def get_cols(self, lines):
"""Initialize the header Column objects from the table ``lines``.
Based on the previously set Header attributes find or create the column names.
Sets ``self.cols`` with the list of Columns.
Parameters
----------
lines : list
List of table lines
"""
start_line = _get_line_index(self.start_line, self.process_lines(lines))
if start_line is None:
# No header line so auto-generate names from n_data_cols
# Get the data values from the first line of table data to determine n_data_cols
try:
first_data_vals = next(self.data.get_str_vals())
except StopIteration:
raise InconsistentTableError(
"No data lines found so cannot autogenerate column names"
)
n_data_cols = len(first_data_vals)
self.names = [self.auto_format.format(i) for i in range(1, n_data_cols + 1)]
else:
for i, line in enumerate(self.process_lines(lines)):
if i == start_line:
break
else: # No header line matching
raise ValueError("No header line found in table")
self.names = next(self.splitter([line]))
self._set_cols_from_names()
def process_lines(self, lines):
"""Generator to yield non-blank and non-comment lines."""
re_comment = re.compile(self.comment) if self.comment else None
# Yield non-comment lines
for line in lines:
if line.strip() and (not self.comment or not re_comment.match(line)):
yield line
def write_comments(self, lines, meta):
if self.write_comment not in (False, None):
for comment in meta.get("comments", []):
lines.append(self.write_comment + comment)
def write(self, lines):
if self.start_line is not None:
for i, spacer_line in zip(
range(self.start_line), itertools.cycle(self.write_spacer_lines)
):
lines.append(spacer_line)
lines.append(self.splitter.join([x.info.name for x in self.cols]))
@property
def colnames(self):
"""Return the column names of the table."""
return tuple(
col.name if isinstance(col, Column) else col.info.name for col in self.cols
)
def remove_columns(self, names):
"""
Remove several columns from the table.
Parameters
----------
names : list
A list containing the names of the columns to remove
"""
colnames = self.colnames
for name in names:
if name not in colnames:
raise KeyError(f"Column {name} does not exist")
self.cols = [col for col in self.cols if col.name not in names]
def rename_column(self, name, new_name):
"""
Rename a column.
Parameters
----------
name : str
The current name of the column.
new_name : str
The new name for the column
"""
try:
idx = self.colnames.index(name)
except ValueError:
raise KeyError(f"Column {name} does not exist")
col = self.cols[idx]
# For writing self.cols can contain cols that are not Column. Raise
# exception in that case.
if isinstance(col, Column):
col.name = new_name
else:
raise TypeError(f"got column type {type(col)} instead of required {Column}")
def get_type_map_key(self, col):
return col.raw_type
def get_col_type(self, col):
try:
type_map_key = self.get_type_map_key(col)
return self.col_type_map[type_map_key.lower()]
except KeyError:
raise ValueError(
f'Unknown data type ""{col.raw_type}"" for column "{col.name}"'
)
def check_column_names(self, names, strict_names, guessing):
"""
Check column names.
This must be done before applying the names transformation
so that guessing will fail appropriately if ``names`` is supplied.
For instance if the basic reader is given a table with no column header
row.
Parameters
----------
names : list
User-supplied list of column names
strict_names : bool
Whether to impose extra requirements on names
guessing : bool
True if this method is being called while guessing the table format
"""
if strict_names:
# Impose strict requirements on column names (normally used in guessing)
bads = [" ", ",", "|", "\t", "'", '"']
for name in self.colnames:
if (
_is_number(name)
or len(name) == 0
or name[0] in bads
or name[-1] in bads
):
raise InconsistentTableError(
f"Column name {name!r} does not meet strict name requirements"
)
# When guessing require at least two columns, except for ECSV which can
# reliably be guessed from the header requirements.
if (
guessing
and len(self.colnames) <= 1
and self.__class__.__name__ != "EcsvHeader"
):
raise ValueError(
"Table format guessing requires at least two columns, got {}".format(
list(self.colnames)
)
)
if names is not None and len(names) != len(self.colnames):
raise InconsistentTableError(
f"Length of names argument ({len(names)}) does not match number "
f"of table columns ({len(self.colnames)})"
)
class BaseData:
"""
Base table data reader.
"""
start_line = None
""" None, int, or a function of ``lines`` that returns None or int """
end_line = None
""" None, int, or a function of ``lines`` that returns None or int """
comment = None
""" Regular expression for comment lines """
splitter_class = DefaultSplitter
""" Splitter class for splitting data lines into columns """
write_spacer_lines = ["ASCII_TABLE_WRITE_SPACER_LINE"]
fill_include_names = None
fill_exclude_names = None
fill_values = [(masked, "")]
formats = {}
def __init__(self):
# Need to make sure fill_values list is instance attribute, not class attribute.
# On read, this will be overwritten by the default in the ui.read (thus, in
# the current implementation there can be no different default for different
# Readers). On write, ui.py does not specify a default, so this line here matters.
self.fill_values = copy.copy(self.fill_values)
self.formats = copy.copy(self.formats)
self.splitter = self.splitter_class()
def process_lines(self, lines):
"""
READ: Strip out comment lines and blank lines from list of ``lines``.
Parameters
----------
lines : list
All lines in table
Returns
-------
lines : list
List of lines
"""
nonblank_lines = (x for x in lines if x.strip())
if self.comment:
re_comment = re.compile(self.comment)
return [x for x in nonblank_lines if not re_comment.match(x)]
else:
return list(nonblank_lines)
def get_data_lines(self, lines):
"""
READ: Set ``data_lines`` attribute to lines slice comprising table data values.
"""
data_lines = self.process_lines(lines)
start_line = _get_line_index(self.start_line, data_lines)
end_line = _get_line_index(self.end_line, data_lines)
if start_line is not None or end_line is not None:
self.data_lines = data_lines[slice(start_line, end_line)]
else: # Don't copy entire data lines unless necessary
self.data_lines = data_lines
def get_str_vals(self):
"""Return a generator that returns a list of column values (as strings)
for each data line.
"""
return self.splitter(self.data_lines)
def masks(self, cols):
"""READ: Set fill value for each column and then apply that fill value.
In the first step it is evaluated with value from ``fill_values`` applies to
which column using ``fill_include_names`` and ``fill_exclude_names``.
In the second step all replacements are done for the appropriate columns.
"""
if self.fill_values:
self._set_fill_values(cols)
self._set_masks(cols)
def _set_fill_values(self, cols):
"""READ, WRITE: Set fill values of individual cols based on fill_values of BaseData.
fill values has the following form:
<fill_spec> = (<bad_value>, <fill_value>, <optional col_name>...)
fill_values = <fill_spec> or list of <fill_spec>'s
"""
if self.fill_values:
# when we write tables the columns may be astropy.table.Columns
# which don't carry a fill_values by default
for col in cols:
if not hasattr(col, "fill_values"):
col.fill_values = {}
# if input is only one <fill_spec>, then make it a list
with suppress(TypeError):
self.fill_values[0] + ""
self.fill_values = [self.fill_values]
# Step 1: Set the default list of columns which are affected by
# fill_values
colnames = set(self.header.colnames)
if self.fill_include_names is not None:
colnames.intersection_update(self.fill_include_names)
if self.fill_exclude_names is not None:
colnames.difference_update(self.fill_exclude_names)
# Step 2a: Find out which columns are affected by this tuple
# iterate over reversed order, so last condition is set first and
# overwritten by earlier conditions
for replacement in reversed(self.fill_values):
if len(replacement) < 2:
raise ValueError(
"Format of fill_values must be "
"(<bad>, <fill>, <optional col1>, ...)"
)
elif len(replacement) == 2:
affect_cols = colnames
else:
affect_cols = replacement[2:]
for i, key in (
(i, x)
for i, x in enumerate(self.header.colnames)
if x in affect_cols
):
cols[i].fill_values[replacement[0]] = str(replacement[1])
def _set_masks(self, cols):
"""READ: Replace string values in col.str_vals and set masks."""
if self.fill_values:
for col in (col for col in cols if col.fill_values):
col.mask = numpy.zeros(len(col.str_vals), dtype=bool)
for i, str_val in (
(i, x) for i, x in enumerate(col.str_vals) if x in col.fill_values
):
col.str_vals[i] = col.fill_values[str_val]
col.mask[i] = True
def _replace_vals(self, cols):
"""WRITE: replace string values in col.str_vals."""
if self.fill_values:
for col in (col for col in cols if col.fill_values):
for i, str_val in (
(i, x) for i, x in enumerate(col.str_vals) if x in col.fill_values
):
col.str_vals[i] = col.fill_values[str_val]
if masked in col.fill_values and hasattr(col, "mask"):
mask_val = col.fill_values[masked]
for i in col.mask.nonzero()[0]:
col.str_vals[i] = mask_val
def str_vals(self):
"""WRITE: convert all values in table to a list of lists of strings.
This sets the fill values and possibly column formats from the input
formats={} keyword, then ends up calling table.pprint._pformat_col_iter()
by a circuitous path. That function does the real work of formatting.
Finally replace anything matching the fill_values.
Returns
-------
values : list of list of str
"""
self._set_fill_values(self.cols)
self._set_col_formats()
for col in self.cols:
col.str_vals = list(col.info.iter_str_vals())
self._replace_vals(self.cols)
return [col.str_vals for col in self.cols]
def write(self, lines):
"""Write ``self.cols`` in place to ``lines``.
Parameters
----------
lines : list
List for collecting output of writing self.cols.
"""
if callable(self.start_line):
raise TypeError("Start_line attribute cannot be callable for write()")
else:
data_start_line = self.start_line or 0
while len(lines) < data_start_line:
lines.append(itertools.cycle(self.write_spacer_lines))
col_str_iters = self.str_vals()
for vals in zip(*col_str_iters):
lines.append(self.splitter.join(vals))
def _set_col_formats(self):
"""WRITE: set column formats."""
for col in self.cols:
if col.info.name in self.formats:
col.info.format = self.formats[col.info.name]
def convert_numpy(numpy_type):
"""Return a tuple containing a function which converts a list into a numpy
array and the type produced by the converter function.
Parameters
----------
numpy_type : numpy data-type
The numpy type required of an array returned by ``converter``. Must be a
valid `numpy type <https://numpy.org/doc/stable/user/basics.types.html>`_
(e.g., numpy.uint, numpy.int8, numpy.int64, numpy.float64) or a python
type covered by a numpy type (e.g., int, float, str, bool).
Returns
-------
converter : callable
``converter`` is a function which accepts a list and converts it to a
numpy array of type ``numpy_type``.
converter_type : type
``converter_type`` tracks the generic data type produced by the
converter function.
Raises
------
ValueError