forked from bccp/nbodykit
-
Notifications
You must be signed in to change notification settings - Fork 1
/
species.py
262 lines (210 loc) · 9.44 KB
/
species.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
from nbodykit.base.catalog import CatalogSourceBase
from nbodykit.utils import attrs_to_dict
import numpy
import logging
from six import string_types
class MultipleSpeciesCatalog(CatalogSourceBase):
"""
A CatalogSource interface for handling multiples species
of particles.
This CatalogSource stores a copy of the original CatalogSource objects
for each species, providing access to the columns via the format
``species/`` where "species" is one of the species names provided.
Parameters
----------
names : list of str
list of strings specifying the names of the various species;
data columns are prefixed with "species/" where "species" is
in ``names``
*species : two or more CatalogSource objects
catalogs to be combined into a single catalog, which give the
data for different species of particles; as many catalogs
as names must be provided
Examples
--------
Initialization:
>>> data = UniformCatalog(nbar=3e-5, BoxSize=512., seed=42)
>>> randoms = UniformCatalog(nbar=3e-5, BoxSize=512., seed=84)
>>> cat = MultipleSpeciesCatalog(['data', 'randoms'], data, randoms)
Accessing the Catalogs for individual species:
>>> data = cat["data"] # a copy of the original "data" object
Accessing individual columns:
>>> data_pos = cat["data/Position"]
Setting new columns:
>>> cat["data"]["new_column"] = 1.0
>>> assert "data/new_column" in cat
"""
logger = logging.getLogger('MultipleSpeciesCatalog')
def __repr__(self):
return "MultipleSpeciesCatalog(species=%s)" %str(self.attrs['species'])
def __init__(self, names, *species, **kwargs):
if len(set(names)) != len(names):
raise ValueError("each species must have a unique name")
if not all(cat.comm is species[0].comm for cat in species):
raise ValueError("communicator mismatch in MultipleSpeciesCatalog")
if len(names) != len(species):
raise ValueError("a name must be provided for each species catalog provided")
CatalogSourceBase.__init__(self, species[0].comm)
self.attrs['species'] = names
# update the dictionary with data/randoms attrs
for cat, name in zip(species, names):
self.attrs.update(attrs_to_dict(cat, name + '.'))
# update the rest of meta-data
self.attrs.update(kwargs)
# no size!
self.size = NotImplemented
self.csize = NotImplemented
# store copies of the original input catalogs as (name:catalog) dict
self._sources = {name:cat.copy() for name,cat in zip(names, species)}
@property
def species(self):
"""
List of species names
"""
return self.attrs['species']
@property
def columns(self):
"""
Columns for individual species can be accessed using a ``species/``
prefix and the column name, i.e., ``data/Position``.
"""
return ['%s/%s' %(species, col)
for species in self.species
for col in self._sources[species].columns]
@property
def hardcolumns(self):
"""
Hardcolumn of the form ``species/name``
"""
return ['%s/%s' %(species, col)
for species in self.species
for col in self._sources[species].hardcolumns]
def __getitem__(self, key):
"""
This provides access to the underlying data in two ways:
- The CatalogSource object for a species can be accessed if ``key``
is a species name.
- Individual columns for a species can be accessed using the
format: ``species/column``.
"""
# return a new CatalogSource holding only the specific species
if isinstance(key, string_types):
if key in self.species:
return self._sources[key]
species, subcol = split_column(key, self.species)
return CatalogSourceBase.__getitem__(self._sources[species], subcol)
return CatalogSourceBase.__getitem__(self, key)
def __setitem__(self, col, value):
"""
Add columns to any of the species catalogs.
.. note::
New column names should be prefixed by 'species/' where
'species' is a name in the :attr:`species` attribute.
"""
species, subcol = split_column(col, self.species)
# check size
size = len(self._sources[species])
if not numpy.isscalar(value):
if len(value) != size:
args = (col, size, len(value))
raise ValueError("error setting '%s' column, data must be array of size %d, not %d" % args)
# add the column to the CatalogSource in "_sources"
return CatalogSourceBase.__setitem__(self._sources[species], subcol, value)
def __delitem__(self, col):
"""
Delete a column of the form ``species/column``
"""
species, subcol = split_column(col, self.species)
return CatalogSourceBase.__delitem__(self._sources[species], subcol)
def to_mesh(self, Nmesh=None, BoxSize=None, dtype='f4', interlaced=False,
compensated=False, resampler='cic', weight='Weight',
value='Value', selection='Selection', position='Position', window=None):
"""
Convert the catalog to a mesh, which knows how to "paint" the
the combined density field, summed over all particle species.
Parameters
----------
Nmesh : int, 3-vector, optional
the number of cells per box side; can be inferred from ``attrs``
if the value is the same for all species
BoxSize : float, 3-vector, optional
the size of the box; can be inferred from ``attrs``
if the value is the same for all species
dtype : str, dtype, optional
the data type of the mesh when painting
interlaced : bool, optional
whether to use interlacing to reduce aliasing when painting the
particles on the mesh
compensated : bool, optional
whether to apply a Fourier-space transfer function to account for
the effects of the gridding + aliasing
resampler: str, optional
the string name of the resampler to use when interpolating the
weight : str, optional
the name of the column specifying the weight for each particle
selection : str, optional
the name of the column that specifies which (if any) slice
of the CatalogSource to take
value: str, optional
the name of the column specifying the field value for each particle
position : str, optional
the name of the column that specifies the position data of the
objects in the catalog
window : str, optional
the string name of the window to use when interpolating (deprecated, use resampler)
"""
from nbodykit.source.mesh.species import MultipleSpeciesCatalogMesh
if window is not None:
raise RuntimeError("use resampler instead")
# verify that all of the required columns exist
for name in self.species:
for col in [position, selection, weight, value]:
if col not in self[name]:
raise ValueError("the '%s' species is missing the '%s' column" %(name, col))
# try to find BoxSize and Nmesh
if BoxSize is None:
BoxSize = check_species_metadata('BoxSize', self.attrs, self.species)
if Nmesh is None:
Nmesh = check_species_metadata('Nmesh', self.attrs, self.species)
# return the mesh
return MultipleSpeciesCatalogMesh(self,
Nmesh=Nmesh,
BoxSize=BoxSize,
dtype=dtype,
selection=selection,
position=position,
weight=weight,
value=value,
interlaced=interlaced,
compensated=compensated,
resampler=resampler)
def check_species_metadata(name, attrs, species):
"""
Check to see if there is a single value for
``name`` in the meta-data of all the species
"""
_vals = []
if name in attrs:
_vals.append(attrs[name])
for s in species:
if s + '.' + name in attrs:
_vals.append(attrs.get(s + '.' + name ))
if len(_vals) == 0:
raise ValueError("please specify ``%s`` attributes" % name)
if not all(numpy.equal(_vals[0], i).all() for i in _vals):
raise ValueError("please specify ``%s`` attributes that are consistent for each species and for the multi species catalog; " % name)
return _vals[0]
def split_column(col, species):
"""
Split the column name of the form 'species/name'
"""
fields = col.split('/')
if len(fields) != 2:
msg = "new column names should be prefixed by 'species/' where "
msg += "'species' is one of %s" % str(species)
raise ValueError(msg)
this_species, subcol = fields
if this_species not in species:
args = (this_species, str(species))
raise ValueError("species '%s' is not valid; should be one of %s" %args)
return this_species, subcol