From 1fed0ed9b7c02e42b98f0dcc8f1851c6f8009409 Mon Sep 17 00:00:00 2001 From: Stuart Sim Date: Mon, 24 Mar 2014 19:01:25 +0000 Subject: [PATCH 1/5] proposed fix to warning about Pandas copying --- tardis/atomic.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tardis/atomic.py b/tardis/atomic.py index ef76003e389..92a28fb21da 100644 --- a/tardis/atomic.py +++ b/tardis/atomic.py @@ -477,6 +477,7 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat if max_ion_number is not None: self.lines = self.lines[self.lines['ion_number'] <= max_ion_number] + self.lines.is_copy = False self.lines.sort('wavelength', inplace=True) self.lines_index = pd.Series(np.arange(len(self.lines), dtype=int), index=self.lines.index) From 88a26c414a018dbd2e511d1fcd8b2e9bfd425cdd Mon Sep 17 00:00:00 2001 From: Stuart Sim Date: Tue, 25 Mar 2014 15:06:32 +0000 Subject: [PATCH 2/5] Cleaned up version that removes copies of the dataframes for lines and levels --- tardis/atomic.py | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/tardis/atomic.py b/tardis/atomic.py index 92a28fb21da..2a3f6c09c58 100644 --- a/tardis/atomic.py +++ b/tardis/atomic.py @@ -274,10 +274,10 @@ class AtomData(object): ::important to note here is that ion describes the final ion state e.g. H I - H II is described with ion=2 - levels_data : ~astropy.table.Table + levels : ~astropy.table.Table containing the levels data: z, ion, level_number, energy, g - lines_data : ~astropy.table.Table + lines : ~astropy.table.Table containing the lines data: wavelength, z, ion, levels_number_lower, levels_number_upper, f_lu, f_ul @@ -313,8 +313,8 @@ def from_hdf5(cls, fname=None): atom_data = read_basic_atom_data(fname) ionization_data = read_ionization_data(fname) - levels_data = read_levels_data(fname) - lines_data = read_lines_data(fname) + levels = read_levels_data(fname) + lines = read_lines_data(fname) with h5py.File(fname, 'r') as h5_file: h5_datasets = h5_file.keys() @@ -344,8 +344,8 @@ def from_hdf5(cls, fname=None): else: ion_cx_data = None - atom_data = cls(atom_data=atom_data, ionization_data=ionization_data, levels_data=levels_data, - lines_data=lines_data, macro_atom_data=macro_atom_data, zeta_data=zeta_data, + atom_data = cls(atom_data=atom_data, ionization_data=ionization_data, levels=levels, + lines=lines, macro_atom_data=macro_atom_data, zeta_data=zeta_data, collision_data=(collision_data, collision_data_temperatures), synpp_refs=synpp_refs, ion_cx_data=ion_cx_data) @@ -361,7 +361,7 @@ def from_hdf5(cls, fname=None): return atom_data - def __init__(self, atom_data, ionization_data, levels_data, lines_data, macro_atom_data=None, zeta_data=None, + def __init__(self, atom_data, ionization_data, levels, lines, macro_atom_data=None, zeta_data=None, collision_data=None, synpp_refs=None, ion_cx_data=None): @@ -417,18 +417,18 @@ def __init__(self, atom_data, ionization_data, levels_data, lines_data, macro_at self.ionization_data.ionization_energy = units.Unit('eV').to('erg', self.ionization_data.ionization_energy.values) - self.levels_data = DataFrame(levels_data.__array__()) - self.levels_data.energy = units.Unit('eV').to('erg', self.levels_data.energy.values) + self.levels = DataFrame(levels.__array__()) + self.levels.energy = units.Unit('eV').to('erg', self.levels.energy.values) - self.lines_data = DataFrame(lines_data.__array__()) - self.lines_data.set_index('line_id', inplace=True) - self.lines_data['nu'] = units.Unit('angstrom').to('Hz', self.lines_data['wavelength'], units.spectral()) - self.lines_data['wavelength_cm'] = units.Unit('angstrom').to('cm', self.lines_data['wavelength']) + self.lines = DataFrame(lines.__array__()) + self.lines.set_index('line_id', inplace=True) + self.lines['nu'] = units.Unit('angstrom').to('Hz', self.lines['wavelength'], units.spectral()) + self.lines['wavelength_cm'] = units.Unit('angstrom').to('cm', self.lines['wavelength']) - #tmp_lines_index = pd.MultiIndex.from_arrays(self.lines_data) + #tmp_lines_index = pd.MultiIndex.from_arrays(self.lines) #self.lines_inde self.symbol2atomic_number = OrderedDict(zip(self.atom_data['symbol'].values, self.atom_data.index)) @@ -445,7 +445,7 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat nlte_species=[]): """ Prepares the atom data to set the lines, levels and if requested macro atom data. - This function mainly cuts the `levels_data` and `lines_data` by discarding any data that is not needed (any data + This function mainly cuts the `levels` and `lines` by discarding any data that is not needed (any data for atoms that are not needed Parameters @@ -466,18 +466,17 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat self.nlte_species = nlte_species - self.levels = self.levels_data[self.levels_data['atomic_number'].isin(self.selected_atomic_numbers)] + self.levels = self.levels[self.levels['atomic_number'].isin(self.selected_atomic_numbers)] if max_ion_number is not None: self.levels = self.levels[self.levels['ion_number'] <= max_ion_number] self.levels = self.levels.set_index(['atomic_number', 'ion_number', 'level_number']) self.levels_index = pd.Series(np.arange(len(self.levels), dtype=int), index=self.levels.index) #cutting levels_lines - self.lines = self.lines_data[self.lines_data['atomic_number'].isin(self.selected_atomic_numbers)] + self.lines = self.lines[self.lines['atomic_number'].isin(self.selected_atomic_numbers)] if max_ion_number is not None: self.lines = self.lines[self.lines['ion_number'] <= max_ion_number] - self.lines.is_copy = False self.lines.sort('wavelength', inplace=True) self.lines_index = pd.Series(np.arange(len(self.lines), dtype=int), index=self.lines.index) @@ -551,7 +550,7 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat def __repr__(self): return "" % \ - (self.uuid1, self.md5, self.lines_data.atomic_number.count(), self.levels_data.energy.count()) + (self.uuid1, self.md5, self.lines.atomic_number.count(), self.levels.energy.count()) class NLTEData(object): From 503a657fc6b0834c0fb0602b1d7f380a8183f416 Mon Sep 17 00:00:00 2001 From: Michi Date: Wed, 26 Mar 2014 13:43:20 +0100 Subject: [PATCH 3/5] new version with self._levels --- tardis/atomic.py | 67 ++++++++++++++++++++++++++---------------------- 1 file changed, 36 insertions(+), 31 deletions(-) diff --git a/tardis/atomic.py b/tardis/atomic.py index 2a3f6c09c58..6b87cf7efe5 100644 --- a/tardis/atomic.py +++ b/tardis/atomic.py @@ -228,8 +228,10 @@ def read_collision_data(fname): def read_ion_cx_data(fname): try: h5_file = h5py.File(fname, 'r') - ion_cx_th_data = h5_file['ionization_cx_threshold'] - ion_cx_sp_data = h5_file['ionization_cx_support'] + ion_cx_th_data = None + ion_cx_sp_data = None + #ion_cx_th_data = h5_file['ion_cx_data'] + #ion_cx_sp_data = h5_file['ion_cx_sp_data'] return ion_cx_th_data, ion_cx_sp_data except IOError, err: print(err.errno) @@ -313,8 +315,8 @@ def from_hdf5(cls, fname=None): atom_data = read_basic_atom_data(fname) ionization_data = read_ionization_data(fname) - levels = read_levels_data(fname) - lines = read_lines_data(fname) + levels_data = read_levels_data(fname) + lines_data = read_lines_data(fname) with h5py.File(fname, 'r') as h5_file: h5_datasets = h5_file.keys() @@ -340,28 +342,28 @@ def from_hdf5(cls, fname=None): synpp_refs = None if 'ion_cx_data' in h5_datasets and 'ion_cx_data' in h5_datasets: - ion_cx_data = read_ion_cx_data(fname) + ion_cx_data = None else: ion_cx_data = None - atom_data = cls(atom_data=atom_data, ionization_data=ionization_data, levels=levels, - lines=lines, macro_atom_data=macro_atom_data, zeta_data=zeta_data, + atom_data = cls(atom_data=atom_data, ionization_data=ionization_data, levels_data=levels_data, + lines_data=lines_data, macro_atom_data=macro_atom_data, zeta_data=zeta_data, collision_data=(collision_data, collision_data_temperatures), synpp_refs=synpp_refs, ion_cx_data=ion_cx_data) - with h5py.File(fname, 'r') as h5_file: - atom_data.uuid1 = h5_file.attrs['uuid1'] - atom_data.md5 = h5_file.attrs['md5'] - atom_data.version = h5_file.attrs.get('database_version', None) + #with h5py.File(fname, 'r') as h5_file: + # atom_data.uuid1 = h5_file.attrs['uuid1'] + # atom_data.md5 = h5_file.attrs['md5'] + # atom_data.version = h5_file.attrs.get('database_version', None) - if atom_data.version is not None: - atom_data.data_sources = pickle.loads(h5_file.attrs['data_sources']) + # if atom_data.version is not None: + # atom_data.data_sources = pickle.loads(h5_file.attrs['data_sources']) - logger.info('Read Atom Data with UUID=%s and MD5=%s', atom_data.uuid1, atom_data.md5) + # logger.info('Read Atom Data with UUID=%s and MD5=%s', atom_data.uuid1, atom_data.md5) return atom_data - def __init__(self, atom_data, ionization_data, levels, lines, macro_atom_data=None, zeta_data=None, + def __init__(self, atom_data, ionization_data, levels_data, lines_data, macro_atom_data=None, zeta_data=None, collision_data=None, synpp_refs=None, ion_cx_data=None): @@ -417,13 +419,13 @@ def __init__(self, atom_data, ionization_data, levels, lines, macro_atom_data=No self.ionization_data.ionization_energy = units.Unit('eV').to('erg', self.ionization_data.ionization_energy.values) - self.levels = DataFrame(levels.__array__()) - self.levels.energy = units.Unit('eV').to('erg', self.levels.energy.values) + self._levels = DataFrame(levels_data.__array__()) + self._levels.energy = units.Unit('eV').to('erg', self._levels.energy.values) - self.lines = DataFrame(lines.__array__()) - self.lines.set_index('line_id', inplace=True) - self.lines['nu'] = units.Unit('angstrom').to('Hz', self.lines['wavelength'], units.spectral()) - self.lines['wavelength_cm'] = units.Unit('angstrom').to('cm', self.lines['wavelength']) + self._lines = DataFrame(lines_data.__array__()) + self._lines.set_index('line_id', inplace=True) + self._lines['nu'] = units.Unit('angstrom').to('Hz', self._lines['wavelength'], units.spectral()) + self._lines['wavelength_cm'] = units.Unit('angstrom').to('cm', self._lines['wavelength']) @@ -466,23 +468,26 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat self.nlte_species = nlte_species - self.levels = self.levels[self.levels['atomic_number'].isin(self.selected_atomic_numbers)] + self._levels = self._levels[self._levels['atomic_number'].isin(self.selected_atomic_numbers)] if max_ion_number is not None: - self.levels = self.levels[self.levels['ion_number'] <= max_ion_number] - self.levels = self.levels.set_index(['atomic_number', 'ion_number', 'level_number']) + self._levels = self._levels[self._levels['ion_number'] <= max_ion_number] + self._levels = self._levels.set_index(['atomic_number', 'ion_number', 'level_number']) + self.levels = self.lines.copy() - self.levels_index = pd.Series(np.arange(len(self.levels), dtype=int), index=self.levels.index) + self.levels_index = pd.Series(np.arange(len(self._levels), dtype=int), index=self._levels.index) #cutting levels_lines - self.lines = self.lines[self.lines['atomic_number'].isin(self.selected_atomic_numbers)] + self._lines = self._lines[self._lines['atomic_number'].isin(self.selected_atomic_numbers)] if max_ion_number is not None: - self.lines = self.lines[self.lines['ion_number'] <= max_ion_number] + self._lines = self._lines[self._lines['ion_number'] <= max_ion_number] - self.lines.sort('wavelength', inplace=True) + self._lines.sort('wavelength', inplace=True) - self.lines_index = pd.Series(np.arange(len(self.lines), dtype=int), index=self.lines.index) + self.lines = self._lines.copy() + + self.lines_index = pd.Series(np.arange(len(self._lines), dtype=int), index=self._lines.index) - tmp_lines_lower2level_idx = pd.MultiIndex.from_arrays([self.lines['atomic_number'], self.lines['ion_number'], - self.lines['level_number_lower']]) + tmp_lines_lower2level_idx = pd.MultiIndex.from_arrays([self._lines['atomic_number'], self._lines['ion_number'], + self._lines['level_number_lower']]) self.lines_lower2level_idx = self.levels_index.ix[tmp_lines_lower2level_idx].values.astype(np.int64) From a4b205ca312d09a3af17f9b41ded45b26f96b074 Mon Sep 17 00:00:00 2001 From: Michi Date: Wed, 26 Mar 2014 16:13:06 +0100 Subject: [PATCH 4/5] changed self.levels to self._levels --- tardis/atomic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tardis/atomic.py b/tardis/atomic.py index 6b87cf7efe5..9e6b06c183f 100644 --- a/tardis/atomic.py +++ b/tardis/atomic.py @@ -472,7 +472,7 @@ def prepare_atom_data(self, selected_atomic_numbers, line_interaction_type='scat if max_ion_number is not None: self._levels = self._levels[self._levels['ion_number'] <= max_ion_number] self._levels = self._levels.set_index(['atomic_number', 'ion_number', 'level_number']) - self.levels = self.lines.copy() + self.levels = self._levels.copy() self.levels_index = pd.Series(np.arange(len(self._levels), dtype=int), index=self._levels.index) #cutting levels_lines From 6013ec8c26f00b68403f7009fff66f8cc7045b8b Mon Sep 17 00:00:00 2001 From: Michi Date: Wed, 26 Mar 2014 16:23:49 +0100 Subject: [PATCH 5/5] comment removed --- tardis/atomic.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/tardis/atomic.py b/tardis/atomic.py index 9e6b06c183f..e561c61cb7d 100644 --- a/tardis/atomic.py +++ b/tardis/atomic.py @@ -228,10 +228,8 @@ def read_collision_data(fname): def read_ion_cx_data(fname): try: h5_file = h5py.File(fname, 'r') - ion_cx_th_data = None - ion_cx_sp_data = None - #ion_cx_th_data = h5_file['ion_cx_data'] - #ion_cx_sp_data = h5_file['ion_cx_sp_data'] + ion_cx_th_data = h5_file['ion_cx_data'] + ion_cx_sp_data = h5_file['ion_cx_sp_data'] return ion_cx_th_data, ion_cx_sp_data except IOError, err: print(err.errno) @@ -351,15 +349,15 @@ def from_hdf5(cls, fname=None): collision_data=(collision_data, collision_data_temperatures), synpp_refs=synpp_refs, ion_cx_data=ion_cx_data) - #with h5py.File(fname, 'r') as h5_file: - # atom_data.uuid1 = h5_file.attrs['uuid1'] - # atom_data.md5 = h5_file.attrs['md5'] - # atom_data.version = h5_file.attrs.get('database_version', None) + with h5py.File(fname, 'r') as h5_file: + atom_data.uuid1 = h5_file.attrs['uuid1'] + atom_data.md5 = h5_file.attrs['md5'] + atom_data.version = h5_file.attrs.get('database_version', None) - # if atom_data.version is not None: - # atom_data.data_sources = pickle.loads(h5_file.attrs['data_sources']) + if atom_data.version is not None: + atom_data.data_sources = pickle.loads(h5_file.attrs['data_sources']) - # logger.info('Read Atom Data with UUID=%s and MD5=%s', atom_data.uuid1, atom_data.md5) + logger.info('Read Atom Data with UUID=%s and MD5=%s', atom_data.uuid1, atom_data.md5) return atom_data