Skip to content

Commit

Permalink
Merge pull request #116 from wkerzendorf/delete_duplicate_levels
Browse files Browse the repository at this point in the history
Delete duplicate levels
  • Loading branch information
ssim committed Aug 7, 2018
2 parents e0de950 + f74079f commit 0d6a899
Show file tree
Hide file tree
Showing 10 changed files with 5,717 additions and 72 deletions.
18 changes: 17 additions & 1 deletion carsus/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,4 +14,20 @@
#if not _ASTROPY_SETUP_:
# from example_mod import *

from .base import init_db
import logging, sys
from .base import init_db
from tardis.util.colored_logger import ColoredFormatter, formatter_message

FORMAT = "[$BOLD%(name)-20s$RESET][%(levelname)-18s] %(message)s ($BOLD%(filename)s$RESET:%(lineno)d)"
COLOR_FORMAT = formatter_message(FORMAT, True)


logging.captureWarnings(True)
logger = logging.getLogger('carsus')
logger.setLevel(logging.INFO)

console_handler = logging.StreamHandler(sys.stdout)
console_formatter = ColoredFormatter(COLOR_FORMAT)
console_handler.setFormatter(console_formatter)
logger.addHandler(console_handler)
logging.getLogger('py.warnings').addHandler(console_handler)
12 changes: 6 additions & 6 deletions carsus/io/__init__.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from carsus.io.nist import NISTIonizationEnergiesParser, NISTIonizationEnergiesIngester,\
NISTWeightsCompPyparser, NISTWeightsCompIngester
from carsus.io.chianti_ import ChiantiIonReader, ChiantiIngester
from carsus.io.kurucz import GFALLReader, GFALLIngester
from carsus.io.output import AtomData
from carsus.io.zeta import KnoxLongZetaIngester
#from carsus.io.nist import NISTIonizationEnergiesParser, NISTIonizationEnergiesIngester,\
# NISTWeightsCompPyparser, NISTWeightsCompIngester
#from carsus.io.chianti_ import ChiantiIonReader, ChiantiIngester
#from carsus.io.kurucz import GFALLReader, GFALLIngester
#from carsus.io.output import AtomData
#from carsus.io.zeta import KnoxLongZetaIngester
139 changes: 87 additions & 52 deletions carsus/io/kurucz/gfall.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import re
import re, logging

import numpy as np
import pandas as pd

Expand All @@ -13,6 +14,7 @@

GFALL_AIR_THRESHOLD = 200 # [nm], wavelengths above this value are given in air

logger = logging.getLogger(__name__)

class GFALLReader(object):
"""
Expand All @@ -28,12 +30,46 @@ class GFALLReader(object):
Return pandas DataFrame representation of gfall
"""
def __init__(self, fname):

gfall_fortran_format = ('F11.4,F7.3,F6.2,F12.3,F5.2,1X,A10,F12.3,F5.2,1X,'
'A10,F6.2,F6.2,F6.2,A4,I2,I2,I3,F6.3,I3,F6.3,I5,I5,'
'1X,I1,A1,1X,I1,A1,I1,A3,I5,I5,I6')

gfall_columns = ['wavelength', 'loggf', 'element_code', 'e_first', 'j_first',
'blank1', 'label_first', 'e_second', 'j_second', 'blank2',
'label_second', 'log_gamma_rad', 'log_gamma_stark',
'log_gamma_vderwaals', 'ref', 'nlte_level_no_first',
'nlte_level_no_second', 'isotope', 'log_f_hyperfine',
'isotope2', 'log_iso_abundance', 'hyper_shift_first',
'hyper_shift_second', 'blank3', 'hyperfine_f_first',
'hyperfine_note_first', 'blank4', 'hyperfine_f_second',
'hyperfine_note_second', 'line_strength_class', 'line_code',
'lande_g_first', 'lande_g_second', 'isotopic_shift']

default_unique_level_identifier = ['energy', 'j']
def __init__(self, fname, unique_level_identifier=None):
"""
Parameters
----------
fname: str
path to the gfall file
unique_level_identifier: list
list of attributes to identify unique levels from. Will always use
atomic_number and ion charge in addition.
"""
self.fname = fname
self._gfall_raw = None
self._gfall = None
self._levels = None
self._lines = None
if unique_level_identifier is None:
logger.warn('A specific combination to identify unique levels from '
'the gfall data has not been given. Defaulting to '
'["energy", "j"].')
self.unique_level_identifier = self.default_unique_level_identifier


@property
def gfall_raw(self):
Expand Down Expand Up @@ -77,44 +113,26 @@ def read_gfall_raw(self, fname=None):
if fname is None:
fname = self.fname

logger.info('Parsing GFALL {0}'.format(fname))

# FORMAT(F11.4,F7.3,F6.2,F12.3,F5.2,1X,A10,F12.3,F5.2,1X,A10,
# 3F6.2,A4,2I2,I3,F6.3,I3,F6.3,2I5,1X,A1,A1,1X,A1,A1,i1,A3,2I5,I6)

kurucz_fortran_format = ('F11.4,F7.3,F6.2,F12.3,F5.2,1X,A10,F12.3,F5.2,1X,'
'A10,F6.2,F6.2,F6.2,A4,I2,I2,I3,F6.3,I3,F6.3,I5,I5,'
'1X,I1,A1,1X,I1,A1,I1,A3,I5,I5,I6')

number_match = re.compile(r'\d+(\.\d+)?')
type_match = re.compile(r'[FIXA]')
type_dict = {'F': np.float64, 'I': np.int64, 'X': 'S1', 'A': 'S10'}
type_dict = {'F': np.float64, 'I': np.int64, 'X': str, 'A': str}
field_types = tuple([type_dict[item] for item in number_match.sub(
'', kurucz_fortran_format).split(',')])
'', self.gfall_fortran_format).split(',')])

field_widths = type_match.sub('', kurucz_fortran_format)
field_widths = type_match.sub('', self.gfall_fortran_format)
field_widths = map(int, re.sub(r'\.\d+', '', field_widths).split(','))

def read_remove_empty(fname):
""" Generator to remove empty lines from the gfall file"""
with open(fname, "r") as f:
for line in f:
if not re.match(r'^\s*$', line):
yield line

gfall = np.genfromtxt(read_remove_empty(fname), dtype=field_types, delimiter=field_widths)

columns = ['wavelength', 'loggf', 'element_code', 'e_first', 'j_first',
'blank1', 'label_first', 'e_second', 'j_second', 'blank2',
'label_second', 'log_gamma_rad', 'log_gamma_stark',
'log_gamma_vderwaals', 'ref', 'nlte_level_no_first',
'nlte_level_no_second', 'isotope', 'log_f_hyperfine',
'isotope2', 'log_iso_abundance', 'hyper_shift_first',
'hyper_shift_second', 'blank3', 'hyperfine_f_first',
'hyperfine_note_first', 'blank4', 'hyperfine_f_second',
'hyperfine_note_second', 'line_strength_class', 'line_code',
'lande_g_first', 'lande_g_second', 'isotopic_shift']

gfall = pd.DataFrame(gfall)
gfall.columns = columns
field_type_dict = {col:dtype for col, dtype in zip(self.gfall_columns, field_types)}
gfall = pd.read_fwf(fname, widths=field_widths, skip_blank_lines=True,
names=self.gfall_columns, dtypes=field_type_dict)
#remove empty lines
gfall = gfall[~gfall.isnull().all(axis=1)]

return gfall

Expand All @@ -131,14 +149,17 @@ def parse_gfall(self, gfall_raw=None):
pandas.DataFrame
a level DataFrame
"""


gfall = gfall_raw if gfall_raw is not None else self.gfall_raw.copy()

gfall = gfall.rename(columns={'e_first':'energy_first',
'e_second':'energy_second'})
double_columns = [item.replace('_first', '') for item in gfall.columns if
item.endswith('first')]

# due to the fact that energy is stored in 1/cm
order_lower_upper = (gfall["e_first"].abs() <
gfall["e_second"].abs())
order_lower_upper = (gfall["energy_first"].abs() <
gfall["energy_second"].abs())

for column in double_columns:
data = pd.concat([gfall['{0}_first'.format(column)][order_lower_upper],
Expand Down Expand Up @@ -166,10 +187,10 @@ def parse_gfall(self, gfall_raw=None):
gfall = gfall.loc[~((gfall["label_lower"].isin(ignored_labels)) |
(gfall["label_upper"].isin(ignored_labels)))].copy()

gfall['e_lower_predicted'] = gfall["e_lower"] < 0
gfall["e_lower"] = gfall["e_lower"].abs()
gfall['e_upper_predicted'] = gfall["e_upper"] < 0
gfall["e_upper"] = gfall["e_upper"].abs()
gfall['energy_lower_predicted'] = gfall["energy_lower"] < 0
gfall["energy_lower"] = gfall["energy_lower"].abs()
gfall['energy_upper_predicted'] = gfall["energy_upper"] < 0
gfall["energy_upper"] = gfall["energy_upper"].abs()

gfall['atomic_number'] = gfall.element_code.astype(int)
gfall['ion_charge'] = ((gfall.element_code.values -
Expand All @@ -181,7 +202,8 @@ def parse_gfall(self, gfall_raw=None):

def extract_levels(self, gfall=None, selected_columns=None):
"""
Extract levels from `gfall`
Extract levels from `gfall`. We first generate a concatenated DataFrame
of all lower and upper levels. Then we drop the duplicate levels.
Parameters
----------
Expand All @@ -203,8 +225,8 @@ def extract_levels(self, gfall=None, selected_columns=None):
selected_columns = ['atomic_number', 'ion_charge', 'energy', 'j',
'label', 'theoretical']

column_renames = {'e_{0}': 'energy', 'j_{0}': 'j', 'label_{0}': 'label',
'e_{0}_predicted': 'theoretical'}
column_renames = {'energy_{0}': 'energy', 'j_{0}': 'j', 'label_{0}': 'label',
'energy_{0}_predicted': 'theoretical'}

e_lower_levels = gfall.rename(
columns=dict([(key.format('lower'), value)
Expand All @@ -216,16 +238,18 @@ def extract_levels(self, gfall=None, selected_columns=None):

levels = pd.concat([e_lower_levels[selected_columns],
e_upper_levels[selected_columns]])
unique_level_id = ['atomic_number', 'ion_charge'] + self.unique_level_identifier

levels = levels.sort_values(['atomic_number', 'ion_charge', 'energy', 'j', 'label']).\
drop_duplicates(['atomic_number', 'ion_charge', 'energy', 'j', 'label'])
levels.drop_duplicates(unique_level_id, inplace=True)
levels = levels.sort_values(['atomic_number', 'ion_charge', 'energy',
'j', 'label'])

levels["method"] = levels["theoretical"].\
apply(lambda x: "theor" if x else "meas") # Theoretical or measured
levels.drop("theoretical", 1, inplace=True)

levels["level_index"] = levels.groupby(['atomic_number', 'ion_charge'])['j'].\
transform(lambda x: np.arange(len(x))).values
transform(lambda x: np.arange(len(x), dtype=np.int64)).values
levels["level_index"] = levels["level_index"].astype(int)

# ToDo: The commented block below does not work with all lines. Find a way to parse it.
Expand Down Expand Up @@ -259,25 +283,36 @@ def extract_lines(self, gfall=None, levels=None, selected_columns=None):
levels = self.levels

if selected_columns is None:
selected_columns = ['wavelength', 'loggf', 'atomic_number', 'ion_charge']
selected_columns = ['atomic_number', 'ion_charge']
selected_columns += [item + '_lower' for item in self.unique_level_identifier]
selected_columns += [item + '_upper' for item in self.unique_level_identifier]
selected_columns += ['wavelength', 'loggf']


logger.info('Extracting line data: {0}'.format(', '.join(selected_columns)))
unique_level_id = ['atomic_number', 'ion_charge'] + self.unique_level_identifier
levels_idx = levels.reset_index()
levels_idx = levels_idx.set_index(['atomic_number', 'ion_charge', 'energy', 'j', 'label'])
levels_idx = levels_idx.set_index(unique_level_id)

lines = gfall[selected_columns].copy()
lines["gf"] = np.power(10, lines["loggf"])
lines = lines.drop(["loggf"], 1)

level_lower_idx = gfall[['atomic_number', 'ion_charge', 'e_lower', 'j_lower', 'label_lower']].values.tolist()
level_lower_idx = [tuple(item) for item in level_lower_idx]
# Assigning levels to lines

level_upper_idx = gfall[['atomic_number', 'ion_charge', 'e_upper', 'j_upper', 'label_upper']].values.tolist()
level_upper_idx = [tuple(item) for item in level_upper_idx]
levels_unique_idxed = self.levels.reset_index().set_index(['atomic_number', 'ion_charge'] + self.unique_level_identifier)

lines['level_index_lower'] = levels_idx.loc[level_lower_idx, "level_index"].values
lines['level_index_upper'] = levels_idx.loc[level_upper_idx, "level_index"].values
lines_lower_unique_idx = (['atomic_number', 'ion_charge'] +
[item + '_lower' for item in self.unique_level_identifier])
lines_upper_unique_idx = (['atomic_number', 'ion_charge'] +
[item + '_upper' for item in self.unique_level_identifier])
lines_lower_idx = lines.set_index(lines_lower_unique_idx)
lines_lower_idx['level_index_lower'] = levels_unique_idxed['level_index']
lines_upper_idx = lines_lower_idx.reset_index().set_index(lines_upper_unique_idx)
lines_upper_idx['level_index_upper'] = levels_unique_idxed['level_index']
lines = lines_upper_idx.reset_index().set_index(
['atomic_number', 'ion_charge', 'level_index_lower', 'level_index_upper'])

lines.set_index(['atomic_number', 'ion_charge', 'level_index_lower', 'level_index_upper'], inplace=True)

return lines

Expand Down
6 changes: 3 additions & 3 deletions docs/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@


import sphinx_bootstrap_theme

import sphinx_rtd_theme
extensions = [
'sphinx.ext.autodoc',
'numpydoc',
Expand Down Expand Up @@ -120,8 +120,8 @@
# Add any paths that contain custom themes here, relative to this directory.
# To use a different custom theme, add the directory containing the theme.
#html_theme_path = []
html_theme = 'bootstrap'
html_theme_path = sphinx_bootstrap_theme.get_html_theme_path()
html_theme = 'sphinx_rtd_theme'
html_theme_path = sphinx_rtd_theme.get_html_theme_path()

# The theme to use for HTML and HTML Help pages. See the documentation for
# a list of builtin themes. To override the custom theme, set this to the
Expand Down
13 changes: 5 additions & 8 deletions docs/index.rst
Original file line number Diff line number Diff line change
@@ -1,13 +1,10 @@
Carsus
======
Carsus is a TARDIS support package for creating and working with atomic datasets
####################
Carsus Documentation
####################

See the :ref:`notebooks/quickstart.ipynb` for a short introduction.
Carsus is a TARDIS support package for creating and working with atomic datasets

Supported data sources:
-----------------------

- :ref:`notebooks/nist_weights_comp.ipynb`

For more information about how to use this package, see the :ref:`api`.

Expand All @@ -18,7 +15,7 @@ Contents:
:maxdepth: 1

notebooks/quickstart.ipynb
notebooks/nist_weights_comp.ipynb
io/data_sources
api
model
macroatom
Expand Down
14 changes: 14 additions & 0 deletions docs/io/data_sources.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
********
Datasets
********

Carsus provides tools to ingest several different datasets. While these
tools are in general aimed at ingesting data directly into the
Carsus database format, they can also be used separately to explore these
datasets.


.. toctree::

kurucz/index.rst
nist/index.rst
10 changes: 10 additions & 0 deletions docs/io/kurucz/index.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
Kurucz Dataset
==============

Robert Kurucz offers several different datasets containing atomic data. In the following, we showcase notebooks that
are used to ingest several of those datasets.


.. toctree::

reading_gfall.ipynb

0 comments on commit 0d6a899

Please sign in to comment.