# Compare Atomic Files

This notebook shows how to compare the `levels_data` and `lines_data` DataFrames stored in two atomic data files generated by Carsus.

In [11]:
import os
import pandas as pd
from carsus.util import parse_selected_species
from collections import defaultdict
from carsus.io.nist import NISTWeightsComp, NISTIonizationEnergies
from carsus.io.kurucz import GFALLReader
from carsus.io.zeta import KnoxLongZeta
from carsus.io.chianti_ import ChiantiReader
from carsus.io.output import TARDISAtomData

In [7]:
# Paths of the two atomic data files, taken from the environment.
# Each one is None when its variable is not set.
ATOM1_PATH = os.getenv("ATOM1_PATH")
ATOM2_PATH = os.getenv("ATOM2_PATH")

Define the following functions to compare both dataframes.

In [None]:
import pandas as pd
from carsus.util import parse_selected_species

In [8]:
def highlight_values(val):
    """Return the CSS background rule for a validation flag.

    Green when ``val == True``, red for anything else. The comparison is an
    equality test, so ``1`` counts as a match while other truthy values
    (for example ``2`` or a non-empty string) do not.
    """
    green = 'background-color: #BCF5A9'
    red = 'background-color: #F5A9A9'
    return green if val == True else red
    
def highlight_diff(val):
    """Return the CSS background rule for a count difference.

    Green when ``val == 0`` (no difference), red for any other value.
    """
    no_difference = (val == 0)
    if no_difference:
        return 'background-color: #BCF5A9'
    return 'background-color: #F5A9A9'

In [12]:
class AtomDataCompare:
    """Compare which top-level keys are present in two HDF5 atomic data files.

    Both files are opened read-only when the object is created. Release the
    handles with ``teardown()`` or by using the object as a context manager::

        with AtomDataCompare(path_1, path_2) as compare:
            table = compare.comparision_table()

    Parameters
    ----------
    d1_path, d2_path : str or path-like
        Locations of the first and second HDF5 file.

    Attributes
    ----------
    d1, d2 : pandas.HDFStore or None
        Open stores for the two files; ``None`` until ``setup()`` succeeds.
    """

    def __init__(self, d1_path=None, d2_path=None):
        self.d1_path = d1_path
        self.d2_path = d2_path
        # Initialised up front so teardown() is safe even if setup() fails.
        self.d1 = None
        self.d2 = None
        self.setup()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        self.teardown()
        return False  # never swallow exceptions raised inside the with-block

    def setup(self):
        """Open both files read-only.

        Read-only mode keeps a mistyped path from silently creating an empty
        file (the default append mode would) and guarantees the inputs are
        never modified by a comparison.
        """
        self.d1 = pd.HDFStore(self.d1_path, mode='r')
        try:
            self.d2 = pd.HDFStore(self.d2_path, mode='r')
        except Exception:
            # Do not leak the first handle when the second file cannot be opened.
            self.d1.close()
            raise

    def teardown(self):
        """Close both stores; the second is closed even if the first close fails."""
        try:
            if self.d1 is not None:
                self.d1.close()
        finally:
            if self.d2 is not None:
                self.d2.close()

    def comparision_table(self):
        """Return a table showing which keys exist in each file.

        Returns
        -------
        pandas.DataFrame
            Indexed by the sorted union of the keys of both stores, with
            boolean columns ``exists_1`` and ``exists_2``.
        """
        d1_keys = set(self.d1.keys())
        d2_keys = set(self.d2.keys())
        all_keys = sorted(d1_keys | d2_keys)
        # Build the flags directly as booleans instead of outer-joining and
        # filling NaN, which yields a version-dependent dtype.
        return pd.DataFrame(
            {
                'exists_1': [key in d1_keys for key in all_keys],
                'exists_2': [key in d2_keys for key in all_keys],
            },
            index=all_keys,
            dtype=bool,
        )

    # Correctly spelled alias; the original name is kept for existing callers.
    comparison_table = comparision_table
            

In [13]:
# The files to compare come from the ATOM1_PATH / ATOM2_PATH environment
# variables read earlier, so this cell does not depend on a machine-specific path.
if ATOM1_PATH is None or ATOM2_PATH is None:
    raise ValueError(
        "Set the ATOM1_PATH and ATOM2_PATH environment variables to the two "
        "HDF5 atomic data files to compare."
    )

atc = AtomDataCompare(ATOM1_PATH, ATOM2_PATH)
try:
    key_table = atc.comparision_table()
finally:
    # Always release the HDF5 file handles, even if building the table fails.
    atc.teardown()
key_table

Unnamed: 0,exists_1,exists_2
/atom_data,True,True
/collisions_data,True,True
/collisions_metadata,True,True
/ionization_data,True,True
/levels_data,True,True
/lines_data,True,True
/macro_atom_data,True,True
/macro_atom_references,True,True
/metadata,True,True
/photoionization_data,True,True


In [None]:
def _rows_for_ion(frame, ion):
    """Return ``frame.loc[ion]``, or ``None`` when the lookup fails."""
    try:
        return frame.loc[ion]
    except (KeyError, TypeError, ValueError):
        return None


def _compare_ion_rows(frame_a, frame_b, ion):
    """Count the rows of ``ion`` in both frames and compare them cell by cell.

    Returns ``(num_a, num_b, equal)``. ``equal`` is ``True`` when both counts
    are zero, or when the counts match and every aligned cell compares equal.
    """
    rows_a = _rows_for_ion(frame_a, ion)
    rows_b = _rows_for_ion(frame_b, ion)
    num_a = 0 if rows_a is None else len(rows_a)
    num_b = 0 if rows_b is None else len(rows_b)

    if num_a != num_b:
        return num_a, num_b, False
    if num_a == 0:
        return num_a, num_b, True

    try:
        # `.eq` aligns on labels; testing *all* cells avoids hard-coding the
        # number of columns each table is expected to have.
        equal = bool(rows_a.eq(rows_b).to_numpy().all())
    except (KeyError, TypeError, ValueError):
        # Equality could not be established, so do not report the ion as equal.
        equal = False
    return num_a, num_b, equal


def compare_levels_lines(path_a, path_b, ions='H-Zn'):
    """Compare the ``levels_data`` and ``lines_data`` tables of two HDF5 files.

    Parameters
    ----------
    path_a, path_b : str
        Paths of the two files; the keys ``levels_data`` and ``lines_data``
        are read from each.
    ions : str
        Species selection passed to ``parse_selected_species``.

    Returns
    -------
    pandas.DataFrame
        One row per ion (index ``ion``) with the columns

        - ``num_lvl_a``, ``num_lvl_b``, ``num_lns_a``, ``num_lns_b``: number
          of levels/lines found for the ion in each file (0 if absent);
        - ``diff_lvl``, ``diff_lns``: absolute difference of those counts;
        - ``val_lvl``, ``val_lns``: ``True`` when the counts match and all
          cells are equal in both files.

    Notes
    -----
    Cells are compared with ``DataFrame.eq``, so a NaN is never equal to
    another NaN and marks the ion as different.
    """
    # Read data
    levels_a = pd.read_hdf(path_a, key='levels_data')
    levels_b = pd.read_hdf(path_b, key='levels_data')
    lines_a = pd.read_hdf(path_a, key='lines_data')
    lines_b = pd.read_hdf(path_b, key='lines_data')

    records = []
    for ion in parse_selected_species(ions):
        num_lvl_a, num_lvl_b, val_lvl = _compare_ion_rows(levels_a, levels_b, ion)
        num_lns_a, num_lns_b, val_lns = _compare_ion_rows(lines_a, lines_b, ion)
        records.append({
            'ion': ion,
            'num_lvl_a': num_lvl_a,
            'num_lvl_b': num_lvl_b,
            'diff_lvl': abs(num_lvl_b - num_lvl_a),
            'val_lvl': val_lvl,
            'num_lns_a': num_lns_a,
            'num_lns_b': num_lns_b,
            'diff_lns': abs(num_lns_b - num_lns_a),
            'val_lns': val_lns,
        })

    columns = ['ion',
               'num_lvl_a', 'num_lvl_b', 'diff_lvl', 'val_lvl',
               'num_lns_a', 'num_lns_b', 'diff_lns', 'val_lns']
    return pd.DataFrame(records, columns=columns).set_index('ion')

In [3]:
# Compare the two example files for species H-C. `compare_levels_lines` must
# already be defined (run its cell first), and 'A.h5' / 'B.h5' must exist in the
# working directory; the commented-out cells under "Custom Atomic Data" write them.
tt = compare_levels_lines('A.h5', 'B.h5', ions='H-C')

NameError: name 'compare_levels_lines' is not defined

### Custom Atomic Data

To generate and compare atomic data files locally, please uncomment the code below.<br>
For the first atomic file we grab species `H-C` from GFALL and `H-He` from Chianti.

In [None]:
# atomic_weights = NISTWeightsComp()
# ionization_energies = NISTIonizationEnergies('H-C')
# gfall_reader = GFALLReader(ions='H-C')
# chianti_reader = ChiantiReader(ions='H-He', collisions=True, priority=20)
# zeta_data = KnoxLongZeta()

In [None]:
# atom_data_a = TARDISAtomData(atomic_weights,
#                              ionization_energies,
#                              gfall_reader,
#                              zeta_data,
#                              chianti_reader)

In [None]:
# atom_data_a.to_hdf('A.h5')

For the second atomic file we grab species `H-C` from GFALL and `C` from Chianti.

In [None]:
# chianti_reader = ChiantiReader(ions='C', collisions=True, priority=20)

In [None]:
# atom_data_b = TARDISAtomData(atomic_weights,
#                              ionization_energies,
#                              gfall_reader,
#                              zeta_data,
#                              chianti_reader)

In [None]:
# atom_data_b.to_hdf('B.h5')

In [None]:
# tt = compare_levels_lines('A.h5', 'B.h5', ions='H-C')

```
num_xxx_y (int) : number of levels/lines.
diff_xxx (int) : difference in number of levels/lines.
val_xxx (bool) : `True` if levels/lines have the same value.
```

In [None]:
# Colour the validation flags and the count differences: green means the two
# files agree, red means they differ.
# NOTE(review): recent pandas releases deprecate Styler.applymap in favour of
# Styler.map - confirm against the pinned pandas version before switching.
(
    tt.style
    .applymap(highlight_values, subset=['val_lvl', 'val_lns'])
    .applymap(highlight_diff, subset=['diff_lvl', 'diff_lns'])
)