## Extract and save experimental atomic energy levels

In [None]:
import pandas as pd
import numpy as np
import os, re

In [None]:
# Species to look up: net charge (0 = neutral atom) and element symbol
charge = 0
atom = 'B'

In [None]:
def _roman(number):
    """Return the lower-case Roman numeral for a non-negative integer.

    Zero (or a negative number) yields an empty string.
    """
    numeral = []
    for value, symbol in ((1000, 'm'), (900, 'cm'), (500, 'd'), (400, 'cd'),
                          (100, 'c'), (90, 'xc'), (50, 'l'), (40, 'xl'),
                          (10, 'x'), (9, 'ix'), (5, 'v'), (4, 'iv'), (1, 'i')):
        count, number = divmod(max(number, 0), value)
        numeral.append(symbol * count)
    return ''.join(numeral)


# Spectrum label is "<element>+<roman numeral of charge + 1>", e.g. 'B+i' for
# neutral boron.  Simply repeating 'i' only gives a valid numeral up to 'iii'
# (charge <= 2), so the numeral is built properly to cover higher charge states.
astr = atom + '+' + _roman(charge + 1)
url = f'https://physics.nist.gov/cgi-bin/ASD/energy1.pl?de=0&spectrum={astr}&submit=Retrieve+Data&units=0&format=0&output=0&page_size=15&multiplet_ordered=0&conf_out=on&term_out=on&level_out=on&unc_out=1&j_out=on&lande_out=on&perc_out=on&biblio=on&temp='

In [None]:
# Fetch the page at `url` and parse every HTML table found in it;
# row 0 of each table is used for the column names
dfs = pd.read_html(io=url, header=0)

In [None]:
# Select the table that contains spectroscopic data.
# Start from None so that a `dfexpt` left over from an earlier run of the
# notebook can never be mistaken for the result of this download.
dfexpt = None
for df in dfs:
    if 'Configuration' in df.columns:
        dfexpt = df  # if several tables qualify, the last one is kept
if dfexpt is None:
    raise ValueError('No table with a "Configuration" column was found in the downloaded page')

In [None]:
# Keep selected columns
ecol = 'Level (cm-1)'
cols = ['Configuration', 'Term', 'J', ecol]
# Take an independent copy: the table is modified in place afterwards, and
# writing into a bare column slice triggers pandas' SettingWithCopyWarning.
dfexpt = dfexpt[cols].copy()

In [None]:
# Remove spaces from numbers and convert to float
_whitespace = re.compile(r'\s')  # raw string: a bare '\s' is an invalid escape sequence
energies = []
for estr in dfexpt[ecol]:
    estr = str(estr)  # in case it has already been converted to a number
    try:
        e = float(_whitespace.sub('', estr))
    except ValueError:
        # energy might have "?" or other qualifier; replace with NaN
        print(f'** removing non-numeric energy value "{estr}"')
        e = np.nan
    energies.append(e)
# Replace the whole column in one step.  This does not depend on the row
# labels being 0..n-1, and the resulting column has a float dtype instead of
# keeping the dtype of the original text column.
dfexpt = dfexpt.assign(**{ecol: energies})

In [None]:
# Keep only the rows that have an energy, then renumber the index
# from zero so that it is contiguous again
has_energy = dfexpt[ecol].notna()
dfexpt = dfexpt.loc[has_energy].reset_index(drop=True)
display(dfexpt)

In [None]:
# Where Configuration or Term is missing, replace it with the value from the
# preceding row (forward fill).  A missing value in the very first row has no
# predecessor and is left as it is.
for col in ['Configuration', 'Term']:
    dfexpt[col] = dfexpt[col].ffill()

In [None]:
# Show the current contents of the level table
display(dfexpt)

In [None]:
# Write the level table to an Excel workbook named after the species
atomlbl = atom + '_' + 'I' * (charge + 1)
# Destination folder (a user-specific path); the workbook is placed inside it
fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\\' + atomlbl
fxl = os.path.join(fdir, atomlbl + '_exptl_levels.xlsx')

# An existing workbook is never overwritten
if not os.path.isfile(fxl):
    dfexpt.to_excel(fxl, index=False)
    print(f'Data saved to file {fxl}')
    print(f'in folder {fdir}')
else:
    print(f'File {fxl} already exists!')

In [None]:
# Get naive value of SOC for low terms
import chem_subs as chem

print(f'Naive values of E_so for low terms of {atom}')
ecol = 'Level (cm-1)'
dflow = dfexpt[dfexpt[ecol] < 10000]  # below 10,000 cm-1
# Collect every distinct (configuration, term) pair once, in order of first
# appearance.  Testing the term list and the configuration list separately
# would skip a new pair whose term and configuration had each already been
# seen, but only in combination with something else.
pairs = list(dict.fromkeys(zip(dflow.Configuration, dflow.Term)))
configs = [c for c, _ in pairs]
terms = [t for _, t in pairs]
for term, config in zip(terms, configs):
    # presumably returns the spin and orbital quantum numbers encoded in the
    # term symbol -- confirm against chem_subs
    S, L = chem.SL_from_term(term)
    mult = (2*S + 1) * (2*L + 1)
    # All levels of this term are used, not only those below the cutoff
    subdf = dfexpt[(dfexpt.Term == term) & (dfexpt.Configuration == config)].copy()
    # Degeneracy of each level, g = 2J + 1
    subdf['g'] = 2 * subdf['J'].apply(chem.halves_to_float) + 1
    if subdf.g.sum() != mult:
        # Some levels of the term are missing (or duplicated) in the table
        print(f'*** Total multiplicity should be {mult} but levels provide {subdf.g.sum()}')
    else:
        # Negative of the degeneracy-weighted mean level energy
        SOC = -1 * np.dot(subdf[ecol], subdf.g) / mult
        print(f'E_so({term}) = {SOC:.2f} cm-1 from eq. (1)')
    #display(subdf)