In [1]:
# Extract SO-CI information from MOLPRO outputs for atoms
#   Read exptl data from Excel file, combine with weights to get E_so
#   The experimental Excel file is generated by get_NIST_atomic_data.ipynb
# More robust J assignments (attempted)
# KKI 8/30/2024
# Working for difficult Ta case (9/25/2024)
import re, sys, os
import numpy as np
import pandas as pd
from collections import Counter
import random
import matplotlib.pyplot as plt
#from sklearn.cluster import KMeans

import chem_subs as chem
import molpro_subs as mpr
import molpro_subs2 as m2

# Show every row when a DataFrame is displayed (no truncation of long tables)
pd.set_option('display.max_rows', None)
# Print floats in fixed-point notation instead of scientific notation
np.set_printoptions(suppress=True)

### Specify Molpro SO-CI output file

In [2]:
#fname = 'kr_1SDPS_3SDP_ac5zpp.pro'
# Name of the Molpro output file to analyze.
# The text before the first '_' is used below as the element symbol.
fname = 'Ta_Q10D28S5_cvtz-pp.out'

In [3]:
# my atom subdirectory names look like "Ar_I" (for neutral argon)
# Element symbol = text before the first '_' in the file name, capitalized
el = fname.split('_')[0].capitalize()
# Directory that holds the calculation; the "_I" (neutral) suffix is hard-coded.
# NOTE(review): this is an absolute, user-specific path -- edit it for your machine.
fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\{:s}_I'.format(el)
#fdir = r'C:\Users\dagbaglo\Desktop\So-ci_energy\{:s}_I'.format(el)

In [4]:
# Full path to the Molpro output file: directory + separator + file name
fsoc = fdir + os.sep + fname
print('Reading MOLPRO file')
print(fsoc)

Reading MOLPRO file
C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\Ta_I\Ta_Q10D28S5_cvtz-pp.out


In [5]:
# Split the output file into its major sections (plus their line numbers)
major_sections, linenos = m2.identify_sections(fsoc)
show_sections = False  # switch to True to list the sections that were found
if show_sections:
    print('Major sections:')
    for secname, blocks in major_sections.items():
        print(f'   {secname:<11s}   {len(blocks)} text blocks')

In [6]:
# section "header": the basis-set name is read from its last block
basisset = m2.basisset_name(major_sections['header'][-1])
# section "integrals": everything below is read from its last block
integrals_block = major_sections['integrals'][-1]
PG = m2.point_group(integrals_block)
print(f'Computational point group = {PG}')
if PG != 'Ci':
    chem.print_err('', 'Ci point group is required for this analysis')
nprim = m2.nbf_primitive(integrals_block)
nbf = m2.nbf(integrals_block)
for line in (f'{basisset} basis set',
             f'    {nprim} primitives',
             f'    {nbf} contracted basis functions'):
    print(line)
# The atom is taken from the last entry of the coordinate list
crd = m2.coordinates(integrals_block)
atom = crd[-1]['el']
if atom != el:
    chem.print_err('', f'This looks like the wrong atom ({atom}) for the filename ({el})')
Qtot = m2.nuclear_charge_total(integrals_block)
print(f'Atom "{atom}" with nuclear charge = {Qtot}')
# A nuclear charge below the atomic number means core electrons were
# replaced by a pseudopotential
Zel = chem.elz(atom, 'Z')
ncore = Zel - Qtot
if ncore > 0:
    print(f'    pseudopotential replaces {ncore} core electrons')

Computational point group = Ci
CC-PWCVTZ-PP basis set
    165 primitives
    97 contracted basis functions
Atom "Ta" with nuclear charge = 13
    pseudopotential replaces 60 core electrons


In [7]:
def color_by_orb(dforb, c1='white', c2='lightgrey'):
    """Display a DataFrame of orbitals with shading that alternates per orbital.

    Consecutive rows that share the same value in the 'Orbital' column get
    the same background color; the color flips whenever the value changes
    (instead of flipping on every row of the DataFrame).

    Parameters
    ----------
    dforb : DataFrame with a column named 'Orbital'
    c1, c2 : CSS color names used alternately

    Returns
    -------
    None.  The styled table is shown with display().
    """
    palette = [c1, c2]
    color = []
    prev = ''
    ishade = 0
    for orb in dforb.Orbital:
        if orb != prev:
            # new orbital: switch to the other color
            ishade += 1
        color.append(palette[ishade % 2])
        prev = orb
    # One CSS string per row, in row order.  The same list is returned for
    # every column, so the shading is positional and does not depend on the
    # index labels being 0..n-1 (indexing 'color' by label would be wrong
    # for a filtered or re-indexed frame).
    css = [f'background-color: {c}' for c in color]
    styler = dforb.style
    styler = styler.apply(lambda column: css, axis=0)
    styler = styler.hide()  # suppress the index, which is differently shaded
    display(styler)
    return

In [8]:
# section "rhf": occupations, energy and orbitals from the last RHF block
rhf_block = major_sections['rhf'][-1]
occup_hf = m2.hf_occup(rhf_block)
print('HF occupations: ', occup_hf)
hf_results = m2.scf_result('RHF', rhf_block)
print(f"HF energy = {hf_results['E']:.6f} for state {hf_results['Label']:s}")
orbtitle, dfHForb = m2.parse_orbitals(rhf_block)
# Total electron count = sum of all occupation numbers over all lists
nel_HF = sum(map(sum, occup_hf.values()))
print(f'HF has {nel_HF} electrons (charge = {Qtot - nel_HF})')
print(orbtitle)
color_by_orb(dfHForb)

HF occupations:  {'alpha': [5, 3], 'beta ': [2, 3]}
HF energy = -56.709920 for state 1.1
HF has 13 electrons (charge = 0)
ELECTRON ORBITALS


Orbital,Occ,Energy,Cen,Mu,Typ,Coeff
1.1,2.0,-3.16479,1,1,s,0.99916
2.1,2.0,-0.25201,1,2,s,0.96661
3.1,1.0,-0.30733,1,1,d1+,-0.5589
3.1,1.0,-0.30733,1,1,d1-,0.82955
4.1,1.0,-0.30733,1,1,d1+,0.82955
4.1,1.0,-0.30733,1,1,d1-,0.5589
5.1,1.0,-0.29429,1,1,d0,0.98009
1.2,2.0,-1.75634,1,1,px,-0.55835
1.2,2.0,-1.75634,1,1,py,0.82873
2.2,2.0,-1.75634,1,1,px,0.82873


In [9]:
# break section "multi" into sub-sections
# Only the last "multi" block of the output is used.
multisec = m2.multi_sections(major_sections['multi'][-1])
#multisec.keys()

In [10]:
# parsing MULTI sub-sections (last block of each, except where noted)
multi_top = multisec['top'][-1]
multi_iter = multisec['iterations'][-1]
multi_trans = multisec['trans'][-1]
# settings and state definitions
dynfac = m2.get_dynfac(multi_top)
orbspace = m2.orbital_spaces(multi_top)
statesym = m2.state_symmetry_groups(multi_top)
# iteration history
convergence = m2.multi_convergence(multi_iter)
weights = m2.multi_weights(multi_iter)
dfiter = m2.multi_iterations(multi_iter)
# state energies (all "results" blocks are passed)
dfstates = m2.multi_results(multisec['results'])
# expectation values and transition moments
dfexpec = m2.multi_expec(multi_trans)
dftrans = m2.multi_transmom(multi_trans)
# natural orbitals; note that 'orbtitle' is re-used (overwritten) here
orbtitle, dfNO = m2.parse_orbitals(multisec['natorb'][-1])
# CI vectors (all "civector" blocks are passed)
ddfcivec, dEcas = m2.multi_civecs(multisec['civector'])

In [11]:
# Report the CASSCF active space and other orbital spaces
nactel = statesym[0]['nelec']
nactorb = sum(orbspace['active'])
print(f'CASSCF active space is ({nactel}/{nactorb}) with active orbitals {orbspace["active"]}')
if 'closed-shell' in orbspace.keys():
    print(f'    closed orbitals are {orbspace["closed-shell"]}')
else:
    print( '    There are no "closed" orbitals')
if 'frozen' in orbspace.keys():
    print(f'    frozen orbitals are {orbspace["frozen"]}')
else:
    print( '    There are no "frozen" orbitals')
# Count the states, in total and per spin multiplicity
mult_count = {}
ncas = 0
for st in statesym:
    mult = st['spin']
    mult_count[mult] = st['nstates'] + mult_count.get(mult, 0)
    ncas += st['nstates']
print(f'{ncas} CASSCF states:')
for mult, n in mult_count.items():
    print(f'   {n:3d} {mult}')

# Show the state weights, renormalized for reading convenience
print('CASSCF relative state weights (subject to rounding error):')
uweights = m2.unnormalize_cas_weights(weights)
for k, wts in uweights.items():
    print('    ', np.round(wts, 1))

# Are <L**2> values clean?  Compare each value with the nearest integer.
ilsq = np.rint(dfexpec['L**2'])
maxdev = np.abs(ilsq - dfexpec['L**2']).max()
if maxdev:
    # 'maxdev' is a number, not a callable: format it directly
    print(f'Largest deviation of <L**2> from integer = {maxdev:.8f}')
else:
    print('Values of <L**2> are clean')
# Lowest CASSCF energy relative to the HF energy
CAS_rel_HF = dfstates.E.min() - hf_results['E']
print(f'For the ground state, [E(CASSCF) - E(HF)] = {CAS_rel_HF:.6f}')
if CAS_rel_HF >= 0:
    print('   *** this difference should normally be negative')
print()
print(orbtitle)
# color_by_orb() displays the table and returns None
orb_styler = color_by_orb(dfNO)

CASSCF active space is (13/10) with active orbitals [7, 3]
    There are no "closed" orbitals
    There are no "frozen" orbitals
43 CASSCF states:
    10 Quartet
    28 Doublet
     5 Sextet
CASSCF relative state weights (subject to rounding error):
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.
 1. 1. 1. 1.]
     [1. 1. 1. 1. 1.]
Values of <L**2> are clean
For the ground state, [E(CASSCF) - E(HF)] = -0.053876

NATURAL ORBITALS (STATE AVERAGED)


Orbital,Occ,Energy,Cen,Mu,Typ,Coeff
1.1,1.99976,-3.1421,1,1,s,0.99971
2.1,1.80453,-0.22286,1,2,s,1.00492
3.1,0.64169,0.01152,1,1,d1-,1.00366
4.1,0.64169,0.01152,1,1,d1+,1.00366
5.1,0.64169,0.01152,1,1,d0,1.00366
6.1,0.64169,0.01152,1,1,d2+,1.00366
7.1,0.64169,0.01152,1,1,d2-,1.00366
1.2,1.99576,-1.73164,1,1,pz,0.99977
2.2,1.99576,-1.73164,1,1,py,0.99977
3.2,1.99576,-1.73164,1,1,px,0.99977


In [12]:
# Debugging aid, disabled by default: change the condition to True to dump
# everything that was parsed from the MULTI section in the cells above.
if False:
    # print results from parsing MULTI output
    print(f'DYNW = {dynfac}')
    print('Spaces: ', orbspace)
    print('CASSCF state groups:')
    for g in statesym:
        print('   ', g)
    print(convergence)
    print('CASSCF state weights:')
    for k, v in weights.items():
        print(f'  {k:>2s}: ', v)
    display(dfiter)
    display(dfstates)
    display(dfexpec)
    # one table of transition moments per operator
    for op, df in dftrans.items():
        print(f'Operator {op}')
        display(df)
    print(orbtitle)
    display(dfNO)
    # only the first rows of each CI-vector table are shown
    for k, df in ddfcivec.items():
        print(k, dEcas[k])
        display(df.head())

In [13]:
# Summarize CASSCF results: label, irrep, spin, energy, L and term symbol
dfcas = dfstates[['Label', 'irrep', 'E']].copy()
# One S value per state, following the order of the state groups
Svals = [chem.MULTSPIN[g['spin']]
         for g in statesym
         for i in range(g['nstates'])]
dfcas.insert(2, 'S', Svals)
dfcas['L**2'] = dfexpec['L**2']
# <L**2> = L(L+1), so truncating its square root to an integer yields L
dfcas['L'] = np.sqrt(dfexpec['L**2']).astype(int)
# Term symbols; the parity argument is 3 - 2*irrep (+1 for irrep 1, -1 for irrep 2)
tsymb = [chem.term_symbol(L, S, 3 - 2*irr, linear=False)
         for S, L, irr in zip(dfcas.S, dfcas.L, dfcas.irrep)]
dfcas['term'] = tsymb
# (display 'dfcas' here to inspect the individual CASSCF states)

In [14]:
# Group the CASSCF states into atomic terms
dfcasterm = m2.collect_atomic_terms(dfcas)
nterm = len(dfcasterm)
print(f'There are {ncas} CASSCF states in {nterm} terms')
# Add the J values that each term can give rise to
Jvals = []
for trm in dfcasterm['term']:
    Jvals.append(chem.possible_J_from_term(trm))
dfcasterm['J_vals'] = Jvals
display(dfcasterm)

There are 43 CASSCF states in 7 terms


Unnamed: 0,term,E,Erel,erange_cm,irrep,L,S,idx,Labels,J_vals
0,4F,-56.763795,0.0,9e-06,1,3,1.5,"[0, 1, 2, 3, 4, 5, 6]","[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1]","[1.5, 2.5, 3.5, 4.5]"
1,4P,-56.741867,4812.6,5e-06,1,1,1.5,"[7, 8, 9]","[8.1, 9.1, 10.1]","[0.5, 1.5, 2.5]"
2,6D,-56.735205,6274.9,6e-06,1,2,2.5,"[38, 39, 40, 41, 42]","[1.1, 2.1, 3.1, 4.1, 5.1]","[0.5, 1.5, 2.5, 3.5, 4.5]"
3,2G,-56.723141,8922.5,8e-06,1,4,0.5,"[10, 11, 12, 13, 14, 15, 16, 17, 18]","[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1]","[3.5, 4.5]"
4,2D,-56.71946,9730.4,1e-05,1,2,0.5,"[19, 20, 21, 22, 23]","[10.1, 11.1, 12.1, 13.1, 14.1]","[1.5, 2.5]"
5,2P,-56.713596,11017.4,4e-06,1,1,0.5,"[24, 25, 26]","[15.1, 16.1, 17.1]","[0.5, 1.5]"
6,2H,-56.701322,13711.3,1.3e-05,1,5,0.5,"[27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37]","[18.1, 19.1, 20.1, 21.1, 22.1, 23.1, 24.1, 25....","[4.5, 5.5]"


In [15]:
# Parse MRCI results and summarize in DataFrame
#   Each "mrci" section is parsed into its own DataFrame 'dfci'; the frames
#   are concatenated once at the end into 'dfmrci'.
#   'mrci_meta' is left holding the metadata of the last MRCI calculation.
etol = 1.e-6  # tolerance for matching reference energies
mrci_frames = []  # one DataFrame per MRCI calculation
for imrci, sec in enumerate(major_sections['mrci']):
    print(f'MRCI calculation #{imrci+1}')
    mrcisec = m2.mrci_sections(sec)
    mrci_meta = m2.mrci_info(mrcisec['top'][0])
    mrci_iter = m2.mrci_iterations(mrcisec['iterations'][0])
    mrci_results = m2.mrci_results(mrcisec['results'][0])
    nstate = len(mrci_results['state'])
    print(f'    {nstate} states')
    # Report on orbital spaces in the MRCI
    print('    orbital spaces, by irrep')
    for sp in ['core', 'closed', 'active', 'external']:
        print('\t{:10s} {}'.format(sp, mrci_meta['spaces'].get(sp, [])))
    lbll =  []  # list of state labels
    c0rot = []  # list of C0 (rotated) values
    El =    []  # list of energies
    davl =  []  # list of Davidson-corrected energies (rotated ref)
    erefl = []  # list of reference energies
    spinmult = mrci_meta['smult']
    S = chem.MULTSPIN[spinmult]
    irrep = mrci_meta['irrep']
    for lbl, v in mrci_results['state'].items():
        lbll.append(lbl)
        try:
            c0rot.append(v['C0']['rotated'])
            davl.append(v['Energy']['davidson']['rotated'])
        except KeyError:
            # no "rotated" values if there is only one state
            c0rot.append(v['C0']['relaxed'])
            davl.append(v['Energy']['davidson']['relaxed'])
        El.append(v['Energy']['total'])
        erefl.append(v['Energy']['ref E'])
    # Initial reference states: keep only as many as there are MRCI states
    init_ref_no = list(mrci_iter['init_ref'].keys())[:nstate]
    reflbl_tentat = [f'{i}.{irrep}' for i in init_ref_no]
    init_refE = list(mrci_iter['init_ref'].values())[:nstate]
    dfci = pd.DataFrame({'Label': lbll, 'irrep': irrep, 'S': S, 'E': El,
                        'Edav': davl, 'C0': c0rot, 'Eref': erefl,
                        'init_ref': init_refE, 'iref_nr': init_ref_no,
                        'irlbl': reflbl_tentat})
    # find matching CASSCF reference (same spin, matching label and energy)
    caslbll = []
    castrml = []
    subcas = dfcas[dfcas.S == S]
    for ici, cirow in dfci.iterrows():
        irlbl = reflbl_tentat[ici]
        # Literal substring match: the '.' in a label such as '1.1' must
        # not be interpreted as a regular-expression wildcard.
        subrow = subcas[subcas.Label.str.contains(irlbl, regex=False)]
        # An empty 'subrow' (no CASSCF state with this label) is reported
        # as a failed match instead of raising IndexError.
        if len(subrow) and (abs(subrow.iloc[0]['E'] - cirow['init_ref']) < etol):
            # this is a match
            caslbll.append(subrow.iloc[0]['Label'])
            castrml.append(subrow.iloc[0]['term'])
        else:
            # something wrong
            caslbll.append('?')
            castrml.append('?')
    dfci['ref_lbl'] = caslbll
    dfci['term'] = castrml
    if '?' in caslbll:
        print('    *** failure matching MRCI states to CASSCF states')
        display(dfci)
    mrci_frames.append(dfci)
# Concatenate once, instead of repeatedly growing a DataFrame that starts empty
if mrci_frames:
    dfmrci = pd.concat(mrci_frames, ignore_index=True)
else:
    dfmrci = pd.DataFrame()

MRCI calculation #1
    10 states
    orbital spaces, by irrep
	core       []
	closed     []
	active     [7, 3]
	external   [48, 39]
MRCI calculation #2
    28 states
    orbital spaces, by irrep
	core       []
	closed     []
	active     [7, 3]
	external   [48, 39]
MRCI calculation #3
    5 states
    orbital spaces, by irrep
	core       []
	closed     []
	active     [7, 3]
	external   [48, 39]


In [16]:
# Group the MRCI states into terms, using the Davidson-corrected energies
nmrci = len(dfmrci)
dfciterm = m2.collect_atomic_terms(dfmrci, 'Edav')
# Remember the term symbols as they are before any enumerative prefix is added
termsIn = set(dfciterm.term)
nterm_ci = len(dfciterm)
print(f'There are {nmrci} MRCI states in {nterm_ci} terms')
if nterm_ci != nterm:
    chem.print_err('', 'Different number of terms from CASSCF and from MRCI')
# Make prefixes enumerative
dfciterm['term'] = chem.enumerative_prefix(dfciterm.term.values)
print('MRCI terms:')
dfciterm

There are 43 MRCI states in 7 terms
MRCI terms:


Unnamed: 0,term,Edav,Erel,erange_cm,irrep,L,S,idx,Labels
0,4F,-57.134039,0.0,0.504792,1,3,1.5,"[0, 1, 2, 3, 4, 5, 6]","[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1]"
1,4P,-57.113585,4489.2,0.085595,1,1,1.5,"[7, 8, 9]","[8.1, 9.1, 10.1]"
2,2G,-57.103005,6811.2,3.42161,1,4,0.5,"[10, 11, 12, 13, 14, 15, 16, 17, 18]","[1.1, 2.1, 3.1, 4.1, 5.1, 6.1, 7.1, 8.1, 9.1]"
3,6D,-57.098997,7690.9,0.013168,1,2,2.5,"[38, 39, 40, 41, 42]","[1.1, 2.1, 3.1, 4.1, 5.1]"
4,2D,-57.098329,7837.6,0.074621,1,2,0.5,"[19, 20, 21, 22, 23]","[10.1, 11.1, 12.1, 13.1, 14.1]"
5,2P,-57.096828,8167.0,0.109737,1,1,0.5,"[24, 25, 26]","[15.1, 16.1, 17.1]"
6,2H,-57.086691,10391.8,3.461115,1,5,0.5,"[27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37]","[18.1, 19.1, 20.1, 21.1, 22.1, 23.1, 24.1, 25...."


In [17]:
# section "SOintegrals" is optional; parse and print its first block if present
if 'SOintegrals' in major_sections:
    SOintgrl = m2.SO_integrals(major_sections['SOintegrals'][0])
    print(SOintgrl)

In [18]:
# break section "soci" into sub-sections
# Only the first "soci" block of the output is used.
sosec = m2.soci_sections(major_sections['soci'][0])
# show which sub-sections were found
sosec.keys()

dict_keys(['matel_comput', 'so_calc', 'so_levels', 'so_vectors', 'so_compos', 'prop', 'tail'])

In [19]:
# SOCI sub-section 'matel_comput' (first block)
matel_block = sosec['matel_comput'][0]
hlsdiag = m2.soci_replacements(matel_block)
# total number of states over all entries of the HLSDIAG list
n_cistates = sum(entry['nstate'] for entry in hlsdiag.values())
print(f'There are {n_cistates} states in the HLSDIAG list')
mat_elems = m2.soci_matelems(matel_block)
# print the matrix elements only when something was parsed
if mat_elems:
    print(mat_elems)

There are 43 states in the HLSDIAG list


In [20]:
# SOCI sub-section 'basis_prop' is optional
if 'basis_prop' in sosec:
    basprop = m2.soci_basis_prop(sosec['basis_prop'][0], n_cistates)
    # first row of the 'DMZ' property matrix
    print(basprop['DMZ'][0, :])

In [21]:
# SOCI sub-section 'so_calc' (first block): reference energy and SO matrix
so_calc_block = sosec['so_calc'][0]
E0 = m2.soci_E0(so_calc_block)
print('E0 = {:.6f} in the SO-CI'.format(E0))
somat = m2.soci_matrix(so_calc_block)
# the number of SO-CI states is the number of rows of the matrix
dimen = somat['matrix'].shape[0]
print('There are {} SOCI states'.format(dimen))

E0 = -57.134040 in the SO-CI
There are 126 SOCI states


In [22]:
# Check for zero spin-orbit coupling:
#   blank the diagonal of a copy of the matrix and look at what is left
offdiag = somat['matrix'].copy()
np.fill_diagonal(offdiag, 0)
amax = np.abs(offdiag).max()
if amax == 0:
    chem.print_err('', 'Off-diagonal elements of spin-orbit matrix are all zero')

In [23]:
# display formatting: map of column name -> format string for Styler.format()
fmt = {'Eshift': '{:.1f}', 'degen': '{:.0f}'}
# columns shown with one decimal (same format as 'Eshift')
one_decimal = ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1',
               'wmean', 'wstds', 'uwmean', 'uwstds', 'change', 'rwmse']
fmt.update({col: fmt['Eshift'] for col in one_decimal})
# columns shown with two decimals
two_decimal = ['dif', 'Theory', 'ecm', 'SOC', 'RMSE']
fmt.update(dict.fromkeys(two_decimal, '{:.2f}'))

In [24]:
# Add MRCI and term parentage of the basis states
#   'ici'   = row index of the matching MRCI state in dfmrci
#   'iterm' = row index in dfciterm of the term whose 'idx' list contains it
for i, bas in enumerate(somat['basis']):
    S = bas['S']
    lbl = bas['State']
    same_state = (dfmrci.Label == lbl) & (dfmrci.S == S)
    subdf = dfmrci[same_state]
    ici = subdf.index[0]  # first match is used
    bas['ici'] = ici
    for iterm, idxlist in dfciterm['idx'].items():
        if ici in idxlist:
            bas['iterm'] = iterm
            break

In [25]:
# SOCI sub-section 'so_levels' (first block): energies of the spin-orbit levels
so_energies = m2.soci_energies(sosec['so_levels'][0])
df_soE = pd.DataFrame(so_energies)
print('There are {} spin-orbit levels'.format(len(df_soE)))
# (display 'df_soE' here to inspect the levels)

There are 126 spin-orbit levels


In [26]:
# Raw spin-orbit correction = the smallest (most negative) 'Eshift' value
SOCraw = min(so_energies['Eshift'])
print(f'From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = {SOCraw:.2f} cm-1')

From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = -3819.58 cm-1


In [27]:
# SO-CI sub-section 'so_vectors'
# Only the first 'so_vectors' block is parsed; the cells below read the
# 'matrix' and 'basis' entries of the result.
so_vecs = m2.soci_vectors(sosec['so_vectors'][0])
#so_vecs.keys()

In [28]:
# check eigenvectors for normality
#    eigenvectors are columns of so_vecs['matrix']
#    any vector whose squared norm differs from 1 by more than 'tol' is listed
tol = 1.e-8
mat = so_vecs['matrix']
for i in range(dimen):
    vec = mat[:, i]
    prod = np.dot(np.conjugate(vec), vec)
    if np.abs(1 - prod) > tol:
        print(i, i, ':  ', np.round(prod, 5))

In [29]:
# check eigenvectors for orthogonality
#    eigenvectors are columns of so_vecs['matrix']
#    every pair (i, j) with j < i is tested; overlaps above 'tol' are listed
mat = so_vecs['matrix']
for i in range(dimen):
    bra = np.conjugate(mat[:, i])
    for j in range(i):
        prod = np.dot(bra, mat[:, j])
        if np.abs(prod) > tol:
            print(i, j, ':  ', np.round(prod, 5))

In [30]:
# SO-CI sub-section 'so_compos'
# Only the first 'so_compos' block is parsed; the cells below read the
# 'matrix' and 'basis' entries of the result.
so_compos = m2.soci_composition(sosec['so_compos'][0])
#so_compos.keys()

In [31]:
# check that all listings of basis states are consistent
#   compare, key by key, the basis descriptions from the SO matrix,
#   the eigenvectors and the composition table; print any that disagree
for a, b, c in zip(somat['basis'], so_vecs['basis'], so_compos['basis']):
    for k in c.keys():
        agree = (a[k] == b[k]) and (a[k] == c[k])
        if not agree:
            for item in (a, b, c):
                print(item)
            print('----------------')

In [32]:
# check that composition is consistent with eigenvectors
#   squared magnitudes of the eigenvector coefficients
vecs = so_vecs['matrix']
magnit = np.conjugate(vecs) * vecs
# get differences in percent (printed by Molpro to 0.01% precision)
difmat = (magnit * 100) - so_compos['matrix']
dmax = np.max(np.abs(difmat))
print('Largest inconsistency between composition and eigenvectors = {:.2f} %'.format(dmax))

Largest inconsistency between composition and eigenvectors = 0.00 %


In [33]:
# Convert basis-state compositions (percent) to term compositions
#   term_compos[iterm, iso] = summed percentage of term 'iterm' in SO state 'iso'
use_printed = False  # use composition % as printed by Molpro
if not use_printed:
    # eigenvectors are orthonormal and printed to 1e-8 precision
    print('Using compositions derived from eigenvectors')
    magpct = np.real(magnit * 100)
else:
    # compositions are printed to 0.01% precision
    print('Using compositions as printed by Molpro')
term_compos = np.zeros((nterm, dimen))
for ibas in range(dimen):
    iterm = somat['basis'][ibas]['iterm']
    # pick the source of the percentages for this basis state
    if not use_printed:
        pct_row = magpct[ibas, :]
    else:
        pct_row = so_compos['matrix'][ibas, :]
    term_compos[iterm, :] += pct_row

Using compositions derived from eigenvectors


In [34]:
# Add J values to dfciterm: for each term, the list of J it can produce
jpossl = [chem.possible_J_from_term(term) for term in dfciterm.term]
dfciterm['J'] = jpossl

In [35]:
# Get target J counts corresponding to the CASSCF terms
#   flatten the per-term J lists into one list of levels
allJ = [jval for jl in dfcasterm['J_vals'] for jval in jl]
J_all = dict(Counter(allJ))
print('Required level counts     :', J_all)
nlevels = len(allJ)
print(f'    There are {nlevels} J-levels')
# each level of a given J contributes (2J+1) sublevels
Jxg = {}
for k, v in J_all.items():
    Jxg[k] = int(v * (2*k+1))
J_left = Jxg.copy()  # copy to be decremented
print('Required sublevel counts:', Jxg)
# no J values have been assigned yet
df_soE['J'] = None

Required level counts     : {1.5: 5, 2.5: 4, 3.5: 3, 4.5: 4, 0.5: 3, 5.5: 1}
    There are 20 J-levels
Required sublevel counts: {1.5: 20, 2.5: 24, 3.5: 24, 4.5: 40, 0.5: 6, 5.5: 12}


### Assign values of <em>J</em> to levels

In [36]:
# Use term composition data to determine possible J assignment for each level
#   For each SO state, take the intersection of the J lists of all terms
#   that contribute at least 'thrpct' percent.
thrpct = 10.  # percentage threshold for consideration
print(f'Considering term compositions above {thrpct}% ')
jpossl = []
npossl = []
for iso in range(dimen):
    #print(f'Level {iso} with Erel = {df_soE.iloc[iso]["Erel"]}')
    jposs = None
    for iterm, c in enumerate(term_compos[:, iso]):
        if c < thrpct:
            continue
        jterm = set(dfciterm.at[iterm, 'J'])
        if jposs is None:
            # first contributing term
            jposs = jterm
        else:
            # subsequent term; take intersection
            jposs = jposs.intersection(jterm)
    if jposs is None:
        # No term reached the threshold: record "no possibilities" as an
        # empty set, so that len() works and the nposs < 1 check that
        # follows can report the problem.
        jposs = set()
    jpossl.append(jposs)
    npossl.append(len(jposs))
df_soE['J_poss'] = jpossl
df_soE['nposs'] = npossl
#df_soE['term_comp'] = list(np.round(term_compos, 1).T)
# one row of term percentages per SO state
df_soE['term_comp'] = list(term_compos.T)

Considering term compositions above 10.0% 


In [37]:
# Check for 0-possibility problems: stop if any level has no allowed J
subdf = df_soE[df_soE.nposs < 1]
if len(subdf) > 0:
    print('*** Some levels have all possibilities eliminated ***')
    display(subdf)
    sys.exit(1)

In [38]:
# Thresholds passed to m2.assign_J_laboriously() in the next cell
thr_degen = 5  # threshold (cm-1) for being clearly degenerate
thr_big = 500  # threshold for clearly non-degenerate
thr_tcomp = 3  # threshold (%) for similar maximum term-composition difference
thr_tcbig = 15 # threshold (%) for clearly different term composition

# Clear the J column so that every level starts unassigned
df_soE['J'] = None

In [39]:
# Assign J to every sublevel; the return value is the number left unassigned
n_unassigned = m2.assign_J_laboriously(df_soE, J_left, thr_degen, thr_big,
                                       thr_tcomp, thr_tcbig, verbose=False)
if not n_unassigned:
    print('All levels were assigned!')
    # check the assignments against Jxg{}
    for J, nJ in Jxg.items():
        dfJ = df_soE[df_soE.J == J]
        nfound = len(dfJ)
        if nfound != nJ:
            print(f'Assignment error!  For J = {J}, {nJ} levels were needed but {nfound} were assigned')
else:
    print(f'J ASSIGNMENTS FAILED FOR {n_unassigned} LEVELS')

All levels were assigned!


In [40]:
# Collect the sublevels into J levels
dflev = m2.collect_atomic_J_sets(df_soE)
# Add leading term, full composition and a rounded composition for display
termlist = dfciterm.term
tlead = []
composDl = []
TC_approx = []  # term compositions rounded for display
for tcomp in dflev.term_comp:
    # leading term = the term with the largest percentage
    tlead.append(termlist[np.argmax(tcomp)])
    composDl.append(dict(zip(termlist, tcomp)))
    TC_approx.append({trm: round(pct, 1) for trm, pct in zip(termlist, tcomp)})
dflev['Lead'] = tlead
dflev['Composition'] = composDl
dflev['TC_approx'] = TC_approx
# Level label = leading term + '_' + J written by chem.halves()
Jlist = dflev.J
Jlbl = []
for t, J in zip(tlead, Jlist):
    Jlbl.append(f'{t}_{chem.halves(J)}')
dflev['Jlbl'] = Jlbl
# reorder columns, drop 'nposs' and 'term_comp'
keepcols = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift', 'Composition', 'E', 'idx',
            'Erel_spread', 'TC_spread', 'TC_approx']
dflev = dflev[keepcols]

In [41]:
# Explain the columns of the table, then show it
for line in ('Check the table below for questionable assignments.',
             '"Erel_spread" shows how much the energies (cm-1) differ within a level.',
             '"TC_spread" shows how much the term compositions (%) differ within a level.',
             '"idx" shows which magnetic sublevels in "df_soE" compose each level.'):
    print(line)
dflev[['Jlbl', 'Erel', 'Erel_spread', 'TC_spread', 'idx']]

Check the table below for questionable assignments.
"Erel_spread" shows how much the energies (cm-1) differ within a level.
"TC_spread" shows how much the term compositions (%) differ within a level.
"idx" shows which magnetic sublevels in "df_soE" compose each level.


Unnamed: 0,Jlbl,Erel,Erel_spread,TC_spread,idx
0,4F_3/2,0.28,0.57,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[0, 1, 2, 3]"
1,4F_5/2,1724.73,0.7,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[4, 5, 6, 7, 8, 9]"
2,4F_7/2,3596.0,4.72,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[10, 11, 12, 13, 14, 15, 16, 17]"
3,4F_9/2,5119.16,6.98,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[18, 19, 20, 21, 22, 23, 24, 25, 26, 27]"
4,4P_3/2,5991.61,2.8,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[28, 29, 30, 31]"
5,4P_1/2,6045.58,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[32, 33]"
6,4P_5/2,8950.89,70.93,"[0.0, 4.0, 0.0, 4.0, 0.0, 0.0, 0.0]","[34, 35, 36, 37, 38, 39]"
7,6D_1/2,9656.49,0.0,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[40, 41]"
8,2G_7/2,9771.37,1.88,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[42, 43, 44, 45, 46, 47, 48, 49]"
9,6D_3/2,9897.32,1.68,"[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]","[50, 51, 52, 53]"


In [42]:
print('Level assignments from the calculation:')
showcols = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift', 'TC_approx']
display(dflev[showcols])
# Are there duplicated leading terms?
#   Within each J, the same leading term should not appear more than once.
dups = False
for j, grp in dflev.groupby('J'):
    lead_counts = Counter(grp.Lead)
    repeated = [lead for lead, n in lead_counts.items() if n > 1]
    if repeated:
        print(f'*** Duplicate leading term for J = {j} ***')
        dups = True
        # show every level whose leading term occurs more than once
        dfdup = grp[grp.Lead.isin(repeated)].copy()
        display(dfdup[showcols].style.format(fmt))

Level assignments from the calculation:


Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,TC_approx
0,4F,1.5,4F_3/2,0.28,-3819.3,"{'4F': 91.5, '4P': 0.1, '2G': 0.0, '6D': 0.0, ..."
1,4F,2.5,4F_5/2,1724.73,-2094.85,"{'4F': 96.6, '4P': 0.0, '2G': 0.0, '6D': 0.0, ..."
2,4F,3.5,4F_7/2,3596.0,-223.59,"{'4F': 96.4, '4P': 0.0, '2G': 3.6, '6D': 0.1, ..."
3,4F,4.5,4F_9/2,5119.16,1299.58,"{'4F': 85.0, '4P': 0.0, '2G': 13.7, '6D': 0.1,..."
4,4P,1.5,4P_3/2,5991.61,2172.02,"{'4F': 2.0, '4P': 68.8, '2G': 0.0, '6D': 1.3, ..."
5,4P,0.5,4P_1/2,6045.58,2226.0,"{'4F': 0.0, '4P': 85.6, '2G': 0.0, '6D': 3.0, ..."
6,4P,2.5,4P_5/2,8950.89,5131.3,"{'4F': 0.3, '4P': 90.6, '2G': 0.0, '6D': 5.1, ..."
7,6D,0.5,6D_1/2,9656.49,5836.91,"{'4F': 0.0, '4P': 2.3, '2G': 0.2, '6D': 94.5, ..."
8,2G,3.5,2G_7/2,9771.37,5951.78,"{'4F': 3.5, '4P': 0.0, '2G': 96.2, '6D': 0.2, ..."
9,6D,1.5,6D_3/2,9897.32,6077.73,"{'4F': 1.5, '4P': 2.8, '2G': 0.4, '6D': 75.0, ..."


In [43]:
# Change assignments of any duplicates
#   Interactive: the user names a level (row label of dflev) and the term to
#   assign to it, repeatedly, until an empty answer is given.
if dups:
    print('Correct the duplicate term assignments')
    ifix = input('Level for which to re-assign the term? ')
    while ifix:
        ifix = int(ifix)
        trm = input(f'Which term do you want to assign to level {ifix}? ')
        dflev.loc[ifix, 'Lead'] = trm
        ifix = input('Another level to re-assign (empty to end)? ')
    # rebuild 'Jlbl' values from the (edited) leading terms of dflev
    #   (the level table is 'dflev'; no 'dfso' exists at this point)
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dflev.Lead, dflev.J)]
    dflev['Jlbl'] = chem.enumerative_prefix(jlbl)
    display(dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift']])

In [44]:
# Check for problems in assignments
#   The number of distinct leading terms should equal the number of MRCI terms.
#   'dropT' stays False unless some starting terms never appear as a leading
#   term, in which case it becomes the set of those terms.
nAssign = len(set(dflev.Lead))
nTerm = len(dfciterm)
dropT = False
if nAssign != nTerm:
    print(f'*** I started with {nTerm} terms but have {nAssign} leading terms ***')
    print('Starting: ', sorted(termsIn))
    termsOut = set(dflev.Lead)
    print('Leading : ', sorted(termsOut))
    if nAssign > nTerm:
        addT = termsOut - termsIn
        print('Added terms: ', addT)
    else:
        dropT = termsIn - termsOut
        print('Dropped terms: ', dropT)
        # Add weights from dropped terms and display
        for term in dropT:
            # one percentage per level (0 where the term is absent)
            wtcol = [comp.get(term, 0) for comp in dflev.Composition]
            dflev[term] = wtcol
        print('Weights (%) of dropped terms in levels:')
        display(dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))
# The levels must account for every SO-CI state: sum of (2J+1) == dimen
nlvl = (2 * dflev.J + 1).sum()  # number of sublevels
if nlvl != dimen:
    print(f'*** I started with {dimen} (sub)levels but now have {nlvl} ***')

In [45]:
# Manually assign any dropped terms
#   Interactive: for each dropped term the user names the level (row label
#   of dflev) that should carry it as leading term.
if dropT:
    for drt in dropT:
        answer = input(f'Which level do you want to assign to term {drt}? ')
        ia = int(answer)
        dflev.loc[ia, 'Lead'] = drt
    # rebuild 'Jlbl' values
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dflev.Lead, dflev.J)]
    dflev['Jlbl'] = chem.enumerative_prefix(jlbl)
    cols_with_dropped = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)
    display(dflev[cols_with_dropped].style.format(fmt))

In [46]:
# Inversion parity of the calculated levels
#   With the Ci point group and a single irrep, irrep 1 means even parity
#   and anything else means odd; otherwise the user is asked.
irreps_ci = set(dfciterm.irrep)
single_irrep = (len(irreps_ci) == 1)
if (PG == 'Ci') and single_irrep:
    parity = 'even' if 1 in irreps_ci else 'odd'
else:
    # ask user for parity of interest
    parity = input('Please choose "even" or "odd" parity: ')
print(f'Experimental states will be restricted to parity = {parity}')

Experimental states will be restricted to parity = even


### Read experimental energy levels

In [47]:
# Net charge = nuclear charge minus the electron count of the last MRCI
#   ('mrci_meta' still holds the metadata of the final MRCI that was parsed)
charge = Qtot - mrci_meta['nelec']
labels_ordinated = False  # flag to prevent multiple (1)(1)(1) etc.
# Spectrum label used in the file name: atom + '_' + (charge+1) letters 'I',
# or atom + '_neg' for an anion.
# NOTE(review): repeating 'I' is a valid Roman numeral only up to 'III'.
if charge < 0:
    # anion
    atstr = atom + '_neg'
else:
    atstr = atom + '_' + 'I' * (charge + 1)
fxl = f'{atstr}_exptl_levels.xlsx'
exp_alt = False
# Special cases that have an alternative data file
if atstr == 'Ra_I':
    fxlalt = 'Ra_I_exptl_levels_plus_theory.xlsx'
elif atstr in ['Ar_I', 'Pb_I', 'Kr_I']:
    fxlalt = f'{atstr}_exptl_even_assign.xlsx'
elif atstr in ['Br_I', 'I_I']:
    fxlalt = f'{atstr}_exptl_odd_assign.xlsx'
else:
    fxlalt = None

if fxlalt is not None:
    print('** Using alternative experimental data file ***')
    exp_alt = True
    fxl = fxlalt
# The Excel file is expected in the same directory as the Molpro output
xlpath = os.sep.join([fdir, fxl])
dfexpt = pd.read_excel(xlpath)
if exp_alt and ('LS' in dfexpt.columns):
    # use manual assignments: wherever 'LS' is filled in, it replaces 'Term'
    print('** Using term labels in column "LS"')
    has_ls = dfexpt['LS'].notnull()
    dfexpt.loc[has_ls, 'Term'] = dfexpt.loc[has_ls, 'LS']
print(f'Experimental energy levels read from {fxl}')
# If there is a column "comment", replace NaN with ''
if 'comment' in dfexpt.columns:
    dfexpt['comment'] = dfexpt['comment'].fillna('')

Experimental energy levels read from Ta_I_exptl_levels.xlsx


In [48]:
# Find the number of decimal places in the level energies
#   Each value is converted to text and the digits after its last '.' are
#   counted; the largest count over the column is kept.
Ecol = 'Level (cm-1)'  # the exptl energy column
ndecim = 0
for e in dfexpt[Ecol]:
    head, dot, frac = str(e).rpartition('.')
    if not dot:
        # no decimal point (e.g. an integer): zero decimal places, so the
        # digits of the integer itself must not be counted
        continue
    # count numeric digits
    n = sum(c.isdigit() for c in frac)
    ndecim = max(n, ndecim)
print(f'Experimental energies are provided to {ndecim} decimal digits')
# display formatting
fmt[Ecol] = '{:.' + str(ndecim) + 'f}'

Experimental energies are provided to 2 decimal digits


In [49]:
# Delete any ionization limit
#   Rows are cut at the first row whose Term is 'Limit'.  When there is no
#   such row nothing is truncated (avoids calling truncate() with NaN).
limit_rows = dfexpt[dfexpt.Term == 'Limit']
n1 = len(dfexpt)
if len(limit_rows):
    ilim = limit_rows.index.min()
    # delete the "Limit" row and everything past it
    dfexpt = dfexpt.truncate(after=ilim-1)
else:
    ilim = None
n2 = len(dfexpt)
if n2 < n1:
    print(f'Discarding {n1-n2} ionized or metastable states')
oddstr = r'\*$|°' # characters to identify terms of odd parity
# Sometimes parity is shown in configuration alone?
#dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr))].copy()
#dfodd = dfexpt[dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr)].copy()
dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr))].copy()
dfodd = dfexpt[dfexpt.Term.str.contains(oddstr)].copy()
print(f'{len(dfexpt)} experimental levels ({len(dfeven)} even and {len(dfodd)} odd)')
# Select by parity
if parity == 'even':
    # discard odd levels ('Term' field ends with '*')
    dfexpt = dfeven.copy()
elif parity == 'odd':
    dfexpt = dfodd.copy()
else:
    chem.print_err('', f'Parity of "{parity}" is not recognized')
n3 = len(dfexpt)
print(f'{n3} levels accepted for parity = {parity}')
# Reject bad values of J: anything chem.halves_to_float() cannot convert
for i in dfexpt.index:
    try:
        chem.halves_to_float(dfexpt.loc[i, 'J'])
    except ValueError:
        dfexpt.at[i, 'J'] = np.nan
nbad = dfexpt.J.isna().sum()
if nbad:
    print(f'** Rejecting {nbad} levels with malformed J values')
    # Drop only the rows whose J is missing; NaN in other columns
    # (e.g. an empty 'LS' or 'Configuration') must not discard a level.
    dfexpt = dfexpt.dropna(subset=['J'])
    n4 = len(dfexpt)
    print(f'{n4} level retained')
# Assign unique term symbols (done only once per run of the preceding cell)
if not labels_ordinated:
    dfexpt = chem.unique_labels_exptl_terms(dfexpt, verbose=True, always=True)
    labels_ordinated = True
# Add column for degeneracy, 2J + 1
dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)).astype(int) + 1
dfexpt

Discarding 4 ionized or metastable states
275 experimental levels (72 even and 203 odd)
72 levels accepted for parity = even


Unnamed: 0,Configuration,Term,J,Level (cm-1),uTerm,degen
0,5d36s2,a 4F,3/2,0.0,(1)4F,4
1,5d36s2,a 4F,5/2,2010.1,(1)4F,6
2,5d36s2,a 4F,7/2,3963.92,(1)4F,8
3,5d36s2,a 4F,9/2,5621.04,(1)4F,10
4,5d36s2,a 4P,1/2,6049.42,(1)4P,2
5,5d36s2,a 4P,3/2,6068.91,(1)4P,4
6,5d36s2,a 4P,5/2,9253.43,(1)4P,6
7,5d36s2,a 2G,7/2,9705.38,(1)2G,8
8,5d36s2,a 2G,9/2,10690.32,(1)2G,10
9,5d4(5D)6s,a 6D,1/2,9758.97,(1)6D,2


In [50]:
def match_term_symbol(symb_expt, symb_calc):
    '''
    Compare an experimental term symbol with a computed one.
    Return True if they are equal, either as given or after the
      substrings '(1)' and 'a ' are removed from 'symb_expt';
      otherwise return False
    '''
    same_as_given = (symb_calc == symb_expt)
    # tolerate the extra '(1)' or 'a ' that may decorate the exptl label
    bare_expt = symb_expt.replace('(1)', '').replace('a ', '')
    return same_as_given | (symb_calc == bare_expt)

In [51]:
def match_th_expt_1J(dfJth, dfJx, bigerr=2000):
    '''
    Pair theoretical levels with experimental levels that share one value of J.
    'dfJth' holds the theoretical levels and 'dfJx' the experimental levels.
    A pairing is made when the leading term of the calculation ('Lead')
        agrees with the experimental unique term label ('uTerm').
    Return a list with one entry per row of dfJth:
        an index label of dfJx (exactly one candidate),
        np.nan (no candidate), or
        an array of index labels (several candidates)
    'bigerr' (cm-1) is handed to check_match_1J(), which reviews the list
    '''
    idx = []
    for lead in dfJth.Lead:
        # flag the exptl levels whose term label agrees with the leading term
        flags = [match_term_symbol(str(uterm), lead) for uterm in dfJx.uTerm]
        candidates = dfJx[flags].index
        ncand = len(candidates)
        if ncand > 1:
            # ambiguous: keep all candidate labels
            idx.append(candidates.values)
        elif ncand == 1:
            # ideal situation
            idx.append(candidates[0])
        else:
            # no exptl level carries this term assignment
            idx.append(np.nan)
    # report (and eventually repair) any problems
    return check_match_1J(dfJth, dfJx, idx, bigerr=bigerr)

In [52]:
def check_match_1J(dfJth, dfJx, idx, bigerr):
    '''
    Report problems with the tentative matches 'idx' from match_th_expt_1J()
    dfJth : theoretical levels for one J
            (columns used: Jlbl, Erel, Lead, TC_approx, Composition)
    dfJx  : experimental levels for the same J
            (columns used: uTerm and the energy column named by 'Ecol')
    idx   : one entry per row of dfJth; an index label of dfJx, np.nan
            (no match) or an array of labels (multiple matches)
    bigerr: energy error (cm-1) that triggers extra scrutiny
    Return 'idx' unchanged; repairs are not yet implemented (NYI)
    This function to be modified to handle problems as they arise
    '''
    toomuch = 20.  # difference in composition (%) that is too big to ignore
    ok = True
    # Entries are tested one at a time because 'idx' may mix scalars with
    #   arrays, which np.isnan() cannot digest as a whole
    for i, ix in enumerate(idx):
        if np.ndim(ix) == 0 and pd.isna(ix):
            # a theor level got no exptl match
            ok = False
            row = dfJth.iloc[i]
            print(f'*** no exptl level matched to {row.Jlbl} at Erel = {row.Erel}')
            # NYI
    for i, ix in enumerate(idx):
        if np.ndim(ix) > 0 and len(ix) > 1:
            # a theor level got more than one exptl match
            ok = False
            row = dfJth.iloc[i]
            print(f'*** exptl terms {dfJx.loc[ix, "uTerm"]} matched to {row.Jlbl}' +
                 f' at Erel = {row.Erel}')
            # NYI
    if ok:
        # all looks OK, but check for big errors in energy
        errs = dfJth.Erel.values - dfJx.loc[idx][Ecol].values
        ibig = np.argwhere(np.abs(errs) > bigerr).flatten()
        for ifix in ibig:
            jlbl = dfJth.iloc[ifix]['Jlbl']
            print(f'  Level {jlbl} has big error = {errs[ifix]:.0f} cm-1')
            tcomp, lbls, comp = chem.sort_dict_by_value(dfJth.iloc[ifix]['TC_approx'],
                                            reverse=True, lists=True)
            print( '    composition =', tcomp)
            if len(comp) < 2:
                # only one term contributes; there is no alternative to weigh
                print('    term assignment is clear (only one term contributes)')
                continue
            # Is the weight of the second term close to that of the first?
            dif = comp[0] - comp[1]
            if dif > toomuch:
                print(f'    term assignment is clear (weight diff = {dif:.1f} %)')
            else:
                print(f'    leading ({lbls[0]}) and second ({lbls[1]}) weights' +
                      f' differ by {dif:.1f}%')
                if lbls[1] in dfJth.Lead.values:
                    # another level has already been assigned the second term
                    #   but check weights
                    acomp = dfJth[dfJth.Lead == lbls[1]]['Composition'].values[0]
                    acomp, tms, pct = chem.sort_dict_by_value(acomp, reverse=True,
                                                             lists=True)
                    # lead of that level over its own runner-up (if it has one)
                    ldif = pct[0] - pct[1] if len(pct) > 1 else pct[0]
                    print(f'      but {lbls[1]} already leads a level, ahead by ' +
                          f'{ldif:.1f} %')
                else:
                    print(f'Do you want to reassign this level to term {lbls[1]}?')
                    print('---TBD---')
    return idx

In [53]:
def match_theory_to_expt(dfth, dfx, bigerr=2000):
    '''
    Match experimental levels to theoretical
    dfth : theoretical levels (columns used: J, Lead, TC_approx, Erel,
           Composition, plus those needed by match_th_expt_1J())
    dfx  : experimental levels (J is compared with chem.halves(J) of theory)
    Return a DataFrame containing both theory and expt, and
      the index of the highest level matched
      (the largest index label of 'dfx' that received a match)
    Theoretical levels without an unambiguous experimental partner are
      left out; match_th_expt_1J() prints a warning for each of them
    'bigerr' is in cm-1 and triggers extra scrutiny
    '''
    print('Matching experimental levels with theoretical levels')
    Jlist = sorted(set(dfth.J))
    # work on the arguments, not on notebook globals
    dfcomp = dfx.copy()
    dfcomp['Tcalc'] = ''  # term assignment in computation
    dfcomp['leadwt'] = ''
    dfcomp['Ecalc'] = np.nan
    dfcomp['termwt'] = None
    dfcomp['Composition'] = None
    imax = 0  # index of highest level matched
    for J in Jlist:
        print(f'J = {J}')
        hJ = chem.halves(J)  # as str fraction
        # get the indices of the exptl levels that best match theoretical
        dfJth = dfth[dfth.J == J]
        dfJx = dfx[dfx.J == hJ]
        idx = match_th_expt_1J(dfJth, dfJx, bigerr=bigerr)
        for i, ix in enumerate(idx):
            if np.ndim(ix) > 0 or pd.isna(ix):
                # ambiguous (array of labels) or missing (NaN) match:
                #   writing it would add a bogus row or fail, so skip it
                continue
            rowth = dfJth.iloc[i]
            dfcomp.at[ix, 'Tcalc'] = rowth.Lead
            dfcomp.at[ix, 'leadwt'] = rowth.TC_approx[rowth.Lead]
            dfcomp.at[ix, 'Ecalc'] = rowth.Erel
            dfcomp.at[ix, 'termwt'] = chem.sort_dict_by_value(rowth.TC_approx,
                                        reverse=True)
            dfcomp.at[ix, 'Composition'] = rowth.Composition
            imax = max(imax, ix)
    return dfcomp, imax

In [54]:
# Match theoretical and experimental levels
# imax is the largest experimental index label that received a match
#   (the highest-energy matched level only if the index follows energy order)
dfdiff, imax = match_theory_to_expt(dflev, dfexpt)
# signed error, theory minus experiment; NaN for exptl levels left unmatched
dfdiff['err'] = dfdiff.Ecalc - dfdiff[Ecol]

Matching experimental levels with theoretical levels
J = 0.5
J = 1.5
  Level 2D_3/2 has big error = -5395 cm-1
    composition = {'2D': 41.2, '6D': 22.7, '4P': 21.5, '2P': 9.1, '4F': 3.8, '2G': 0.9, '2H': 0.7}
    leading (2D) and second (6D) weights differ by 18.5%
      but 6D already leads a level, ahead by 61.0 %
  Level 2P_3/2 has big error = 4560 cm-1
    composition = {'2P': 62.3, '2D': 29.8, '4P': 6.7, '4F': 1.2, '6D': 0.1, '2G': 0.0, '2H': 0.0}
    term assignment is clear (weight diff = 32.5 %)
J = 2.5
J = 3.5
J = 4.5
J = 5.5


In [55]:
# Describe the highest matched level as 'term_J' with its exptl energy
t = dfdiff.loc[imax, 'Tcalc']
J = dfdiff.loc[imax, 'J']
levmax = f'{t}_{J}'
emax = dfdiff.loc[imax, Ecol]
showcols = ['Configuration', 'uTerm', 'Tcalc', 'J', Ecol, 'Ecalc', 'err', 'termwt']
print(f'\nHighest level matched is {levmax} at {emax} cm-1 (exptl energy)')
# Notify about any exptl levels that the calculation skipped over
#   Slice up to 'imax' BEFORE filtering: the row 'imax' is matched, so its
#   label is absent from the unmatched rows and slicing those by label
#   would fail whenever the index is not monotonic
dfbelow = dfdiff.loc[:imax]
dfskipped = dfbelow[dfbelow.Ecalc.isna()]
if len(dfskipped):
    print('** Some experimental levels are skipped in the calculation **')
    #display(dfskipped)
    display(dfbelow[showcols])


Highest level matched is 2H_9/2 at 15391.01 cm-1 (exptl energy)
** Some experimental levels are skipped in the calculation **


Unnamed: 0,Configuration,uTerm,Tcalc,J,Level (cm-1),Ecalc,err,termwt
0,5d36s2,(1)4F,4F,3/2,0.0,0.28,0.28,"{'4F': 91.5, '2D': 7.8, '2P': 0.6, '4P': 0.1, ..."
1,5d36s2,(1)4F,4F,5/2,2010.1,1724.73,-285.37,"{'4F': 96.6, '2D': 3.3, '4P': 0.0, '2G': 0.0, ..."
2,5d36s2,(1)4F,4F,7/2,3963.92,3596.0,-367.92,"{'4F': 96.4, '2G': 3.6, '6D': 0.1, '4P': 0.0, ..."
3,5d36s2,(1)4F,4F,9/2,5621.04,5119.16,-501.88,"{'4F': 85.0, '2G': 13.7, '2H': 1.1, '6D': 0.1,..."
4,5d36s2,(1)4P,4P,1/2,6049.42,6045.58,-3.84,"{'4P': 85.6, '2P': 11.4, '6D': 3.0, '4F': 0.0,..."
5,5d36s2,(1)4P,4P,3/2,6068.91,5991.61,-77.3,"{'4P': 68.8, '2P': 21.4, '2D': 6.4, '4F': 2.0,..."
6,5d36s2,(1)4P,4P,5/2,9253.43,8950.89,-302.54,"{'4P': 90.6, '6D': 5.1, '2D': 4.0, '4F': 0.3, ..."
7,5d36s2,(1)2G,2G,7/2,9705.38,9771.37,65.99,"{'2G': 96.2, '4F': 3.5, '6D': 0.2, '4P': 0.0, ..."
8,5d36s2,(1)2G,2G,9/2,10690.32,10579.27,-111.05,"{'2G': 48.6, '2H': 37.3, '4F': 12.2, '6D': 1.3..."
9,5d4(5D)6s,(1)6D,6D,1/2,9758.97,9656.49,-102.48,"{'6D': 94.5, '2P': 3.0, '4P': 2.3, '2G': 0.2, ..."


In [56]:
# Convert str values of J to float
dfdiff['J'] = dfdiff.J.apply(chem.halves_to_float)
warnThresh = 1000  # highlight errors larger than this (cm-1)
# drop rows with NaN (no matching level in the calculation)
dfdiff = dfdiff.dropna(axis=0)
# Print a warning if experimental levels are missing
nth = len(dflev); ndiff = len(dfdiff)
expt_missing = nth - ndiff
if expt_missing > 0:
    print(f'\n**** There are {nth} theoretical levels but only {ndiff} matching experimental levels ****')
else:
    # use as flag
    expt_missing = 0
# Columns to display; built as a copy so that re-running this cell does not
#   keep growing 'showcols'
dispcols = list(showcols)
if 'comment' in dfdiff.columns and 'comment' not in dispcols:
    dispcols.append('comment')


def _style_big_err(errs):
    # yellow background for errors whose magnitude exceeds warnThresh
    return ["background: yellow" if abs(v) > warnThresh else "" for v in errs]


def _style_term_mismatch(tcalc):
    # red background where the computed term DISAGREES with the exptl term
    #   The comparison is made row by row because match_term_symbol()
    #   strips the '(1)' / 'a ' decorations only from plain strings
    uterms = dfdiff.loc[tcalc.index, 'uTerm']
    return ["" if match_term_symbol(str(tx), tc) else "background-color: red; color: white"
            for tx, tc in zip(uterms, tcalc)]


print(f'Please inspect the following pairing of theory ("Ecalc") with expt ("{Ecol}")')
print('Disagreements in term assignments are highlighted in red')
print(f'Errors > {warnThresh} cm-1 are highlighted in yellow')
display(dfdiff[dispcols].style
        .apply(_style_big_err, subset=['err'])
        .apply(_style_term_mismatch, subset=['Tcalc'])
        .format(fmt))

Please inspect the following pairing of theory ("Ecalc") with expt ("Level (cm-1)")
Disagreements in term assignments are highlighted in red
Errors > 1000 cm-1 are highlighted in yellow


Unnamed: 0,Configuration,uTerm,Tcalc,J,Level (cm-1),Ecalc,err,termwt
0,5d36s2,(1)4F,4F,1.5,0.0,0.3,0.3,"{'4F': 91.5, '2D': 7.8, '2P': 0.6, '4P': 0.1, '2G': 0.0, '6D': 0.0, '2H': 0.0}"
1,5d36s2,(1)4F,4F,2.5,2010.1,1724.7,-285.4,"{'4F': 96.6, '2D': 3.3, '4P': 0.0, '2G': 0.0, '6D': 0.0, '2P': 0.0, '2H': 0.0}"
2,5d36s2,(1)4F,4F,3.5,3963.92,3596.0,-367.9,"{'4F': 96.4, '2G': 3.6, '6D': 0.1, '4P': 0.0, '2D': 0.0, '2P': 0.0, '2H': 0.0}"
3,5d36s2,(1)4F,4F,4.5,5621.04,5119.2,-501.9,"{'4F': 85.0, '2G': 13.7, '2H': 1.1, '6D': 0.1, '4P': 0.0, '2D': 0.0, '2P': 0.0}"
4,5d36s2,(1)4P,4P,0.5,6049.42,6045.6,-3.8,"{'4P': 85.6, '2P': 11.4, '6D': 3.0, '4F': 0.0, '2G': 0.0, '2D': 0.0, '2H': 0.0}"
5,5d36s2,(1)4P,4P,1.5,6068.91,5991.6,-77.3,"{'4P': 68.8, '2P': 21.4, '2D': 6.4, '4F': 2.0, '6D': 1.3, '2G': 0.0, '2H': 0.0}"
6,5d36s2,(1)4P,4P,2.5,9253.43,8950.9,-302.5,"{'4P': 90.6, '6D': 5.1, '2D': 4.0, '4F': 0.3, '2G': 0.0, '2P': 0.0, '2H': 0.0}"
7,5d36s2,(1)2G,2G,3.5,9705.38,9771.4,66.0,"{'2G': 96.2, '4F': 3.5, '6D': 0.2, '4P': 0.0, '2D': 0.0, '2P': 0.0, '2H': 0.0}"
8,5d36s2,(1)2G,2G,4.5,10690.32,10579.3,-111.0,"{'2G': 48.6, '2H': 37.3, '4F': 12.2, '6D': 1.3, '2D': 0.3, '4P': 0.2, '2P': 0.1}"
9,5d4(5D)6s,(1)6D,6D,0.5,9758.97,9656.5,-102.5,"{'6D': 94.5, '2P': 3.0, '4P': 2.3, '2G': 0.2, '2D': 0.1, '4F': 0.0, '2H': 0.0}"


In [57]:
# No theoretical calculations are needed to use eq. (1)
#   eq. (1): the energy of a term is the degeneracy-weighted mean of the
#   experimental energies of the levels that carry that term label
xterms = []  # list of term labels
eterms = []  # list of term energies
# collect the exptl term labels of the matched levels, in order of appearance
for term in dfdiff.uTerm:
    if term not in xterms:
        xterms.append(term)
# average over ALL exptl levels of each term (dfexpt, not only matched ones)
for Term in xterms:
    subdf = dfexpt[dfexpt.uTerm == Term]
    emean = np.dot(subdf.degen, subdf[Ecol]) / subdf.degen.sum()
    eterms.append(emean)
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms}).sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1) (experimental data with naive model)')
display(dfeq1.style.format(fmt))
# the correction is minus the energy of the lowest term, rounded to 3 decimals
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
lowterm = dfeq1.at[0, 'Term']
print(f'The term of lowest energy is \t{lowterm} \twith SOC1 = {SOC1} cm-1')
# term label of the first row of dfexpt
#   NOTE(review): assumes the first row is the lowest level -- confirm
levterm = dfexpt.uTerm.values[0]

# default choice of term for the spin-orbit correction
target = levterm

if levterm != lowterm:
    # The lowest term is not the leading term in the lowest level
    SOC1alt = SOC1  # keep the value for the lowest term before replacing it
    SOC1 = -1 * np.round(dfeq1[dfeq1.Term == levterm]['Eterm'].values[0], 3)
    print(f'The lowest level belongs to \t{levterm} \twith SOC1 = {SOC1} cm-1')
print()
print(f'Term {target} is selected for calculating the spin-orbit correction')
print('    to change this, assign the variable "target" to another term')

Term energies (cm-1) using eq. (1) (experimental data with naive model)


Unnamed: 0,Term,Eterm
0,(1)4F,3570.8
1,(1)4P,7657.9
2,(1)2G,10252.6
3,(1)2P,11230.9
4,(1)6D,11942.5
5,(1)2D,14081.1
6,(1)2H,15240.0


The term of lowest energy is 	(1)4F 	with SOC1 = -3570.799 cm-1

Term (1)4F is selected for calculating the spin-orbit correction
    to change this, assign the variable "target" to another term


In [58]:
def term_energy_from_levels(df, term, returnDF=False):
    '''
    Average energy of a term as derived from the levels [eq. (2) in pub.]
    'df' must have the columns 'J', 'Composition', 'degen' and Ecol,
        where 'Ecol' is the header for the column of exptl level energies
        (also 'Configuration', 'Ecalc' and 'err' when 'returnDF' is set)
    'term' is the term label; a leading "(1)" is removed when the label
        is not found among the keys of a level's composition
    Return the term energy; if 'returnDF', also return a DataFrame
        showing how the term is distributed among the levels
    '''
    key = term
    pct = []  # weight (%) of the term in each level
    for level_comp in df.Composition.values:
        if key not in level_comp.keys():
            # maybe need to remove leading "(1)"
            key = key.replace('(1)', '')
        pct.append(level_comp[key])
    termwt = np.array(pct)
    # weights including degeneracies
    dweight = df.degen.values * termwt
    Eterm = np.dot(df[Ecol], dweight) / dweight.sum()
    if returnDF:
        # Construct DF showing distribution of term among levels
        keep = ['Configuration', 'J', 'degen', Ecol, 'Ecalc', 'err']
        df_distrib = df[keep].copy()
        # fraction instead of percent
        df_distrib.insert(0, 'weight', termwt / 100)
        return Eterm, df_distrib
    return Eterm

In [59]:
# show the term currently selected for the spin-orbit correction
target

'(1)4F'

In [60]:
# Use experimental level energies via eq. (2) (from the publication)
#   df_distrib lists the weight of the target term in every matched level
Eterm, df_distrib = term_energy_from_levels(dfdiff, target, returnDF=True)
# the correction is minus the term energy
SOC2 = -Eterm
print('Applying eq. (2) (experimental energies and theoretical term weights)')
print(f'For term {target}, SOC2 = {SOC2:.2f} cm-1')

Applying eq. (2) (experimental energies and theoretical term weights)
For term (1)4F, SOC2 = -4163.31 cm-1


In [61]:
# df_distrib was computed for 'target', so label it with 'target'
#   (not with a loop variable left over from another cell)
print(f'Distribution of term {target} among levels:')
display(df_distrib.sort_values('weight', ascending=False))
# sum of fractional weights, plain and weighted by level degeneracy
wtot = df_distrib.weight.sum()
dwtot = np.dot(df_distrib.weight, df_distrib.degen)
print('Total weight = {:.3f} ({:.3f} including degeneracies)'.format(wtot, dwtot))

Distribution of term (1)2H among levels:


Unnamed: 0,weight,Configuration,J,degen,Level (cm-1),Ecalc,err
1,0.965949,5d36s2,2.5,6,2010.1,1724.73,-285.37
2,0.96396,5d36s2,3.5,8,3963.92,3596.0,-367.92
0,0.914984,5d36s2,1.5,4,0.0,0.28,0.28
3,0.850432,5d36s2,4.5,10,5621.04,5119.16,-501.88
8,0.122195,5d36s2,4.5,10,10690.32,10579.27,-111.05
18,0.038385,5d36s2,1.5,4,15903.77,10509.04,-5394.73
7,0.035288,5d36s2,3.5,8,9705.38,9771.37,65.99
17,0.029204,5d36s2,2.5,6,12865.97,12754.29,-111.68
20,0.02371,5d36s2,4.5,10,15391.01,15091.88,-299.13
5,0.02045,5d36s2,1.5,4,6068.91,5991.61,-77.3


Total weight = 4.002 (28.000 including degeneracies)


In [62]:
# Summary of the alternative estimates of the spin-orbit correction
soc_rule = '-' * 25
soc_row = '{:12s} {:.2f} cm-1'
print(f'Molpro source file: {fname}\n')
print(f'Alternative values for E_so[{target}] of atom {atom}:')
print(soc_rule)
print(soc_row.format('eq (1)', SOC1))
print(soc_row.format('raw theory', SOCraw))
print(soc_row.format('eq (2)', SOC2))
print(soc_rule)

Molpro source file: Ta_Q10D28S5_cvtz-pp.out

Alternative values for E_so[(1)4F] of atom Ta:
-------------------------
eq (1)       -3570.80 cm-1
raw theory   -3819.58 cm-1
eq (2)       -4163.31 cm-1
-------------------------


In [63]:
# Term energy errors as inferred from all levels
#   For each exptl term, the errors of all matched levels are averaged with
#   weights = (fraction of the term in the level) * (degeneracy of the level)
termlist = []
wmean = []
wstds = []
# also consider unsigned (absolute value) errors
uwmean = []
uwstds = []
# also consider RMSE
trmse = []
# Visit the terms in order of first appearance.  Iterating over a set of
#   strings gives an order that changes from one kernel to the next, which
#   made the row labels of the table irreproducible.
for term in list(dict.fromkeys(dfdiff.uTerm)):
    termlist.append(term)
    eterm, df_distro = term_energy_from_levels(dfdiff, term, returnDF=True)
    wts = df_distro.weight.values * df_distro.degen
    # signed error: weighted mean and its spread
    m, s = chem.weighted_mean(df_distro.err, wts)
    wmean.append(m)
    wstds.append(s)
    # unsigned error
    uerr = np.abs(df_distro.err.values)
    um, us = chem.weighted_mean(uerr, wts)
    uwmean.append(um)
    uwstds.append(us)
    # root of the weighted mean squared error
    umsq, ussq = chem.weighted_mean(uerr ** 2, wts)
    trmse.append(np.sqrt(umsq))
dftermerr = pd.DataFrame({'Term': termlist, 'wmean': wmean, 'wstds': wstds,
                         'uwmean': uwmean, 'uwstds': uwstds, 'rwmse': trmse})

print('Errors in term energies (cm-1) as inferred from the full distribution')
print('    of each term over all levels')
# default order same as experimental terms
dftermerr.Term = pd.Categorical(dftermerr.Term, xterms)
dftermerr = dftermerr.sort_values('Term')
#dftermerr.sort_values('uwmean').style.format(fmt)
dftermerr.style.format(fmt)

Errors in term energies (cm-1) as inferred from the full distribution
    of each term over all levels


Unnamed: 0,Term,wmean,wstds,uwmean,uwstds,rwmse
2,(1)4F,-343.6,76.0,360.3,74.4,571.9
6,(1)4P,-461.8,404.3,664.7,431.7,1613.2
0,(1)2G,-119.1,95.2,175.5,63.8,321.2
3,(1)6D,-568.8,167.9,569.7,168.0,1039.8
4,(1)2P,1470.3,1496.5,2318.7,1196.4,3230.5
5,(1)2D,-436.2,1052.4,1522.0,1082.8,2699.6
1,(1)2H,-171.8,51.3,171.8,51.3,264.1
