In [None]:
# Extract SO-CI information from MOLPRO outputs for atoms
#   Read exptl data from Excel file, combine with weights to get E_so
#   The experimental Excel file is generated by get_NIST_atomic_data.ipynb
# More robust J assignments (attempted)
# KKI 8/30/2024
# Working for difficult Ta case (9/25/2024)
# TO DO:
#    (1) get it working for even-electron case (Kr test)
#    (2) transparent handling of missing exptl levels in eq. (2)
import re, sys, os
import numpy as np
import pandas as pd
from collections import Counter
import random
import matplotlib.pyplot as plt
#from sklearn.cluster import KMeans

import chem_subs as chem
import molpro_subs as mpr
import molpro_subs2 as m2

# Display every DataFrame row (no truncation of long tables)
pd.set_option('display.max_rows', None)
# Print small floating-point values in fixed-point instead of scientific notation
np.set_printoptions(suppress=True)

### Specify Molpro SO-CI output file

In [None]:
# Name of the Molpro output file to analyze.
# The text before the first underscore is used as the element symbol below.
#fname = 'kr_1SDPS_3SDP_dac5z_87_w99.pro'
fname = 'Ta_Q10D28S5_cvtz-pp.out'

In [None]:
# my atom subdirectory names look like "Ar_I" (for neutral argon)
# Element symbol = leading part of the file name, capitalized
el = fname.split('_')[0].capitalize()
# NOTE(review): machine-specific absolute path; edit for your own installation
fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\{:s}_I'.format(el)
#fdir = r'C:\Users\dagbaglo\Desktop\So-ci_energy\{:s}_I'.format(el)

In [None]:
# Full path of the Molpro output file: directory + separator + file name
fsoc = fdir + os.sep + fname
print('Reading MOLPRO file')
print(fsoc)

In [None]:
# Identify the different sections of the output file
#   'major_sections' is indexed by section name in the cells below
major_sections, linenos = m2.identify_sections(fsoc)
# Disabled debugging listing: number of text blocks found for each section
if False:
    print('Major sections:')
    for k, v in major_sections.items():
        print(f'   {k:<11s}   {len(v)} text blocks')

In [None]:
# section "header": name of the basis set (last block of the section)
basisset = m2.basisset_name(major_sections['header'][-1])

# section "integrals": everything below is parsed from its last block
integrals_text = major_sections['integrals'][-1]
PG = m2.point_group(integrals_text)
print(f'Computational point group = {PG}')
if PG != 'Ci':
    chem.print_err('', 'Ci point group is required for this analysis')

# Basis-set size
nprim = m2.nbf_primitive(integrals_text)
nbf = m2.nbf(integrals_text)
print(f'{basisset} basis set')
print(f'    {nprim} primitives')
print(f'    {nbf} contracted basis functions')

# The atom is taken from the last entry of the coordinate list and must
# agree with the element symbol derived from the file name
crd = m2.coordinates(integrals_text)
atom = crd[-1]['el']
if atom != el:
    chem.print_err('', f'This looks like the wrong atom ({atom}) for the filename ({el})')

# Nuclear charge reported by Molpro versus the atomic number of the element
Qtot = m2.nuclear_charge_total(integrals_text)
print(f'Atom "{atom}" with nuclear charge = {Qtot}')
Zel = chem.elz(atom, 'Z')
if Zel > Qtot:
    print(f'    pseudopotential replaces {Zel - Qtot} core electrons')

In [None]:
# section "rhf": parse the last RHF block
rhf_text = major_sections['rhf'][-1]
occup_hf = m2.hf_occup(rhf_text)
print('HF occupations: ', occup_hf)
hf_results = m2.scf_result('RHF', rhf_text)
print('HF energy = {:.6f} for state {:s}'.format(hf_results['E'], hf_results['Label']))
orbtitle, dfHForb = m2.parse_orbitals(rhf_text)
# Electron count = sum of every entry in every occupation list
nel_HF = sum(map(sum, occup_hf.values()))
print(f'HF has {nel_HF} electrons (charge = {Qtot - nel_HF})')
print(orbtitle)
# Last expression: its return value is shown as the cell output
m2.color_by_orb(dfHForb)

In [None]:
# break section "multi" into sub-sections
#   only the last "multi" block in the file is used
multisec = m2.multi_sections(major_sections['multi'][-1])
#multisec.keys()

In [None]:
# parsing MULTI sub-sections
#   Most parsers receive only the last text block ([-1]) of a sub-section;
#   'results' and 'civector' are passed whole (all blocks).
dynfac = m2.get_dynfac(multisec['top'][-1])
orbspace = m2.orbital_spaces(multisec['top'][-1])
statesym = m2.state_symmetry_groups(multisec['top'][-1])
convergence = m2.multi_convergence(multisec['iterations'][-1])
weights = m2.multi_weights(multisec['iterations'][-1])
dfiter = m2.multi_iterations(multisec['iterations'][-1])
dfstates = m2.multi_results(multisec['results'])
dfexpec = m2.multi_expec(multisec['trans'][-1])
dftrans = m2.multi_transmom(multisec['trans'][-1])
# NOTE: 'orbtitle' is re-used here; it now refers to the natural orbitals
orbtitle, dfNO = m2.parse_orbitals(multisec['natorb'][-1])
ddfcivec, dEcas = m2.multi_civecs(multisec['civector'])

In [None]:
# Describe the CASSCF active space (electron count from the first state group)
nactel = statesym[0]['nelec']
nactorb = sum(orbspace['active'])
print(f'CASSCF active space is ({nactel}/{nactorb}) with active orbitals {orbspace["active"]}')
# Report the closed and frozen orbital spaces, when present
for space_key, space_name in [('closed-shell', 'closed'), ('frozen', 'frozen')]:
    if space_key in orbspace:
        print(f'    {space_name} orbitals are {orbspace[space_key]}')
    else:
        print(f'    There are no "{space_name}" orbitals')

# Count the states: total, and per spin multiplicity
mult_count = {}
for st in statesym:
    mult = st['spin']
    mult_count[mult] = mult_count.get(mult, 0) + st['nstates']
ncas = sum(st['nstates'] for st in statesym)
print(f'{ncas} CASSCF states:')
for mult, n in mult_count.items():
    print(f'   {n:3d} {mult}')

# Show the state weights, renormalized for reading convenience
print('CASSCF relative state weights (subject to rounding error):')
uweights = m2.unnormalize_cas_weights(weights)
for wts in uweights.values():
    print('    ', np.round(wts, 1))

# Are <L**2> values clean?  Compare each with its nearest integer.
ilsq = np.rint(dfexpec['L**2'])
maxdev = np.abs(ilsq - dfexpec['L**2']).max()
if not maxdev:
    print('Values of <L**2> are clean')
else:
    print(f'Largest deviation of <L**2> from integer = {maxdev:.1e}')

# Lowest CASSCF energy relative to the HF energy
CAS_rel_HF = dfstates['E'].min() - hf_results['E']
print(f'For the ground state, [E(CASSCF) - E(HF)] = {CAS_rel_HF:.6f}')
if CAS_rel_HF >= 0:
    print('   *** this difference should normally be negative')
print()
print(orbtitle)
orb_styler = m2.color_by_orb(dfNO)

In [None]:
# Disabled debugging dump of everything parsed from the MULTI output.
# Change the condition to True to display it.
if False:
    # print results from parsing MULTI output
    print(f'DYNW = {dynfac}')
    print('Spaces: ', orbspace)
    print('CASSCF state groups:')
    for g in statesym:
        print('   ', g)
    print(convergence)
    print('CASSCF state weights:')
    for k, v in weights.items():
        print(f'  {k:>2s}: ', v)
    display(dfiter)
    display(dfstates)
    display(dfexpec)
    # one transition-moment table per operator
    for op, df in dftrans.items():
        print(f'Operator {op}')
        display(df)
    print(orbtitle)
    display(dfNO)
    # only the first rows of each CI-vector table are shown
    for k, df in ddfcivec.items():
        print(k, dEcas[k])
        display(df.head())

In [None]:
# Summarize CASSCF results: one row per state with S, <L**2>, L and term symbol
dfcas = dfstates[['Label', 'irrep', 'E']].copy()
# One S value per state, in the order of the state-symmetry groups
# (assumes 'dfstates' lists the states in that same order -- TODO confirm)
Svals = []
for g in statesym:
    Svals.extend([chem.MULTSPIN[g['spin']]] * g['nstates'])
dfcas.insert(2, 'S', Svals)
dfcas['L**2'] = dfexpec['L**2']
# For a pure term <L**2> = L(L+1), so truncating sqrt(<L**2>) yields L.
# Clip at zero first: numerical noise can make <L**2> of an S state slightly
# negative, which would otherwise give NaN and break the integer conversion.
dfcas['L'] = np.sqrt(dfexpec['L**2'].clip(lower=0)).astype(int)
tsymb = []
for S, L, irr in zip(dfcas.S, dfcas.L, dfcas.irrep):
    # irrep 1 -> +1, irrep 2 -> -1
    parity = 3 - 2*irr
    trm = chem.term_symbol(L, S, parity, linear=False)
    tsymb.append(trm)
dfcas['term'] = tsymb
print('CASSCF states')
dfcas

In [None]:
# Group the CASSCF states into atomic terms
dfcasterm = m2.collect_atomic_terms(dfcas)
nterm = len(dfcasterm)
print(f'There are {ncas} CASSCF states in {nterm} terms')
# Add J values
#   one collection of possible J values per term, derived from the term symbol
Jvals = [chem.possible_J_from_term(trm) for trm in dfcasterm['term']]
dfcasterm['J_vals'] = Jvals
display(dfcasterm)

In [None]:
# Parse MRCI results and summarize in DataFrame
#   One DataFrame is built per MRCI calculation; they are concatenated once
#   at the end (instead of growing 'dfmrci' inside the loop).
etol = 1.e-6  # tolerance for matching reference energies
mrci_frames = []
for imrci, sec in enumerate(major_sections['mrci']):
    print(f'MRCI calculation #{imrci+1}')
    mrcisec = m2.mrci_sections(sec)
    mrci_meta = m2.mrci_info(mrcisec['top'][0])
    mrci_iter = m2.mrci_iterations(mrcisec['iterations'][0])
    mrci_results = m2.mrci_results(mrcisec['results'][0])
    nstate = len(mrci_results['state'])
    print(f'    {mrci_meta["smult"]}, irrep {mrci_meta["irrep"]}')
    print(f'    {nstate} states')
    # Report on orbital spaces in the MRCI
    print('    orbital spaces, by irrep')
    for sp in ['core', 'closed', 'active', 'external']:
        print('\t{:10s} {}'.format(sp, mrci_meta['spaces'].get(sp, [])))
    lbll =  []  # list of state labels
    c0rot = []  # list of C0 (rotated) values
    El =    []  # list of energies
    davl =  []  # list of Davidson-corrected energies (rotated ref)
    erefl = []  # list of reference energies
    spinmult = mrci_meta['smult']
    S = chem.MULTSPIN[spinmult]
    irrep = mrci_meta['irrep']
    for lbl, v in mrci_results['state'].items():
        lbll.append(lbl)
        try:
            c0rot.append(v['C0']['rotated'])
            davl.append(v['Energy']['davidson']['rotated'])
        except KeyError:
            # no "rotated" values if there is only one state
            c0rot.append(v['C0']['relaxed'])
            davl.append(v['Energy']['davidson']['relaxed'])
        El.append(v['Energy']['total'])
        erefl.append(v['Energy']['ref E'])
    # Tentative reference labels "<number>.<irrep>" from the initial references
    init_ref_no = [k for k in mrci_iter['init_ref'].keys()][:nstate]
    reflbl_tentat = [f'{i}.{irrep}' for i in init_ref_no]
    init_refE = [v for v in mrci_iter['init_ref'].values()][:nstate]
    dfci = pd.DataFrame({'Label': lbll, 'irrep': irrep, 'S': S, 'E': El,
                        'Edav': davl, 'C0': c0rot, 'Eref': erefl,
                        'init_ref': init_refE, 'iref_nr': init_ref_no,
                        'irlbl': reflbl_tentat})
    # find matching CASSCF reference
    caslbll = []
    castrml = []
    subcas = dfcas[dfcas.S == S]
    for ici, cirow in dfci.iterrows():
        irlbl = reflbl_tentat[ici]
        # CASSCF states of this spin whose label contains the tentative label
        subrow = subcas[subcas.Label.str.contains(irlbl)]
        # Keep only candidates whose energy agrees with the initial reference.
        # This tolerates several label matches (e.g. "1.1" inside "11.1") and
        # an empty candidate list, which previously raised IndexError.
        hits = subrow[(subrow['E'] - cirow['init_ref']).abs() < etol]
        if len(hits):
            # this is a match
            caslbll.append(hits.iloc[0]['Label'])
            castrml.append(hits.iloc[0]['term'])
        else:
            # something wrong
            caslbll.append('?')
            castrml.append('?')
    dfci['ref_lbl'] = caslbll
    dfci['term'] = castrml
    if '?' in caslbll:
        print('    *** failure matching MRCI states to CASSCF states')
        display(dfci)
    mrci_frames.append(dfci)
# Empty DataFrame when the file contains no MRCI section (as before)
dfmrci = pd.concat(mrci_frames, ignore_index=True) if mrci_frames else pd.DataFrame()

In [None]:
nmrci = len(dfmrci)
#dfmrci
# Group the MRCI states into atomic terms, passing the 'Edav' column name
dfciterm = m2.collect_atomic_terms(dfmrci, 'Edav')
# NOTE: 'termsIn' is captured before the enumerative prefixes are applied below
termsIn = set(dfciterm.term)
print(f'There are {nmrci} MRCI states in {len(dfciterm)} terms')
# The CASSCF and MRCI term counts are expected to agree
if nterm != len(dfciterm):
    chem.print_err('', 'Different number of terms from CASSCF and from MRCI')
# Make prefixes enumerative
dfciterm['term'] = chem.enumerative_prefix(dfciterm.term.values)
print('MRCI terms:')
dfciterm

In [None]:
# section "SOintegrals"
#   optional: parsed only when the output file contains it (first block)
if 'SOintegrals' in major_sections.keys():
    SOintgrl = m2.SO_integrals(major_sections['SOintegrals'][0])
    #print(SOintgrl)

In [None]:
# break section "soci" into sub-sections
#   only the first "soci" block in the file is used
sosec = m2.soci_sections(major_sections['soci'][0])
#sosec.keys()

In [None]:
# SOCI sub-section 'matel_comput' (first block)
matel_text = sosec['matel_comput'][0]
hlsdiag = m2.soci_replacements(matel_text)
# Total number of states = sum of the 'nstate' entries in the HLSDIAG list
n_cistates = 0
for entry in hlsdiag.values():
    n_cistates += entry['nstate']
print(f'There are {n_cistates} states in the HLSDIAG list')
# Show the parsed matrix elements only when there are any
mat_elems = m2.soci_matelems(matel_text)
if mat_elems:
    print(mat_elems)

In [None]:
# SOCI sub-section 'basis_prop'
#   optional: parsed only when present; prints the first row of the 'DMZ' entry
if 'basis_prop' in sosec.keys():
    basprop = m2.soci_basis_prop(sosec['basis_prop'][0], n_cistates)
    print(basprop['DMZ'][0,:])

In [None]:
# SOCI sub-section 'so_calc'
E0 = m2.soci_E0(sosec['so_calc'][0])
print(f'E0 = {E0:.6f} in the SO-CI')
somat = m2.soci_matrix(sosec['so_calc'][0])
# 'dimen' = number of rows of the spin-orbit matrix; used as the number of
# SO-CI states throughout the rest of the notebook
dimen = somat['matrix'].shape[0]
print(f'There are {dimen} SO-CI states')

In [None]:
# Check for zero spin-orbit coupling
#   work on a copy so that somat['matrix'] itself is not modified
offdiag =somat['matrix'].copy()
np.fill_diagonal(offdiag, 0)
# largest magnitude among the off-diagonal elements
amax = np.max(np.abs(offdiag))
if amax == 0:
    chem.print_err('', 'Off-diagonal elements of spin-orbit matrix are all zero')

In [None]:
# Column-name -> format-string map passed to DataFrame.style.format()
fmt = {'Eshift': '{:.1f}', 'degen': '{:.0f}'}
# columns shown with one decimal (same format as 'Eshift')
fmt.update({name: fmt['Eshift'] for name in
            ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1',
             'wmean', 'wstds', 'uwmean', 'uwstds', 'change', 'rwmse']})
# columns shown with two decimals
fmt.update(dict.fromkeys(['dif', 'Theory', 'ecm', 'SOC', 'RMSE'], '{:.2f}'))
# weights shown with six decimals
fmt['weight'] = '{:.6f}'

In [None]:
# Add MRCI and term parentage of the basis states
#   Each basis-state dict gains 'ici' (row index in dfmrci) and
#   'iterm' (row index in dfciterm).
for i, bas in enumerate(somat['basis']):
    S = bas['S']
    lbl = bas['State']
    # MRCI state with the same label and spin
    subdf = dfmrci[(dfmrci.Label == lbl) & (dfmrci.S == S)]
    #display(subdf)
    # NOTE: raises IndexError if no MRCI state matches this basis state
    ici = subdf.index[0]
    bas['ici'] = ici
    # first term whose 'idx' entry contains this MRCI state;
    # 'iterm' is left unset if no term contains it
    for iterm, trow in dfciterm.iterrows():
        if ici in trow.idx:
            bas['iterm'] = iterm
            break

In [None]:
# SOCI sub-section 'so_levels'
#   parsed energies are collected in a DataFrame, one row per spin-orbit level
so_energies = m2.soci_energies(sosec['so_levels'][0])
df_soE = pd.DataFrame(so_energies)
print(f'There are {len(df_soE)} spin-orbit levels')
df_soE

In [None]:
# Raw theoretical value: the smallest entry of the 'Eshift' data
SOCraw = min(so_energies['Eshift'])
print(f'From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = {SOCraw:.2f} cm-1')

In [None]:
# SO-CI sub-section 'so_vectors'
# In case of symmetry blocking, the last one should be the summary
#   (hence the last block, [-1], is parsed)
so_vecs = m2.soci_vectors(sosec['so_vectors'][-1])
so_vecs.keys()

In [None]:
# check eigenvectors for normality
#    eigenvectors are columns of so_vecs['matrix']
#    Prints only the vectors whose squared norm differs from 1 by more than
#    'tol'; no output means all passed.  'tol' is re-used by the next cell.
tol = 1.e-7
mat = so_vecs['matrix']
for i in range(dimen):
    # <v_i|v_i>
    prod = np.dot(np.conjugate(mat[:, i]), mat[:, i])
    if np.abs(1 - prod) > tol:
        print(i, i, ':  ', prod)

In [None]:
# check eigenvectors for orthogonality
#    eigenvectors are columns of so_vecs['matrix']
#    Uses 'tol' from the normality check in the preceding cell.
#    Prints only the pairs (i > j) whose overlap magnitude exceeds 'tol'.
mat = so_vecs['matrix']
for i in range(dimen):
    for j in range(i):
        # <v_i|v_j>
        prod = np.dot(np.conjugate(mat[:, i]), mat[:, j])
        if np.abs(prod) > tol:
            print(i, j, ':  ', np.abs(prod))

In [None]:
# SO-CI sub-section 'so_compos'
#   compositions of the SO-CI states (first block)
so_compos = m2.soci_composition(sosec['so_compos'][0])
so_compos.keys()

In [None]:
# check that all listings of basis states are consistent
#   Compare every key shared by the three descriptions of each basis state
#   and print all three whenever a shared value differs.
for a, b, c in zip(somat['basis'], so_vecs['basis'], so_compos['basis']):
    for k, val in a.items():
        if k not in b or k not in c:
            # key is not common to all three listings
            continue
        if val != b[k] or val != c[k]:
            print(a)
            print(b)
            print(c)
            print('----------------')

In [None]:
# check that composition is consistent with eigenvectors
#   element-wise |c|**2 of the eigenvector coefficients
magnit = np.conjugate(so_vecs['matrix']) * so_vecs['matrix']
# get differences in percent (printed by Molpro to 0.01% precision)
difmat = (magnit * 100) - so_compos['matrix']
dmax = np.abs(difmat).max()
print(f'Largest inconsistency between composition and eigenvectors = {dmax:.2f} %')

In [None]:
# Convert basis-state compositions (percent) to term compositions
#   term_compos[iterm, iso] = sum over the basis states belonging to term
#   'iterm' of their percentage in SO-CI state 'iso'
use_printed = False  # use composition % as printed by Molpro
if use_printed:
    # compositions are printed to 0.01% precision
    print('Using compositions as printed by Molpro')
else:
    # eigenvectors are orthonormal and printed to 1e-8 precision
    print('Using compositions derived from eigenvectors')
    magpct = np.real(magnit * 100)
# choose the source of percentages once, outside the loop
pct_source = so_compos['matrix'] if use_printed else magpct
term_compos = np.zeros((nterm, dimen))
for ibas in range(dimen):
    iterm = somat['basis'][ibas]['iterm']
    term_compos[iterm, :] += pct_source[ibas, :]

In [None]:
# Add J values to dfciterm: the possible J values for every MRCI term
jpossl = [chem.possible_J_from_term(term) for term in dfciterm.term]
dfciterm['J'] = jpossl

In [None]:
# Get target J counts corresponding to the CASSCF terms
#   flatten the per-term J lists into a single list of levels
allJ = [jval for jl in dfcasterm['J_vals'] for jval in jl]
J_all = dict(Counter(allJ))
print('Required level counts     :', J_all)
nlevels = len(allJ)
print(f'    There are {nlevels} J-levels')
# number of magnetic sublevels needed for each J: (count) x (2J+1)
Jxg = {}
for k, v in J_all.items():
    Jxg[k] = int(v * (2*k+1))
J_left = dict(Jxg)  # copy to be decremented
print('Required sublevel counts:', Jxg)
df_soE['J'] = None

### Assign values of <em>J</em> to levels

In [None]:
# Use term composition data to determine possible J assignment for each level
#   For every SO-CI state, intersect the possible-J sets of all terms that
#   contribute at least 'thrpct' percent to it.
thrpct = 10.  # percentage threshold for consideration
print(f'Considering term compositions above {thrpct}%' +
     ' when evaluating J possibilities')
jpossl = []
npossl = []
for iso in range(dimen):
    #print(f'Level {iso} with Erel = {df_soE.iloc[iso]["Erel"]}')
    jposs = None
    for iterm, c in enumerate(term_compos[:, iso]):
        if c < thrpct:
            continue
        if jposs is None:
            # first contributing term
            jposs = set(dfciterm.at[iterm, 'J'])
        else:
            # subsequent term; take intersection
            jposs = jposs.intersection(dfciterm.at[iterm, 'J'])
    if jposs is None:
        # No term reaches the threshold (strongly mixed state).  Record "no
        # possibilities" so the 0-possibility check in the next cell reports
        # it, instead of failing here with TypeError on len(None).
        jposs = set()
    jpossl.append(jposs)
    npossl.append(len(jposs))
df_soE['J_poss'] = jpossl
df_soE['nposs'] = npossl
#df_soE['term_comp'] = list(np.round(term_compos, 1).T)
# one term-composition vector per SO-CI state
df_soE['term_comp'] = list(term_compos.T)

In [None]:
# Check for 0-possibility problems
#   levels for which no J value remains possible
subdf = df_soE[df_soE.nposs < 1]
if len(subdf):
    print(f'*** Some levels have all possibilities eliminated ***')
    display(subdf)
    # stop here (raises SystemExit); the following cells need a J for every level
    sys.exit(1)

In [None]:
# Thresholds passed to m2.assign_J_laboriously() in the next cell
thr_degen = 5  # threshold (cm-1) for being clearly degenerate
thr_big = 500  # threshold for clearly non-degenerate
thr_tcomp = 3  # threshold (%) for similar maximum term-composition difference
thr_tcbig = 15 # threshold (%) for clearly different term composition

# clear any previous assignments so this cell + the next can be re-run
df_soE['J'] = None

In [None]:
# Assign J to the levels; the return value is used as the number of levels
# left unassigned.
# NOTE(review): presumably fills df_soE['J'] and decrements J_left in place -- confirm in m2
n_unassigned = m2.assign_J_laboriously(df_soE, J_left, thr_degen, thr_big,
                         thr_tcomp, thr_tcbig, verbose=False)
if n_unassigned:
    print(f'J ASSIGNMENTS FAILED FOR {n_unassigned} LEVELS')
else:
    print('All levels were assigned!')
    # check the assignments against Jxg{}
    #   number of sublevels assigned to each J must equal the required count
    for J, nJ in Jxg.items():
        dfJ = df_soE[df_soE.J == J]
        if len(dfJ) != nJ:
            print(f'Assignment error!  For J = {J}, {nJ} levels ' +
                  f'were needed but {len(dfJ)} were assigned')

In [None]:
# Collect the magnetic sublevels into J levels
dflev = m2.collect_atomic_J_sets(df_soE)
# Add leading term
termlist = dfciterm.term
tlead = []
composDl = []   # term compositions, not rounded
TC_approx = []  # term compositions rounded for display
shownull = True # display zero values of term compositions 
for tcomp in dflev.term_comp:
    # leading term = the term with the largest composition in this level
    iterm = np.argmax(tcomp)
    tlead.append(termlist[iterm])
    composD = {}   # term -> percentage (full precision)
    cround1 = {}   # term -> percentage rounded to one decimal
    for trm, pct in zip(termlist, tcomp):
        composD[trm] = pct
        p = round(pct, 1)
        if (p != 0) or shownull:
            cround1[trm] = p
    composDl.append(composD)
    # For display, sort by decreasing composition
    cround1 = dict(sorted(cround1.items(), key=lambda item: item[1], reverse=True))
    TC_approx.append(cround1)
dflev['Lead'] = tlead
dflev['Composition'] = composDl
dflev['TC_approx'] = TC_approx
Jlist = dflev.J
# level label = "<leading term>_<J>", with J formatted by chem.halves()
Jlbl = [f'{t}_{chem.halves(J)}' for t, J in zip(tlead, Jlist)]
dflev['Jlbl'] = Jlbl
# reorder columns, drop 'nposs' and 'term_comp'
dflev = dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift', 'Composition', 'E', 'idx',
             'Erel_spread', 'TC_spread', 'TC_approx']]

In [None]:
# Explain the columns of the diagnostic table, then display it
print('Check the table below for questionable grouping of levels')
print('"Erel_spread" shows how much the energies differ within a level (cm-1)')
print('"TC_spread" shows how much the term compositions differ within a level (%)')
print('"idx" shows which magnetic sublevels in "df_soE" compose each level')
dflev[['J', 'Erel', 'Erel_spread', 'TC_spread', 'idx']]

In [None]:
print('Level assignments from the calculation:')
showcols = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift', 'TC_approx']
display(dflev[showcols])
# Are there duplicated leading terms?
#   'dups' is the flag that triggers the manual re-assignment in the next cell
dups = False
for j, grp in dflev.groupby('J'):
    leads = list(grp.Lead)
    if len(leads) > len(set(leads)):
        print(f'*** Duplicate leading term for J = {j} ***')
        dups = True
        # remove one occurrence of each distinct term;
        # what remains in 'leads' are the terms that occur more than once
        for lead in set(leads):
            leads.remove(lead)
        # show every level of this J whose leading term is duplicated
        dfdup = grp[grp.Lead.isin(leads)].copy()
        display(dfdup[showcols].style.format(fmt))

In [None]:
# Change assignments of any duplicates
#   Interactive: requires keyboard input, so results depend on the user's answers
if dups:
    print('Correct the duplicate term assignments')
    ifix = input('Level for which to re-assign the term? ')
    # loop until the user enters an empty string
    while ifix:
        # the answer is used as a row label of dflev
        ifix = int(ifix)
        trm = input(f'Which term do you want to assign to level {ifix}? ')
        dflev.loc[ifix, 'Lead'] = trm
        ifix = input('Another level to re-assign (empty to end)? ')
    # rebuild 'Jlbl' values
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dflev.Lead, dflev.J)]
    dflev['Jlbl'] = chem.enumerative_prefix(jlbl)
    display(dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift']])

In [None]:
# Check for problems in assignments
#   Compare the set of leading terms with the set of MRCI terms.
#   'dropT' stays False unless some terms never appear as a leading term;
#   in that case it becomes the set of dropped terms (used by the next cell).
nAssign = len(set(dflev.Lead))
nTerm = len(dfciterm)
dropT = False
if nAssign != nTerm:
    print(f'*** I started with {nTerm} terms but have {nAssign} leading terms ***')
    print('Starting: ', sorted(termsIn))
    termsOut = set(dflev.Lead)
    print('Leading : ', sorted(termsOut))
    if nAssign > nTerm:
        addT = termsOut - termsIn
        print('Added terms: ', addT)
    else:
        dropT = termsIn - termsOut
        print('Dropped terms: ', dropT)
        # Add weights from dropped terms and display
        #   one column per dropped term holding its weight (%) in every level
        #   (previously only the last level's value was broadcast to all rows)
        for term in dropT:
            wtcol = [comp.get(term, 0) for comp in dflev.Composition]
            dflev[term] = wtcol
        print('Weights (%) of dropped terms in levels:')
        # was 'dfso', which is not defined anywhere in this notebook
        display(dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))
nlvl = (2 * dflev.J + 1).sum()  # number of sublevels
if nlvl != dimen:
    # was 'nSO', which is undefined; 'dimen' is the number of SO-CI states
    print(f'*** I started with {dimen} (sub)levels but now have {nlvl} ***')

In [None]:
# Manually assign any dropped terms
#   Interactive: 'dropT' is either False or the set of dropped terms
if dropT:
    for drt in dropT:
        # the answer is used as a row label of dflev
        ia = int(input(f'Which level do you want to assign to term {drt}? '))
        dflev.loc[ia, 'Lead'] = drt
    # rebuild 'Jlbl' values
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dflev.Lead, dflev.J)]
    dflev['Jlbl'] = chem.enumerative_prefix(jlbl)
    display(dflev[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))

In [None]:
# Inversion parity of the calculated levels
#   NOTE: 'parity' becomes a string here ('even'/'odd'); an earlier cell used
#   the same name for a numeric value
irreps_ci = set(dfciterm.irrep)
if (PG == 'Ci') and (len(irreps_ci) == 1):
    # all terms belong to one irrep: irrep 1 is treated as even, otherwise odd
    if 1 in irreps_ci:
        parity = 'even'
    else:
        parity = 'odd'
else:
    # ask user for parity of interest
    parity = input('Please choose "even" or "odd" parity: ')
print(f'Experimental states will be restricted to parity = {parity}')

### Read experimental energy levels

In [None]:
def _roman_numeral(n):
    '''Return the positive integer n as an upper-case Roman numeral (4 -> 'IV').'''
    numeral = ''
    for value, symbol in ((1000, 'M'), (900, 'CM'), (500, 'D'), (400, 'CD'),
                          (100, 'C'), (90, 'XC'), (50, 'L'), (40, 'XL'),
                          (10, 'X'), (9, 'IX'), (5, 'V'), (4, 'IV'), (1, 'I')):
        count, n = divmod(n, value)
        numeral += symbol * count
    return numeral

# Net charge = nuclear charge minus the number of electrons in the last MRCI
charge = Qtot - mrci_meta['nelec']
labels_ordinated = False  # flag to prevent multiple (1)(1)(1) etc. 
if charge >= 0:
    # spectroscopic notation: I = neutral, II = singly ionized, ...
    # (a proper Roman numeral, so that charge 3 gives "IV" and not "IIII")
    atstr = atom + '_' + _roman_numeral(charge + 1)
else:
    # anion
    atstr = atom + '_neg'
fxl = f'{atstr}_exptl_levels.xlsx'
fxlalt = None
exp_alt = False
# Special cases: atoms for which an alternative data file is used
if atstr == 'Ra_I':
    fxlalt = 'Ra_I_exptl_levels_plus_theory.xlsx'
#if atstr in ['Ar_I', 'Pb_I', 'Kr_I']:
if atstr in ['Ar_I', 'Pb_I']:
    fxlalt = f'{atstr}_exptl_even_assign.xlsx'
if atstr in ['Br_I', 'I_I']:
    fxlalt = f'{atstr}_exptl_odd_assign.xlsx'

if fxlalt is not None:
    print('** Using alternative experimental data file ***')
    exp_alt = True
    fxl = fxlalt
# The experimental file is read from the same directory as the Molpro output
xlpath = os.sep.join([fdir, fxl])
dfexpt = pd.read_excel(xlpath)
if exp_alt and ('LS' in dfexpt.columns):
    # use manual assignments
    #   only where 'LS' is filled in; other rows keep their original 'Term'
    print('** Using term labels in column "LS"')
    dfexpt.loc[dfexpt['LS'].notnull(), 'Term'] = dfexpt[dfexpt['LS'].notnull()]['LS']
    #dfexpt['Term'] = dfexpt['LS']
print(f'Experimental energy levels read from {fxl}')
# If there is a column "comment", replace NaN with ''
if 'comment' in dfexpt.columns:
    dfexpt['comment'] = dfexpt['comment'].fillna('')

In [None]:
# Find the number of decimal places in the level energies
Ecol = 'Level (cm-1)'  # the exptl energy column
ndecim = 0
for e in dfexpt[Ecol]:
    text = str(e)
    if '.' not in text:
        # No decimal point (integer value, 'nan', ...): zero decimal places.
        # Previously all digits of such an entry were counted as decimals.
        continue
    # text after the decimal point, without any exponent ("5e-05" -> "5")
    frac = text.split('.')[-1].lower().split('e')[0]
    # count numeric digits
    n = sum(c.isdigit() for c in frac)
    ndecim = max(n, ndecim)
print(f'Experimental energies are provided to {ndecim} decimal digits')
# display formatting
fmt[Ecol] = '{:.' + str(ndecim) + 'f}'

In [None]:
# Delete any ionization limit
#   NOTE: this cell replaces 'dfexpt'; re-run the cell that reads the Excel
#   file before re-running this one.
limit_index = dfexpt[dfexpt.Term == 'Limit'].index
n1 = len(dfexpt)
if len(limit_index):
    # delete the "Limit" row and everything past it
    ilim = limit_index.min()
    dfexpt = dfexpt.truncate(after=ilim-1)
else:
    # no "Limit" row in the file: keep everything
    # (index.min() of an empty selection is NaN and must not reach truncate())
    ilim = None
n2 = len(dfexpt)
if n2 < n1:
    print(f'Discarding {n1-n2} ionized or metastable states')
oddstr = r'\*$|°' # characters to identify terms of odd parity
# Sometimes parity is shown in configuration alone?
#dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr))].copy()
#dfodd = dfexpt[dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr)].copy()
dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr))].copy()
dfodd = dfexpt[dfexpt.Term.str.contains(oddstr)].copy()
print(f'{len(dfexpt)} experimental levels ({len(dfeven)} even and {len(dfodd)} odd)')
# Select by parity
if parity == 'even':
    # discard odd levels ('Term' field ends with '*')
    dfexpt = dfeven.copy()
elif parity == 'odd':
    dfexpt = dfodd.copy()
else:
    chem.print_err('', f'Parity of "{parity}" is not recognized')
n3 = len(dfexpt)
print(f'{n3} levels accepted for parity = {parity}')
# Reject bad values of J
#   a J that chem.halves_to_float() cannot convert is marked as NaN
for i in dfexpt.index:
    try:
        chem.halves_to_float(dfexpt.loc[i, 'J'])
    except ValueError:
        dfexpt.at[i, 'J'] = np.nan
nbad = dfexpt.J.isna().sum()
if nbad:
    print(f'** Rejecting {nbad} levels with malformed J values')
    # Drop only the rows whose J is missing.  A bare dropna() would also
    # discard valid levels that merely have an empty cell in another column.
    dfexpt = dfexpt.dropna(subset=['J'])
    n4 = len(dfexpt)
    print(f'{n4} level retained')
# Assign unique term symbols
if not labels_ordinated:
    dfexpt = chem.unique_labels_exptl_terms(dfexpt, verbose=True, always=True)
    labels_ordinated = True
# Add column for degeneracy
#   degeneracy = 2J + 1
dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)).astype(int) + 1
dfexpt

In [None]:
def match_term_symbol(symb_expt, symb_calc):
    # Decide whether an experimental and a calculated term symbol agree.
    # They agree when they are equal, or when the trailing len(symb_calc)
    # items of symb_expt equal symb_calc (this tolerates an extra prefix
    # such as '(1)' or 'a ' on symb_expt).
    # Only ==, slicing and | are used, so the arguments may be plain strings
    # or equal-length pandas Series (element-wise result).
    exact = (symb_expt == symb_calc)
    tail = symb_expt[-len(symb_calc):]
    return exact | (tail == symb_calc)

In [None]:
def match_theory_to_expt(dfth, dfx, bigerr=2000):
    '''
    Match experimental levels to theoretical
    Args:
      dfth: DataFrame of theoretical levels; columns 'J', 'Lead', 'Erel',
            'TC_approx' and 'Composition' are read
      dfx:  DataFrame of experimental levels; column 'J' is read here and
            'Term' by match_th_expt_1J_V2()
      bigerr: in cm-1; passed on to match_th_expt_1J_V2()
    Return a DataFrame containing both theory and expt (a copy of 'dfx' with
      columns 'Tcalc', 'leadwt', 'Ecalc', 'termwt', 'Composition' added), and
      the index of the highest level matched (0 if nothing was matched)
    '''
    # The arguments are now actually used; previously the globals 'dflev'
    # and 'dfexpt' were read regardless of what was passed in.
    print('Matching experimental levels with theoretical levels')
    Jlist = sorted(set(dfth.J))
    dfcomp = dfx.copy()
    dfcomp['Tcalc'] = ''  # term assignment in computation
    dfcomp['leadwt'] = ''
    dfcomp['Ecalc'] = np.nan
    dfcomp['termwt'] = None
    dfcomp['Composition'] = None
    imax = 0  # index of highest exptl level matched
    for J in Jlist:
        print(f'J = {J}')
        # get the indices of the exptl levels that best match theoretical
        dfJth = dfth[dfth.J == J]
        #display(dfJth[['Lead', 'J', 'Erel', 'TC_approx']])
        dfJx = dfx[dfx.J == J]
        if len(dfJx) < 1:
            # try str representation
            hJstr = chem.halves(J)  # as str fraction
            dfJx = dfx[dfx.J == hJstr]
        #display(dfJx)
        idx = match_th_expt_1J_V2(dfJth, dfJx, bigerr=bigerr)
        # idx[i] is the exptl row label paired with the i-th theoretical level
        for i, ix in enumerate(idx):
            rowth = dfJth.iloc[i]
            dfcomp.at[ix, 'Tcalc'] = rowth.Lead
            dfcomp.at[ix, 'leadwt'] = rowth.TC_approx[rowth.Lead]
            dfcomp.at[ix, 'Ecalc'] = rowth.Erel
            dfcomp.at[ix, 'termwt'] = chem.sort_dict_by_value(rowth.TC_approx,
                                        reverse=True)
            dfcomp.at[ix, 'Composition'] = rowth.Composition
            imax = max(imax, ix)
    return dfcomp, imax

In [None]:
def match_th_expt_1J_V2(dfJth, dfJx, bigerr=2000):
    '''
    Given DataFrame's of theoretical and experimental levels for the same J,
    Return a list of indices into dfJx that match those in dfJth
        based upon energy, but with preference for same leading term
    The list is positional: element i belongs to the i-th row of dfJth.
    It may be shorter than dfJth: matching stops at the first theoretical
        level for which no unmatched experimental level remains.
    'bigerr' is in cm-1; it is accepted for interface compatibility but is
        not used in this function.
    '''
    idx = []  # index of exptl level that matches each theor level
    if len(dfJx) == 0:
        # no experimental levels with this J
        print('    no exptl levels available for this J')
        return idx
    for i, row in dfJth.iterrows():
        thTerm = row.Lead

        def same_Lead(xTerm):
            # Does the exptl "Term" match the theoretical "Lead"?
            return match_term_symbol(xTerm, thTerm)

        # Is there an exptl level with the same leading term?
        termatch = dfJx.Term.apply(same_Lead)
        subx = dfJx.loc[termatch]
        if len(subx) == 1:
            # The nicest situation
            # NOTE(review): not checked against levels already matched by
            #   energy ordering, so a duplicate is possible here
            print(f'    exptl term "{subx.Term.iloc[0]}" matches leading "{thTerm}"')
            j = subx.index.values[0]
        else:
            # There are multiple or no matching terms
            #   rely upon energy ordering (among matching terms, if any)
            if len(subx) == 0:
                # No matching term symbols
                subx = dfJx  # consider all levels because none match the term symbol
            # expect energy ordering to be similar
            #    there is no way to detect inversions
            j = None
            for jx, rowx in subx.iterrows():
                if jx in idx:
                    # this exptl level already matched
                    continue
                # this level has not yet been matched; take the first one
                j = jx
                print(f'    leading "{thTerm}" matched with exptl "{rowx.Term}"')
                break
            if j is None:
                # Every candidate is already matched.  Stop here instead of
                # re-using a stale index from an earlier iteration, which
                # would silently overwrite an earlier match in the caller.
                print(f'    no unmatched exptl level remains for leading "{thTerm}"')
                break
        idx.append(j)
    return idx

In [None]:
# Match theoretical and experimental levels
# imax is the index of the highest-energy level matched
dfdiff, imax = match_theory_to_expt(dflev, dfexpt)
# error = calculated minus experimental energy (NaN for unmatched levels)
dfdiff['err'] = dfdiff.Ecalc - dfdiff[Ecol]

In [None]:
# Describe the highest experimental level that was matched
t = dfdiff.loc[imax, 'Tcalc']
J = dfdiff.loc[imax, 'J']
levmax = f'{t}_{J}'
emax = dfdiff.loc[imax, Ecol]
# columns used for the comparison table in the next cell
showcols = ['Configuration', 'uTerm', 'Tcalc', 'J', Ecol, 'Ecalc', 'err', 'termwt']
print(f'\nHighest level matched is {levmax} at {emax} cm-1 (exptl energy)')
# Notify about any exptl levels that the calculation skipped over
#   i.e. rows up to 'imax' that have no calculated energy
dfskipped = dfdiff[dfdiff.Ecalc.isna()].loc[:imax]
if len(dfskipped):
    print('** Some experimental levels are skipped in the calculation **')
    display(dfskipped)
    #display(dfdiff.loc[:imax][showcols])

In [None]:
# Convert str values of J to float
dfdiff['J'] = dfdiff.J.apply(chem.halves_to_float)
warnThresh = 1000  # highlight errors larger than this (cm-1)
# drop rows with no matching level in the calculation (Ecalc is NaN)
#   restricted to 'Ecalc' so that an empty cell in some unrelated column
#   does not discard a matched level
dfdiff = dfdiff.dropna(axis=0, subset=['Ecalc'])
# Print a warning if experimental levels are missing
nth = len(dflev); ndiff = len(dfdiff)
expt_missing = nth - ndiff
if expt_missing > 0:
    print(f'\n**** There are {nth} theoretical levels but only {ndiff} matching experimental levels ****')
else:
    # use as flag
    expt_missing = 0
# Columns to display: 'showcols' plus the optional 'comment' column.
#   A separate list is used so that 'showcols' is not modified and the cell
#   can be re-run (the former 'selcols' was never defined -> NameError).
dispcols = list(showcols)
if ('comment' in dfdiff.columns) and ('comment' not in dispcols):
    dispcols.append('comment')
print(f'Please inspect the following pairing of theory ("Ecalc") with expt ("{Ecol}")')
print('Disagreements in term assignments are highlighted in red')
print(f'Errors > {warnThresh} cm-1 are highlighted in yellow')
display(dfdiff[dispcols].style.apply(lambda x: ["background: yellow" if abs(v) > warnThresh else "" for v in x], 
              subset=pd.IndexSlice[['err']]).apply(lambda x: (~match_term_symbol(dfdiff['uTerm'], x)).map({True: "background-color: red; \
              color: white", False: ""}), subset=['Tcalc']).format(fmt))

In [None]:
# No theoretical calculations are needed to use eq. (1)
#   Term energy = degeneracy-weighted mean of the experimental level energies
# list of term labels: unique, in order of first appearance in dfdiff
xterms = list(dict.fromkeys(dfdiff.uTerm))
eterms = []  # list of term energies
for Term in xterms:
    subdf = dfexpt[dfexpt.uTerm == Term]
    eterms.append(np.dot(subdf.degen, subdf[Ecol]) / subdf.degen.sum())
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms})
dfeq1 = dfeq1.sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1) (experimental data with naive model)')
display(dfeq1.style.format(fmt))

# After sorting, the first row is the term of lowest energy
lowterm = dfeq1.at[0, 'Term']
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
print(f'The term of lowest energy is \t{lowterm} \twith SOC1 = {SOC1} cm-1')

# Term label of the first experimental level; it is the default target
levterm = dfexpt.uTerm.iloc[0]
target = levterm

if levterm != lowterm:
    # The lowest term is not the leading term in the lowest level
    SOC1alt = SOC1
    SOC1 = -1 * np.round(dfeq1.loc[dfeq1.Term == levterm, 'Eterm'].values[0], 3)
    print(f'The lowest level belongs to \t{levterm} \twith SOC1 = {SOC1} cm-1')
print()
print(f'Term {target} is selected for calculating the spin-orbit correction')
print('    to change this, assign the variable "target" to another term')

In [None]:
def term_energy_from_levels(df, term, returnDF=False, ecol=None):
    """Average energy of a term as derived from level energies [eq. (2) in pub.].

    Each level contributes its energy from column `ecol`, weighted by
    (degeneracy of the level) * (weight of `term` in the level's composition).

    Parameters
    ----------
    df : DataFrame
        Needs columns 'Composition' (per level, a mapping {term label: weight}),
        'degen' and `ecol`.  When `returnDF` is true it also needs
        'Configuration', 'uTerm', 'Tcalc', 'J', 'Ecalc' and 'err'.
    term : str
        Term label.  For any level whose composition lacks this label, the
        label with "(1)" removed is tried instead.
    returnDF : bool
        If true, also return a DataFrame showing the distribution of the
        term among the levels.
    ecol : str, optional
        Header of the column of exptl level energies.  Defaults to the
        notebook-level variable `Ecol`.

    Returns
    -------
    Eterm, or (Eterm, df_distrib) when `returnDF` is true.
    If some level's composition contains neither form of the label, the
    result is None, or (None, None) when `returnDF` is true.
    """
    if ecol is None:
        ecol = Ecol  # notebook-level name of the exptl energy column
    not_found = (None, None) if returnDF else None
    bare = term.replace('(1)', '')  # fallback label without the "(1)"
    termwt = []  # weight of term "term" in each level
    for twt in df.Composition.values:
        # The label is chosen separately for every level, so a fallback needed
        #   by one level does not change what is looked up in the other levels
        if term in twt.keys():
            termwt.append(twt[term])
        elif bare in twt.keys():
            termwt.append(twt[bare])
        else:
            # can't find this term among theoretical terms
            return not_found
    termwt = np.array(termwt)
    dweight = df.degen.values * termwt  # weights including degeneracies
    Eterm = np.dot(df[ecol], dweight) / dweight.sum()
    if not returnDF:
        return Eterm
    # Construct DF showing distribution of term among levels
    cols = ['Configuration', 'uTerm', 'Tcalc', 'J', 'degen', ecol, 'Ecalc', 'err']
    df_distrib = df[cols].copy()
    df_distrib.insert(0, 'weight', termwt / 100)  # fraction instead of percent
    return Eterm, df_distrib

In [None]:
# show the term currently selected for the spin-orbit correction
target

In [None]:
# Use experimental level energies via eq. (2) (from the publication):
#   exptl level energies are averaged using the degeneracies and the
#   weights of the target term in each level
Eterm, df_distrib = term_energy_from_levels(dfdiff, target, returnDF=True)
if Eterm is None:
    # term_energy_from_levels() returns None when the term cannot be found
    #   in the level compositions; stop here with an explicit message
    raise ValueError(f'Term "{target}" was not found among the theoretical terms; '
                     'assign "target" to another term')
SOC2 = -Eterm
print('Applying eq. (2) (experimental energies and theoretical term weights)')
print(f'For term {target}, SOC2 = {SOC2:.2f} cm-1')

In [None]:
# Table of the levels that carry the target term, largest weight first
print(f'Distribution of term {target} among levels:')
wthr = 1e-6  # minimum weight to display
dfshow = df_distrib.loc[df_distrib['weight'] > wthr]
display(dfshow.sort_values(by='weight', ascending=False).style.format(fmt))
# totals over the displayed rows: plain, and with each row multiplied by its degeneracy
wtot = dfshow['weight'].sum()
dwtot = np.dot(dfshow['weight'], dfshow['degen'])
print('Total displayed weight = {:.3f} ({:.3f} including degeneracies)'.format(wtot, dwtot))

In [None]:
# Summary: the alternative estimates of the spin-orbit correction, side by side
print(f'Molpro source file: {fname}\n')
print(f'Alternative values for E_so[{target}] of atom {atom}:')
print('-' * 25)
# one formatted row per method (generator variables do not leak into the notebook namespace)
print('\n'.join('{:12s} {:.2f} cm-1'.format(method, soc) for method, soc in
                [('eq (1)', SOC1), ('raw theory', SOCraw), ('eq (2)', SOC2)]))
print('-' * 25)

In [None]:
# Term energy errors as inferred from all levels
#   For every experimental term, the level errors ('err') are averaged with
#   weights = (weight of the term in the level) * (degeneracy of the level)
termlist = []
# signed errors: the two values returned by chem.weighted_mean
#   (presumably mean and standard deviation -- see chem_subs)
wmean = []
wstds = []
# also consider unsigned (absolute value) errors
uwmean = []
uwstds = []
# also consider RMSE
trmse = []
# dict.fromkeys() gives the unique terms in order of first appearance; iterating
#   over set() would print the "No theoretical term" messages in an order that
#   can differ from one kernel session to the next
for term in dict.fromkeys(dfdiff.uTerm):
    eterm, df_distro = term_energy_from_levels(dfdiff, term, returnDF=True)
    if eterm is None:
        print(f'No theoretical term matches exptl {term}')
        continue
    termlist.append(term)
    wts = df_distro.weight.values * df_distro.degen
    m, s = chem.weighted_mean(df_distro.err, wts)
    wmean.append(m)
    wstds.append(s)
    uerr = np.abs(df_distro.err.values)
    um, us = chem.weighted_mean(uerr, wts)
    uwmean.append(um)
    uwstds.append(us)
    # root of the weighted mean squared error (second returned value is not used)
    umsq, ussq = chem.weighted_mean(uerr ** 2, wts)
    trmse.append(np.sqrt(umsq))
dftermerr = pd.DataFrame({'Term': termlist, 'wmean': wmean, 'wstds': wstds,
                         'uwmean': uwmean, 'uwstds': uwstds, 'rwmse': trmse})

print('Errors in term energies (cm-1) as inferred from the full distribution')
print('    of each term over all levels')
# default order same as experimental terms
dftermerr['Term'] = pd.Categorical(dftermerr['Term'], xterms)
dftermerr = dftermerr.sort_values('Term')
#dftermerr.sort_values('uwmean').style.format(fmt)
dftermerr.style.format(fmt)

### Optional distribution of some other term

In [None]:
other_term = '(1)2P'
# Results are kept under their own names so that this optional cell does not
#   overwrite Eterm, df_distrib, wtot and dwtot, which describe "target"
Eterm_other, df_distrib_other = term_energy_from_levels(dfdiff, other_term, returnDF=True)
if Eterm_other is None:
    # the term was not found in the level compositions
    print(f'*** Cannot evaluate errors for levels within term "{other_term}"')
else:
    print(f'Distribution of term {other_term} among levels:')
    display(df_distrib_other.sort_values('weight', ascending=False).style.format(fmt))
    # totals over all levels: plain, and with each level multiplied by its degeneracy
    wtot_other = df_distrib_other.weight.sum()
    dwtot_other = np.dot(df_distrib_other.weight, df_distrib_other.degen)
    print('Total weight = {:.3f} ({:.3f} including degeneracies)'.format(wtot_other, dwtot_other))