In [None]:
# Extract SO-CI information from MOLPRO outputs for atoms
#   Read exptl data from Excel file, combine with weights to get E_so
#   The experimental Excel file is generated by get_NIST_atomic_data.ipynb
# Add a fourth way to get E_so:  by fitting term energies to exptl level energies
#   (off-diagonal elements of the spin-orbit matrix are from a Molpro SO-CI calculation)
# KKI 2/5/2024
import re, sys, os
import numpy as np
import pandas as pd
from collections import Counter
#import matplotlib.pyplot as plt
#from collections import Counter
#from sklearn.cluster import KMeans

import chem_subs as chem
import molpro_subs as mpr

# Display every row of a DataFrame instead of truncating long tables
pd.set_option('display.max_rows', None)
# Print small numpy floats in fixed-point form rather than scientific notation
np.set_printoptions(suppress=True)
# When True, a later cell replaces fdir/fname with hard-coded test values
debug = False

### Specify Molpro SO-CI output file

In [None]:
# Molpro SO-CI output file to analyze (exactly one active choice).
# Other files that have been used with this notebook:
#     'as_4S2DP_4DP_57_dac5x2c.pro'
#     'Ir_20Q24D-1710-cvtz-pp.out'
fname = 'Pt_15T21S_cv5z-pp.out'

In [None]:
# Atom subdirectory names look like "Ar_I" (for neutral argon); the element
# symbol is whatever precedes the first underscore of the file name.
el = fname.partition('_')[0].capitalize()
fdir = rf'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\{el}_I'
#fdir = rf'C:\Users\dagbaglo\Desktop\So-ci_energy\{el}_I'

In [None]:
# Debug override: replaces the data directory and the file name chosen in the
# cells above with hard-coded test values (only when the 'debug' flag is set)
if debug:
    fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC'
    #fname = 'Cr_1SQ18-cvqz-pp.out'
    fname = 'Mn_S6Q24_cvtz-pp_kki.pro'
    #fname = 'Zr_26T10Q14S_cvtz-pp.out'
    #fname = 'Au_2S5S_acvtz.out'
    #fname = 'au_2S5S_actvz_b.pro'
    #fname = 'Ru_15Q19T_acvqz-pp.out'
    #fname = 'ru_15Q19T_actzpp.pro'
    # re-derive the element symbol, because fname has just changed
    el = fname.split('_')[0].capitalize()

In [None]:
# Full path of the Molpro output file
fsoc = os.sep.join((fdir, fname))
print('Reading MOLPRO file')
print(fsoc)
atom = mpr.stoichiometry(fsoc)
charge = int(np.rint(mpr.total_charge(fsoc, verbose=True)))
print(f'The atom is {atom} with charge {charge}')
# The symbol read from the output should agree with the one in the file name
if atom != el:
    chem.print_err('', 'This looks like the wrong atom')
# Append the charge (its sign, then its magnitude when larger than 1)
# to match sheet names in exptl data file
if charge != 0:
    atom += '+' if charge > 0 else '-'
    if abs(charge) > 1:
        atom += str(abs(charge))

In [None]:
# A file name containing '_hyb' marks a hybrid SO-CI calculation; the flag is
# passed on to the parser
ishybrid = '_hyb' in fname
if ishybrid:
    print('** Hybrid SO-CI')
# Parse the Molpro output; the returned object (SOCI) is used by most cells below
SOCI = mpr.fullmatSOCI(fsoc, atom=True, hybrid=ishybrid)
# Flag to prevent multiple ordinal prefixes, (1)(1)(1) etc., on exptl term labels;
# it is consulted in the cell that filters the experimental levels
labels_ordinated = False

In [None]:
def readSOmatrixBlocks(fname):
    """Collect the spin-orbit matrix blocks printed in a Molpro output file.

    A block starts on the line after one that begins with
    ' Spin-Orbit Matrix (CM-1)' and ends just before a line that begins with
    ' Spin-orbit eigenstates ' or that contains 'Spin-orbit calculation in
    the basis of symmetry adapted'.  Each header line is echoed to stdout.

    Parameters
    ----------
    fname : str
        Path of the Molpro output file (undecodable bytes are replaced).

    Returns
    -------
    retval : list of list of str
        One line buffer per block; the header and terminator lines are excluded.
    lineno : list of int
        Zero-based line number of the header line of each block.
    """
    rx_energ = re.compile(r' Spin-orbit eigenstates ')
    rx_somat = re.compile(r' Spin-Orbit Matrix \(CM-1\)')
    rx_symm_ad = re.compile(r'Spin-orbit calculation in the basis of symmetry adapted')
    retval = []
    lineno = []
    with open(fname, 'r', errors='replace') as F:
        somatbuf = []
        in_somat = False
        for lno, line in enumerate(F):
            if in_somat:
                if rx_energ.match(line) or rx_symm_ad.search(line):
                    # end of block
                    retval.append(somatbuf)
                    in_somat = False
                    somatbuf = []
                    continue
                somatbuf.append(line)
            if rx_somat.match(line):
                in_somat = True
                lineno.append(lno)
                print(line.rstrip())
    if in_somat:
        # The file ended inside a block (e.g. truncated output).  Keep what was
        # read so that retval and lineno stay the same length.
        retval.append(somatbuf)
    return retval, lineno

In [None]:
print(f'CASSCF active space is ({SOCI.cas.nactel()}/{SOCI.cas.nactorb()})')
# Get the basis set name and the number of contracted functions from the output.
# Raw strings keep the regex escapes (\s, \S, \d) from being read as (invalid)
# Python string escapes.
rx_bs = re.compile(r' SETTING BASIS\s+=\s+(\S+)')
rx_nbf = re.compile(r' NUMBER OF CONTRACTIONS:\s+(\d+) ')
# Defaults, so that a file lacking these lines gives a warning, not a NameError
basisset = None
nbf = None
with open(fsoc, 'r', errors='replace') as F:
    for line in F:
        # when a line type occurs more than once, the last occurrence wins
        m = rx_bs.match(line)
        if m:
            basisset = m.group(1)
        m = rx_nbf.match(line)
        if m:
            nbf = int(m.group(1))
if (basisset is None) or (nbf is None):
    print('*** Basis set name and/or number of contractions not found in the output file ***')
print(f'Basis set is {basisset} with {nbf} contracted functions')
# State weights used in the CASSCF; dynw is truthy when dynamical weighting was used
dynw = SOCI.cas.dynw()
caswts = SOCI.cas.weights(rescale=True)
if dynw:
    print(f'Dynamical weighting with dynw={dynw}')
    print('Final weights:')
else:
    print('CASSCF state weights (subject to rounding error):')
for wts in caswts:
    print('    ', np.round(wts, 1))

In [None]:
# Is this basis set intended for a scalar relativistic Hamiltonian?
# (judged from 'DK' or 'X2C' appearing in its name)
bs_upper = basisset.upper()
relbs = ('DK' in bs_upper) or ('X2C' in bs_upper)
# Is it a relativistic Hamiltonian?  Look for "set,dkho=N" or "int,dkho=N"
# in the output; the last such line wins.  relH stays 0 when none is found.
relH = 0
# raw string: \s and \d are regex escapes, not Python string escapes
re_relH = re.compile(r'^\s*(set|int),\s*dkho\s*=\s*(\d+)', re.IGNORECASE)
with open(fsoc, 'r', errors='replace') as F:
    for line in F:
        m = re_relH.match(line)
        if m:
            relH = int(m.group(2))
# Report the Hamiltonian and warn when it is inconsistent with the basis set
if relH:
    print('Scalar relativistic Hamiltonian: ', end='')
    if relH == 101:
        # dkho=101 is reported as X2C
        print('X2C')
    else:
        print(f'DKH{relH}')
    if basisset == 'USERDEF':
        print('**** Check for relativistic USERDEF basis set')
    elif not relbs:
        print('**** Warning:  non-relativistic basis set ****')
elif relbs:
    print('**** Warning:  relativistic basis set with non-relativistic Hamiltonian ****')

In [None]:
# <L**2> of every CASSCF state should be an integer, L(L+1).
# Use the absolute deviation so that values slightly BELOW an integer
# (e.g. 5.99) are flagged as well as those slightly above.
lsq = SOCI.cas.results['L**2'].values
if (np.abs(lsq - np.round(lsq)) > 1e-10).any():
    print('*** Non-integer values of <L**2> ***')
display(SOCI.cas.results[['Spin', 'Label', 'Energy', 'L**2', 'Term']])

In [None]:
# Report on orbital spaces in the MRCI (taken from the first entry of SOCI.cilist)
print('MRCI orbital spaces, by irrep')
ci_first = SOCI.cilist[0]
for space, orbs in (('core', ci_first.orbs_core),
                    ('closed', ci_first.orbs_closed),
                    ('active', ci_first.orbs_active),
                    ('external', ci_first.orbs_external)):
    print('    {:10s} {}'.format(space, orbs))

In [None]:
# Sizes of the MRCI state table and of the spin-orbit level table
nCI, nSO = len(SOCI.dfci), len(SOCI.SOe.results)
print(f'There are {nCI} MRCI states')
print(f'There are {nSO} spin-orbit levels')

In [None]:
# "Raw" theoretical spin-orbit correction: the smallest value in SOCI.vals.
# NOTE(review): SOCI.vals is built in molpro_subs (not visible here); going by the
#   message printed below it presumably holds level energies relative to the
#   lowest uncoupled term, in cm-1 -- confirm.
SOCraw = SOCI.vals.min()
print(f'From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = {SOCraw:.2f} cm-1')

In [None]:
# Check for zero spin-orbit coupling:
# blank the diagonal of a copy of the matrix and look at what is left
offdiag = SOCI.matrix.copy()
np.fill_diagonal(offdiag, 0)
amax = np.abs(offdiag).max()   # largest off-diagonal magnitude
if amax == 0:
    chem.print_err('', 'Off-diagonal elements of spin-orbit matrix are all zero')

In [None]:
# display formatting: column name -> format string, for DataFrame.style.format()
fmt = {'Eshift': '{:.1f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
# columns shown with one decimal place (same format as 'Eshift')
fmt.update(dict.fromkeys(
    ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1', 'fitted',
     'wmean', 'wstds', 'uwmean', 'uwstds', 'change', 'rwmse'],
    fmt['Eshift']))
# columns shown with two decimal places
fmt.update(dict.fromkeys(['dif', 'Theory', 'ecm', 'SOC', 'RMSE'], '{:.2f}'))

In [None]:
# Average the MRCI states into terms.
# NOTE(review): the meaning of be_close / atom / always is defined in molpro_subs
#   (not visible here); presumably the listed columns must agree among the
#   states that are merged into one term -- confirm.
dfterm = SOCI.average_terms(be_close=['Energy', 'Edav', 'Eref', 'dipZ', 'C0'], 
                            atom=True, always=True)
nTerm = len(dfterm)         # number of terms; compared with the leading terms in a later cell
termsIn = set(dfterm.Term)  # term labels going in; compared with the leading terms in a later cell
# drop the dipZ column
dfterm.drop(columns=['dipZ'], inplace=True)
dfso = SOCI.assign_atomic_J(quiet=False)  # create SOCI.dfso
Egl = SOCI.dfso.E.min()  # energy of ground level
# Term energy relative to the ground level, scaled by chem.AU2CM
# (presumably hartree -> cm-1, as the name suggests)
dfterm['Erel'] = (dfterm.Edav - Egl) * chem.AU2CM
print(f'Averaged terms from MRCI ({fname}):')
display(dfterm.style.format(fmt))
print('"ecm"  is relative to the lowest term (cm-1)')
print('"Erel" is relative to the ground level')

In [None]:
print('Level assignments from the calculation:')
showcols = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift', 'Composition']
display(dfso[showcols].style.format(fmt))
# Are there duplicated leading terms?
# Within one value of J, each leading term should label only one level.
dups = False
for j, grp in dfso.groupby('J'):
    repeated = [lead for lead, count in Counter(grp.Lead).items() if count > 1]
    if not repeated:
        continue
    print(f'*** Duplicate leading term for J = {j} ***')
    dups = True
    # show every level of this J that shares a repeated leading term
    dfdup = grp[grp.Lead.isin(repeated)].copy()
    display(dfdup[showcols].style.format(fmt))

In [None]:
# Change assignments of duplicates
# Interactive: when the previous cell flagged duplicates (dups), repeatedly ask
# for a row label of dfso and for the term to store in that row's 'Lead' column.
# An empty reply to the "level" prompt ends the loop.
print('Correct any duplicate term assignments')
if dups:
    ifix = input('Level to re-assign the term? ')
    while ifix:
        ifix = int(ifix)  # row label in dfso; a non-integer reply raises ValueError
        trm = input(f'Which term do you want to assign to level {ifix}? ')
        dfso.loc[ifix, 'Lead'] = trm
        ifix = input('Another level to re-assign (empty to end)? ')
    # rebuild 'Jlbl' values
    # (label is "<leading term>_<J>", then passed through chem.enumerative_prefix)
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dfso.Lead, dfso.J)]
    dfso['Jlbl'] = chem.enumerative_prefix(jlbl)
    display(dfso[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift']].style.format(fmt))

In [None]:
# Check for problems in assignments:
# the leading terms found among the levels should number the same as the input terms
termsOut = set(dfso.Lead)
nAssign = len(termsOut)
dropT = False   # becomes the set of dropped terms when some input terms never lead
if nAssign != nTerm:
    print(f'*** I started with {nTerm} terms but have {nAssign} leading terms ***')
    print('Starting: ', sorted(termsIn))
    print('Leading : ', sorted(termsOut))
    if nAssign > nTerm:
        addT = termsOut - termsIn
        print('Added terms: ', addT)
    else:
        dropT = termsIn - termsOut
        print('Dropped terms: ', dropT)
        # Add weights from dropped terms and display
        for term in dropT:
            dfso[term] = SOCI.level_contributions_from_term(term, thr=0)[term]
        print('Weights of dropped terms in levels:')
        wtcols = ['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)
        display(dfso[wtcols].style.format(fmt))
# Every level contributes 2J+1 sublevels; the total should equal the SO-CI count
nlvl = (2 * dfso.J + 1).sum()  # number of sublevels
if nlvl != nSO:
    print(f'*** I started with {nSO} (sub)levels but now have {nlvl} ***')

In [None]:
# Manually assign any dropped terms
# (interactive: for each dropped term, ask which dfso row should carry it)
if dropT:
    for drt in dropT:
        ia = int(input(f'Which level do you want to assign to term {drt}? '))
        dfso.loc[ia, 'Lead'] = drt
    # rebuild 'Jlbl' values from the updated leading terms
    dfso['Jlbl'] = chem.enumerative_prefix(
        [f'{t}_{chem.halves(j)}' for t, j in zip(dfso.Lead, dfso.J)])
    display(dfso[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))

In [None]:
# Decide which parity of experimental levels to compare with.
# In point group Ci with a single irrep present, the parity follows from the
# irrep (irrep 1 -> even, otherwise odd); in all other cases ask the user.
irreps_ci = set(SOCI.dfci.Irrep)
if (SOCI.PG == 'Ci') and (len(irreps_ci) == 1):
    parity = 'even' if 1 in irreps_ci else 'odd'
else:
    # ask user for parity of interest; tolerate stray blanks and capitals,
    # and ask again rather than carrying an unusable value into later cells
    parity = input('Please choose "even" or "odd" parity: ').strip().lower()
    while parity not in ('even', 'odd'):
        parity = input(f'"{parity}" is not recognized; please type even or odd: ').strip().lower()
print(f'Experimental states will be restricted to parity = {parity}')

In [None]:
# Read experimental energy levels
# The file name is built from the atom label and its spectrum numeral.
# NOTE(review): 'I' * (charge + 1) gives "IIII" rather than "IV" for charge 3 and
#   above; confirm that this matches how the data files are actually named.
if charge >= 0:
    atstr = f"{atom}_{'I' * (charge + 1)}"
else:
    # anion
    atstr = atom + '_neg'
fxl = f'{atstr}_exptl_levels.xlsx'
# Special cases: atoms that have an alternative (manually assigned) data file
alt_files = {'Ra_I': 'Ra_I_exptl_levels_plus_theory.xlsx'}
alt_files.update({a: f'{a}_exptl_even_assign.xlsx' for a in ('Ar_I', 'Pb_I', 'Kr_I')})
alt_files.update({a: f'{a}_exptl_odd_assign.xlsx' for a in ('Br_I', 'I_I')})
fxlalt = alt_files.get(atstr)
exp_alt = fxlalt is not None
if exp_alt:
    print('** Using alternative experimental data file ***')
    fxl = fxlalt
xlpath = os.sep.join([fdir, fxl])
dfexpt = pd.read_excel(xlpath)
if exp_alt and ('LS' in dfexpt.columns):
    # use manual assignments: wherever 'LS' is filled in, it replaces 'Term'
    print('** Using term labels in column "LS"')
    has_ls = dfexpt['LS'].notnull()
    dfexpt.loc[has_ls, 'Term'] = dfexpt.loc[has_ls, 'LS']
print(f'Experimental energy levels read from {fxl}')
# If there is a column "comment", replace NaN with ''
if 'comment' in dfexpt.columns:
    dfexpt['comment'] = dfexpt['comment'].fillna('')

In [None]:
# Show the experimental table as read from the Excel file
dfexpt

In [None]:
# Find the number of decimal places in the level energies
Ecol = 'Level (cm-1)'  # the exptl energy column
ndecim = 0
for e in dfexpt[Ecol]:
    whole, point, frac = str(e).rpartition('.')
    if not point:
        # no decimal point at all (integer, 'nan', ...): zero decimals.
        # Without this test every digit of an integer would count as a decimal.
        continue
    # count the digits that directly follow the decimal point; stopping at the
    # first non-digit keeps an exponent or a trailing flag from being counted
    n = len(frac) - len(frac.lstrip('0123456789'))
    ndecim = max(n, ndecim)
print(f'Experimental energies are provided to {ndecim} decimal digits')

In [None]:
# display formatting: show exptl energies with as many decimals as were provided
fmt[Ecol] = f'{{:.{ndecim}f}}'

In [None]:
# Show the experimental table again (unchanged since the previous display)
dfexpt

In [None]:
# Delete any ionization limit:
# discard the first "Limit" row and everything past it
n1 = len(dfexpt)
limit_rows = dfexpt.index[dfexpt.Term == 'Limit']
ilim = limit_rows.min() if len(limit_rows) else None
if ilim is not None:
    # only truncate when a "Limit" row exists; otherwise keep all rows
    dfexpt = dfexpt.truncate(after=ilim-1)
n2 = len(dfexpt)
if n2 < n1:
    print(f'Discarding {n1-n2} ionized or metastable states')
oddstr = r'\*$|°' # characters to identify terms of odd parity
# Sometimes parity is shown in configuration alone?
#dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr))].copy()
#dfodd = dfexpt[dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr)].copy()
dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr))].copy()
dfodd = dfexpt[dfexpt.Term.str.contains(oddstr)].copy()
print(f'{len(dfexpt)} experimental levels ({len(dfeven)} even and {len(dfodd)} odd)')
# Select by parity
if parity == 'even':
    # discard odd levels ('Term' field ends with '*' or contains '°')
    dfexpt = dfeven.copy()
elif parity == 'odd':
    dfexpt = dfodd.copy()
else:
    chem.print_err('', f'Parity of "{parity}" is not recognized')
n3 = len(dfexpt)
print(f'{n3} levels accepted for parity = {parity}')
# Reject bad values of J: anything chem.halves_to_float() cannot parse becomes NaN
for i in dfexpt.index:
    try:
        chem.halves_to_float(dfexpt.loc[i, 'J'])
    except ValueError:
        dfexpt.at[i, 'J'] = np.nan
nbad = dfexpt.J.isna().sum()
if nbad:
    print(f'** Rejecting {nbad} levels with malformed J values')
    # drop only the rows whose J is missing; a bare dropna() would also discard
    # good levels that merely have an empty cell in some other column
    dfexpt = dfexpt.dropna(subset=['J'])
    n4 = len(dfexpt)
    print(f'{n4} level retained')
# Assign unique term symbols, but not twice to the same table (that would
# stack ordinal prefixes).  A table that has no 'uTerm' column yet is labeled
# regardless of the flag.
# NOTE(review): assumes chem.unique_labels_exptl_terms() is what creates the
#   'uTerm' column used by later cells -- confirm.
if (not labels_ordinated) or ('uTerm' not in dfexpt.columns):
    dfexpt = chem.unique_labels_exptl_terms(dfexpt, verbose=True, always=True)
    labels_ordinated = True
dfexpt

In [None]:
def match_expt_theory_simple(dfexpt, dftheory):
    """Pair theoretical levels with experimental ones by leading term and J.

    A theoretical level is paired with the first experimental row whose
    'uTerm' equals the theoretical 'Lead' and whose J is the same.  Pairing
    fails for levels whose term assignments differ between theory and expt.

    Parameters
    ----------
    dfexpt : DataFrame
        Experimental levels; columns 'J', 'uTerm' and 'Level (cm-1)' are read.
        Side effect: a 'degen' column (2J+1) is added to this frame when
        absent.  Other cells of the notebook read dfexpt.degen afterwards.
    dftheory : DataFrame
        Computed levels; columns 'Lead', 'J', 'Erel' and 'termwt' are read.

    Returns
    -------
    DataFrame
        Copy of dfexpt with added columns 'Tcalc', 'termwt', 'Ecalc' and
        'err' (Ecalc minus the exptl energy).  Rows without a theoretical
        partner keep Ecalc = NaN.
    """
    print('---- function match_expt_theory_simple() ----')
    # Exptl energy column; defined locally, as in the other matching functions,
    # so that this function does not rely on a notebook-level variable
    Ecol = 'Level (cm-1)'
    if 'degen' not in dfexpt.columns:
        # Add degeneracy
        dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)) + 1
    dfcomp = dfexpt.copy()
    dfcomp['Tcalc'] = ''  # term assignment in computation
    dfcomp['termwt'] = ''
    dfcomp['Ecalc'] = np.nan
    for i, row in dftheory.iterrows():
        term = row.Lead
        J = chem.halves_to_float(row.J)
        for ix, rowx in dfexpt.iterrows():
            xTerm = rowx.uTerm
            xJ = chem.halves_to_float(rowx.J)
            if (xJ == J) and (term == xTerm):
                if not np.isnan(dfcomp.at[ix, 'Ecalc']):
                    # this exptl row already has a partner; report it and move on
                    print('Already paired!')
                    display(rowx.to_frame().T)
                else:
                    dfcomp.at[ix, 'Ecalc'] = row.Erel
                    dfcomp.at[ix, 'Tcalc'] = term
                    dfcomp.at[ix, 'termwt'] = row.termwt
                # only the first exptl row with this term and J is considered
                break
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    return dfcomp

In [None]:
def match_expt_theory_by_order(dfexpt, dftheory):
    """Pair theoretical levels with experimental ones by energy order within each J.

    Term assignments are ignored.  Theoretical levels are taken in order of
    (J, Erel) and each is paired with the lowest-energy experimental level of
    the same J that has not been used yet.  This function starts from the
    exptl levels, which may be incomplete.

    Parameters
    ----------
    dfexpt : DataFrame
        Experimental levels; columns 'J' and 'Level (cm-1)' are read.
        Side effect: a 'degen' column (2J+1) is added to this frame when absent.
    dftheory : DataFrame
        Computed levels; columns 'Lead', 'J' (numeric), 'Erel' and 'termwt'
        are read.

    Returns
    -------
    DataFrame
        Copy of dfexpt, sorted by exptl energy, with added columns 'Tcalc',
        'termwt', 'Ecalc' and 'err' (Ecalc minus the exptl energy).
    """
    print('==== function match_expt_theory_by_order() ====')
    if 'degen' not in dfexpt.columns:
        # Add degeneracy
        dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)) + 1
    Ecol = 'Level (cm-1)'
    dfcomp = dfexpt.copy().sort_values(['degen', Ecol])
    dfth = dftheory.copy().sort_values(['J', 'Erel'])
    dfcomp['Tcalc'] = ''  # term assignment in computation
    dfcomp['termwt'] = ''
    dfcomp['Ecalc'] = np.nan
    # parse the exptl J values once, instead of once per theoretical level
    xJ = {ix: chem.halves_to_float(j) for ix, j in dfcomp.J.items()}
    matched = set()  # labels of exptl rows that already have a partner
    for _, row in dfth.iterrows():
        for ix in dfcomp.index:
            if (ix in matched) or (xJ[ix] != row.J):
                continue
            # Experimental J matches theoretical; make assignment
            dfcomp.at[ix, 'Ecalc'] = row.Erel
            dfcomp.at[ix, 'Tcalc'] = row.Lead
            dfcomp.at[ix, 'termwt'] = row.termwt
            matched.add(ix)
            break
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    dfcomp = dfcomp.sort_values(Ecol)
    return dfcomp

In [None]:
def match_theory_expt_alt(dfexpt, dftheory):
    """Pair experimental levels with theoretical ones, keeping every theoretical level.

    Matching is by leading term and J, so it fails for levels whose term
    assignments differ.  A theoretical level without an experimental partner
    (including one whose only candidate was already paired) gets its own
    computed energy and term label in the experimental columns, so its 'err'
    is zero.

    Parameters
    ----------
    dfexpt : DataFrame
        Experimental levels; columns 'Configuration', 'Term', 'uTerm', 'J',
        'Level (cm-1)' and, when present, 'comment' are read.
    dftheory : DataFrame
        Computed levels; must contain 'E', 'Nr', 'Eshift', 'Jlbl', 'Lead',
        'J' (numeric), 'Erel' and 'termwt'.  It is not modified.

    Returns
    -------
    DataFrame
        One row per theoretical level, with the columns produced by
        match_expt_theory_simple(): 'Configuration', 'Term', 'J',
        'Level (cm-1)', 'uTerm', 'degen', 'Tcalc', 'termwt', 'Ecalc', 'err'
        and, when dfexpt has it, 'comment'.
    """
    print('~~~~ function match_theory_expt_alt() ~~~~')
    Ecol = 'Level (cm-1)'
    has_comment = 'comment' in dfexpt.columns
    dfcomp = dftheory.drop(['E', 'Nr', 'Eshift', 'Jlbl'], axis=1)
    dfcomp['degen'] = (2 * dfcomp.J) + 1
    dfcomp['Tcalc'] = dfcomp.Lead
    dfcomp['Ecalc'] = dfcomp.Erel
    dfcomp['Configuration'] = ''
    dfcomp['Term'] = ''
    dfcomp['uTerm'] = ''
    dfcomp[Ecol] = np.nan
    selcols = ['Configuration', 'Term', 'J', Ecol, 'uTerm', 'degen', 'Tcalc', 'termwt', 'Ecalc', 'err']
    if has_comment:
        dfcomp['comment'] = ''
        selcols.append('comment')
    imatched = set()  # exptl rows already matched
    for i, row in dftheory.iterrows():
        term = row.Lead
        J = chem.halves_to_float(row.J)
        for ix, rowx in dfexpt.iterrows():
            xTerm = rowx.uTerm
            xJ = chem.halves_to_float(rowx.J)
            if (xJ == J) and (term == xTerm):
                if ix in imatched:
                    print('Already paired!')
                    display(rowx.to_frame().T)
                else:
                    dfcomp.at[i, Ecol] = rowx[Ecol]
                    dfcomp.at[i, 'Configuration'] = rowx.Configuration
                    dfcomp.at[i, 'uTerm'] = xTerm
                    dfcomp.at[i, 'Term'] = rowx.Term
                    if has_comment:
                        # explicit test instead of a bare try/except, so that
                        # genuine errors are no longer silently swallowed
                        dfcomp.at[i, 'comment'] = rowx.comment
                    imatched.add(ix)
                # only the first exptl row with this term and J is considered
                break
        if np.isnan(dfcomp.at[i, Ecol]):
            # no exptl partner: use theoretical energy and labels instead
            dfcomp.at[i, Ecol] = dfcomp.at[i, 'Ecalc']
            dfcomp.at[i, 'uTerm'] = dfcomp.at[i, 'Tcalc']
            dfcomp.at[i, 'Term'] = remove_ordinal_prefix(dfcomp.at[i, 'Tcalc'])
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    # keep only the columns produced by match_expt_theory_simple()
    dfcomp = dfcomp[selcols]
    return dfcomp

In [None]:
def remove_ordinal_prefix(label):
    """Strip a leading ordinal such as "(2)" from a term label.

    Only a parenthesized integer at the very start of the label is removed;
    any later "(n)" in the label is left alone.  A label without such a
    prefix is returned unchanged.

    Parameters
    ----------
    label : str
        Term label, e.g. "(1)3P".

    Returns
    -------
    str
        The label without its ordinal prefix, e.g. "3P".
    """
    # raw string: \( \d \) are regex escapes, not Python string escapes
    m = re.match(r'\(\d+\)', label)
    if m:
        return label[m.end():]
    return label

In [None]:
def match_expt_theory_by_order_B(dfexpt, dftheory):
    """Pair theoretical levels with experimental ones by energy order within each J.

    Term assignments are ignored.  This function starts with the computed
    levels: they are taken in order of increasing Erel, and each is paired
    with the lowest-energy experimental level of the same J that has not been
    used yet.

    Parameters
    ----------
    dfexpt : DataFrame
        Experimental levels; columns 'Configuration', 'uTerm', 'J' and
        'Level (cm-1)' are read.  Side effect: a 'degen' column (2J+1) is
        added to this frame when absent.
    dftheory : DataFrame
        Computed levels; columns 'Lead', 'J' (numeric), 'Erel' and 'termwt'
        are read.

    Returns
    -------
    DataFrame
        One row per theoretical level, sorted by exptl energy, with columns
        'Configuration', 'uTerm', 'J', 'Level (cm-1)', 'Tcalc', 'Ecalc',
        'termwt' and 'err' (Ecalc minus the exptl energy).  Levels without an
        experimental partner keep NaN in 'Level (cm-1)'.
    """
    print(':::: function match_expt_theory_by_order_B() ::::')
    if 'degen' not in dfexpt.columns:
        # Add degeneracy
        dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)) + 1
    Ecol = 'Level (cm-1)'
    # exptl levels in order of energy within each degeneracy (i.e. within each J)
    dfx = dfexpt.copy().sort_values(['degen', Ecol])
    dfcomp = dftheory[['Lead', 'J', 'Erel', 'termwt']].copy().sort_values(['J', 'Erel'])
    dfcomp = dfcomp.rename({'Lead': 'Tcalc', 'Erel': 'Ecalc'}, axis=1)
    dfcomp['uTerm'] = ''  # term assignment in exptl table
    dfcomp[Ecol] = np.nan  # exptl level energies
    dfcomp['Configuration'] = ''
    imatched = set()  # labels of exptl rows that already have a partner
    for i, row in dftheory.sort_values('Erel').iterrows():
        J = row.J
        # Walk the SORTED exptl levels, so that the pairing really follows
        # energy order and not the row order of the input table
        for ix, rowx in dfx.iterrows():
            if ix in imatched:
                continue
            xJ = chem.halves_to_float(rowx.J)
            if xJ != J:
                continue
            # Experimental J matches theoretical; make assignment
            dfcomp.at[i, Ecol] = rowx[Ecol]
            dfcomp.at[i, 'uTerm'] = rowx.uTerm
            dfcomp.at[i, 'Configuration'] = rowx.Configuration
            imatched.add(ix)
            break
    # copy the column selection so that adding 'err' does not write into a view
    dfcomp = dfcomp[['Configuration', 'uTerm', 'J', Ecol, 'Tcalc', 'Ecalc', 'termwt']].copy()
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    dfcomp = dfcomp.sort_values(Ecol)
    return dfcomp

In [None]:
#  For matching with expt, try both of the following functions (dfdiff = ...)
#  (match_expt_theory_simple() also adds a 'degen' column to dfexpt when it is absent)
dfdiff = match_expt_theory_simple(dfexpt, dfso)
#dfdiff = match_expt_theory_by_order_B(dfexpt, dfso)
# Convert str values of J to float
dfdiff['J'] = dfdiff.J.apply(chem.halves_to_float)
warnThresh = 1000  # highlight errors larger than this (cm-1)
# drop rows with NaN (no matching level in the calculation)
# NOTE(review): dropna() without 'subset' removes a row that has NaN in ANY column,
#   not only in 'Ecalc'; confirm that no other column of the exptl table can be empty.
dfdiff = dfdiff.dropna(axis=0)
selcols = ['Configuration', 'uTerm', 'J', Ecol, 'Tcalc', 'Ecalc', 'err']
# Print a warning if experimental levels are missing
nth = len(dfso); ndiff = len(dfdiff)
expt_missing = nth - ndiff
if expt_missing > 0:
    print(f'\n**** There are {nth} theoretical levels but only {ndiff} matching experimental levels ****')
    print('**** Using theoretical energies where expt is missing ****')
    # redo the pairing starting from the theoretical levels, so that none is lost
    dfdiff = match_theory_expt_alt(dfexpt, dfso)
else:
    # use as flag
    expt_missing = 0
if 'comment' in dfdiff.columns:
    selcols.append('comment')
print(f'Please inspect the following pairing of theory ("Ecalc") with expt ("{Ecol}")')
print('Disagreements in term assignments are highlighted in red')
print(f'Errors > {warnThresh} cm-1 are highlighted in yellow')
# Two style passes: yellow background on 'err' cells whose magnitude exceeds
# warnThresh, and red background on 'Tcalc' cells that differ from 'uTerm'
display(dfdiff[selcols].style.apply(lambda x: ["background: yellow" if abs(v) > warnThresh else "" for v in x], 
              subset=pd.IndexSlice[['err']]).apply(lambda x: (x != dfdiff['uTerm']).map({True: "background-color: red; \
              color: white", False: ""}), subset=['Tcalc']).format(fmt))

In [None]:
# No theoretical calculations are needed to use eq. (1)
# Distinct exptl term labels, in order of first appearance among the paired levels
xterms = list(dict.fromkeys(dfdiff.uTerm))  # list of term labels
# Degeneracy-weighted mean of the exptl level energies belonging to each term
eterms = []  # list of term energies
for Term in xterms:
    members = dfexpt.loc[dfexpt.uTerm == Term]
    eterms.append(np.dot(members.degen, members[Ecol]) / members.degen.sum())
dfeq1 = (pd.DataFrame({'Term': xterms, 'Eterm': eterms})
         .sort_values('Eterm')
         .reset_index(drop=True))
print('Term energies (cm-1) using eq. (1) (experimental data with naive model)')
display(dfeq1.style.format(fmt))
# The first row of the sorted table is the term of lowest energy
lowterm = dfeq1.at[0, 'Term']
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
print(f'The term of lowest energy is \t{lowterm} \twith SOC1 = {SOC1} cm-1')
# Term label of the first row of the exptl table
levterm = dfexpt.uTerm.values[0]

target = levterm

if levterm != lowterm:
    # The lowest term is not the leading term in the lowest level
    SOC1alt = SOC1
    SOC1 = -1 * np.round(dfeq1.loc[dfeq1.Term == levterm, 'Eterm'].values[0], 3)
    print(f'The lowest level belongs to \t{levterm} \twith SOC1 = {SOC1} cm-1')
print()
print(f'Term {target} is selected for further SOC calculation')
print('    to change this, assign the variable "target" to another term in the table above')

In [None]:
def term_energy_from_levels(df, term):
    """Return the average energy of a term, as derived from the levels.

    Each level is weighted by its degeneracy times the weight of 'term' in
    that level.

    Parameters
    ----------
    df : DataFrame
        Needs columns 'termwt' and Ecol, plus either 'degen' or 'J'.
        Ecol is the notebook-level name of the column of level energies.
        Each 'termwt' entry is either a sequence of weights, indexed like
        the rows of SOCI.dfterm, or a single scalar weight.
    term : str
        Term label; must occur in SOCI.dfterm.Term (IndexError otherwise).

    Returns
    -------
    float
        Weighted mean of df[Ecol].
    """
    # find index for term 'term'
    iterm = SOCI.dfterm[SOCI.dfterm.Term == term].index[0]
    termwt = []
    for twt in df.termwt:
        try:
            termwt.append(twt[iterm])
        except (IndexError, TypeError):
            # assume scalar: numpy scalars raise IndexError when indexed,
            # plain Python numbers raise TypeError
            termwt.append(twt)
    termwt = np.array(termwt)
    try:
        degen = df.degen.values
    except AttributeError:
        # no 'degen' column: derive the degeneracy from J
        degen = 2 * df.J.values + 1
    dweight = degen * termwt  # total weight, including degeneracies
    Eterm = np.dot(df[Ecol], dweight) / dweight.sum()
    return Eterm

In [None]:
# Show the term currently selected for the SOC calculation
target

In [None]:
# Use experimental level energies via eq. (2):
# the term energy is the mean of the level energies in dfdiff, weighted by
# degeneracy times the weight of the target term in each level
Eterm = term_energy_from_levels(dfdiff, target)
SOC2 = -Eterm  # reported with the opposite sign of the term energy
print('Applying eq. (2) (experimental energies and theoretical term weights)')
print(f'For term {target}, SOC2 = {SOC2:.2f} cm-1')

In [None]:
# Summary table of the alternative E_so values obtained above
print(f'Molpro source file: {fname}\n')
print(f'Alternative values for E_so[{target}] of atom {atom}:')
rule = '-' * 25
print(rule)
for method, value in (('eq (1)', SOC1), ('raw theory', SOCraw), ('eq (2)', SOC2)):
    print('{:12s} {:.2f} cm-1'.format(method, value))
print(rule)

In [None]:
def term_distrib(term, df):
    """Return the weights (including 2J+1) of a term in the levels.

    Parameters
    ----------
    term : str
        Term label; must occur in SOCI.dfterm.Term (IndexError otherwise).
    df : DataFrame
        Needs columns 'termwt' and 'J' (numeric).  Each 'termwt' entry is
        either a sequence of weights, indexed like the rows of SOCI.dfterm,
        or a single scalar weight.

    Returns
    -------
    Series
        One value per row of df: the weight of 'term' in that level
        multiplied by the level's degeneracy, 2J+1.
    """
    itarget = SOCI.dfterm[SOCI.dfterm.Term == term].index[0]
    wt = []
    for twt in df.termwt:
        try:
            wt.append(twt[itarget])
        except (IndexError, TypeError):
            # assume scalar: numpy scalars raise IndexError when indexed,
            # plain Python numbers raise TypeError
            wt.append(twt)
    wt = np.array(wt)
    # include the 2J+1 degeneracy of each level
    wt = wt * (2*df.J + 1)
    return wt

In [None]:
# Term energy errors as inferred from all levels
# For every exptl term: weighted mean and std of the signed errors, of the
# unsigned (absolute value) errors, and the weighted RMSE.  The weights are
# the distribution of the term over the levels (including 2J+1).
uerr = np.abs(dfdiff.err.values)
records = []
for term in set(dfdiff.uTerm):
    weights = term_distrib(term, dfdiff).values
    m, s = chem.weighted_mean(dfdiff.err, weights)
    um, us = chem.weighted_mean(uerr, weights)
    umsq, ussq = chem.weighted_mean(uerr ** 2, weights)
    records.append((term, m, s, um, us, np.sqrt(umsq)))
dftermerr = pd.DataFrame(records,
                         columns=['Term', 'wmean', 'wstds', 'uwmean', 'uwstds', 'rwmse'])

if dftermerr.isnull().values.any():
    print('*** Some terms are missing ***')
    print('Try decreasing the energy maximum ("termcut")')
else:
    print('Errors in term energies (cm-1) as inferred from full level distribution')
    print('    (not only levels where leading)')
    # default order same as experimental terms
    dftermerr.Term = pd.Categorical(dftermerr.Term, xterms)
    dftermerr = dftermerr.sort_values('Term')
#dftermerr.sort_values('uwmean').style.format(fmt)
dftermerr.style.format(fmt)

In [None]:
# Term whose distribution over the levels is displayed.
# NOTE(review): this hard-coded label replaces the 'target' selected earlier;
#   make sure it is a term of the atom being analyzed.
# (another label that has been used here: '(1)2P°')
target = '(1)4S°'
print(f'Distribution of term "{target}" among levels:')
thrsh = 1.e-6   # weights at or below this are treated as negligible
dfdistrib = dfso.copy()
dfdistrib[target] = term_distrib(target, dfso)
# remove rows with negligible weights, and the bulky columns
dfdistrib = dfdistrib[dfdistrib[target] > thrsh].drop(columns=['termwt', 'Composition'])
display(dfdistrib.sort_values(target, ascending=False).style.format(fmt))
print(f'Total weight of {target} = {dfdistrib[target].sum():.3f}')
print(f'Number of levels listed above: {len(dfdistrib)}')

In [None]:
# Looking at levels may help in assigning LS term labels to exptl levels (when missing)
dfassign = dfso[['Lead', 'J', 'Erel', 'termwt']]
# Show only the levels whose leading term is the hard-coded label below;
# edit the label to inspect a different term
dfasub = dfassign[dfassign.Lead == '(1)3P']
display(dfasub.style.format(fmt))

In [None]:
# Show the term weights for a selected level
ilevel = 0  # row label in dfso of the level to inspect
# NOTE(review): the message below ends with 'and ' and no newline (end='');
#   nothing later in this cell completes the sentence.
print(f'Term weights for level {ilevel} with J = {dfso.at[ilevel, "J"]} and ', end='')
# thr is passed through to SOCI.composition_of_level (presumably a cutoff on the weights)
dfterm2 = SOCI.composition_of_level(ilevel, thr=1.e-6)
display(dfterm2)