In [1]:
# Read ab initio term energies (from many Molpro files) from file Pb_SOCI_results.xlsx
# Report the weighted stats for all valence terms (3P, 1D, 1S)
# Also "eq. (2)", recommended term energies
# KKI 2/9/2026

import sys, os, re
import pandas as pd
import numpy as np
sys.path.insert(0, r'../../atomic_SOC')
import chem_subs as chem

In [2]:
def read_datafile(fname):
    """Read a table from an Excel workbook or a CSV file.

    For an Excel workbook with more than one worksheet, the worksheet names
    are printed and the user is prompted (via input()) to choose one;
    otherwise the only worksheet is read.  The table is read with
    header=None, so the first row of the file is returned as data.

    Parameters
    ----------
    fname : str
        Path to the data file.  A '.csv' extension selects the CSV reader;
        an Excel extension (or, as before, any name containing 'xlsx')
        selects the Excel reader.

    Returns
    -------
    sheet : str or None
        Name of the worksheet that was read; None for a CSV file.
    df : pandas.DataFrame
        Contents of the worksheet or CSV file.

    Raises
    ------
    ValueError
        If the file type is not recognized.  (Previously this case failed
        with UnboundLocalError at the return statement.)
    """
    froot = os.path.split(fname)[-1]
    ext = os.path.splitext(froot)[-1].lower()
    if ext == '.csv':
        sheet = None  # a CSV file has no worksheets
        print(f'Reading CSV file {froot}')
        df = pd.read_csv(fname, header=None)
    elif ext in ('.xlsx', '.xlsm', '.xls') or 'xlsx' in fname:
        # context manager closes the workbook handle when done
        with pd.ExcelFile(fname) as xl:
            if len(xl.sheet_names) > 1:
                print('Available worksheets:', xl.sheet_names)
                sheet = input('\tchoose a worksheet: ')
            else:
                sheet = xl.sheet_names[0]
            print(f'Reading worksheet "{sheet}" from Excel file {froot}')
            df = xl.parse(sheet, header=None)
    else:
        raise ValueError(f'Unrecognized data file type: {froot}')
    return sheet, df

In [3]:
# Location of the Excel workbook that holds the SO-CI results.
# NOTE(review): absolute, machine-specific directory; edit for your own system.
xldir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\MSS'
xlfile = 'Pb_SOCI_results.xlsx'
fxl = os.path.join(xldir, xlfile)  # full path to the workbook

In [4]:
# Read the table of calculations (worksheet 'letter codes') and check that
# each filename is consistent with the basis set, active space and 3P weight
# listed in the same row.
print(f'Reading SO-CI letter codes from {fxl}')
xl = pd.ExcelFile(fxl)  # left open: other worksheets are parsed from it later
df_letters = xl.parse('letter codes', skiprows=1)
ncalc = len(df_letters)
print(f'There are results for {ncalc} calculations')
re_num = re.compile(r'\d\d+')  # runs of two or more digits
# Check that filenames are consistent with other descriptors
nbad = 0   # number of filenames that fail a check (each counted once)
iPPl = []  # rows with PP basis set
iAEl = []  # rows with AE basis set
for irow, row in df_letters.iterrows():
    fn = row['Filename']
    # basis set
    bas = row['Basis set']
    if 'pp' in bas:
        iPPl.append(irow)
        bad = bas not in fn
    else:
        # all-electron calculation
        iAEl.append(irow)
        if 'pp' in fn:
            bad = True
        else:
            # remove suffix before comparing with the filename; keep 'bas'
            #   intact so the warning shows the value from the spreadsheet
            bas_root = bas
            for suf in ['dk3', 'dk', 'x2c']:
                bas_root = bas_root.replace(suf, '')
            bad = bas_root not in fn
    if bad:
        print(f'*** basis = {bas} for filename = {fn} looks wrong ***')
        nbad += 1
        continue  # don't check this filename further
    # multi-digit numbers in filename should be 3P weight or active space
    numl = re_num.findall(fn)
    if numl:
        wt = str(row['3P wt in CASSCF'])
        acts = row['Active space'].replace(')', '').replace('(', '').replace('/', '')
        for num in numl:
            if (wt != num) and (acts != num):
                print(f'*** filename {fn} does not match active space {row["Active space"]} or 3P weight {wt}')
                nbad += 1
                break  # report and count each filename only once
if nbad:
    print(f'*** There are {nbad} questionable filenames ***')
else:
    print('Filenames look reasonable')

Reading SO-CI letter codes from C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\MSS\Pb_SOCI_results.xlsx
There are results for 34 calculations
Filenames look reasonable


In [5]:
# Load the "raw" (ab initio) term energies from worksheet 'energies',
# skipping the first row of the sheet
dfraw = xl.parse(sheet_name='energies', skiprows=1)

In [6]:
# Level errors from each calculation: one worksheet per letter code.
# key = letter code, value = DataFrame parsed from the worksheet of that name
dfd = {lett: xl.parse(lett) for lett in df_letters.Calc}

In [7]:
def wtd_err(letter, term, err_choice = 'unsigned'):
    """Weighted mean error in the level energies of one calculation.

    Each level's error (column 'err' of dfd[letter]) is weighted by the
    weight of 'term' in that level (from column 'termwts') multiplied by
    the 2J+1 degeneracy (from column 'J').

    letter     : key into the module-level dict 'dfd'
    term       : term label; a 'termwts' key equal to 'term' or to
                 '(1)' + 'term' is accepted
    err_choice : 'unsigned' -> weighted mean of |err|
                 'rms'      -> square root of the weighted mean of err**2

    Returns the weighted error, or np.nan (after printing a message) when
    'err_choice' is not recognized or when 'term' has zero weight in every
    level of the calculation.
    """
    table = dfd[letter]
    accepted = [term, '(1)' + term]
    if err_choice not in ('unsigned', 'rms'):
        print(f'*** unrecognized err_choice = {err_choice} for letter = {letter} and term = {term} ***')
        return np.nan
    want_rms = (err_choice == 'rms')
    magnitudes = table.err.values ** 2 if want_rms else np.abs(table.err.values)
    # Collect the weight of the requested term in every level (0 if absent)
    weights = []
    for entry in table.termwts:
        # NOTE(review): eval() will execute whatever expression is stored in
        #   the spreadsheet cell; only use with trusted workbooks
        composition = eval(entry)  # convert string to dict
        hits = [label for label in composition.keys() if label in accepted]
        weights.append(composition[hits[0]] if hits else 0)
    weights = np.array(weights)
    if not (weights != 0).any():
        # no level carries any weight of this term
        print(f'Term {term} is not present in calculation {letter}')
        return np.nan
    # include the 2J+1 degeneracy
    weights = weights * (2 * table.J.values + 1)
    result = np.dot(magnitudes, weights) / weights.sum()
    return np.sqrt(result) if want_rms else result

In [8]:
# Weighted level error of every calculation, one column per valence term.
# Each column holds wtd_err() rounded to one decimal (NaN where the term
# is absent from the calculation).
err_choice = 'unsigned'
vterms = ['3P', '1D', '1S']
df_werr = df_letters[['Calc']].copy()
for term in vterms:
    werrl = [np.round(wtd_err(calc, term, err_choice), 1) for calc in df_werr.Calc]
    df_werr[term] = werrl

Term 1D is not present in calculation R
Term 1D is not present in calculation S
Term 1S is not present in calculation F
Term 1S is not present in calculation G
Term 1S is not present in calculation H
Term 1S is not present in calculation I
Term 1S is not present in calculation J
Term 1S is not present in calculation R
Term 1S is not present in calculation S
Term 1S is not present in calculation T
Term 1S is not present in calculation AA


### Ab initio "raw" term energies

In [9]:
max_err = 1000.  # max allowed energy err, cm-1
pexp = -1  # exponent for converting weighted level error to weight for calculation
print(f'Ab initio term energies (err_choice = {err_choice}, pexp = {pexp})')
print(f'Total number of calculations = {len(df_letters)}')
dfsel = {}  # calculations with acceptable weighted level errors; key = term
for term in vterms:
    newcol = f'raw({term})'
    subdf = df_werr.copy()
    # term energy relative to ground level, then converted to cm-1
    subdf[newcol] = dfraw[term] - dfraw['SO-CI']
    subdf[newcol] = (subdf[newcol] * chem.AU2CM).round(1)
    # keep only calculations whose weighted error is below the threshold
    #   (a NaN error fails the comparison and is dropped), best first
    acceptable = subdf[term] < max_err
    subdf = subdf.loc[acceptable, ['Calc', term, newcol]].sort_values(term).copy()
    print(f'\nFor {term}, {len(subdf)} errors are below {max_err} cm-1')
    print(f'Weighted stats for "raw" ab initio energy of term {term}')
    # weight of each calculation = (weighted level error) ** pexp
    mraw, uraw = chem.weighted_mean(subdf[newcol], subdf[term] ** pexp)
    print(f'    wmean = {mraw:.0f}, wstds = {uraw:.0f} (cm-1)')
    dfsel[term] = subdf

Ab initio term energies (err_choice = unsigned, pexp = -1)
Total number of calculations = 34

For 3P, 16 errors are below 1000.0 cm-1
Weighted stats for "raw" ab initio energy of term 3P
    wmean = 11023, wstds = 42 (cm-1)

For 1D, 12 errors are below 1000.0 cm-1
Weighted stats for "raw" ab initio energy of term 1D
    wmean = 16568, wstds = 33 (cm-1)

For 1S, 13 errors are below 1000.0 cm-1
Weighted stats for "raw" ab initio energy of term 1S
    wmean = 25282, wstds = 26 (cm-1)


## Recommended "eq. (2)" term energies (rounded term weights)

In [10]:
def eq2(letter, term):
    """Return the eq. (2) energy of 'term' for calculation 'letter'.

    The experimental level energies (column 'Expt/cm-1' of dfd[letter]) are
    averaged by chem.weighted_mean(), each level weighted by the weight of
    'term' in that level (from column 'termwts') multiplied by the 2J+1
    degeneracy (from column 'J').

    letter : key into the module-level dict 'dfd'
    term   : term label; a 'termwts' key equal to 'term' or to
             '(1)' + 'term' is accepted

    Returns (wmean, wstds) as given by chem.weighted_mean().  When 'term'
    has zero weight in every level, a message is printed and
    (np.nan, np.nan) is returned, so that callers can always unpack two
    values (a bare np.nan was returned before, which broke unpacking).
    """
    Ecol = 'Expt/cm-1'  # experimental level energies
    df = dfd[letter]
    terml = [term, '(1)' + term]
    wtl  = []  # term weights
    for tstr in df.termwts:
        # NOTE(review): eval() will execute whatever expression is stored in
        #   the spreadsheet cell; only use with trusted workbooks
        td = eval(tstr)  # convert string to dict
        for t in td.keys():
            if t in terml:
                wtl.append(td[t])
                break
        else:
            # term does not contribute to this level
            wtl.append(0)
    # Are all weights zero?
    wta = np.array(wtl)
    if (wta == 0).all():
        print(f'Term {term} is not present in calculation {letter}')
        return np.nan, np.nan
    wdegen = 2 * df.J.values + 1
    wta = wta * wdegen  # include the 2J+1 degeneracy
    wmean, wstds = chem.weighted_mean(df[Ecol], wta)
    return wmean, wstds

In [11]:
print(f'Eq (2) term energies (err_choice = {err_choice}, pexp = {pexp})')
uincr = 2.  # factor applied to the weighted standard deviation
print(f'Weighted standard deviations will be increased by factor {uincr}')
newcol = 'Eq (2)'
for term, df in dfsel.items():
    print(f'\nTerm {term} ({len(df)} calculations)')
    # obtain Eq (2) for each selected calculation (only the mean is kept)
    eq2l = [eq2(calc, term)[0] for calc in df.Calc]
    df[newcol] = np.round(eq2l, 1)
    # weight of each calculation = (weighted level error) ** pexp
    wta = df[term] ** pexp
    wmean, wstds = chem.weighted_mean(df[newcol], wta)
    unc = wstds * uincr
    print(f'    wmean = {wmean:.0f}, unc = {unc:.0f}')

Eq (2) term energies (err_choice = unsigned, pexp = -1)
Weighted standard deviations will be increased by factor 2.0

Term 3P (16 calculations)
    wmean = 11486, unc = 21

Term 1D (12 calculations)
    wmean = 17195, unc = 17

Term 1S (13 calculations)
    wmean = 25669, unc = 41
