In [3]:
# Extract SO-CI information from MOLPRO outputs for atoms
#   Read exptl data from Excel file, combine with weights to get E_so
#   The experimental Excel file is generated by get_NIST_atomic_data.ipynb
# Add a fourth way to get E_so:  by fitting term energies to exptl level energies
#   (off-diagonal elements of the spin-orbit matrix are from a Molpro SO-CI calculation)
# KKI 2/5/2024
import re, sys, os
import numpy as np
import pandas as pd
import scipy
#import matplotlib.pyplot as plt
#from collections import Counter
#from sklearn.cluster import KMeans

import chem_subs as chem
import molpro_subs as mpr

pd.set_option('display.max_rows', None)
np.set_printoptions(suppress=True)

### Specify Molpro SO-CI output file

In [548]:
fname = 'ba_1S3D3F3P_acqz_99.pro'

In [549]:
# my atom subdirectory names look like "Ar_I" (for neutral argon)
el = fname.split('_')[0].capitalize()
fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\{:s}_I'.format(el)

In [550]:
fsoc = os.sep.join([fdir, fname])
print(f'Reading MOLPRO file')
print(fsoc)
atom = mpr.stoichiometry(fsoc)
charge = int(np.rint(mpr.total_charge(fsoc, verbose=True)))
print(f'The atom is {atom} with charge {charge}')
# append charge, to match sheet names in exptl data file
if charge > 0: 
    atom += '+'
elif charge < 0:
    atom += '-'
if abs(charge) > 1:
    atom += f'{abs(charge)}'

Reading MOLPRO file
C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\calculations\Ba_I\ba_1S3D3F3P_ac5zpp.pro
--- ECP replaces 46 nuclear charges ---
The atom is Ba with charge 0


In [551]:
ishybrid = '_hyb' in fname
if ishybrid:
    print('** Hybrid SO-CI')
SOCI = mpr.fullmatSOCI(fsoc, atom=True, hybrid=ishybrid)
labels_ordinated = False  # flag to prevent multiple (1)(1)(1) etc. 

Computational group = Ci
CASSCF states:
     1 Singlet
    15 Triplet
---CASSCF state 1.1 matched to MRCI state 2.1
---CASSCF state 1.1 matched to MRCI state 3.1
---CASSCF state 1.1 matched to MRCI state 4.1
---CASSCF state 1.1 matched to MRCI state 5.1
---CASSCF state 6.1 matched to MRCI state 7.1
---CASSCF state 6.1 matched to MRCI state 8.1
---CASSCF state 6.1 matched to MRCI state 9.1
---CASSCF state 6.1 matched to MRCI state 10.1
---CASSCF state 6.1 matched to MRCI state 11.1
---CASSCF state 6.1 matched to MRCI state 12.1
---CASSCF state 13.1 matched to MRCI state 14.1
---CASSCF state 13.1 matched to MRCI state 15.1


In [552]:
print(f'CASSCF active space is ({SOCI.cas.nactel()}/{SOCI.cas.nactorb()})')
# Get the basis set
rx_bs = re.compile(' SETTING BASIS\s+=\s+(\S+)')
with open(fsoc, 'r') as F:
    for line in F:
        m = rx_bs.match(line)
        if m:
            basisset = m.group(1)
print(f'Basis set is {basisset}')
dynw = SOCI.cas.dynw()
caswts = SOCI.cas.weights(rescale=True)
if dynw:
    print(f'Dynamical weighting with dynw={dynw}')
    print('Final weights:')
else:
    print('CASSCF state weights (subject to rounding error):')
for wts in caswts:
    print('    ', np.round(wts, 1))

CASSCF active space is (2/7)
Basis set is AUG-CC-PWCV5Z-PP
CASSCF state weights (subject to rounding error):
     [1.]
     [1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [553]:
lsq = SOCI.cas.results['L**2'].values
if (lsq - np.round(lsq)).any():
    print('*** Non-integer values of <L**2> ***')
display(SOCI.cas.results[['Spin', 'Label', 'Energy', 'L**2', 'Term']])

Unnamed: 0,Spin,Label,Energy,L**2,Term
0,Singlet,1.1,-25.101309,-0.0,1S
1,Triplet,1.1,-25.061314,6.0,3D
2,Triplet,2.1,-25.061314,6.0,3D
3,Triplet,3.1,-25.061314,6.0,3D
4,Triplet,4.1,-25.061314,6.0,3D
5,Triplet,5.1,-25.061314,6.0,3D
6,Triplet,6.1,-25.005053,12.0,3F
7,Triplet,7.1,-25.005053,12.0,3F
8,Triplet,8.1,-25.005053,12.0,3F
9,Triplet,9.1,-25.005053,12.0,3F


In [554]:
# Report on orbital spaces in the MRCI
print('MRCI orbital spaces, by irrep')
print('    {:10s} {}'.format('core', SOCI.cilist[0].orbs_core))
print('    {:10s} {}'.format('closed', SOCI.cilist[0].orbs_closed))
print('    {:10s} {}'.format('active', SOCI.cilist[0].orbs_active))
print('    {:10s} {}'.format('external', SOCI.cilist[0].orbs_external))

MRCI orbital spaces, by irrep
    core       []
    closed     [1, 3]
    active     [7, 0]
    external   [93, 109]


In [555]:
nCI = len(SOCI.dfci)
nSO = len(SOCI.SOe.results)
print(f'There are {nCI} MRCI states')
print(f'There are {nSO} spin-orbit levels')

There are 16 MRCI states
There are 46 spin-orbit levels


In [556]:
SOCraw = SOCI.vals.min()
print(f'From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = {SOCraw:.2f} cm-1')

From lowest level and lowest uncoupled term energy, raw theoretical SOCraw = -0.06 cm-1


In [557]:
# display formatting
fmt = {'Eshift': '{:.1f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
for col in ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1', 'fitted',
           'wmean', 'wstds', 'uwmean', 'uwstds', 'change', 'rwmse']:
    fmt[col] =  fmt['Eshift']
for col in ['dif', 'Theory', 'ecm', 'SOC', 'RMSE']:
    fmt[col] = '{:.2f}'

In [558]:
dfterm = SOCI.average_terms(be_close=['Energy', 'Edav', 'Eref', 'dipZ', 'C0'], 
                            atom=True, always=True)
nTerm = len(dfterm)
termsIn = set(dfterm.Term)
# drop the dipZ column
dfterm.drop(columns=['dipZ'], inplace=True)
print(f'Averaged terms from MRCI ({fname}):')
dfso = SOCI.assign_atomic_J(quiet=False)  # create SOCI.dfso
Egl = SOCI.dfso.E.min()  # energy of ground level
dfterm['Erel'] = (dfterm.Edav - Egl) * chem.AU2CM
display(dfterm.style.format(fmt))
print('"ecm"  is relative to the lowest term (cm-1)')
print('"Erel" is relative to the ground level')

Averaged terms from MRCI (ba_1S3D3F3P_ac5zpp.pro):
Assigning J using 10 clusters/levels


Unnamed: 0,Term,Edav,idx,ecm,Erel
0,(1)1S,-25.388357,[0],0.0,0.1
1,(1)3D,-25.348964,[1 3 4 2 5],8645.8,8645.9
2,(1)3F,-25.294799,[ 7 6 10 12 8 9 11],20533.7,20533.7
3,(1)3P,-25.281133,[15 13 14],23533.1,23533.2


"ecm"  is relative to the lowest term (cm-1)
"Erel" is relative to the ground level


In [559]:
print('Level assignments from the calculation:')
display(dfso[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift']].style.format(fmt))

Level assignments from the calculation:


Unnamed: 0,Lead,J,Jlbl,Erel,Eshift
0,(1)1S,0.0,(1)1S_0,0.0,-0.1
1,(1)3D,1.0,(1)3D_1,8337.0,8336.9
2,(1)3D,2.0,(1)3D_2,8542.9,8542.9
3,(1)3D,3.0,(1)3D_3,8851.9,8851.8
4,(1)3F,2.0,(1)3F_2,20155.6,20155.5
5,(1)3F,3.0,(1)3F_3,20439.2,20439.1
6,(1)3F,4.0,(1)3F_4,20817.4,20817.3
7,(1)3P,0.0,(1)3P_0,23327.5,23327.4
8,(1)3P,1.0,(1)3P_1,23430.3,23430.3
9,(1)3P,2.0,(1)3P_2,23636.1,23636.0


In [560]:
# Check for problems in assignments
nAssign = len(set(dfso.Lead))
dropT = False
if nAssign != nTerm:
    print(f'*** I started with {nTerm} terms but now have {nAssign} ***')
    print('Starting: ', sorted(termsIn))
    termsOut = set(dfso.Lead)
    print('Now     : ', sorted(termsOut))
    if nAssign > nTerm:
        addT = termsOut - termsIn
        print('Added terms: ', addT)
    else:
        dropT = termsIn - termsOut
        print('Dropped terms: ', dropT)
        # Add weights from dropped terms and display
        for term in dropT:
            df = SOCI.level_contributions_from_term(term, thr=0)
            dfso[term] = df[term]
        print('Weights of dropped terms in levels:')
        display(dfso[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))
nlvl = (2 * dfso.J + 1).sum()  # number of sublevels
if nlvl != nSO:
    print(f'*** I started with {nSO} (sub)levels but now have {nlvl} ***')

In [561]:
# Manually assign any dropped terms
if dropT:
    for drt in dropT:
        ia = int(input(f'Which level do you want to assign to term {drt}? '))
        dfso.loc[ia, 'Lead'] = drt
    # rebuild 'Jlbl' values
    jlbl = [f'{t}_{chem.halves(j)}' for t, j in zip(dfso.Lead, dfso.J)]
    dfso['Jlbl'] = chem.enumerative_prefix(jlbl)
    display(dfso[['Lead', 'J', 'Jlbl', 'Erel', 'Eshift'] + list(dropT)].style.format(fmt))

In [562]:
irreps_ci = set(SOCI.dfci.Irrep)
if (SOCI.PG == 'Ci') and (len(irreps_ci) == 1):
    if 1 in irreps_ci:
        parity = 'even'
    else:
        parity = 'odd'
else:
    # ask user for parity of interest
    parity = input('Please choose "even" or "odd" parity: ')
print(f'Experimental states will be restricted to parity = {parity}')

Experimental states will be restricted to parity = even


In [563]:
# Read experimental energy levels
if charge >= 0:
    atstr = atom + '_' + 'I' * (charge + 1)
else:
    # anion
    atstr = atom + '_neg'
fxl = f'{atstr}_exptl_levels.xlsx'
# Use an alternative file if available
fxlalt = f'{atstr}_exptl_{parity}_assign.xlsx'
try:
    xlpath = os.sep.join([fdir, fxlalt])
    # Replace column 'Term' with column 'LS'
    dfexpt = pd.read_excel(xlpath).drop('Term', axis=1)
    dfexpt = dfexpt.rename(columns={'LS': 'Term'})
    print('** Using alternative experimental data file')
    fxl = fxlalt
except FileNotFoundError:
    xlpath = os.sep.join([fdir, fxl])
    dfexpt = pd.read_excel(xlpath)
print(f'Experimental energy levels read from {fxl}')

Experimental energy levels read from Ba_I_exptl_levels.xlsx


In [564]:
# Find the number of decimal places in the level energies
Ecol = 'Level (cm-1)'  # the exptl energy column
ndecim = 0
for e in dfexpt[Ecol]:
    words = str(e).split('.')
    # count numeric digits
    n = sum(c.isdigit() for c in words[-1])
    ndecim = max(n, ndecim)
print(f'Experimental energies are provided to {ndecim} decimal digits')

Experimental energies are provided to 3 decimal digits


In [565]:
# display formatting
fmt[Ecol] = '{:.' + str(ndecim) + 'f}'

In [566]:
# Delete any ionization limit
ilim = dfexpt[dfexpt.Term == 'Limit'].index.min()
# delete the "Limit" row and everything past it
n1 = len(dfexpt)
dfexpt = dfexpt.truncate(after=ilim-1)
n2 = len(dfexpt)
if n2 < n1:
    print(f'Discarding {n1-n2} ionized or metastable states')
oddstr = r'\*$|°' # characters to identify terms of odd parity
# Sometimes parity is shown in configuration alone?
#dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr))].copy()
#dfodd = dfexpt[dfexpt.Term.str.contains(oddstr) | dfexpt.Configuration.str.contains(oddstr)].copy()
dfeven = dfexpt[~(dfexpt.Term.str.contains(oddstr))].copy()
dfodd = dfexpt[dfexpt.Term.str.contains(oddstr)].copy()
print(f'{len(dfexpt)} experimental levels ({len(dfeven)} even and {len(dfodd)} odd)')
# Select by parity
if parity == 'even':
    # discard odd levels ('Term' field ends with '*')
    dfexpt = dfeven.copy()
elif parity == 'odd':
    dfexpt = dfodd.copy()
else:
    chem.print_err('', f'Parity of "{parity}" is not recognized')
n3 = len(dfexpt)
print(f'{n3} levels accepted for parity = {parity}')
# Reject bad values of J
for i in dfexpt.index:
    try:
        chem.halves_to_float(dfexpt.loc[i, 'J'])
    except ValueError:
        dfexpt.at[i, 'J'] = np.nan
nbad = dfexpt.J.isna().sum()
if nbad:
    print(f'** Rejecting {nbad} levels with malformed J values')
    dfexpt = dfexpt.dropna()
    n4 = len(dfexpt)
    print(f'{n4} level retained')
# Assign unique term symbols
if not labels_ordinated:
    dfexpt = chem.unique_labels_exptl_terms(dfexpt, verbose=True, always=True)
    labels_ordinate = True
dfexpt

Discarding 1 ionized or metastable states
294 experimental levels (176 even and 118 odd)
176 levels accepted for parity = even


Unnamed: 0,Configuration,Term,J,Level (cm-1),uTerm
0,6s2,1S,0,0.0,(1)1S
1,6s5d,3D,1,9033.966,(1)3D
2,6s5d,3D,2,9215.501,(1)3D
3,6s5d,3D,3,9596.533,(1)3D
4,6s5d,1D,2,11395.35,(1)1D
9,5d2,3F,2,20934.035,(1)3F
10,5d2,3F,3,21250.195,(1)3F
11,5d2,3F,4,21623.773,(1)3F
15,5d2,1D,2,23062.051,(2)1D
17,5d2,3P,0,23209.048,(1)3P


In [567]:
def match_expt_theory_simple(dfexpt, dftheory):
    # Match exptl and theoretical levels, based upon leading term
    #    (will fail if term assignments differ)
    # Return a DataFrame containing both theory and expt
    if 'degen' not in dfexpt.columns:
        # Add degeneracy
        dfexpt['degen'] = (2 * dfexpt.J.apply(chem.halves_to_float)) + 1
    dfcomp = dfexpt.copy()
    dfcomp['Tcalc'] = ''  # term assignment in computation
    dfcomp['termwt'] = ''
    dfcomp['Ecalc'] = np.nan
    for i, row in dftheory.iterrows():
        term = row.Lead
        J = chem.halves_to_float(row.J)
        #print(f'theory row {i} with J = {J} for term {term}')
        for ix, rowx in dfexpt.iterrows():
            xTerm = rowx.uTerm
            xJ = chem.halves_to_float(rowx.J)
            #print(f'    expt row {ix} with J = {xJ} for term {xTerm}')
            if (xJ == J) and (term == xTerm):
                if not np.isnan(dfcomp.at[ix, 'Ecalc']):
                    print('Already paired!', display(rowx.to_frame().T))                   
                else:
                    dfcomp.at[ix, 'Ecalc'] = row.Erel
                    dfcomp.at[ix, 'Tcalc'] = term
                    dfcomp.at[ix, 'termwt'] = row.termwt
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    return dfcomp

In [568]:
dfdiff = match_expt_theory_simple(dfexpt, dfso)
# Convert str values of J to float
dfdiff['J'] = dfdiff.J.apply(chem.halves_to_float)
warnThresh = 1000  # highlight errors larger than this (cm-1)
# drop rows with NaN (no matching level in the calculation)
dfdiff.dropna(axis=0, inplace=True)
print(f'Please inspect the following pairing of theory ("Ecalc") with expt ("{Ecol}")')
print('Disagreements in term assignments are highlighted in red')
print(f'Errors > {warnThresh} cm-1 are highlighted in yellow')
selcols = ['Configuration', 'uTerm', 'J', Ecol, 'Tcalc', 'Ecalc', 'err']
display(dfdiff[selcols].style.apply(lambda x: ["background: yellow" if abs(v) > warnThresh else "" for v in x], 
              subset=pd.IndexSlice[['err']]).apply(lambda x: (x != dfdiff['uTerm']).map({True: "background-color: red; \
              color: white", False: ""}), subset=['Tcalc']).format(fmt))
# Print a warning if experimental levels are missing
nth = len(dfso); ndiff = len(dfdiff)
if nth > ndiff:
    print('**** Missing experimental levels! ****')
    print(f'**** There are {nth} theoretical levels but only {ndiff} matching experimental levels')
    print('**** Missing level energies must be supplied before using eq. (2) ')
    1/0

Please inspect the following pairing of theory ("Ecalc") with expt ("Level (cm-1)")
Disagreements in term assignments are highlighted in red
Errors > 1000 cm-1 are highlighted in yellow


Unnamed: 0,Configuration,uTerm,J,Level (cm-1),Tcalc,Ecalc,err
0,6s2,(1)1S,0.0,0.0,(1)1S,0.0,0.0
1,6s5d,(1)3D,1.0,9033.966,(1)3D,8337.0,-697.0
2,6s5d,(1)3D,2.0,9215.501,(1)3D,8542.9,-672.6
3,6s5d,(1)3D,3.0,9596.533,(1)3D,8851.9,-744.7
9,5d2,(1)3F,2.0,20934.035,(1)3F,20155.6,-778.4
10,5d2,(1)3F,3.0,21250.195,(1)3F,20439.2,-811.0
11,5d2,(1)3F,4.0,21623.773,(1)3F,20817.4,-806.4
17,5d2,(1)3P,0.0,23209.048,(1)3P,23327.5,118.4
18,5d2,(1)3P,1.0,23479.976,(1)3P,23430.3,-49.7
19,5d2,(1)3P,2.0,23918.915,(1)3P,23636.1,-282.8


In [569]:
# No theoretical calculations are needed to use eq. (1)
xterms = []  # list of term labels
eterms = []  # list of term energies
for term in dfdiff.uTerm:
    if term not in xterms:
        xterms.append(term)
for Term in xterms:
    subdf = dfexpt[dfexpt.uTerm == Term]
    emean = np.dot(subdf.degen, subdf[Ecol]) / subdf.degen.sum()
    eterms.append(emean)
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms}).sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1) (experimental data with naive model)')
display(dfeq1.style.format(fmt))
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
lowterm = dfeq1.at[0, 'Term']
print(f'The term of lowest energy is \t{lowterm} \twith SOC1 = {SOC1} cm-1')
levterm = dfexpt.uTerm.values[0]

target = levterm

if levterm != lowterm:
    # The lowest term is not the leading term in the lowest level
    SOC1alt = SOC1
    SOC1 = -1 * np.round(dfeq1[dfeq1.Term == levterm]['Eterm'].values[0], 3)
    print(f'The lowest level belongs to \t{levterm} \twith SOC1 = {SOC1} cm-1')
print()
print(f'Term {target} is selected for further SOC calculation')
print('    to change this, assign the variable "target" to another term in the table above')

Term energies (cm-1) using eq. (1) (experimental data with naive model)


Unnamed: 0,Term,Eterm
0,(1)1S,0.0
1,(1)3D,9357.0
2,(1)3F,21335.0
3,(1)3P,23693.7


The term of lowest energy is 	(1)1S 	with SOC1 = -0.0 cm-1

Term (1)1S is selected for further SOC calculation
    to change this, assign the variable "target" to another term in the table above


In [570]:
def term_energy_from_levels(df, term):
    # Given a DataFrame with the right columns ['J', 'termwt', Ecol],
    #   where 'Ecol' is the header for the column of level energies,
    # Return the term's average energy as derived from the levels
    global SOCI
    # find index for term 'term'
    iterm = SOCI.dfterm[SOCI.dfterm.Term == term].index[0]
    termwt = []
    for twt in df.termwt:
        try:
            termwt.append(twt[iterm])
        except IndexError:
            # assume scalar
            termwt.append(twt)
    termwt = np.array(termwt)
    #termwt = np.array([twt[iterm] for twt in df.termwt])
    try:
        degen = df.degen.values
    except:
        degen = 2 * df.J.values + 1
    dweight = degen * termwt  # total weight, including degeneracies
    Eterm = np.dot(df[Ecol], dweight) / dweight.sum()
    return Eterm

In [571]:
target

'(1)1S'

In [572]:
# Use experimental level energies via eq. (2)
Eterm = term_energy_from_levels(dfdiff, target)
SOC2 = -Eterm
print('Applying eq. (2) (experimental energies and theoretical term weights)')
print(f'For term {target}, SOC2 = {SOC2:.2f} cm-1')

Applying eq. (2) (experimental energies and theoretical term weights)
For term (1)1S, SOC2 = -0.06 cm-1


In [573]:
print(f'Molpro source file: {fname}\n')
print(f'Alternative values for E_so[{target}] of atom {atom}:')
print('-' * 25)
print('{:12s} {:.2f} cm-1'.format('eq (1)', SOC1))
print('{:12s} {:.2f} cm-1'.format('raw theory', SOCraw))
print('{:12s} {:.2f} cm-1'.format('eq (2)', SOC2))
print('-' * 25)

Molpro source file: ba_1S3D3F3P_ac5zpp.pro

Alternative values for E_so[(1)1S] of atom Ba:
-------------------------
eq (1)       -0.00 cm-1
raw theory   -0.06 cm-1
eq (2)       -0.06 cm-1
-------------------------


In [574]:
def term_distrib(term, df):
    # return the weights (including 2J+1) of term in levels
    global SOCI
    itarget = SOCI.dfterm[SOCI.dfterm.Term == term].index[0]
    wt = []
    for twt in df.termwt:
        try:
            wt.append(twt[itarget])
        except IndexError:
            # assume scalar
            wt.append(twt)
    wt = np.array(wt)
    #wt = [twt[itarget] for twt in df.termwt]  # without 2J+1 weighting
    wt = wt * (2*df.J + 1)
    return wt

In [575]:
# Term energy errors as inferred from all levels
dftermerr = pd.DataFrame(columns=['Term', 'wmean', 'wstds'])
termlist = []
wmean = []
wstds = []
# also consider unsigned (absolute value) errors
uwmean = []
uwstds = []
# also consider RMSE
trmse = []
for term in set(dfdiff.uTerm):
    termlist.append(term)
    weights = term_distrib(term, dfdiff).values
    m, s = chem.weighted_mean(dfdiff.err, weights)
    wmean.append(m)
    wstds.append(s)
    uerr = np.abs(dfdiff.err.values)
    um, us = chem.weighted_mean(uerr, weights)
    uwmean.append(um)
    uwstds.append(us)
    umsq, ussq = chem.weighted_mean(uerr ** 2, weights)
    trmse.append(np.sqrt(umsq))
dftermerr['Term'] = termlist
dftermerr['wmean'] = wmean
dftermerr['wstds'] = wstds
dftermerr['uwmean'] = uwmean
dftermerr['uwstds'] = uwstds
dftermerr['rwmse'] = trmse

if dftermerr.isnull().values.any():
    print('*** Some terms are missing ***')
    print('Try decreasing the energy maximum ("termcut")')
else:
    print('Errors in term energies (cm-1) as inferred from full level distribution')
    print('    (not only levels where leading)')
    # default order same as experimental terms
    dftermerr.Term = pd.Categorical(dftermerr.Term, xterms)
    dftermerr = dftermerr.sort_values('Term')
dftermerr.sort_values('uwmean').style.format(fmt)

Errors in term energies (cm-1) as inferred from full level distribution
    (not only levels where leading)


Unnamed: 0,Term,wmean,wstds,uwmean,uwstds,rwmse
2,(1)1S,0.0,0.0,0.0,0.0,0.2
1,(1)3P,-160.5,87.8,186.8,74.5,216.4
0,(1)3D,-711.1,21.6,711.1,21.6,711.9
3,(1)3F,-801.3,7.1,801.3,7.1,801.4


In [547]:
print(f'Distribution of term "{target}" among levels:')
thrsh = 1.e-6
dfdistrib = dfso.copy()
dfdistrib[target] = term_distrib(target, dfso)
# remove rows with negligible weights
dfdistrib = dfdistrib[dfdistrib[target] > thrsh]
dfdistrib.drop(['termwt', 'Composition'], axis=1, inplace=True)
display(dfdistrib.sort_values(target, ascending=False).style.format(fmt))
print(f'Total weight of {target} = {dfdistrib[target].sum():.3f}')

Distribution of term "(1)1S" among levels:


Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,E,Nr,(1)1S
0,(1)1S,0.0,(1)1S_0,0.0,-0.1,-8132.646199,[1],0.999995
7,(1)3P,0.0,(1)3P_0,24567.0,24566.8,-8132.534263,[38],5e-06


Total weight of (1)1S = 1.000


In [375]:
# Don't attempt level-fitting by default
1/0

ZeroDivisionError: division by zero

## Fitting of term energies to experimental level energies

In [None]:
def levels_from_term_energies(term_order, term_energies):
    # Install the term energies along the SOCI.matrix diagonal and rediagonalize
    #   'term_order' is array of term symbols
    #   'term_energies' is array of corresponding energies (cm-1)
    # Return nothing
    #global SOCI
    #   'SOCI' is a fullmatSOCI() object and is modified
    term_dict = dict(zip(term_order, term_energies))
    newdiag = SOCI.matrix.diagonal().copy()
    for ibs in range(len(newdiag)):
        j = SOCI.sob_ici[ibs]
        term = SOCI.mrci[j].Term
        # install the new energy for the term
        newdiag[ibs] = term_dict[term]
    # update the matrix
    SOCI.fill_diagonal(newdiag)
    SOCI.diagonalize(store=True, vectors=True, sortval=False)  # update the SOCI() object
    SOCI.assign_atomic_J(quiet=True)
    return

In [None]:
def rmserr(dfcomp):
    # Given a matched-up comparison of expt and theory, return the
    #   rms error in level energies
    # Weighted with degeneracies
    rmse = np.dot(dfcomp.err**2, dfcomp.degen.astype(float))
    rmse /= dfcomp.degen.sum()
    rmse = np.sqrt(rmse)
    return rmse
def obj_fun(exc_terme):
    # Given only excited term energies (assuming ground=0)
    #   return the RMSE
    global term_order, dfexpt, SOCI
    terme = [0] + list(exc_terme)  # the fixed energy of the ground term is 0
    levels_from_term_energies(term_order, terme)
    dfcomp = match_expt_theory_simple(dfexpt, SOCI.dfso)
    # drop rows with NaN (no matching level in the calculation)
    dfcomp.dropna(axis=0, inplace=True)
    rmse = rmserr(dfcomp)
    return rmse

In [None]:
# Create global 'term_order' 
term_order = dfterm.Term.values
term_energies = dfterm.ecm.values

In [None]:
print('Initial comparison with expt, before fitting')
rmse0 = rmserr(dfdiff)
print(f'RMSE = {rmse0:.1f} cm-1')
display(dfdiff.style.format(fmt))

In [None]:
def freport(xvec):
    # callback function to monitor minimization
    # this version provides more info, at slightly increased cost
    freport.counter += 1
    print(f'{freport.counter:5d}', end='')
    for x in xvec:
        print(f'   {x:.1f}', end='')
    fun = obj_fun(xvec)
    print(f'  RMSE = {fun:.1f}')
    return
freport.counter = 0

In [None]:
# Minimize the RMSE (may take hours)
exc_terme = list(term_energies)[1:]  # only excited terms; assume ground term = 0 energy
result = scipy.optimize.minimize(obj_fun, exc_terme, method='Nelder-Mead', callback=freport)

In [None]:
if result.success:
    print(f'Minimization complete in {result.nit} iterations with {result.nfev} evaluations')
    print(f'RMSE = {result.fun:.1f} after Nelder-Mead')
    rmse = result.fun
    print('Fitted levels')
    fit_terme = [0] + list(result.x)
    levels_from_term_energies(term_order, fit_terme)
    dfcomp = match_expt_theory_simple(dfexpt, SOCI.dfso)
    # drop rows with NaN (no matching level in the calculation)
    dfcomp.dropna(axis=0, inplace=True)
    rmse = rmserr(dfcomp)
    # Convert str values of J to float
    dfcomp['J'] = dfcomp.J.apply(chem.halves_to_float)
    display(dfcomp.style.format(fmt))
    print(f'RMSE = {rmse:.1f} cm-1 compared with initial RMSE = {rmse0:.1f}')
else:
    print('*** Failure ***')
    print(result)

In [None]:
# Add to Term DataFrame
print('Fitted term energies; "Erel" is relative to the lowest level')
dfterm['fitted'] = np.nan
for term, terme in zip(term_order, fit_terme):
    dfterm.loc[dfterm.Term == term, 'fitted'] = terme
Eshift = SOCI.dfso.Eshift.min()  # energy of ground level relative to lowest term
dfterm['Erel'] = (dfterm.fitted - Eshift) # relative to ground level
dfterm['change'] = dfterm.fitted - dfterm.ecm
dfterm.style.format(fmt)

In [None]:
termfit = dfterm.loc[dfterm.Term == target, 'Erel'].values[0]

In [None]:
print(f'Molpro source file: {fname}\n')
print(f'Alternative values for E_so[{target}] of atom {atom}:')
print('-' * 25)
print('{:12s} {:.1f} cm-1'.format('eq (1)', SOC1))
print('{:12s} {:.1f} cm-1'.format('raw theory', SOCraw))
print('{:12s} {:.1f} cm-1'.format('eq (2)', SOC2))
print('{:12s} {:.1f} cm-1'.format(f'-E[{target}]', -termfit))
print('-' * 25)

## Seems broken below here

In [None]:
def eq2_from_term_energies(term_order, terme, target):
    levels_from_term_energies(term_order, terme)
    dflev = SOCI.assign_atomic_J(quiet=True).copy()
    dfc = match_expt_theory_simple(dfexpt, dflev).dropna()
    display(dfc)
    Etarg = term_energy_from_levels(dfc, target)
    eq2 = -Etarg
    return eq2

In [None]:
eq2_from_term_energies(term_order, dfterm.Edav.values, target)

In [None]:
dfcomp

In [None]:
# Install fitted term energies
#levels_from_term_energies(term_order, dfterm.fitted.values)
levels_from_term_energies(term_order, dfterm.Edav.values)
print(f'Distribution of term "{target}" among levels, after fitting term energies:')
thrsh = 1.e-6
dffitted = SOCI.assign_atomic_J(quiet=True).copy()
dffitted[target] = term_distrib(target, SOCI.dfso)
# remove rows with negligible weights
dffitted = dffitted[dffitted[target] > thrsh]
dffitted.drop(['termwt', 'Composition'], axis=1, inplace=True)
display(dffitted.sort_values(target, ascending=False).style.format(fmt))
print(f'Total weight of {target} = {dffitted[target].sum():.3f}')

In [None]:
print(f'Distribution of term "{target}" among levels:')
thrsh = 1.e-6
dfdistrib = SOCI.assign_atomic_J(quiet=True).copy()
dfdistrib[target] = term_distrib(target, dfso)
# remove rows with negligible weights
dfdistrib = dfdistrib[dfdistrib[target] > thrsh]
dfdistrib.drop(['termwt', 'Composition'], axis=1, inplace=True)
display(dfdistrib.sort_values(target, ascending=False).style.format(fmt))
print(f'Total weight of {target} = {dfdistrib[target].sum():.3f}')

In [None]:
dfdiff