In [1]:
# Extract SOC information from MOLPRO outputs for atoms
# ** This will probably break if there are multiple terms with the same term symbol **
# KKI version 3/21/2023
import re, sys, glob, subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.insert(0, '../karlib')
import chem_subs as chem
import molpro_subs as mpr

# Show every row when a DataFrame is displayed (pandas' default row limit is removed)
pd.set_option('display.max_rows', None)

In [2]:
# Excel spreadsheet of experimental levels from https://physics.nist.gov/PhysRefData/ASD/levels_form.html
#   Download as CSV; paste into a column in Excel; use Data -> Text to Columns -> Delimited -> Comma
#   Rename that worksheet with a name like "Fe" or "Fe+"
# Note that experimental levels might not be listed by increasing energy
xl_expt = 'exptl_levels.xlsx'  # workbook path (relative, so resolved against the working directory)
# Open the workbook once; its sheet names are checked and one sheet is read from it later
xl = pd.ExcelFile(xl_expt, engine='openpyxl')

### Select atom and parity of interest

In [3]:
# `atom` is used as the worksheet name when the experimental levels are read
atom = 'Fe'  # a name like "Fe" or "Fe+"
# 'even' drops levels whose 'Term' ends with '*', 'odd' keeps only those; 'both' applies no filter
parity = 'even'  #  choose 'even' or 'odd' or 'both'

### Select energy maximum for experimental terms

In [4]:
# In case of errors, try making this larger or smaller to match the theoretical calculation
# A term is kept (with all of its levels) when at least one of its levels lies below this value
termcut = 18000  # discard terms that lack levels below this energy (cm-1)

In [5]:
# Name of the experimental energy column in the NIST worksheet
Ecol = 'Level (cm-1)'
# Display formats, keyed by column name, for DataFrame.style.format()
fmt = {'Eshift': '{:.1f}', Ecol: '{:.3f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
# every remaining energy-like column (and J) shares the one-decimal 'Eshift' format
fmt.update(dict.fromkeys(['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm'], fmt['Eshift']))

In [6]:
# Read the experimental levels for `atom`, keep the requested parity, and keep
# only terms that have at least one level below `termcut`.  Adds the columns
# 'Tlbl' (bare LS label such as '5D') and 'degen' (2J+1) to `dfexpt`.
if atom not in xl.sheet_names:
    print(f'No experimental data sheet for {atom}!')
else:
    dfexpt = pd.read_excel(xl, atom, engine='openpyxl')
    # Delete any ionization limit
    dfexpt = dfexpt[dfexpt.Term != 'Limit']
    print(f'{len(dfexpt)} experimental levels for {atom} read from "{xl_expt}"')
    # Select by parity: the 'Term' field of an odd level ends with '*'.
    # Raw string avoids the invalid-escape warning; na=False makes levels
    # with a blank 'Term' count as "not odd" instead of poisoning the mask.
    is_odd = dfexpt.Term.str.contains(r'\*$', na=False)
    if parity == 'even':
        dfexpt = dfexpt[~is_odd]
    elif parity == 'odd':
        dfexpt = dfexpt[is_odd]
    elif parity != 'both':
        print(f'*** Unrecognized parity "{parity}"; keeping levels of both parities ***')
    print(f'{len(dfexpt)} levels are of parity "{parity}"')
    # Select terms by energy
    lowTerms = []
    for term, grp in dfexpt.groupby('Term'):
        if (grp[Ecol] < termcut).any():
            lowTerms.append(term)
    print(f'There are {len(lowTerms)} assigned terms with levels below {termcut} cm-1')
    # .copy() so that the column assignments below act on an independent
    # frame (no SettingWithCopyWarning, no accidental write-through)
    dfexpt = dfexpt[dfexpt.Term.isin(lowTerms)].copy()
    nlevx = len(dfexpt)
    print(f'There are {nlevx} levels of interest')

    def simplify(term):
        """Return the basic LS part (e.g. '5D') of a decorated term label, or '?' if none is found."""
        m = re.search(r'\d[SPDF-Z]', term)
        if m:
            return m.group(0)
        # failed
        return '?'

    def parse_J(value):
        """Convert a 'J' entry to float; half-integers written as 'n/d' (e.g. '9/2') are accepted."""
        if pd.isna(value):
            return np.nan
        try:
            return float(value)
        except (TypeError, ValueError):
            num, slash, den = str(value).partition('/')
            if not slash:
                raise
            return float(num) / float(den)

    # parse 'Term' column to get simplified term labels
    dfexpt['Tlbl'] = dfexpt.Term.apply(simplify)
    # Convert experimental 'J' and 'Level' to floats
    dfexpt['J'] = dfexpt['J'].apply(parse_J).astype(float)
    dfexpt[Ecol] = dfexpt[Ecol].astype(float)
    # add degeneracy = 2J+1
    dfexpt['degen'] = 2 * dfexpt.J + 1
    display(dfexpt.style.format(fmt))

846 experimental levels for Fe read from "exptl_levels.xlsx"
368 levels are of parity "even"
There are 4 assigned terms with levels below 18000 cm-1
There are 16 levels of interest


Unnamed: 0,Configuration,Term,J,Prefix,Level (cm-1),Suffix,Uncertainty (cm-1),Lande,Leading percentages,Reference,Tlbl,degen
0,3d6.4s2,a 5D,4.0,,0.0,,,1.5002,100,L11631,5D,9
1,3d6.4s2,a 5D,3.0,,415.933,,0.001,1.50034,100,,5D,7
2,3d6.4s2,a 5D,2.0,,704.007,,0.001,1.50041,100,,5D,5
3,3d6.4s2,a 5D,1.0,,888.132,,0.001,1.50022,100,,5D,3
4,3d6.4s2,a 5D,0.0,,978.074,,0.001,,100,,5D,1
5,3d7.(4F).4s,a 5F,5.0,,6928.268,,0.001,1.40021,100,,5F,11
6,3d7.(4F).4s,a 5F,4.0,,7376.764,,0.001,1.35004,100,,5F,9
7,3d7.(4F).4s,a 5F,3.0,,7728.06,,0.001,1.24988,100,,5F,7
8,3d7.(4F).4s,a 5F,2.0,,7985.785,,0.001,0.99953,100,,5F,5
9,3d7.(4F).4s,a 5F,1.0,,8154.714,,0.001,-0.014,100,,5F,3


### Take assignments at face value, i.e., apply eq. (1)

In [7]:
# No theoretical calculations are needed to use eq. (1):
# each term energy is the degeneracy-weighted mean of its experimental levels.
xterms = []  # list of term labels (plain strings)
eterms = []  # list of term energies
# Group by the bare column name, not ['Term']: with a one-element list
# pandas >= 2.0 yields 1-tuples such as ('a 5D',) as group keys, which would
# put tuples into `xterms` and `dfeq1.Term`.
for Term, grp in dfexpt.groupby('Term'):
    xterms.append(Term)
    emean = np.dot(grp.degen, grp[Ecol]) / grp.degen.sum()
    eterms.append(emean)
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms}).sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1)')
display(dfeq1.style.format(fmt))
# row 0 is the lowest term after sorting; its energy is measured from the lowest
# experimental level, so its negative is the spin-orbit stabilization
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
print(f'The corresponding spin-orbit stabilization energy is SOC1 = {SOC1} cm-1')

Term energies (cm-1) using eq. (1)


Unnamed: 0,Term,Eterm
0,a 5D,403.0
1,a 5F,7459.8
2,a 3F,12407.4
3,a 5P,17684.6


The corresponding spin-orbit stabilization energy is SOC1 = -402.961 cm-1


### Specify Molpro SO-CI output file

In [8]:
# Molpro SO-CI output to analyze.  Other files used previously:
#   'fe_15Q21T_ctzdk_x2c.pro'
#   'fe_ci_15Q7T_c5zdk_x2c.pro'
fsoc = 'fe_15Q7T_ctzdk_x2c.pro'

print(f'Reading MOLPRO file "{fsoc}"')
compAtom = mpr.stoichiometry(fsoc)
charge = mpr.total_charge(fsoc)
print(f'The atom is {compAtom} with charge {charge}')
# Build a label in the same form as the worksheet names ("Fe", "Fe+", ...):
# a sign for a nonzero charge, followed by the magnitude when it exceeds 1
sign = '+' if charge > 0 else ('-' if charge < 0 else '')
magnitude = f'{abs(charge)}' if abs(charge) > 1 else ''
compAtom += sign + magnitude

# check for consistency with the experimental data that were read
if compAtom != atom:
    print(f'*** exptl atom = {atom} is different')
PG = mpr.read_compgroup(fsoc)
print(f'The computational point group is {PG}')

Reading MOLPRO file "fe_15Q7T_ctzdk_x2c.pro"
The atom is Fe with charge 0
The computational point group is Ci


In [9]:
# Basis-set size and CASSCF summary
nbf = mpr.nbfn(fsoc)
print(f'{nbf} cGTOs in basis set')
casscf_runs = mpr.readMULTI(fsoc)
CAS = casscf_runs[-1]  # only the last CASSCF in the file is used
ncas = len(CAS.results)
cas_terms = set(CAS.results.Term)
print(f'CASSCF has {ncas} states in these terms:', cas_terms)
print(f'   active space is ({CAS.nactel()}/{CAS.nactorb()})')

102 cGTOs in basis set
CASSCF has 22 states in these terms: {'5F', '5D', '3F', '5P'}
   active space is (8/6)


In [10]:
# Read every MRCI calculation in the file (there may be one per spin multiplicity)
MRCI = mpr.readMRCI(fsoc)
nci = sum(len(calc.results) for calc in MRCI)
ncore = MRCI[0].ncore  # taken from the first MRCI; assumed to be the same for all
print(f'MRCI has {nci} states and {ncore} frozen cores in {len(MRCI)} calcs')
# the MRCI is expected to have as many states as the CASSCF
if nci != ncas:
    print('\t*** is this correct? ***')
for calc in MRCI:
    calc.transfer_lz(CAS.results)
# one table holding the states of all MRCI calculations
dfci = pd.concat([calc.results for calc in MRCI])
display(dfci[['Edav', 'Label', 'Ref', 'C0', 'Term']])

MRCI has 22 states and 0 frozen cores in 2 calcs


Unnamed: 0,Edav,Label,Ref,C0,Term
0,-1272.167879,1.1,3.1,0.961213,5D
1,-1272.167879,2.1,1.1,0.961213,5D
2,-1272.167874,3.1,4.1,0.961214,5D
3,-1272.16787,4.1,2.1,0.961215,5D
4,-1272.16787,5.1,5.1,0.961215,5D
5,-1272.139425,6.1,11.1,0.967081,5F
6,-1272.139425,7.1,7.1,0.967081,5F
7,-1272.139424,8.1,10.1,0.967081,5F
8,-1272.139424,9.1,12.1,0.967081,5F
9,-1272.139424,10.1,6.1,0.967081,5F


In [11]:
# Term energies from the MRCI+Q ('Edav') energies of the states in each term
edav_by_term = [(Term, grp.Edav) for Term, grp in dfci.groupby('Term')]
terms = [Term for Term, _ in edav_by_term]
eterms = [edav.mean() for _, edav in edav_by_term]
# range of energies within a term (should be small)
espread = [np.ptp(edav) for _, edav in edav_by_term]
# lowest MRCI+Q energy over all terms (np.inf when there are no terms)
e0 = min([np.inf] + [edav.min() for _, edav in edav_by_term])
dfterm = (
    pd.DataFrame({'Term': terms, 'Emean': eterms, 'Espread': espread})
    .sort_values('Emean')
    .reset_index(drop=True)
)
# (energies in `dfterm` are in hartree; display(dfterm) to inspect them)

In [12]:
# Read the SO-CI energies, collect the microstates into (nearly) degenerate
# levels, assign J from each level's degeneracy, and report two theoretical
# estimates of the spin-orbit stabilization energy (SOCraw, SOCth).
degentol = 3.  # degeneracy tolerance / cm-1
eSOC = mpr.readSOenergy(fsoc, linenum=False)
# NOTE(review): exact float comparison; e0 is the lowest MRCI+Q energy found earlier.
# This only prints a warning, it does not stop the analysis.
if eSOC.E0 != e0:
    print(f'\n*** The SO-CI is based on E0 = {eSOC.E0} but the lowest MRCI+Q energy is {e0} ***\n\n')
#display(eSOC.results)  # show all microstates
print(f'Threshold for level degeneracy is {degentol} cm-1\n')
# group microstates whose energies agree within `degentol` into levels
dflevel = eSOC.collect_degenerate(cmtol=degentol)
# remove Irrep column if no information
irreps = set(eSOC.results.Irrep)
# a single distinct value equal to 0 is taken to mean "no irrep information"
if (len(irreps) == 1) and (irreps.pop() == 0):
    #  there is no irrep information
    dflevel = dflevel.drop('Irrep', axis=1)
# Assign values of J based only upon degeneracy
dflevel['J'] = dflevel.degen.apply(chem.J_from_degen)  
tmultc = len(eSOC.energies)  # total number of microstates (i.e., magnetic sublevels)
nlevc = len(dflevel)
print(f'SO-CI has {tmultc} states in {nlevc} levels')
# energy of lowest level defines zero
Ezero = dflevel.Erel.min()
# in-place shift: must happen before the rounding below
dflevel.Erel -= Ezero
# round computed energies to 0.01
dflevel.Erel = np.round(dflevel.Erel, 2)
# NOTE(review): only element [0][0] of what readSOcompos returns is used — confirm this is the intended composition object
compos = mpr.readSOcompos(fsoc)[0][0]
# wall time is converted from seconds to hours for the report
twall = mpr.resources_used(fsoc)['wall']
hours = twall / 3600
print(f'Computation wall time = {twall:.0f} s = {hours:.1f} hr')
# get SOC from the lowest microstate in the SO-CI (no level or term averaging)
# NOTE(review): assumes the row labelled 0 in dflevel is the lowest level — confirm
SOCraw = np.round(dflevel.at[0, 'Eshift'], 1)
print(f'The raw theoretical spin-orbit stabilization energy is SOCraw = {SOCraw} cm-1')
# now average the ground level and the ground term before taking their difference
# (difference of lowest level energy and lowest term mean, scaled by chem.AU2CM)
SOCth = np.round((dflevel.E.min() - dfterm.Emean.min()) * chem.AU2CM, 1)
print(f'The level-term theoretical spin-orbit stabilization energy is SOCth = {SOCth} cm-1')
display(dflevel.style.format(fmt))
print('"Eshift" is relative to the lowest diagonal (term) energy')

Threshold for level degeneracy is 3.0 cm-1

SO-CI has 96 states in 16 levels
Computation wall time = 19786 s = 5.5 hr
The raw theoretical spin-orbit stabilization energy is SOCraw = -396.7 cm-1
The level-term theoretical spin-orbit stabilization energy is SOCth = -397.6 cm-1


Unnamed: 0,degen,E,Eshift,Erel,index,J
0,9,-1272.169686,-396.7,0.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8]",4.0
1,7,-1272.167875,0.9,397.5,"[9, 10, 11, 12, 13, 14, 15]",3.0
2,5,-1272.166517,298.9,695.6,"[16, 17, 18, 19, 20]",2.0
3,3,-1272.165612,497.6,894.3,"[21, 23, 22]",1.0
4,1,-1272.165159,596.9,993.6,[24],0.0
5,11,-1272.141913,5698.9,6095.6,"[25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]",5.0
6,9,-1272.139872,6146.7,6543.4,"[36, 37, 38, 39, 40, 41, 42, 43, 44]",4.0
7,7,-1272.138212,6511.1,6907.8,"[45, 46, 47, 48, 49, 50, 51]",3.0
8,5,-1272.136952,6787.6,7184.3,"[52, 53, 54, 55, 56]",2.0
9,3,-1272.136106,6973.3,7370.0,"[57, 58, 59]",1.0


"Eshift" is relative to the lowest diagonal (term) energy


In [13]:
# Term composition of every theoretical level, summed over the level's microstates
dflevel['compos'] = [compos.sum_term_compos(members, dfci) for members in dflevel['index']]
# The leading term of a level is the one with the largest 'Pct' in its composition
dflevel['Leading'] = [
    level_compos.sort_values('Pct', ascending=False).index.values[0]
    for level_compos in dflevel['compos']
]
print('Composition of theoretical levels')
display(dflevel.style.format(fmt))

Composition of theoretical levels


Unnamed: 0,degen,E,Eshift,Erel,index,J,compos,Leading
0,9,-1272.169686,-396.7,0.0,"[0, 1, 2, 3, 4, 5, 6, 7, 8]",4.0,Pct 5D 99.998333 3F 0.001667,5D
1,7,-1272.167875,0.9,397.5,"[9, 10, 11, 12, 13, 14, 15]",3.0,Pct 5D 100.0,5D
2,5,-1272.166517,298.9,695.6,"[16, 17, 18, 19, 20]",2.0,Pct 5D 100.0,5D
3,3,-1272.165612,497.6,894.3,"[21, 23, 22]",1.0,Pct 5D 100.0,5D
4,1,-1272.165159,596.9,993.6,[24],0.0,Pct 5D 100.0,5D
5,11,-1272.141913,5698.9,6095.6,"[25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]",5.0,Pct 5F 100.0,5F
6,9,-1272.139872,6146.7,6543.4,"[36, 37, 38, 39, 40, 41, 42, 43, 44]",4.0,Pct 5F 99.860332 3F 0.139668,5F
7,7,-1272.138212,6511.1,6907.8,"[45, 46, 47, 48, 49, 50, 51]",3.0,Pct 5F 99.872142 3F 0.127858,5F
8,5,-1272.136952,6787.6,7184.3,"[52, 53, 54, 55, 56]",2.0,Pct 5F 99.935401 3F 0.064599,5F
9,3,-1272.136106,6973.3,7370.0,"[57, 58, 59]",1.0,Pct 5F 100.0,5F


In [14]:
# Pair each experimental level with a theoretical level and tabulate the errors
warnThresh = 800  # highlight errors larger than this (cm-1)
dfdiff = dfexpt.copy()
dfdiff['Ecalc'] = np.nan
print(f'Comparing theoretical levels from {fsoc} with experimental')
print('   Matching levels using J and minimal term label')
print('   (This will break if term labels are not clear)')
unmatched = list(dflevel.index)  # computed levels not yet paired with an exptl level
for i, row in dfexpt.iterrows():
    if not unmatched:
        # every theoretical level has already been used
        continue
    # first remaining computed level with the same J and the same leading-term label
    candidates = (
        j for j in unmatched
        if float(row.J) == float(dflevel.at[j, 'J'])
        and row.Tlbl == dflevel.loc[j, 'Leading']
    )
    j = next(candidates, None)
    if j is None:
        print('Failed to assign any theoretical level to this exptl!')
        display(row.to_frame().T)
    else:
        dfdiff.at[i, 'Ecalc'] = dflevel.at[j, 'Erel']
        unmatched.remove(j)
dfdiff['err'] = np.round(dfdiff.Ecalc - dfdiff[Ecol], 2)
# keep only some columns
dfdiff = dfdiff[['Configuration', 'Term', Ecol, 'Leading percentages', 'Tlbl', 'Ecalc', 'err']]


def flag_large(errors):
    """Return one CSS string per value: yellow background where |error| exceeds warnThresh."""
    return ["background: yellow" if abs(v) > warnThresh else "" for v in errors]


display(dfdiff.style.apply(flag_large, subset=['err']).format(fmt))
print(f'Errors > {warnThresh} cm-1 are highlighted')

Comparing theoretical levels from fe_15Q7T_ctzdk_x2c.pro with experimental
   Matching levels using J and minimal term label
   (This will break if term labels are not clear)


Unnamed: 0,Configuration,Term,Level (cm-1),Leading percentages,Tlbl,Ecalc,err
0,3d6.4s2,a 5D,0.0,100,5D,0.0,0.0
1,3d6.4s2,a 5D,415.933,100,5D,397.5,-18.4
2,3d6.4s2,a 5D,704.007,100,5D,695.6,-8.4
3,3d6.4s2,a 5D,888.132,100,5D,894.3,6.1
4,3d6.4s2,a 5D,978.074,100,5D,993.6,15.5
5,3d7.(4F).4s,a 5F,6928.268,100,5F,6095.6,-832.7
6,3d7.(4F).4s,a 5F,7376.764,100,5F,6543.4,-833.4
7,3d7.(4F).4s,a 5F,7728.06,100,5F,6907.8,-820.3
8,3d7.(4F).4s,a 5F,7985.785,100,5F,7184.3,-801.5
9,3d7.(4F).4s,a 5F,8154.714,100,5F,7370.0,-784.8


Errors > 800 cm-1 are highlighted


In [15]:
# Summarize level-energy errors by term: [min, max] range, mean and sample
# standard deviation of 'err' over the levels of each experimental term.
records = []
for term, grp in dfdiff.groupby('Term'):
    errs = grp.err.dropna()
    if errs.empty:
        # no level of this term was matched to a theoretical level
        spread, m, s = None, np.nan, np.nan
    else:
        spread = np.round([errs.min(), errs.max()], 0).astype(int)
        m = errs.mean()
        s = errs.std()  # NaN when the term has a single level
    records.append({'Term': term, 'range': spread, 'mean': m, 'stds': s})
# build the table once instead of growing it row by row
dftermerr = pd.DataFrame(records, columns=['Term', 'range', 'mean', 'stds'])
# Only a missing mean signals a missing term; a missing standard deviation
# just means the term has one level and must not raise a false alarm.
if dftermerr['mean'].isnull().any():
    print('*** Some terms are missing ***')
    print('Try decreasing the energy maximum ("termcut")')
else:
    # round values to nearest 1 cm-1
    dftermerr['mean'] = dftermerr['mean'].astype(float).round(0).astype(int)
    # nullable integer keeps an undefined standard deviation as <NA>
    dftermerr['stds'] = dftermerr['stds'].astype(float).round(0).astype('Int64')
    print(f'{fsoc} errors in level energies (cm-1), grouped by leading term')
    # order same as experimental terms; unwrap 1-tuples that newer pandas
    # produces when the terms were collected from a list-keyed groupby
    term_order = [t[0] if isinstance(t, tuple) else t for t in xterms]
    dftermerr['Term'] = pd.Categorical(dftermerr['Term'], term_order)
    dftermerr = dftermerr.sort_values('Term')
dftermerr

fe_15Q7T_ctzdk_x2c.pro errors in level energies (cm-1), grouped by leading term


Unnamed: 0,Term,range,mean,stds
0,a 3F,"[-187, -142]",-169,24
1,a 5D,"[-18, 16]",-1,13
2,a 5F,"[-833, -785]",-815,21
3,a 5P,"[120, 196]",160,38


In [16]:
# Apply eq. (2) to the ground term (or another 'target'):
# find how the target term is distributed over the SO-CI levels
target = 'lowest'  # set to 'lowest' or choose a term from dfterm.Term

if target == 'lowest':
    # the ground term is the first row once the terms are sorted by mean energy
    dfterm = dfterm.sort_values('Emean').reset_index(drop=True)
    target = dfterm.at[0, 'Term']
if target not in dfterm.Term.values:
    print(f'*** Requested term "{target}" is not among {dfterm.Term.values}')
# get pct contribution of term 'target' to all SO microstates
dfmicro = compos.term_distrib(target, dfci)
# for each microstate number 'Nr', look up its row index in eSOC.results
dfSOC = eSOC.results
dfmicro['idx'] = [dfSOC[dfSOC.Nr == Nr].index[0] for Nr in dfmicro.Nr]
# for each microstate, find the first level whose 'index' list contains it
level_of_micro = []
for micro in dfmicro.idx:
    for lev, levrow in dflevel.iterrows():
        if micro in levrow['index']:
            level_of_micro.append(lev)
            break
dfmicro['ilev'] = level_of_micro
# sum within levels
per_level = list(dfmicro.groupby('ilev'))
levs = [lev for lev, _ in per_level]
sumpct = [grp.Pct.sum() for _, grp in per_level]
degen = [len(grp) for _, grp in per_level]
# get theoretical energies of these levels
erel = [dflevel.at[lev, 'Erel'] for lev in levs]
dfpct = pd.DataFrame({'ilev': levs, 'Pct': sumpct, 'g': degen, 'Erel': erel})
# find the matching EXPERIMENTAL level energies, and theoretical energy errors
# In case of errors here, try including more experimental levels by raising
# the energy maximum ("termcut")
try:
    # rows of dfdiff whose computed energy equals each level's relative energy
    hits = [dfdiff[dfdiff.Ecalc == Eth] for Eth in erel]
    Eexp = [hit[Ecol].values[0] for hit in hits]
    dfpct[Ecol] = Eexp
    errs = [hit['err'].values[0] for hit in hits]
    dfpct['err'] = errs
except IndexError:
    print('** Failed to find matching experimental level **')
    print('Try including more experimental levels by raising the energy maximum ("termcut")')


In [17]:
# eq. (2): weight the experimental level energies by the theoretical share of the target term
print(f'Distribution of term "{target}" among levels:')
display(dfpct.style.format(fmt))
print(f'Combine theoretical weights ("Pct") with experimental level energies ["{Ecol}"]:')
weights = dfpct.Pct
pctsum = weights.sum()
Eterm = np.dot(weights, dfpct[Ecol]) / pctsum
print(f'   Term energy = {Eterm:.1f} cm-1')
# total weight found, relative to the multiplicity of the target term
tmult = chem.atomic_term_multip(target)
frac = pctsum / tmult
print(f'   That accounts for {frac:.3f} % of the term')
SOC2 = -Eterm

Distribution of term "5D" among levels:


Unnamed: 0,ilev,Pct,g,Erel,Level (cm-1),err
0,0,899.982,9,0.0,0.0,0.0
1,1,699.991,7,397.5,415.933,-18.4
2,2,500.0,5,695.6,704.007,-8.4
3,3,299.994,3,894.3,888.132,6.1
4,4,100.001,1,993.6,978.074,15.5
5,10,0.016,6,11788.8,11976.239,-187.4


Combine theoretical weights ("Pct") with experimental level energies ["Level (cm-1)"]:
   Term energy = 403.0 cm-1
   That accounts for 99.999 % of the term


In [18]:
# Final report: the four alternative SOC values, side by side
print(f'Molpro source file: {fsoc}')
print(f'Alternative values for SOC({target}) of atom {atom}:')
rule = '-' * 25
soc_table = [
    ('eq (1)', SOC1),
    ('raw theory', SOCraw),
    ('avgd theory', SOCth),
    ('eq (2)', SOC2),
]
print(rule)
for label, value in soc_table:
    print(f'{label:12s} {value:.1f} cm-1')
print(rule)
# report weighted RMSE for the relevant theoretical levels
# (weights are the target term's 'Pct' share of each level)
wrmse = np.sqrt(np.dot(dfpct.Pct, dfpct.err ** 2) / pctsum)
print(f'For the relevant levels, the weighted RMS error in theoretical energies = {wrmse:.1f} cm-1')

Molpro source file: fe_15Q7T_ctzdk_x2c.pro
Alternative values for SOC(5D) of atom Fe:
-------------------------
eq (1)       -403.0 cm-1
raw theory   -396.7 cm-1
avgd theory  -397.6 cm-1
eq (2)       -403.0 cm-1
-------------------------
For the relevant levels, the weighted RMS error in theoretical energies = 11.1 cm-1
