In [1]:
# Extract SOC information from MOLPRO outputs for atoms
# ** This will probably break if there are multiple terms with the same term symbol **
# This version for developing better procedure for assigning values of J
# KKI 4/20/23
import re, sys, glob, subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.cluster import KMeans

sys.path.insert(0, '../karlib')
import chem_subs as chem
import molpro_subs as mpr

pd.set_option('display.max_rows', None)

In [2]:
# Workbook of experimental atomic energy levels, one worksheet per species.
# Source: https://physics.nist.gov/PhysRefData/ASD/levels_form.html
#   Download as CSV; paste into a column in Excel; use Data -> Text to Columns -> Delimited -> Comma
#   Rename that worksheet with a name like "Fe" or "Fe+"
# Note that experimental levels might not be listed by increasing energy
xl_expt = 'exptl_levels.xlsx'
# Open the workbook once; the sheet for the chosen atom is read from it in a later cell
xl = pd.ExcelFile(xl_expt)

### Select atom and parity of interest

In [3]:
# Worksheet name in the experimental workbook; also compared against the Molpro species
atom = 'Fe'  # a name like "Fe" or "Fe+"
# Parity filter for experimental levels; any value other than 'even' or 'odd' keeps all levels
parity = 'even'  # choose 'even' or 'odd' or 'both'

### Select energy maximum for experimental terms

In [4]:
# A term is kept (with all of its levels) if at least one of its levels lies below this energy.
# In case of errors, try making this larger or smaller to match the theoretical calculation
termcut = 18000  # discard terms that lack levels below this energy (cm-1)

In [5]:
# Header of the experimental level-energy column
Ecol = 'Level (cm-1)'
# Display formats for DataFrame.style.format(), keyed by column name
one_decimal = '{:.1f}'
fmt = {'Eshift': one_decimal, Ecol: '{:.3f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
# the remaining numeric columns are all shown with one decimal place
fmt.update(dict.fromkeys(
    ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'Elev'],
    one_decimal))

In [6]:
def simplify(term):
    """Return the bare LS part (e.g. '5D') of a decorated term label (e.g. 'a 5D').

    The first occurrence of a digit followed by one of the letters
    S, P, D or F-Z is returned; '?' is returned when nothing matches.
    """
    m = re.search(r'\d[SPDF-Z]', term)
    return m.group(0) if m else '?'


# Read and filter the experimental levels for the selected atom.
if atom not in xl.sheet_names:
    # Stop here: every later cell needs 'dfexpt', and continuing could
    # silently reuse a stale 'dfexpt' left over from a previous run.
    raise ValueError(f'No experimental data sheet for {atom}!')

dfexpt = pd.read_excel(xl, atom)
# Delete any ionization limit
dfexpt = dfexpt[dfexpt.Term != 'Limit']
print(f'{len(dfexpt)} experimental levels for {atom} read from "{xl_expt}"')

# Select by parity: odd levels have a 'Term' field that ends with '*'.
# na=False treats levels without a term label as "not odd" instead of
# producing NaN in the mask; such levels are dropped by the term filter below.
is_odd = dfexpt.Term.str.contains(r'\*$', na=False)
if parity == 'even':
    dfexpt = dfexpt[~is_odd]
elif parity == 'odd':
    dfexpt = dfexpt[is_odd]
print(f'{len(dfexpt)} levels are of parity "{parity}"')

# Select terms by energy: keep a term if any of its levels is below 'termcut'
lowTerms = [term for term, grp in dfexpt.groupby('Term')
            if (grp[Ecol] < termcut).any()]
print(f'There are {len(lowTerms)} assigned terms with levels below {termcut} cm-1')
# .copy() so that the column assignments below act on an independent frame
# (avoids SettingWithCopyWarning on a filtered slice)
dfexpt = dfexpt[dfexpt.Term.isin(lowTerms)].copy()
nlevx = len(dfexpt)
print(f'There are {nlevx} levels of interest')

# parse 'Term' column to get simplified term labels
dfexpt['Tlbl'] = dfexpt.Term.apply(simplify)
# Convert experimental 'J' and 'Level' to floats
for col in ['J', Ecol]:
    dfexpt[col] = dfexpt[col].astype(float)
# add degeneracy = 2J+1
dfexpt['degen'] = 2 * dfexpt.J + 1
display(dfexpt.style.format(fmt))

846 experimental levels for Fe read from "exptl_levels.xlsx"
368 levels are of parity "even"
There are 4 assigned terms with levels below 18000 cm-1
There are 16 levels of interest


Unnamed: 0,Configuration,Term,J,Prefix,Level (cm-1),Suffix,Uncertainty (cm-1),Lande,Leading percentages,Reference,Tlbl,degen
0,3d6.4s2,a 5D,4.0,,0.0,,,1.5002,100,L11631,5D,9
1,3d6.4s2,a 5D,3.0,,415.933,,0.001,1.50034,100,,5D,7
2,3d6.4s2,a 5D,2.0,,704.007,,0.001,1.50041,100,,5D,5
3,3d6.4s2,a 5D,1.0,,888.132,,0.001,1.50022,100,,5D,3
4,3d6.4s2,a 5D,0.0,,978.074,,0.001,,100,,5D,1
5,3d7.(4F).4s,a 5F,5.0,,6928.268,,0.001,1.40021,100,,5F,11
6,3d7.(4F).4s,a 5F,4.0,,7376.764,,0.001,1.35004,100,,5F,9
7,3d7.(4F).4s,a 5F,3.0,,7728.06,,0.001,1.24988,100,,5F,7
8,3d7.(4F).4s,a 5F,2.0,,7985.785,,0.001,0.99953,100,,5F,5
9,3d7.(4F).4s,a 5F,1.0,,8154.714,,0.001,-0.014,100,,5F,3


### Take assignments at face value, i.e., apply eq. (1)

In [7]:
# No theoretical calculations are needed to use eq. (1):
# each term energy is the (2J+1)-weighted mean of its experimental level energies.
xterms = []  # list of term labels
eterms = []  # list of term energies
# Group by the bare column name, not ['Term']: with a one-element list,
# pandas >= 2.0 yields 1-tuples such as ('a 5D',) as the group keys.
for Term, grp in dfexpt.groupby('Term'):
    xterms.append(Term)
    emean = np.average(grp[Ecol], weights=grp.degen)
    eterms.append(emean)
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms}).sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1)')
display(dfeq1.style.format(fmt))
# After sorting, row 0 is the lowest term; its energy above the lowest level
# is taken (with a minus sign) as the spin-orbit stabilization energy.
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
print(f'The corresponding spin-orbit stabilization energy is SOC1 = {SOC1} cm-1')

Term energies (cm-1) using eq. (1)


Unnamed: 0,Term,Eterm
0,a 5D,403.0
1,a 5F,7459.8
2,a 3F,12407.4
3,a 5P,17684.6


The corresponding spin-orbit stabilization energy is SOC1 = -402.961 cm-1


### Specify Molpro SO-CI output file

In [8]:
# Molpro SO-CI output file to analyze (alternatives kept for convenience)
fsoc = 'fe_15Q21T_ctzdk_x2c.pro'
#fsoc = 'fe_ci_15Q7T_c5zdk_x2c.pro'
#fsoc = 'fe_15Q7T_ctzdk_x2c.pro'

print(f'Reading MOLPRO file "{fsoc}"')
compAtom = mpr.stoichiometry(fsoc)
charge = mpr.total_charge(fsoc)
print(f'The atom is {compAtom} with charge {charge}')

# Build a species label in the style of the worksheet names ("Fe", "Fe+", ...)
# so the computed species can be compared with the experimental selection.
if charge > 0:
    sign = '+'
elif charge < 0:
    sign = '-'
else:
    sign = ''
magnitude = f'{abs(charge)}' if abs(charge) > 1 else ''
compAtom = compAtom + sign + magnitude

if compAtom != atom:
    print(f'*** exptl atom = {atom} is different')

# Report the point group used in the calculation
PG = mpr.read_compgroup(fsoc)
print(f'The computational point group is {PG}')

Reading MOLPRO file "fe_15Q21T_ctzdk_x2c.pro"
The atom is Fe with charge 0
The computational point group is Ci


In [9]:
# Parse the SO-CI calculation from the Molpro output into one object; later cells use
# its .vals, .dfterm and .assign_atomic_J().
# NOTE(review): the meaning of atom=True is defined in molpro_subs, not visible here -- confirm.
SOCI = mpr.fullmatSOCI(fsoc, atom=True)

Computational group = Ci
CASSCF states:
    15 Quintet
    21 Triplet


In [23]:
# Smallest value in SOCI.vals, reported as the raw theoretical spin-orbit correction.
# NOTE(review): presumably SOCI.vals holds SO-CI level energies in cm-1 relative to the
#   lowest uncoupled (spin-free) energy, as the printed message implies -- confirm in molpro_subs.
# This cell needs SOCI from the fullmatSOCI cell; run the notebook top to bottom.
SOCraw = SOCI.vals.min()
print(f'From lowest level and lowest uncoupled energy, raw theoretical SOCraw = {SOCraw:.3f} cm-1')

From lowest level and lowest uncoupled energy, raw theoretical SOCraw = -429.076 cm-1


In [10]:
def term_energy_from_levels(df, term, Ecol, dfterm=None):
    """Return a term's average energy as derived from its levels.

    Each level contributes its energy weighted by (2J+1) times the weight
    of 'term' in that level.

    Parameters
    ----------
    df : DataFrame with columns 'J', 'termwt' and the column named by 'Ecol';
        each 'termwt' entry is a sequence of term weights for that level.
    term : label of the term of interest, as found in dfterm.Term.
    Ecol : header of the column of level energies in 'df'.
    dfterm : optional DataFrame with a 'Term' column; the index label of the
        row matching 'term' selects the entry of each 'termwt' sequence.
        Defaults to the notebook-level SOCI.dfterm.

    Raises
    ------
    IndexError if 'term' is not listed in dfterm.
    """
    if dfterm is None:
        # fall back to the terms table of the SO-CI object loaded by the notebook
        dfterm = SOCI.dfterm
    # find index for term 'term'
    hits = dfterm[dfterm.Term == term].index
    if len(hits) == 0:
        raise IndexError(f'term "{term}" is not in the table of terms')
    iterm = hits[0]
    termwt = np.array([twt[iterm] for twt in df.termwt])
    degen = 2 * df.J + 1
    Eterm = (df[Ecol] * termwt * degen).sum()
    Eterm /= np.dot(degen, termwt)  # normalize using level degeneracies
    return Eterm

In [11]:
# Table of theoretical levels; the columns used below are 'J', 'Erel' and 'termwt'.
# NOTE(review): how J is assigned is implemented in molpro_subs, not visible here.
dflevel = SOCI.assign_atomic_J()
# Term whose spin-orbit correction is wanted (must match a label in SOCI.dfterm.Term)
target = '5D'
# Weighted average of the theoretical level energies ('Erel') for the target term
Eterm = term_energy_from_levels(dflevel, target, 'Erel')
print(f'Using degenerated averages, energy of {target} term = {Eterm:.1f} cm-1')
# The theoretical SOC is the negative of the term's average energy
SOCth = -Eterm
print(f'SOCth = {SOCth:.3f} cm-1')

large non-degeneracies in term energies:


Unnamed: 0,Spin,Term,Energy,Edav,Dipole,Lz,dipX,dipY,dipZ,Eref,C0
0,Quintet,5F,-1272.066757,-1272.137115,0.0,2.998841,0.0,0.0,0.0,-1271.209106,0.961345
1,Quintet,5F,-1272.066757,-1272.137115,0.0,2.918077,0.0,0.0,0.0,-1271.209106,0.961345
2,Quintet,5F,-1272.066756,-1272.137102,0.0,0.080223,0.0,0.0,0.0,-1271.209106,0.961351
3,Quintet,5F,-1272.066757,-1272.137102,0.0,1.003469,0.0,0.0,0.0,-1271.209106,0.961352
4,Quintet,5F,-1272.066757,-1272.137101,0.0,1.218533,0.0,0.0,0.0,-1271.209106,0.961352
5,Quintet,5F,-1272.066758,-1272.137098,0.0,2.0,0.0,0.0,0.0,-1271.209106,0.961354
6,Quintet,5F,-1272.066757,-1272.137097,0.0,1.99839,0.0,0.0,0.0,-1271.209106,0.961354
7,--,--,1e-06,1.8e-05,0.0,2.918618,0.0,0.0,0.0,0.0,9e-06


Assigning J using 22 clusters/levels
Using degenerated averages, energy of 5D term = 429.3 cm-1
SOCth = -429.337 cm-1


In [12]:
# Show the table of terms; its row index selects the matching entry of each level's 'termwt'
SOCI.dfterm

Unnamed: 0,Term,dipZ,Edav,idx,ecm
0,5D,0.0,-1272.176014,"[0, 1, 2, 3, 4]",0.0
1,5F,0.0,-1272.137104,"[7, 6, 11, 9, 10, 5, 8]",8539.8
2,3P,0.0,-1272.088313,"[16, 15, 17]",19248.1
3,5P,0.0,-1272.086669,"[12, 13, 14]",19609.0
4,3H,0.0,-1272.084718,"[19, 18, 23, 20, 21, 25, 26, 27, 24, 22, 28]",20037.1
5,3F,0.0,-1272.080916,"[29, 30, 31, 33, 35, 34, 32]",20871.7


In [13]:
# Show the theoretical levels, using the shared column formats in 'fmt'
dflevel.style.format(fmt)

Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,Composition,E,Nr,termwt
0,5D,4.0,5D_4,0.0,-428.9,{'5D': 1.0},-1272.17797,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",[9.98792166e-01 7.58280806e-08 1.78942632e-10 7.09238115e-14  9.51590212e-13 1.20775811e-03]
1,5D,3.0,5D_3,420.7,-8.2,{'5D': 1.0},-1272.176054,"[10, 11, 12, 13, 14, 15, 16]",[9.99587748e-01 7.02706294e-08 4.47233969e-10 2.13889223e-09  3.74747664e-12 4.12179266e-04]
2,5D,2.0,5D_2,715.1,286.2,{'5D': 1.0},-1272.174712,"[17, 18, 19, 20, 21]",[9.99082096e-01 4.10239185e-08 8.34202523e-04 5.77911885e-09  6.54032793e-12 8.36544671e-05]
3,5D,1.0,5D_1,902.8,473.9,{'5D': 1.0},-1272.173857,"[22, 23, 24]",[9.98390985e-01 1.44752986e-08 1.60899600e-03 3.86251524e-09  8.68824077e-12 2.30752312e-10]
4,5D,0.0,5D_0,994.4,565.5,{'5D': 1.0},-1272.17344,[25],[9.97961261e-01 1.37857077e-12 2.03873866e-03 6.13351907e-14  1.00911811e-11 1.84891211e-10]
5,5F,5.0,5F_5,8413.7,7984.8,{'5F': 1.0},-1272.139635,"[26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36]",[1.25201703e-12 1.00000000e+00 4.19425974e-13 1.20180645e-18  1.54262137e-13 3.62840009e-12]
6,5F,4.0,5F_4,8876.5,8447.6,{'5F': 1.0},-1272.137526,"[37, 38, 39, 40, 41, 42, 43, 44, 45]",[9.66394356e-08 9.99998871e-01 1.15190817e-13 6.90320382e-19  1.54146842e-13 1.03252676e-06]
7,5F,3.0,5F_3,9246.8,8817.9,{'5F': 1.0},-1272.135839,"[46, 47, 48, 49, 50, 51, 52]",[8.19527971e-08 9.99998835e-01 8.78593738e-14 5.92053065e-16  3.55469460e-13 1.08331394e-06]
8,5F,2.0,5F_2,9524.5,9095.6,{'5F': 1.0},-1272.134574,"[53, 54, 55, 56, 57]",[4.38387193e-08 9.99999358e-01 1.35168605e-10 1.65869590e-15  2.57071893e-13 5.97897482e-07]
9,5F,1.0,5F_1,9709.7,9280.8,{'5F': 1.0},-1272.13373,"[58, 59, 60]",[1.44125150e-08 9.99999985e-01 7.92040032e-11 3.99265023e-16  5.79653017e-13 1.65535343e-11]


In [14]:
# compare experimental levels with corresponding theoretical
warnThresh = 800  # highlight errors larger than this (cm-1)
dfdiff = dfexpt.copy()
dfdiff['Ecalc'] = np.nan
# match computed levels to exptl
print(f'Comparing theoretical levels from {fsoc} with experimental')
print('   Matching levels using J and term composition')
print('   (This will break if term labels are not clear)')
idx = list(dflevel.index)  # computed levels not yet matched to an exptl level
# Placeholder term weights (all zero) for exptl levels that get no theoretical
# partner. This keeps 'termwt' aligned with the rows of dfdiff, and an
# unmatched level then contributes nothing to any weighted term average.
if len(dflevel):
    no_weight = np.zeros(len(dflevel.termwt.iloc[0]))
else:
    no_weight = np.zeros(0)
termwt = []  # one array of term weights (from theory) per exptl level
for i, row in dfexpt.iterrows():
    # first unmatched theoretical level with the same J and the same term label
    match = next((j for j in idx
                  if float(row.J) == float(dflevel.at[j, 'J'])
                  and row.Tlbl == dflevel.at[j, 'Lead']),
                 None)
    if match is None:
        print('Failed to assign any theoretical level to this exptl!')
        display(row.to_frame().T)
        termwt.append(no_weight.copy())
        continue
    # both J and Term match
    dfdiff.at[i, 'Ecalc'] = dflevel.at[match, 'Erel']
    termwt.append(dflevel.at[match, 'termwt'])
    idx.remove(match)  # each theoretical level is used at most once
# unmatched levels keep Ecalc = NaN, so their error is NaN as well
dfdiff['err'] = np.round(dfdiff.Ecalc - dfdiff[Ecol], 2)
dfdiff['termwt'] = termwt
# keep only some columns
dfdiff = dfdiff[['Configuration', 'Term', Ecol, 'Leading percentages', 'Tlbl', 'J', 'Ecalc', 'err', 'termwt']]
display(dfdiff.drop('termwt', axis=1).style.apply(lambda x: ["background: yellow" if abs(v) > warnThresh else "" for v in x], 
                  subset=pd.IndexSlice[['err']]).format(fmt))
print(f'Errors > {warnThresh} cm-1 are highlighted')

Comparing theoretical levels from fe_15Q21T_ctzdk_x2c.pro with experimental
   Matching levels using J and minimal term label
   (This will break if term labels are not clear)


Unnamed: 0,Configuration,Term,Level (cm-1),Leading percentages,Tlbl,J,Ecalc,err
0,3d6.4s2,a 5D,0.0,100,5D,4.0,0.0,0.0
1,3d6.4s2,a 5D,415.933,100,5D,3.0,420.7,4.8
2,3d6.4s2,a 5D,704.007,100,5D,2.0,715.1,11.1
3,3d6.4s2,a 5D,888.132,100,5D,1.0,902.8,14.7
4,3d6.4s2,a 5D,978.074,100,5D,0.0,994.4,16.3
5,3d7.(4F).4s,a 5F,6928.268,100,5F,5.0,8413.7,1485.4
6,3d7.(4F).4s,a 5F,7376.764,100,5F,4.0,8876.5,1499.8
7,3d7.(4F).4s,a 5F,7728.06,100,5F,3.0,9246.8,1518.7
8,3d7.(4F).4s,a 5F,7985.785,100,5F,2.0,9524.5,1538.7
9,3d7.(4F).4s,a 5F,8154.714,100,5F,1.0,9709.7,1555.0


Errors > 800 cm-1 are highlighted


In [15]:
# Use experimental levels via eq. (2): average the experimental level energies
# using the theoretical term weights that were attached to dfdiff.
# Ecol is the shared name of the experimental energy column.
Eterm = term_energy_from_levels(dfdiff, target, Ecol)
SOC2 = -Eterm
print(f'Using experimental levels and eq. (2) for term {target}, SOC2 = {SOC2:.1f} cm-1')

Using experimental levels and eq. (2) for term 5D, SOC2 = -411.2 cm-1


In [16]:
# summarize level-energy errors by term
records = []     # one summary row per experimental term
incomplete = []  # terms having at least one level with no theoretical match
for term, grp in dfdiff.groupby('Term'):
    err = grp.err.dropna()
    if len(err) < len(grp):
        incomplete.append(term)
    if err.empty:
        # no level of this term was matched; nothing to summarize
        spread, m, s = None, np.nan, np.nan
    else:
        spread = np.round([err.min(), err.max()], 0).astype(int)
        m = err.mean()
        s = err.std()  # NaN for a single-level term; that is not a missing term
    records.append({'Term': term, 'range': spread, 'mean': m, 'stds': s})
dftermerr = pd.DataFrame(records, columns=['Term', 'range', 'mean', 'stds'])
if incomplete:
    print('*** Some terms are missing ***')
    print('Try decreasing the energy maximum ("termcut")')
    print(f'   terms with unmatched levels: {incomplete}')
# round values to nearest 1 cm-1; the nullable integer type keeps undefined entries as <NA>
for col in ['mean', 'stds']:
    dftermerr[col] = dftermerr[col].astype(float).round(0).astype('Int64')
print(f'{fsoc} errors in level energies (cm-1), grouped by leading term')
# order same as experimental terms
# (labels in xterms may be 1-tuples if they came from groupby(['Term']) under pandas >= 2.0)
term_order = [t[0] if isinstance(t, tuple) else t for t in xterms]
dftermerr['Term'] = pd.Categorical(dftermerr.Term, term_order)
dftermerr = dftermerr.sort_values('Term')
dftermerr

fe_15Q21T_ctzdk_x2c.pro errors in level energies (cm-1), grouped by leading term


Unnamed: 0,Term,range,mean,stds
0,a 3F,"[8516, 9214]",8841,352
1,a 5D,"[0, 16]",9,7
2,a 5F,"[1485, 1555]",1520,28
3,a 5P,"[2318, 2397]",2355,40


In [19]:
# Show how the target term is spread over the theoretical levels
print(f'Distribution of term "{target}" among levels:')
# row label of the target term in the terms table
target_rows = SOCI.dfterm.loc[SOCI.dfterm.Term == target]
itarget = target_rows.index[0]
# new column named after the term: its weight in each level
dflevel[target] = [weights[itarget] for weights in dflevel.termwt]
by_weight = (dflevel
             .drop(['termwt', 'Composition'], axis=1)
             .sort_values(target, ascending=False))
display(by_weight.style.format(fmt))
total_weight = dflevel[target].sum()
print(f'Total weight of {target} = {total_weight:.3f}')

Distribution of term "5D" among levels:


Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,E,Nr,5D
1,5D,3.0,5D_3,420.7,-8.2,-1272.176054,"[10, 11, 12, 13, 14, 15, 16]",0.999588
2,5D,2.0,5D_2,715.1,286.2,-1272.174712,"[17, 18, 19, 20, 21]",0.999082
0,5D,4.0,5D_4,0.0,-428.9,-1272.17797,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",0.998792
3,5D,1.0,5D_1,902.8,473.9,-1272.173857,"[22, 23, 24]",0.998391
4,5D,0.0,5D_0,994.4,565.5,-1272.17344,[25],0.997961
18,3P,0.0,3P_0,20891.9,20463.1,-1272.08278,[117],0.002039
19,3F,4.0,3F_4,21190.7,20761.8,-1272.081419,"[118, 119, 120, 121, 122, 123, 124, 125, 126]",0.001208
15,3P,1.0,3P_1,20311.3,19882.4,-1272.085425,"[94, 95, 96]",0.000835
10,3P,2.0,3P_2,19105.4,18676.5,-1272.09092,"[61, 62, 63, 64, 65]",0.000833
14,5P,1.0,5P_1,20278.6,19849.8,-1272.085574,"[91, 92, 93]",0.000774


Total weight of 5D = 5.000


In [24]:
# Final summary: the alternative SOC estimates side by side
print(f'Molpro source file: {fsoc}')
print(f'Alternative values for SOC({target}) of atom {atom}:')
rule = '-' * 25
row_fmt = '{:12s} {:.1f} cm-1'
print(rule)
for label, value in (('eq (1)', SOC1),
                     ('raw theory', SOCraw),
                     ('avgd theory', SOCth),
                     ('eq (2)', SOC2)):
    print(row_fmt.format(label, value))
print(rule)

Molpro source file: fe_15Q21T_ctzdk_x2c.pro
Alternative values for SOC(5D) of atom Fe:
-------------------------
eq (1)       -403.0 cm-1
raw theory   -429.1 cm-1
avgd theory  -429.3 cm-1
eq (2)       -411.2 cm-1
-------------------------
