In [23]:
# Adjust atomic term energies (SOC matrix diagonals) to fit exptl energy levels
#    to obtain semiempirical term energies
# This version does not use degeneracies; consider all sublevels individually without assignments
# -------So it is probably comparing energies of levels that should not be compared!------
# ** This will probably break if there are multiple terms with the same term symbol **
# ** If so, it can be fixed by adding ordinal prefixes to term symbols              **
# KKI 4/17/2023
import re, sys, glob, subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy, collections
sys.path.insert(0, '../karlib')
import chem_subs as chem
import molpro_subs as mpr

pd.set_option('display.max_rows', None)

In [24]:
# Excel spreadsheet of experimental levels from https://physics.nist.gov/PhysRefData/ASD/levels_form.html
#   Download as CSV; paste into a column in Excel; use Data -> Text to Columns -> Delimited -> Comma
#   Rename that worksheet with a name like "Fe" or "Fe+"
# Note that experimental levels might not be listed by increasing energy
xl_expt = 'exptl_levels.xlsx'
xl = pd.ExcelFile(xl_expt)

### Select atom and parity of interest

In [25]:
atom = 'Fe'  # a name like "Fe" or "Fe+"
parity = 'even'  #  choose 'even' or 'odd' or 'both'

### Select energy maximum for experimental terms

In [26]:
# In case of errors, try making this larger or smaller to match the theoretical calculation
termcut = 20000  # discard terms that lack levels below this energy (cm-1)

In [27]:
Ecol = 'Level (cm-1)'  # the exptl energy column
# display formatting
fmt = {'Eshift': '{:.1f}', Ecol: '{:.3f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
for col in ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1', 'fitted']:
    fmt[col] =  fmt['Eshift']
for col in ['dif', 'Theory', 'ecm', 'SOC', 'RMSE']:
    fmt[col] = '{:.2f}'

In [28]:
if atom not in xl.sheet_names:
    print(f'No experimental data sheet for {atom}!')
else:
    dfexpt = pd.read_excel(xl, atom)
    # Delete any ionization limit
    dfexpt = dfexpt[dfexpt.Term != 'Limit']
    print(f'{len(dfexpt)} experimental levels for {atom} read from "{xl_expt}"')
    # Select by parity
    if parity == 'even':
        # discard odd levels ('Term' field ends with '*')
        dfexpt = dfexpt[~dfexpt.Term.str.contains('\*$')]
    elif parity == 'odd':
        dfexpt = dfexpt[dfexpt.Term.str.contains('\*$')]
    print(f'{len(dfexpt)} levels are of parity "{parity}"')
    # Select terms by energy
    lowTerms = []
    for term, grp in dfexpt.groupby('Term'):
        if (grp[Ecol] < termcut).any():
            lowTerms.append(term)
    print(f'There are {len(lowTerms)} assigned terms with levels below {termcut} cm-1:')
    print('   ', lowTerms)
    dfexpt = dfexpt[dfexpt.Term.isin(lowTerms)]
    nlevx = len(dfexpt)
    # parse 'Term' column to get simplified term labels
    def simplify(term):
        # extract the basic LS part of a decorated term label
        regex = re.compile('\d[SPDF-Z]')
        m = regex.search(term)
        if m:
            return m.group(0)
        else:
            # failed
            return '?'
    dfexpt['Tlbl'] = dfexpt.Term.apply(simplify)
    # Convert experimental 'J' and 'Level' to floats
    for col in ['J', Ecol]:
        dfexpt[col] = dfexpt[col].astype(float)
    # add degeneracy = 2J+1
    dfexpt['degen'] = 2 * dfexpt.J + 1
    # sort by energy (just in case)
    dfexpt = dfexpt.sort_values('Level (cm-1)')
    nmagx = int(dfexpt.degen.sum())
    print(f'There are {nlevx} levels of interest ({nmagx} magnetic sublevels)')
    display(dfexpt.style.format(fmt))
    gexpt = list(dfexpt.degen.astype(int))

846 experimental levels for Fe read from "exptl_levels.xlsx"
368 levels are of parity "even"
There are 6 assigned terms with levels below 20000 cm-1:
    ['a 3F', 'a 3H', 'a 3P2', 'a 5D', 'a 5F', 'a 5P']
There are 22 levels of interest (138 magnetic sublevels)


Unnamed: 0,Configuration,Term,J,Prefix,Level (cm-1),Suffix,Uncertainty (cm-1),Lande,Leading percentages,Reference,Tlbl,degen
0,3d6.4s2,a 5D,4.0,,0.0,,,1.5002,100,L11631,5D,9
1,3d6.4s2,a 5D,3.0,,415.933,,0.001,1.50034,100,,5D,7
2,3d6.4s2,a 5D,2.0,,704.007,,0.001,1.50041,100,,5D,5
3,3d6.4s2,a 5D,1.0,,888.132,,0.001,1.50022,100,,5D,3
4,3d6.4s2,a 5D,0.0,,978.074,,0.001,,100,,5D,1
5,3d7.(4F).4s,a 5F,5.0,,6928.268,,0.001,1.40021,100,,5F,11
6,3d7.(4F).4s,a 5F,4.0,,7376.764,,0.001,1.35004,100,,5F,9
7,3d7.(4F).4s,a 5F,3.0,,7728.06,,0.001,1.24988,100,,5F,7
8,3d7.(4F).4s,a 5F,2.0,,7985.785,,0.001,0.99953,100,,5F,5
9,3d7.(4F).4s,a 5F,1.0,,8154.714,,0.001,-0.014,100,,5F,3


### Take assignments at face value, i.e., apply eq. (1)

In [29]:
# No theoretical calculations are needed to use eq. (1)
xterms = []  # list of term labels
eterms = []  # list of term energies
tlbls = []  # simplified term labels
for Term, grp in dfexpt.groupby(['Term']):
    xterms.append(Term)
    emean = np.dot(grp.degen, grp[Ecol]) / grp.degen.sum()
    eterms.append(emean)
    tlbls.append(grp.Tlbl.values[0])
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms, 'Tlbl': tlbls}).sort_values('Eterm').reset_index(drop=True)
dfeq1['Erel'] = dfeq1.Eterm - dfeq1.Eterm.min()
print('Term energies (cm-1) using eq. (1) and tabulated assignments')
display(dfeq1.style.format(fmt))
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
print(f'The corresponding spin-orbit stabilization energy is SOC1 = {SOC1:.2f} cm-1')
cols = ['Case', 'RMSE', 'SOC'] + list(dfeq1.Tlbl.values)
dfsummary = pd.DataFrame(columns=cols)
for t in tlbls:
    fmt[t] = '{:.1f}'
# situation without theoretical info
row = ['expt only', np.nan, SOC1, *list(dfeq1.Erel.values)]
dfsummary.loc[0] = row

Term energies (cm-1) using eq. (1) and tabulated assignments


Unnamed: 0,Term,Eterm,Tlbl,Erel
0,a 5D,403.0,5D,0.0
1,a 5F,7459.8,5F,7056.8
2,a 3F,12407.4,3F,12004.4
3,a 5P,17684.6,5P,17281.6
4,a 3P2,18954.0,3P,18551.1
5,a 3H,19575.7,3H,19172.7


The corresponding spin-orbit stabilization energy is SOC1 = -402.96 cm-1


In [30]:
# Copy out all degeneracies as individual sublevels
termall = []
Jall = []
Eall = []
lblall = []
for i, row in dfexpt.iterrows():
    g = int(row.degen)
    termall.extend([row.Term] * g)
    Jall.extend([row.J] * g)
    Eall.extend([row['Level (cm-1)']] * g)
    lblall.extend([row.Tlbl] * g)
dfexptall = pd.DataFrame({'Term': termall, 'J': Jall, 'Level (cm-1)': Eall, 'Tlbl': lblall})
#display(dfexptall.style.format(fmt))

### Specify Molpro SO-CI output file

In [31]:
fsoc = 'fe_15Q21T_ctzdk_x2c.pro'
#fsoc = 'fe_ci_15Q7T_c5zdk_x2c.pro'
#fsoc = 'fe_15Q7T_ctzdk_x2c.pro'

print(f'Reading MOLPRO file "{fsoc}"')
compAtom = mpr.stoichiometry(fsoc)
charge = mpr.total_charge(fsoc)
print(f'The atom is {compAtom} with charge {charge}')
# check for consistency with the experimental data that were read
if charge > 0: 
    compAtom += '+'
elif charge < 0:
    compAtom += '-'
if abs(charge) > 1:
    compAtom += f'{abs(charge)}'
        
if compAtom != atom:
    print(f'*** exptl atom = {atom} is different')
PG = mpr.read_compgroup(fsoc)
print(f'The computational point group is {PG}')

Reading MOLPRO file "fe_15Q21T_ctzdk_x2c.pro"
The atom is Fe with charge 0
The computational point group is Ci


In [32]:
SOCI = mpr.fullmatSOCI(fsoc, hybrid=True, sortval=False)
vals_original = SOCI.vals.copy()
#vecs_original = SOCI.vec.copy()
matcopy = SOCI.matrix.copy()
#print('SO-CI matrix diagonal:')
#np.set_printoptions(suppress = True)
#print(np.round(matcopy.diagonal().real, 1))

Computational group = Ci
CASSCF states:
    15 Quintet
    21 Triplet
Replacing MRCI+Q energies by HLSDIAG values


In [33]:
dfterm = SOCI.average_terms(be_close=['Energy', 'Edav', 'Eref', 'dipZ', 'C0'])
print('Averaged terms:')
display(dfterm)

Averaged terms:


Unnamed: 0,Term,dipZ,Edav,idx,ecm
0,5D,0.0,-1272.176014,"[0, 1, 2, 3, 4]",0.0
1,5F,0.0,-1272.137104,"[7, 6, 11, 9, 10, 5, 8]",8539.8
2,3P,0.0,-1272.088313,"[16, 15, 17]",19248.1
3,5P,0.0,-1272.086669,"[12, 13, 14]",19609.0
4,3H,0.0,-1272.084718,"[19, 18, 23, 20, 21, 25, 26, 27, 24, 22, 28]",20037.1
5,3F,0.0,-1272.080916,"[29, 30, 31, 33, 35, 34, 32]",20871.7


In [34]:
# Create global 'term_order'
term_order = dfterm.Term.values
print('Term order: ', term_order)
term_energies = dfterm.ecm.values

Term order:  ['5D' '5F' '3P' '5P' '3H' '3F']


In [35]:
def levels_term_energies(term_order, term_energies, SOCI):
    # Return the SO-CI energies from using new term energies
    # 'term_order' is array of term symbols
    # 'term_energies' is array of corresponding energies (cm-1)
    # 'SOCI' is a fullmatSOCI() object
    #  Install the term energies along the SOCI.matrix diagonal and rediagonalize
    term_dict = dict(zip(term_order, term_energies))
    newdiag = SOCI.matrix.diagonal().copy()
    for ibs in range(len(newdiag)):
        j = SOCI.sob_ici[ibs]
        term = SOCI.mrci[j].Term
        # install the new energy for the term
        newdiag[ibs] = term_dict[term]
    # update the matrix
    np.fill_diagonal(SOCI.matrix, newdiag)
    SOCI.diagonalize(store=True, vectors=True)
    return SOCI.vals

In [36]:
def compute_rmse(dfexpt, vals, DFret=False):
    # Given SO-CI level energies return their RMS error
    # 'dfexpt' is the DataFrame of exptl level energies
    # 'vals' is the array/list of theoretical levels
    # Just match by increasing energy, no assignments of J!
    dfcomp = dfexptall[['Term', 'J', 'Level (cm-1)', 'Tlbl']].copy()
    dfcomp = dfcomp.sort_values('Level (cm-1)')
    dfcomp['Theory'] = np.sort(vals) - np.min(vals)
    dfcomp['dif'] = dfcomp.Theory - dfcomp['Level (cm-1)']
    #display(dfcomp.style.format(fmt))
    rmse = np.sqrt( (dfcomp.dif ** 2).mean() )
    if DFret:
        return rmse, dfcomp
    else:
        return rmse

In [37]:
def rmse_fun(term_energies):
    # Uses globals
    # Return RMSE given term energies
    # Do not allow the lowest term energy to change
    global dfexpt, SOCI, term_order
    vals = levels_term_energies(term_order, term_energies, SOCI)
    rmse = compute_rmse(dfexpt, vals)
    return rmse
def obj_fun(exc_terme):
    # Given only excited term energies (assuming ground=0)
    #   return the RMSE
    terme = [0] + list(exc_terme)
    rmse = rmse_fun(terme)
    return rmse

In [38]:
rmse_original, dforig = compute_rmse(dfexpt, vals_original, DFret=True)  # as received from MOLPRO
SOC0 = vals_original[0]
print(f'Original RMSE = {rmse_original:.2f} cm-1 with SOC = {SOC0:.2f} cm-1')
display(dforig.style.format(fmt))

Original RMSE = 3226.43 cm-1 with SOC = -429.08 cm-1


Unnamed: 0,Term,J,Level (cm-1),Tlbl,Theory,dif
0,a 5D,4.0,0.0,5D,0.0,0.0
1,a 5D,4.0,0.0,5D,0.01,0.01
2,a 5D,4.0,0.0,5D,0.17,0.17
3,a 5D,4.0,0.0,5D,0.19,0.19
4,a 5D,4.0,0.0,5D,0.19,0.19
5,a 5D,4.0,0.0,5D,0.26,0.26
6,a 5D,4.0,0.0,5D,0.34,0.34
7,a 5D,4.0,0.0,5D,0.35,0.35
8,a 5D,4.0,0.0,5D,0.36,0.36
15,a 5D,3.0,415.933,5D,420.47,4.54


In [39]:
# Use the averaged term energies--expect little change
rmse_avgd = rmse_fun(term_energies)
print(f'Using averaged input term energies, RMSE = {rmse_avgd:.2f} cm-1')
row = ['Before fit', rmse_avgd, SOC0] + list(dfterm.ecm.values)
dfsummary.loc[1] = row

Using averaged input term energies, RMSE = 3226.28 cm-1


In [40]:
def freport(xvec):
    # callback function to monitor minimization
    freport.counter += 1
    print(f'{freport.counter:5d}', end='')
    return
freport.counter = 0

In [41]:
# Minimize the RMSE
exc_terme = list(term_energies)[1:]  # only excited terms; assume ground term = 0 energy
result = scipy.optimize.minimize(obj_fun, exc_terme, method='Nelder-Mead', callback=freport)

    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60   61   62   63   64   65   66   67   68   69   70   71   72   73   74   75   76   77   78   79   80   81   82   83   84   85   86   87   88   89   90   91   92   93   94   95   96   97   98   99  100  101  102  103  104  105  106  107  108  109  110  111  112  113  114  115  116  117  118  119  120  121  122  123  124  125  126  127  128  129  130  131  132  133  134  135  136  137  138  139  140  141  142  143  144  145  146  147  148  149  150  151  152  153  154  155  156  157  158  159  160  161  162  163  164  165  166  167  168  169  170  171  172  173  174  175  176  177  178  179  180  181  182  183  184  185  186  187  188  189  190  191  192  193  194  195  196  197  198  199  200

In [42]:
print(f'After minimization, RMSE = {result.fun:.2f} cm-1')
terme = [0] + list(result.x)
Eterm = dict(zip(term_order, terme))
#print('Fitted term energies:')
#chem.print_dict(Eterm)
vals = levels_term_energies(term_order, terme, SOCI)
rmse, dfcomp = compute_rmse(dfexpt, vals, DFret=True)
print('\nExptl vs fitted level energies:')
display(dfcomp.style.format(fmt))
SOCfit = vals[0]
print(f'The lowest level energy = SOCfit = {SOCfit:.2f} cm-1')

After minimization, RMSE = 1162.42 cm-1

Exptl vs fitted level energies:


Unnamed: 0,Term,J,Level (cm-1),Tlbl,Theory,dif
0,a 5D,4.0,0.0,5D,0.0,0.0
1,a 5D,4.0,0.0,5D,0.0,0.0
2,a 5D,4.0,0.0,5D,0.0,0.0
3,a 5D,4.0,0.0,5D,0.0,0.0
4,a 5D,4.0,0.0,5D,0.0,0.0
5,a 5D,4.0,0.0,5D,0.01,0.01
6,a 5D,4.0,0.0,5D,0.01,0.01
7,a 5D,4.0,0.0,5D,0.01,0.01
8,a 5D,4.0,0.0,5D,0.01,0.01
15,a 5D,3.0,415.933,5D,422.11,6.18


The lowest level energy = SOCfit = -431.47 cm-1


In [43]:
row = ['After fit', rmse, SOCfit] + list(terme)
dfsummary.loc[2] = row

In [44]:
display(dfsummary.style.format(fmt).hide_index())
print(f'Input file: {fsoc}')

Case,RMSE,SOC,5D,5F,3F,5P,3P,3H
expt only,,-402.96,0.0,7056.8,12004.4,17281.6,18551.1,19172.7
Before fit,3226.28,-429.08,0.0,8539.8,19248.1,19609.0,20037.1,20871.7
After fit,1162.42,-431.47,0.0,7028.3,18294.4,18777.4,13872.8,19242.5


Input file: fe_15Q21T_ctzdk_x2c.pro
