In [1]:
# Extract SOC information from MOLPRO outputs for atoms
# This version is meant to be easier to use,
#    but will need better expt/theory matching for heavy atoms
# KKI 7/13/2023
import re, sys, glob, subprocess
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter
from sklearn.cluster import KMeans

#sys.path.insert(0, '../karlib')
import chem_subs as chem
import molpro_subs as mpr

# Display every row of a DataFrame (no truncation of long tables)
pd.set_option('display.max_rows', None)
# Print small floating-point numbers in fixed-point rather than scientific notation
np.set_printoptions(suppress=True)

In [2]:
# Excel spreadsheet of experimental levels from https://physics.nist.gov/PhysRefData/ASD/levels_form.html
#   Download as CSV; paste into a column in Excel; use Data -> Text to Columns -> Delimited -> Comma
#   Rename that worksheet with a name like "Fe" or "Fe+"
# Note that experimental levels might not be listed by increasing energy
xl_expt = 'exptl_levels.xlsx'
# Open the workbook once; the sheet for the atom of interest is read from it in a later cell
xl = pd.ExcelFile(xl_expt, engine='openpyxl')

### Specify Molpro SO-CI output file

In [3]:
# Path of the Molpro SO-CI output file to analyze
fsoc = '../UMemphis/NI15T21S-cc-TZ-DK.out'

print(f'Reading MOLPRO file "{fsoc}"')
atom = mpr.stoichiometry(fsoc)
charge = mpr.total_charge(fsoc, verbose=True)
print(f'The atom is {atom} with charge {charge}')
# Append the charge to the element symbol, to match sheet names in the exptl data file
if charge > 0:
    atom += '+'
elif charge < 0:
    atom += '-'
if abs(charge) > 1:
    # The charge is read as a float (it prints as e.g. "0.0"); the 'g' format drops
    # the trailing ".0" so a doubly charged ion is labelled "Fe+2", not "Fe+2.0".
    # NOTE(review): confirm that multiply charged sheets are named this way.
    atom += f'{abs(charge):g}'
PG = mpr.read_compgroup(fsoc)
print(f'The computational point group is {PG}')

Reading MOLPRO file "../UMemphis/NI15T21S-cc-TZ-DK.out"
The atom is Ni with charge 0.0
The computational point group is Ci


In [4]:
# The experimental workbook must contain a sheet named after the atom/ion label built above.
# NOTE(review): whether chem.print_err() stops execution is not visible here -- confirm in chem_subs.
if atom not in xl.sheet_names:
    chem.print_err('', f'No experimental data sheet for {atom}!')

In [5]:
# Build the SO-CI object from the Molpro output; later cells use its
# dfci, dfterm, dfso and vals attributes and its average_terms()/assign_atomic_J() methods.
# NOTE(review): the effect of atom=True is defined in molpro_subs -- not shown here.
SOCI = mpr.fullmatSOCI(fsoc, atom=True)

Computational group = Ci
CASSCF states:
    21 Singlet
    15 Triplet


In [6]:
# Raw theoretical SOC: the minimum of SOCI.vals.
# NOTE(review): per the message below, SOCI.vals presumably holds level energies (cm-1)
# relative to the lowest uncoupled energy -- confirm in molpro_subs.
SOCraw = SOCI.vals.min()
print(f'From lowest level and lowest uncoupled energy, raw theoretical SOCraw = {SOCraw:.3f} cm-1')

From lowest level and lowest uncoupled energy, raw theoretical SOCraw = -1004.406 cm-1


In [7]:
# Header of the column that holds the experimental level energies
Ecol = 'Level (cm-1)'

# Display formats for DataFrame styling, keyed by column name
fmt = {'Eshift': '{:.1f}', Ecol: '{:.3f}', 'Pct': '{:.3f}', 'degen': '{:.0f}'}
# columns shown with the same format as 'Eshift'
fmt.update(dict.fromkeys(
    ['J', 'Ecalc', 'E_dif', 'Erel', 'Eshift', 'err', 'Eterm', 'cm-1', 'fitted',
     'wmean', 'wstds', 'uwmean', 'uwstds'],
    fmt['Eshift']))
# columns shown with two decimals
fmt.update(dict.fromkeys(['dif', 'Theory', 'ecm', 'SOC', 'RMSE'], '{:.2f}'))

In [8]:
# Average the MRCI states into terms.
# NOTE(review): the meaning of `be_close` and `always` is defined in molpro_subs -- not shown here.
dfterm = SOCI.average_terms(be_close=['Energy', 'Edav', 'Eref', 'dipZ', 'C0'], always=True)
# drop the dipZ column
dfterm.drop(columns=['dipZ'], inplace=True)
print('Averaged terms from MRCI:')
dfso = SOCI.assign_atomic_J(quiet=True)  # create SOCI.dfso
Egl = SOCI.dfso.E.min()  # energy of ground level
# Term energy relative to the ground level, scaled by chem.AU2CM
# (presumably the hartree -> cm-1 conversion factor; confirm in chem_subs)
dfterm['Erel'] = (dfterm.Edav - Egl) * chem.AU2CM
display(dfterm.style.format(fmt))
print('"ecm"  is relative to the lowest term (cm-1)')
print('"Erel" is relative to the ground level')

Averaged terms from MRCI:


Unnamed: 0,Term,Edav,idx,ecm,Erel
0,(1)3F,-1519.415799,[0 1 4 5 2 6 3],0.0,1004.4
1,(1)3D,-1519.412704,[ 9 8 10 11 7],679.2,1683.6
2,(1)1D,-1519.402251,[15 16 18 17 19],2973.5,3977.9
3,(1)1S,-1519.394967,[34],4572.2,5576.6
4,(2)1D,-1519.347992,[21 22 20 24 23],14882.0,15886.4
5,(1)3P,-1519.346195,[12 14 13],15276.3,16280.7
6,(1)1G,-1519.311238,[25 30 32 27 28 29 26 31 33],22948.5,23952.9
7,(2)1S,-1519.17931,[35],51903.5,52907.9


"ecm"  is relative to the lowest term (cm-1)
"Erel" is relative to the ground level


In [9]:
# Choose the parity of the experimental levels to compare with.
# It is taken from the calculation only when the computational group is Ci
# and all CI states share a single irrep (irrep 1 is treated as even).
irreps_ci = set(SOCI.dfci.Irrep)
if (PG != 'Ci') or (len(irreps_ci) != 1):
    # ask user for parity of interest
    parity = input('Please choose "even" or "odd" parity: ')
elif 1 in irreps_ci:
    parity = 'even'
else:
    parity = 'odd'
print(f'Experimental states will be restricted to parity = {parity}')

Experimental states will be restricted to parity = even


In [10]:
# Read the experimental levels for this atom/ion from its worksheet
dfexpt = pd.read_excel(xl, atom, engine='openpyxl')
# Delete any ionization limit
ilim = dfexpt[dfexpt.Term == 'Limit'].index.min()
# delete the "Limit" row and everything past it
n1 = len(dfexpt)
if pd.notna(ilim):
    # Without a "Limit" row, ilim is NaN and there is nothing to cut off
    dfexpt = dfexpt.truncate(after=ilim-1)
n2 = len(dfexpt)
if n2 < n1:
    print(f'Discarding {n1-n2} ionized or metastable states')
# Odd-parity levels have a 'Term' field that ends with '*'
# (raw string: '\*' is not a valid escape sequence in an ordinary string literal)
is_odd = dfexpt.Term.str.contains(r'\*$')
dfeven = dfexpt[~is_odd].copy()
dfodd = dfexpt[is_odd].copy()
print(f'{len(dfexpt)} experimental levels ({len(dfeven)} even and {len(dfodd)} odd) for {atom} read from "{xl_expt}"')
# Select by parity
if parity == 'even':
    # discard odd levels ('Term' field ends with '*')
    dfexpt = dfeven.copy()
elif parity == 'odd':
    dfexpt = dfodd.copy()
else:
    chem.print_err('', f'Parity of "{parity}" is not recognized')
n3 = len(dfexpt)
print(f'{n3} levels accepted for parity = {parity}')
# Assign unique term symbols
dfexpt = chem.unique_labels_exptl_terms(dfexpt, verbose=True, always=True)

Discarding 3 ionized or metastable states
285 experimental levels (128 even and 157 odd) for Ni read from "exptl_levels.xlsx"
128 levels accepted for parity = even


In [11]:
def match_expt_theory_simple(dfexpt, dftheory):
    """Pair experimental and theoretical levels that share leading term and J.

    The match is purely by label, so it fails wherever the experimental and
    computed term assignments differ.

    Parameters
    ----------
    dfexpt : DataFrame
        Experimental levels with columns 'Configuration', 'uTerm', 'J',
        'Leading percentages' and the energy column named by the global `Ecol`.
        If it has no 'degen' column, one (2J+1) is added to it IN PLACE.
    dftheory : DataFrame
        Computed levels with columns 'Lead', 'J', 'Erel' and 'termwt'.

    Returns
    -------
    DataFrame
        Copy of the selected experimental columns plus 'Tcalc' (computed term
        label), 'termwt', 'Ecalc' (computed energy) and 'err' (Ecalc - expt).
        Experimental levels with no computed partner keep Ecalc = NaN.
    """
    if 'degen' not in dfexpt.columns:
        # Add degeneracy
        dfexpt['degen'] = (2 * dfexpt.J) + 1
    dfcomp = dfexpt[['Configuration', 'uTerm', 'J', 'Leading percentages', 'degen', Ecol]].copy()
    dfcomp['Tcalc'] = ''  # term assignment in computation
    # Explicit object dtype: the cells later receive each level's term weights,
    # which would not fit in a string-typed column.
    dfcomp['termwt'] = pd.Series([''] * len(dfcomp), index=dfcomp.index, dtype=object)
    dfcomp['Ecalc'] = np.nan
    for _, row in dftheory.iterrows():
        term = row.Lead
        J = row.J
        # experimental rows carrying the same term label and J as this computed level
        same_level = ((dfexpt.uTerm == term) & (dfexpt.J == J)).to_numpy()
        for ix in dfexpt.index[same_level]:
            if not np.isnan(dfcomp.at[ix, 'Ecalc']):
                # The first computed level to claim an experimental level keeps it.
                # display() returns None, so it must not be passed to print().
                print('Already paired!')
                display(dfexpt.loc[ix].to_frame().T)
            else:
                dfcomp.at[ix, 'Ecalc'] = row.Erel
                dfcomp.at[ix, 'Tcalc'] = term
                dfcomp.at[ix, 'termwt'] = row.termwt
    dfcomp['err'] = dfcomp.Ecalc - dfcomp[Ecol]
    return dfcomp

In [12]:
# Show the level table returned by SOCI.assign_atomic_J()
dfso

Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,Composition,E,Nr,termwt
0,(1)3F,4.0,(1)3F_4,0.0,-1004.38751,{'(1)3F': 1.0},-1519.420376,"[1, 2, 3, 4, 5, 6, 7, 8, 9]","[0.9992597255705972, 2.2758853295530722e-11, 2..."
1,(1)3D,3.0,(1)3D_3,1073.203347,68.815837,{'(1)3D': 1.0},-1519.415486,"[10, 11, 12, 13, 14, 15, 16]","[0.0004981064316040897, 0.9995018935446802, 2...."
2,(1)3F,3.0,(1)3F_3,1333.424676,329.037166,{'(1)3F': 1.0},-1519.4143,"[17, 18, 19, 20, 21, 22, 23]","[0.9995018935221692, 0.0004981064611809434, 1...."
3,(1)3D,2.0,(1)3D_2,1744.656936,740.269426,"{'(1)3D': 0.9, '(1)1D': 0.1}",-1519.412426,"[24, 25, 26, 27, 28]","[0.003639734862877886, 0.8970499280657283, 0.0..."
4,(1)3F,2.0,(1)3F_2,2240.258702,1235.871193,{'(1)3F': 0.99},-1519.410168,"[29, 30, 31, 32, 33]","[0.987064119267106, 0.005869761309529669, 0.00..."
5,(1)3D,1.0,(1)3D_1,2599.123072,1594.735562,{'(1)3D': 1.0},-1519.408533,"[34, 35, 36]","[1.1340871741227094e-10, 0.9999999104728593, 2..."
6,(1)1D,2.0,(1)1D_2,4226.299783,3221.912273,"{'(1)3D': 0.1, '(1)1D': 0.9}",-1519.401119,"[37, 38, 39, 40, 41]","[0.003864460113598077, 0.09705236375442892, 0...."
7,(1)1S,0.0,(1)1S_0,5558.140477,4553.752967,{'(1)1S': 1.0},-1519.395051,[42],"[2.8727244970089097e-11, 1.1752304060946364e-1..."
8,(1)3P,2.0,(1)3P_2,14997.064893,13992.677383,"{'(2)1D': 0.49, '(1)3P': 0.51}",-1519.352044,"[43, 44, 45, 46, 47]","[0.003033356406775961, 1.625669768409063e-05, ..."
9,(1)3P,1.0,(1)3P_1,16637.485674,15633.098165,{'(1)3P': 1.0},-1519.34457,"[48, 49, 50]","[4.176778509453297e-11, 8.93909190487716e-08, ..."


In [13]:
dfdiff = match_expt_theory_simple(dfexpt, dfso)
warnThresh = 1000  # highlight errors larger than this (cm-1)
# Drop experimental levels that have no matching level in the calculation (Ecalc is NaN)
# or no experimental energy.  Restricting dropna() to these columns keeps matched levels
# whose 'Configuration' or 'Leading percentages' cell merely happens to be empty.
dfdiff = dfdiff.dropna(axis=0, subset=['Ecalc', Ecol])
print(f'Please inspect the following pairing of theory ("Ecalc") with expt ("{Ecol}")')
print('Disagreements in term assignments are highlighted in red')
print(f'Errors > {warnThresh} cm-1 are highlighted in yellow')
selcols = ['Configuration', 'uTerm', 'J', Ecol, 'Tcalc', 'Ecalc', 'err']
# yellow: |err| above the threshold;  red: computed term label differs from the experimental one
display(dfdiff[selcols].style.apply(lambda x: ["background: yellow" if abs(v) > warnThresh else "" for v in x], 
              subset=pd.IndexSlice[['err']]).apply(lambda x: (x != dfdiff['uTerm']).map({True: "background-color: red; \
              color: white", False: ""}), subset=['Tcalc']).format(fmt))

Please inspect the following pairing of theory ("Ecalc") with expt ("Level (cm-1)")
Disagreements in term assignments are highlighted in red
Errors > 1000 cm-1 are highlighted in yellow


Unnamed: 0,Configuration,uTerm,J,Level (cm-1),Tcalc,Ecalc,err
0,3d8.(3F).4s2,(1)3F,4.0,0.0,(1)3F,0.0,0.0
1,3d8.(3F).4s2,(1)3F,3.0,1332.164,(1)3F,1333.4,1.3
2,3d8.(3F).4s2,(1)3F,2.0,2216.55,(1)3F,2240.3,23.7
3,3d9.(2D).4s,(1)3D,3.0,204.787,(1)3D,1073.2,868.4
4,3d9.(2D).4s,(1)3D,2.0,879.816,(1)3D,1744.7,864.8
5,3d9.(2D).4s,(1)3D,1.0,1713.087,(1)3D,2599.1,886.0
6,3d9.(2D).4s,(1)1D,2.0,3409.937,(1)1D,4226.3,816.4
7,3d8.(1D).4s2,(2)1D,2.0,13521.347,(2)1D,16888.8,3367.4
8,3d10,(1)1S,0.0,14728.84,(1)1S,5558.1,-9170.7
9,3d8.(3P).4s2,(1)3P,2.0,15609.844,(1)3P,14997.1,-612.8


In [14]:
# No theoretical calculations are needed to use eq. (1)
# Unique term labels among the matched levels, in order of first appearance
xterms = list(dict.fromkeys(dfdiff.uTerm))
eterms = []  # list of term energies
for Term in xterms:
    # degeneracy-weighted mean energy over ALL experimental levels of this term
    subdf = dfexpt[dfexpt.uTerm == Term]
    emean = np.dot(subdf.degen, subdf[Ecol]) / subdf.degen.sum()
    eterms.append(emean)
dfeq1 = pd.DataFrame({'Term': xterms, 'Eterm': eterms}).sort_values('Eterm').reset_index(drop=True)
print('Term energies (cm-1) using eq. (1)')
display(dfeq1.style.format(fmt))
SOC1 = -1 * np.round(dfeq1.at[0, 'Eterm'], 3)
lowterm = dfeq1.at[0, 'Term']
print(f'The term of lowest energy is \t{lowterm} \twith SOC1 = {SOC1} cm-1')
# Term of the lowest experimental level.  The experimental levels might not be listed
# by increasing energy, so locate the minimum energy instead of taking the first row.
levterm = dfexpt.uTerm.values[np.nanargmin(dfexpt[Ecol].to_numpy(dtype=float))]
target = levterm
if levterm != lowterm:
    # The lowest term is not the leading term in the lowest level
    SOC1alt = SOC1
    SOC1 = -1 * np.round(dfeq1[dfeq1.Term == levterm]['Eterm'].values[0], 3)
    print(f'The lowest level belongs to \t{levterm} \twith SOC1 = {SOC1} cm-1')
#print()
#print(f'Term {target} is selected for further SOC calculation')
#print('    to change this, assign the variable "target" to another term in the table above')

Term energies (cm-1) using eq. (1)


Unnamed: 0,Term,Eterm
0,(1)3D,731.5
1,(1)3F,971.8
2,(1)1D,3409.9
3,(2)1D,13521.3
4,(1)1S,14728.8
5,(1)3P,15696.5
6,(1)1G,22102.3
7,(2)1S,50276.3


The term of lowest energy is 	(1)3D 	with SOC1 = -731.457 cm-1
The lowest level belongs to 	(1)3F 	with SOC1 = -971.805 cm-1


In [15]:
def term_energy_from_levels(df, term):
    """Return the average energy of `term` as derived from the levels in `df`.

    `df` must have the columns 'J', 'termwt' and the level-energy column named
    by the global `Ecol`.  Each level contributes with weight
    (2J+1) * (its 'termwt' entry for `term`); the entry is selected with the
    index label of `term` in the global SOCI.dfterm.
    """
    # index label of the requested term in the table of computed terms
    iterm = SOCI.dfterm.loc[SOCI.dfterm.Term == term].index[0]
    # weight of this term in each level, without and then with degeneracy
    share = np.array([level_wts[iterm] for level_wts in df['termwt']])
    dweight = (2 * df['J'].values + 1) * share
    return np.dot(df[Ecol], dweight) / dweight.sum()

In [16]:
# Use experimental level energies via eq. (2)
# `target` is the term of the lowest experimental level (set in the eq. (1) cell);
# dfdiff holds the matched levels together with their term weights.
Eterm = term_energy_from_levels(dfdiff, target)
SOC2 = -Eterm  # same sign convention as SOC1: minus the term energy
print(f'Using experimental levels and eq. (2) for term {target}, SOC2 = {SOC2:.1f} cm-1')

Using experimental levels and eq. (2) for term (1)3F, SOC2 = -994.7 cm-1


In [17]:
# Summary: the three alternative SOC estimates for the target term
rule = '-' * 25
print(f'Molpro source file: {fsoc}')
print(f'Alternative values for SOC({target}) of atom {atom}:')
print(rule)
for label, value in [('eq (1)', SOC1), ('raw theory', SOCraw), ('eq (2)', SOC2)]:
    print('{:12s} {:.1f} cm-1'.format(label, value))
print(rule)

Molpro source file: ../UMemphis/NI15T21S-cc-TZ-DK.out
Alternative values for SOC((1)3F) of atom Ni:
-------------------------
eq (1)       -971.8 cm-1
raw theory   -1004.4 cm-1
eq (2)       -994.7 cm-1
-------------------------


In [18]:
def term_distrib(term, df):
    """Return the weight of `term` in each level of `df`, including the 2J+1 factor.

    `df` must have the columns 'J' and 'termwt'; the entry of each 'termwt'
    is selected with the index label of `term` in the global SOCI.dfterm.
    """
    itarget = SOCI.dfterm.loc[SOCI.dfterm.Term == term].index[0]
    degeneracy = 2*df.J + 1
    # bare weights (no 2J+1 factor), one per level
    bare = [level_wts[itarget] for level_wts in df.termwt]
    return bare * degeneracy

In [19]:
# Term energy errors as inferred from all levels
termlist = []
wmean = []
wstds = []
# also consider unsigned (absolute value) errors
uwmean = []
uwstds = []
uerr = np.abs(dfdiff.err.values)  # same for every term, so computed once
# Series.unique() keeps order of first appearance; iterating a set of strings gave
# a row order (and index labels) that could change from one kernel session to the next.
for term in dfdiff.uTerm.unique():
    termlist.append(term)
    # weight of this term in every matched level, including 2J+1
    weights = term_distrib(term, dfdiff).values
    m, s = chem.weighted_mean(dfdiff.err, weights)
    wmean.append(m)
    wstds.append(s)
    um, us = chem.weighted_mean(uerr, weights)
    uwmean.append(um)
    uwstds.append(us)
# Build the table in one step rather than by assigning columns to an empty frame
dftermerr = pd.DataFrame({'Term': termlist, 'wmean': wmean, 'wstds': wstds,
                          'uwmean': uwmean, 'uwstds': uwstds})

if dftermerr.isnull().values.any():
    print('*** Some terms are missing ***')
    print('Try decreasing the energy maximum ("termcut")')
else:
    print('Errors in term energies (cm-1) as inferred from full level distribution')
    print('    (not only levels where leading)')
    # default order same as experimental terms
    dftermerr.Term = pd.Categorical(dftermerr.Term, xterms)
    dftermerr = dftermerr.sort_values('Term')
# displayed in order of increasing mean unsigned error
dftermerr.sort_values('uwmean').style.format(fmt)

Errors in term energies (cm-1) as inferred from full level distribution
    (not only levels where leading)


Unnamed: 0,Term,wmean,wstds,uwmean,uwstds
4,(1)3F,9.7,6.7,10.6,7.0
3,(1)1D,819.9,5.9,820.0,5.9
2,(1)3D,867.3,4.7,867.3,4.7
0,(1)3P,1147.2,818.1,1497.1,627.6
5,(1)1G,1867.0,2.0,1867.0,2.0
6,(2)1D,1407.7,1452.0,2005.6,1005.1
1,(2)1S,2700.2,5.2,2700.3,5.2
7,(1)1S,-9154.4,23.9,9157.4,19.6


In [20]:
# How the target term is spread over the computed levels
print(f'Distribution of term "{target}" among levels:')
thrsh = 1.e-6
# copy of the level table with one extra column: the (2J+1)-weighted share of the target term
dfdistrib = dfso.assign(**{target: term_distrib(target, dfso)})
# remove rows with negligible weights, and the bulky columns
dfdistrib = dfdistrib.loc[dfdistrib[target] > thrsh].drop(columns=['termwt', 'Composition'])
display(dfdistrib.sort_values(target, ascending=False).style.format(fmt))
print(f'Total weight of {target} = {dfdistrib[target].sum():.3f}')

Distribution of term "(1)3F" among levels:


Unnamed: 0,Lead,J,Jlbl,Erel,Eshift,E,Nr,(1)3F
0,(1)3F,4.0,(1)3F_4,0.0,-1004.4,-1519.420376,"[1, 2, 3, 4, 5, 6, 7, 8, 9]",8.993338
2,(1)3F,3.0,(1)3F_3,1333.4,329.0,-1519.4143,"[17, 18, 19, 20, 21, 22, 23]",6.996513
4,(1)3F,2.0,(1)3F_2,2240.3,1235.9,-1519.410168,"[29, 30, 31, 32, 33]",4.935321
6,(1)1D,2.0,(1)1D_2,4226.3,3221.9,-1519.401119,"[37, 38, 39, 40, 41]",0.019322
3,(1)3D,2.0,(1)3D_2,1744.7,740.3,-1519.412426,"[24, 25, 26, 27, 28]",0.018199
8,(1)3P,2.0,(1)3P_2,14997.1,13992.7,-1519.352044,"[43, 44, 45, 46, 47]",0.015167
10,(2)1D,2.0,(2)1D_2,16888.8,15884.4,-1519.343425,"[51, 52, 53, 54, 55]",0.011992
12,(1)1G,4.0,(1)1G_4,23970.7,22966.3,-1519.311157,"[57, 58, 59, 60, 61, 62, 63, 64, 65]",0.006662
1,(1)3D,3.0,(1)3D_3,1073.2,68.8,-1519.415486,"[10, 11, 12, 13, 14, 15, 16]",0.003487


Total weight of (1)3F = 21.000
