In [1]:
#!/usr/bin/env python3
"""
Read SO-CI output from Molpro.
Assign omegas, including parity. 
Also show composition of a level, or distribution of a term.
C2v symmetry (4 irreps) is assumed. 
KKI March 2023: include dipole moment
"""
import re, sys, copy, glob, os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
sys.path.insert(0, '../atomic_SOC')
import molpro_subs as mpr
import chem_subs as chem

pd.set_option('display.width', 1000)

In [2]:
fsoci = 'ac5z_hybB_r2p2444_lz.pro'
#soci = '../UMemphis/mgcl-equil-karl-block.pro'
#fsoci = '../UMemphis/ch.pro'
#fsoci = '../UMemphis/mgcl_small4.pro'
print('Will read SO-CI info from file: {:s}'.format(fsoci))

Will read SO-CI info from file: ac5z_hybB_r2p2444_lz.pro


In [3]:
# Read CASSCF
PG = mpr.read_point_group(fsoci)
print('Point group is', PG)
crd, lineno_crd = mpr.read_coordinates(fsoci, linenum=True)
if isinstance(lineno_crd, list):
    # take last geometry
    crd = crd[-1]
    lineno_crd = lineno_crd[-1]
# get diatomic bond length
G = chem.Geometry(crd, intype='DataFrame', units='bohr')
G.toAngstrom()
R = np.round(G.distance(0, 1), 6)  # round the bond length to 6 digits
print('Bond length = {:.4f}'.format(R))
caslist, lineno_cas = mpr.readMULTI(fsoci, PG=PG, linenum=True)
CAS = caslist[-1]   # assume the last CASSCF to be the relevant one

oldcas = CAS.results.copy()
for Spin in sorted(set(oldcas.Spin)):
    nspin = len(oldcas[oldcas.Spin == Spin])
    print('{:d} {:s}s'.format(nspin, Spin))
print('Active space = {:d}/{:d}'.format(CAS.nactel(), CAS.nactorb()))
CAS.results = mpr.relabel_CAS_by_energy(CAS.results)
# Note any changes in CAS labeling
diflbl = (oldcas.Label != CAS.results.Label).values
if diflbl.any():
    dflbl = oldcas[['Label', 'Term']].copy()
    dflbl['NewLabel'] = CAS.results.Label
    print('Some CAS labels changed:')
    display(dflbl)
# check for dynamical weights
rx_dynw = re.compile('[~!]*dynw,(\d+)')
dynw = 0
with open(fsoci, 'r') as F:
    for line in F:
        m = rx_dynw.search(line)
        if m:
            dynw = int(m.group(1))
            print('Dynamical weighting with dynw = {:d}'.format(dynw))
if not dynw:
    print('Uniform weighting')

Point group is C2v
Bond length = 2.2444
35 Doublets
15 Quartets
Active space = 19/11
Dynamical weighting with dynw = 8
Dynamical weighting with dynw = 8


In [4]:
# count the terms that are included in the calculation
from collections import Counter
for spin, grp in CAS.results.groupby('Spin'):
    print(spin)
    print(Counter(grp.Term.tolist()))
#CAS.results

Doublet
Counter({'2Π': 12, '2Δ': 10, '2Σ+': 5, '2Φ': 4, '2Γ': 2, '2Σ-': 2})
Quartet
Counter({'4Π': 6, '4Δ': 4, '4Φ': 2, '4Σ-': 2, '4Σ+': 1})


In [5]:
casdf = CAS.results[['Irrep', 'Label', 'Energy', 'Term']].copy()
casdf['S'] = [chem.spinname(x)-1 for x in CAS.results.Spin]
casdf['Lz'] = np.round(np.sqrt(np.abs(CAS.results.LzLz)), 0).astype(int)
spin = sorted(set(casdf.S))
irreps = sorted(set(casdf.Irrep))
lzvals = sorted(set(casdf.Lz))

In [6]:
casdf['ecm'] = np.round((casdf.Energy - casdf.Energy.min()) * chem.AU2CM, 0)
casdf.sort_values('Energy')

Unnamed: 0,Irrep,Label,Energy,Term,S,Lz,ecm
0,1,1.1,-119.041268,2Δ,1,2,0.0
27,4,1.4,-119.041268,2Δ,1,2,0.0
1,1,2.1,-119.035373,2Σ+,1,0,1294.0
19,3,1.3,-119.032982,2Π,1,1,1819.0
11,2,1.2,-119.032982,2Π,1,1,1819.0
35,1,1.1,-118.992903,4Δ,3,2,10615.0
46,4,1.4,-118.992903,4Δ,3,2,10615.0
38,2,1.2,-118.990509,4Π,3,1,11140.0
42,3,1.3,-118.990509,4Π,3,1,11140.0
47,4,2.4,-118.985873,4Σ-,3,0,12158.0


In [7]:
# check for proper pairing of degenerate states
broken = False
for grp, dfg in casdf.groupby(['S', 'Lz']):
    if grp[1] == 0:
        # ignore Sigma states
        continue
    # number in each irrep should be equal
    grpi = dfg.groupby('Irrep')
    lens = grpi.size().values
    if lens[0] != lens[1]:
        print('Broken pair somewhere')
        display(dfg.sort_values('Energy'))
        broken = True
if not broken:
    print('All CASSCF state pairs are closed')

All CASSCF state pairs are closed


In [8]:
# check for non-integer Lz values
cruft = casdf.Lz - np.round(casdf.Lz, 0)
if cruft.any():
    print('*** There are non-integer values of Lz')
else:
    print('Lz values look OK')

Lz values look OK


In [9]:
# order the CASSCF states by energy within each (S, irrep) group
newdf = pd.DataFrame(columns=casdf.columns)
for lbl, dg in casdf.groupby(['S', 'Irrep']):
    dt = dg.copy().sort_values('Energy').reset_index(drop=True)
    #display(dt)
    newdf = newdf.append(dt)

In [10]:
# read MRCI
cilist, lineno_ci = mpr.readMRCI(fsoci, linenum=True)   # probably many
for m in cilist:
    m.transfer_lz(CAS.results)
mrci = [mpr.MRCIstate(row) for m in cilist for (irow, row) in m.results.iterrows()]
dfci = mpr.combineMRCI(cilist)

In [11]:
dfci['ecm'] = np.round( (dfci.Edav - dfci.Edav.min()) * chem.AU2CM, 1)
dfci.sort_values('Edav')

Unnamed: 0,Group,Spin,Irrep,Label,Energy,Edav,Ncore,dipX,dipY,dipZ,Eref,Dipole,Ref,C0,Configs,Lz,Term,ecm
29,9,Doublet,4,1.4,-119.7135,-119.784065,0,0.0,0.0,-0.588169,-119.041268,0.588169,1.4,0.951341,"{'2222202222a': 0.7818616, '22a22a2222b': 0.03...",2.0,2Δ,0.0
5,2,Doublet,1,1.1,-119.7135,-119.784064,0,0.0,0.0,-0.588175,-119.041268,0.588175,1.1,0.951341,"{'222aab22222': 0.2129926, '222aba22222': -0.0...",2.0,2Δ,0.1
0,1,Doublet,1,2.1,-119.711771,-119.783698,0,0.0,0.0,-0.686412,-119.035373,0.686412,2.1,0.950797,"{'2222a022222': 0.7224706, '22220a22222': 0.10...",0.0,2Σ+,80.5
19,6,Doublet,3,1.3,-119.706564,-119.777318,0,0.0,0.0,-0.652485,-119.032982,0.652485,1.3,0.9513,"{'22a22a222b2': 0.1000028, '2222aa222b2': -0.0...",1.0,2Π,1480.8
11,4,Doublet,2,1.2,-119.706564,-119.777318,0,0.0,0.0,-0.652485,-119.032982,0.652485,1.2,0.9513,"{'22a22a2b222': 0.1000028, '2222aa2b222': -0.0...",1.0,2Π,1480.8
48,18,Quartet,4,1.4,-119.66027,-119.730043,0,0.0,0.0,-0.161304,-118.992903,0.161304,1.4,0.951564,"{'2222aa2222a': 0.9437512, '22a22a2222a': -0.1...",2.0,4Δ,11856.4
36,12,Quartet,1,1.1,-119.66027,-119.730043,0,0.0,0.0,-0.161305,-118.992903,0.161305,1.1,0.951564,"{'222aaa22222': 0.9437512, '22aa2a22222': 0.11...",2.0,4Δ,11856.4
1,1,Doublet,1,3.1,-119.656274,-119.727746,0,0.0,0.0,-0.014132,-118.985396,0.014132,3.1,0.950718,"{'2222a022222': -0.0927829, '22220a22222': 0.6...",0.0,2Σ+,12360.5
42,15,Quartet,3,1.3,-119.657255,-119.727039,0,0.0,0.0,-0.096289,-118.990509,0.096289,1.3,0.951552,"{'2222aa222a2': 0.8773686, '22a22a222a2': -0.2...",1.0,4Π,12515.7
38,13,Quartet,2,1.2,-119.657255,-119.727039,0,0.0,0.0,-0.096289,-118.990509,0.096289,1.2,0.951552,"{'2222aa2a222': 0.8773686, '22a22a2a222': -0.2...",1.0,4Π,12515.7


In [12]:
# check for erroneously repeated CI roots
tol = 1.e-6
for irrep, grp in dfci.sort_values('Energy').groupby(['Spin', 'Irrep']):
    e = grp.Energy.values
    de = e[1:] - e[:-1]
    smal = np.abs(de) < tol
    for i, s in enumerate(smal):
        if s:
            print('Warning: closely repeated root in MRCI')
            display(grp.iloc[[i,i+1]])
print('Checking for discrepancies between reference energy and CASSCF energy')
dfcheck = dfci[['Spin', 'Irrep', 'Label', 'Edav', 'Eref']].copy()
ecas = []
for i, row in dfcheck.iterrows():
    ecas.append(CAS.results[(CAS.results.Label == row.Label) & (CAS.results.Spin == row.Spin)].Energy.values[0])
dfcheck['CAS'] = ecas
dfcheck['diff'] = np.round(dfcheck.CAS - dfcheck.Eref, 6)
dfbad = dfcheck[np.abs(dfcheck['diff']) > 0.1].sort_values(['Spin', 'Irrep', 'Label'])
if len(dfbad):
    display(dfbad)
else:
    print('\t--looks good')

Checking for discrepancies between reference energy and CASSCF energy
	--looks good


In [13]:
# read SO-CI
SOCI = mpr.fullmatSOCI(fsoci, hybrid=True)
dfterms = SOCI.average_terms()

Computational group = C2v
CASSCF states:
    35 Doublet
    15 Quartet
Replacing MRCI+Q energies by HLSDIAG values


In [14]:
# Build handling for pairs of basis states
sibbs = []  # sibling basis state that differs only in sign of Sz
for bas in SOCI.basis:
    for jbas in range(SOCI.dimen):
        obas = SOCI.basis[jbas]
        if (bas[0] == obas[0]) and (bas[1] == obas[1]) and (bas[2] == -obas[2]):
            sibbs.append(jbas)
            break
def wt_avail(ibas, omega):
    # Return the weight available from BS for specified omega
    #   sum of used weights for BS, across omegas, cannot exceed 1.0
    #   sum of used weights for omega, across sibling pairs, cannot exceed 1.0
    totused = sum([used for om, used in bas_omega[ibas].items()])
    try:
        wtleft = 1 - bas_omega[ibas][omega]
    except KeyError:
        # this omega not available for this basis state
        return 0
    # deduct any used by sibling
    jbas = sibbs[ibas]
    wtleft -= bas_omega[jbas][omega]
    # don't exceed totused
    wtleft = min(wtleft, 1-totused)

In [15]:
# for Sz = +1/2
mat = np.array([[0, 0+67j], [0-67j, 0]])
print(mat)
xvals, xvecs = np.linalg.eigh(mat)
print(xvals)
print(np.round(xvecs, 3))

[[0. +0.j 0.+67.j]
 [0.-67.j 0. +0.j]]
[-67.  67.]
[[-0.707-0.j    -0.707+0.j   ]
 [ 0.   -0.707j  0.   +0.707j]]


In [16]:
# for Sz = -1/2
mat = np.array([[0, 0-67j], [0+67j, 0]])
print(mat)
xvals, xvecs = np.linalg.eigh(mat)
print(xvals)
print(np.round(xvecs, 3))

[[0. +0.j 0.-67.j]
 [0.+67.j 0. +0.j]]
[-67.  67.]
[[-0.707-0.j    -0.707+0.j   ]
 [ 0.   +0.707j  0.   -0.707j]]


In [17]:
SOCI.basis

[('2.1', 0.5, 0.5),
 ('3.1', 0.5, 0.5),
 ('6.1', 0.5, 0.5),
 ('8.1', 0.5, 0.5),
 ('11.1', 0.5, 0.5),
 ('2.1', 0.5, -0.5),
 ('3.1', 0.5, -0.5),
 ('6.1', 0.5, -0.5),
 ('8.1', 0.5, -0.5),
 ('11.1', 0.5, -0.5),
 ('1.1', 0.5, 0.5),
 ('4.1', 0.5, 0.5),
 ('5.1', 0.5, 0.5),
 ('7.1', 0.5, 0.5),
 ('10.1', 0.5, 0.5),
 ('1.1', 0.5, -0.5),
 ('4.1', 0.5, -0.5),
 ('5.1', 0.5, -0.5),
 ('7.1', 0.5, -0.5),
 ('10.1', 0.5, -0.5),
 ('9.1', 0.5, 0.5),
 ('9.1', 0.5, -0.5),
 ('1.2', 0.5, 0.5),
 ('2.2', 0.5, 0.5),
 ('4.2', 0.5, 0.5),
 ('5.2', 0.5, 0.5),
 ('6.2', 0.5, 0.5),
 ('8.2', 0.5, 0.5),
 ('1.2', 0.5, -0.5),
 ('2.2', 0.5, -0.5),
 ('4.2', 0.5, -0.5),
 ('5.2', 0.5, -0.5),
 ('6.2', 0.5, -0.5),
 ('8.2', 0.5, -0.5),
 ('3.2', 0.5, 0.5),
 ('7.2', 0.5, 0.5),
 ('3.2', 0.5, -0.5),
 ('7.2', 0.5, -0.5),
 ('1.3', 0.5, 0.5),
 ('2.3', 0.5, 0.5),
 ('4.3', 0.5, 0.5),
 ('5.3', 0.5, 0.5),
 ('6.3', 0.5, 0.5),
 ('8.3', 0.5, 0.5),
 ('1.3', 0.5, -0.5),
 ('2.3', 0.5, -0.5),
 ('4.3', 0.5, -0.5),
 ('5.3', 0.5, -0.5),
 ('6.3', 0.5,

In [18]:
def SO_assign_omega2(SOCI, csq_thresh=0.001, silent=False,
                    ordering='up', failure='crash', debug=False):
    '''
    New attempt (May 2023) to assign Omega to SO-CI states
        Based upon averaged terms instead of MRCI states
    Assign 0+/0- simply based upon irrep (if C2v)
    'SOCI' is a mpr.fullmatSOCI() object
    'ordering' determines which order Omega values are considered
        ('up', 'down', 'rare', 'common')
    'failure' == 'OK' will continue despite assignment failure
    Return a DataFrame
    '''
    sob_ici, ci_sob = mpr.link_MRCI_SObasis(SOCI.mrci, SOCI.basis)
    vals = SOCI.SOe.results.Eshift.values  # cm-1 energies relative to the reference
    Nrs = SOCI.SOe.results.Nr.values
    E = SOCI.SOe.results.E.values  # state energies in hartree
    nirrep = len(set(SOCI.SOe.results.Irrep))
    dimen = len(SOCI.basis)
    SO_omega = [None] * dimen   # assigned values of Omega
    parenterm = [None] * dimen  # term label for dominant parent
    termlabel = []              # term label including omega
    leadwt = [0] * dimen        # weight of leading term
    # 
    # get list of required omega values
    omegavals = mpr.omega_counts(mrci, silent=True)
    if not silent:
        print('Target omega counts:', omegavals)
    #
    # determine the possible omega values for each SO basis state
    #    allow negative values, to get multiplicities right
    bas_omega = []  # weights already used for possible omega values, for each basis state
    totbas_omega = []  # total permitted wiehgts for omega values, for each basis state
    omall = set()   # possible omegas across whole SO basis 
    for ibas, bas in enumerate(SOCI.basis):
        Sz = bas[2]
        Lz = mrci[sob_ici[ibas]].Lz
        lo = round(Lz-Sz, 1)  # signed
        hi = round(Lz+Sz, 1)
        ulo = abs(lo)  # unsigned
        uhi = abs(hi)
        omset = set([ulo, uhi])
        # before assignments, used weights are all zero
        bas_omega.append({o: 0. for o in omset})
        omall = omall.union(omset)  # omegas across all states
        totbas_omega.append({o: 1. for o in omset})
        if (ulo == uhi) and (lo != hi):
            # don't double-counter omega = 0
            totbas_omega[ibas][ulo] = 2.
            
    print('>>>totbas_omega:')
    for i, tdict in enumerate(totbas_omega):
        print(i, '\t', tdict)
    # Keep track of omega values assigned for each averaged term
    #    and for each basis state   
    # Setup accounting within terms 
    dfterm = SOCI.dfterm.copy()
    omavail = [{om: 0. for om in omall} for i in range(SOCI.nterm)]
    dfterm['om_avail'] = omavail
    '''
    term_idx = [-1] * dimen  # index/row of average term corresponding to SO basis state
    for ibas in range(dimen):
        jterm = SOCI.sob_iterm[ibas]
        term = dfterm.at[jterm, 'Term']
        omd = dfterm.loc[dfterm.Term == term, 'om_avail'].values[0]
        for oms in bas_omega[ibas]:
            om = abs(oms)
            omd[om] += 1
        iterm = dfterm[dfterm.Term == term].index.values[0]
        term_idx[ibas] = iterm
    '''
    term_idx = SOCI.sob_iterm # index/row of average term corresponding to SO basis state
    # Correct double-counting for spatially degenerate terms
    for i, row in dfterm.iterrows():
        n = len(row.idx)
        if n > 1:
            omd = row.om_avail
            for om in omd.keys():
                omd[om] /= n
    # check for inconsistent omega counting (terms vs. basis states)
    twtsum = 0
    wtsum = 0
    for om_avail in dfterm.om_avail:
        for n in om_avail.values():
            twtsum += n
    for om_avail in bas_omega:
        for n in om_avail.values():
            wtsum += n
    if debug:
        print(f'Initial weight sum for terms = {twtsum:.3f}')
        print(f'Initial weight sum for basis states = {wtsum:.3f}')
    if twtsum != wtsum:
        # something is wrong; show term components
        for iterm, row in dfterm.iterrows():
            tsum = sum(row.om_avail.values())
            bsum = 0
            for ibas in SOCI.term_iso[iterm]:
                bsum += sum(bas_omega[ibas].values())
            if tsum != bsum:
                print(f'iterm = {iterm} has total {tsum}', chem.round_dict(row.om_avail))
                for ibas in SOCI.term_iso[iterm]:
                    sum1 = sum(bas_omega[ibas].values())
                    print(f'iterm = {iterm}, ibas = {ibas} has total {sum1}:', chem.round_dict(bas_omega[ibas]))
    sibbs = []  # sibling basis state that differs only in sign of Sz
    for bas in SOCI.basis:
        for jbas in range(dimen):
            obas = SOCI.basis[jbas]
            if (bas[0] == obas[0]) and (bas[1] == obas[1]) and (bas[2] == -obas[2]):
                sibbs.append(jbas)
                break

    # look at decreasingly smaller contributions
    omposs = [omall.copy() for i in range(dimen)]  # possible omega values for each SO state (list of sets)
    ncontrib = 0   # depth of contributions to consider
    changed = True
    above_thresh = True
    nleft = dimen  # number of unassigned SO states
    omegavalslist = list(omegavals.keys())
    if ordering == 'up':
        omegavalslist = sorted(omegavalslist)
    elif ordering == 'down':
        omegavalslist = sorted(omegavalslist, reverse=True)
    elif ordering == 'rare':
        omegavalslist = sorted(omegavalslist, key=lambda x: omegavals[x])
    elif ordering == 'common':
        omegavalslist = sorted(omegavalslist, key=lambda x: omegavals[x], reverse=True)
    else:
        chem.print_err('', f'Unknown ordering "{ordering}"')
        
    def assign_one(istate, omega):
        # update vars to assign omega value to SO state
        nonlocal changed
        SO_omega[istate] = omega
        omegavals[omega] -= 1
        # deduct from term's om_avail[omega]
        for iterm, row in dfterm.iterrows():
            twt = SOCI.termwt[iterm, istate] # weight of term in this state
#            if debug and (twt > csq_thresh):
#                print(f'\ttwt = {twt:.3f} for iterm = {iterm}')
            omd = row.om_avail
            omd[omega] -= twt
            # check for deficits
            #if debug and (omd[omega] < -csq_thresh):
            #    print(f'\tnegative omd[{omega}] {omd[omega]:.5f} for term {iterm}')
        # add to basis state's bas_omega[omega] (showing usage, not remaining)
        for ibas in range(dimen):
            wt = SOCI.vecsq[ibas, istate]
            if debug and (wt > csq_thresh):
                print(f'\twt = {wt:.3f} for basis state #{ibas}')
            omd = bas_omega[ibas]
            try:
                omd[omega] += wt
                if debug and (omd[omega] > totbas_omega[ibas][omega] + csq_thresh):
                    print(f'\texcessive omd[{omega}] {omd[omega]:.5f} for basis state {ibas}')
            except KeyError:
                # invalid omega for this basis state
                pass
        changed = True
        return
    def wt_avail(ibas, omega):
        # Return the weight available from BS for specified omega
        #   sum of used weights for BS, across omegas, cannot exceed 1.0
        #   sum of used weights for omega, across sibling pairs, cannot exceed 1.0
        try:
            wtleft = totbas_omega[ibas][omega] - bas_omega[ibas][omega]
        except KeyError:
            # this omega not available for this basis state
            return 0
        # deduct any used by sibling
        jbas = sibbs[ibas]
        wtleft -= bas_omega[jbas][omega]
        # don't exceed totused (across omegas)
        totused = sum([used for om, used in bas_omega[ibas].items()])
        totallowed = sum([allowed for allowed in totbas_omega[ibas].values()])
        wtleft = min(wtleft, totallowed - totused)
        return wtleft
    def thresh_filter(ibas, thresh):
        # Return a dict of {omega: remaining_weight} restricted to 
        #   remaining_weight >= thresh - csq_thresh
        d = {}
        for om in bas_omega[ibas].keys():
            wa = wt_avail(ibas, om)
            if wa > thresh - csq_thresh:
                d[om] = wa
        return d
    def thresh_tfilter(iterm, thresh):
        # Return a dict of {omega: remaining_weight} restricted to 
        #   remaining_weight >= thresh
        d = {k: v for k, v in dfterm.at[iterm, 'om_avail'].items() if v >= thresh}
        return d
    def dump_progress():
        # for debugging: print available weights for all basis states
        for ibas in range(dimen):
            print('<<<', ibas, '\t', thresh_filter(ibas, -np.inf))
        return
    
    dump_progress()
    while (changed or above_thresh) and nleft:
        changed = False
        above_thresh = False
        
        #### Are there omega values for which number possible = number needed?
        for om in omegavalslist:
            need = omegavals[om]
            iposs = []  # list of states that could assign Omega = 'om'
            for iso, oms in enumerate(omposs):
                if (om in oms) and (SO_omega[iso] is None):
                    iposs.append(iso)
            if debug:
                s = 'for Ω = {:.1f}, need {:d} states, {:d} possibilities'
                print(s.format(om, need, len(iposs)))
            if (need > 0) and (len(iposs) == need):
                # assign these
                for iso in iposs:
                    assign_one(iso, om)
                need = 0
                if debug:
                    print('\t--assigned_A')
            if need == 0:
                # don't assign any more states to this Omega
                for oms in omposs:
                    oms.discard(om)
                if debug and len(iposs):
                    print(f'\teliminating omega = {om} as a possibility')
                    
        #### Consider the next smaller contribution to each SO state
        for iso in range(dimen):
            # loop over SO states, not basis functions
            if SO_omega[iso] is not None:
                # already assigned
                continue
            if debug:
                print(f'Weight[{ncontrib}] in state {iso}')
            wt = SOCI.vecsq[:, iso]
            twt = SOCI.termwt[:, iso]
            # sort the index   
            idx = np.argsort(-wt)  # decreasing order
            ibas = idx[ncontrib]
            iterm = SOCI.sob_iterm[ibas] # parent term of this basis state
            if ncontrib == 0:
                # First pass--get term symbol from term with largest total weight
                #   it might not be the same as the basis state with the largest weight?
                itmax = np.argmax(twt)
                parenterm[iso] = dfterm.at[itmax, 'Term']
                leadwt[iso] = twt[itmax]
                if debug:
                    print('\tleading term is {:s} with iterm = {:d}'.format(parenterm[iso], itmax))
                    print(f'\tleading basis state is #{ibas} with usage', chem.round_dict(bas_omega[ibas]),
                         f'and sib {sibbs[ibas]}')
                    if ibas not in SOCI.term_iso[itmax]:
                        print('\t\t---the leading basis state is not part of the leading term---')
            if wt[ibas] < csq_thresh:
                # done with significant weights for this state
                continue
            else:
                above_thresh = True
            bas_rem = thresh_filter(ibas, wt[ibas])  # dict of omegas and remaining weights that are large enough
            print('>>>allowed ', bas_rem)
            u = omposs[iso].intersection(bas_rem.keys())
            if debug:
                print('\tweight {:.4f} for BS#{} in term {} with remaining possibilities {}'.format(wt[ibas], 
                                            ibas, dfterm.at[iterm, 'Term'], chem.round_dict(bas_rem)))
            if len(u) == 0:
                # all possibilities eliminated! dismiss the last component as noise
                if debug:
                    print('All possibilities eliminated! Dismiss this weight as noise')
                continue
            omposs[iso] = u.copy()
            if debug:
                print('\tpossible:', omposs[iso])
            if len(u) == 1:
                # only one omega remains; assign it
                om = u.pop()
                assign_one(iso, om)
                if debug:
                    print('\t--assigned_B')
                    dump_progress()
                    
        ncontrib += 1
        nleft = len([x for x in SO_omega if x is None])
    #
    # check for states missing assignments
    for i, om in enumerate(SO_omega):
        if om is None:
            # this is bad
            print('missing Ω for SO state {:d} with E = {:.1f} cm-1 and parent {:s} '.format(i, vals[i], parenterm[i]))
            print('\tpossibilities: ', omposs[i])
            # install the ambiguity in the list
            SO_omega[i] = repr(omposs[i])
    # check for excess and lacking omega counts
    ok = True
    for om, count in omegavals.items():
        if count < 0:
            s = f'Overcounting for Ω = {om}'
            chem.print_err('', s, halt=False)
            ok = False
            if failure != 'OK':
                raise ValueError(s)  # increasing the threshold may help
        if count > 0:
            chem.print_err('', f'Missing {count} states for Ω = {om}', halt=False)
            ok = False
    # Find largest and smallest term residua
    residmax = [-1, '?', -1, -np.inf]  # [iterm, term label, omega, residuum]
    residmin = [-1, '?', -1,  np.inf]
    residsum = {o: 0 for o in omall}   # sums across all terms
    for iterm, row in dfterm.iterrows():
        for om, res in row.om_avail.items():
            if res > residmax[3]:
                residmax = [iterm, row.Term, om, res]
            if res < residmin[3]:
                residmin = [iterm, row.Term, om, res]
            residsum[om] += res
    # Likewise for basis states
    bmax = [-1, -1, -np.inf]  # [ibas, omega, residuum]
    bmin = [-1, -1, np.inf]
    bsum = {o: 0 for o in omall}  # sums across all basis states
    for ibas in range(dimen):
        for om, res in bas_omega[ibas].items():
            if res > bmax[2]:
                bmax = [ibas, om, res]
            if res < bmin[2]:
                bmin = [ibas, om, res]
            bsum[om] += res
    # Round the totals for printing
    for om in omall:
        residsum[om] = np.round(residsum[om], 3)
        bsum[om] = np.round(bsum[om], 3)
    if debug:
        print('Extremes of term residua:')
        print('\tmax for iterm = {:d}, {:s}, omega = {:.1f} residuum = {:.4f}'.format(*residmax))
        print('\tmin for iterm = {:d}, {:s}, omega = {:.1f} residuum = {:.4f}'.format(*residmin))
        print('\tSums of residua across terms:', residsum)
        print('Extremes of basis-state residua')
        print('\tmax for ibas = {:d}, omega = {:.1f} resid = {:.4f}'.format(*bmax))
        print('\tmin for ibas = {:d}, omega = {:.1f} resid = {:.4f}'.format(*bmin))
        print('\tSums of residua across basis states:', bsum)
    if not ok:
        print('*******************************')
        print('*** OMEGA ASSIGNMENTS FAILED***')
        print('*******************************')
        # set all Omega=0 to prevent pairing
        #SO_omega = [0] * len(SO_omega)
        df = pd.DataFrame({'E': E, 'cm-1': vals, mpr.OMEGA: SO_omega, 'term': parenterm,
                           'wt': leadwt,  'Nr': Nrs, })
        df['exc'] = df['cm-1'] - df['cm-1'].min()
        return df, ok
    #
    # strip any unnecessary '(1)' specifiers from MRCI term labels
    just_one = set()
    rx = re.compile('\((\d+)\)(\S+)')
    for t in parenterm:
        m = rx.match(t)
        if m:
            if int(m.group(1)) == 1:
                just_one = just_one.union({m.group(2)})
            else:
                # prefix is higher than (1); keep prefixes for this term symbol
                just_one.discard(m.group(2))
    for symb in just_one:
        # remove leading (1)
        for i, t in enumerate(parenterm):
            if t == '(1)' + symb:
                parenterm[i] = symb
    # create state labels that include omega as a "subscript"
    for i, pt in enumerate(parenterm):
        try:
            olbl = mpr.halves(SO_omega[i])
        except TypeError:
            # omega assignment failed
            olbl = SO_omega[i]
        if (SO_omega[i] == 0) and (nirrep == 4):
            # assign parity based upon irrep (assuming C2v!)
            irr0 = SOCI.SOe.results.at[i, 'Irrep']
            if irr0 == 1:
                olbl = olbl + '+'
            elif irr0 == 4:
                olbl = olbl + '-'
            elif not silent:
                print('Omega = 0 but irrep = {:d}'.format(irr0))
        termlabel.append('_'.join([pt, olbl]))
    # return a DataFrame
    df = pd.DataFrame({'E': E, 'cm-1': vals, mpr.OMEGA: SO_omega, 'term': parenterm, 
                       'wt': leadwt, 'label': termlabel, 'Nr': Nrs})
    df['exc'] = df['cm-1'] - df['cm-1'].min()
    return df, ok
##


In [19]:
    self = SOCI  
    csq_thresh = 0.001
    silent = False
    ordering = 'up'
    failure = 'crash'
    debug = True
    ok = False
    dfstates, ok = SO_assign_omega2(self, csq_thresh=csq_thresh,
                                    silent=silent, ordering=ordering, failure=failure, debug=debug)

Target omega counts: {0.5: 48, 1.5: 40, 2.5: 26, 3.5: 12, 4.5: 4}
>>>totbas_omega:
0 	 {0.5: 2.0}
1 	 {0.5: 2.0}
2 	 {0.5: 2.0}
3 	 {0.5: 2.0}
4 	 {0.5: 2.0}
5 	 {0.5: 2.0}
6 	 {0.5: 2.0}
7 	 {0.5: 2.0}
8 	 {0.5: 2.0}
9 	 {0.5: 2.0}
10 	 {1.5: 1.0, 2.5: 1.0}
11 	 {1.5: 1.0, 2.5: 1.0}
12 	 {1.5: 1.0, 2.5: 1.0}
13 	 {1.5: 1.0, 2.5: 1.0}
14 	 {1.5: 1.0, 2.5: 1.0}
15 	 {1.5: 1.0, 2.5: 1.0}
16 	 {1.5: 1.0, 2.5: 1.0}
17 	 {1.5: 1.0, 2.5: 1.0}
18 	 {1.5: 1.0, 2.5: 1.0}
19 	 {1.5: 1.0, 2.5: 1.0}
20 	 {3.5: 1.0, 4.5: 1.0}
21 	 {3.5: 1.0, 4.5: 1.0}
22 	 {0.5: 1.0, 1.5: 1.0}
23 	 {0.5: 1.0, 1.5: 1.0}
24 	 {0.5: 1.0, 1.5: 1.0}
25 	 {0.5: 1.0, 1.5: 1.0}
26 	 {0.5: 1.0, 1.5: 1.0}
27 	 {0.5: 1.0, 1.5: 1.0}
28 	 {0.5: 1.0, 1.5: 1.0}
29 	 {0.5: 1.0, 1.5: 1.0}
30 	 {0.5: 1.0, 1.5: 1.0}
31 	 {0.5: 1.0, 1.5: 1.0}
32 	 {0.5: 1.0, 1.5: 1.0}
33 	 {0.5: 1.0, 1.5: 1.0}
34 	 {2.5: 1.0, 3.5: 1.0}
35 	 {2.5: 1.0, 3.5: 1.0}
36 	 {2.5: 1.0, 3.5: 1.0}
37 	 {2.5: 1.0, 3.5: 1.0}
38 	 {0.5: 1.0, 1.5: 1.0}
39 	 {0.5: 1.

	weight 0.1838 for BS#112 in term 4Φ with remaining possibilities {2.5: 1.0, 3.5: 1.0}
	possible: {2.5, 3.5}
Weight[0] in state 66
	leading term is (1)2Φ with iterm = 14
	leading basis state is #52 with usage {2.5: 0.0, 3.5: 0.0} and sib 50
>>>allowed  {2.5: 1.0, 3.5: 1.0}
	weight 0.1787 for BS#52 in term (1)2Φ with remaining possibilities {2.5: 1.0, 3.5: 1.0}
	possible: {2.5, 3.5}
Weight[0] in state 67
	leading term is (1)2Φ with iterm = 14
	leading basis state is #50 with usage {2.5: 0.0, 3.5: 0.0} and sib 52
>>>allowed  {2.5: 1.0, 3.5: 1.0}
	weight 0.1787 for BS#50 in term (1)2Φ with remaining possibilities {2.5: 1.0, 3.5: 1.0}
	possible: {2.5, 3.5}
Weight[0] in state 68
	leading term is 4Φ with iterm = 9
	leading basis state is #94 with usage {1.5: 0.084, 4.5: 0.0} and sib 97
>>>allowed  {1.5: 0.832725170060919, 4.5: 1.0}
	weight 0.1453 for BS#94 in term 4Φ with remaining possibilities {1.5: 0.833, 4.5: 1.0}
	possible: {1.5, 4.5}
Weight[0] in state 69
	leading term is 4Φ with iterm

>>>allowed  {0.5: 0.9655615723283242, 1.5: 0.7414347641640939}
	weight 0.1378 for BS#41 in term (4)2Π with remaining possibilities {0.5: 0.966, 1.5: 0.741}
	possible: {0.5, 1.5}
Weight[0] in state 85
	leading term is (4)2Π with iterm = 18
	leading basis state is #47 with usage {0.5: 0.017, 1.5: 0.129} and sib 41
>>>allowed  {0.5: 0.9655615723283242, 1.5: 0.7414347641640939}
	weight 0.1378 for BS#47 in term (4)2Π with remaining possibilities {0.5: 0.966, 1.5: 0.741}
	possible: {0.5, 1.5}
Weight[0] in state 86
	leading term is (1)2Σ- with iterm = 15
	leading basis state is #56 with usage {0.5: 0.401} and sib 54
>>>allowed  {0.5: 1.198199548543352}
	weight 0.1947 for BS#56 in term (1)2Σ- with remaining possibilities {0.5: 1.198}
	possible: {0.5}
	wt = 0.020 for basis state #6
	wt = 0.018 for basis state #8
	wt = 0.024 for basis state #23
	wt = 0.088 for basis state #24
	wt = 0.011 for basis state #26
	wt = 0.004 for basis state #27
	wt = 0.024 for basis state #39
	wt = 0.088 for basis sta

>>>allowed  {2.5: 0.5749667884373035}
	weight 0.1452 for BS#91 in term (1)4Π with remaining possibilities {2.5: 0.575}
	possible: {2.5}
	wt = 0.002 for basis state #15
	wt = 0.233 for basis state #16
	wt = 0.020 for basis state #17
	wt = 0.003 for basis state #19
	wt = 0.021 for basis state #34
	wt = 0.007 for basis state #35
	wt = 0.021 for basis state #50
	wt = 0.007 for basis state #51
	wt = 0.002 for basis state #63
	wt = 0.232 for basis state #64
	wt = 0.020 for basis state #65
	wt = 0.003 for basis state #67
	wt = 0.035 for basis state #78
	wt = 0.145 for basis state #91
	wt = 0.007 for basis state #92
	wt = 0.026 for basis state #95
	wt = 0.145 for basis state #107
	wt = 0.007 for basis state #108
	wt = 0.026 for basis state #111
	wt = 0.035 for basis state #126
	--assigned_B
<<< 0 	 {0.5: 0.018112631751867503}
<<< 1 	 {0.5: 0.1577356728855167}
<<< 2 	 {0.5: 0.11600386005501029}
<<< 3 	 {0.5: 0.16996427449520712}
<<< 4 	 {0.5: 0.1635318717291362}
<<< 5 	 {0.5: 0.0181126317518675

<<< 96 	 {2.5: 0.17002757584706013, 3.5: 0.8500539601044936}
<<< 97 	 {1.5: 0.20658774823413095, 4.5: 0.9668365821865279}
<<< 98 	 {0.5: 9.038180926979056e-05, 2.5: 0.14284002321595224}
<<< 99 	 {0.5: 0.0018859364072154605, 2.5: 0.10022827381373578}
<<< 100 	 {0.5: 0.0005227310175914823, 2.5: 0.14915102006063674}
<<< 101 	 {0.5: 0.0014094159815723484, 1.5: 0.17578925845478632}
<<< 102 	 {0.5: 0.010025001126247768, 1.5: 0.5011497037774308}
<<< 103 	 {0.5: 0.0009341638279868891, 1.5: 0.17635585968750145}
<<< 104 	 {0.5: 0.0014094159815723484, 1.5: 0.17578925845478632}
<<< 105 	 {0.5: 0.010025001126247712, 1.5: 0.5011497037774308}
<<< 106 	 {0.5: 0.0009341638279868336, 1.5: 0.1763558596875015}
<<< 107 	 {0.5: 9.038180926979056e-05, 2.5: 0.14284002321595224}
<<< 108 	 {0.5: 0.0018859364072154605, 2.5: 0.10022827381373584}
<<< 109 	 {0.5: 0.0005227310175914823, 2.5: 0.14915102006063674}
<<< 110 	 {1.5: 0.20658775009945568, 4.5: 0.9668365822126819}
<<< 111 	 {2.5: 0.17002769814285673, 3.5: 0

In [33]:
if ok:
    print('Looks good!')
else:
    print('Failure!')
display(dfstates)
SOCI.dfso = dfstates

Looks good!


Unnamed: 0,E,cm-1,Ω,term,wt,label,Nr,exc
0,-119.860885,-4170.78,2.5,(1)2Δ,0.982950,(1)2Δ_5/2,1,0.00
1,-119.860885,-4170.78,2.5,(1)2Δ,0.982950,(1)2Δ_5/2,2,0.00
2,-119.854930,-2863.76,0.5,(1)2Σ+,0.686890,(1)2Σ+_1/2,3,1307.02
3,-119.854930,-2863.76,0.5,(1)2Σ+,0.686890,(1)2Σ+_1/2,4,1307.02
4,-119.854576,-2785.94,1.5,(1)2Π,0.706705,(1)2Π_3/2,5,1384.84
...,...,...,...,...,...,...,...,...
125,-119.689134,33524.41,2.5,(4)2Δ,0.579685,(4)2Δ_5/2,126,37695.19
126,-119.685428,34337.71,1.5,(6)2Π,0.379967,(6)2Π_3/2,127,38508.49
127,-119.685428,34337.71,1.5,(6)2Π,0.379967,(6)2Π_3/2,128,38508.49
128,-119.684257,34594.78,0.5,(5)2Π,0.273785,(5)2Π_1/2,129,38765.56


In [34]:
d = pd.DataFrame(columns=SOCI.basis, data=np.round(SOCI.vecsq, 3))
# Rows are eigenvectors
d

Unnamed: 0,"(2.1, 0.5, 0.5)","(3.1, 0.5, 0.5)","(6.1, 0.5, 0.5)","(8.1, 0.5, 0.5)","(11.1, 0.5, 0.5)","(2.1, 0.5, -0.5)","(3.1, 0.5, -0.5)","(6.1, 0.5, -0.5)","(8.1, 0.5, -0.5)","(11.1, 0.5, -0.5)",...,"(2.4, 1.5, -1.5)","(4.4, 1.5, -1.5)","(1.4, 1.5, 1.5)","(3.4, 1.5, 1.5)","(1.4, 1.5, 0.5)","(3.4, 1.5, 0.5)","(1.4, 1.5, -0.5)","(3.4, 1.5, -0.5)","(1.4, 1.5, -1.5)","(3.4, 1.5, -1.5)"
0,0.0,0.0,0.0,0.687,0.0,0.0,0.0,0.243,0.0,0.0,...,0.000,0.0,0.0,0.000,0.000,0.000,0.000,0.000,0.000,0.0
1,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.073,0.0,0.0,...,0.000,0.0,0.0,0.007,0.000,0.000,0.000,0.000,0.001,0.0
2,0.0,0.0,0.0,0.001,0.0,0.0,0.0,0.005,0.0,0.0,...,0.000,0.0,0.0,0.002,0.000,0.000,0.000,0.000,0.000,0.0
3,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.002,0.0,0.0,...,0.000,0.0,0.0,0.432,0.000,0.000,0.000,0.000,0.156,0.0
4,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000,0.0,0.0,...,0.000,0.0,0.0,0.010,0.000,0.000,0.000,0.000,0.014,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000,0.0,0.0,...,0.000,0.0,0.0,0.000,0.012,0.000,0.025,0.000,0.000,0.0
126,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000,0.0,0.0,...,0.002,0.0,0.0,0.000,0.000,0.006,0.000,0.003,0.000,0.0
127,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.000,0.0,0.0,...,0.000,0.0,0.0,0.000,0.000,0.012,0.000,0.025,0.000,0.0
128,0.0,0.0,0.0,0.000,0.0,0.0,0.0,0.002,0.0,0.0,...,0.000,0.0,0.0,0.001,0.000,0.000,0.000,0.000,0.002,0.0


In [35]:
d = pd.DataFrame(columns=SOCI.basis, data=np.round(SOCI.vec, 3))
print(fsoci)
d

ac5z_hybB_r2p2444_lz.pro


Unnamed: 0,"(2.1, 0.5, 0.5)","(3.1, 0.5, 0.5)","(6.1, 0.5, 0.5)","(8.1, 0.5, 0.5)","(11.1, 0.5, 0.5)","(2.1, 0.5, -0.5)","(3.1, 0.5, -0.5)","(6.1, 0.5, -0.5)","(8.1, 0.5, -0.5)","(11.1, 0.5, -0.5)",...,"(2.4, 1.5, -1.5)","(4.4, 1.5, -1.5)","(1.4, 1.5, 1.5)","(3.4, 1.5, 1.5)","(1.4, 1.5, 0.5)","(3.4, 1.5, 0.5)","(1.4, 1.5, -0.5)","(3.4, 1.5, -0.5)","(1.4, 1.5, -1.5)","(3.4, 1.5, -1.5)"
0,-0.000+0.000j,0.000+0.000j,0.0+0.0j,0.829+0.000j,-0.000+0.000j,-0.000+0.000j,-0.0+0.0j,0.493+0.000j,0.000+0.000j,0.000+0.000j,...,-0.000+0.000j,-0.000+0.000j,0.0+0.0j,0.010+0.000j,0.00+0.00j,0.000+0.000j,0.000+0.000j,0.000+0.000j,-0.009+0.000j,0.0+0.0j
1,-0.000+0.000j,0.000+0.000j,0.0+0.0j,0.012-0.000j,0.000+0.000j,-0.000-0.000j,0.0-0.0j,-0.271-0.000j,-0.000+0.000j,-0.000-0.000j,...,0.000-0.000j,0.000+0.000j,0.0+0.0j,-0.086-0.000j,-0.00+0.00j,-0.000-0.000j,-0.000-0.000j,0.000-0.000j,-0.039-0.000j,0.0+0.0j
2,0.000-0.000j,-0.000-0.000j,-0.0+0.0j,-0.031-0.000j,0.000-0.000j,0.000-0.000j,-0.0-0.0j,0.071-0.000j,0.000-0.000j,0.000-0.000j,...,0.000+0.000j,0.000+0.000j,-0.0+0.0j,-0.039-0.000j,-0.00+0.00j,-0.000-0.000j,-0.000-0.000j,-0.000+0.000j,-0.005-0.000j,0.0+0.0j
3,0.000+0.000j,0.000-0.000j,0.0+0.0j,0.019-0.000j,0.000-0.000j,-0.000-0.000j,0.0-0.0j,-0.039-0.000j,-0.000+0.000j,-0.000+0.000j,...,-0.000-0.000j,-0.000+0.000j,0.0+0.0j,0.657-0.000j,0.00+0.00j,0.000-0.000j,0.000-0.000j,-0.000+0.000j,0.395-0.000j,-0.0-0.0j
4,-0.000-0.000j,-0.000+0.000j,0.0-0.0j,0.004+0.000j,0.000+0.000j,0.000+0.000j,-0.0+0.0j,-0.010+0.000j,0.000-0.000j,0.000-0.000j,...,-0.000+0.000j,-0.000-0.000j,-0.0-0.0j,0.102+0.000j,0.00-0.00j,0.000+0.000j,0.000+0.000j,0.000-0.000j,0.118+0.000j,0.0+0.0j
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
125,-0.000+0.003j,0.000-0.000j,-0.0-0.0j,-0.000-0.000j,-0.000-0.000j,-0.000-0.013j,-0.0+0.0j,0.000-0.000j,-0.000+0.007j,0.000+0.000j,...,0.000-0.000j,0.000-0.016j,-0.0+0.0j,-0.000-0.000j,-0.00+0.11j,0.000+0.000j,0.000+0.157j,-0.000-0.000j,-0.000-0.000j,-0.0+0.0j
126,-0.000+0.000j,0.000+0.006j,0.0-0.0j,-0.000+0.000j,0.000+0.003j,0.000-0.000j,0.0-0.0j,-0.000+0.000j,0.000+0.000j,-0.000-0.003j,...,-0.000-0.048j,0.000+0.000j,-0.0+0.0j,0.000-0.000j,0.00+0.00j,-0.000-0.076j,0.000+0.000j,0.000+0.058j,-0.000+0.000j,0.0-0.0j
127,0.000-0.000j,-0.000-0.003j,0.0-0.0j,-0.000+0.000j,0.000+0.013j,0.000-0.000j,0.0-0.0j,-0.000+0.000j,-0.000-0.000j,0.000+0.007j,...,-0.000-0.016j,0.000+0.000j,-0.0+0.0j,-0.000-0.000j,0.00+0.00j,-0.000-0.110j,0.000+0.000j,0.000+0.157j,-0.000+0.000j,0.0-0.0j
128,-0.000-0.000j,-0.000+0.000j,0.0-0.0j,0.000-0.002j,-0.000+0.000j,-0.000+0.000j,-0.0+0.0j,-0.000+0.044j,0.000+0.000j,0.000+0.000j,...,-0.000-0.000j,0.000-0.000j,-0.0+0.0j,-0.000+0.036j,0.00-0.00j,0.000-0.000j,0.000-0.000j,-0.000+0.000j,-0.000-0.049j,0.0+0.0j


In [36]:
# sum of each eigenvector
def ffun(vec):
    s = vec.sum()
    f = np.real(s) * np.imag(s)
    return f
print(fsoci)
for irow, row in d.iterrows():
    print(irow, f'\t{ffun(row.values):6.3f}')
    #print(irow, row[1:], f'\t{ffun(row):6.3f}')

ac5z_hybB_r2p2444_lz.pro
0 	 0.000
1 	-0.000
2 	-0.000
3 	-0.000
4 	 0.000
5 	 0.000
6 	 0.002
7 	 0.005
8 	-0.000
9 	 0.000
10 	-0.000
11 	 0.000
12 	 0.000
13 	 0.000
14 	-0.000
15 	 0.029
16 	-0.776
17 	-0.049
18 	-0.291
19 	 0.048
20 	-0.000
21 	-0.053
22 	 0.003
23 	-1.034
24 	-0.762
25 	-0.017
26 	-0.118
27 	-0.076
28 	 0.000
29 	-0.000
30 	-0.000
31 	-0.000
32 	-0.000
33 	 0.000
34 	 0.381
35 	 0.032
36 	-0.000
37 	 0.000
38 	-0.005
39 	-0.745
40 	 0.207
41 	 0.439
42 	 0.103
43 	 0.009
44 	-0.000
45 	-0.000
46 	 0.000
47 	-0.000
48 	-0.000
49 	 0.000
50 	-0.397
51 	-0.003
52 	 0.000
53 	-0.000
54 	-0.000
55 	 0.000
56 	-0.001
57 	 0.001
58 	-0.000
59 	 0.000
60 	 0.000
61 	 0.000
62 	 0.000
63 	-0.263
64 	-0.427
65 	-0.869
66 	 0.272
67 	 0.009
68 	-0.000
69 	 0.369
70 	 0.246
71 	-0.000
72 	-0.002
73 	 0.000
74 	-0.013
75 	-0.514
76 	-0.000
77 	-0.000
78 	 0.205
79 	 0.771
80 	-0.000
81 	-0.000
82 	 0.000
83 	 0.000
84 	 0.000
85 	-0.025
86 	 0.003
87 	-0.470
88 	 0.000
89 	-0

In [37]:
# same for the eigevectors printed by Molpro
for icol in range(SOCI.SOvec.shape[1]):
    print(icol, f'\t{ffun(SOCI.SOvec[:, icol]):6.3f}')

0 	 0.306
1 	-0.415
2 	-0.096
3 	-0.560
4 	-0.658
5 	 0.042
6 	 0.233
7 	-0.369
8 	 0.897
9 	-0.001
10 	 0.795
11 	-0.236
12 	 0.021
13 	-0.529
14 	-1.768
15 	-0.019
16 	-2.152
17 	-0.513
18 	 0.321
19 	-0.322
20 	 2.423
21 	 0.522
22 	-0.476
23 	 0.695
24 	-1.455
25 	-0.035
26 	-0.003
27 	 0.468
28 	 0.030
29 	 0.181
30 	-0.001
31 	 0.974
32 	 0.033
33 	-0.005
34 	 0.241
35 	 0.220
36 	 0.106
37 	 0.355
38 	 0.010
39 	-0.092
40 	-0.025
41 	-0.262
42 	-0.190
43 	 0.015
44 	 0.253
45 	 0.737
46 	-0.007
47 	-0.339
48 	-0.091
49 	 0.087
50 	-0.076
51 	-0.009
52 	 0.001
53 	 0.446
54 	 0.395
55 	-0.221
56 	 0.330
57 	 0.164
58 	 0.002
59 	 0.331
60 	 0.139
61 	 0.049
62 	 0.096
63 	 0.343
64 	-0.108
65 	-0.786
66 	-0.155
67 	-0.193
68 	-0.284
69 	-0.324
70 	 0.007
71 	-0.212
72 	-0.194
73 	 0.427
74 	 0.244
75 	-0.020
76 	-0.181
77 	 0.039
78 	 0.348
79 	-0.209
80 	-0.031
81 	-1.205
82 	-1.442
83 	 0.136
84 	-0.065
85 	-0.109
86 	-0.002
87 	 0.063
88 	 1.301
89 	 0.073
90 	-0.033
91 	 0.01

In [38]:
np.set_printoptions(threshold=sys.maxsize)
np.round(SOCI.vecsq,3)

array([[0.   , 0.   , 0.   , 0.687, 0.   , 0.   , 0.   , 0.243, 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.021,
        0.   , 0.   , 0.   , 0.   , 0.012, 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.016, 0.   , 0.003, 0.   , 0.   , 0.   , 0.002,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.006, 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.003, 0.   , 0.   , 0.   , 0.   ,
        0.001, 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.002,
        0.   , 0.001, 0.   , 0.   , 0.   , 0.   , 0.001, 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.001, 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
        0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   , 0.   ,
      

In [39]:
thr = 0.001
for iso in [2, 3]:
    print(f'iso = {iso}')
    vec = SOCI.vec[:, iso] 
    wt = SOCI.vecsq[:, iso]
    twt = SOCI.termwt[:, iso]  # averaged terms
    for i, bas in enumerate(SOCI.basis):
        if wt[i] < thr:
            continue
        jterm = SOCI.sob_iterm[i]
        #print('\t', i, bas, '\t', jterm, SOCI.termlabel_index(jterm)[0], f'vec = {np.round(vec[i], 3)}')
        print('\t', i, bas, '\t', jterm, SOCI.termlabel_index(jterm)[0], f'wt = {np.round(wt[i], 3)}')

iso = 2
	 5 ('2.1', 0.5, -0.5) 	 1 (1)2Σ+ wt = 0.687
	 22 ('1.2', 0.5, 0.5) 	 2 (1)2Π wt = 0.148
	 38 ('1.3', 0.5, 0.5) 	 2 (1)2Π wt = 0.149
	 85 ('1.2', 1.5, 0.5) 	 5 (1)4Π wt = 0.001
	 91 ('1.2', 1.5, -1.5) 	 5 (1)4Π wt = 0.003
	 101 ('1.3', 1.5, 0.5) 	 5 (1)4Π wt = 0.001
	 107 ('1.3', 1.5, -1.5) 	 5 (1)4Π wt = 0.003
	 118 ('2.4', 1.5, -0.5) 	 8 (1)4Σ- wt = 0.001
iso = 3
	 0 ('2.1', 0.5, 0.5) 	 1 (1)2Σ+ wt = 0.687
	 28 ('1.2', 0.5, -0.5) 	 2 (1)2Π wt = 0.148
	 44 ('1.3', 0.5, -0.5) 	 2 (1)2Π wt = 0.149
	 82 ('1.2', 1.5, 1.5) 	 5 (1)4Π wt = 0.003
	 88 ('1.2', 1.5, -0.5) 	 5 (1)4Π wt = 0.001
	 98 ('1.3', 1.5, 1.5) 	 5 (1)4Π wt = 0.003
	 104 ('1.3', 1.5, -0.5) 	 5 (1)4Π wt = 0.001
	 116 ('2.4', 1.5, 0.5) 	 8 (1)4Σ- wt = 0.001


In [40]:
for iso in [4, 5]:
    print(f'iso = {iso}')
    vec = SOCI.vec[:, iso] 
    wt = SOCI.vecsq[:, iso]
    twt = SOCI.termwt[:, iso]  # averaged terms
    for i, bas in enumerate(SOCI.basis):
        if wt[i] < thr:
            continue
        jterm = SOCI.sob_iterm[i]
        #print('\t', i, bas, '\t', jterm, SOCI.termlabel_index(jterm)[0], f'vec = {np.round(vec[i], 3)}')
        print('\t', i, bas, '\t', jterm, SOCI.termlabel_index(jterm)[0], f'wt = {np.round(wt[i], 3)}')

iso = 4
	 15 ('1.1', 0.5, -0.5) 	 0 (1)2Δ wt = 0.138
	 22 ('1.2', 0.5, 0.5) 	 2 (1)2Π wt = 0.353
	 24 ('4.2', 0.5, 0.5) 	 12 (3)2Π wt = 0.001
	 38 ('1.3', 0.5, 0.5) 	 2 (1)2Π wt = 0.353
	 40 ('4.3', 0.5, 0.5) 	 12 (3)2Π wt = 0.001
	 63 ('1.4', 0.5, -0.5) 	 0 (1)2Δ wt = 0.138
	 97 ('2.2', 1.5, -1.5) 	 9 4Φ wt = 0.002
	 113 ('2.3', 1.5, -1.5) 	 9 4Φ wt = 0.002
	 114 ('2.4', 1.5, 1.5) 	 8 (1)4Σ- wt = 0.005
iso = 5
	 10 ('1.1', 0.5, 0.5) 	 0 (1)2Δ wt = 0.138
	 28 ('1.2', 0.5, -0.5) 	 2 (1)2Π wt = 0.353
	 30 ('4.2', 0.5, -0.5) 	 12 (3)2Π wt = 0.001
	 44 ('1.3', 0.5, -0.5) 	 2 (1)2Π wt = 0.353
	 46 ('4.3', 0.5, -0.5) 	 12 (3)2Π wt = 0.001
	 58 ('1.4', 0.5, 0.5) 	 0 (1)2Δ wt = 0.138
	 94 ('2.2', 1.5, 1.5) 	 9 4Φ wt = 0.002
	 110 ('2.3', 1.5, 1.5) 	 9 4Φ wt = 0.002
	 120 ('2.4', 1.5, -1.5) 	 8 (1)4Σ- wt = 0.005


In [41]:
if not ok:
    thresh = 2.e-5 # for energies
    wthr = 1.e-3   # for leading-term weights
    pd.set_option('display.max_rows', 500)
    print('*** Attempt to assign Omegas by interpolation!  ***')
    print('--- Using energy only ---')
    datfile = r'C:\Users\irikura\OneDrive - NIST\Karl\PtH_anion\guiding_scan\guiding_pots.tsv'
    print(f'Using PEC data file {datfile}')
    dfPEC = pd.read_csv(datfile, sep='\t')
    # expect a column 'R' followed by columns with state labels
    # generate interpolations
    from collections import Counter
    Einterp = []
    Ominterp = []
    re_om = re.compile('(\d)[-+]?$')
    x = dfPEC.R.values
    states = []
    for st in dfPEC.columns[1:]:
        y = dfPEC[st].values
        fPEC = chem.fit_diatomic_potential(x, y)
        E = float(fPEC(R))
        m = re_om.search(st)
        Om = int(m.group(1))
        states.append(st)
        Einterp.append(E)
        Ominterp.append(Om)
    dfinterp = pd.DataFrame({'Label': states, 'Om': Ominterp, 'E': Einterp}).sort_values('E').reset_index(drop=True)
    # add column for energy increments
    evals = dfinterp.E.values
    incr = [np.nan] + list(evals[1:] - evals[:-1])
    dfinterp['incrE'] = np.round(incr, 6)
    dfinterp['used'] = False
    print('\nInterpolated states (expectation):')
    display(dfinterp)
    icount = {}
    for om, v in Counter(dfinterp.Om).items():
        if om == 0:
            icount[om] = v
        else:
            icount[om] = v * 2
    icount = Counter(icount)
    print('Target (correct) counts:', icount)
    
    # Do not re-sort dfso by energy because ordering of degen levels can get scrambled
    dfso = SOCI.dfso.sort_values('E').reset_index(drop=True)  # to be assigned
    dfso['Ω'] = None
    dfso['label'] = None
    dfso['Olbl'] = None
    evals = dfso.E.values
    incr = [np.nan] + list(evals[1:] - evals[:-1])
    dfso['incrE'] = np.round(incr, 6)
    wts = dfso.wt.values
    dwt = [np.nan] + list(wts[1:] - wts[:-1])
    dfso['wtdiff'] = np.round(np.abs(dwt), 5)
    nstates = len(dfso)
    print('\nActual states to be assigned:')
    display(dfso)
   
    # Since usual Omega assignment failed, do not assume that the term symbols are reliable
    def assignstate():
        # use globals
        dfso.loc[i, 'Olbl'] = jow.Label
        dfso.loc[i, 'Ω'] = jow.Om
        dfinterp.loc[j, 'used'] = True
        print(f'assign state {i} with {jow.Label}')
        if jow.Om > 0:
            # also assign its twin (should be the next state by energy)
            dfso.loc[i+1, 'Olbl'] = jow.Label
            dfso.loc[i+1, 'Ω'] = jow.Om                    
            print(f'\tpair {i+1} with {jow.Label}')
        return

    print('Start with biggest gaps:')
    # look for big gaps, as more reliable than small gaps
    dfinterp = dfinterp.sort_values('incrE', ascending=False)
    for i, row in dfso.sort_values('incrE', ascending=False).iterrows():
        if row.incrE < 2 * thresh:
            # too small to trust
            continue
        #display(row.to_frame().T)
        for j, jow in dfinterp.iterrows():
            if jow.incrE < 2 * thresh:
                # gap too small to trust
                continue
            if jow.used:
                # already matched to an actual state
                continue
            if abs(row.incrE - jow.incrE) < thresh:
                de = abs(row.E - jow.E)
                if de < thresh:
                    # this looks like a match; check for accidental degeneracy
                    #if (i+1 < nstates) and (dfso.loc[i+1, 'incrE'] < thresh):
                    #    # next level is close; it might be the twin
                    #    if (i+2 < nstates) and (dfso.loc[i+2, 'incrE'] > thresh):
                    #        # looks like an accidental degeneracy (3 levels)
                    #        break
                    # this is a match; assign it
                    assignstate()
    dfinterp = dfinterp.sort_values('E')
    display(dfso[dfso.Ω.isnull()])
    counts = Counter(dfso.Ω)
    print('current counts:', counts)
    print('missing counts:', icount - counts)
    
    # drop used levels from dfinterp
    dfinterp = dfinterp[dfinterp.used == False]

    print('\nLook for close energy matches:')
    for i, row in dfso.iterrows():
        if row.Ω is not None:
            # already assigned
            continue
        for j, jow in dfinterp.iterrows():
            if jow.used:
                # row already matched
                continue
            dE = abs(row.E - jow.E)
            if dE < thresh:
                # a match
                assignstate()
    display(dfso[dfso.Ω.isnull()])
    counts = Counter(dfso.Ω)
    print('current counts:', counts)
    print('missing counts:', icount - counts)
    dfinterp = dfinterp[dfinterp.used == False]

    if (len(dfinterp) == 0) and (sum((icount - counts).values()) == 0):
        print('\nAll states assigned!')
        # drop unneeded columns
        dfdups = dfso[['E', 'cm-1', 'Ω', 'exc', 'Olbl']]
        df = mpr.average_SO_levels(dfdups, be_same=['Ω'])
        #df['Olbl'] = chem.enumerative_prefix([s.split('_')[1] for s in df.label])
        display(df)
        # copy into the SOCI object
        SOCI.dfso = df

In [42]:
dfterms['Ecm'] = np.round((dfterms.Edav - dfterms.Edav.min()) * chem.AU2CM, 1)
dfterms

Unnamed: 0,Term,dipZ,Edav,idx,ecm,Ecm
0,(1)2Δ,-0.588172,-119.784064,"[29, 5]",0.0,0.0
1,(1)2Σ+,-0.686412,-119.783698,[0],80.5,80.5
2,(1)2Π,-0.652485,-119.777318,"[11, 19]",1480.7,1480.7
3,(1)4Δ,-0.161305,-119.730043,"[48, 36]",11856.4,11856.4
4,(2)2Σ+,-0.014132,-119.727746,[1],12360.4,12360.4
5,(1)4Π,-0.096289,-119.727039,"[42, 38]",12515.7,12515.7
6,(2)2Δ,0.101961,-119.718361,"[30, 6]",14420.2,14420.2
7,(2)2Π,0.095556,-119.71635,"[12, 20]",14861.5,14861.5
8,(1)4Σ-,-0.142579,-119.715854,[46],14970.4,14970.4
9,4Φ,-0.110737,-119.714348,"[41, 45]",15301.1,15301.1


In [43]:
# Is this a hybrid calculation (prepared by build_hybrid_soci_input.ipynb)?
ccterms = []  # list of input CCSD(T) terms
rx_hyb = re.compile('HLSDIAG\(.+\s+!\s*.*(ccsd|anchored|shifted|input)')
is_hybrid = False
with open(fsoci, 'r', encoding='utf8') as F:
    for line in F:
        if rx_hyb.search(line):
            is_hybrid = True
            if 'input' in line:
                # an anchor term; extract its label
                words = line.split()
                if words[2] not in ccterms:
                    ccterms.append(words[2])

In [44]:
# transpose absolute energies and copy to clipboard
dfcp = SOCI.dfso[['Olbl', 'E', '<i|z|i>']].copy().sort_values('Olbl').set_index('Olbl')
dfcp.rename(columns={'E': f'{R}'}, inplace=True)   # put bond length in that position for pasting to Excel
dfcp.T.to_clipboard()
if is_hybrid:
    print(f'Hybrid SO-CI energies for R={R} copied to clipboard, for pasting into Excel')
else:
    print(f'Standard SO-CI energies for R={R} copied to clipboard, for pasting into Excel')

KeyError: "['Olbl', '<i|z|i>'] not in index"

In [45]:
dfcp

NameError: name 'dfcp' is not defined

### Term composition of some levels

In [None]:
olabels = ['(1)1/2', '(1)3/2', '(1)5/2']
for olabel in olabels:
    ilev = SOCI.dfso[SOCI.dfso.Olbl == olabel].index[0]
    #print('ilvel =', ilev)
    print('Composition of level #{:d}:  "{:s}" or "{:s}"'.format(ilev, SOCI.dfso.loc[ilev].Olbl,
                                                             SOCI.dfso.loc[ilev].label))
    dfci, dfterm = SOCI.composition_of_level(ilev, thr=1.e-6, normalize=True)
    wtcol = dfterm.columns[-1]
    display(dfterm.sort_values(wtcol, ascending=False))
    print('Sum of weights = {:.3f}'.format(dfterm[wtcol].sum()))
    ccsum = dfterm[dfterm.Term.isin(ccterms)][wtcol].sum()
    print('Sum of weights from CC terms = {:.3f}\n'.format(ccsum))

### Distribution of a term among levels

In [None]:
#term = '(1)1Σ+'
term = '(1)2Δ'
if is_hybrid:
    typ = 'hybrid'
else:
    typ = 'standard'
print('Distribution of term "{:s}" among {:s} levels'.format(term, typ))
print(f'R = {R}')
df = SOCI.level_contributions_from_term(term, thr=1.e-6, normalize=True)
df['prod'] = np.round(df.exc * df[term], 1)
ebar = df['prod'].sum() / df[term].sum()
print('Weighted mean energy of {:s} = {:.1f} cm-1'.format(term, ebar))
#display(df.sort_values(term, ascending=False))
display(df[df[term] > 0.0005].sort_values('E').style.format({term: '{:.4f}'}))
print('Total weight = {:.4f}'.format(df[term].sum()))

### Contribution of CCSD(T) terms to all levels

In [None]:
# Get contributions of CC terms to all levels
# Loop over levels to get the normalization right
dfcc = SOCI.dfso.copy()
ccsums = []
for ilev in dfcc.index:
    dfci, dfterm = SOCI.composition_of_level(ilev, thr=1.e-6,
                                normalize=True, silent=True)
    wtcol = dfterm.columns[-1]
    ccsum = dfterm[dfterm.Term.isin(ccterms)][wtcol].sum()
    ccsums.append(ccsum)
dfcc['CCwt'] = ccsums
fmt = {chem.OMEGA: '{:.1f}', 'exc': '{:.2f}', 'CCwt': '{:.3f}'}
display(dfcc[dfcc.columns[2:]].style.format(fmt))
print('Total of CCwt column = {:.3f}'.format(dfcc.CCwt.sum()))
print('The CC terms are', ccterms)

In [None]:
df = SOCI.dfso[SOCI.dfso['term'] == '4Φ']
display(df)
print('wt sum = {:.3f}'.format(df.wt.sum()))

In [None]:
SOCI.dfso[(SOCI.dfso.exc > 21000) & (SOCI.dfso.exc < 30000)]