In [1]:
#!/usr/bin/env python3
"""
Generate Molpro input that does SO-CI with MRCI done with separate
blocks for each irrep, to minimize degeneracy-breaking. 
Requires Molpro CASSCF output. C2v symmetry (4 irreps) is assumed.
KKI 1/13/2022
"""
import re, sys, copy, pyperclip
import numpy as np
import pandas as pd
from collections import Counter
sys.path.insert(0, '../atomic_SOC')
import molpro_subs as mpr
import chem_subs as chem

# widen the pandas text display so that wide DataFrames are less likely to wrap
pd.set_option('display.width', 1000)
# threshold used by the 'Lz' blocking below: when the number of MRCI records
# reaches this value the notebook switches to the condensed blocking
too_many_records = 21   # having this many MRCI records will fail in SO-CI

In [52]:
# Select the Molpro CASSCF output file to read (directory `fdir` + file `froot`).
# Flip the `if True:` toggle by hand to switch between the two families of inputs.
# Within a branch, later assignments override earlier ones; the earlier lines
# are kept only as a record of alternative inputs.
# The directories are raw strings ending in `\\`, i.e. two literal backslashes.
if True:
    fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\PtO\14S23T3Q\\'
    froot = 'actz_r2p15.pro'
    # the next two assignments are the ones that take effect
    fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\PtO\14S_17T_5Q\\'
    froot = 'ac5z_r2p5.pro'
else:
    # alternative
    rcode = 'r2p3500'
    basis = 'ac5z'
    fdir = r'C:\Users\irikura\OneDrive - NIST\Karl\PtH_cation\hybrid_stmrciq_PEC\{:s}\\'.format(rcode)
    froot = '{:s}_22S15T_{:s}_lz.pro'.format(basis, rcode)
    # this assignment overrides the formatted name built on the previous line
    froot = 'ac5z_soc1811_22S15T_lz.pro'

In [53]:
# Blocking scheme for the MRCI calculations.  Note that a later cell changes
# 'Lz' to 'Lz_condensed' automatically when 'Lz' produces too many records.
style = 'Lz'  # choices: 'Lz', 'split', 'irrep', 'single'
# Selects 'hlsmat,ecp' (True) or 'hlsmat,ls' (False) in the SO-CI input below.
allECP = False   # whether all atoms have ECPs (with spin-orbit)

In [54]:
# Full path of the CASSCF output, and a report of the chosen blocking style.
fcas = fdir + froot
print('Will read CASSCF info from file: {:s}'.format(froot))
# one announcement per supported blocking style
style_messages = {
    'Lz': 'MRCI will be blocked by Lz',
    'split': 'MRCI will be in blocks of similar size',
    'irrep': 'MRCI will be blocked simply by irrep',
    'single': 'Each MRCI will have one state',
}
if style not in style_messages:
    # stop the notebook with a meaningful error instead of a ZeroDivisionError
    raise ValueError(f'bad choice for "style": {style}')
print(style_messages[style])

Will read CASSCF info from file: ac5z_r2p5.pro
MRCI will be blocked by Lz


In [55]:
# Read the point group, geometry and CASSCF results from the Molpro output.
try:
    PG = mpr.read_point_group(fcas)
except FileNotFoundError:
    print('File not found!')
    print('Here are the Molpro outputs in directory {:s}:'.format(fdir))
    import glob
    for f in glob.glob(fdir + '*.pro'):
        print(f.split('\\')[-1])
    # Nothing below can work without the file; re-raise so that execution
    # stops here instead of failing later on an undefined (or stale) PG.
    raise
if PG != 'C2v':
    # only a warning: the rest of the notebook assumes C2v irrep numbering
    print('Your calculation is in point group', PG)
crd, lineno_crd = mpr.read_coordinates(fcas, linenum=True)
if isinstance(crd, list):
    # several geometries were returned; use the last one
    crd = crd[-1]
# get diatomic bond length
G = chem.Geometry(crd, intype='DataFrame', units='bohr')
G.toAngstrom()
R = np.round(G.distance(0, 1), 6)  # round the bond length to 6 digits
print('Bond length = {:.4f}'.format(R))
caslist, lineno_cas = mpr.readMULTI(fcas, PG=PG, linenum=True)
CAS = caslist[-1]   # assume the last CASSCF to be the relevant one
#CAS.results = relabel_CAS_by_energy(CAS.results)
print(f'{CAS.nactel()} active electrons, {CAS.nclosed} closed (doubly-occupied) orbitals')

Bond length = 2.5000
14 active electrons, 6 closed (doubly-occupied) orbitals


In [56]:
# check for non-integer Lz values
cruft = abs(CAS.results.LzLz - np.round(CAS.results.LzLz, 0))
if (cruft > 1.e-8).any():
    print('*** There are non-integer values of LzLz')
    print(f'    max deviation = {cruft.max()}')
    display(CAS.results.sort_values('Energy'))
    #print(cruft)
else:
    print('LzLz values look clean')

LzLz values look clean


In [57]:
# report number of states in each spin multiplicity
LS_got = dict()
for S, grp in CAS.results.groupby('Spin'):
    print(f'{len(grp)} {S} states')
    print('\t', Counter(grp.Term))
    LS_got.update(Counter(grp.Term))

5 Quintet states
	 Counter({'5Δ': 2, '5Φ': 2, '5Σ-': 1})
14 Singlet states
	 Counter({'1Π': 6, '1Σ+': 2, '1Δ': 2, '1Φ': 2, '1Σ-': 2})
17 Triplet states
	 Counter({'3Δ': 6, '3Π': 6, '3Φ': 2, '3Σ-': 2, '3Σ+': 1})


In [58]:
if True:
    # this is only for PtO calculations, to check for same terms across the PEC
    # LS_target maps each term symbol to the number of states expected.
    LS_target = {'5Δ': 2, '5Π': 2, '5Φ': 2, '5Σ-': 1}
    LS_target.update({'1Π': 4, '1Δ': 2, '1Φ': 2, '1Σ+': 1, '1Σ-': 1})
    LS_target.update({'3Δ': 6, '3Π': 6, '3Φ': 2, '3Σ-': 2, '3Σ+': 1})
    # per-directory adjustments of the default targets
    if '10S_19T_7Q' in fdir:
        LS_target['3Σ-'] = 3; LS_target['3Σ+'] = 2
    if '11S_19T_7Q' in fdir:
        LS_target['3Δ'] = 8; LS_target['1Π'] = 6; LS_target['1Σ-'] = 0
    if '14S_17T_5Q' in fdir:
        LS_target.update({'1Π': 6, '1Δ': 2, '1Φ': 2, '1Σ+': 2, '1Σ-': 2})
        LS_target.update({'3Δ': 6, '3Π': 6, '3Φ': 2, '3Σ-': 2, '3Σ+': 1})
        # update (not rebind) so the singlet and triplet targets above are kept
        LS_target.update({'5Δ': 2, '5Π': 0, '5Φ': 2, '5Σ-': 1})
    # compare with current calculation
    print('For PtO, checking for desired terms in CASSCF')
    nwrong = 0
    for k, v in LS_target.items():
        got = LS_got.get(k, 0)
        if v != got:
            print(f'  {k}:  got {got} instead of {v}')
            nwrong += abs(v - got)
    if nwrong:
        print(f'   {nwrong} states are wrong')
    else:
        print('   looks good')

For PtO, checking for desired terms in CASSCF
   looks good


In [59]:
# Working table of CASSCF states: a copy of selected columns plus a numeric
# spin column 'S' (chem.spinname of the spin label, minus one).
casdf = CAS.results.loc[:, ['Irrep', 'Label', 'Energy', 'Term']].copy()
casdf['S'] = [chem.spinname(spin_label) - 1 for spin_label in CAS.results.Spin]

In [60]:
# Round sqrt(LzLz) values to integers
casdf['Lz'] = np.round(np.sqrt(np.abs(CAS.results.LzLz)), 0).astype(int)
spin = sorted(set(casdf.S))
irreps = sorted(set(CAS.results.Irrep))
lzvals = sorted(set(casdf.Lz))

In [61]:
# check for proper pairing of degenerate states
broken = False
for (s, lz), dfg in casdf.groupby(['S', 'Lz']):
    if lz == 0:
        # ignore Sigma states
        continue
    # number in each irrep should be equal
    grpi = dfg.groupby('Irrep')
    lens = grpi.size().values
    if (len(lens) < 2) or (lens[0] != lens[1]):
        print('Broken pair somewhere')
        display(dfg.sort_values('Energy'))
        if len(lens) == 1:
            # display the complementary irrep
            ir = list(grpi.groups.keys())[0]
            print('all states with this spin and irrep:')
            display(casdf[(casdf.S == s) & (casdf.Irrep == ir)])
            print('complementary irrep:')
            cir = [0, 4, 3, 2, 1][ir]
            display(casdf[(casdf.S == s) & (casdf.Irrep == cir)])
        broken = True
if not broken:
    print('All term pairs are closed')

All term pairs are closed


In [62]:
# order the CASSCF states by energy within each (S, irrep) group
newdf = None
for lbl, dg in casdf.groupby(['S', 'Irrep']):
    dt = dg.copy().sort_values('Energy').reset_index(drop=True)
    if newdf is None:
        newdf = dt
    else:
        newdf = pd.concat([newdf, dt])
# report number of states in each spin multiplicity
for S, grp in newdf.groupby('S'):
    print(f'{len(grp)} states with S = {S}/2 ({chem.SPINLABEL[S+1]})')

14 states with S = 0/2 (Singlet)
17 states with S = 2/2 (Triplet)
5 states with S = 4/2 (Quintet)


In [63]:
def finish_line():
    """Complete the current MRCI input line and append it to `linebuf`.

    Works entirely on notebook globals.  Reads `S`, `irrep`, `nstati` and
    `nst`; updates `line`, `linebuf`, `nstprev`, `records`, `iblock`,
    `count` and `nstmax`.  The record number is built from the spin, the
    irrep and the block counter, with the fixed suffix ".2".
    """
    # only use record suffix "2" as recommended by D. Kreplin
    global line, linebuf, nstprev, records, iblock, count, nstmax
    suffix = 2
    record = (5 + S//2) * 1000 + irrep*100 + iblock + 0.1*suffix
    records.append(record)
    iblock += 1
    # the nstati option is written only when nstati is non-zero
    nstati_option = f' option,nstati={nstati}' if nstati else ''
    if nst > 1:
        # savemany loops over NST, so emit a new "nst=" line when it changes
        if nst != nstprev:
            linebuf.append(f'nst={nst}')
            nstprev = nst
        proc_call = ' savemany'
    else:
        proc_call = ' save1'
    line += f'; save,{record};' + nstati_option + '};' + proc_call
    linebuf.append(line)
    count += nst
    nstmax = max(nstmax, nst)

In [64]:
# lines to be added to the Molpro CASSCF input file
linebufhdr = ['! Code below generated by build_split_mrci_input.ipynb using style "{:s}"'.format(style),
           '!    from file {:s}'.format(froot.split()[0]), '', 'ecas = energy', 'k=0']
linebuf = linebufhdr.copy()

In [65]:
# 'Lz' blocking: one MRCI calculation for each (spin, irrep, Lz) combination
# that contains at least one CASSCF state.  Each calculation contributes one
# line to `linebuf` and one record number to `records`.
if style == 'Lz':
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1   # last value written to the Molpro variable "nst"
    nstmax = 0    # largest number of roots in any single MRCI
    for S in spin:
        spindf = newdf[newdf.S == S]
        for irrep in irreps:
            # renumber the states of this (spin, irrep) block from 0
            irrdf = spindf[spindf.Irrep == irrep].copy().reset_index(drop=True)
            #display(irrdf)
            nstati = len(irrdf)   # total states in this irrep, passed as option nstati
            iblock = 1   # suffix for record label
            for lz in lzvals:
                # build one line of input
                lzdf = irrdf[irrdf.Lz == lz]
                nst = len(lzdf)
                count += nst
                nstmax = max(nstmax, nst)
                if nst < 1:
                    # no states with this Lz in this block
                    continue
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                # list the selected states explicitly, numbered from 1
                for i in lzdf.index:
                    line += ',' + '{:d}'.format(int(i) + 1)
                # record number built from spin, irrep and block counter, suffix .2
                recnum = (5 + S//2)*1000 + irrep*100 + iblock + 0.2
                records.append(recnum)
                iblock += 1
                line += f'; save,{recnum}; option,nstati={nstati}' + '};'
                if nst == 1:
                    line += ' save1;'
                else:
                    # the savemany procedure loops over NST; set it when it changes
                    if nst != nstprev:
                        linebuf.append(f'nst={nst}')
                        nstprev = nst
                    line += ' savemany;'
                # add the comment
                pre = '{:d}-'.format(S + 1)
                if lz == 0:
                    # Sigma states are expected only in irreps 1 (Sigma+) and 4 (Sigma-)
                    if irrep == 1:
                        line += f'  ! {pre}Sigma+'
                    elif irrep == 4:
                        line += f'  ! {pre}Sigma-'
                    else:
                        chem.print_err('', f'Bad irrep = {irrep} for Lz = 0')
                else:
                    line += f'  ! {pre}{mpr.LAMBDA[lz].title()}'
                linebuf.append(line)
    # a following cell rebuilds the input in condensed form when this triggers
    if len(records) >= too_many_records:
        print(f'*** Too many records: {len(records)} ***')
        print('*** combining Lz values within irreps 2 and 3 ***')

*** Too many records: 21 ***
*** combining Lz values within irreps 2 and 3 ***


In [66]:
# Fallback for the 'Lz' style when it produced too many records: rebuild the
# input from scratch, keeping one MRCI per Lz value in irreps other than 2
# and 3 but merging all Lz values of irreps 2 and 3 into one MRCI each.
# This cell changes `style` to 'Lz_condensed', so it will not run a second
# time unless `style` is reset.
if style == 'Lz' and len(records) >= too_many_records:
    style = 'Lz_condensed'
    print(f'*** setting style = {style} ***')
    # patch the style name in the header, then restart the buffer from the header
    linebufhdr[0] = linebufhdr[0].replace('Lz', 'Lz_condensed')
    linebuf = linebufhdr.copy()
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1   # last value written to the Molpro variable "nst"
    nstmax = 0    # largest number of roots in any single MRCI
    for S in spin:
        spindf = newdf[newdf.S == S]
        for irrep in irreps:
            # renumber the states of this (spin, irrep) block from 0
            irrdf = spindf[spindf.Irrep == irrep].copy().reset_index(drop=True)
            #display(irrdf)
            nstati = len(irrdf)   # total states in this irrep, passed as option nstati
            iblock = 1   # suffix for record label
            if irrep in [2, 3]:
                # combine all Lz values together
                # build one line of input
                nst = len(irrdf)
                count += nst
                nstmax = max(nstmax, nst)
                if nst < 1:
                    # this spin has no states in this irrep
                    continue
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                # list every state of the block, numbered from 1
                for i in irrdf.index:
                    line += ',' + '{:d}'.format(int(i) + 1)
                recnum = (5 + S//2)*1000 + irrep*100 + iblock + 0.2
                records.append(recnum)
                iblock += 1
                line += f'; save,{recnum}; option,nstati={nstati}' + '};'
                if nst == 1:
                    line += ' save1;'
                else:
                    # the savemany procedure loops over NST; set it when it changes
                    if nst != nstprev:
                        linebuf.append(f'nst={nst}')
                        nstprev = nst
                    line += ' savemany;'
                # add the comment
                # (the trailing comment is the set of term symbols in the block)
                line += f'  ! {set(irrdf.Term)}'
                linebuf.append(line)
            else:
                # each Lz value alone
                for lz in lzvals:
                    # build one line of input
                    lzdf = irrdf[irrdf.Lz == lz]
                    nst = len(lzdf)
                    count += nst
                    nstmax = max(nstmax, nst)
                    if nst < 1:
                        # no states with this Lz in this block
                        continue
                    line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                    # list the selected states explicitly, numbered from 1
                    for i in lzdf.index:
                        line += ',' + '{:d}'.format(int(i) + 1)
                    recnum = (5 + S//2)*1000 + irrep*100 + iblock + 0.2
                    records.append(recnum)
                    iblock += 1
                    line += f'; save,{recnum}; option,nstati={nstati}' + '};'
                    if nst == 1:
                        line += ' save1;'
                    else:
                        if nst != nstprev:
                            linebuf.append(f'nst={nst}')
                            nstprev = nst
                        line += ' savemany;'
                    # add the comment
                    pre = '{:d}-'.format(S + 1)
                    if lz == 0:
                        # Sigma states are expected only in irreps 1 (Sigma+) and 4 (Sigma-)
                        if irrep == 1:
                            line += f'  ! {pre}Sigma+'
                        elif irrep == 4:
                            line += f'  ! {pre}Sigma-'
                        else:
                            chem.print_err('', f'Bad irrep = {irrep} for Lz = 0')
                    else:
                        line += f'  ! {pre}{mpr.LAMBDA[lz].title()}'
                    linebuf.append(line)
    print(f'There are {len(records)} records after condensing irreps 2 and 3.')
    if len(records) >= too_many_records:
        # condensing was not enough; nothing further is attempted here
        print(f'*** Still too many records: {len(records)} ***')

*** setting style = Lz_condensed ***
There are 17 records after condensing irreps 2 and 3.


In [67]:
# 'split' blocking: irreps 2 and 3 are each done in one MRCI; for irreps 1
# and 4 the larger one is split so that its first block has the same size as
# the smaller partner irrep.  Empty blocks are never written.
if style == 'split':
    # assume that irreps 2 and 3 are already equal
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0

    for S in spin:
        spindf = newdf[newdf.S == S]
        nstate = {}
        irrdf = {}
        for irrep in irreps:
            # count the states in each irrep
            tdf = spindf[spindf.Irrep == irrep].copy()
            tdf = tdf.sort_values('Energy').reset_index(drop=True)
            nstate[irrep] = len(tdf)
            irrdf[irrep] = tdf
        for irrep in irreps:
            iblock = 1   # part of record number
            nstati = 0   # don't need nstati
            # size of the partner irrep (1 <-> 4, 2 <-> 3); 0 if it never occurs
            otherblock = nstate.get(5 - irrep, 0)
            if (irrep in [2, 3]) or (nstate[irrep] < otherblock):
                # easy case: the lowest nst states in a single block
                nst = nstate[irrep]
                if nst < 1:
                    # no states of this spin in this irrep: write nothing
                    continue
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                finish_line()
                continue
            # irrep 1 or 4, larger (or equal) block
            # split this block; try to keep Lz > 0 in the larger block
            nstati = nstate[irrep]
            tdf = irrdf[irrep]
            # put Lz > 0 first, then sigmas
            iord = list(tdf[tdf.Lz > 0].index) + list(tdf[tdf.Lz == 0].index)
            # first block has the size of the partner irrep, second gets the rest;
            # either may be empty (equal sizes, or empty partner) and is then skipped
            for idx in (iord[:otherblock], iord[otherblock:]):
                if not idx:
                    continue
                nst = len(idx)
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                for i in idx:
                    line += ',{:d}'.format(i+1)
                finish_line()
 

In [68]:
# 'irrep' blocking: one MRCI per (spin, irrep), requesting the lowest `nst`
# states.  Irreps with no states for a given spin are skipped.
if style == 'irrep':
    # simple treatment, one block for each irrep
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0

    for S in spin:
        spindf = newdf[newdf.S == S]
        for irrep in irreps:
            iblock = 1   # part of record number
            nstati = 0   # don't need nstati
            # only the number of states is needed here
            nst = len(spindf[spindf.Irrep == irrep])
            if nst < 1:
                # avoid writing a "state,0" calculation and a bogus record
                continue
            line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
            finish_line()

In [69]:
# 'single' blocking: a separate one-root MRCI for every CASSCF state.
if style == 'single':
    # each MRCI has only one root
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0

    nst = 1   # read by finish_line(); constant for this style
    for S in spin:
        spindf = newdf[newdf.S == S]
        for irrep in irreps:
            # number of states of this spin in this irrep (written as option nstati)
            nstati = len(spindf[spindf.Irrep == irrep])
            iblock = 1   # part of record number
            for i in range(nstati):
                # request exactly one state: number i+1 within the irrep
                line = baseline + f'wf,sym={irrep},spin={S}; state,1,{i+1}'
                finish_line()

In [70]:
# Molpro "table" directive over the arrays filled by the save1/savemany
# procedures, set off by blank lines from the MRCI block above.
linebuf.append('\ntable,hlsdiag,lzlz,ecas,eref,ediff\n')

In [71]:
# Open the spin-orbit CI section; the hlsmat keyword depends on allECP.
# The trailing backslash continues the Molpro command onto the record list.
hlsmat_line = '{ci; hlsmat,ecp, \\' if allECP else '{ci; hlsmat,ls, \\'
linebuf.extend([
    '! spin-orbit CI\n',
    'lsint   ! workaround for error "TRYING TO READ BEYOND RECORD BOUNDARY"',
    hlsmat_line,
])

In [72]:
# add the record numbers to the SO-CI section
nline = 10  # number of records per line
line = None
for i, rec in enumerate(records):
    if i % nline == 0:
        if line is not None:
            if i == len(records) - 1:
                # terminate with semicolon
                linebuf.append(line + '; \\')
            else:
                # terminate with comma
                linebuf.append(line + ', \\')
        line = '    '
    else:
        line += ','
    line += f'{rec}'
linebuf.append(line)

In [73]:
# hls=1 prints the entire dipole matrix
# Close the SO-CI command block opened by the '{ci; hlsmat,...' line.
linebuf.append('    options,matel=1; print,vls=0,hls=1}')

In [74]:
# Molpro procedures called after each MRCI line generated above.  Both advance
# the counter k and store energies in the arrays hlsdiag, eref and ediff:
#   save1    -- for a single-root MRCI
#   savemany -- for a multi-root MRCI; loops over the Molpro variable NST
# NOTE(review): save1 stores `energd` in hlsdiag while savemany stores
# `energd4(h)`; confirm that the two are meant to differ.
procs_text = '''
PROC save1
    k=k+1
    hlsdiag(k)=energd
    eref(k)=energr
    ediff(k)=energr-ecas(k)
ENDPROC

PROC savemany
    do h=1,NST
        k=k+1
        hlsdiag(k)=energd4(h)
        eref(k)=energr(h)
        ediff(k)=energr(h)-ecas(k)
    enddo
ENDPROC
'''

In [75]:
# Finish the generated input with the procedure definitions and show it,
# one buffer entry per line.
linebuf.append(procs_text)
print(*linebuf, sep='\n')

! Code below generated by build_split_mrci_input.ipynb using style "Lz_condensed"
!    from file ac5z_r2p5.pro

ecas = energy
k=0
nst=2
{ci,maxit=60; wf,sym=1,spin=0; state,2,2,3; save,5101.2; option,nstati=3}; savemany;  ! 1-Sigma+
{ci,maxit=60; wf,sym=1,spin=0; state,1,1; save,5102.2; option,nstati=3}; save1;  ! 1-Delta
nst=4
{ci,maxit=60; wf,sym=2,spin=0; state,4,1,2,3,4; save,5201.2; option,nstati=4}; savemany;  ! {'1Π', '1Φ'}
{ci,maxit=60; wf,sym=3,spin=0; state,4,1,2,3,4; save,5301.2; option,nstati=4}; savemany;  ! {'1Π', '1Φ'}
nst=2
{ci,maxit=60; wf,sym=4,spin=0; state,2,2,3; save,5401.2; option,nstati=3}; savemany;  ! 1-Sigma-
{ci,maxit=60; wf,sym=4,spin=0; state,1,1; save,5402.2; option,nstati=3}; save1;  ! 1-Delta
{ci,maxit=60; wf,sym=1,spin=2; state,1,2; save,6101.2; option,nstati=4}; save1;  ! 3-Sigma+
nst=3
{ci,maxit=60; wf,sym=1,spin=2; state,3,1,3,4; save,6102.2; option,nstati=4}; savemany;  ! 3-Delta
nst=4
{ci,maxit=60; wf,sym=2,spin=2; state,4,1,2,3,4; save,6201.2; opt

In [76]:
# Sanity check: every CASSCF state must be covered by exactly one MRCI root.
# Only when the totals agree is the generated input copied to the clipboard.
n_casscf = len(casdf)
if count == n_casscf:
    print(f'Use the text above to calculate {count} MRCI states.')
    pyperclip.copy('\n'.join(linebuf))
    summary = (
        f'There are {len(records)} MRCI calculations.',
        f'The largest has {nstmax} roots.',
        f'CASSCF results were taken from:  {froot}',
        f'Blocking style is "{style}"',
        f'Bond length = {R:.4f}',
    )
    for message in summary:
        print(message)
else:
    print(f'*** Error:  there are {n_casscf} CASSCF states but {count} CI states! ***')

Use the text above to calculate 36 MRCI states.
There are 17 MRCI calculations.
The largest has 4 roots.
CASSCF results were taken from:  ac5z_r2p5.pro
Blocking style is "Lz_condensed"
Bond length = 2.5000


In [77]:
# Raises ZeroDivisionError on purpose -- presumably so that "Run All" stops
# here and does not continue into the independent LQUANT helper cells below.
1/0

ZeroDivisionError: division by zero

## LQUANT help (C2v)

In [None]:
# Specify the numbers of terms below
terms = {'Sigma+': 0, 'Sigma-': 2, 'Pi': 1, 'Delta': 2, 'Phi': 1, 'Gamma': 0}
terms['H'] = 0  # usually

In [None]:
# Echo the requested terms, then print one "state,...; lquant,..." line per
# C2v irrep.  Even-Lz terms (Sigma, Delta, Gamma) go to irreps 1 and 4,
# odd-Lz terms (Pi, Phi, H) to irreps 2 and 3.
print('Input:')
for term_name, n_requested in terms.items():
    print('\t{:<7s} {:2d}'.format(term_name, n_requested))
nterm = np.sum(list(terms.values()))
print(f'{nterm} terms')
even_shared = ['2']*terms['Delta'] + ['4']*terms['Gamma']   # components present in both irreps 1 and 4
odd_shared = ['1']*terms['Pi'] + ['3']*terms['Phi'] + ['5']*terms['H']
lquant = {
    1: ['lquant'] + ['0']*terms['Sigma+'] + even_shared,
    2: ['lquant'] + odd_shared,
}
lquant[3] = lquant[2]   # irreps 2 and 3 share the same list
lquant[4] = ['lquant'] + ['0']*terms['Sigma-'] + even_shared
# number of states per irrep (the leading 'lquant' keyword is not a state)
count = [len(values) - 1 for values in lquant.values()]
ntot = np.sum(count)
print(f'{ntot} MRCI states')
print('In order by irrep:')
for nstates, values in zip(count, lquant.values()):
    s = ','.join(values)
    print(f'state,{nstates}; {s}')