In [105]:
#!/usr/bin/env python3
"""
Generate Molpro SO-CI input that does MRCI with separate
blocks for each irrep, to minimize degeneracy-breaking. 
Requires Molpro CASSCF output.
C2v symmetry (4 irreps) is assumed. 
KKI 1/13/2022
"""
import re, sys, copy, pyperclip
import numpy as np
import pandas as pd
sys.path.insert(0, '../atomic_SOC')
import molpro_subs as mpr
import chem_subs as chem

pd.set_option('display.width', 1000)

### Specify CASSCF file, and how MRCI should be blocked

In [111]:
style = 'Lz'  # choices: 'Lz', 'split', 'irrep', 'single'

fcas = 'ac5z_35D15Q_r1p5400_lz.pro'
fcas = '../UMemphis/mgcl_small4_c2.pro'

In [112]:
print('Will read CASSCF info from file: {:s}'.format(fcas))
if style == 'Lz':
    print('MRCI will be blocked by Lz')
elif style == 'split':
    print('MRCI will be in blocks of similar size')
elif style == 'irrep':
    print('MRCI will be blocked simply by irrep')
elif style == 'single':
    print('Each MRCI will have one state')
else:
    print('bad choice for "style"')
    1/0

Will read CASSCF info from file: ../UMemphis/mgcl_small4_c2.pro
MRCI will be blocked by Lz


In [113]:
try:
    PG = mpr.read_point_group(fcas)
except FileNotFoundError:
    print('File not found!')
    print('Here are the Molpro outputs in directory {:s}:'.format(fdir))
    import glob
    flist = glob.glob(fdir + '*.pro')
    for f in flist:
        print(f.split('\\')[-1])
if PG != 'C2v':
    print('Your calculation is in point group', PG)
crd, lineno_crd = mpr.read_coordinates(fcas, linenum=True)
# get diatomic bond length
G = chem.Geometry(crd, intype='DataFrame', units='bohr')
G.toAngstrom()
R = np.round(G.distance(0, 1), 6)  # round the bond length to 6 digits
print('Bond length = {:.4f}'.format(R))
caslist, lineno_cas = mpr.readMULTI(fcas, PG=PG, linenum=True)
CAS = caslist[-1]   # assume the last CASSCF to be the relevant one

Your calculation is in point group C2
Bond length = 2.3000


In [114]:
casdf = CAS.results[['Irrep', 'Label', 'Energy', 'Term']].copy()
casdf['S'] = [chem.spinname(x)-1 for x in CAS.results.Spin]
casdf['Lz'] = np.round(np.sqrt(np.abs(CAS.results.LzLz)), 0).astype(int)
spin = sorted(set(casdf.S))
irreps = sorted(set(CAS.results.Irrep))
lzvals = sorted(set(casdf.Lz))

In [115]:
# check for proper pairing of degenerate states
broken = False
for grp, dfg in casdf.groupby(['S', 'Lz']):
    if grp[1] == 0:
        # ignore Sigma states
        continue
    # number in each irrep should be equal
    grpi = dfg.groupby('Irrep')
    lens = grpi.size().values
    if lens[0] != lens[1]:
        print('Broken pair somewhere')
        display(dfg.sort_values('Energy'))
        broken = True
if not broken:
    print('All Lambda state-pairs are closed')

IndexError: index 1 is out of bounds for axis 0 with size 1

In [116]:
# order the CASSCF states by energy within each (S, irrep) group
newdf = pd.DataFrame(columns=casdf.columns)
for lbl, dg in casdf.groupby(['S', 'Irrep']):
    dt = dg.copy().sort_values('Energy').reset_index(drop=True)
    #display(dt)
    newdf = newdf.append(dt)
# report number of states in each spin multiplicity
for S, grp in newdf.groupby('S'):
    print(f'{len(grp)} states with S = {S} ({chem.SPINLABEL[S]})')

3 states with S = 1 (Singlet)


In [117]:
newdf

Unnamed: 0,Irrep,Label,Energy,Term,S,Lz
0,1,1.1,-660.93335,2Σ,1,0
0,2,1.2,-660.810538,2Π,1,1
1,2,2.2,-660.810538,2Π,1,1


In [118]:
def finish_line():
    # only use record suffix ".2" as recommended by D. Kreplin
    global line, linebuf, nstprev, records, iblock, count, nstmax
    suf = 2
    recnum = (5 + S//2) * 1000 + irrep*100 + iblock + 0.1*suf
    line += f'; save,{recnum};'
    if nstati:
        line += f' option,nstati={nstati}'
    line += '};'
    records.append(recnum)
    iblock += 1
    if nst > 1: 
        if nst != nstprev:
            linebuf.append(f'nst={nst}')
            nstprev = nst
        line += ' savemany'
    else:
        line += ' save1'
    linebuf.append(line)
    count += nst
    nstmax = max(nstmax, nst)
    return

In [119]:
linebuf = ['! Code below generated by build_split_mrci_input.ipynb using style "{:s}"'.format(style),
           '', 'ecas = energy', 'k=0']  # lines to be added to the Molpro CASSCF input file

In [120]:
if style == 'Lz':
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0
    for S in spin:
        spindf = newdf[newdf.S == S]
        for irrep in irreps:
            irrdf = spindf[spindf.Irrep == irrep].copy().reset_index(drop=True)
            #display(irrdf)
            nstati = len(irrdf)
            iblock = 1   # suffix for record label
            for lz in lzvals:
                # build one line of input
                lzdf = irrdf[irrdf.Lz == lz]
                nst = len(lzdf)
                count += nst
                nstmax = max(nstmax, nst)
                if nst < 1:
                    continue
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                for i in lzdf.index:
                    line += ',' + '{:d}'.format(int(i) + 1)
                recnum = (5 + S//2)*1000 + irrep*100 + iblock + 0.2
                records.append(recnum)
                iblock += 1
                line += f'; save,{recnum}; option,nstati={nstati}' + '};'
                if nst == 1:
                    line += ' save1;'
                else:
                    if nst != nstprev:
                        linebuf.append(f'nst={nst}')
                        nstprev = nst
                    line += ' savemany;'
                # add the comment
                pre = '{:d}-'.format(S + 1)
                if lz == 0:
                    if irrep == 1:
                        line += f'  ! {pre}Sigma+'
                    elif irrep == 4:
                        line += f'  ! {pre}Sigma-'
                    else:
                        chem.print_err('', f'Bad irrep = {irrep} for Lz = 0')
                else:
                    line += f'  ! {pre}{mpr.LAMBDA[lz].title()}'
                linebuf.append(line)

In [121]:
def finish_line_old():
    global line, linebuf, nstprev, records, iblock, count, nstmax
    suf = max(S, 1)  # molpro seems to ignore suffix=0
    recnum = 5000 + irrep*100 + iblock + 0.1*suf
    line += f'; save,{recnum};'
    if nstati:
        line += f' option,nstati={nstati}'
    line += '};'
    records.append(recnum)
    iblock += 1
    if nst > 1: 
        if nst != nstprev:
            linebuf.append(f'nst={nst}')
            nstprev = nst
        line += ' savemany'
    else:
        line += ' save1'
    linebuf.append(line)
    count += nst
    nstmax = max(nstmax, nst)
    return

In [122]:
if style == 'split':
    # assume that irreps 2 and 3 are already equal
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0
       
    for S in spin:
        spindf = newdf[newdf.S == S]
        nstate = {}
        irrdf = {}
        for irrep in irreps:
            # count the states in each irrep
            tdf = spindf[spindf.Irrep == irrep].copy()
            tdf = tdf.sort_values('Energy').reset_index(drop=True)
            nstate[irrep] = len(tdf)
            irrdf[irrep] = tdf
        for irrep in irreps:
            iblock = 1   # part of record number
            nstati = 0   # don't need nstati
            if (irrep in [2, 3]) or (nstate[irrep] < nstate[5-irrep]):
                # easy case
                nst = nstate[irrep]
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
            else:
                # irrep 1 or 4, larger block
                # split this block; try to keep Lz > 0 in the larger block
                otherblock = nstate[5-irrep]  # the size of the other irrep 
                nstati = nstate[irrep]
                tdf = irrdf[irrep]
                # put Lz > 0 first, then sigmas
                iord = list(tdf[tdf.Lz > 0].index)
                iord = iord + list(tdf[tdf.Lz == 0].index)
                # make a block of same size as 'otherblock'
                idx = iord[:otherblock]
                iord = iord[otherblock:]
                nst = otherblock
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                for i in idx:
                    line += ',{:d}'.format(i+1)
                finish_line()
                # put the rest in a second block
                nst = len(iord)
                line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
                for i in iord:
                    line += ',{:d}'.format(i+1)
            finish_line()        
 

In [123]:
if style == 'irrep':
    # simple treatment, one block for each irrep 
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0
       
    for S in spin:
        spindf = newdf[newdf.S == S]
        nstate = {}
        irrdf = {}
        for irrep in irreps:
            # count the states in each irrep
            tdf = spindf[spindf.Irrep == irrep].copy()
            tdf = tdf.sort_values('Energy').reset_index(drop=True)
            nstate[irrep] = len(tdf)
            irrdf[irrep] = tdf
        for irrep in irreps:
            iblock = 1   # part of record number
            nstati = 0   # don't need nstati
            nst = nstate[irrep]
            line = baseline + f'wf,sym={irrep},spin={S}; state,{nst}'
            finish_line()         

In [124]:
if style == 'single':
    # each MRCI has only one root
    records = []  # record labels for saving states
    maxit = 60  # for the MRCI
    baseline = '{ci,' + f'maxit={maxit}; '
    count = 0  # count of states (to be compared with input CASSCF count)
    nstprev = 1
    nstmax = 0
    
    nst = 1
    for S in spin:
        spindf = newdf[newdf.S == S]
        nstate = {}
        irrdf = {}
        for irrep in irreps:
            tdf = spindf[spindf.Irrep == irrep]
            nstati = len(tdf)
            iblock = 1   # part of record number
            for i in range(nstati):
                line = baseline + f'wf,sym={irrep},spin={S}; state,1,{i+1}'
                finish_line()         

In [125]:
linebuf.append('\ntable,hlsdiag,lzlz,ecas,eref,ediff\n')

In [126]:
linebuf.append('! spin-orbit CI\n{ci; hlsmat,ecp, \\')

In [127]:
# add the record numbers to the SO-CI section
nline = 10  # number of records per line
line = None
for i, rec in enumerate(records):
    if i % nline == 0:
        if line is not None:
            if i == len(records) - 1:
                # terminate with semicolon
                linebuf.append(line + '; \\')
            else:
                # terminate with comma
                linebuf.append(line + ', \\')
        line = '    '
    else:
        line += ','
    line += f'{rec}'
linebuf.append(line)

In [128]:
linebuf.append('    options,matel=1; print,vls=0,hls=0}')

In [129]:
procs_text = '''
PROC save1
    k=k+1
    hlsdiag(k)=energd
    eref(k)=energr
    ediff(k)=energr-ecas(k)
ENDPROC

PROC savemany
    do h=1,NST
        k=k+1
        hlsdiag(k)=energd4(h)
        eref(k)=energr(h)
        ediff(k)=energr(h)-ecas(k)
    enddo
ENDPROC
'''

In [130]:
linebuf.append(procs_text)
print('\n'.join(linebuf))

! Code below generated by build_split_mrci_input.ipynb using style "Lz"

ecas = energy
k=0
{ci,maxit=60; wf,sym=1,spin=1; state,1,1; save,5101.2; option,nstati=1}; save1;  ! 2-Sigma+
nst=2
{ci,maxit=60; wf,sym=2,spin=1; state,2,1,2; save,5201.2; option,nstati=2}; savemany;  ! 2-Pi   

table,hlsdiag,lzlz,ecas,eref,ediff

! spin-orbit CI
{ci; hlsmat,ecp, \
    5101.2,5201.2
    options,matel=1; print,vls=0,hls=0}

PROC save1
    k=k+1
    hlsdiag(k)=energd
    eref(k)=energr
    ediff(k)=energr-ecas(k)
ENDPROC

PROC savemany
    do h=1,NST
        k=k+1
        hlsdiag(k)=energd4(h)
        eref(k)=energr(h)
        ediff(k)=energr(h)-ecas(k)
    enddo
ENDPROC



In [131]:
# check for non-integer Lz values
cruft = casdf.Lz - np.round(casdf.Lz, 0)
if cruft.any():
    print('*** There are non-integer values of Lz')
else:
    print('Lz values look OK')

Lz values look OK


In [None]:
if count != len(casdf):
    print(f'*** Error:  there are {len(casdf)} CASSCF states but {count} CI states! ***')
else:
    print(f'Paste the clipboard into your Molpro input file to calculate {count} MRCI states.')
    pyperclip.copy('\n'.join(linebuf))
    print(f'There are {len(records)} MRCI calculations.')
    print(f'The largest has {nstmax} roots.')
    print(f'CASSCF results were taken from:  {fcas}')
    print(f'Blocking style is "{style}"')
    print('Bond length = {:.4f}'.format(R))

### LQUANT help (C2v), if desired

In [None]:
# Specify the numbers of terms in the dict below
terms = {'Sigma+': 0, 'Sigma-': 2, 'Pi': 1, 'Delta': 2, 'Phi': 1, 'Gamma': 0, 'H': 0}

In [None]:
print('Input:')
for k, v in terms.items():
    print('\t{:<7s} {:2d}'.format(k, v))
nterm = np.sum([v for k, v in terms.items()])
print(f'{nterm} terms')
lquant = {irr: ['lquant'] for irr in [1,2,3,4]}
lquant[1] += ['0']*terms['Sigma+'] + ['2']*terms['Delta'] + ['4']*terms['Gamma']
lquant[2] += ['1']*terms['Pi'] + ['3']*terms['Phi'] + ['5']*terms['H']
lquant[3] = lquant[2]
lquant[4] += ['0']*terms['Sigma-'] + ['2']*terms['Delta'] + ['4']*terms['Gamma']
count = [len(v)-1 for k, v in lquant.items()]
ntot = np.sum(count)
print(f'{ntot} MRCI states')
print('In order by irrep:')
for k, v in lquant.items():
    s = ','.join(v)
    print(f'state,{count[int(k)-1]}; {s}')