In [1]:
# Prepare a summary table of "raw Eso" values for valence terms 3P, 1D and 1S
#   for export to Excel

import sys, os, re
import pandas as pd
import numpy as np
sys.path.insert(0, r'../../atomic_SOC')
import molpro_subs2 as m2
import chem_subs as chem

In [2]:
def read_datafile(fname, sheet=None):
    """Read a headerless table from an Excel workbook or a CSV file.

    Parameters
    ----------
    fname : str
        Path of the file to read.  Names ending in '.csv' are read as CSV;
        names containing 'xlsx' (or ending in '.xls'/'.xlsm') are read as
        Excel workbooks.
    sheet : str, optional
        Worksheet to read from an Excel workbook.  When omitted, the only
        worksheet is used, or the user is prompted if there are several.
        Ignored for CSV files.

    Returns
    -------
    sheet : str or None
        Name of the worksheet that was read (None for a CSV file).
    df : pandas.DataFrame
        File contents, read with header=None (no row is used as a header).

    Raises
    ------
    ValueError
        If the file type cannot be recognized from its name.
    """
    froot = os.path.split(fname)[-1]
    ext = os.path.splitext(froot)[-1].lower()
    if ext == '.csv':
        # CSV files have no worksheets
        print(f'Reading CSV file {froot}')
        return None, pd.read_csv(fname, header=None)
    if ('xlsx' in fname) or (ext in ('.xls', '.xlsm')):
        # the context manager closes the workbook once it has been parsed
        with pd.ExcelFile(fname) as xl:
            if sheet is None:
                if len(xl.sheet_names) > 1:
                    print('Available worksheets:', xl.sheet_names)
                    sheet = input('\tchoose a worksheet: ')
                else:
                    sheet = xl.sheet_names[0]
            print(f'Reading worksheet "{sheet}" from Excel file {froot}')
            df = xl.parse(sheet, header=None)
        return sheet, df
    # previously this case fell through to an UnboundLocalError
    raise ValueError(f'Unrecognized file type for {froot}: expected CSV or Excel')

In [3]:
# Workbook that holds the SO-CI results: file name, its folder, and the full path
xlfile = 'Pb_SOCI_results.xlsx'
xldir = r'C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\MSS'
fxl = f'{xldir}{os.sep}{xlfile}'

In [4]:
# Load the 'letter codes' worksheet and check that each filename agrees with
# the descriptors (basis set, 3P weight, active space) listed in the same row.
print(f'Reading SO-CI letter codes from {fxl}')
xl = pd.ExcelFile(fxl)
df_letters = xl.parse('letter codes', skiprows=1)
ncalc = len(df_letters)
print(f'There are results for {ncalc} calculations')
re_num = re.compile(r'\d\d+')  # runs of two or more digits
# Check that filenames are consistent with other descriptors
nbad = 0   # number of filenames that look inconsistent
iPPl = []  # rows with PP basis set
iAEl = []  # rows with AE basis set
for irow, row in df_letters.iterrows():
    fn = row['Filename']
    # basis set
    bas = row['Basis set']
    if 'pp' in bas:
        iPPl.append(irow)
        bad = bas not in fn
    else:
        # all-electron calculation
        iAEl.append(irow)
        if 'pp' in fn:
            bad = True
        else:
            # remove suffix, working on a copy so that any warning below
            # still shows the basis-set label exactly as listed
            bas_root = bas
            for suf in ['dk3', 'dk', 'x2c']:
                bas_root = bas_root.replace(suf, '')
            bad = bas_root not in fn
    if bad:
        print(f'*** basis = {bas} for filename = {fn} looks wrong ***')
        nbad += 1
        continue  # don't check this filename further
    # multi-digit numbers in filename should be 3P weight or active space
    numl = re_num.findall(fn)
    if len(numl):
        wt = str(row['3P wt in CASSCF'])
        acts = row['Active space'].replace(')', '').replace('(', '').replace('/', '')
        # report and count each filename once, however many numbers disagree
        if any((wt != num) and (acts != num) for num in numl):
            print(f'*** filename {fn} does not match active space {row["Active space"]} or 3P weight {wt}')
            nbad += 1
if nbad:
    print(f'*** There are {nbad} questionable filenames ***')
else:
    print('Filenames look fine')

Reading SO-CI letter codes from C:\Users\irikura\OneDrive - NIST\Karl\atomic_SOC\MSS\Pb_SOCI_results.xlsx
There are results for 34 calculations
Filenames look fine


In [5]:
# location of Molpro outputs (relative path: ../calculations/Pb_I)
calcdir = f'..{os.sep}calculations{os.sep}Pb_I'

In [6]:
def make_casscf_dataframe(major_sections):
    """Summarize CASSCF states, and the atomic terms they form, from parsed output.

    Parameters
    ----------
    major_sections : dict
        Parsed output sections; the last element of major_sections['multi']
        is the section analyzed here.

    Returns
    -------
    dfcas : pandas.DataFrame
        One row per CASSCF state, with columns 'Label', 'irrep', 'S', 'E',
        'L**2', 'L' and 'term'.
    dfcasterm : pandas.DataFrame
        Result of m2.collect_atomic_terms(dfcas, 'E') with an added column
        'J_vals' from chem.possible_J_from_term().
    """
    # break section "multi" into sub-sections
    multisec = m2.multi_sections(major_sections['multi'][-1])
    statesym = m2.state_symmetry_groups(multisec['top'][-1])
    dfstates = m2.multi_results(multisec['results'])
    dfexpec = m2.multi_expec(multisec['trans'][-1])
    # Summarize CASSCF results
    dfcas = dfstates[['Label', 'irrep', 'E']].copy()
    # one spin value per state, in the order of the state-symmetry groups
    Svals = []
    for g in statesym:
        Svals.extend([chem.MULTSPIN[g['spin']]] * g['nstates'])
    dfcas.insert(2, 'S', Svals)
    # zero out any negative values of <L**2>; work on a private copy so that
    # the parser's DataFrame is not modified and the result does not depend
    # on whether '.values' hands back a writable view
    lsq = dfexpec['L**2'].to_numpy(dtype=float, copy=True)
    isneg = lsq < 0
    if isneg.any():
        print(f'   zeroing negative <L**2>:  {lsq[isneg]}')
        lsq[isneg] = 0
    dfcas['L**2'] = lsq
    # <L**2> = L(L+1), so sqrt() lies between L and L+1; truncation gives L.
    # Both columns are filled by position from the cleaned values.
    dfcas['L'] = np.sqrt(lsq).astype(int)
    # parity = 3 - 2*irrep maps irrep 1 to +1 and irrep 2 to -1
    dfcas['term'] = [chem.term_symbol(L, S, 3 - 2*irr, linear=False)
                     for S, L, irr in zip(dfcas.S, dfcas.L, dfcas.irrep)]
    dfcasterm = m2.collect_atomic_terms(dfcas, 'E')
    # Add J values
    dfcasterm['J_vals'] = [chem.possible_J_from_term(trm) for trm in dfcasterm['term']]
    return dfcas, dfcasterm

In [7]:
def make_mrci_dataframe(major_sections, dfcas=None):
    """Collect the MRCI states of every MRCI section into a DataFrame of terms.

    Parameters
    ----------
    major_sections : dict
        Parsed output sections; every element of major_sections['mrci'] is
        processed in turn.
    dfcas : pandas.DataFrame, optional
        CASSCF states with at least the columns 'S', 'Label' and 'term'
        (as built by make_casscf_dataframe).  When omitted, the module-level
        variable `dfcas` is used, which is how this function obtained it
        before the argument existed.

    Returns
    -------
    pandas.DataFrame
        Result of m2.collect_atomic_terms() applied to the per-state table
        using column 'Edav', with the 'term' labels passed through
        chem.enumerative_prefix(..., always=False).

    Raises
    ------
    NameError
        If `dfcas` is not passed and no module-level `dfcas` exists.
    """
    if dfcas is None:
        try:
            dfcas = globals()['dfcas']
        except KeyError:
            raise NameError("name 'dfcas' is not defined") from None
    # Parse MRCI results; one DataFrame per MRCI section, combined at the end
    frames = []
    for sec in major_sections['mrci']:
        mrcisec = m2.mrci_sections(sec)
        mrci_meta = m2.mrci_info(mrcisec['top'][0])
        mrci_iter = m2.mrci_iterations(mrcisec['iterations'][0])
        mrci_results = m2.mrci_results(mrcisec['results'][0])
        nstate = len(mrci_results['state'])
        lbll =  []  # list of state labels
        c0rot = []  # list of C0 (rotated) values
        El =    []  # list of energies
        davl =  []  # list of Davidson-corrected energies (rotated ref)
        erefl = []  # list of reference energies
        spinmult = mrci_meta['smult']
        S = chem.MULTSPIN[spinmult]
        irrep = mrci_meta['irrep']
        for lbl, v in mrci_results['state'].items():
            lbll.append(lbl)
            # look up both values before appending, so that a partial failure
            # cannot leave the lists with different lengths
            try:
                c0 = v['C0']['rotated']
                edav = v['Energy']['davidson']['rotated']
            except KeyError:
                # no "rotated" values if there is only one state
                c0 = v['C0']['relaxed']
                edav = v['Energy']['davidson']['relaxed']
            c0rot.append(c0)
            davl.append(edav)
            El.append(v['Energy']['total'])
            erefl.append(v['Energy']['ref E'])

        # Get CASSCF (fixed) term composition of each MRCI state
        fixref = m2.coefficients_of_refs(mrcisec['results'][0])  # coeffs of CASSCF refs
        subcas = dfcas[dfcas.S == S]
        if nstate == 1:
            # 'fixref' is [0] because that text block is not available
            # just assign 100% to the only CASSCF reference state
            ilead = np.array([0])
            cascomp = [{subcas.iloc[0]['term']: 1.}]
        else:
            # one entry per MRCI state, i.e. per ROW of 'fixref'
            ilead = np.zeros(fixref.shape[0], dtype=int)
            cascomp = []
            for ist in range(fixref.shape[0]):
                coeffs = fixref[ist, :]
                # CASSCF reference with the largest |coefficient| leads this state
                ilead[ist] = np.argmax(np.abs(coeffs))
                # accumulate squared coefficients by CASSCF term
                termd = {}
                for icas, c in enumerate(coeffs):
                    casterm = subcas.iloc[icas]['term']
                    termd[casterm] = termd.get(casterm, 0) + c*c
                cascomp.append(termd)
        if len(set(ilead)) < len(ilead):
            print('    ** Warning: one CASSCF state leads more than one MRCI state')
        reflbl = [subcas.iloc[i]['Label'] for i in ilead]
        x = [v for v in mrci_iter['init_ref'].values()][:nstate]
        init_refE = [x[i] for i in ilead]
        terml = [subcas.iloc[i]['term'] for i in ilead]
        data = {'Label': lbll, 'irrep': irrep, 'S': S, 'E': El, 'Edav': davl,
                'C0': c0rot, 'Eref': erefl, 'init_ref': init_refE, 'iref_nr': ilead + 1,
                'irlbl': reflbl, 'term': terml, 'CASterm': cascomp}
        frames.append(pd.DataFrame(data))
    # a single concat avoids repeatedly copying a growing DataFrame
    dfmrci = pd.concat(frames, ignore_index=True) if frames else pd.DataFrame()
    # add column for difference between reference energy and corresponding CASSCF
    d_ref = dfmrci.Eref - dfmrci.init_ref
    dfmrci.insert(8, 'D_ref', d_ref)
    # Collect into terms
    dfciterm = m2.collect_atomic_terms(dfmrci, 'Edav')
    # Make prefixes enumerative
    dfciterm['term'] = chem.enumerative_prefix(dfciterm.term.values, always=False)
    return dfciterm

In [8]:
# create a DataFrame of energies of MRCI+Q terms and of the lowest SO-CI level
# (one row per calculation listed in df_letters, keeping that table's row index)
dfE_index = []
dfE_rows = []
for irow, row in df_letters.iterrows():
    lcode = row.Calc
    fpro = os.sep.join([calcdir, row.Filename])
    print(f'Calc {lcode} ({row.Filename})')
    # the parser is given the file name; no separate file handle is needed
    major_sections, linenos = m2.identify_sections(fpro)
    dfcas, dfcasterm = make_casscf_dataframe(major_sections)
    dfciterm = make_mrci_dataframe(major_sections)
    # break section "soci" into sub-sections
    sosec = m2.soci_sections(major_sections['soci'][0])
    so_energies = m2.soci_energies(sosec['so_levels'][0])
    # Find the GS and term energies
    newrow = [lcode]
    for trm in ['3P', '1D', '1S']:
        # a term may carry an enumerative prefix, e.g. '(1)3P'
        subdf = dfciterm[dfciterm.term.isin([trm, '(1)' + trm])]
        if len(subdf):
            eterm = subdf.Edav.values[0]
        else:
            # term is missing from this calculation
            eterm = np.nan
        newrow.append(eterm)
    newrow.append(min(so_energies['E']))
    dfE_index.append(irow)
    dfE_rows.append(newrow)
# build the table once instead of growing it one row at a time
dfE = pd.DataFrame(dfE_rows, index=dfE_index, columns=['Calc', '3P', '1D', '1S', 'SO-CI'])

Calc A (pb_3PDPS1DSPDS_actzpp.pro)
Calc B (pb_3PDPS1DSPDS_acqzpp.pro)
Calc D (pb_3PDPS1DSPDS_ac5zpp.pro)
Calc F (pb_3P1D_actz.pro)
Calc G (pb_3P1D_acqz.pro)
Calc H (pb_3P1D_44_actzpp.pro)
Calc I (pb_3P1D_actzpp.pro)
Calc J (pb_3P1D_44_ac5zpp.pro)
Calc K (pb_3P1DS_44_actzpp.pro)
Calc L (pb_3P1DS_44_ac5zpp.pro)
Calc M (pb_3PDPS1DSPDS_dac5zpp.pro)
Calc N (pb_3PDPS1DSPDS_dacqzpp.pro)
   zeroing negative <L**2>:  [-2.5e-11 -5.0e-12 -1.9e-11]
Calc O (pb_3PDPS1DSPDS_dacqzpp_25.pro)
   zeroing negative <L**2>:  [-1.5e-11 -2.0e-12 -1.2e-11]
Calc P (pb_3PDPS1DSPDS_tacqzpp.pro)
   zeroing negative <L**2>:  [-3.6e-11 -6.0e-12 -2.9e-11]
Calc Q (pb_3PDPSP1DSPDS_tacqzpp.pro)
   zeroing negative <L**2>:  [-2.3e-11 -4.0e-12 -1.9e-11]
Calc R (pb_3P_44_ac5zpp.pro)
Calc S (pb_3P_ac5zpp.pro)
Calc T (pb_3P1D_ac5zpp.pro)
Calc U (pb_3P1DS_ac5zpp.pro)
Calc V (pb_3PDP1DS_ac5zpp.pro)
Calc W (pb_3PDPS1DSP_ac5zpp.pro)
Calc X (pb_3PDPS1DSPDS_tac5zpp.pro)
Calc Y (pb_3PDPS1DSPDS_dacqzpp_26.pro)
   zeroing negative <L

In [9]:
# Put the summary table on the system clipboard, leaving out the row index
dfE.to_clipboard(index=False)
print('DataFrame copied to clipboard for pasting into Excel')

DataFrame copied to clipboard for pasting into Excel


In [10]:
# Display the summary table as the cell output
dfE

Unnamed: 0,Calc,3P,1D,1S,SO-CI
0,A,-192.502341,-192.477229,-192.437913,-192.551932
1,B,-192.63721,-192.612238,-192.572827,-192.687797
2,D,-192.696175,-192.67118,-192.631872,-192.747052
3,F,-20884.832945,-20884.807507,,-20884.885074
4,G,-20885.450142,-20885.4245,,-20885.50357
5,H,-192.501577,-192.4765,,-192.531308
6,I,-192.503407,-192.478637,,-192.532614
7,J,-192.695606,-192.670966,,-192.725565
8,K,-192.501531,-192.476766,-192.43722,-192.547317
9,L,-192.695535,-192.67122,-192.632345,-192.741985
