In [1]:
# Generate working reactions, parallelized
import yaml, sys, os, random, re, glob
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import sympy
sys.path.insert(0, '../../karlib')
import chem_subs as chem
import multirx_subs as mrx

In [2]:
# Process all available molecular YAML files
# glob.glob() returns matches in arbitrary (filesystem) order; sort them so that
# the insertion order of moldata/Gdict is the same on every run.
yamls = sorted(glob.glob(os.path.join(mrx.MDAT, '*.yml')))
moldata = {}   # molecule name -> data returned by mrx.read_molec_yaml()
Gdict = {}     # molecule name -> chem.Geometry built from the YAML coordinates
for file in yamls:
    # the molecule name is the file name without directory or extension
    fbase = os.path.basename(file)
    molec = os.path.splitext(fbase)[0]
    myml = mrx.read_molec_yaml(molec)
    # return values are ignored, so these calls presumably update myml
    # in place -- TODO confirm against multirx_subs
    mrx.thermo_functions(myml, T=0, zpe_scale=0.98)
    mrx.compute_E0(myml)
    moldata[molec] = myml
    G = chem.Geometry(myml['Geometry']['coordinates'])
    # store the spin multiplicity
    G.set_spinmult(myml['Spin_mult'])
    Gdict[molec] = G



In [3]:
def print_delta(rxns):
    """Print balanced reactions and their calculated enthalpy changes.

    Parameters
    ----------
    rxns : list
        Either a single reaction (a sequence of [molecule, coefficient]
        pairs) or a list of such reactions.  Negative coefficients are
        printed on the left-hand side, positive ones on the right; zero
        coefficients are omitted.

    Returns
    -------
    None.  Output goes to stdout.

    Notes
    -----
    Reads the notebook-level names `target`, `moldata` and `Gdict` (no
    `global` declaration is needed because they are only read).
    """
    # must be a list of reactions
    if not rxns:
        # empty list
        print('No reactions')
        return
    # A lone reaction begins with a [molecule, coeff] pair, so its [0][0] is
    # the molecule name; a list of reactions has a pair (list or tuple) there.
    if isinstance(rxns[0][0], (list, tuple)):
        rlist = rxns
    else:
        rlist = [rxns]
    # are the reactions balanced?
    oklist = mrx.check_reactions_balance(rlist, Gdict)
    okrx = [r for r, ok in zip(rlist, oklist) if ok]
    print(f'Of {len(rlist)} reactions, {len(okrx)} are balanced.')
    if not okrx:
        # nothing left to evaluate
        return
    # mrx.eq5_sums() returns dH/x0, not dH
    calcH, calcS = mrx.eq5_sums(okrx, target, moldata)
    for rx, H in zip(okrx, calcH):
        lhs = []
        rhs = []
        for pair in rx:
            mol = pair[0]
            c = pair[1]
            if c == 0:
                # species does not take part; do not print "0 X"
                continue
            # present coefficients as rational numbers
            crat = sympy.Rational(abs(c)).limit_denominator(1000)
            term = '{:s} {:s}'.format(f'{crat}', mol)
            if c < 0:
                # left side of equation
                lhs.append(term)
            else:
                # right side
                rhs.append(term)
        print('{:s} = {:s}'.format(' + '.join(lhs), ' + '.join(rhs)))
        print('\tchange in calc. H = {:.1f} kJ/mol'.format(-H))  # assuming x0 = -1

In [4]:
# Molecule for which reactions are generated.  `target` is read as a global
# by print_delta() and passed to the mrx.reaction_*() calls in later cells.
target = 'phno2'
# Geometry of the target; later cells call G.stoichiometry() and G.list_bonds()
G = Gdict[target]
# element -> atom count for the target (not referenced again in the visible cells)
tstoich = G.stoichiometry(asdict=True)

In [5]:
# Show the elemental composition of the target (atoms as the "components")
print('Component = atom')
print(G.stoichiometry(asdict=True))
#G.printMOL('test.mol', title='molecule_components.ipynb')

Component = atom
{'C': 6, 'N': 1, 'O': 2, 'H': 5}


In [6]:
# Bond-separation reaction for the target.  Note this generator is given the
# target's Geometry `G`, whereas the other reaction_*() cells pass `Gdict`.
rxn = mrx.reaction_bond_separation(target, G)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 7 ch4 + 2 nh3 = 3 c2h4 + 1 ch3nh2 + 3 c2h6 + 2 hno + 1 h2
	change in calc. H = 647.7 kJ/mol


In [7]:
# Isomerization reactions of the target among the molecules in Gdict.
# print_delta() prints 'No reactions' when the returned list is empty.
rxns = mrx.reaction_isomerization(target, Gdict)
print_delta(rxns)

No reactions


In [8]:
# Hydration reaction for the target, then print it with its calculated delta-H
rxn = mrx.reaction_hydration(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 4 h2o + 7 h2 = 6 ch3oh + 1 nh3
	change in calc. H = -314.9 kJ/mol


In [9]:
# Hydrofluorination reaction for the target, then print it with its calculated delta-H
rxn = mrx.reaction_hydrofluorination(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 6 hf + 7 h2 = 6 ch3f + 1 nh3 + 2 h2o
	change in calc. H = -335.7 kJ/mol


In [10]:
# Hydrochlorination reaction for the target, then print it with its calculated delta-H
rxn = mrx.reaction_hydrochlorination(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 8 hcl + 5 h2 = 6 ch3cl + 1 nh3 + 2 hocl
	change in calc. H = 19.3 kJ/mol


In [11]:
# Decomposition of the target into its elements; fractional coefficients
# are shown as rationals by print_delta()
rxn = mrx.reaction_to_elements(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 = 6 c + 1/2 n2 + 1 o2 + 5/2 h2
	change in calc. H = 4151.5 kJ/mol


In [12]:
# Hydrogenation reaction for the target, then print it with its calculated delta-H
rxn = mrx.reaction_hydrogenation(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 13 h2 = 6 ch4 + 1 nh3 + 2 h2o
	change in calc. H = -997.4 kJ/mol


In [13]:
# Oxygenation reaction for the target, then print it with its calculated delta-H
rxn = mrx.reaction_oxygenation(target, Gdict)
print_delta(rxn)

Of 1 reactions, 1 are balanced.
1 phno2 + 29/4 o2 = 6 co2 + 1 no2 + 5/2 h2o
	change in calc. H = -3001.1 kJ/mol


In [14]:
# Table of the bonds in the target geometry; the bare last expression makes
# the notebook render the returned table.
df = G.list_bonds()
df

Unnamed: 0,i1,i2,el1,el2,dist,ring,order
0,0,1,C,C,1.387513,True,2
1,0,5,C,C,1.387513,True,1
2,0,6,C,N,1.478039,False,1
3,1,2,C,C,1.387537,True,1
4,1,9,C,H,1.078496,False,1
5,2,3,C,C,1.391227,True,2
6,2,10,C,H,1.080835,False,1
7,3,4,C,C,1.391227,True,1
8,3,11,C,H,1.081341,False,1
9,4,5,C,C,1.387537,True,2


In [15]:
from sympy import nsimplify

def cull_to_disjoint_educts(rxns, verbose=True):
    """Select reactions so that no educt occurs in more than one of them.

    Parameters
    ----------
    rxns : list
        List of reactions; each reaction is a list of [educt, coeff] pairs
        whose first pair is the target (it is ignored for scoring and for
        the disjointness test).
    verbose : bool, optional
        If True (default, the original behaviour) print the five lowest
        scores.

    Returns
    -------
    list
        The retained reactions, lowest score first.  Reactions with fewer
        educts and small integer coefficients are preferred.

    Notes
    -----
    Coefficients are rationalised with Fraction.limit_denominator(1000),
    the same cap print_delta() uses for display.  Unlike sympy.nsimplify(),
    this always yields a rational, so reading the denominator cannot fail
    on values that nsimplify would express with radicals.
    """
    from fractions import Fraction  # stdlib; local so this cell is self-contained

    max_denom = 1000
    # score by coefficient ugliness
    rank = []
    for rxn in rxns:
        score = (len(rxn) - 1) * 2   # penalty for more educts
        for pair in rxn[1:]:
            coeff = float(pair[1])
            denom = Fraction(coeff).limit_denominator(max_denom).denominator
            if denom > 1:
                # penalty for fraction (to encourage homologous reactions)
                score += denom
            # penalty for large coefficient (multiplies uncertainties)
            score += abs(coeff)
        rank.append(float(score))
    # stable sort: reactions with equal score keep their input order
    idx = np.argsort(rank, kind='stable')
    if verbose:
        print('>>> lowest ranks:', np.array(rank)[idx[:5]])
    # do the cull: greedily accept reactions whose educts are all unused
    used = set()
    kept = []
    for i in idx:
        educts = {pair[0] for pair in rxns[i][1:]}
        if used.isdisjoint(educts):
            # keep this reaction
            kept.append(rxns[i])
            used |= educts
    return kept

In [16]:
# Build the table of Benson-group counts for the target and candidate educts.
# `detail` is forwarded to mrx.educts_Benson_groups(); its meaning is defined
# there -- TODO confirm what the levels select.
detail = 0
print('detail =', detail)
dfBgrp = mrx.educts_Benson_groups(target, Gdict, detail=detail, warn=False)
# display() is provided by IPython inside the notebook
display(dfBgrp)

detail = 0
{'C-(C)2(H)': 5, 'N-(C)(O)2': 1, 'C-(C)2(N)': 1}
benzene {'C-(C)2(H)'}
c3h3 {'C-(C)2(H)'}
c5h5 {'C-(C)2(H)'}
cyclobutadiene {'C-(C)2(H)'}


Unnamed: 0,molec,C-(C)2(H),C-(C)2(N),N-(C)(O)2
0,phno2,5,1,1
1,benzene,6,0,0
2,c3h3,3,0,0
3,c5h5,5,0,0
4,cyclobutadiene,4,0,0


In [17]:
# Solve for reactions that conserve the Benson-group descriptors, using the
# parallel solver with 6 processes (nproc=6).
rxns = mrx.solve_descriptor_reactions_par(dfBgrp, nproc=6, verbose=True)

target = phno2
There are 4 educts besides the target
There are 3 descriptors: ['C-(C)2(H)', 'C-(C)2(N)', 'N-(C)(O)2']
Dimen = 3, maxeduct = 3
--- neduct = 1 ---
There are 4 educt tuples
--- neduct = 2 ---
There are 6 educt tuples


In [18]:
# Drop reactions that are not element-balanced, then print the survivors.
# NOTE(review): the recorded run of this cell ended with
# "TypeError: 'NoneType' object is not subscriptable".  Whether the None is
# `rxns` from the previous cell or an entry inside it cannot be told from
# this notebook -- confirm what solve_descriptor_reactions_par() returned.
rbal = mrx.discard_unbalanced_reactions(rxns, Gdict, verbose=True)
print_delta(rbal)

TypeError: 'NoneType' object is not subscriptable

In [19]:
# Build the table of bond-type counts for the target and candidate educts
# (columns such as C-C, C-N, N-O, C-H in the recorded output).
detail = 0
dfBbonds = mrx.Benson_bonds_table(target, Gdict, detail=detail, warn=False)
display(dfBbonds)

Unnamed: 0,molec,C-C,C-N,N-O,C-H
0,phno2,9,1,4,5
1,acenaphthylene,20,0,0,8
2,allene,4,0,0,4
3,benzene,9,0,0,6
4,benzyne,10,0,0,4
5,butadiene,5,0,0,6
6,butane,3,0,0,10
7,butene1,4,0,0,8
8,butyne1,5,0,0,6
9,c2h2,3,0,0,2


In [20]:
# Solve for reactions that conserve the bond-type descriptors and report the
# wall-clock time taken.
# NOTE(review): the recorded run raised "TypeError: 'int' object is not
# subscriptable" during the solver call, so `rxns` was not rebound here.
import time
start = time.time()
rxns = mrx.solve_descriptor_reactions_par(dfBbonds, verbose=True)
end = time.time()
# elapsed seconds, also shown as minutes and hours
t = end - start
print('Time = {:.0f} s = {:.1f} min = {:.2f} hr'.format(t, t/60, t/3600))

target = phno2
There are 51 educts besides the target
There are 4 descriptors: ['C-C', 'C-N', 'N-O', 'C-H']
Dimen = 4, maxeduct = 4
--- neduct = 1 ---
There are 51 educt tuples
--- neduct = 2 ---
There are 1275 educt tuples


TypeError: 'int' object is not subscriptable

In [None]:
# Keep only balanced reactions, passing an explicit tolerance (tol=1.e-3)
rbal = mrx.discard_unbalanced_reactions(rxns, Gdict, tol=1.e-3, verbose=True)

In [None]:
# Spot-check the stoichiometry of one molecule; raises KeyError if no
# 'cch2' entry was loaded into Gdict
Gdict['cch2'].stoichiometry(asdict=True)

In [None]:
def all_reactions_for_target(target, Gdict, verbose=False):
    """Generate reactions for `target` using element counts as descriptors.

    Parameters
    ----------
    target : str
        Key of the target molecule in `Gdict`.
    Gdict : dict
        Geometry() objects keyed by molecule name.
    verbose : bool, optional
        Forwarded to mrx.solve_descriptor_reactions().

    Returns
    -------
    (rxns, df) : tuple
        `rxns` is whatever mrx.solve_descriptor_reactions() returns (a list
        of reactions, each a list of [educt, coeff] pairs, according to the
        original comment); `df` is the DataFrame of element counts that was
        passed to the solver, with the target in row 0.

    The elapsed solver time is printed to stdout.
    """
    # Imported here so the function does not rely on some other notebook
    # cell having executed `import time` first.
    import time

    st = Gdict[target].stoichiometry(asdict=True)
    tels = list(st.keys())  # chemical elements in the target molecule
    tset = set(tels)
    # build DataFrame of all molecules whose elements are a subset of those of target
    cols = ['molec'] + tels
    df = pd.DataFrame(columns=cols)
    df.loc[0] = [target] + [st[el] for el in tels]
    for molec, G in Gdict.items():
        if molec == target:
            continue
        st = G.stoichiometry(asdict=True)
        if set(st.keys()) <= tset:
            # include this molecule; elements it lacks count as 0
            df.loc[len(df)] = [molec] + [st.get(el, 0) for el in tels]
    start = time.time()
    rxns = mrx.solve_descriptor_reactions(df, verbose=verbose)
    end = time.time()
    t = end - start
    print('Time = {:.0f} s = {:.1f} min = {:.2f} hr'.format(t, t/60, t/3600))
    return rxns, df

In [None]:
# Element-conserving reactions for the target; this rebinds `rxns` and keeps
# the element-count table as `dfelem`
rxns, dfelem = all_reactions_for_target(target, Gdict, verbose=True)

In [None]:
# Report how many reactions were found, then display the raw list
print('Found {:d} reactions'.format(len(rxns)))
rxns

In [None]:
# Make DataFrame of reactions
pd.set_option('display.max_rows', 500)
calcH, calcS = mrx.eq5_sums(rxns, target, moldata)
# longest reaction (target + educts) fixes the number of (name, coeff) column
# pairs; default=1 keeps the cell working when rxns is empty
neduct = max((len(rx) for rx in rxns), default=1)
cols = ['Target', 'x0']
for n in range(neduct-1):
    cols.extend([f'Educt{n+1}', f'x{n+1}'])
cols.append('{:s}H'.format(chem.DELTA))
# Collect the rows first and build the frame once: avoids quadratic
# row-by-row growth and lets pandas infer numeric dtypes for the columns.
rows = []
for rx, H in zip(rxns, calcH):
    row = []
    for ed in rx:
        row.extend(ed)
    # pad shorter reactions with empty educts so every row has the same width
    while len(row) < len(cols) - 1:
        row.extend(['', 0])
    row.append(np.round(H, 1))
    rows.append(row)
dfrx = pd.DataFrame(rows, columns=cols)
# magnitude of the enthalpy column, used for sorting
dfrx['rho_E'] = dfrx[cols[-1]].abs()
dfrx.sort_values('rho_E')

In [None]:
# Histogram of the enthalpy column (cols[-1] is the last name appended to
# `cols` in the cell that builds dfrx)
plt.hist(dfrx[cols[-1]], bins=20)

In [None]:
# show the descriptors for a reaction
irx = 0
# bare expression in mid-cell: evaluated but not displayed
rxns[irx]
# names of all species (target and educts) in the chosen reaction
eds = [e[0] for e in rxns[irx]]
# NOTE(review): when the cells run top to bottom, `rxns` at this point comes
# from all_reactions_for_target() (element counts), while the rows shown are
# taken from the bond-count table dfBbonds -- confirm this is intended.
dfBbonds[dfBbonds.molec.isin(eds)]

In [None]:
# Print one reaction raw and formatted; print_delta() accepts a single
# reaction as well as a list of reactions
i = 1
print(rxns[i])
print_delta(rxns[i])