# Standard Thermodynamic Quantities

These values were taken from three sources:
- [CHNOSZ](http://chnosz.net/vignettes/obigt.html), a chemistry and materials science package for [R](https://www.r-project.org/about.html). The dataframes were exported to csv files as-is.
- [Principles of Modern Chemistry, 8th ed.](https://www.amazon.com/Principles-Modern-Chemistry-David-Oxtoby-ebook/dp/B00UGDPNFI) by Oxtoby et al.
- [Chemical Principles: The Quest for Insight, 6th ed.](https://www.amazon.com/Chemical-Principles-Peter-Atkins/dp/1429288973/) by Atkins et al.

In [298]:
import os                               
import re                   
import time                 # to stall requests (just in case)
import itertools
import sympy

import numpy as np
import pandas as pd 
import pubchempy as pcp
import chemdataextractor as cde     # chemistry parser

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

from tika import parser     # the specific parser method 

from chempy import balance_stoichiometry
from chempy import Substance
from chempy import Reaction
from chempy.util import periodic

pd.set_option('display.max_colwidth', 0)    # no max column width
pd.set_option('display.max_rows', 1000)

## import CHNOSZ thermodynamic quantities csv

In [418]:
thermo = pd.read_csv('../data/external/thermo/chnosz_thermo.csv')

In [419]:
thermo.columns

Index(['name', 'abbrv', 'formula', 'state', 'ref1', 'ref2', 'date', 'E_units',
       'G', 'H', 'S', 'Cp', 'V', 'a1.a', 'a2.b', 'a3.c', 'a4.d', 'c1.e',
       'c2.f', 'omega.lambda', 'z.T'],
      dtype='object')

In [420]:
thermo = thermo[['name', 'abbrv', 'formula', 'E_units', 'state', 'G', 'H', 'S', 'Cp']]
thermo.head()

Unnamed: 0,name,abbrv,formula,E_units,state,G,H,S,Cp
0,water,,H2O,cal,liq,,,,
1,e-,,(Z-1),cal,aq,0.0,0.0,15.6166,0.0
2,H+,H+,H+,cal,aq,0.0,0.0,0.0,0.0
3,Li+,Li+,Li+,cal,aq,-69933.0,-66552.0,2.7,14.2
4,Na+,Na+,Na+,cal,aq,-62591.0,-57433.0,13.96,9.06


In [421]:
thermo.shape

(3372, 9)

In [422]:
thermo = thermo[thermo['name'].str[0] != '[']
thermo.shape

(3210, 9)

In [423]:
# code for finding duplicates, not necessary after gibbs formula below
# pd.concat(g for _, g in thermo.groupby('formula') if len(g) > 1)

In [424]:
thermo.dtypes

name       object 
abbrv      object 
formula    object 
E_units    object 
state      object 
G          float64
H          float64
S          float64
Cp         float64
dtype: object

In [425]:
thermo['E_units'].value_counts()

cal    3172
J      38  
Name: E_units, dtype: int64

In [426]:
# convert to joules
energy_columns = ['G', 'H', 'S', 'Cp']

for col in energy_columns:
    thermo.loc[thermo['E_units'] == 'cal', col] *= 4.184
    
thermo.drop(columns='E_units', inplace=True)
thermo.head()

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
0,water,,H2O,liq,,,,
1,e-,,(Z-1),aq,0.0,0.0,65.339854,0.0
2,H+,H+,H+,aq,0.0,0.0,0.0,0.0
3,Li+,Li+,Li+,aq,-292599.672,-278453.568,11.2968,59.4128
4,Na+,Na+,Na+,aq,-261880.744,-240299.672,58.40864,37.90704


In [427]:
thermo.at[0, 'G'] = -237180
thermo.at[0, 'H'] = -285830
thermo.at[0, 'S'] = 69.91
thermo.at[0, 'Cp'] = 75.29

just checking to make sure appropriate values are in joules

In [428]:
thermo.loc[4, 'G']

-261880.744

In [429]:
thermo.loc[779, 'G']

-713730.0

In [430]:
thermo.head()

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
0,water,,H2O,liq,-237180.0,-285830.0,69.91,75.29
1,e-,,(Z-1),aq,0.0,0.0,65.339854,0.0
2,H+,H+,H+,aq,0.0,0.0,0.0,0.0
3,Li+,Li+,Li+,aq,-292599.672,-278453.568,11.2968,59.4128
4,Na+,Na+,Na+,aq,-261880.744,-240299.672,58.40864,37.90704


In [431]:
thermo['state'].value_counts()

aq     1807
cr     601 
liq    488 
gas    285 
cr2    14  
cr3    9   
cr7    1   
cr8    1   
cr6    1   
cr9    1   
cr4    1   
cr5    1   
Name: state, dtype: int64

In [432]:
value_dict = {
    'aq': '(aq)',
    'cr': '(s)',
    'liq': '(l)',
    'gas': '(g)',
    'cr2': '(s, II)',
    'cr3': '(s, III)',
    'cr4': '(s, IV)',
    'cr5': '(s, V)',
    'cr6': '(s, VI)',
    'cr7': '(s, VII)',
    'cr8': '(s, VIII)',
    'cr9': '(s, IX)',
    'g': '(g)',
}

thermo['state'].replace(value_dict, inplace=True)

In [433]:
thermo['formula'] = thermo['formula'].astype(str) + thermo['state'].astype(str)
thermo.drop(columns='state', inplace=True)
thermo.head()

Unnamed: 0,name,abbrv,formula,G,H,S,Cp
0,water,,H2O(l),-237180.0,-285830.0,69.91,75.29
1,e-,,(Z-1)(aq),0.0,0.0,65.339854,0.0
2,H+,H+,H+(aq),0.0,0.0,0.0,0.0
3,Li+,Li+,Li+(aq),-292599.672,-278453.568,11.2968,59.4128
4,Na+,Na+,Na+(aq),-261880.744,-240299.672,58.40864,37.90704


In [520]:
thermo.at[1, 'formula'] = 'e-(aq)'

## import thermodynamic quantities from other sources

In [434]:
def get_text(file, sleep=0, counter=0):
    if counter == 2:        # so we stop the recursive function
        pass
    # grab the raw text using parser.from_file()
    raw = parser.from_file(file)
    status = raw['status']          # returns the status code from tika server
    # if things go well, return the raw text
    if status == 200:
        print(f"'{file}' successfully opened!")
        return raw['content']
    # if things don't go well, pause for five seconds and try again
    # we might not need this code, but it's useful for other server calls
    else:
        print(f'! ! ! ! error code {status} ! ! ! !')
        print(f'! ! ! ! trying again ! ! ! !')
        time.sleep(5)
        counter += 1
        # repeats grab_text up to twice
        return get_text(file, counter=counter)

In [435]:
oxtobya = get_text('../data/external/thermo/oxtoby8a.pdf')
oxtobyb = get_text('../data/external/thermo/oxtoby8b.pdf')

'../data/external/thermo/oxtoby8a.pdf' successfully opened!
'../data/external/thermo/oxtoby8b.pdf' successfully opened!


In [436]:
oxtobya

'\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPrinciples of Modern Chemistry\n\n\nD\na p p e n d i x\n\nStandard Chemical \nThermodynamic Properties\n\nThis table lists standard enthalpies of formation \ue044Hf� , standard third-law entro-\npies S°, standard free energies of formation \ue044Gf� , and molar heat capacities at \nconstant pressure, cP, for a variety of substances, all at 25°C (298.15 K) and  \n1 atm. The table proceeds from the left side to the right side of the periodic \ntable. Binary compounds are listed under the element that occurs to the left in \nthe periodic table, except that binary oxides and hydrides are listed with the \nother element. Thus, KCl is listed with potassium and its compounds, but ClO2 \nis listed with chlorine and its compounds.\n\nNote that the solution-phase entropies are not absolute entropies but are \nmea sured relative to the arbitrary standard S°(H\ue031(aq)) \ue035 0. Consequently, some \nof them (as 

In [545]:
clean = re.sub('—', 'nan', oxtobya)
clean = re.sub('\\ue02c', 'l', clean)
clean = re.sub(r'\n\n[I]*\s*', r'RROOWW', clean)
clean = re.sub(r'([\d]+)\n([A-Z]+)', r'\1RROOWW\2', clean)
clean = re.sub(r'(nan)\s*\n([A-Z]+)', r'\1RROOWW\2', clean)
clean = re.sub('\\ue031', '+', clean)
clean = re.sub('([\d])\+\(', r'+\1(', clean)
clean = re.sub('\\ue032', '-', clean)
clean = re.sub('([\d])\-\(', r'-\1(', clean)
clean = re.sub('\(([aqslg]+)\,\s([\w]+)', r'(\1,\2', clean)
clean = re.sub('[\s]+', ' ', clean)
clean = re.sub(' mol-1', '', clean)
# clean = re.sub('Substance.*mol', 'HHEEAADDEERR', clean)
clean = re.split('RROOWW', clean)
clean = [re.split(' ', r) for r in clean]
clean = [r for r in clean if len(r) == 5]
oxtobya_clean = clean

In [546]:
clean = re.sub('—', 'nan', oxtobyb)
clean = re.sub('\\ue02c', 'l', clean)
clean = re.sub(r'\n\n[I]*\s*', r'RROOWW', clean)
clean = re.sub(r'([\d]+)\n([A-Z]+)', r'\1RROOWW\2', clean)
clean = re.sub(r'(nan)\s*\n([A-Z]+)', r'\1RROOWW\2', clean)
clean = re.sub('\\ue031', '+', clean)
clean = re.sub('([\d])\+\(', r'+\1(', clean)
clean = re.sub('\\ue032', '-', clean)
clean = re.sub('([\d])\-\(', r'-\1(', clean)
clean = re.sub('\(([aqslg]+)\,\s([\w]+)', r'(\1,\2', clean)
clean = re.sub('[\s]+', ' ', clean)
clean = re.sub(' mol-1', '', clean)
# clean = re.sub('Substance.*mol', 'HHEEAADDEERR', clean)
clean = re.split('RROOWW', clean)
clean = [re.split(' ', r) for r in clean]
clean = [r for r in clean if len(r) == 5]
oxtobyb_clean = clean

In [547]:
oxtoby = oxtobya_clean + oxtobyb_clean
oxtoby

[['H(g)', '217.96', '114.60', '203.26', '20.78'],
 ['H2(g)', '0', '130.57', '0', '28.82'],
 ['H+(aq)', '0', '0', '0', '0'],
 ['H3O+(aq)', '-285.83', '69.91', '-237.18', '75.29'],
 ['Li(s)', '0', '29.12', '0', '24.77'],
 ['Li(g)', '159.37', '138.66', '126.69', '20.79'],
 ['Li+(aq)', '-278.49', '13.4', '-293.31', '68.6'],
 ['LiH(s)', '-90.54', '20.01', '-68.37', '27.87'],
 ['Li2O(s)', '-597.94', '37.57', '-561.20', '54.10'],
 ['LiF(s)', '-615.97', '35.65', '-587.73', '41.59'],
 ['LiCl(s)', '-408.61', '59.33', '-384.39', '47.99'],
 ['LiBr(s)', '-351.21', '74.27', '-342.00', 'nan'],
 ['LiI(s)', '-270.41', '86.78', '-270.29', '51.04'],
 ['Na(s)', '0', '51.21', '0', '28.24'],
 ['Na(g)', '107.32', '153.60', '76.79', '20.79'],
 ['Na+(aq)', '-240.12', '59.0', '-261.90', '46.4'],
 ['Na2O(s)', '-414.22', '75.06', '-375.48', '69.12'],
 ['NaOH(s)', '-425.61', '64.46', '-379.53', '59.54'],
 ['NaF(s)', '-573.65', '51.46', '-543.51', '48.86'],
 ['NaCl(s)', '-411.15', '72.13', '-384.15', '50.50'],
 ['N

In [548]:
oxtoby_df = pd.DataFrame(oxtoby, columns=['formula', 'H', 'S', 'G', 'Cp'])
oxtoby_df.head()

Unnamed: 0,formula,H,S,G,Cp
0,H(g),217.96,114.6,203.26,20.78
1,H2(g),0.0,130.57,0.0,28.82
2,H+(aq),0.0,0.0,0.0,0.0
3,H3O+(aq),-285.83,69.91,-237.18,75.29
4,Li(s),0.0,29.12,0.0,24.77


In [549]:
energy_columns = ['G', 'H', 'S', 'Cp']

for col in energy_columns:
    oxtoby_df[col] = oxtoby_df[col].astype(float)
    
oxtoby_df.head()

Unnamed: 0,formula,H,S,G,Cp
0,H(g),217.96,114.6,203.26,20.78
1,H2(g),0.0,130.57,0.0,28.82
2,H+(aq),0.0,0.0,0.0,0.0
3,H3O+(aq),-285.83,69.91,-237.18,75.29
4,Li(s),0.0,29.12,0.0,24.77


In [550]:
oxtoby_df.loc[:, 'H'] *= 1000
oxtoby_df.loc[:, 'G'] *= 1000
oxtoby_df.head()

Unnamed: 0,formula,H,S,G,Cp
0,H(g),217960.0,114.6,203260.0,20.78
1,H2(g),0.0,130.57,0.0,28.82
2,H+(aq),0.0,0.0,0.0,0.0
3,H3O+(aq),-285830.0,69.91,-237180.0,75.29
4,Li(s),0.0,29.12,0.0,24.77


In [551]:
oxtoby_df = oxtoby_df[['formula', 'G', 'H', 'S', 'Cp']]

In [552]:
oxtoby_df.isna().sum()

formula    0 
G          2 
H          0 
S          2 
Cp         73
dtype: int64

In [553]:
oxtoby_df.loc[oxtoby_df['G'].isna(), :]

Unnamed: 0,formula,G,H,S,Cp
213,H2C2O4(s),,-827200.0,,117.0
330,XeF4(s),,-261500.0,,


In [554]:
thermo.shape

(3089, 7)

In [555]:
thermo.isna().sum()

name       0   
abbrv      2258
formula    0   
G          0   
H          31  
S          16  
Cp         150 
dtype: int64

In [556]:
thermo = thermo.dropna(subset=['G'])
oxtoby_df = oxtoby_df.dropna(subset=['G'])

In [616]:
thermo_df = pd.concat([thermo, oxtoby_df], ignore_index=True).fillna(np.NaN)
thermo_df = thermo_df[['formula', 'abbrv', 'name', 'G', 'H', 'S', 'Cp']]
thermo_df.head()

of pandas will change to not sort by default.

To accept the future behavior, pass 'sort=False'.


  """Entry point for launching an IPython kernel.


Unnamed: 0,formula,abbrv,name,G,H,S,Cp
0,H2O(l),,water,-237180.0,-285830.0,69.91,75.29
1,e-(aq),,e-,0.0,0.0,65.339854,0.0
2,H+(aq),H+,H+,0.0,0.0,0.0,0.0
3,Li+(aq),Li+,Li+,-292599.672,-278453.568,11.2968,59.4128
4,Na+(aq),Na+,Na+,-261880.744,-240299.672,58.40864,37.90704


In [617]:
thermo_df.tail()

Unnamed: 0,formula,abbrv,name,G,H,S,Cp
3444,IBr(g),,,3710.0,40840.0,258.66,36.44
3445,Ne(g),,,0.0,0.0,146.22,20.79
3446,Ar(g),,,0.0,0.0,154.73,20.79
3447,Kr(g),,,0.0,0.0,163.97,20.79
3448,Xe(g),,,0.0,0.0,169.57,20.79


In [618]:
thermo_df.shape

(3449, 7)

In [619]:
thermo_df.drop_duplicates(subset='formula', keep='last', inplace=True)
thermo_df.reset_index(drop=True, inplace=True)
thermo_df.shape

(2837, 7)

In [620]:
thermo_df[thermo_df['formula'].str.contains('\.')].index

Int64Index([1765, 1766, 1767, 1770, 1785, 1787, 2596], dtype='int64')

In [621]:
thermo_df = thermo_df.drop(index=thermo_df[thermo_df['formula'].str.contains('\.')].index)
thermo_df.reset_index(drop=True, inplace=True)
thermo_df.shape

(2830, 7)

In [622]:
thermo_df.tail()

Unnamed: 0,formula,abbrv,name,G,H,S,Cp
2825,IBr(g),,,3710.0,40840.0,258.66,36.44
2826,Ne(g),,,0.0,0.0,146.22,20.79
2827,Ar(g),,,0.0,0.0,154.73,20.79
2828,Kr(g),,,0.0,0.0,163.97,20.79
2829,Xe(g),,,0.0,0.0,169.57,20.79


We'll fill in some of these `abbrv` fields with just the formula.

In [646]:
def formula_state_separator(formula, keep_state=False):
    '''
    Separates the state from a formula string.
    
    --Parameters--
    formula:        str
        a string of a single chemical formula
    
    --Output--
    tuple (str)
        
    --Examples--
    >>> formula_state_separator('NaCl(aq)')
    ('NaCl', 'aq')
    
    >>> formula_state_separator('NaCl')
    ('NaCl', None)
    '''
    try:
        regex = re.search('(?<=\()[aglsq]+', formula)
        formula = formula[:regex.start() - 1]
        if keep_state:
            state = regex.group(0)
            return formula, state
        else:
            return formula
    except:
        return formula, None

In [647]:
formulas = [formula_state_separator(f) for f in thermo_df['formula'].astype(str)]
formulas[:5]

['e-', 'HCO3-', 'NO3-', 'NO2-', 'NH4+']

In [624]:
new_list = []
for a, f in zip(list(thermo_df['abbrv']), formulas):
    if pd.isna(a):
        new_list.append(f)
    else:
        new_list.append(a)
new_list[:10]

['e-',
 'HCO3-',
 'NO3-',
 'NO2-',
 'NH4+',
 'H2PO4-',
 'HSO3-',
 'HSO4-',
 'S2O3-2',
 'S2O8-2']

In [625]:
thermo_df['abbrv'] = new_list

In [626]:
thermo_df.head()

Unnamed: 0,formula,abbrv,name,G,H,S,Cp
0,e-(aq),e-,e-,0.0,0.0,65.339854,0.0
1,HCO3-(aq),HCO3-,HCO3-,-586939.888,-689933.232,98.44952,-35.39664
2,NO3-(aq),NO3-,NO3-,-110905.288,-206810.936,146.94208,-68.6176
3,NO2-(aq),NO2-,NO2-,-32216.8,-104600.0,123.0096,-97.4872
4,NH4+(aq),NH4+,NH4+,-79454.16,-133260.4,111.16888,65.85616


In [627]:
thermo_df[thermo_df['formula'].str[:1] != thermo_df['abbrv'].str[:1]]

Unnamed: 0,formula,abbrv,name,G,H,S,Cp
825,C3H7OH(aq),1-propanol,propanol,-172087.9,-312754.0,170.7072,355.0124
826,C4H9OH(aq),1-butanol,butanol,-162506.6,-336058.9,196.2296,436.8096
827,C5H11OH(aq),1-pentanol,pentanol,-160958.5,-367062.3,223.4256,523.8368
832,C4H8O(aq),2-butanone,butanone,-153678.3,-284009.9,210.4552,336.3936
967,C4H8O2(aq),butanoic acid,n-butanoic acid,-381622.6,-535342.8,234.7224,336.812
1262,C6H5OH(aq),PHOL,phenol,-51107.56,-151962.9,190.372,310.4528
1263,C6H4OHCH3(aq),p-CRESOL,p-cresol,-48639.0,-185121.1,207.108,384.0912
1264,C6H3OHCH3CH3(aq),"3,5-DMP","3,5-dimethylphenol",-52128.46,-229889.9,205.016,507.5192
1420,C2Cl4(aq),PCE,perchloroethylene,31308.87,-54371.08,170.2888,669.44
1421,C2HCl3(aq),TCE,trichloroethylene,25359.22,-49588.77,160.07984,564.84


In [628]:
thermo_df.to_csv('../data/processed/thermo.csv', index=False)

## TO DO: maybe define a function to add missing substances

## replicate CHNOSZ stoichiometry csv

Originally, I had imported the `CHNOSZ` stoichiometry csv. However, I found the need to update it with whatever new entries were present in `thermo`.

The stoichiometry csv is formatted so that each formula is count-vectorized by element.

In [629]:
stoich_og = pd.read_csv('../data/external/thermo/chnosz_stoich.csv')
stoich_og.head()

Unnamed: 0.1,Unnamed: 0,Ag,Al,Ar,As,Au,B,Ba,Be,Bi,...,Tm,U,V,W,Xe,Y,Yb,Z,Zn,Zr
0,H2O,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,0,0
2,H+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Li+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Na+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


For my workflow, we will rename the columns by atomic number and have charge occupy column `0` to be in-line with how `chempy` works. We'll recreate this by scanning the formulas in `thermo` and then vectorizing the formulas.

In [630]:
thermo.loc[0, 'formula']

'H2O(l)'

In [631]:
H2O = Substance.from_formula(thermo.loc[0, 'formula'])
H2O.composition

{1: 2, 8: 1}

In [632]:
for k, v in H2O.composition.items():
    print (k, v)

1 2
8 1


Using `chempy` it becomes quite simple to vectorize.

From the composition dictionary:

- key $\Rightarrow$ `stoich[key]`  
- value $\Rightarrow$ `stoich.at[index, key]`

In [633]:
formulas = set(thermo_df['formula'])
formulas

{'Ag(OH)2-(aq)',
 'C20H42(s)',
 'LaC5H6O4+(aq)',
 'C11H17N2O8P(s)',
 'C29H54(l)',
 'MgC5H6O4(aq)',
 'TmOH+2(aq)',
 'CH3COOH(aq)',
 'Pd(HS)3-(aq)',
 'Pb(HS)2(aq)',
 'C21H36(s)',
 'C10H14N2O9P-(aq)',
 'C5H10(s)',
 'C14H10(s)',
 'C5H11I(g)',
 'C5H5O5-(aq)',
 'Tl(s)',
 'CeC2O4+(aq)',
 'C9H11NO3(aq)',
 'Cr2O7-2(aq)',
 'C9H12(g)',
 'C14H22(g)',
 'C10H20(l)',
 'C35H72(l)',
 'C19H40(s)',
 'C12H10S(s)',
 'PdCl3-(aq)',
 'TmCrO4+(aq)',
 'C6H12O2(aq)',
 'Sr(CH3COO)2(aq)',
 'C10H15N2O12P2-(aq)',
 'C5H6O4-2(aq)',
 'C40H72(l)',
 'PH3(aq)',
 'B(s)',
 'CuCl3-2(aq)',
 'C47H96(l)',
 'HUO2(aq)',
 'C6H14N4O2(aq)',
 'CuCl(s)',
 'HBr(g)',
 'AgCl2-(aq)',
 'C18H38(l)',
 'PrO2H(aq)',
 'H2VO4-(aq)',
 'B(g)',
 'TbCl2+(aq)',
 'C41H78O4(s)',
 'NaCl(s)',
 'Ga+3(aq)',
 'LuO2H(aq)',
 'S5O6-2(aq)',
 'C10H15N5O12P3-(aq)',
 'C19H14(l)',
 'ErO+(aq)',
 'ZnCH3CH2OCO2+(aq)',
 'EuNO3+2(aq)',
 'Ni(CH3CH2CH2CH2CO2)2(aq)',
 'MgC10H12N5O13P3-2(aq)',
 'NH4C3H2O4-(aq)',
 'C3H6O(aq)',
 'PrCl3(aq)',
 'BeF+(aq)',
 'InO2-(aq)',
 'LiC3H

In [634]:
stoich = pd.DataFrame(formulas)
stoich.head()

Unnamed: 0,0
0,Ag(OH)2-(aq)
1,C20H42(s)
2,LaC5H6O4+(aq)
3,C11H17N2O8P(s)
4,C29H54(l)


In [635]:
# naming the column -1 for now so we can sort them later

stoich.columns = [-1]
stoich.head()

Unnamed: 0,-1
0,Ag(OH)2-(aq)
1,C20H42(s)
2,LaC5H6O4+(aq)
3,C11H17N2O8P(s)
4,C29H54(l)


In [636]:
for i, f in enumerate(stoich[-1]):
    try:
        sub = Substance.from_formula(f)
        for k, v in sub.composition.items():
            stoich.at[i, k] = v
    except:
        pass
        
stoich.head()

Unnamed: 0,-1,47,8,1,0,6,57,7,15,12,...,74,72,88,43,87,22,2,18,91,75
0,Ag(OH)2-(aq),1.0,2.0,2.0,-1.0,,,,,,...,,,,,,,,,,
1,C20H42(s),,,42.0,,20.0,,,,,...,,,,,,,,,,
2,LaC5H6O4+(aq),,4.0,6.0,1.0,5.0,1.0,,,,...,,,,,,,,,,
3,C11H17N2O8P(s),,8.0,17.0,,11.0,,2.0,1.0,,...,,,,,,,,,,
4,C29H54(l),,,54.0,,29.0,,,,,...,,,,,,,,,,


In [637]:
stoich = stoich[sorted(stoich.columns)]
stoich.head()

Unnamed: 0,-1,0,1,2,3,4,5,6,7,8,...,80,81,82,83,86,87,88,90,91,92
0,Ag(OH)2-(aq),-1.0,2.0,,,,,,,2.0,...,,,,,,,,,,
1,C20H42(s),,42.0,,,,,20.0,,,...,,,,,,,,,,
2,LaC5H6O4+(aq),1.0,6.0,,,,,5.0,,4.0,...,,,,,,,,,,
3,C11H17N2O8P(s),,17.0,,,,,11.0,2.0,8.0,...,,,,,,,,,,
4,C29H54(l),,54.0,,,,,29.0,,,...,,,,,,,,,,


In [638]:
stoich.rename(columns={-1: 'formula'}, inplace=True)

In [639]:
stoich.fillna(0, inplace=True)
stoich.head()

Unnamed: 0,formula,0,1,2,3,4,5,6,7,8,...,80,81,82,83,86,87,88,90,91,92
0,Ag(OH)2-(aq),-1.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,C20H42(s),0.0,42.0,0.0,0.0,0.0,0.0,20.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,LaC5H6O4+(aq),1.0,6.0,0.0,0.0,0.0,0.0,5.0,0.0,4.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,C11H17N2O8P(s),0.0,17.0,0.0,0.0,0.0,0.0,11.0,2.0,8.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,C29H54(l),0.0,54.0,0.0,0.0,0.0,0.0,29.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [640]:
stoich.to_csv('../data/processed/stoich.csv', index=False)

## playground for writing functions using thermo tables

The cells below show my thought process exploring how to relate the `chempy` library with the thermodynamic tables. The goals are as follows:

- The most likely chemical reaction is highly correlated with the reaction that has the greatest loss of Gibbs free energy (given by parameter 'G' in `thermo`.
- We iterate through the different possible products so long as their combination allows for balanced stoichiometry (having the same number of each element on both sides of the equation)

We will attempt to predict the following reaction:

$$ 2 Na(s) + 2 H_{2}O(l) \longrightarrow 2 NaOH(aq) + H_{2}(g) $$

In [641]:
reactants = ['Na', 'H2O']

In [642]:
water = Substance.from_formula('H2O')
[*water.composition]

[1, 8]

When searching for possible products, we want to ignore all compounds that have elements that are not `Na`, `H`, or `O`.

In [643]:
z_ignore = ['formula']
for r in reactants:
    s = Substance.from_formula(r)
    z_ignore += [*s.composition]
z_ignore = set(z_ignore)
z_ignore

{1, 11, 8, 'formula'}

In [644]:
column_mask = [col for col in stoich.columns if col not in z_ignore]
print(column_mask)

[0, 2, 3, 4, 5, 6, 7, 9, 10, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 74, 75, 78, 79, 80, 81, 82, 83, 86, 87, 88, 90, 91, 92]


In [645]:
# https://stackoverflow.com/questions/22649693/

stoich_temp = stoich.copy()

for col in column_mask:
    stoich_temp = stoich_temp[stoich_temp[col] == 0]

stoich_temp = stoich_temp.loc[(stoich_temp.drop(columns=['formula'])!=0).any(axis=1)]

stoich_temp = stoich_temp[z_ignore]
stoich_temp

Unnamed: 0,8,formula,11,1
125,1.0,"H2O(s, VII)",0.0,2.0
135,1.0,Na2O(s),2.0,0.0
237,1.0,O(g),0.0,0.0
735,1.0,"H2O(s, V)",0.0,2.0
894,0.0,H2(g),0.0,2.0
1363,3.0,O3(g),0.0,0.0
1482,1.0,NaOH(aq),1.0,1.0
1514,1.0,NaOH(s),1.0,1.0
1535,0.0,Na(g),1.0,0.0
1544,1.0,"H2O(s, II)",0.0,2.0


In [649]:
# chemical reactions don't have the same species on both sides of the arrow

candidates = set([formula_state_separator(f) for f in set(stoich_temp['formula'])\
              if formula_state_separator(f) not in reactants])
candidates

{'H', 'H2', 'H2O2', 'Na2O', 'NaOH', 'O', 'O2', 'O3'}

In [650]:
# most chemical reactions don't form more than four different chemical species
# we'll play it safe and just make the maximum number 3 + num_reactants

combinations = []
for i in range(1, len(reactants) + 4):
    combinations += list(itertools.combinations(candidates, i))
combinations

[('Na2O',),
 ('H2O2',),
 ('NaOH',),
 ('O2',),
 ('H2',),
 ('O3',),
 ('O',),
 ('H',),
 ('Na2O', 'H2O2'),
 ('Na2O', 'NaOH'),
 ('Na2O', 'O2'),
 ('Na2O', 'H2'),
 ('Na2O', 'O3'),
 ('Na2O', 'O'),
 ('Na2O', 'H'),
 ('H2O2', 'NaOH'),
 ('H2O2', 'O2'),
 ('H2O2', 'H2'),
 ('H2O2', 'O3'),
 ('H2O2', 'O'),
 ('H2O2', 'H'),
 ('NaOH', 'O2'),
 ('NaOH', 'H2'),
 ('NaOH', 'O3'),
 ('NaOH', 'O'),
 ('NaOH', 'H'),
 ('O2', 'H2'),
 ('O2', 'O3'),
 ('O2', 'O'),
 ('O2', 'H'),
 ('H2', 'O3'),
 ('H2', 'O'),
 ('H2', 'H'),
 ('O3', 'O'),
 ('O3', 'H'),
 ('O', 'H'),
 ('Na2O', 'H2O2', 'NaOH'),
 ('Na2O', 'H2O2', 'O2'),
 ('Na2O', 'H2O2', 'H2'),
 ('Na2O', 'H2O2', 'O3'),
 ('Na2O', 'H2O2', 'O'),
 ('Na2O', 'H2O2', 'H'),
 ('Na2O', 'NaOH', 'O2'),
 ('Na2O', 'NaOH', 'H2'),
 ('Na2O', 'NaOH', 'O3'),
 ('Na2O', 'NaOH', 'O'),
 ('Na2O', 'NaOH', 'H'),
 ('Na2O', 'O2', 'H2'),
 ('Na2O', 'O2', 'O3'),
 ('Na2O', 'O2', 'O'),
 ('Na2O', 'O2', 'H'),
 ('Na2O', 'H2', 'O3'),
 ('Na2O', 'H2', 'O'),
 ('Na2O', 'H2', 'H'),
 ('Na2O', 'O3', 'O'),
 ('Na2O', 'O3', 

Let's see if any of these combinations result in a good balanced equation:

In [651]:
for comb in combinations:    
    try:
        print(balance_stoichiometry(reactants, comb))
    except:
        pass

(OrderedDict([('Na', -2), ('H2O', 1)]), OrderedDict([('Na2O', -1), ('H2O2', 1)]))
(OrderedDict([('Na', 2), ('H2O', 1)]), OrderedDict([('Na2O', 1), ('H2', 1)]))
(OrderedDict([('Na', 2), ('H2O', 1)]), OrderedDict([('Na2O', 1), ('H', 2)]))
(OrderedDict([('Na', -4), ('H2O', -2)]), OrderedDict([('NaOH', -4), ('O2', 1)]))
(OrderedDict([('Na', 2), ('H2O', 2)]), OrderedDict([('NaOH', 2), ('H2', 1)]))
(OrderedDict([('Na', -6), ('H2O', -3)]), OrderedDict([('NaOH', -6), ('O3', 1)]))
(OrderedDict([('Na', -2), ('H2O', -1)]), OrderedDict([('NaOH', -2), ('O', 1)]))
(OrderedDict([('Na', 1), ('H2O', 1)]), OrderedDict([('NaOH', 1), ('H', 1)]))
(OrderedDict([('Na', -4), ('H2O', x1 + 2)]), OrderedDict([('Na2O', -x1 - 2), ('H2O2', 2), ('NaOH', 2*x1)]))
(OrderedDict([('Na', -4*x1 - 2), ('H2O', 1)]), OrderedDict([('Na2O', -2*x1 - 1), ('H2O2', 1), ('O2', x1)]))
(OrderedDict([('Na', 2*x1 - 2), ('H2O', x1 + 1)]), OrderedDict([('Na2O', x1 - 1), ('H2O2', 1), ('H2', x1)]))
(OrderedDict([('Na', -6*x1 - 2), ('H2O', 

(OrderedDict([('Na', 2*x1 - 4*x2 - 12), ('H2O', x1)]), OrderedDict([('Na2O', x1 - 2*x2 - 6), ('O3', 2), ('O', 2*x2), ('H', 2*x1)]))
(OrderedDict([('Na', 1), ('H2O', 2*x1 - 2*x2)]), OrderedDict([('H2O2', x1 - 2*x2 - 1/2), ('NaOH', 1), ('O2', x2), ('H2', x1)]))
(OrderedDict([('Na', 1), ('H2O', -3*x1 - 2*x2)]), OrderedDict([('H2O2', -3*x1 - 2*x2 - 1/2), ('NaOH', 1), ('O2', x2), ('O3', x1)]))
(OrderedDict([('Na', 1), ('H2O', -x1 - 2*x2)]), OrderedDict([('H2O2', -x1 - 2*x2 - 1/2), ('NaOH', 1), ('O2', x2), ('O', x1)]))
(OrderedDict([('Na', 2), ('H2O', 2*x1 - 4*x2)]), OrderedDict([('H2O2', x1 - 4*x2 - 1), ('NaOH', 2), ('O2', 2*x2), ('H', 2*x1)]))
(OrderedDict([('Na', 1), ('H2O', -3*x1 + 2*x2)]), OrderedDict([('H2O2', -3*x1 + x2 - 1/2), ('NaOH', 1), ('H2', x2), ('O3', x1)]))
(OrderedDict([('Na', 1), ('H2O', -x1 + 2*x2)]), OrderedDict([('H2O2', -x1 + x2 - 1/2), ('NaOH', 1), ('H2', x2), ('O', x1)]))
(OrderedDict([('Na', 2), ('H2O', 2*x1 + 4*x2)]), OrderedDict([('H2O2', x1 + 2*x2 - 1), ('NaOH', 2

(OrderedDict([('Na', 2*x1 - 4*x2 - 12*x3 - 8), ('H2O', x1)]), OrderedDict([('Na2O', x1 - 2*x2 - 6*x3 - 4), ('O2', 2), ('O3', 2*x3), ('O', 2*x2), ('H', 2*x1)]))
(OrderedDict([('Na', 2*x1 - 4*x2 - 12*x3 + 4), ('H2O', x1 + 2)]), OrderedDict([('Na2O', x1 - 2*x2 - 6*x3 + 2), ('H2', 2), ('O3', 2*x3), ('O', 2*x2), ('H', 2*x1)]))
(OrderedDict([('Na', 1), ('H2O', -3*x1 + 2*x2 - 2*x3)]), OrderedDict([('H2O2', -3*x1 + x2 - 2*x3 - 1/2), ('NaOH', 1), ('O2', x3), ('H2', x2), ('O3', x1)]))
(OrderedDict([('Na', 1), ('H2O', -x1 + 2*x2 - 2*x3)]), OrderedDict([('H2O2', -x1 + x2 - 2*x3 - 1/2), ('NaOH', 1), ('O2', x3), ('H2', x2), ('O', x1)]))
(OrderedDict([('Na', 2), ('H2O', 2*x1 + 4*x2 - 4*x3)]), OrderedDict([('H2O2', x1 + 2*x2 - 4*x3 - 1), ('NaOH', 2), ('O2', 2*x3), ('H2', 2*x2), ('H', 2*x1)]))
(OrderedDict([('Na', 1), ('H2O', -x1 - 3*x2 - 2*x3)]), OrderedDict([('H2O2', -x1 - 3*x2 - 2*x3 - 1/2), ('NaOH', 1), ('O2', x3), ('O3', x2), ('O', x1)]))
(OrderedDict([('Na', 2), ('H2O', 2*x1 - 6*x2 - 4*x3)]), Ord

In order for an equation to be properly balanced, each coefficient (dictionary values here) must be a positive number, and oftentimes we balance so that every coefficient is a whole number. We have to filter out the balanced instances where we get negative coefficients (such as the equation with `H2O2` and `Na2O` as products).

Notice also that `sympy` has relative coefficients listed (meaning the equation would be balanced for any whole number `x1`, for example). We will want to filter these out too, but may consider including them for a reach goal.

In [365]:
np.array(list(balance_stoichiometry(reactants, ('H2O2', 'Na2O'))[0].values())) >= 1

array([ True, False])

In [366]:
# there might be a more elegant way of doing this. round down to zero if any instance is false.

np.floor((np.array(list(balance_stoichiometry(reactants, ('H2O2', 'Na2O'))[0].values())) >= 1).mean())

0.0

In [367]:
np.array(list(balance_stoichiometry(reactants, ('H2', 'O2', 'NaOH'))[0].values()))

array([2*x1, 2*x1 + 4], dtype=object)

In [368]:
np.array([isinstance(i, sympy.numbers.Number) for i in list(balance_stoichiometry(reactants, ('H2', 'O2', 'NaOH'))[0].values())])

array([False, False])

## functions

The above code has been condensed into several functions:

- `Z_unique`: returns a list of unique atomic numbers present in a list of substances
- `stoich_filter`: filters the `stoich` dataframe to return formulas that only have the elements described by `Z_unique`
- `check_coefficients`: checks if all coefficients are positive and non-relational once an equation has been balanced
- `formula_state_separator`: in the case that a formula is formatted with its corresponding state (eg: `NaCl(s)`), return a tuple of the formula and the state.
- `get_gibbs`: from the results of `formula_state_separator`, find the exact free energy value for the substance specified. If state is not specified, find the lowest free energy value for formulas that have multiple entries (since that is the most likely state under standard conditions.
- `possibility_reducer`: sometimes we get too many results from `stoich_filter`. In general, substances with lower $\Delta G$ values are more likely to be products. However, very large, complex molecules with low $\Delta G$ values are still not very likely, so one (imperfect) way to normalize for that is to divide by the mass of the compound. The jury is still out if this is a good way to filter.
- `standard_gibbs_free_energy`: calculates the overall $\Delta G$ change under standard conditions.
- `reaction_predictor`: takes a list of reactants, iterates through the different possibilities (using `stoich_filter`, takes valid combinations using `check_coefficients`, and calculates $\Delta G$ values using `thermo`. Returns the reaction with the lowest $\Delta G$ value.

In [92]:
def Z_unique(substances):
    '''
    Returns a set representing unique atomic numbers present within a list of
    chemical formulas.
    
    --Parameters--
    substances:     iterable (str)
        any iterable containing strings with valid chemical formulas
    
    --Output--
    set (int)
        atomic numbers of each unique element present in substances
        
    --Example--
    >>> Z_unique(['CH4', 'H2O'])
    {1, 6, 8}
    '''
    composition = []
    for s in substances:
        sub = Substance.from_formula(s)
        composition += [*sub.composition]
    return set(composition)

In [91]:
Z_unique(['CH4', 'H2O'])

{1, 6, 8}

In [652]:
def stoich_filter(substances):
    '''
    Returns a masked copy of the stoich dataframe containing elements that
    only contain the elements present in substances. 
    
    --Parameters--
    substances:     iterable (str)
        any iterable containing strings with valid chemical formulas
    
    --Output--
    DataFrame
    '''
    stoich_temp = stoich.copy()
    
    # mask to keep the charge and formula columns in final dataframe
    z_keep = [0, 'formula'] + list(Z_unique(substances))
    
    # get all other columns
    column_mask = [col for col in stoich.columns if col not in z_keep]
    for col in column_mask:
        # return the dataframe where these columns are all 0
        stoich_temp = stoich_temp[stoich_temp[col] == 0]
    
    # keep the columns where it's not all zero
    stoich_temp = stoich_temp.loc[(stoich_temp.drop(columns=['formula'])!=0).any(axis=1)]
    
    # return the dataframe with the columns we want to keep
    return stoich_temp[z_keep]

In [653]:
stoich_filter(['Al', 'O2'])

Unnamed: 0,0,formula,8,13
237,0.0,O(g),1.0,0.0
471,0.0,Al(s),0.0,1.0
1363,0.0,O3(g),3.0,0.0
1846,-1.0,e-(aq),0.0,0.0
1910,3.0,Al+3(aq),0.0,1.0
1923,0.0,Al2O3(s),3.0,2.0
2127,0.0,O2(g),2.0,0.0
2197,0.0,O2(aq),2.0,0.0
2348,0.0,Al(g),0.0,1.0


In [372]:
def check_coefficients(reactants, products):
    '''
    Checks whether a possible reactant/product combination would result in a
    valid balanced chemical equation.
    
    --Parameters--
    reactants:      iterable (str)
    products:       iterable (str)
        any iterable containing strings with valid chemical formulas
    
    --Output--
    bool
        
    --Examples--
    >>> check_coefficients(['CH4', 'H2O'], ['CO', 'H2'])
    True
    
    >>> check_coefficients(['CH4', 'H2O'], ['CO2', 'H2O2'])
    False
    
    >>> check_coefficients(['CH4', 'H2O'], ['NaOH'])
    False
    '''
    try:
        balance = balance_stoichiometry(reactants, products)
        
        # list all the coefficients out
        reac_coef = list(balance[0].values()) + list(balance[1].values())
        
        # rounds to zero if any of the coefficients are less than 1
        is_positive = np.floor((np.array(reac_coef) >= 1).mean()).astype(bool)
        
        # rounds to zero if any of the coefficients are sympy relational class
        is_definite = np.floor(np.array([isinstance(i, sympy.numbers.Number) for i in reac_coef])\
                               .mean()).astype(bool)
        
        return is_positive and is_definite
    except:
        return False   

In [654]:
check_coefficients(['H2O2'], ['H2', 'H2O'])

False

In [None]:
stoich[stoich['formula'].map(lambda x: x[:len('e-')] == 'e-')]

In [655]:
def get_gibbs(formula):     
    '''
    Retrieves the free energy value, in J, of a single substance
    
    --Parameters--
    formula:        str
        a string of a single chemical formula
    
    --Output--
    list (float)    
        
    --Examples--
    >>> get_gibbs('NaCl(aq)')
    array([-388735.44])
    '''
    try:
        matches = thermo[thermo['formula'] == formula]
    except:
        matches = thermo[thermo['formula']\
                  .map(lambda x: x[:len(formula)] == formula)]
    
#     if state == None:
    
#     else:
#         is_state = thermo['state'] == state
#         matches = thermo[is_formula & is_state]
        
    return matches['G'].values

In [385]:
formula_state_separator('NaCl')

'NaCl'

In [656]:
thermo[thermo['formula']\
        .map(lambda x: x[:len('NaCl')] == 'NaCl')]

Unnamed: 0,name,abbrv,formula,G,H,S,Cp
340,NaCl,NaCl,NaCl(aq),-388735.44,-402333.44,117.152,35.564
1881,halite,Hl,NaCl(s),-384120.488,-411119.84,72.13216,


In [400]:
get_gibbs('NaCl')

array([], dtype=float64)

In [120]:
# https://stackoverflow.com/questions/6618515/

def possibility_reducer(possibilities, length=12, offset=0):
    '''
    Limits the list of possible substances to a specified length based on 
    free energy 'density'
    
    --Parameters--
    possibilities:      iterable (str)
        any iterable containing strings with valid chemical formulas
    
    --Output--
    tuple (str)
        
    --Examples--
    >>> formula_state_separator('NaCl(aq)')
    ('NaCl', 'aq')
    
    >>> formula_state_separator('NaCl')
    ('NaCl', None)
    '''
    # just in case. might be redundant
    possibilities = np.array(list(possibilities))
    energies = np.array([min(get_gibbs(s)) / Substance.from_formula(s).mass for s in possibilities])
    indices = energies.argsort()
    sorted_possibilities = possibilities[indices]
    
    max_length = min(len(sorted_possibilities), (length + offset))
    
    return tuple(sorted_possibilities[offset:(max_length)])

In [121]:
print(len(stoich_filter(['CH4, O2'])))
print(len(possibility_reducer(stoich_filter(['CH4, O2'])['formula'])))

163
12


In [153]:
def standard_gibbs_free_energy(equation, kJ=True):
    '''
    Returns the overall delG of a reaction under standard conditions. 
    
    --Parameters--
    equation:       chempy balance_stoichiometry ordered dictionary
    
    --Output--
    float
        
    --Examples--
    >>> equation = balance_stoichiometry(['Na', 'H2O'], ['NaH', 'O2'])
    >>> standard_gibbs_free_energy(equation)
    340.36
    '''
    
    # each side is a formula, coefficient tuple
    prod = list(equation[1].items())
    reac = list(equation[0].items())
    
    delG = 0
        
    # s[0] is the formula, with or without state
    # s[1] is the coefficient
    
    def gibbs_sum(side):
        interim_delG = 0
        for s in side:
            interim_delG += min(get_gibbs(s[0])) * s[1]
        return interim_delG
    
    delG = gibbs_sum(prod) - gibbs_sum(reac)
    
    return delG / (1 + 999*kJ)

In [124]:
standard_gibbs_free_energy(balance_stoichiometry(['Na', 'H2O'], ['NaH', 'O2']))

340.360000000000

In [157]:
def reaction_predictor(reactants):
    '''
    Returns the balanced chemical equation of the predicted reaction based on
    minimizing overall delG values.
    
    --Parameters--
    reactants:      iterable(str)
        any iterable containing strings with valid chemical formulas
    
    --Output--
    chempy.chemistry.Reaction
        
    --Examples--
    >>> reaction_predictor(['Al', 'O2'])
    4 Al + 3 O2 → 2 Al2O3
    '''
    stoich_temp = stoich_filter(reactants)
    
    print('scoping possibilities...')
    possibilities = np.array([f for f in list(stoich_temp['formula']) if f not in reactants if f in list(thermo['formula'])])
    if len(possibilities) > 25:
        possibilities = possibility_reducer(possibilities)
    
    print('  optimizing combinations...')
    combinations = []
    comb_length = min(6, len(possibilities))
    for i in range(1, comb_length):
        combinations += list(itertools.combinations(possibilities, i))
    combinations = [c for c in combinations if Z_unique(c) == Z_unique(reactants)]
    
    print('    deriving equations...')
    equations = []
    for i, comb in enumerate(combinations):    
        if check_coefficients(reactants, comb):
            equations.append(balance_stoichiometry(reactants, comb))
    
    print('      calculating energies...')
    energies = []
    for eq in equations:
        energies.append(standard_gibbs_free_energy(eq))
    
    best_energy = min(energies)
    best_index = energies.index(best_energy)
    best_reaction = Reaction(*equations[best_index])
    
    print(best_reaction)
    print(best_energy)
    
    return best_reaction

In [155]:
reaction_predictor(['Na', 'H2O'])

scoping possibilities...
  optimizing combinations...
    deriving equations...
      calculating energies...
2 Na + 2 H2O -> 2 NaOH + H2
-361.603200000000


In [154]:
reaction_predictor(['Al', 'O2'])

scoping possibilities...
  optimizing combinations...
    deriving equations...
      calculating energies...
4 Al + 3 O2 -> 2 Al2O3
-3164.60000000000


In [156]:
reaction_predictor(['HCl', 'NaOH'])

scoping possibilities...
  optimizing combinations...
    deriving equations...
      calculating energies...
HCl + NaOH -> NaCl + H2O
-80.6942159999999
