# Standard Thermodynamic Quantities

In [1]:
import os                               
import re                   
import time                 # to stall requests (just in case)

import pandas as pd 
import chemdataextractor as cde     # chemistry parser

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize, sent_tokenize

from tika import parser     # the specific parser method 

from chempy import balance_stoichiometry
from chempy import Substance
from chempy import Reaction
from chempy.util import periodic

pd.set_option('display.max_colwidth', 0)    # no max column width
pd.set_option('display.max_rows', 150)

In [36]:
thermo = pd.read_csv('../data/external/thermo/chnosz_thermo.csv')

In [37]:
thermo.columns

Index(['name', 'abbrv', 'formula', 'state', 'ref1', 'ref2', 'date', 'E_units',
       'G', 'H', 'S', 'Cp', 'V', 'a1.a', 'a2.b', 'a3.c', 'a4.d', 'c1.e',
       'c2.f', 'omega.lambda', 'z.T'],
      dtype='object')

In [38]:
thermo = thermo[['name', 'abbrv', 'formula', 'state', 'G', 'H', 'S', 'Cp']]
thermo.head()

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
0,water,,H2O,liq,,,,
1,e-,,(Z-1),aq,0.0,0.0,15.6166,0.0
2,H+,H+,H+,aq,0.0,0.0,0.0,0.0
3,Li+,Li+,Li+,aq,-69933.0,-66552.0,2.7,14.2
4,Na+,Na+,Na+,aq,-62591.0,-57433.0,13.96,9.06


In [113]:
thermo.at[0, 'G'] = -237180
thermo.at[0, 'H'] = -285830
thermo.at[0, 'S'] = -69.91
thermo.at[0, 'Cp'] = -75.29

In [39]:
pd.concat(g for _, g in thermo.groupby('formula') if len(g) > 1)

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
1850,acanthite,Acn,Ag2S,cr,-9446.000,-7550.000,34.3000,
1851,acanthite,Acn,Ag2S,cr2,-9235.197,-7024.866,35.3543,
1852,acanthite,Acn,Ag2S,cr3,-8227.324,-4960.457,38.8980,
799,AgCl,,AgCl,aq,-17399.000,-18816.000,32.0700,-5.328
1872,chlorargyrite,Crg,AgCl,cr,-26247.000,-30370.000,23.0000,
721,Al(OH)3,,Al(OH)3,aq,-263321.000,,14.1850,23.650
1980,gibbsite,,Al(OH)3,cr,-1154889.000,-1293128.000,68.4400,91.700
2468,andalusite,And,Al2SiO5,cr,,,,
2497,kyanite,Ky,Al2SiO5,cr,,,,
2519,sillimanite,Si,Al2SiO5,cr,,,,


In [40]:
thermo.dtypes

name       object 
abbrv      object 
formula    object 
state      object 
G          float64
H          float64
S          float64
Cp         float64
dtype: object

In [41]:
thermo[['G', 'H', 'S', 'Cp']] = thermo[['G', 'H', 'S', 'Cp']] * 4.184

In [124]:
sodium = {
    'name': 'sodium',
    'abbrv': 'Na',
    'formula': 'Na',
    'state': 'cr',
    'G': 0,
    'H': 0,
    'S': 51.21,
    'Cp': 28.24
}

In [123]:
oxygen = {
    'name': 'oxygen',
    'abbrv': 'O2',
    'formula': 'O2',
    'state': 'gas',
    'G': 0,
    'H': 0,
    'S': 205.03,
    'Cp': 29.36
}

In [128]:
nah = {
    'name': 'sodium hydride',
    'abbrv': 'NaH',
    'formula': 'NaH',
    'state': 'cr',
    'G': -33500,
    'H': -56300,
    'S': 40,
    'Cp': 36.4
}

In [46]:
thermo = thermo.append(sodium, ignore_index=True)

In [125]:
thermo = thermo.append(oxygen, ignore_index=True)

In [129]:
thermo = thermo.append(nah, ignore_index=True)

In [126]:
thermo.tail()

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
3369,3-iodobenzoic acid,,C7H5IO2,gas,-160462.998168,-218999.999104,433.772016,122.101672
3370,4-iodobenzoic acid,,C7H5IO2,gas,-157064.000248,-215600.001208,433.772016,122.101672
3371,methyl-2-iodobenzoate,,C8H7IO2,gas,-79910.998408,-167660.001168,472.20624,168.07128
3372,sodium,Na,Na,cr,0.0,0.0,51.21,28.24
3373,oxygen,O2,O2,gas,0.0,0.0,205.03,29.36


In [141]:
stoich = pd.read_csv('../data/external/thermo/chnosz_stoich.csv')
stoich.head()

Unnamed: 0.1,Unnamed: 0,Ag,Al,Ar,As,Au,B,Ba,Be,Bi,...,Tm,U,V,W,Xe,Y,Yb,Z,Zn,Zr
0,H2O,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,0,0
2,H+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Li+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Na+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [142]:
stoich.rename(columns={'Unnamed: 0': -1}, inplace=True)
stoich.head()

Unnamed: 0,-1,Ag,Al,Ar,As,Au,B,Ba,Be,Bi,...,Tm,U,V,W,Xe,Y,Yb,Z,Zn,Zr
0,H2O,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,0,0
2,H+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Li+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Na+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [143]:
from alchemist.periodic import Z

ModuleNotFoundError: No module named 'alchemist'

In [220]:
stoich.rename(columns={'Z': 'e-'}, inplace=True)
stoich.head()

Unnamed: 0,-1,Ag,Al,Ar,As,Au,B,Ba,Be,Bi,...,Tm,U,V,W,Xe,Y,Yb,e-,Zn,Zr
0,H2O,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,0,0
2,H+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Li+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Na+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [221]:
stoich.columns = [Z(col) if col != -1 else col for col in stoich.columns]
stoich.head()

Unnamed: 0,-1,47,13,18,33,79,5,56,4,83,...,69,92,23,74,54,39,70,0,30,40
0,H2O,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,-1,0,0
2,H+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
3,Li+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0
4,Na+,0,0.0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,0


In [222]:
list(stoich.columns.sort_values())

[-1,
 0,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 11,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 74,
 75,
 78,
 79,
 80,
 81,
 82,
 83,
 86,
 87,
 88,
 90,
 91,
 92]

In [223]:
stoich = stoich[list(stoich.columns.sort_values())]
stoich

Unnamed: 0,-1,0,1,2,3,4,5,6,7,8,...,80,81,82,83,86,87,88,90,91,92
0,H2O,0,2.0,0,0,0,0,0,0,1.0,...,0,0,0,0,0,0,0,0,0,0
1,(Z-1),-1,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
2,H+,1,1.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
3,Li+,1,0.0,0,1,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
4,Na+,1,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
5,K+,1,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
6,Rb+,1,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
7,Cs+,1,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
8,Mg+2,2,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0
9,Ca+2,2,0.0,0,0,0,0,0,0,0.0,...,0,0,0,0,0,0,0,0,0,0


In [224]:
stoich.rename(columns={-1: 'formula'}, inplace=True)

In [236]:
stoich.at[1, 'formula'] = 'e-'

In [226]:
stoich.shape

(3372, 86)

In [227]:
reactants = ['Na', 'H2O']

In [228]:
water = Substance.from_formula('H2O')

In [229]:
water.composition

{1: 2, 8: 1}

In [231]:
column_mask = [col for col in stoich.columns if col not in ('formula', 0, 1, 8, 11)]
column_mask

[2,
 3,
 4,
 5,
 6,
 7,
 9,
 10,
 12,
 13,
 14,
 15,
 16,
 17,
 18,
 19,
 20,
 21,
 22,
 23,
 24,
 25,
 26,
 27,
 28,
 29,
 30,
 31,
 33,
 34,
 35,
 36,
 37,
 38,
 39,
 40,
 41,
 42,
 43,
 44,
 45,
 46,
 47,
 48,
 49,
 50,
 51,
 53,
 54,
 55,
 56,
 57,
 58,
 59,
 60,
 61,
 62,
 63,
 64,
 65,
 66,
 67,
 68,
 69,
 70,
 71,
 72,
 74,
 75,
 78,
 79,
 80,
 81,
 82,
 83,
 86,
 87,
 88,
 90,
 91,
 92]

In [248]:
stoich_temp.loc[(stoich_temp==0).all(axis=1)]

Unnamed: 0,formula,0,1,8,11


In [139]:
# https://stackoverflow.com/questions/22649693/

stoich_temp = stoich.copy()

for col in column_mask:
    stoich_temp = stoich_temp[stoich_temp[col] == 0]

stoich_temp = stoich_temp.loc[(stoich_temp.drop(columns='formula')!=0).any(axis=1)]

stoich_temp = stoich_temp[['formula', 0, 1, 8, 11]]
stoich_temp.head()

NameError: name 'stoich' is not defined

In [246]:
list(stoich_temp['formula'])

['H2O',
 'e-',
 'H+',
 'Na+',
 'OH-',
 'H2',
 'O2',
 'HO2-',
 'H2O2',
 'NaOH',
 'H',
 'Na2O',
 'H2O',
 'H2O',
 'H2O',
 'H2O',
 'H2O',
 'H2O',
 'H2O',
 'H2O',
 'O',
 'O',
 'H',
 'H2',
 'O2']

In [254]:
candidates = list(stoich_temp.index)
candidates

[0,
 1,
 2,
 4,
 20,
 61,
 62,
 483,
 484,
 572,
 1767,
 1906,
 1941,
 1942,
 1943,
 1944,
 1945,
 1946,
 1947,
 1948,
 2164,
 2165,
 2440,
 3090,
 3097]

In [256]:
thermo.loc[1, 'formula']

'(Z-1)'

In [92]:
equation = balance_stoichiometry(['Na', 'H2O'], ['NaOH', 'H2'])
equation

(OrderedDict([('Na', 2), ('H2O', 2)]), OrderedDict([('NaOH', 2), ('H2', 1)]))

In [97]:
list(equation[1].items())

[('NaOH', 2), ('H2', 1)]

In [71]:
list(equation[1].items())

('NaOH', 2)

In [88]:
thermo[thermo['formula'] == list(equation[1].keys())[0]].index

Int64Index([572], dtype='int64')

In [115]:
thermo[thermo['formula'] == 'H2O']['G']

0      -237180.0000
1941   -236590.9744
1942   -235557.1080
1943   -236114.8352
1944   -235759.1952
1945   -235308.1600
1946   -233062.1888
1947   -232981.8560
1948   -235613.1736
Name: G, dtype: float64

In [85]:
thermo.at[i, 'G']

ValueError: At based indexing on an integer index can only have integer indexers

In [118]:
def gibbs_free_energy(equation, kJ=True):
    prod = list(equation[1].items())
    reac = list(equation[0].items())
    delG = 0
    for p in prod:
        delG += thermo[thermo['formula'] == p[0]]['G'].values[0] * p[1]
    for r in reac:
        delG -= thermo[thermo['formula'] == r[0]]['G'].values[0] * r[1]
    return delG / (1 + 999*kJ)

In [119]:
gibbs_free_energy(equation)

-343.879776000000

In [134]:
equation2 = balance_stoichiometry(['Na', 'H2O'], ['NaH', 'O2'])
gibbs_free_energy(equation2)

356.903536000000

In [135]:
equation3 = balance_stoichiometry(['Na', 'H2O'], ['NaH', 'H2O2'])
gibbs_free_energy(equation3)

273.346480000000

In [137]:
equation4 = balance_stoichiometry(['Na', 'H2O'], ['Na2O', 'H2'])
gibbs_free_energy(equation4)

-121.167048000000

In [138]:
equation5 = balance_stoichiometry(['Na', 'H2O'], ['NaH', 'O'])
gibbs_free_energy(equation5)

69.1615040000000

In [249]:
output = Reaction(*balance_stoichiometry(['Na', 'H2O'], ['NaH', 'H2O2']))
output

In [48]:
thermo[thermo['formula'] == 'Na']

Unnamed: 0,name,abbrv,formula,state,G,H,S,Cp
3372,sodium,Na,Na,cr,0.0,0.0,51.21,28.24


In [173]:
keys = output.keys()
substances = {k: Substance.from_formula(k) for k in keys}
output.unicode(substances)

'2 Na + H₂O → Na₂O + H₂'

In [251]:
Reaction(*balance_stoichiometry(['Na', 'H2O'], ['H+', 'Na2O', 'e-']))

In [None]:
for c in candidates:
    products = []
    cand = thermo.loc[c, 'formula']
    if cand not in ['Na', 'H2O']:
        