In [1]:
import re
import pandas as pd

In [2]:
gd = pd.read_csv('rawdata/GeoRem_Preferred_170622.csv', comment='#')

In [3]:
els = pd.read_pickle('resources/elements.pkl')

In [4]:
def calc_M(molecule):
    """
    Returns molecular mass of molecule.
    
    Where molecule is in standard chemical notation,
    e.g. 'CO2' or 'HCO3'
    
    NOTE: Brackets not supported - i.e. B(OH)4 must be
    written as BO4H4
    """
    # break the molecule up into a list of (Element, N) pairs
    comp = re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      molecule)
    
    # Calculate their mass
    M = 0
    for el, n in comp:
        if n == '':
            n = 1
        else:
            n = float(n)
        m = els[el]
        
        M += m * n
    return M

In [5]:
# calculate molecular weight for all compounds
gd.loc[:,'M'] = gd.Item.apply(calc_M)

In [6]:
udict = {'%m/m': 100,
         'ug/g': 1e6}

In [7]:
gd.loc[:,'g/g'] = gd.Value / [udict[u] for u in gd.Unit]
gd.loc[:,'g/g_err'] = gd.Uncertainty / [udict[u] for u in gd.Unit]

In [8]:
gd.loc[:,'mol/g'] = gd.loc[:,'g/g'] / gd.loc[:,'M']
gd.loc[:,'mol/g_err'] = gd.loc[:,'g/g_err'] / gd.loc[:,'M']

In [9]:
gd.to_csv('GeoRem_Preferred_170622.csv', index=False)

In [153]:
srmdat = gd.copy()

## Create Ratios in latools

In [154]:
internal_standard = 'Si29'

In [155]:
internal_el = re.match('([A-Z][a-z]{0,})',internal_standard).groups()[0]

In [157]:
denom

Unnamed: 0,Item,SRM,Value,Uncertainty,Uncertainty_Type,Unit,GeoReM_bibcode,Reference,M,g/g,mol/g,g/g_err,mol/g_err
4,SiO2,NIST610,69.7,0.5,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,60.0843,0.697,0.0116,0.005,8.3e-05


In [161]:
for srm in srmdat.SRM.unique():
    ind = srmdat.SRM == srm
    
    # find denominator
    denom = srmdat.loc[srmdat.Item.str.contains(internal_el) & ind]
    
    # calculate denominator composition
    comp = re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      denom.Item.values[0])
    # determine stoichiometric multiplier
    N = [n for el, n in comp if el == internal_el][0]
    if N == '':
        N = 1
    else:
        N = float(N)
    
    srmdat.loc[ind, 'mol_ratio'] = srmdat.loc[ind, 'mol/g'] / (denom['mol/g'].values * N)
    srmdat.loc[ind, 'mol_ratio_err'] = ((srmdat.loc[ind, 'mol/g_err'] / srmdat.loc[ind, 'mol/g'])**2 +
                                        (denom['mol/g_err'].values / denom['mol/g'].values))**0.5 * srmdat.loc[ind, 'mol_ratio']
#     srmdat.loc[ind]

In [162]:
srmdat

Unnamed: 0,Item,SRM,Value,Uncertainty,Uncertainty_Type,Unit,GeoReM_bibcode,Reference,M,g/g,mol/g,g/g_err,mol/g_err,mol_ratio,mol_ratio_err
0,CO2,NIST610,0.0002,,,%m/m,GeoReM 5211,Jochum et al 2011,44.009500,2.000000e-06,4.544473e-08,,,3.917525e-06,
1,H2O,NIST610,0.0130,,,%m/m,GeoReM 5211,Jochum et al 2011,18.015280,1.300000e-04,7.216097e-06,,,6.220575e-04,
2,Na2O,NIST610,13.4000,0.300,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,61.978940,1.340000e-01,2.162025e-03,3.000000e-03,4.840354e-05,1.863755e-01,1.632763e-02
3,Al2O3,NIST610,1.9500,0.040,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,101.961276,1.950000e-02,1.912491e-04,4.000000e-04,3.923058e-06,1.648647e-02,1.436725e-03
4,SiO2,NIST610,69.7000,0.500,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,60.084300,6.970000e-01,1.160037e-02,5.000000e-03,8.321641e-05,1.000000e+00,8.500036e-02
5,CaO,NIST610,11.4000,0.200,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.077400,1.140000e-01,2.032905e-03,2.000000e-03,3.566499e-05,1.752448e-01,1.515780e-02
6,H,NIST610,15.0000,,,ug/g,GeoReM 5211,Jochum et al 2011,1.007940,1.500000e-05,1.488184e-05,,,1.282876e-03,
7,Li,NIST610,468.0000,24.000,95%CL,ug/g,GeoReM 5211,Jochum et al 2011,6.941000,4.680000e-04,6.742544e-05,2.400000e-05,3.457715e-06,5.812354e-03,5.754949e-04
8,Be,NIST610,476.0000,31.000,95%CL,ug/g,GeoReM 5211,Jochum et al 2011,9.012182,4.760000e-04,5.281740e-05,3.100000e-05,3.439788e-06,4.553079e-03,4.864557e-04
9,B,NIST610,350.0000,56.000,95%CL,ug/g,GeoReM 5211,Jochum et al 2011,10.811000,3.500000e-04,3.237443e-05,5.600000e-05,5.179909e-06,2.790811e-03,5.052339e-04


In [134]:
N

1

In [128]:
re.findall('([A-Z][a-z]{0,})([0-9]{0,})',
                      denom.Item.values[0])

[('Ca', ''), ('O', '')]

'CaO'

In [107]:
denom = 

In [108]:
denom

Unnamed: 0,Item,SRM,Value,Uncertainty,Uncertainty_Type,Unit,GeoReM_bibcode,Reference,M,g/g,mol/g
5,CaO,NIST610,11.4,0.2,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.114,0.002033
76,CaO,NIST612,11.9,0.1,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.119,0.002122
146,CaO,NIST614,11.9,0.2,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.119,0.002122


In [105]:
denom

Unnamed: 0,Item,SRM,Value,Uncertainty,Uncertainty_Type,Unit,GeoReM_bibcode,Reference,M,g/g,mol/g
5,CaO,NIST610,11.4,0.2,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.114,0.002033
76,CaO,NIST612,11.9,0.1,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.119,0.002122
146,CaO,NIST614,11.9,0.2,95%CL,%m/m,GeoReM 5211,Jochum et al 2011,56.0774,0.119,0.002122
