In [2]:
import numpy as np
import re

In [3]:
# Read column index 1 of the specs file as strings, skipping the first line.
# Each entry is later parsed as a chemical formula (e.g. 'H2+', 'CH3').
specs = np.loadtxt(
    '/STER/silkem/ChemTorch/rates/rate16.specs',
    dtype=str,
    usecols=1,
    skiprows=1,
)

In [4]:
def get_elements(specs):
    """
    Collect the distinct element symbols occurring in the given species names.

    Every name is cut into pieces that each start at a capital letter; the
    run of letters inside a piece is taken as the element symbol. Names that
    contain no capital letter (such as the electron 'e-') contribute nothing,
    so electrons are not extracted.

    Returns the unique symbols as a sorted list.
    Raises ValueError when a piece holds more than one run of letters.
    """
    found = set()

    for name in specs:
        # A new piece begins at every capital letter
        for piece in re.findall('[A-Z][^A-Z]*', name):
            letters = re.findall('[a-zA-Z]+', piece)
            if len(letters) > 1:
                raise ValueError('More than one element found!')
            # A piece always starts with a capital, so letters[0] exists
            found.add(letters[0])

    return sorted(found)

In [5]:
# Sorted, unique element symbols found in the species names
# (get_elements does not pick up the electron 'e-').
elements = get_elements(specs)
elements

['C', 'Cl', 'F', 'Fe', 'H', 'He', 'Mg', 'N', 'Na', 'O', 'P', 'S', 'Si']

In [6]:
# Add the extra 'charge' entry after the element symbols.
# Guarded so that re-running this cell does not append it a second time,
# which would add a spurious column to everything derived from `elements`.
if 'charge' not in elements:
    elements.append('charge')

In [7]:
# Lookup table: entry of `elements` (element symbol or 'charge') -> its position in the list
elements_dict = {name: idx for idx, name in enumerate(elements)}
elements_dict

{'C': 0,
 'Cl': 1,
 'F': 2,
 'Fe': 3,
 'H': 4,
 'He': 5,
 'Mg': 6,
 'N': 7,
 'Na': 8,
 'O': 9,
 'P': 10,
 'S': 11,
 'Si': 12,
 'charge': 13}

In [8]:
# Composition matrix: one row per species in `specs`, one column per entry
# of `elements` (the element symbols plus the 'charge' entry).
M = np.zeros((len(specs), len(elements)))

charge_dict = {'+': +1, '-': -1}

for i, spec in enumerate(specs):
    # Split on capital letters: each piece holds one element symbol,
    # optionally followed by an atom count and/or charge signs.
    comps = re.findall('[A-Z][^A-Z]*', spec)

    # The electron has no capital letter, so the split above yields no
    # pieces for it; its charge is set explicitly.
    charge = -1 if spec == 'e-' else 0

    for comp in comps:
        # Get element
        elem = re.findall('[a-zA-Z]+', comp) # This does not find electrons!
        if len(elem) > 1:
            raise ValueError('More than one element found!')
        element = elem[0]

        # Get number of atoms (no digits means a single atom)
        numb = re.findall(r'\d+', comp)
        if len(numb) > 1:
            raise ValueError('More than one number found!')
        number = int(numb[0]) if numb else 1

        # Get + or - for charge; a run such as '++' counts every sign
        char = re.findall('[+-]+', comp)
        if len(char) > 1:
            raise ValueError('More than one charge found!')
        elif len(char) == 1:
            charge += sum(charge_dict[sign] for sign in char[0])

        # Accumulate instead of assigning: the same element can occur in
        # several pieces of one formula (e.g. HCOOH has two H and two O).
        M[i][elements_dict[element]] += number

    # Address the charge column by name rather than by position
    M[i][elements_dict['charge']] = charge

    print(spec, M[i])

H [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
H+ [0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
H- [ 0.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0. -1.]
H2+ [0. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
H3+ [0. 0. 0. 0. 3. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
He [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0.]
He+ [0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 1.]
HeH+ [0. 0. 0. 0. 1. 1. 0. 0. 0. 0. 0. 0. 0. 1.]
C- [ 1.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0.  0. -1.]
C+ [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
C [1. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
CH- [ 1.  0.  0.  0.  1.  0.  0.  0.  0.  0.  0.  0.  0. -1.]
CH+ [1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
CH [1. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
CH2+ [1. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 1.]
N+ [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 1.]
CH2 [1. 0. 0. 0. 2. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
N [0. 0. 0. 0. 0. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
CH3 [1. 0. 0. 0. 3. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
NH [0. 0. 0. 0. 1. 0. 0. 1. 0. 0. 0. 0. 0. 0.]
NH+ [0. 0.

In [11]:
# Write the matrix M to disk in NumPy's .npy format
np.save('/STER/silkem/ChemTorch/rates/M_rate16.npy', M)

In [9]:
# Dimensions of M: (len(specs), len(elements))
M.shape

(468, 14)