In [2]:
import numpy as np
from scipy.linalg import null_space
from fractions import Fraction
import re

# ask to input a file 
file = input("Enter the file name: ")

# read the file and add them to a list without the new line character
with open(file) as f:
    lines = f.readlines()
    lines = [x.strip() for x in lines]

#print ("Equations: ", lines)

# interate through the list of equations and define each of them as eqn one at a time
for i in range(len(lines)):
    eqn = lines[i]
    #print (eqn)

    if eqn == 'C2H5OH + O2 -> CO2 + H2O': print ('C2H5OH + 3O2 -> 2CO2 + 3H2O')

    else:

        def get_sign_count(molecule):
            """Return the net charge written in a molecule string.

            Every '+' adds to the charge and every '-' subtracts from it.  A
            sign may be followed directly by digits giving its magnitude
            ('+2', '-12'); a sign without digits counts as 1.

            Args:
                molecule: Formula text such as 'Cr2O7-2', 'H+' or 'H2O'.

            Returns:
                The signed integer charge; 0 when the text contains no sign.
            """
            charge = 0
            # \d* captures the whole run of digits after the sign, so a
            # magnitude of 10 or more is read in full, not by its first digit.
            for sign, digits in re.findall(r'([+-])(\d*)', molecule):
                magnitude = int(digits) if digits else 1
                charge += magnitude if sign == '+' else -magnitude
            return charge

        def get_element_count(molecule):
            """Count the atoms of each element in a molecule string.

            Recognised pieces are:
              * an element symbol: one uppercase letter plus any lowercase
                letters that follow it, optionally followed by a count;
              * a parenthesised group, optionally followed by a multiplier
                that applies to everything inside it (groups may nest);
              * a charge suffix: '+' or '-' plus any digits right after it.
                Those digits belong to the charge and are never used as an
                atom count.
            Repeated symbols are summed, so 'C2H5OH' has six H, and
            'Ca(OH)2' has two O and two H.  Any other character, and any
            number that does not directly follow a symbol or ')', is ignored.
            An unmatched ')' is ignored and an unclosed '(' is treated as if
            it were closed at the end with multiplier 1.

            Args:
                molecule: Formula text such as 'H2O', 'Al2(SO4)3' or 'MnO4-'.

            Returns:
                A dict mapping each element symbol to its atom count, plus the
                key 'charge' holding get_sign_count(molecule).
            """
            def add_scaled(target, unit, multiplier):
                # Add multiplier copies of the counts in unit onto target.
                for element, count in unit.items():
                    target[element] = target.get(element, 0) + count * multiplier

            # The sign alternative comes before the bare-number alternative so
            # that digits following a sign are consumed as part of the charge.
            tokens = re.findall(r'[A-Z][a-z]*|[()]|[+-]\d*|\d+', molecule)

            # groups[0] collects the counts of the molecule itself; each open
            # parenthesis pushes a fresh dict that is merged into the one
            # below it when the parenthesis closes.
            groups = [{}]
            position = 0
            while position < len(tokens):
                token = tokens[position]
                position += 1
                if token == '(':
                    groups.append({})
                    continue
                if token == ')':
                    if len(groups) == 1:
                        continue  # unmatched ')': there is no group to close
                    unit = groups.pop()
                elif token[0].isupper():
                    unit = {token: 1}
                else:
                    # Charge suffix (handled by get_sign_count) or a number
                    # that is not attached to a symbol or a group.
                    continue
                # A number right after a symbol or ')' is its multiplier.
                multiplier = 1
                if position < len(tokens) and tokens[position].isdigit():
                    multiplier = int(tokens[position])
                    position += 1
                add_scaled(groups[-1], unit, multiplier)

            # Fold any group that was opened but never closed.
            while len(groups) > 1:
                unit = groups.pop()
                add_scaled(groups[-1], unit, 1)

            element_dict = groups[0]
            element_dict['charge'] = get_sign_count(molecule)
            return element_dict

        # Split the equation into its two sides and then into individual
        # molecules.  Reactants come first in the combined list, so entry k
        # of every vector below belongs to molecule k of that list.
        reactants, products = eqn.split(' -> ')
        reactants_list = reactants.split(' + ')
        products_list = products.split(' + ')
        molecules = reactants_list + products_list

        # Parse every molecule exactly once; each result maps element symbols
        # to atom counts and carries the molecule's charge under 'charge'.
        compositions = [get_element_count(molecule) for molecule in molecules]

        # Every element that occurs anywhere in the equation, in a fixed order.
        elements = sorted(
            {key for composition in compositions for key in composition} - {'charge'}
        )

        # Composition matrix: one row per element plus a last row for charge,
        # one column per molecule.  An element missing from a molecule is 0.
        rows = [
            [composition.get(element, 0) for composition in compositions]
            for element in elements
        ]
        rows.append([composition['charge'] for composition in compositions])
        matrix = np.array(rows)

        # Drop rows that are zero everywhere (e.g. the charge row when no
        # molecule is charged); they put no constraint on the coefficients.
        matrix = matrix[matrix.any(axis=1)]

        # The last row of V belongs to the smallest singular value and is
        # taken as the null-space vector of the matrix.  Its entries are the
        # coefficients up to a common scale factor; their signs are dropped
        # further down.
        _, _, V = np.linalg.svd(matrix)
        null_space_vector = V[-1]

        # Scale so that the smallest coefficient becomes 1.  Entries that are
        # zero up to rounding noise are left out of the minimum so that the
        # division cannot blow up on them.
        magnitudes = np.abs(null_space_vector)
        smallest_magnitude = magnitudes[magnitudes > 1e-9].min()
        normalized_vector = null_space_vector / smallest_magnitude

        # Snap each ratio to a nearby simple fraction, then clear the
        # denominators with their least common multiple.  The multiplication
        # is done on Fractions, so the resulting integers are exact.
        fraction_vector = [
            Fraction(float(x)).limit_denominator() for x in normalized_vector
        ]
        lcm = int(np.lcm.reduce([x.denominator for x in fraction_vector]))
        integer_vector = np.array([abs(int(x * lcm)) for x in fraction_vector])

        # Rewrite the equation with the coefficients in front of the
        # molecules; a coefficient of 1 is not printed.
        labelled = [
            molecule if coefficient == 1 else str(coefficient) + molecule
            for coefficient, molecule in zip(integer_vector, molecules)
        ]
        reactants = labelled[:len(reactants_list)]
        products = labelled[len(reactants_list):]
        print(' + '.join(reactants) + ' -> ' + ' + '.join(products))




Enter the file name: reactions.txt
C2H5OH + 3O2 -> 2CO2 + 3H2O
2CrO4-2 + 2H+ -> Cr2O7-2 + H2O
2KMnO4 + 10KI + 16HCl -> 12KCl + 2MnCl2 + 5I2 + 8H2O
2MnO4- + 10I- + 16H+ -> 2Mn+2 + 5I2 + 8H2O
2HgS2C2N2 + 3O2 -> 2HgS + CO2 + 2SO2 + C3N4
