In [1]:
# Script to flag or remove improperly duplicated reactions in a Chemkin mechanism
import os
import rmgpy.chemkin


In [2]:
# Load the mechanism: the transport file and species dictionary are expected
# to sit in the same directory as the annotated Chemkin file.
chemkin_file = '/scratch/harris.se/guassian_scratch/RMG_min/RMG_min_2/chem_annotated.inp'
mechanism_dir = os.path.dirname(chemkin_file)
transport_file = os.path.join(mechanism_dir, 'tran.dat')
sp_dict = os.path.join(mechanism_dir, 'species_dictionary.txt')

# check_duplicates=False is passed so loading proceeds to the scan below;
# NOTE(review): presumably this skips RMG's own duplicate validation -- confirm.
species_list, reaction_list = rmgpy.chemkin.load_chemkin_file(
    chemkin_file,
    sp_dict,
    check_duplicates=False,
)


In [3]:
# Collect the bad duplicate pairs: index pairs [i, j] of reactions that match
# each other but are not BOTH flagged with `duplicate`.
bad_duplicate_pairs = []

for i, rxn_i in enumerate(reaction_list):
    # Case 1: an isomorphic reaction appears earlier in the list. Only earlier
    # indices are scanned, so each such pair is recorded once as [later, earlier].
    for j in range(i):
        rxn_j = reaction_list[j]
        if rxn_i.is_isomorphic(rxn_j) and not (rxn_i.duplicate and rxn_j.duplicate):
            bad_duplicate_pairs.append([i, j])

    # Case 2: A (+M) <=> B (+M) not matched to 2A (+M) <=> 2B (+M).
    # is_isomorphic() does not pair these up, so build the doubled reaction
    # explicitly and search the whole list for it.
    if len(rxn_i.reactants) == 1 and len(rxn_i.products) == 1:
        doubled_reaction = rmgpy.reaction.Reaction()
        doubled_reaction.reactants = [rxn_i.reactants[0], rxn_i.reactants[0]]
        doubled_reaction.products = [rxn_i.products[0], rxn_i.products[0]]

        for j, rxn_j in enumerate(reaction_list):
            if j == i:
                continue
            # Apply the same "both already flagged" exemption as case 1, so
            # pairs that are already marked correctly are not reported again
            # (this also keeps the cell stable when it is re-run after the
            # flags have been set).
            if rxn_j.is_isomorphic(doubled_reaction) and \
                    not (rxn_i.duplicate and rxn_j.duplicate):
                bad_duplicate_pairs.append([i, j])

In [4]:
# number of [i, j] index pairs collected by the scan above
len(bad_duplicate_pairs)

23

In [5]:
# display the collected [i, j] pairs; each entry indexes into reaction_list
bad_duplicate_pairs

[[45, 33],
 [68, 50],
 [80, 74],
 [203, 197],
 [209, 207],
 [230, 71],
 [475, 469],
 [490, 488],
 [575, 525],
 [634, 632],
 [862, 854],
 [935, 934],
 [943, 936],
 [944, 939],
 [1003, 162],
 [1045, 1030],
 [1174, 1163],
 [1296, 1297],
 [1321, 1316],
 [1389, 1053],
 [1539, 1535],
 [1541, 1533],
 [1558, 1529]]

In [6]:
# Decide what to do with every bad duplicate pair:
#   * two PDEPs                      -> probably wrong; remove one of them
#   * exactly one PDEP (+ library or
#     family reaction)               -> okay; flag both as duplicates
#   * one library + one family       -> bad; remove the family reaction
#   * anything else                  -> leave untouched and print the two types
#
# Fixes relative to the earlier version of this cell:
#   * the mirrored "library i / PDEP j" test used `!=` instead of `==`, which
#     swallowed the two-PDEP case and let a PDEP reaction be removed in favour
#     of a library reaction;
#   * the two-PDEP branch compared len(...) against a list (missing len()),
#     which raises TypeError on Python 3;
#   * a reaction that shows up in several pairs is only scheduled for removal
#     once.
PDepReaction = rmgpy.rmg.pdep.PDepReaction
LibraryReaction = rmgpy.data.kinetics.library.LibraryReaction

remove_list = []  # indices of reactions to remove
remove_list_items = []  # Reaction objects of the reactions to remove
for i, j in bad_duplicate_pairs:
    rxn_i = reaction_list[i]
    rxn_j = reaction_list[j]

    # Exact type comparison (not isinstance) is kept deliberately so that
    # subclasses are classified exactly as before.
    i_is_pdep = type(rxn_i) == PDepReaction
    j_is_pdep = type(rxn_j) == PDepReaction
    i_is_library = type(rxn_i) == LibraryReaction
    j_is_library = type(rxn_j) == LibraryReaction

    index_to_remove = None

    if i_is_pdep and j_is_pdep:
        # two PDEPs -- this is probably wrong.
        # Remove the one with more reactants; on a tie remove j by default.
        if len(rxn_i.reactants) > len(rxn_j.reactants):
            index_to_remove = i
        else:
            index_to_remove = j

    elif i_is_pdep or j_is_pdep:
        # exactly one PDEP, paired with a library or family reaction --
        # this is okay, mark them as duplicates
        rxn_i.duplicate = True
        rxn_j.duplicate = True

    elif i_is_library != j_is_library:
        # one library and one family -- this is bad. remove the non-library one
        index_to_remove = j if i_is_library else i

    else:
        # two library reactions or two non-library, non-PDEP reactions:
        # no rule applies, so report the types for manual inspection
        print(type(rxn_i))
        print(type(rxn_j))
        print()

    # Schedule the removal, skipping reactions that are already scheduled so
    # the same reaction is never listed twice.
    if index_to_remove is not None and index_to_remove not in remove_list:
        remove_list.append(index_to_remove)
        remove_list_items.append(reaction_list[index_to_remove])

In [7]:
# Actually remove the items.
# Filter by object identity instead of calling list.remove() once per item:
# this cannot raise ValueError when a reaction is listed twice or when the cell
# is re-run, never removes a different-but-equal reaction, and walks the list
# only once. Slice assignment keeps reaction_list the same list object.
ids_to_remove = {id(rxn) for rxn in remove_list_items}
reaction_list[:] = [rxn for rxn in reaction_list if id(rxn) not in ids_to_remove]

In [8]:
# Write the cleaned-up mechanism next to the input Chemkin file.
output_dir = os.path.dirname(chemkin_file)
outfile = os.path.join(output_dir, 'chem_annotated_fixed.inp')
rmgpy.chemkin.save_chemkin_file(outfile, species_list, reaction_list)