In [9]:
# script to remove duplicates
import os

import rmgpy.chemkin
import rmgpy.data.kinetics.library
import rmgpy.rmg.pdep


This script still misses duplicate reactions of the following type:
    A (+M) <=> A (+M)
    A + A (+M) <=> A + A (+M)
    
We should either mark these as duplicates, or delete the A + A (+M) <=> A + A (+M) form because it seems less likely.

In [10]:
# Load the mechanism: the companion files are expected to sit in the same
# directory as the annotated Chemkin file.
chemkin_file = '/scratch/harris.se/guassian_scratch/RMG_min/RMG_min_2/chemkin/chem_annotated.inp'
mechanism_dir = os.path.dirname(chemkin_file)

# NOTE: transport_file is built here but not used by any other visible cell.
transport_file = os.path.join(mechanism_dir, 'tran.dat')
sp_dict = os.path.join(mechanism_dir, 'species_dictionary.txt')

# check_duplicates=False is passed through to the loader -- presumably so that
# undeclared duplicates do not abort the load (TODO confirm against RMG-Py docs).
species_list, reaction_list = rmgpy.chemkin.load_chemkin_file(
    chemkin_file,
    sp_dict,
    check_duplicates=False,
)


In [3]:
# Collect every pair of isomorphic reactions in which at least one member is
# not flagged as a duplicate. Each entry is [i, j] with j < i, both being
# indices into reaction_list.
bad_duplicate_pairs = []

for i, later_rxn in enumerate(reaction_list):
    for j in range(i):
        earlier_rxn = reaction_list[j]
        if not later_rxn.is_isomorphic(earlier_rxn):
            continue
        if later_rxn.duplicate and earlier_rxn.duplicate:
            # both already carry the duplicate flag -- nothing to fix
            continue
        bad_duplicate_pairs.append([i, j])

In [4]:
# number of isomorphic reaction pairs that are not both flagged as duplicates
len(bad_duplicate_pairs)

38

In [5]:
# display the [i, j] index pairs (j < i) into reaction_list
bad_duplicate_pairs

[[45, 33],
 [69, 50],
 [77, 71],
 [146, 140],
 [154, 150],
 [225, 205],
 [438, 434],
 [444, 432],
 [470, 459],
 [605, 468],
 [690, 688],
 [923, 922],
 [927, 926],
 [929, 928],
 [943, 240],
 [966, 949],
 [1064, 1060],
 [1116, 1114],
 [1128, 1104],
 [1323, 1321],
 [1366, 1340],
 [1524, 1520],
 [1548, 1546],
 [1575, 1574],
 [1580, 1079],
 [1612, 1596],
 [1618, 1593],
 [1619, 1592],
 [1641, 1603],
 [1686, 1606],
 [1723, 1713],
 [1743, 1740],
 [1881, 1329],
 [1889, 1709],
 [1903, 899],
 [1905, 915],
 [1938, 1921],
 [1956, 1339]]

In [6]:
# Resolve every undeclared duplicate pair: either flag both reactions as
# duplicates (so both are kept) or queue one of them for removal.
#
# Decision table (the order of the two reactions in the pair does not matter):
#   PDep    + library -> keep both, flag as duplicates
#   library + family  -> remove the family reaction
#   PDep    + family  -> keep both, flag as duplicates
#   PDep    + PDep    -> remove the one with more reactants
#   anything else     -> leave untouched and print the two types for inspection
LibraryReaction = rmgpy.data.kinetics.library.LibraryReaction
PDepReaction = rmgpy.rmg.pdep.PDepReaction

remove_list = []  # indices of reactions to remove
remove_list_items = []  # Reaction objects of the reactions to remove


def _mark_as_duplicates(rxn_a, rxn_b):
    """Set the duplicate flag on both reactions of a pair."""
    rxn_a.duplicate = True
    rxn_b.duplicate = True


def _schedule_removal(index):
    """Queue reaction_list[index] for removal, at most once per index.

    A reaction can show up in several bad pairs; queuing it only once keeps
    remove_list and remove_list_items free of repeats.
    """
    if index not in remove_list:
        remove_list.append(index)
        remove_list_items.append(reaction_list[index])


for i, j in bad_duplicate_pairs:
    rxn_i = reaction_list[i]
    rxn_j = reaction_list[j]

    # Exact type checks (not isinstance), as in the original comparisons:
    # anything that is neither a LibraryReaction nor a PDepReaction is
    # handled as a "family" reaction below.
    i_is_library = type(rxn_i) is LibraryReaction
    j_is_library = type(rxn_j) is LibraryReaction
    i_is_pdep = type(rxn_i) is PDepReaction
    j_is_pdep = type(rxn_j) is PDepReaction

    # one PDEP and one library -- this is okay
    # (Previously the second half of this check tested `!= LibraryReaction`,
    # which sent library/PDep pairs to the removal branch and swallowed
    # PDep/PDep pairs before they could reach their own branch.)
    if (i_is_pdep and j_is_library) or (i_is_library and j_is_pdep):
        _mark_as_duplicates(rxn_i, rxn_j)

    # one library and one family -- this is bad. remove the family reaction
    elif i_is_library and not j_is_library:
        _schedule_removal(j)
    elif j_is_library and not i_is_library:
        _schedule_removal(i)

    # one PDEP and one family -- this is okay
    elif i_is_pdep != j_is_pdep:
        _mark_as_duplicates(rxn_i, rxn_j)

    # two PDEPs -- this is probably wrong
    elif i_is_pdep and j_is_pdep:
        # remove the one with more reactants; compare the *lengths* of both
        # reactant lists (comparing an int with a list raises TypeError)
        if len(rxn_i.reactants) > len(rxn_j.reactants):
            _schedule_removal(i)
        else:
            # j has more reactants, or it is a tie: remove j by default
            _schedule_removal(j)

    else:
        # unhandled combination (e.g. two library or two family reactions):
        # report the types so the pair can be inspected manually
        print(type(rxn_i))
        print(type(rxn_j))
        print()

In [7]:
# actually remove the items
# Filter by object identity instead of calling list.remove() once per item:
#   * an item that was queued more than once no longer raises ValueError on
#     the second removal,
#   * re-running this cell is a harmless no-op,
#   * the result does not depend on how the reaction class defines equality.
# Slice assignment mutates reaction_list in place, so any other reference to
# the same list sees the change, just as with list.remove().
ids_to_remove = {id(item) for item in remove_list_items}
reaction_list[:] = [rxn for rxn in reaction_list if id(rxn) not in ids_to_remove]

In [8]:
# Write the cleaned mechanism next to the original Chemkin file
output_dir = os.path.dirname(chemkin_file)
outfile = os.path.join(output_dir, 'chem_annotated_fixed.inp')
rmgpy.chemkin.save_chemkin_file(
    outfile,
    species_list,
    reaction_list,
)