In [1]:
import itertools
import numpy as np
from math import sqrt
from numpy import arange
from bisect import insort
from copy import deepcopy
from random import choice, shuffle
from scipy.sparse import csgraph, csr_matrix, triu
from ase.neighborlist import NeighborList, natural_cutoffs
from ase.build.rotate import rotation_matrix_from_points
from ase.io import read, write
from collections import Counter

In [2]:
def rmsd(mol0, mol1):
    """Return the root-mean-square deviation of ``mol1`` from ``mol0`` after rotation.

    The positions of ``mol1`` are rotated with the matrix returned by
    ``rotation_matrix_from_points`` and compared with those of ``mol0``.
    Positions are used exactly as returned by ``get_positions()``; no
    centroid shift is applied in this function.

    Parameters
    ----------
    mol0, mol1 : objects exposing ``get_positions()``
        Reference and moving structure (presumably ``ase.Atoms`` with the
        same atom ordering -- TODO confirm against callers).

    Returns
    -------
    float
        ``sqrt(3 * mean((R q1 - q0)**2))`` taken over all array elements.
    """
    reference = mol0.get_positions()
    moving = mol1.get_positions()
    rotation = rotation_matrix_from_points(moving.T, reference.T)
    # Row vectors are rotated by right-multiplying with the transpose.
    residual = np.dot(moving, rotation.T) - reference
    # The mean runs over every coordinate; the factor 3 turns it into a
    # per-row sum when the arrays have three columns.
    return sqrt(3*(residual**2).mean())

def find_neighbors(mol):
    """Attach neighbor information to ``mol`` in place.

    Builds a ``NeighborList`` from ``natural_cutoffs(mol, mult=1.2)`` with
    ``skin=0``, ``self_interaction=False`` and ``bothways=True`` and stores
    two new attributes on ``mol``:

    * ``mol.neighbors`` -- a list with one ``set`` of neighbor indices per atom;
    * ``mol.connectivity_matrix`` -- the neighbor list's connectivity matrix.

    Returns ``None``.
    """
    nl = NeighborList(
        natural_cutoffs(mol, mult=1.2),
        skin=0,
        self_interaction=False,
        bothways=True,
    )
    nl.update(mol)

    per_atom = []
    for atom in mol:
        # get_neighbors returns (indices, offsets); only the indices are kept.
        indices, _offsets = nl.get_neighbors(atom.index)
        per_atom.append(set(indices.tolist()))

    mol.neighbors = per_atom
    mol.connectivity_matrix = nl.get_connectivity_matrix()

def fragcount(mol):
    """Return the number of connected components of ``mol.connectivity_matrix``.

    ``mol`` must already carry a ``connectivity_matrix`` attribute.
    """
    n_fragments, _labels = csgraph.connected_components(mol.connectivity_matrix)
    return n_fragments

def adjd(mol0, mol1):
    """Count the entries in which the two upper-triangular connectivity matrices differ.

    Both arguments must carry a ``connectivity_matrix`` attribute of the
    same shape. Only the upper triangle (as returned by ``triu``) is
    compared.
    """
    upper0 = triu(mol0.connectivity_matrix)
    upper1 = triu(mol1.connectivity_matrix)
    mismatches = upper0 != upper1
    return csr_matrix.count_nonzero(mismatches)

class PartSet(set):
    """A ``set`` of items that also records its own key for every added item.

    Two attributes are expected to be assigned on the instance before
    ``add`` is called:

    * ``part_key`` -- the key identifying this set;
    * ``atom_types`` -- a dict that ``add`` updates with ``item -> part_key``.

    Only ``add`` maintains ``atom_types``; other ``set`` mutators are inherited
    unchanged.
    """

    def add(self, item):
        """Insert ``item`` into the set and store ``part_key`` for it in ``atom_types``."""
        set.add(self, item)
        label = self.part_key
        self.atom_types[item] = label

class Partition(dict):
    """Mapping ``type key -> PartSet`` of atom indices, with reverse lookup.

    Attributes
    ----------
    atom_types : dict
        Maps an atom index to the key of the block it was added to. The dict
        object is shared with every ``PartSet`` created by this partition.
    leaf_index : dict
        Maps a branch key to the highest child index handed out so far by
        ``new_type`` (``0`` while the branch still has a single, unsuffixed
        block).

    Comparison operators look only at the blocks (the values), never at the
    keys: ``==`` / ``!=`` test whether both partitions contain the same
    blocks, ``<=`` tests whether every block of ``self`` is contained in some
    block of ``other``.
    """

    def __init__(self):
        super().__init__()
        self.atom_types = {}
        self.leaf_index = {}

    @staticmethod
    def _block_set(mapping):
        """Return the blocks of ``mapping`` as a set of frozensets."""
        return {frozenset(block) for block in mapping.values()}

    def _new_block(self, key):
        """Create an empty ``PartSet`` bound to ``key`` and store it under ``key``."""
        block = PartSet()
        block.part_key = key
        block.atom_types = self.atom_types
        self[key] = block
        return block

    def __eq__(self, other):
        """True when both mappings hold exactly the same blocks (keys ignored)."""
        if not isinstance(other, dict):
            return NotImplemented
        # Comparing in both directions: a partition with fewer blocks is not
        # equal to one that merely contains all of them.
        return self._block_set(self) == self._block_set(other)

    def __ne__(self, other):
        """Negation of ``__eq__``.

        Defined explicitly because ``dict.__ne__`` would otherwise be
        inherited and compare keys and values like a plain dict,
        contradicting ``__eq__``.
        """
        equal = self.__eq__(other)
        return equal if equal is NotImplemented else not equal

    def __le__(self, other):
        """True when every block of ``self`` is a subset of some block of ``other``."""
        if not isinstance(other, dict):
            return NotImplemented
        candidates = list(other.values())
        return all(any(a <= b for b in candidates) for a in self.values())

    def new_type(self, branch):
        """Create and return a new empty block under ``branch``.

        The first block of a branch is stored under ``branch`` itself. When a
        second block is requested, the first one is moved to ``(*branch, 0)``
        and later blocks get ``(*branch, 1)``, ``(*branch, 2)``, ...
        """
        if branch in self.leaf_index:
            if self.leaf_index[branch] == 0:
                first_key = (*branch, 0)
                first = self.pop(branch)
                # Keep the reverse lookup and the block's own key in step
                # with the rename; otherwise they would point at a key that
                # no longer exists in the partition.
                for i in first:
                    self.atom_types[i] = first_key
                if isinstance(first, PartSet):
                    first.part_key = first_key
                self[first_key] = first
            self.leaf_index[branch] += 1
            key = (*branch, self.leaf_index[branch])
        else:
            key = branch
            self.leaf_index[branch] = 0
        return self._new_block(key)

    def del_type(self, key):
        """Remove block ``key`` together with its ``atom_types`` and ``leaf_index`` entries.

        Raises ``KeyError`` if ``key`` is not a block of this partition.
        """
        self.leaf_index.pop(key, None)
        for i in self[key]:
            del self.atom_types[i]
        del self[key]

    def copy(self):
        """Return an independent copy with fresh ``PartSet`` blocks and lookup dicts."""
        clone = Partition()
        clone.atom_types = self.atom_types.copy()
        clone.leaf_index = self.leaf_index.copy()
        for key, block in self.items():
            # set.update bypasses PartSet.add: atom_types was already copied.
            set.update(clone._new_block(key), block)
        return clone

    def print(self):
        """Print a blank line, then one numbered line per block with 1-based members."""
        print()
        for n, key in enumerate(self, start=1):
            members = {i + 1 for i in self[key]}
            print(f'{n:3}:  {key}   ->   {members}')

def get_eltypes(mol):
    """Partition the atoms of ``mol`` by chemical symbol.

    Returns a ``Partition`` with one block per distinct symbol, keyed by the
    one-element tuple ``(symbol,)`` and holding the 0-based atom indices in
    the order given by ``mol.get_chemical_symbols()``.
    """
    eltypes = Partition()
    block_for = {}
    for index, symbol in enumerate(mol.get_chemical_symbols()):
        block = block_for.get(symbol)
        if block is None:
            # First atom of this element: open a new block for it.
            block = block_for[symbol] = eltypes.new_type((symbol,))
        block.add(index)
    return eltypes

def get_mnatypes(mol, intypes):
    """Refine ``intypes`` until atoms of one type have identically typed neighbors.

    In every pass each block of the current partition is split so that two
    atoms stay together only if the multisets of their neighbors' types
    (looked up in ``atom_types`` of the current partition) are equal. The
    loop stops when a pass produces a partition equal to its input, and that
    partition is returned.

    Parameters
    ----------
    mol : object with a ``neighbors`` sequence
        ``mol.neighbors[i]`` is the collection of neighbor indices of atom ``i``.
    intypes : Partition
        Starting partition; it is not modified.

    Returns
    -------
    Partition
    """
    while True:
        types = Partition()
        for key, members in intypes.items():
            # One block per distinct neighbor signature, created in order of
            # first appearance so child indices are assigned deterministically
            # for a given iteration order of ``members``.
            block_for = {}
            for i in members:
                if i in types.atom_types:
                    continue
                # Each atom's signature is computed once instead of once per
                # atom pair; a frozenset of (type, count) pairs is hashable
                # and compares equal exactly when the Counters do.
                signature = frozenset(
                    Counter(intypes.atom_types[j] for j in mol.neighbors[i]).items()
                )
                block = block_for.get(signature)
                if block is None:
                    block = block_for[signature] = types.new_type(key)
                block.add(i)
        if types == intypes:
            return types
        intypes = types

def get_neighbor_types(mol, atom_types):
    """Group every atom's neighbors by the type they belong to.

    Parameters
    ----------
    mol : iterable of atoms with an ``index`` attribute, plus ``mol.neighbors``
        ``mol.neighbors[i]`` must be a ``set`` of neighbor indices of atom ``i``.
    atom_types : mapping
        Maps a type key to the set of atom indices of that type.

    Returns
    -------
    dict
        ``{atom index: {type key: neighbors of that atom having that type}}``;
        types with no neighbor for a given atom are omitted.
    """
    neighbor_types = {}
    for atom in mol:
        bonded = mol.neighbors[atom.index]
        neighbor_types[atom.index] = {
            key: shared
            for key, members in atom_types.items()
            if (shared := bonded & members)
        }
    return neighbor_types

In [3]:
# Read images 0 and 1 of the same XYZ file as the two systems to compare.
mol0 = read('input_ab.xyz', index=0)
mol1 = read('input_ab.xyz', index=1)

# Attach neighbor sets and connectivity matrices, then report basic statistics.
find_neighbors(mol0)
find_neighbors(mol1)
print(f"There are {len(mol0)} atoms and {fragcount(mol0)} fragment(s) in system 0")
print(f"There are {len(mol1)} atoms and {fragcount(mol1)} fragment(s) in system 1")
print(f"There are {adjd(mol0, mol1)} adjacency differences between mol0 and mol1")
print(f"The root mean square distance between mol0 and mol1 is {rmsd(mol0, mol1):.4f} ")

# Element partition of system 0 and its refinement by neighbor types.
eltypes0 = get_eltypes(mol0)
mnatypes0 = get_mnatypes(mol0, eltypes0)
#neighbor_mnatypes0 = get_neighbor_types(mol0, mnatypes0)
eltypes0.print()
mnatypes0.print()

There are 90 atoms and 1 fragment(s) in system 0
There are 90 atoms and 1 fragment(s) in system 1
There are 0 adjacency differences between mol and mol1
The root mean square distance between mol0 and mol1 is 2.8732 

  1:  ('C',)   ->   {1, 2, 3, 4, 5, 6, 7, 8, 14, 16, 18, 19, 20, 21, 23, 28, 35, 36, 42, 44, 46, 47, 48, 49, 51, 56, 63, 64, 70, 72, 74, 75, 76, 77, 79, 84}
  2:  ('H',)   ->   {9, 10, 11, 12, 13, 22, 24, 25, 26, 27, 29, 30, 31, 32, 33, 34, 37, 38, 39, 40, 41, 50, 52, 53, 54, 55, 57, 58, 59, 60, 61, 62, 65, 66, 67, 68, 69, 78, 80, 81, 82, 83, 85, 86, 87, 88, 89, 90}
  3:  ('N',)   ->   {71, 73, 43, 45, 15, 17}

  1:  ('C', 0, 0)   ->   {1, 3, 5}
  2:  ('C', 0, 1)   ->   {2, 4, 6}
  3:  ('C', 0, 2)   ->   {48, 20, 76}
  4:  ('C', 1)   ->   {35, 7, 63}
  5:  ('C', 2, 0)   ->   {64, 8, 36}
  6:  ('C', 2, 1)   ->   {51, 23, 79}
  7:  ('C', 2, 2)   ->   {56, 28, 84}
  8:  ('C', 3)   ->   {70, 42, 14}
  9:  ('C', 4)   ->   {16, 72, 44}
 10:  ('C', 5)   ->   {74, 18, 46}
 11:  ('

In [4]:
# For every type of mnatypes0 with two or more atoms, split that type into
# single-atom types, re-refine, and collect the resulting partitions.
mnatypeslist = []
for mnatype in mnatypes0:
    if len(mnatypes0[mnatype]) >= 2:
        # Work on a copy; mnatypes0 itself is only read in this loop.
        eltypes = mnatypes0.copy()
        eltypes.del_type(mnatype)
        # Re-insert each atom of the removed type as a new type of its own
        # under the same branch key.
        for i in mnatypes0[mnatype]:
            eltypes.new_type(mnatype).add(i)
        mnatypes = get_mnatypes(mol0, eltypes)
        # Keep the list ordered by number of types (len of the Partition).
        insort(mnatypeslist, mnatypes, key=len)

# Keep and print only candidates that are not `<=` an already kept partition,
# i.e. candidates whose blocks are not all contained in blocks of a kept one.
dismnatypeslist = []
for mnatypesitem in mnatypeslist:
    for dismnatypesitem in dismnatypeslist:
        if mnatypesitem <= dismnatypesitem:
            break
    else:
        # The inner loop finished without a break: nothing kept so far covers it.
        mnatypesitem.print()
        dismnatypeslist.append(mnatypesitem)


  1:  ('C', 0, 1, 0)   ->   {2}
  2:  ('C', 0, 1, 1)   ->   {4}
  3:  ('C', 0, 1, 2)   ->   {6}
  4:  ('C', 0, 2, 0)   ->   {20}
  5:  ('C', 0, 2, 1)   ->   {76}
  6:  ('C', 0, 2, 2)   ->   {48}
  7:  ('C', 1, 0)   ->   {35}
  8:  ('C', 1, 1)   ->   {63}
  9:  ('C', 1, 2)   ->   {7}
 10:  ('C', 2, 0, 0)   ->   {36}
 11:  ('C', 2, 0, 1)   ->   {64}
 12:  ('C', 2, 0, 2)   ->   {8}
 13:  ('C', 2, 1, 0)   ->   {51}
 14:  ('C', 2, 1, 1)   ->   {79}
 15:  ('C', 2, 1, 2)   ->   {23}
 16:  ('C', 2, 2, 0)   ->   {84}
 17:  ('C', 2, 2, 1)   ->   {28}
 18:  ('C', 2, 2, 2)   ->   {56}
 19:  ('C', 3, 0)   ->   {42}
 20:  ('C', 3, 1)   ->   {14}
 21:  ('C', 3, 2)   ->   {70}
 22:  ('C', 4, 0)   ->   {44}
 23:  ('C', 4, 1)   ->   {16}
 24:  ('C', 4, 2)   ->   {72}
 25:  ('C', 5, 0)   ->   {74}
 26:  ('C', 5, 1)   ->   {46}
 27:  ('C', 5, 2)   ->   {18}
 28:  ('C', 6, 0, 0)   ->   {75}
 29:  ('C', 6, 0, 1)   ->   {19}
 30:  ('C', 6, 0, 2)   ->   {47}
 31:  ('C', 6, 1, 0)   ->   {49}
 32:  ('C', 6, 1,

In [5]:
# Continue from the first entry of the existing dismnatypeslist.
# NOTE(review): this cell rebinds both mnatypes0 and dismnatypeslist, which it
# also reads, so re-running it starts from its own previous result; it also
# raises IndexError when dismnatypeslist is empty.
mnatypes0 = dismnatypeslist[0]

# For every type of mnatypes0 with two or more atoms, split that type into
# single-atom types, re-refine, and collect the resulting partitions.
mnatypeslist = []
for mnatype in mnatypes0:
    if len(mnatypes0[mnatype]) >= 2:
        # Work on a copy; mnatypes0 itself is only read in this loop.
        eltypes = mnatypes0.copy()
        eltypes.del_type(mnatype)
        # Re-insert each atom of the removed type as a new type of its own
        # under the same branch key.
        for i in mnatypes0[mnatype]:
            eltypes.new_type(mnatype).add(i)
        mnatypes = get_mnatypes(mol0, eltypes)
        # Keep the list ordered by number of types (len of the Partition).
        insort(mnatypeslist, mnatypes, key=len)

# Keep and print only candidates that are not `<=` an already kept partition,
# i.e. candidates whose blocks are not all contained in blocks of a kept one.
dismnatypeslist = []
for mnatypesitem in mnatypeslist:
    for dismnatypesitem in dismnatypeslist:
        if mnatypesitem <= dismnatypesitem:
            break
    else:
        # The inner loop finished without a break: nothing kept so far covers it.
        mnatypesitem.print()
        dismnatypeslist.append(mnatypesitem)


  1:  ('C', 0, 1, 0)   ->   {2}
  2:  ('C', 0, 1, 1)   ->   {4}
  3:  ('C', 0, 1, 2)   ->   {6}
  4:  ('C', 0, 2, 0)   ->   {20}
  5:  ('C', 0, 2, 1)   ->   {76}
  6:  ('C', 0, 2, 2)   ->   {48}
  7:  ('C', 1, 0)   ->   {35}
  8:  ('C', 1, 1)   ->   {63}
  9:  ('C', 1, 2)   ->   {7}
 10:  ('C', 2, 0, 0)   ->   {36}
 11:  ('C', 2, 0, 1)   ->   {64}
 12:  ('C', 2, 0, 2)   ->   {8}
 13:  ('C', 2, 1, 0)   ->   {51}
 14:  ('C', 2, 1, 1)   ->   {79}
 15:  ('C', 2, 1, 2)   ->   {23}
 16:  ('C', 2, 2, 0)   ->   {84}
 17:  ('C', 2, 2, 1)   ->   {28}
 18:  ('C', 2, 2, 2)   ->   {56}
 19:  ('C', 3, 0)   ->   {42}
 20:  ('C', 3, 1)   ->   {14}
 21:  ('C', 3, 2)   ->   {70}
 22:  ('C', 4, 0)   ->   {44}
 23:  ('C', 4, 1)   ->   {16}
 24:  ('C', 4, 2)   ->   {72}
 25:  ('C', 5, 0)   ->   {74}
 26:  ('C', 5, 1)   ->   {46}
 27:  ('C', 5, 2)   ->   {18}
 28:  ('C', 6, 0, 0)   ->   {75}
 29:  ('C', 6, 0, 1)   ->   {19}
 30:  ('C', 6, 0, 2)   ->   {47}
 31:  ('C', 6, 1, 0)   ->   {49}
 32:  ('C', 6, 1,