In [1]:
from Bio import PDB
import numpy as np

In [None]:
parser=PDB.PDBParser(QUIET=True)
structure1 = parser.get_structure("model1", r"path-to-model1.pdb")
structure2 = parser.get_structure("model2", r"path-to-model2.pdb")


In [None]:
def get_common_atoms(structure1, structure2):
    """Extracts atom coordinates for only the common atoms in two structures."""
    atoms1 = {atom.get_id(): atom for atom in structure1.get_atoms()}
    atoms2 = {atom.get_id(): atom for atom in structure2.get_atoms()}
    
    common_keys = set(atoms1.keys()).intersection(set(atoms2.keys()))
    
    coords1 = np.array([atoms1[key].coord for key in common_keys])
    coords2 = np.array([atoms2[key].coord for key in common_keys])
    
    return coords1, coords2

def get_common_ca_atoms(structure1, structure2):
    """Extracts Cα atom coordinates for only the common residues in two structures."""
    ca_atoms1 = {atom.get_parent().get_id()[1]: atom for atom in structure1.get_atoms() if atom.get_name() == "CA"}
    ca_atoms2 = {atom.get_parent().get_id()[1]: atom for atom in structure2.get_atoms() if atom.get_name() == "CA"}
    
    common_residues = set(ca_atoms1.keys()).intersection(set(ca_atoms2.keys()))
    
    coords1 = np.array([ca_atoms1[res].coord for res in sorted(common_residues)])
    coords2 = np.array([ca_atoms2[res].coord for res in sorted(common_residues)])
    
    return coords1, coords2

def get_common_ca_atoms_chain(structure1, structure2, chain_id="A"):
    """Extracts Cα atom coordinates for only the common residues in a specified chain."""
    ca_atoms1 = {res.get_id()[1]: atom for res in structure1[0][chain_id]
                 for atom in res if atom.get_name() == "CA"}
    ca_atoms2 = {res.get_id()[1]: atom for res in structure2[0][chain_id]
                 for atom in res if atom.get_name() == "CA"}

    # Find common residue numbers
    common_residues = sorted(set(ca_atoms1.keys()) & set(ca_atoms2.keys()))

    coords1 = [ca_atoms1[r].coord for r in common_residues]
    coords2 = [ca_atoms2[r].coord for r in common_residues]

    return coords1, coords2

def calculate_rmsd(coords1, coords2):
    """Computes RMSD given two sets of atomic coordinates."""
    diff = coords1 - coords2
    return np.sqrt(np.mean(np.sum(diff**2, axis=1)))

In [None]:
full_coords_1=[]
full_coords_2=[]


for chain in "ABCDEFGHIJKLMNOPQRSTUVWXYZ":
    try:
        coords1, coords2 = get_common_ca_atoms_chain(structure1, structure2,chain_id=chain)
        full_coords_1.extend(coords1)
        full_coords_2.extend(coords2)
    except:
        pass

coords_1=np.array(full_coords_1)
coords_2=np.array(full_coords_2)

rmsd_value = calculate_rmsd(coords_1, coords_2)
rmsd_value

50.267014


np.float32(1.1080964)