In [1]:
from __future__ import print_function
from pymol import cmd
import os
import csv
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

In [2]:
# Protein identifiers (UniProt accessions, going by the variable name), one per
# entry. The order matters: this list is combined position-by-position with
# `pdbs` and `pdams` when the `translator` table is built.
uniprots = [
    "P00497", "P49410", "P46926", "Q9P2T1",
    "P54819", "P09052", "O00571", "Q9Y2V2",
    "Q7VU70", "P07846", "Q02127", "P26196",
    "Q9BUQ8", "B4EH14", "H0W0T5", "P80371",
    "P9WK91", "P13716", "G5EFZ1", "Q2T9E1",
    "P60900", "P60842", "Q99P21", "F2Z6F1",
]

In [3]:
# Four-character structure IDs, one per entry. These become the index of the
# `translator` table, so the order must match `uniprots` and `pdams`.
pdbs = [
    "1AO0", "1D2E", "1NE7", "2A7R",
    "2C9Y", "2DB3", "2I4I", "3AQQ",
    "3DME", "3QE3", "3U2O", "4CT5",
    "4NHO", "4O5H", "4R8K", "4V67",
    "5EXK", "5HMS", "5KGN", "5KIA",
    "5VFS", "5ZC9", "7EF9", "7O6Y",
]

In [4]:
# "pdam" identifiers, one per entry, in the same order as `pdbs` and `uniprots`.
# They are kept as strings because they are later concatenated with ".pdb" to
# form file names under pdam_structures/.
# NOTE(review): what a "pdam" ID refers to is not stated in this notebook.
pdams = [
    "16119", "8377", "13059", "11371",
    "22864", "12517", "13062", "4525",
    "7800", "1603", "3663", "1357",
    "19825", "23149", "12031", "3348",
    "8253", "15997", "2214", "16562",
    "20552", "23652", "11201", "149",
]

In [5]:
# Lookup table keyed by structure ID: one row per entry of `pdbs`, with the
# matching `uniprot` and `pdam` identifiers as columns.
translator = (
    pd.DataFrame({"pdb": pdbs, "uniprot": uniprots, "pdam": pdams})
    .set_index("pdb")
)
translator

Unnamed: 0_level_0,uniprot,pdam
pdb,Unnamed: 1_level_1,Unnamed: 2_level_1
1AO0,P00497,16119
1D2E,P49410,8377
1NE7,P46926,13059
2A7R,Q9P2T1,11371
2C9Y,P54819,22864
2DB3,P09052,12517
2I4I,O00571,13062
3AQQ,Q9Y2V2,4525
3DME,Q7VU70,7800
3QE3,P07846,1603


In [6]:
from Bio.PDB import NeighborSearch, PDBParser, Selection

Now we can use this to delete any residues that lie on "strings", i.e. residues whose only spatial neighbours are also close to them in sequence. An example run is below.

In [7]:
from Bio.PDB.PDBIO import PDBIO
# Notebook-level PDB writer.
# NOTE(review): the cells visible here never use this object;
# save_refined_structure creates its own PDBIO instance.
io = PDBIO()

Now run this on all the predicted structures.

In [8]:
# Tunable parameters for the structure clean-up.
# NOTE(review): these presumably correspond to the `cutoff` and `distance`
# arguments of refine_structure (sequence-separation limit and spatial search
# radius) - confirm that the processing cell actually reads them.
CUTOFF, DISTANCE = 4, 5

In [9]:
import os
from Bio.PDB import PDBParser, PDBIO
import copy

def refine_structure(structure, cutoff, distance):
    """Return a copy of the first model with all "string" residues removed.

    A residue is considered to be "on a string" when every residue that has an
    atom within ``distance`` of one of its atoms is at most ``cutoff``
    positions away in residue numbering, i.e. it has no spatial neighbour that
    is distant in sequence. Such residues are detached from the returned copy.

    Neighbours are always looked up in the unmodified input ``structure``, so
    the verdict for one residue never depends on which residues have already
    been removed.

    Args:
        structure: Bio.PDB structure. Only ``structure[0]`` is copied and
            edited; the neighbour search covers every atom yielded by
            ``structure.get_atoms()``.
        cutoff: Largest difference in residue number for which a spatial
            neighbour still counts as "close in sequence".
        distance: Search radius handed to ``NeighborSearch.search``, in the
            units of the atomic coordinates (Angstrom for PDB files).

    Returns:
        A deep copy of ``structure[0]`` without the string residues. The input
        structure itself is left untouched.
    """
    model = copy.deepcopy(structure[0])

    reference_atoms = list(structure.get_atoms())
    if not reference_atoms:
        # Nothing to search and nothing to remove.
        return model

    # The reference atoms never change, so the spatial index is built once
    # instead of once per residue.
    # NOTE(review): residue numbers are compared across all chains/models of
    # `structure`; this is only meaningful for single-chain inputs - confirm.
    neighbor_search = NeighborSearch(reference_atoms)

    for chain in model:
        # Iterate over a snapshot because residues are detached from `chain`
        # while looping.
        for residue in list(chain):
            residue_number = residue.get_id()[1]
            nearby_numbers = {
                neighbor.get_id()[1]
                for atom in Selection.unfold_entities([residue], 'A')
                for neighbor in neighbor_search.search(atom.coord, distance, 'R')
            }
            on_string = all(
                abs(number - residue_number) <= cutoff
                for number in nearby_numbers
            )
            if on_string:
                # Detach by the residue's full id (hetero flag, number,
                # insertion code). Rebuilding it as (' ', n, ' ') fails for
                # HETATM residues and for residues with insertion codes.
                chain.detach_child(residue.get_id())

    return model

def save_refined_structure(model, output_path):
    """Write ``model`` to ``output_path`` in PDB format.

    When ``output_path`` is a filesystem path, its parent directory is created
    first if it does not exist yet, so the notebook also runs on a fresh
    checkout. Anything else (for example an open file handle, which
    ``PDBIO.save`` accepts as well) is passed through unchanged.

    Args:
        model: Bio.PDB entity to write, e.g. the model returned by
            ``refine_structure``.
        output_path: Destination file path (or writable file handle).
    """
    if isinstance(output_path, (str, os.PathLike)):
        output_dir = os.path.dirname(output_path)
        # dirname is empty for a bare file name; makedirs("") would raise.
        if output_dir:
            os.makedirs(output_dir, exist_ok=True)

    io = PDBIO()
    io.set_structure(model)
    io.save(output_path)

# Refine the AlphaFold and pdam prediction of every entry in `pdbs` and write
# the results to edited_alphafold_structures/ and edited_pdam_structures/.
parser = PDBParser()

# Take the parameters from the notebook-level constants so the values cannot
# drift apart; the lower-case names are kept because this cell defined them.
cutoff = CUTOFF
distance = DISTANCE

for count, pdb_id in enumerate(pdbs):
    print(f"{pdb_id}, progress: {count + 1} out of {len(pdbs)}")

    # Single .loc lookup instead of chained indexing.
    uniprot_id = translator.loc[pdb_id, "uniprot"]
    pdam_id = translator.loc[pdb_id, "pdam"]

    # Input files are named after the respective identifier.
    alphafold_input_path = os.path.join("alphafold_structures", uniprot_id + ".pdb")
    pdam_input_path = os.path.join("pdam_structures", pdam_id + ".pdb")

    alphafold_structure = parser.get_structure(uniprot_id, alphafold_input_path)
    pdam_structure = parser.get_structure(pdam_id, pdam_input_path)

    refined_alphafold_model = refine_structure(alphafold_structure, cutoff, distance)
    refined_pdam_model = refine_structure(pdam_structure, cutoff, distance)

    # Outputs keep the input file names, in separate "edited_" folders.
    alphafold_output_path = os.path.join("edited_alphafold_structures", uniprot_id + ".pdb")
    pdam_output_path = os.path.join("edited_pdam_structures", pdam_id + ".pdb")

    save_refined_structure(refined_alphafold_model, alphafold_output_path)
    save_refined_structure(refined_pdam_model, pdam_output_path)


1AO0, progress: 1 out of 24
1D2E, progress: 2 out of 24
1NE7, progress: 3 out of 24
2A7R, progress: 4 out of 24
2C9Y, progress: 5 out of 24
2DB3, progress: 6 out of 24
2I4I, progress: 7 out of 24
3AQQ, progress: 8 out of 24
3DME, progress: 9 out of 24
3QE3, progress: 10 out of 24
3U2O, progress: 11 out of 24
4CT5, progress: 12 out of 24
4NHO, progress: 13 out of 24
4O5H, progress: 14 out of 24
4R8K, progress: 15 out of 24
4V67, progress: 16 out of 24
5EXK, progress: 17 out of 24
5HMS, progress: 18 out of 24
5KGN, progress: 19 out of 24
5KIA, progress: 20 out of 24
5VFS, progress: 21 out of 24
5ZC9, progress: 22 out of 24
7EF9, progress: 23 out of 24
7O6Y, progress: 24 out of 24
