In [None]:
import os

# Switch the working directory to the docking project root; the later cells
# address their inputs/outputs with paths relative to it ('./1_dock/...').
os.chdir('/home/pkwon2/mineral/dock') # change to absolute path
# Echo the resulting working directory so the cell output shows where we are.
print(os.getcwd())

In [None]:
## Center at origin
import os
import numpy as np

def centroid(pdb_file):
    """Return the mean position of all ``ATOM`` records in a PDB file.

    Coordinates are read from the fixed-width slices ``[30:38]``, ``[38:46]``
    and ``[46:54]`` of each line (x, y, z). Only lines that start with
    ``ATOM`` are used, so ``HETATM`` records do not contribute.

    Args:
        pdb_file: Path of the PDB file to read.

    Returns:
        numpy.ndarray of shape (3,) with the mean x, y and z.

    Raises:
        ValueError: If the file has no ``ATOM`` records (the mean would
            otherwise silently be NaN), or if a coordinate field cannot be
            parsed as a float.
    """
    coords = []
    with open(pdb_file, 'r') as f:
        for line in f:
            if line.startswith('ATOM'):
                # float() tolerates the padding spaces of the fixed-width fields.
                coords.append([
                    float(line[30:38]),
                    float(line[38:46]),
                    float(line[46:54]),
                ])
    if not coords:
        raise ValueError(f"No ATOM records found in {pdb_file}")
    return np.mean(np.array(coords), axis=0)

def translate(pdb_file, translation_vector):
    """Shift the ``ATOM`` records of a PDB file and rewrite the file in place.

    The vector is *subtracted* from each coordinate, so passing a structure's
    centroid moves that centroid to the origin. Lines that do not start with
    ``ATOM`` are written back untouched.

    Args:
        pdb_file: Path of the PDB file; it is overwritten with the result.
        translation_vector: Indexable with at least three numbers (x, y, z).
    """
    def shift_record(record):
        # Re-emit the three 8-character coordinate fields with the offset removed.
        moved = [
            float(record[start:start + 8].strip()) - translation_vector[axis]
            for axis, start in enumerate((30, 38, 46))
        ]
        return record[:30] + ''.join(f'{value:>8.3f}' for value in moved) + record[54:]

    with open(pdb_file, 'r') as src:
        original = src.readlines()

    rewritten = [
        shift_record(record) if record.startswith('ATOM') else record
        for record in original
    ]

    with open(pdb_file, 'w') as dst:
        dst.writelines(rewritten)

# Re-centre every PDB of the input directory on its own ATOM centroid.
# NOTE: translate() overwrites each file in place.
input_dir = './1_dock/input'
pdb_names = [entry for entry in os.listdir(input_dir) if entry.endswith('.pdb')]
for file in pdb_names:
    pdb_file = os.path.join(input_dir, file)
    translate(pdb_file, centroid(pdb_file))

In [None]:
import os
import warnings
from Bio.PDB import PDBParser

# Ignore warnings
# NOTE: no category or module is given, so this filter hides every Python
# warning raised after this cell runs, not only those from the PDB parser.
warnings.filterwarnings('ignore')

class ActiveResidueFinder:
    """Collect the residue numbers of selected residue types in a PDB file.

    The numbers (``residue.id[1]``) are pooled over every model and chain of
    the structure; chain identity is not kept, and a number that occurs in
    several chains/models appears several times in the result.

    Attributes:
        pdb_file: Path of the PDB file that is parsed.
        resnames: Residue names to select (e.g. ``['HIS']``).
        remove_ranges: Iterable of argument tuples for ``range()``; every
            number they produce is dropped from the selection.
        add_ranges: Iterable of argument tuples for ``range()``; every number
            they produce is appended to the selection if not already present.
        residue_indexes: Final list of residue numbers, computed on
            construction.
    """

    def __init__(self, pdb_file, resnames, remove_ranges, add_ranges):
        self.pdb_file = pdb_file
        self.resnames = resnames
        self.remove_ranges = remove_ranges
        self.add_ranges = add_ranges
        # The file is parsed eagerly: the result is ready right after construction.
        self.residue_indexes = self.process()

    def enumerate_residues(self):
        """Return the numbers of all residues whose name is in ``resnames``."""
        parser = PDBParser()
        structure = parser.get_structure('structure', self.pdb_file)
        return [
            residue.id[1]
            for model in structure
            for chain in model
            for residue in chain
            if residue.get_resname() in self.resnames
        ]

    def process(self):
        """Apply ``remove_ranges`` and then ``add_ranges`` to the selection.

        Returns:
            List of residue numbers in discovery order, followed by the added
            numbers in the order the ranges produce them.
        """
        residue_indexes = self.enumerate_residues()

        # Drop *every* occurrence of a removed number. list.remove() would only
        # drop the first one and leave duplicates from other chains/models.
        removed = {i for range_ in self.remove_ranges for i in range(*range_)}
        residue_indexes = [i for i in residue_indexes if i not in removed]

        # Append the requested numbers that are not selected yet; the set keeps
        # the membership test constant-time.
        present = set(residue_indexes)
        for range_ in self.add_ranges:
            for i in range(*range_):
                if i not in present:
                    present.add(i)
                    residue_indexes.append(i)

        return residue_indexes

# Maps '<name>.pdb' to the list of residue numbers selected for that file.
results = {}

def add_to_results(name, pdb_file, resnames, remove_ranges, add_ranges):
    """Run ActiveResidueFinder on one file and record its residue numbers.

    Args:
        name: File name without the '.pdb' extension; the result is stored
            under ``name + '.pdb'``.
        pdb_file: Path of the PDB file to analyse.
        resnames: Residue names to select.
        remove_ranges: ``range()`` argument tuples of numbers to drop.
        add_ranges: ``range()`` argument tuples of numbers to add.
    """
    selected = ActiveResidueFinder(pdb_file, resnames, remove_ranges, add_ranges).residue_indexes
    results[name + '.pdb'] = selected

# Directory containing the pdb files.
# The first cell already changes the working directory to the dock project
# root, so the path must be relative to it. The previous value
# './mineral/dock/1_dock/input/' resolved to a nested mineral/dock/mineral/dock
# folder that no other cell reads from.
directory = './1_dock/input'

# Record the HIS residue numbers of every pdb file in the directory
for filename in os.listdir(directory):
    if filename.endswith('.pdb'):
        # Full path to the pdb file
        pdb_file = os.path.join(directory, filename)
        # Key in `results` is the file name; the extension is re-added by add_to_results
        add_to_results(filename[:-4], pdb_file, ['HIS'], [], [])

In [None]:
# Import numpy to store the coordinates as arrays
import numpy as np

# Maps file name -> (n, 3) array with the CA coordinates of the selected residues
coordinates = {}

directory='./1_dock/input'

# One parser is enough for all files
parser = PDBParser()

# Loop over all entries in the directory
for filename in os.listdir(directory):
    # Only .pdb files are candidates; sub-directories and other files are
    # skipped quietly instead of being reported as "not found in results".
    if not filename.endswith('.pdb'):
        continue
    if filename not in results:
        print(f"{filename} not found in results dictionary")
        continue

    # Get the residue indexes from the results dictionary
    residue_indexes = results[filename]
    print(f"{filename}:{residue_indexes}")
    wanted = set(residue_indexes)  # constant-time membership test

    # Parse the structure using Bio.PDB
    filepath = os.path.join(directory, filename)
    structure = parser.get_structure('structure', filepath)

    # CA coordinates of every selected residue (all models and chains)
    coord_list = [
        np.array(residue['CA'].get_coord())
        for model in structure
        for chain in model
        for residue in chain
        if residue.id[1] in wanted and 'CA' in residue
    ]

    # An empty selection would become a shape-(0,) array that makes the SVD
    # step below fail, so such files are reported and left out.
    if not coord_list:
        print(f"{filename}: no CA atoms found for the selected residues, skipping")
        continue

    # Convert the coord_list to a numpy array and store it in the coordinates dictionary
    coordinates[filename] = np.array(coord_list)
        
from scipy.spatial.transform import Rotation as R

# Per-file rotation matrix (least-variance direction of the CA atoms -> +x)
# and per-file centring vector (mean of the CA atoms)
rotation_matrices = {}
translation_matrices = {}

# Loop over all files that have coordinates
for filename in coordinates:
    # Get the coordinates from the coordinates dictionary
    coords = coordinates[filename]

    # Singular value decomposition of the centred coordinates
    coords_mean = coords.mean(axis=0)
    coords_centered = coords - coords_mean
    u, s, vh = np.linalg.svd(coords_centered, full_matrices=True)
    # Last right-singular vector: direction of least spread of the points,
    # i.e. the normal of their best-fit plane.
    normal_to_x = vh[2]

    # Clamp the cosine so rounding cannot push it outside arccos' domain
    cos_to_x = float(np.dot(normal_to_x, [1, 0, 0]) / np.linalg.norm(normal_to_x))
    angle_to_x = np.arccos(max(-1.0, min(1.0, cos_to_x)))

    axis_to_x = np.cross(normal_to_x, [1, 0, 0])
    axis_norm = np.linalg.norm(axis_to_x)
    if axis_norm < 1e-8:
        # The normal is already (anti)parallel to x: the cross product vanishes
        # and dividing by its norm would give a NaN rotation. Any axis
        # perpendicular to x works; with z the rotation is the identity
        # (angle 0) or a half turn (angle pi).
        axis_to_x = np.array([0.0, 0.0, 1.0])
    else:
        axis_to_x = axis_to_x / axis_norm
    r_to_x = R.from_rotvec(angle_to_x * axis_to_x)
    R_matrix_to_x = r_to_x.as_matrix()

    # Store the rotation matrix and the centring vector for this file
    rotation_matrices[filename] = R_matrix_to_x
    translation_matrices[filename] = coords_mean

from Bio.PDB import PDBIO

# Rotated copies of the input structures are written here
output_dir = './1_dock/input/rotated_to_x'
os.makedirs(output_dir, exist_ok=True)

# Centre and rotate every structure that has coordinates, then save it
for filename in coordinates:
    if not filename.endswith('.pdb'):
        continue

    # Re-parse the original file from the current input directory
    filepath = os.path.join(directory, filename)
    parser = PDBParser()
    structure = parser.get_structure('structure', filepath)

    # Transformation computed for this file
    R_matrix = rotation_matrices[filename]
    T_matrix = translation_matrices[filename]

    # Subtract the centring vector first, then rotate about the origin
    for atom in structure.get_atoms():
        centered_coords = atom.get_coord() - T_matrix
        rotated_coords = np.dot(R_matrix, centered_coords)
        atom.set_coord(rotated_coords)

    # Same file name, new directory
    io = PDBIO()
    io.set_structure(structure)
    io.save(os.path.join(output_dir, filename))

In [None]:
# Import numpy to store the coordinates as arrays
import numpy as np

# Maps file name -> (n, 3) array with the CA coordinates of the selected
# residues, this time read from the x-aligned structures
coordinates = {}

directory='./1_dock/input/rotated_to_x'

# One parser is enough for all files
parser = PDBParser()

# Loop over all entries in the directory
for filename in os.listdir(directory):
    # Only .pdb files are candidates; sub-directories (such as the output
    # folder created further down) and other files are skipped quietly
    # instead of being reported as "not found in results".
    if not filename.endswith('.pdb'):
        continue
    if filename not in results:
        print(f"{filename} not found in results dictionary")
        continue

    # Get the residue indexes from the results dictionary
    residue_indexes = results[filename]
    print(f"{filename}:{residue_indexes}")
    wanted = set(residue_indexes)  # constant-time membership test

    # Parse the structure using Bio.PDB
    filepath = os.path.join(directory, filename)
    structure = parser.get_structure('structure', filepath)

    # CA coordinates of every selected residue (all models and chains)
    coord_list = [
        np.array(residue['CA'].get_coord())
        for model in structure
        for chain in model
        for residue in chain
        if residue.id[1] in wanted and 'CA' in residue
    ]

    # An empty selection would become a shape-(0,) array that makes the SVD
    # step below fail, so such files are reported and left out.
    if not coord_list:
        print(f"{filename}: no CA atoms found for the selected residues, skipping")
        continue

    # Convert the coord_list to a numpy array and store it in the coordinates dictionary
    coordinates[filename] = np.array(coord_list)
        
from scipy.spatial.transform import Rotation as R

# Per-file rotation matrix (direction of largest spread of the CA atoms -> +y)
rotation_matrices = {}

# Loop over all files that have coordinates
for filename in coordinates:
    # Get the coordinates from the coordinates dictionary
    coords = coordinates[filename]

    # Singular value decomposition of the centred coordinates
    coords_mean = coords.mean(axis=0)
    coords_centered = coords - coords_mean
    u, s, vh = np.linalg.svd(coords_centered, full_matrices=True)

    # First right-singular vector: direction of largest spread of the points.
    # (It is not a plane normal, despite the variable name.)
    normal_to_y = vh[0]

    # Clamp the cosine so rounding cannot push it outside arccos' domain
    cos_to_y = float(np.dot(normal_to_y, [0, 1, 0]) / np.linalg.norm(normal_to_y))
    angle_to_y = np.arccos(max(-1.0, min(1.0, cos_to_y)))

    axis_to_y = np.cross(normal_to_y, [0, 1, 0])
    axis_norm = np.linalg.norm(axis_to_y)
    if axis_norm < 1e-8:
        # The vector is already (anti)parallel to y: the cross product vanishes
        # and dividing by its norm would give a NaN rotation. Turning about x
        # (identity for angle 0, half turn for angle pi) leaves the x direction
        # of these x-aligned inputs unchanged.
        axis_to_y = np.array([1.0, 0.0, 0.0])
    else:
        axis_to_y = axis_to_y / axis_norm
    r_to_y = R.from_rotvec(angle_to_y * axis_to_y)
    R_matrix_to_y = r_to_y.as_matrix()

    # Store the rotation matrix for this file
    rotation_matrices[filename] = R_matrix_to_y
    
from Bio.PDB import PDBIO

# y-aligned copies of the x-aligned structures are written here
output_dir = './1_dock/input/rotated_to_x/rotated_to_y'
os.makedirs(output_dir, exist_ok=True)

# Rotate every structure that has coordinates, then save it
for filename in coordinates:
    if not filename.endswith('.pdb'):
        continue

    # Re-parse the x-aligned file from the current input directory
    filepath = os.path.join(directory, filename)
    parser = PDBParser()
    structure = parser.get_structure('structure', filepath)

    # Rotation computed for this file
    R_matrix = rotation_matrices[filename]

    # Rotate about the origin; no centring is applied in this step
    for atom in structure.get_atoms():
        atom.set_coord(np.dot(R_matrix, atom.get_coord()))

    # Same file name, new directory
    io = PDBIO()
    io.set_structure(structure)
    io.save(os.path.join(output_dir, filename))

print("Rotation and saving of PDB files completed.")

In [None]:
# Rotate if visual inspection shows that the active residues are not pointing in the right direction.

import os
import numpy as np
from Bio.PDB import *

def rotate_pdb_files(directory,
                     prefix="superfiber_rep6_contig2_thread10_85_mpnn08_model_1_ptm_92.5",
                     angle_degree=180):
    """Rotate selected PDB files about the y axis and overwrite them.

    Every file in ``directory`` whose name starts with ``prefix`` and ends
    with ``.pdb`` is parsed, all of its atoms are rotated with
    ``rotate_around_y`` and the structure is saved back under the same path.

    Because the files are overwritten, calling this twice rotates twice; with
    the default 180 degrees the second call restores the original orientation.

    Args:
        directory: Folder that holds the PDB files.
        prefix: Only files whose name starts with this string are rotated.
            Defaults to the single model this notebook was written for.
        angle_degree: Rotation angle in degrees. Defaults to 180.
    """
    parser = PDBParser()
    io = PDBIO()

    for filename in os.listdir(directory):
        if filename.startswith(prefix) and filename.endswith(".pdb"):
            path = os.path.join(directory, filename)
            structure = parser.get_structure('pdb', path)
            for atom in structure.get_atoms():
                atom.coord = rotate_around_y(atom.coord, angle_degree)
            io.set_structure(structure)
            # Same path as the input: the original file is replaced.
            io.save(path)

def rotate_around_y(coord, angle_degree):
    """Rotate a 3-vector about the y axis.

    Args:
        coord: Sequence or array with the x, y and z components.
        angle_degree: Rotation angle in degrees.

    Returns:
        numpy.ndarray with the rotated components; y is left unchanged.
    """
    theta = np.radians(angle_degree)
    cos_t = np.cos(theta)
    sin_t = np.sin(theta)
    # Left-multiplying rotation matrix about y
    about_y = np.array([
        [cos_t, 0, sin_t],
        [0, 1, 0],
        [-sin_t, 0, cos_t],
    ])
    return np.dot(about_y, coord)

# NOTE: the matching files are saved back over themselves after a 180-degree
# turn, so running this cell a second time undoes the first rotation.
rotate_pdb_files('./1_dock/input/rotated_to_x/rotated_to_y')


In [None]:
import os
from Bio.PDB import PDBParser, PDBIO

def subtract_from_x(structure, offset=25):
    """Shift every atom of a structure along -x, in place.

    Args:
        structure: Nested iterable of models, chains, residues and atoms;
            each atom exposes a mutable ``coord`` array.
        offset: Amount subtracted from each atom's x coordinate. Defaults to
            25, the value previously hard-coded here. The original inline note
            listed the values used per design: "for r3" 25; "for r6" c3: 21,
            c4: 34, c6: 56 (meaning of the labels not visible here; confirm
            with the author).
    """
    for model in structure:
        for chain in model:
            for residue in chain:
                for atom in residue:
                    atom.coord[0] -= offset

def process_pdb_files(input_dir, output_dir):
    """Apply ``subtract_from_x`` to every PDB file of a folder.

    Args:
        input_dir: Folder whose ``.pdb`` files are read.
        output_dir: Folder the shifted structures are written to, under the
            same file names; it is created when it does not exist.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    reader = PDBParser()
    writer = PDBIO()
    for entry in os.listdir(input_dir):
        # Anything that is not a .pdb file (including sub-folders) is ignored.
        if not entry.endswith('.pdb'):
            continue
        shifted = reader.get_structure('input_structure', os.path.join(input_dir, entry))
        subtract_from_x(shifted)
        writer.set_structure(shifted)
        writer.save(os.path.join(output_dir, entry))

# Shift every y-aligned structure along x and write the results to the
# 'subtract_x' sub-folder of the input directory.
process_pdb_files(input_dir='./1_dock/input/rotated_to_x/rotated_to_y', output_dir='./1_dock/input/rotated_to_x/rotated_to_y/subtract_x')


In [None]:
import os
from Bio import PDB
import numpy as np
# NOTE(review): this module-level parser is not referenced by the functions
# visible in this cell (create_oligomer builds its own) - confirm it is needed.
parser = PDB.PDBParser(QUIET=True)
# Per-element radii, keyed by upper-case element symbol. count_clashes scales
# the sum of two radii by its clash_cutoff factor to get the pairwise cutoff.
# Presumably van der Waals radii in angstroms - TODO confirm the source.
atom_radii = {
    "C": 1.70, 
    "N": 1.55, 
    "O": 1.52,
    "S": 1.80,
    "F": 1.47, 
    "P": 1.80, 
    "CL": 1.75, 
    "MG": 1.73,
}
def count_clashes(structure, clash_cutoff=0.63, max_clashes=20):
    """Report whether a structure has more than ``max_clashes`` atom clashes.

    Two atoms clash when their distance is below ``clash_cutoff`` times the
    sum of their ``atom_radii`` entries. Pairs are ignored when both atoms
    belong to the same residue object, when one is named ``C`` and the other
    ``N`` (any residues, not only bonded neighbours), or when both are ``SG``
    and further apart than 1.88.

    Atoms whose element is not in ``atom_radii`` are not considered. All atoms
    returned by ``structure.get_atoms()`` are compared with each other;
    NOTE(review): for a multi-model structure this would compare atoms across
    models - confirm inputs are single-model.

    Args:
        structure: Object exposing ``get_atoms()``; atoms need ``element``,
            ``name``, ``coord`` and ``parent``.
        clash_cutoff: Fraction of the summed radii below which a pair clashes.
        max_clashes: Number of clashes that is still tolerated. Defaults to
            20, the value previously hard-coded here.

    Returns:
        bool: True when the number of clashing pairs exceeds ``max_clashes``.
        Despite the function name, the count itself is not returned.
    """
    clash_cutoffs = {
        i + "_" + j: (clash_cutoff * (atom_radii[i] + atom_radii[j]))
        for i in atom_radii
        for j in atom_radii
    }
    atoms = [x for x in structure.get_atoms() if x.element in atom_radii]
    # Nothing to compare; also avoids building a KD-tree from an empty array.
    if not atoms:
        return False

    coords = np.array([a.coord for a in atoms], dtype="d")
    kdt = PDB.kdtrees.KDTree(coords)
    # Largest possible cutoff: one neighbour query per atom covers every pair type.
    search_radius = max(clash_cutoffs.values())

    clashes = []
    for atom_1 in atoms:
        kdt_search = kdt.search(np.array(atom_1.coord, dtype="d"), search_radius)
        for hit in kdt_search:
            atom_2 = atoms[hit.index]
            atom_distance = hit.radius  # distance from atom_1 to this neighbour
            # Compare residue *objects*: residue ids repeat across chains, so
            # comparing ids would also discard clashes between same-numbered
            # residues of different chains (e.g. the copies in an oligomer).
            if atom_1.parent is atom_2.parent:
                continue
            elif (atom_2.name == "C" and atom_1.name == "N") or (atom_2.name == "N" and atom_1.name == "C"):
                continue
            elif (atom_2.name == "SG" and atom_1.name == "SG") and atom_distance > 1.88:
                continue
            if atom_distance < clash_cutoffs[atom_2.element + "_" + atom_1.element]:
                clashes.append((atom_1, atom_2))
    # Every pair is found twice, once from each partner.
    return len(clashes) // 2 > max_clashes

# Oligomerize: change the symmetry in the create_oligomer call on the last line

import os
import numpy as np
from Bio.PDB import PDBParser, PDBIO, Chain
from Bio.PDB.vectors import rotaxis, Vector

def rotate_chain(chain, angle):
    """Rotate all atoms of a chain about the z axis through the origin.

    Args:
        chain: Object exposing ``get_atoms()``; each atom is transformed in place.
        angle: Rotation angle in degrees.

    NOTE(review): the matrix comes from ``rotaxis`` and is applied through
    ``atom.transform``; confirm the two share the same multiplication
    convention before relying on the sign of the rotation.
    """
    z_axis = Vector(0, 0, 1)
    turn = rotaxis(np.radians(angle), z_axis)
    no_shift = np.array([0, 0, 0])  # pure rotation, no translation
    for atom in chain.get_atoms():
        atom.transform(turn, no_shift)

def create_oligomer(input_dir, output_dir, symmetries):
    """Build cyclic oligomers from chain A of every PDB file in a folder.

    For each ``.pdb`` file and each requested symmetry ``n``, chain ``A`` of
    the first model is copied ``n - 1`` times. Copy ``i`` becomes a new chain
    (ids ``B``, ``C``, ... by character code) and is rotated by
    ``i * 360 / n`` degrees with ``rotate_chain``. The result is saved as
    ``oligomer_<n>_<file name>`` in ``output_dir``.

    Args:
        input_dir: Folder whose ``.pdb`` files are read.
        output_dir: Folder the oligomers are written to; created if missing.
        symmetries: Iterable of integer copy numbers.

    Raises:
        KeyError: If a structure has no chain ``A``.
    """
    os.makedirs(output_dir, exist_ok=True)
    parser = PDBParser()
    for pdb_file in os.listdir(input_dir):
        if not pdb_file.endswith('.pdb'):
            continue
        for symmetry in symmetries:
            # Parse afresh for every symmetry: the model is extended in place below.
            structure = parser.get_structure('input_structure', os.path.join(input_dir, pdb_file))
            model = structure[0]
            chain_A = model['A']
            for i in range(1, symmetry):
                new_chain_id = chr(ord('A') + i)
                new_chain = Chain.Chain(new_chain_id)
                model.add(new_chain)
                for residue in chain_A:
                    new_chain.add(residue.copy())
                rotate_chain(new_chain, (i) * (360 / symmetry))

            # Save once per oligomer, after all copies exist. Saving inside the
            # copy loop rewrote the file for every chain and skipped it
            # entirely for symmetry 1.
            io = PDBIO()
            io.set_structure(structure)
            io.save(os.path.join(output_dir, f'oligomer_{symmetry}_{pdb_file}'))

# Build the oligomers from the x-shifted structures; edit `symmetries` to
# change the number of chain copies per oligomer.
create_oligomer(input_dir='./1_dock/input/rotated_to_x/rotated_to_y/subtract_x', output_dir='./1_dock/input/rotated_to_x/rotated_to_y/subtract_x/oligomer', symmetries=[6])