In [1]:
#Select kernel before running

#Import libraries
import pymol
from pymol import cmd
import os

In [2]:
#Run/Launch PyMOL
#pymol.finish_launching()

In [3]:
def select_all_residues(selection):
    """Create the PyMOL selection named "all_residues" for one chain.

    Args:
        selection: Chain identifier. It is interpolated into the selection
            expression ``chain <selection>``.
    """
    chain_expression = f"chain {selection}"
    cmd.select("all_residues", chain_expression)

In [11]:
def select_residues_within_distance(interactor, distance, output_file):
    """Write the IDs of residues found within ``distance`` of ``interactor``.

    The residues are stored in the PyMOL selection "residues_within_6A"
    (the name is fixed and does not follow ``distance``), and each distinct
    residue ID (``atom.resi``) is written to ``output_file``, one per line,
    in the order it is first encountered. IDs are compared without the chain,
    so equal residue numbers from different chains are written once.

    Args:
        interactor: Name of an existing PyMOL selection, or a selection
            expression, around which to search.
        distance: Search radius handed to PyMOL's ``around`` operator.
        output_file: Path of the text file to (over)write.

    Returns:
        None. If ``interactor`` matches no atoms an error is printed and
        ``output_file`` is left untouched.
    """
    # PyMOL signals an undefined selection name by raising CmdException rather
    # than returning 0, so both outcomes are folded into "not found".
    try:
        atom_count = cmd.count_atoms(interactor)
    except pymol.CmdException:
        atom_count = 0
    if atom_count <= 0:
        print(f"Error: Selection '{interactor}' not found.")
        return

    # Parenthesise the interactor so that a compound expression
    # (e.g. "chain A or chain C") is treated as one operand of `around`,
    # and make explicit that `byres` expands the result of `around`.
    cmd.select("residues_within_6A", f"byres (({interactor}) around {distance})")

    # Get the model (atom list) of the newly selected residues
    selected_model = cmd.get_model("residues_within_6A")

    # dict.fromkeys drops repeated residue IDs while keeping first-seen order;
    # atoms with an empty resi are skipped.
    residue_ids = dict.fromkeys(
        atom.resi for atom in selected_model.atom if atom.resi
    )

    with open(output_file, "w") as f:
        f.writelines(f"{residue_id}\n" for residue_id in residue_ids)

In [5]:
def fetch_and_select_residues(protein_id, chain, output_file, distance=6):
    """Fetch a structure, strip solvent and write residues near one chain.

    Steps: fetch ``protein_id`` with ``cmd.fetch``, remove solvent, create a
    selection named after ``chain`` holding that chain, then run
    ``select_all_residues`` and ``select_residues_within_distance`` on it.

    Args:
        protein_id: Identifier passed to ``cmd.fetch``.
        chain: Chain identifier. It is also used as the name of the PyMOL
            selection created for that chain.
        output_file: Path of the text file that receives the residue IDs.
        distance: Search radius around the chain selection. Defaults to 6,
            the value that was previously hard-coded.
    """
    # Fetch the protein and remove H2O (solvent).
    cmd.fetch(protein_id)
    cmd.remove("solvent")

    # Select the specified chain; the selection is named after the chain.
    cmd.select(chain, f"chain {chain}")

    # Run the two previously defined helpers on that chain.
    select_all_residues(chain)
    # Pass the selection created above. The earlier f"{chain}_" referred to a
    # selection name that is never defined anywhere in this notebook.
    select_residues_within_distance(chain, distance, output_file)

In [6]:
def load_and_select_chains(pdb_path, chain_A, chain_B, output_file_prefix,
                           protein_id="SCAN_domain_P28698_model0_checked_Repair"):
    """Load a complex, select its two chains and write residues for chain B.

    Args:
        pdb_path: Path of the PDB file to load into PyMOL.
        chain_A: Chain identifier stored in the selection "chain_A".
        chain_B: Chain identifier stored in the selection "chain_B"; it is
            also the chain handed to ``fetch_and_select_residues``.
        output_file_prefix: Prefix of the output file name; the file is
            ``<output_file_prefix>_<chain_B>_residues.txt``.
        protein_id: Identifier forwarded to ``fetch_and_select_residues``.
            Defaults to the value that was previously hard-coded.

    NOTE(review): the default ``protein_id`` is not a PDB accession, and the
    recorded notebook output shows "Error-fetch: unable to load" for it.
    Confirm whether a local file should be loaded instead, or pass a valid
    identifier explicitly.
    """
    # Load PDB file
    cmd.load(pdb_path)

    # Select Chain A and Chain B
    cmd.select("chain_A", f"chain {chain_A}")
    cmd.select("chain_B", f"chain {chain_B}")

    # Construct output file name for Chain B
    output_file_B = f"{output_file_prefix}_{chain_B}_residues.txt"

    # Fetch and select residues for Chain B
    fetch_and_select_residues(protein_id, chain_B, output_file_B)

In [12]:
# AlphaFold complex structure to analyse
pdb_path = "../data/scan_complex_pdbs/P57086_P28698_scand_complex.pdb"

# Chain identifiers of the two partners
chain_A, chain_B = "A", "B"

# File name stripped of its directory and extension, so that the output
# files get a name unique to this complex
output_file_prefix = os.path.splitext(os.path.split(pdb_path)[1])[0]

# Load the complex and write the selected residues for chain B
load_and_select_chains(pdb_path, chain_A, chain_B, output_file_prefix)


 Error-fetch: unable to load 'scan_domain_p28698_model0_checked_repair'.


AttributeError: 'Indexed' object has no attribute 'resi'

In [8]:
# Delete everything from the PyMOL session ("all") so the cells above can be
# re-run from a clean state.
cmd.delete("all")