In [2]:
#Import libraries
import pymol
from pymol import cmd
import os

In [None]:
# Run PyMOL: launch the PyMOL session that the `cmd` calls in the other cells
# act on.
# NOTE(review): presumably this has to be executed before any cell that uses
# `cmd` — confirm against the PyMOL launching documentation.
pymol.finish_launching()

In [8]:
def select_all_residues(selection):
    """Create the PyMOL named selection ``all_residues`` for one chain.

    Parameters
    ----------
    selection : str
        Chain identifier. It is interpolated into the PyMOL selection
        expression ``chain <selection>``.

    Returns
    -------
    None
        The only effect is the ``cmd.select`` call, which (re)defines the
        named selection ``all_residues``.
    """
    selection_name = "all_residues"
    chain_expression = f"chain {selection}"
    cmd.select(selection_name, chain_expression)

In [4]:
def select_residues_within_distance(reference_selection, distance, output_file):
    """Write the residues found around a reference selection to a text file.

    Defines the PyMOL named selection ``residues_within_6A`` from the
    expression ``byres ((<reference_selection>) around <distance>)`` and
    writes one ``"<resn> <resi>"`` line per distinct (resn, resi) pair to
    ``output_file``, in the order the atoms appear in the selected model.

    Parameters
    ----------
    reference_selection : str
        PyMOL selection (name or expression) to search around.
    distance : int or float
        Cutoff interpolated into the ``around`` operator.
    output_file : str
        Path of the text file to (over)write.

    Returns
    -------
    None

    Warns
    -----
    UserWarning
        If ``reference_selection`` matches no atoms. In that case nothing is
        selected and ``output_file`` is not created or modified.
    """
    # Function-scope import keeps this block self-contained.
    import warnings

    # Bail out when the reference selection is empty, but say so: a silent
    # return makes a misspelled selection name indistinguishable from a
    # successful run.
    if not cmd.count_atoms(reference_selection):
        warnings.warn(
            f"Selection '{reference_selection}' not found or empty; "
            f"nothing was written to '{output_file}'.",
            stacklevel=2,
        )
        return

    # Explicit parentheses so the result does not depend on PyMOL's operator
    # precedence when `reference_selection` is a compound expression.
    # The selection name is fixed ("residues_within_6A") whatever `distance`
    # is; it is kept unchanged so existing sessions/scripts that refer to it
    # keep working.
    cmd.select(
        "residues_within_6A",
        f"byres (({reference_selection}) around {distance})",
    )

    # Get the model (atom list) of the newly selected residues.
    selected_model = cmd.get_model("residues_within_6A")

    # A residue contributes many atoms; remember which (resn, resi) pairs were
    # already written so each appears once.
    # NOTE(review): the key ignores chain/segment, so residues sharing name and
    # number in different chains collapse into a single line — confirm this is
    # intended.
    unique_residues = set()

    with open(output_file, "w") as f:
        for atom in selected_model.atom:
            # Skip atoms lacking a residue name or residue number.
            if not (atom.resn and atom.resi):
                continue
            residue_id = (atom.resn, atom.resi)
            if residue_id in unique_residues:
                continue
            unique_residues.add(residue_id)
            f.write(f"{atom.resn} {atom.resi}\n")

In [5]:
def fetch_and_select_residues(protein_id, chain, output_file, distance=6):
    """Fetch a structure and write the residues surrounding one of its chains.

    Steps, in order: ``cmd.fetch(protein_id)``, removal of ``solvent``,
    creation of a named selection called ``chain`` for ``chain <chain>``,
    creation of ``all_residues`` via ``select_all_residues``, then
    ``select_residues_within_distance`` around ``all_residues``.

    Parameters
    ----------
    protein_id : str
        Identifier handed to ``cmd.fetch``.
    chain : str
        Chain identifier to use as the reference.
    output_file : str
        Path of the text file that receives the residue list.
    distance : int or float, optional
        Cutoff passed on to ``select_residues_within_distance``.
        Defaults to 6, the value that was previously hard-coded.

    Returns
    -------
    None
    """
    # Fetch the protein and drop solvent atoms.
    cmd.fetch(protein_id)
    cmd.remove("solvent")

    # Named selection called after the chain ID itself (kept for backward
    # compatibility with sessions that refer to it).
    # NOTE(review): a selection named after a bare chain ID such as "B" may
    # clash with PyMOL selection keywords — confirm.
    cmd.select(chain, f"chain {chain}")

    # select_all_residues defines the named selection "all_residues" for this
    # chain; use that as the reference. The previous reference, f"{chain}_",
    # named a selection that is never created, so the distance search could
    # not find it.
    select_all_residues(chain)
    select_residues_within_distance("all_residues", distance, output_file)

In [6]:
def load_and_select_chains(
    pdb_path,
    chain_A,
    chain_B,
    output_file_prefix,
    protein_id="SCAN_domain_P28698_model0_checked_Repair",
):
    """Load a PDB file, select two chains and list the residues around chain B.

    Parameters
    ----------
    pdb_path : str
        Path of the PDB file passed to ``cmd.load``.
    chain_A : str
        Chain identifier stored as the named selection ``chain_A``.
    chain_B : str
        Chain identifier stored as the named selection ``chain_B``; also the
        chain handed to ``fetch_and_select_residues``.
    output_file_prefix : str
        Prefix of the output file; the file is named
        ``<output_file_prefix>_<chain_B>_residues.txt``.
    protein_id : str, optional
        Identifier forwarded to ``fetch_and_select_residues``. Defaults to the
        value that was previously hard-coded here, so existing calls behave
        as before.

    Returns
    -------
    None
    """
    # Load PDB file.
    cmd.load(pdb_path)

    # Named selections for both chains. Only chain_B is used further down in
    # this function; the chain_A selection is just left in the session.
    cmd.select("chain_A", f"chain {chain_A}")
    cmd.select("chain_B", f"chain {chain_B}")

    # Output file name for Chain B.
    output_file_B = f"{output_file_prefix}_{chain_B}_residues.txt"

    # NOTE(review): this fetches `protein_id` in addition to the file loaded
    # above — confirm that a second structure is really wanted here.
    fetch_and_select_residues(protein_id, chain_B, output_file_B)

In [22]:
# Input structure (relative path to the PDB file of the complex)
pdb_path = "../data/scan_complex_pdbs/Q8NBB4_P28698_scand_complex.pdb"

# Chain identifiers handed to load_and_select_chains
chain_A, chain_B = "A", "B"

# Output prefix: the PDB file name stripped of its directory and extension
output_file_prefix = os.path.splitext(
    os.path.basename(pdb_path)
)[0]

# Load the structure and write the residue list for Chain B
load_and_select_chains(
    pdb_path=pdb_path,
    chain_A=chain_A,
    chain_B=chain_B,
    output_file_prefix=output_file_prefix,
)

In [21]:
# Delete everything matching "all" from the PyMOL session.
# NOTE(review): presumably a reset so the analysis cell can be re-run without
# leftovers from an earlier load — confirm.
cmd.delete("all")