This will verify the aa position in a PDB file, even -ve positions.

In [1]:
# Step 1: Install required libraries
!pip install biopython requests

# Step 2: Import necessary libraries
from Bio.PDB import PDBList, PDBParser
import requests

# Step 3: Function to download the PDB file
def download_pdb(pdb_id):
    pdbl = PDBList()
    pdb_file = pdbl.retrieve_pdb_file(pdb_id, file_format='pdb', pdir='.')
    return pdb_file

# Step 4: Parse PDB and search for amino acid sequences
def parse_and_find_protein_sequences(pdb_id, pdb_file, amino_acid, aa_position, results):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure(pdb_id, pdb_file)

    amino_acid_found = False

    # Mapping single-letter amino acids to PDB three-letter representation
    amino_acid_mapping = {
        'A': 'ALA', 'R': 'ARG', 'N': 'ASN', 'D': 'ASP', 'C': 'CYS',
        'E': 'GLU', 'Q': 'GLN', 'G': 'GLY', 'H': 'HIS', 'I': 'ILE',
        'L': 'LEU', 'K': 'LYS', 'M': 'MET', 'F': 'PHE', 'P': 'PRO',
        'S': 'SER', 'T': 'THR', 'W': 'TRP', 'Y': 'TYR', 'V': 'VAL'
    }

    if amino_acid not in amino_acid_mapping:
        results.append(f"Invalid amino acid: {amino_acid}. Please use a valid single-letter amino acid code.")
        return

    mapped_amino_acid = amino_acid_mapping[amino_acid]

    for model in structure:
        for chain in model:
            pdb_positions = []
            amino_acid_seq = []
            start_position = None
            end_position = None

            for residue in chain:
                if residue.resname in amino_acid_mapping.values():  # Check if residue is an amino acid
                    if start_position is None:
                        start_position = residue.id[1]
                    end_position = residue.id[1]
                    pdb_positions.append(residue.id[1])  # Residue number in PDB
                    amino_acid_seq.append(residue.resname)

            if start_position is not None and end_position is not None:
                results.append(f"Chain {chain.id} has residues from {start_position} to {end_position}.")

            if len(amino_acid_seq) >= len(pdb_positions):
                try:
                    # Find the PDB position that matches the query position
                    pdb_index = pdb_positions.index(aa_position)
                    if amino_acid_seq[pdb_index] == mapped_amino_acid:
                        full_sequence = ''.join([aa[0] for aa in amino_acid_seq])  # Convert back to single-letter code for full sequence
                        highlighted_aa_seq = ''.join([aa[0] for aa in amino_acid_seq[:pdb_index]]) + f'[{amino_acid}]' + ''.join([aa[0] for aa in amino_acid_seq[pdb_index+1:]])
                        results.append(f"Amino acid {mapped_amino_acid} found in chain {chain.id} at position {aa_position} in PDB ID {pdb_id}")
                        results.append(f"Full amino acid sequence (chain {chain.id}):\n{full_sequence}")
                        results.append(f"Highlighted sequence:\n{highlighted_aa_seq}\n")
                        amino_acid_found = True
                except ValueError:
                    pass

    if not amino_acid_found:
        results.append(f"Amino acid {amino_acid} not found at position {aa_position} in any chain of PDB ID {pdb_id}.")

# Step 5: Main function to run the analysis for a specific PDB ID
def main(pdb_id, amino_acid, aa_position):
    results = []
    pdb_file = download_pdb(pdb_id)
    parse_and_find_protein_sequences(pdb_id, pdb_file, amino_acid, aa_position, results)

    # Print all results at the end
    for result in results:
        print(result)

# Input values
pdb_id = "1BPX"      # Specific PDB ID
amino_acid = "N"     # Query amino acid (single-letter code)
aa_position = 279    # Position in the protein sequence

# Run the analysis
main(pdb_id, amino_acid, aa_position)


Downloading PDB structure '1bpx'...
Chain A has residues from 5 to 335.
Amino acid ASN found in chain A at position 279 in PDB ID 1BPX
Full amino acid sequence (chain A):
LAPGGTLAGGITAMLTGLAAPGLAVSGAIHLTAATALAASVIALTPHLILSGAGALLLPGVGTLIAGLIAGPLATGLLALLGLIAGAATSSSIAPLTAVSGIGPSAAALPVAGGILTLGALALAGALLAHHGAIGLLTPGAPGLAIPAGGMLGMGAIVLAGVLLVASGTIATVCGSPAAGAGSSGAMAVLLTHPSPTSGSTLGPLLLHGVVGGLGLVHPITATLSLGGTLPMGVCGLPSLAAGLGTPHAAIAIALIPLAGTTCGVLTPTGSAIPALAMAAHALGLGPTIAGTTIAPLGVTGVAGGPLPVASGLAIPATIGTLTAGPLAASG
Highlighted sequence:
LAPGGTLAGGITAMLTGLAAPGLAVSGAIHLTAATALAASVIALTPHLILSGAGALLLPGVGTLIAGLIAGPLATGLLALLGLIAGAATSSSIAPLTAVSGIGPSAAALPVAGGILTLGALALAGALLAHHGAIGLLTPGAPGLAIPAGGMLGMGAIVLAGVLLVASGTIATVCGSPAAGAGSSGAMAVLLTHPSPTSGSTLGPLLLHGVVGGLGLVHPITATLSLGGTLPMGVCGLPSLAAGLGTPHAAIAIALIPLAGTTCGVLTPTGSAIP[N]LAMAAHALGLGPTIAGTTIAPLGVTGVAGGPLPVASGLAIPATIGTLTAGPLAASG



In [2]:
# Input values
pdb_id = "1BPX"      # Specific PDB ID
amino_acid = "N"     # Query amino acid (single-letter code)
aa_position = 279    # Position in the protein sequence

# Run the analysis
main(pdb_id, amino_acid, aa_position)

Structure exists: './pdb1bpx.ent' 
Chain A has residues from 5 to 335.
Amino acid ASN found in chain A at position 279 in PDB ID 1BPX
Full amino acid sequence (chain A):
LAPGGTLAGGITAMLTGLAAPGLAVSGAIHLTAATALAASVIALTPHLILSGAGALLLPGVGTLIAGLIAGPLATGLLALLGLIAGAATSSSIAPLTAVSGIGPSAAALPVAGGILTLGALALAGALLAHHGAIGLLTPGAPGLAIPAGGMLGMGAIVLAGVLLVASGTIATVCGSPAAGAGSSGAMAVLLTHPSPTSGSTLGPLLLHGVVGGLGLVHPITATLSLGGTLPMGVCGLPSLAAGLGTPHAAIAIALIPLAGTTCGVLTPTGSAIPALAMAAHALGLGPTIAGTTIAPLGVTGVAGGPLPVASGLAIPATIGTLTAGPLAASG
Highlighted sequence:
LAPGGTLAGGITAMLTGLAAPGLAVSGAIHLTAATALAASVIALTPHLILSGAGALLLPGVGTLIAGLIAGPLATGLLALLGLIAGAATSSSIAPLTAVSGIGPSAAALPVAGGILTLGALALAGALLAHHGAIGLLTPGAPGLAIPAGGMLGMGAIVLAGVLLVASGTIATVCGSPAAGAGSSGAMAVLLTHPSPTSGSTLGPLLLHGVVGGLGLVHPITATLSLGGTLPMGVCGLPSLAAGLGTPHAAIAIALIPLAGTTCGVLTPTGSAIP[N]LAMAAHALGLGPTIAGTTIAPLGVTGVAGGPLPVASGLAIPATIGTLTAGPLAASG

