In [2]:
import urllib.request

# Define the URL for the PDB file
pdb_url = 'https://files.rcsb.org/download/6LT7.pdb'

# Download the PDB file from the URL
pdb_file = urllib.request.urlopen(pdb_url)

# Parse the PDB file and extract the amino acid sequences
amino_acids = []
for line in pdb_file:
    if line.startswith(b'ATOM') and line[17:20] == b'GLY':
        amino_acids.append('G')
    elif line.startswith(b'ATOM') and line[17:20] == b'ALA':
        amino_acids.append('A')
    elif line.startswith(b'ATOM') and line[17:20] == b'LEU':
        amino_acids.append('L')
    elif line.startswith(b'ATOM') and line[17:20] == b'VAL':
        amino_acids.append('V')
    elif line.startswith(b'ATOM') and line[17:20] == b'ILE':
        amino_acids.append('I')
    elif line.startswith(b'ATOM') and line[17:20] == b'PHE':
        amino_acids.append('F')
    elif line.startswith(b'ATOM') and line[17:20] == b'TRP':
        amino_acids.append('W')
    elif line.startswith(b'ATOM') and line[17:20] == b'TYR':
        amino_acids.append('Y')
    elif line.startswith(b'ATOM') and line[17:20] == b'HIS':
        amino_acids.append('H')
    elif line.startswith(b'ATOM') and line[17:20] == b'LYS':
        amino_acids.append('K')
    elif line.startswith(b'ATOM') and line[17:20] == b'ARG':
        amino_acids.append('R')
    elif line.startswith(b'ATOM') and line[17:20] == b'SER':
        amino_acids.append('S')
    elif line.startswith(b'ATOM') and line[17:20] == b'THR':
        amino_acids.append('T')
    elif line.startswith(b'ATOM') and line[17:20] == b'ASN':
        amino_acids.append('N')
    elif line.startswith(b'ATOM') and line[17:20] == b'GLN':
        amino_acids.append('Q')
    elif line.startswith(b'ATOM') and line[17:20] == b'CYS':
        amino_acids.append('C')
    elif line.startswith(b'ATOM') and line[17:20] == b'MET':
        amino_acids.append('M')
    elif line.startswith(b'ATOM') and line[17:20] == b'PRO':
        amino_acids.append('P')
    elif line.startswith(b'ATOM') and line[17:20] == b'ASP':
        amino_acids.append('D')
    elif line.startswith(b'ATOM') and line[17:20] == b'GLU':
        amino_acids.append('E')

# Mutate the amino acid sequence by replacing a specific amino acid with another
target_amino_acid = 'L'
mutated_amino_acid = 'A'

mutated_sequence = ''.join([mutated_amino_acid if aa == target_amino_acid else aa for aa in amino_acids])

# Print the original and mutated amino acid sequences
print('Original sequence:', ''.join(amino_acids))
print('Mutated sequence:', mutated_sequence)


Original sequence: PPPPPPPVVVVVVVEEEEEEEEEYYYYYYYYYYYYTTTTTTTLLLLLLLLRRRRRRRRRRRKKKKKKKKKRRRRRRRRRRRLLLLLLLLPPPPPPPSSSSSSRRRRRRRRRRRLLLLLLLLPPPPPPPRRRRRRRRRRRRRRRRPPPPPPPNNNNNNNNDDDDDDDDIIIIIIIIYYYYYYYYYYYYVVVVVVVNNNNNNNNKKKKKKKKKTTTTTTTDDDDDDDDFFFFFFFFFFFKKKKKKKKKAAAAAQQQQQQQQQLLLLLLLLAAAAARRRRRRRRRRRCCCCCCQQQQQQQQQKKKKKKKKKLLLLLLLLLLLLLLLLDDDDDDDDGGGGGGGGAAAAARRRRRRRRRRRGGGGQQQQQQQQQNNNNNNNNAAAAACCCCCCSSSSSSEEEEEEEEEIIIIIIIIYYYYYYYYYYYYIIIIIIIIHHHHHHHHHHGGGGLLLLLLLLGGGGLLLLLLLLAAAAAIIIIIIIINNNNNNNNRRRRRRRRRRRAAAAAIIIIIIIINNNNNNNNIIIIIIIIAAAAALLLLLLLLQQQQQQQQQLLLLLLLLQQQQQQQQQAAAAAGGGGSSSSSSFFFFFFFFFFFGGGGSSSSSSLLLLLLLLQQQQQQQQQVVVVVVVAAAAAAAAAANNNNNNNNTTTTTTTSSSSSSTTTTTTTVVVVVVVEEEEEEEEELLLLLLLLVVVVVVVDDDDDDDDEEEEEEEEELLLLLLLLEEEEEEEEEPPPPPPPEEEEEEEEETTTTTTTDDDDDDDDTTTTTTTRRRRRRRRRRREEEEEEEEEPPPPPPPLLLLLLLLTTTTTTTRRRRRRRRRRRIIIIIIIIRRRRRRRRRRRNNNNNNNNNNNNNNNNSSSSSSAAAAAIIIIIIIIHHHHHHHHHHIIIIIIIIRRRRRRRRRRRVVVVVVVFFFFFFFFFFFRRRRRRRRRRRVVVVVVVTTTTTTTPPPPPPPKKKKKKKKKSSSSSSEEEEEEEEEEEEEE

In [3]:
from Bio.PDB import PDBParser, PDBIO, Select
from Bio.Seq import Seq
from Bio.Alphabet import generic_protein
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import os

# Set the path to the input PDB file
pdb_path = "input.pdb"

# Set the index of the residue to mutate and the new amino acid residue code
residue_index = 10
new_residue = "C"

# Parse the input PDB file
parser = PDBParser()
structure = parser.get_structure("input", pdb_path)

# Create a Select object to select the residue to mutate
class MutateResidue(Select):
    def __init__(self, residue_index):
        self.residue_index = residue_index
    
    def accept_residue(self, residue):
        if residue.get_id()[1] == self.residue_index:
            return True
        else:
            return False

# Select the residue to mutate
selection = MutateResidue(residue_index)

# Create a new sequence record from the protein sequence in the PDB file
seq_record = SeqRecord(Seq("", generic_protein))
for model in structure:
    for chain in model:
        for residue in chain:
            if residue.get_resname() not in SeqRecord.protein_letters:
                continue
            seq_record.seq += Seq(residue.get_resname(), generic_protein)

# Mutate the selected residue in the sequence record
seq = str(seq_record.seq)
new_seq = seq[:residue_index - 1] + new_residue + seq[residue_index:]
mutated_seq_record = SeqRecord(Seq(new_seq, generic_protein))

# Create a new PDB file with the mutated sequence
class MutateResidueIO(PDBIO):
    def __init__(self, mutated_seq_record):
        self.mutated_seq_record = mutated_seq_record
        self.residue_index = residue_index
        self.new_residue = new_residue
        self.count = 0
    
    def _get_next_serial_number(self):
        self.count += 1
        return self.count
    
    def _write_atom(self, atom, hetfield, serial_number, resname,
                    chain_id, resseq, icode, x, y, z, occupancy,
                    bfactor, segid, element, charge, altloc):
        if resseq == self.residue_index and resname != self.new_residue:
            resname = self.new_residue
        PDBIO._write_atom(self, atom, hetfield, serial_number, resname,
                          chain_id, resseq, icode, x, y, z, occupancy,
                          bfactor, segid, element, charge, altloc)

io = MutateResidueIO(mutated_seq_record)
io.set_structure(structure)
io.save("mutated.pdb")


ImportError: Bio.Alphabet has been removed from Biopython. In many cases, the alphabet can simply be ignored and removed from scripts. In a few cases, you may need to specify the ``molecule_type`` as an annotation on a SeqRecord for your script to work correctly. Please see https://biopython.org/wiki/Alphabet for more information.

In [None]:
# bioalphabate deprecated. see the following
from Bio.PDB import PDBParser, PDBIO, Select
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

# Define the mutation function
def mutate_pdb(pdb_file, chain_id, res_num, new_res):
    # Parse the PDB file
    parser = PDBParser()
    structure = parser.get_structure("pdb", pdb_file)

    # Select the specified chain and residue number
    class ChainSelect(Select):
        def accept_chain(self, chain):
            if chain.get_id() == chain_id:
                return True
            else:
                return False

        def accept_residue(self, residue):
            if residue.get_id()[1] == res_num:
                return True
            else:
                return False

    # Apply the mutation to the selected residue
    class MutateResidue(Select):
        def accept_residue(self, residue):
            if residue.get_id()[1] == res_num:
                residue.resname = new_res
            return True

    io = PDBIO()
    io.set_structure(structure)

    # Write the mutated structure to a new PDB file
    io.save(pdb_file[:-4] + "_mutated.pdb", MutateResidue())
    
    # Generate a Seq object for the mutated sequence
    mutated_seq = ""
    for residue in structure[0][chain_id][res_num-1:res_num]:
        mutated_seq += Seq(residue.resname)

    # Generate a SeqRecord object for the mutated sequence and write to FASTA file
    seq_record = SeqRecord(mutated_seq, id=pdb_file[:-4] + "_" + chain_id + str(res_num) + new_res)
    SeqIO.write(seq_record, pdb_file[:-4] + "_mutated.fasta", "fasta")

# Test the function with an example PDB file
mutate_pdb("1AKI.pdb", "A", 31, "VAL")


In [None]:
from Bio.PDB import PDBParser, PDBIO, Select
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

def mutate_pdb(pdb_file, chain_id, res_num, new_res):
    """
    Mutates a PDB file by replacing a residue with a new amino acid.
    :param pdb_file: the name of the PDB file to mutate
    :param chain_id: the ID of the chain containing the residue to mutate
    :param res_num: the sequence number of the residue to mutate
    :param new_res: the three-letter code for the new amino acid
    :return: None
    """
    # Load the PDB file
    parser = PDBParser()
    structure = parser.get_structure(pdb_file[:-4], pdb_file)
    
    # Get the original residue and its parent chain
    chain = structure[0][chain_id]
    residue = chain[res_num]
    
    # Create a new residue with the desired properties
    new_residue = residue.copy()
    new_residue.resname = new_res
    new_residue.id = (' ', residue.id[1], ' ')
    
    # Replace the old residue with the new residue
    chain.detach_child(residue.id)
    chain.add(new_residue)
    
    # Generate a Seq object for the mutated sequence
    mutated_seq = Seq('')
    for residue in chain.get_residues():
        if residue.get_resname() != 'HOH':
            mutated_seq += Seq(residue.get_resname())
            
    # Generate a SeqRecord object for the mutated sequence and write to FASTA file
    seq_record = SeqRecord(mutated_seq, id=pdb_file[:-4], description=pdb_file[:-4])
    SeqIO.write(seq_record, pdb_file[:-4] + "_mutated.fasta", "fasta")

# Test the function with an example PDB file
mutate_pdb("1AKI.pdb", "G", 210, "VAL")


In [16]:
from Bio.PDB import PDBParser, PDBIO, Select
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

def mutate_pdb(pdb_id, chain_id, res_num, new_res):
    """
    Mutates a PDB file by replacing a residue with a new amino acid.
    :param pdb_id: the ID of the PDB file to mutate
    :param chain_id: the ID of the chain containing the residue to mutate
    :param res_num: the sequence number of the residue to mutate
    :param new_res: the three-letter code for the new amino acid
    :return: None
    """
    # Load the PDB file
    pdb_file = pdb_id + ".pdb"
    parser = PDBParser()
    structure = parser.get_structure(pdb_id, pdb_file)
    
    # Get the original residue and its parent chain
    chain = structure[0][chain_id]
    residue = chain[res_num]
    
    # Create a new residue with the desired properties
    new_residue = residue.copy()
    new_residue.resname = new_res
    new_residue.id = (' ', residue.id[1], ' ')
    
    # Replace the old residue with the new residue
    chain.detach_child(residue.id)
    chain.add(new_residue)

    # Identify chain A and get its sequence
    seq_chain = None
    for model in structure:
        for chain in model:
            if chain.id == 'A':
                seq_chain = chain
                break
        if seq_chain:
            break
    else:
        raise ValueError("Chain A not found in PDB file.")
    mutated_seq = Seq('')
    for residue in seq_chain.get_residues():
        if residue.get_resname() != 'HOH':
            if residue.id[1] == res_num and residue.get_resname() == 'GLY':
                mutated_seq += Seq(new_res)
            else:
                mutated_seq += Seq(residue.get_resname())

    # Generate a SeqRecord object for the mutated sequence and write to FASTA file
    seq_record = SeqRecord(mutated_seq, id=pdb_id, description=pdb_id)
    SeqIO.write(seq_record, pdb_id + "_mutated.fasta", "fasta")

# Test the function with the example PDB ID 3UNF
mutate_pdb("3unf", "G", 210, "VAL")




In [20]:
from Bio.PDB import PDBParser, PDBIO, Select
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO

def mutate_pdb(pdb_id, chain_id, res_num, new_res, output_format="pdb"):
    """
    Mutates a PDB file by replacing a residue with a new amino acid and saves the mutated structure to file.
    Additionally, generates a FASTA file containing the mutated sequence.
    :param pdb_id: the ID of the PDB file to mutate
    :param chain_id: the ID of the chain containing the residue to mutate
    :param res_num: the sequence number of the residue to mutate
    :param new_res: the three-letter code for the new amino acid
    :param output_format: the output file format, either "pdb" or "mmcif"
    :return: None
    """
    # Load the PDB file
    pdb_file = pdb_id + ".pdb"
    parser = PDBParser()
    structure = parser.get_structure(pdb_id, pdb_file)
    
    # Get the original residue and its parent chain
    chain = structure[0][chain_id]
    residue = chain[res_num]
    
    # Create a new residue with the desired properties
    new_residue = residue.copy()
    new_residue.resname = new_res
    new_residue.id = (' ', residue.id[1], ' ')
    
    # Replace the old residue with the new residue
    chain.detach_child(residue.id)
    chain.add(new_residue)

    # Write the mutated structure to file
    io = PDBIO()
    io.set_structure(structure)
    if output_format == "mmcif":
        io.save(pdb_id + "_mutated.cif")
    else:
        io.save(pdb_id + "_mutated.pdb")

    # Identify chain A and get its sequence
    seq_chain = None
    for model in structure:
        for chain in model:
            if chain.id == 'A':
                seq_chain = chain
                break
        if seq_chain:
            break
    else:
        raise ValueError("Chain A not found in PDB file.")
    mutated_seq = Seq('')
    for residue in seq_chain.get_residues():
        if residue.get_resname() != 'HOH':
            if residue.id[1] == res_num and residue.get_resname() == 'GLY':
                mutated_seq += Seq(new_res)
            else:
                mutated_seq += Seq(residue.get_resname())

    # Generate a SeqRecord object for the mutated sequence and write to FASTA file
    seq_record = SeqRecord(mutated_seq, id=pdb_id, description=pdb_id)
    SeqIO.write(seq_record, pdb_id + "_mutated.fasta", "fasta")
    
# Test the function with the example PDB ID 3UNF, saving the mutated structure in CIF format
mutate_pdb("3unf", "G", 210, "VAL", output_format="mmcif")




In [None]:
from Bio.PDB import PDBParser, PDBIO, Select, MMCIFParser
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio import SeqIO
import os

def cif_to_pdb(cif_file):
    """
    Converts a CIF file to a PDB file.
    :param cif_file: the path to the CIF file to convert
    :return: None
    """
    pdb_file = os.path.splitext(cif_file)[0] + ".pdb"
    parser = MMCIFParser()
    structure = parser.get_structure('temp', cif_file)
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)

def mutate_pdb(pdb_id, chain_id, res_num, new_res):
    """
    Mutates a PDB file by replacing a residue with a new amino acid.
    :param pdb_id: the ID of the PDB file to mutate
    :param chain_id: the ID of the chain containing the residue to mutate
    :param res_num: the sequence number of the residue to mutate
    :param new_res: the three-letter code for the new amino acid
    :return: None
    """
    # Load the PDB file
    pdb_file = pdb_id + ".pdb"
    if not os.path.exists(pdb_file):
        raise ValueError(f"PDB file {pdb_file} not found.")
    parser = PDBParser()
    structure = parser.get_structure(pdb_id, pdb_file)
    
    # Get the original residue and its parent chain
    chain = structure[0][chain_id]
    residue = chain[res_num]
    
    # Create a new residue with the desired properties
    new_residue = residue.copy()
    new_residue.resname = new_res
    new_residue.id = (' ', residue.id[1], ' ')
    
    # Replace the old residue with the new residue
    chain.detach_child(residue.id)
    chain.add(new_residue)

    # Identify chain A and get its sequence
    seq_chain = None
    for model in structure:
        for chain in model:
            if chain.id == 'A':
                seq_chain = chain
                break
        if seq_chain:
            break
    else:
        raise ValueError("Chain A not found in PDB file.")
    mutated_seq = Seq('')
    for residue in seq_chain.get_residues():
        if residue.get_resname() != 'HOH':
            if residue.id[1] == res_num and residue.get_resname() == 'GLY':
                mutated_seq += Seq(new_res)
            else:
                mutated_seq += Seq(residue.get_resname())

    # Generate a SeqRecord object for the mutated sequence and write to FASTA file
    seq_record = SeqRecord(mutated_seq, id=pdb_id, description=pdb_id)
    SeqIO.write(seq_record, pdb_id + "_mutated.fasta", "fasta")

# Test the function with the example CIF file and PDB ID
cif_file = "3unf_mutated.cif"
cif_to_pdb(cif_file)
mutate_pdb("3unf_mutated", "G", 210, "VAL")


In [22]:
from Bio.PDB.MMCIFParser import MMCIFParser
from Bio.PDB.PDBIO import PDBIO
import os

def cif_to_pdb(cif_file):
    """Converts a CIF format file to a PDB format file"""
    pdb_file = os.path.splitext(cif_file)[0] + ".pdb"
    parser = MMCIFParser()
    structure = parser.get_structure('temp', cif_file)
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)

# Example usage
cif_file = "3unf_mutated.cif"
cif_to_pdb(cif_file)


KeyError: '_atom_site.id'

In [28]:
from Bio.PDB import MMCIFParser, PDBIO, MMCIF2Dict
import os

def cif_to_pdb("3unf_mutated.cif"):
    # Generate the output file name
    pdb_file = os.path.splitext(cif_file)[0] + ".pdb"
    
    # Parse the CIF file using the MMCIF2Dict parser
    mmcif_dict = MMCIF2Dict(cif_file)
    
    # Extract the required data from the dictionary
    atom_serial_list = mmcif_dict["_atom_site.id"]
    atom_id_list = mmcif_dict["_atom_site.label_atom_id"]
    residue_id_list = mmcif_dict["_atom_site.label_comp_id"]
    chain_id_list = mmcif_dict["_atom_site.label_asym_id"]
    residue_number_list = mmcif_dict["_atom_site.auth_seq_id"]
    x_list = mmcif_dict["_atom_site.Cartn_x"]
    y_list = mmcif_dict["_atom_site.Cartn_y"]
    z_list = mmcif_dict["_atom_site.Cartn_z"]
    
    # Create the PDB file using the extracted data
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)
    
    print(f"{cif_file} converted to {pdb_file}")


SyntaxError: invalid syntax (654697832.py, line 4)

In [29]:
import os
from Bio.PDB import MMCIFParser, PDBIO, PDBList

def cif_to_pdb(cif_file):
    pdb_file = os.path.splitext(cif_file)[0] + ".pdb"
    parser = MMCIFParser()
    structure = parser.get_structure('temp', cif_file)
    io = PDBIO()
    io.set_structure(structure)
    io.save(pdb_file)

# Test the function with the example CIF file
cif_file = "3unf_mutated.cif"
cif_to_pdb(cif_file)


KeyError: '_atom_site.id'

In [30]:
from Bio.PDB import PDBParser, PDBIO, Select

# Define a class to select the residue at position 210 in chain G
class ResidueSelector(Select):
    def accept_residue(self, residue):
        if residue.get_id()[1] == 210 and residue.get_parent().id == "G":
            return True
        else:
            return False

# Parse the PDB file
pdb_file = "3unf.pdb"
parser = PDBParser()
structure = parser.get_structure("3unf", pdb_file)

# Select the residue at position 210 in chain G and change its residue type to valine
io = PDBIO()
io.set_structure(structure)
io.save("3unf_mutated.pdb", ResidueSelector())

for model in structure:
    for chain in model:
        for residue in chain:
            if residue.get_id()[1] == 210 and chain.get_id() == "G":
                residue.resname = "VAL"

io.save("3unf_mutated.pdb")


