In [1]:
import os

REPO_ADDRESS = "https://github.com/HeliXonProtein/binding-ddg-predictor.git"
# Download GitHub repo if not already downloaded
if not os.path.exists("binding-ddg-predictor"):
    !git clone $REPO_ADDRESS

# Change working directory to repo
os.chdir("binding-ddg-predictor")


In [2]:
# suppress warnings
import warnings
warnings.filterwarnings('ignore')



In [3]:
# Example structures, given as paths relative to the current working directory.
WT_PROTEIN = "data/example_wt.pdb"    # wild-type structure
MUT_PROTEIN = "data/example_mut.pdb"  # mutated structure

In [4]:
# show the protein chains and number of amino acids
def show_protein(protein):
    """Print the ID and length of every chain found in a PDB file.

    Args:
        protein: Path of the PDB file to parse.

    One line ``<chain id> <len(chain)>`` is printed per chain, for every model
    of the parsed structure. Nothing is returned.
    """
    from Bio.PDB import PDBParser

    structure = PDBParser().get_structure("protein", protein)
    # Walk model by model, chain by chain, in the order the structure yields them.
    all_chains = (chain for model in structure for chain in model)
    for chain in all_chains:
        print(chain.id, len(chain))

# List the chains of the mutated structure first, then of the wild type.
for pdb_path in (MUT_PROTEIN, WT_PROTEIN):
    show_protein(pdb_path)

A 214
B 214
C 200
A 214
B 214
C 200


In [5]:

# Get Amino Acid sequence from PDB file
def get_sequence(pdb_file, chain="A"):
    """Return the amino-acid sequence of one chain of a PDB file.

    ``PPBuilder.build_peptides`` may yield several polypeptide fragments for a
    single chain. All fragment sequences are joined in the order they are
    produced; returning only the first one would silently drop every residue
    that belongs to a later fragment.

    Args:
        pdb_file: Path of the PDB file to parse.
        chain: ID of the chain to read from the first model (default ``"A"``).

    Returns:
        The concatenated sequence of all polypeptide fragments of the chain.

    Raises:
        ValueError: If no polypeptide is found in the requested chain.
    """
    from Bio.PDB import PDBParser, PPBuilder

    structure = PDBParser().get_structure("protein", pdb_file)
    peptides = PPBuilder().build_peptides(structure[0][chain])
    fragments = [pp.get_sequence() for pp in peptides]
    if not fragments:
        # Fail loudly here instead of handing None to the caller.
        raise ValueError(f"No polypeptide found in chain {chain!r} of {pdb_file!r}")

    # Join with `+` so the result keeps the type returned by get_sequence().
    sequence = fragments[0]
    for fragment in fragments[1:]:
        sequence = sequence + fragment
    return sequence

# Extract the sequence of the default chain from the mutated structure,
# then from the wild-type structure.
mut_seq = get_sequence(MUT_PROTEIN)
wt_seq = get_sequence(WT_PROTEIN)

# Show both sequences, each prefixed with its label
for label, sequence in (
    ("Mutated protein sequence: ", mut_seq),
    ("Wild type protein sequence: ", wt_seq),
):
    print(label, sequence)

Mutated protein sequence:  DIKMTQSPSSMYASLGERVTITCKASQDIRKYLNWYQQKPWKSPKTLIYYATSLADGVPSRFSGSGSGQDYSLTISSLESDDTATYYCLQHGESPYTFGGGTKLEINRADAAPTVSIFPPSSEQLTSGGASVVCFLNNFYPKDINVKWKIDGSERQNGVLNSWTDQDSKDSTYSMSSTLTLTKDEYERHNSYTCEATHKTSTSPIVKSFNRNEC
Wild type protein sequence:  DIKMTQSPSSMYASLGERVTITCKASQDIRKYLNWYQQKPWKSPKTLIYYATSLADGVPSRFSGSGSGQDYSLTISSLESDDTATYYCLQHGESPYTFGGGTKLEINRADAAPTVSIFPPSSEQLTSGGASVVCFLNNFYPKDINVKWKIDGSERQNGVLNSWTDQDSKDSTYSMSSTLTLTKDEYERHNSYTCEATHKTSTSPIVKSFNRNEC


In [6]:
# Show difference between sequences
def show_aa_diff(seq1, seq2):
    """Print every position at which two sequences differ.

    Each difference is printed as ``Position <n>: <seq1 item> -> <seq2 item>``,
    where ``n`` is the 1-based index into the sequences.

    The sequences may have different lengths: positions that exist in only one
    of them are reported with ``-`` standing in for the missing item, instead of
    raising ``IndexError`` (``seq2`` shorter) or being silently skipped
    (``seq2`` longer).

    Args:
        seq1: First sequence; its items appear on the left of the arrow.
        seq2: Second sequence; its items appear on the right of the arrow.
    """
    from itertools import zip_longest

    paired = zip_longest(seq1, seq2, fillvalue="-")
    for position, (aa1, aa2) in enumerate(paired, start=1):
        if aa1 != aa2:
            print(f"Position {position}: {aa1} -> {aa2}")


In [7]:
# Toy example: the two strings differ at their third and fourth characters.
seq1, seq2 = "ABCD", "ABEF"
show_aa_diff(seq1, seq2)

Position 3: C -> E
Position 4: D -> F


In [8]:

# Compare the mutated and wild-type sequences chain by chain. The mutated
# sequence is passed first, so each reported difference reads
# "mutated residue -> wild-type residue".
for chain in ("A", "B", "C"):
    print(f"Chain {chain}:")
    mutant_chain_seq = get_sequence(MUT_PROTEIN, chain)
    wild_type_chain_seq = get_sequence(WT_PROTEIN, chain)
    show_aa_diff(mutant_chain_seq, wild_type_chain_seq)


Chain A:
Chain B:
Chain C:


In [9]:
# compute the ddG
# Runs scripts/predict.py in a shell, passing the wild-type structure path
# first and the mutated structure path second. Both the script path and the
# structure paths are relative, so the working directory must be the cloned
# repository.
!python scripts/predict.py $WT_PROTEIN $MUT_PROTEIN

Predicted ddG: -0.30
