In [90]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pyfoldx.foldx import foldxHandler
from pyfoldx.structure import structure
import pandas as pd
from Bio import SeqIO
from Bio.PDB import PDBParser, PPBuilder
from pyfoldx.structure.structure import Structure
from io import StringIO

In [108]:

# Function to generate all possible mutations for a given protein
def generate_all_mutations(sequence):
    amino_acids = ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
    mutations = []
    for i, aa in enumerate(sequence):
        for new_aa in amino_acids:
            if new_aa != aa:
                mutations.append(f"{aa}{i+1}{new_aa}")  # Format like GI29A
    return mutations

# Function to read the PDB file from a local path
def read_pdb_from_file(pdb_file_path):
    with open(pdb_file_path, 'r') as file:
        pdb_string = file.read()
    return pdb_string

# Function to extract chains and sequences from PDB
def extract_chains_and_sequence(pdb_string):
    parser = PDBParser(QUIET=True)
    # Use StringIO to treat pdb_string as a file-like object
    pdb_file = StringIO(pdb_string)
    structure = parser.get_structure("protein", pdb_file)
    chains = {}
    for model in structure:  # Iterate over models (usually only one)
        for chain in model:  # Iterate over chains
            chain_id = chain.id
            sequence = ""
            for residue in chain:
                # Skip non-standard residues
                if residue.id[0] == " ":
                    sequence += residue.resname[0]  # Get the first letter of the 3-letter residue code
            chains[chain_id] = sequence
    return chains

from Bio.PDB import PDBIO

# Function to compute delta G for all mutations
def calculate_delta_G(pdb_file_path):
    pdb_string = read_pdb_from_file(pdb_file_path)  # Read the PDB structure from the file
    parser = PDBParser(QUIET=True)
    
    # Parse the PDB structure
    structure = parser.get_structure("protein", pdb_file_path)
    
    # Save the structure to a temporary PDB file
    io = PDBIO()
    temp_pdb_path = "/home/labs/rudich/meiray/meirab/temp.pdb"  # Define the path for the temporary PDB file
    io.set_structure(structure)
    io.save(temp_pdb_path)

    # Extract chains and sequence from PDB string
    chains = extract_chains_and_sequence(pdb_string)
    
    # Here, we assume we're working with the sequence of the first chain (can be modified if necessary)
    wildtype_sequence = chains[list(chains.keys())[0]]  # Get the sequence of the first chain
    mutations = generate_all_mutations(wildtype_sequence)
    all_ddGs = []  # Store delta G values

    # Compute delta G for each mutation using foldxHandler
    for mutation in mutations:
        ddGs, mutModels, wtModels = foldxHandler.getMutants(temp_pdb_path, mutation, 1)
        all_ddGs.append(ddGs['ddG'][0])  # Extract delta G value from the result
    
    return mutations, np.array(all_ddGs)

# Function to plot heatmap
def plot_heatmap(delta_g_values, mutations):
    # Reshape delta G values to a 2D grid for the heatmap (e.g., assuming the protein is in a grid-like arrangement)
    grid_size = int(np.sqrt(len(delta_g_values)))  # This will work if the number of mutations is a perfect square
    delta_g_values_grid = delta_g_values[:grid_size**2].reshape(grid_size, grid_size)

    # Plotting the heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(delta_g_values_grid, cmap='coolwarm', annot=True, fmt=".2f", xticklabels=mutations[:grid_size], yticklabels=mutations[:grid_size])
    plt.title(f"Heatmap of ΔG for Mutations of Protein")
    plt.xlabel("Mutation")
    plt.ylabel("Mutation")
    plt.show()

# Main function to call everything
def main():
    pdb_file_path = "/home/labs/rudich/meiray/meirab/foldx/1A0N.pdb"  # Path to your PDB file
    
    mutations, delta_g_values = calculate_delta_G(pdb_file_path)
    
    # Plot the heatmap with the calculated delta G values
    plot_heatmap(delta_g_values, mutations)

if __name__ == "__main__":
    main()


AttributeError: 'str' object has no attribute 'toPdbFile'

In [None]:
from Bio.PDB import PDBIO

# Function to compute delta G for all mutations
def calculate_delta_G(pdb_file_path):
    pdb_string = read_pdb_from_file(pdb_file_path)  # Read the PDB structure from the file
    parser = PDBParser(QUIET=True)
    
    # Parse the PDB structure
    structure = parser.get_structure("protein", pdb_file_path)
    
    # Save the structure to a temporary PDB file
    io = PDBIO()
    temp_pdb_path = "/path/to/temp.pdb"  # Define the path for the temporary PDB file
    io.set_structure(structure)
    io.save(temp_pdb_path)

    # Extract chains and sequence from PDB string
    chains = extract_chains_and_sequence(pdb_string)
    
    # Here, we assume we're working with the sequence of the first chain (can be modified if necessary)
    wildtype_sequence = chains[list(chains.keys())[0]]  # Get the sequence of the first chain
    mutations = generate_all_mutations(wildtype_sequence)
    all_ddGs = []  # Store delta G values

    # Compute delta G for each mutation using foldxHandler
    for mutation in mutations:
        ddGs, mutModels, wtModels = foldxHandler.getMutants(temp_pdb_path, mutation, 1)
        all_ddGs.append(ddGs['ddG'][0])  # Extract delta G value from the result
    
    return mutations, np.array(all_ddGs)


In [78]:

# Function to generate all possible mutations for a given protein
def generate_all_mutations(sequence):
    amino_acids = ["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
    mutations = []
    for i, aa in enumerate(sequence):
        for new_aa in amino_acids:
            if new_aa != aa:
                mutations.append(f"{aa}{i+1}{new_aa}")  # Format like GI29A
    return mutations

# Function to read the PDB file from a local path
def read_pdb_from_file(pdb_file_path):
    with open(pdb_file_path, 'r') as file:
        pdb_string = file.read()
    return pdb_string


def extract_chains_and_sequence(pdb_file):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)
    chains = {}
    for model in structure:  # Iterate over models (usually only one)
        for chain in model:  # Iterate over chains
            chain_id = chain.id
            sequence = ""
            for residue in chain:
                # Skip non-standard residues
                if residue.id[0] == " ":
                    sequence += residue.resname[0]  # Get the first letter of the 3-letter residue code
            chains[chain_id] = sequence

    return chains

# Function to compute delta G for all mutations
def calculate_delta_G(pdb_file_path):
    pdb_string = read_pdb_from_file(pdb_file_path)  # Read the PDB structure from the file

    # Extract chains and sequence from PDB string
    chains = extract_chains_and_sequence(pdb_file_path)
    
    # Here, we assume we're working with the sequence of the first chain (can be modified if necessary)
    wildtype_sequence = chains[list(chains.keys())[0]]  # Get the sequence of the first chain
    mutations = generate_all_mutations(wildtype_sequence)
    all_ddGs = []  # Store delta G values

    # Compute delta G for each mutation using foldxHandler (assuming it's defined)
    for mutation in mutations:
        ddGs, mutModels, wtModels = foldxHandler.getMutants(pdb_string, mutation, 1)
        all_ddGs.append(ddGs['ddG'][0])  # Extract delta G value from the result
    
    return mutations, np.array(all_ddGs)

# Function to plot heatmap
def plot_heatmap(delta_g_values, mutations):
    # Reshape delta G values to a 2D grid for the heatmap (e.g., assuming the protein is in a grid-like arrangement)
    grid_size = int(np.sqrt(len(delta_g_values)))  # This will work if the number of mutations is a perfect square
    delta_g_values_grid = delta_g_values[:grid_size**2].reshape(grid_size, grid_size)

    # Plotting the heatmap
    plt.figure(figsize=(8, 6))
    sns.heatmap(delta_g_values_grid, cmap='coolwarm', annot=True, fmt=".2f", xticklabels=mutations[:grid_size], yticklabels=mutations[:grid_size])
    plt.title(f"Heatmap of ΔG for Mutations of Protein")
    plt.xlabel("Mutation")
    plt.ylabel("Mutation")
    plt.show()

# Main function to call everything
def main():
    pdb_file_path = "/home/labs/rudich/meiray/meirab/foldx/1A0N.pdb"  # Path to your PDB file
    
    mutations, delta_g_values = calculate_delta_G(pdb_file_path)
    
    # Plot the heatmap with the calculated delta G values
    plot_heatmap(delta_g_values, mutations)

if __name__ == "__main__":
    main()


ModuleNotFoundError: No module named 'foldxHandler'

In [64]:
def extract_chains_and_sequence(pdb_file):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)
    print(structure)
    chains = {}
    for model in structure:  # Iterate over models (usually only one)
        for chain in model:  # Iterate over chains
            chain_id = chain.id
            sequence = ""
            for residue in chain:
                # Skip non-standard residues
                if residue.id[0] == " ":
                    sequence += residue.resname[0]  # Get the first letter of the 3-letter residue code
            chains[chain_id] = sequence

    return chains

pdb_file = "/home/labs/rudich/meiray/meirab/foldx/1BOD.pdb"
# sequence = extract_sequence_from_pdb(pdb_file)
chains = extract_chains_and_sequence(pdb_file)

for chain_id, sequence in chains.items():
    print(f"Chain {chain_id}: {sequence}")

<Structure id=protein>
Chain A: GLLGIPGLTALGGAGGLSLGGLLLLLGTGPPSLLLGMSTLAGLPGGLALAGAGGVSPGGPGVLVLLIS


In [65]:
chains = extract_chains_and_sequence(pdb_file)


<Structure id=protein>


In [66]:
sequence

'GLLGIPGLTALGGAGGLSLGGLLLLLGTGPPSLLLGMSTLAGLPGGLALAGAGGVSPGGPGVLVLLIS'

In [62]:
chains.items()

dict_items([('A', 'GLLGIPGLTALGGAGGLSLGGLLLLLGTGPPSLLLGMSTLAGLPGGLALAGAGGVSPGGPGVLVLLIS')])

In [61]:
chain_id, sequence = chains.items()

ValueError: not enough values to unpack (expected 2, got 1)

In [120]:
import pyfoldx
dir(pyfoldx.structure.structure.Structure)


['__class__',
 '__delattr__',
 '__dict__',
 '__dir__',
 '__doc__',
 '__eq__',
 '__format__',
 '__ge__',
 '__getattribute__',
 '__gt__',
 '__hash__',
 '__init__',
 '__init_subclass__',
 '__le__',
 '__lt__',
 '__module__',
 '__ne__',
 '__new__',
 '__reduce__',
 '__reduce_ex__',
 '__repr__',
 '__setattr__',
 '__sizeof__',
 '__str__',
 '__subclasshook__',
 '__weakref__',
 '_addCifAtom',
 '_addPdbAtom',
 'addAtom',
 'alanineScan',
 'getInterfaceEnergy',
 'getMinMaxBFactor',
 'getNetworks',
 'getResiduesEnergy',
 'getSequence',
 'getTotalEnergy',
 'mutate',
 'positionScan',
 'repair',
 'setResolution',
 'toPdb',
 'toPdbFile']

In [128]:
from pyfoldx.structure import Structure

In [110]:
##calling all helper functions- see unet_helper_functions.py
import sys
sys.path.append('/home/labs/rudich/meiray/meirab/foldx/mutatex/')
from pyfoldx_functions import *


In [131]:
st = Structure(code)


In [130]:
st.toPdb

<bound method Structure.toPdbFile of <pyfoldx.structure.structure.Structure object at 0x14e8e38e7bb0>>

In [134]:
from pyfoldx.structure import structure
from pyfoldx.foldx import foldxHandler

# Define the PDB ID and mutation
code = "6U6M"  # Example PDB ID
mutations = "GI29A;"  # Example mutation (Glycine to Alanine at position 29 in chain I)
number_of_runs = 1  # Number of runs to perform (1 run is usually sufficient)

# Create a structure object from the PDB code
st = structure.Structure(code)

# Use the getMutants method to get the mutants and ddG values
try:
    # Use getMutants to generate mutants and ddG values
    ddGs, mutModels = foldxHandler.getMutants(st.toPdb, mutations, number_of_runs)

    # Print the ddG values for each model
    print("ΔΔG values (ddG):")
    print(ddGs)

    # Print the mutated models
    print("\nMutated models:")
    print(mutModels)

except AttributeError as e:
    print(f"Error: {e}. Check the available methods in foldxHandler.")


Error: 'function' object has no attribute 'toPdbFile'. Check the available methods in foldxHandler.


In [127]:
# Let's load a small structure with its PDB code from internet to work with
st=structure("2ci2")

TypeError: 'module' object is not callable

In [135]:
st.getTotalEnergy()

Computing total energy for structure...


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [116]:
st.getResiduesEnergy()

Computing residue energy for structure...


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [7]:
import os
os.environ['FOLDX_LOCATION'] = "/home/labs/rudich/meiray/meirab/foldx/foldx_20241231"


In [15]:
import os
print(os.path.isfile('/home/labs/rudich/meiray/meirab/foldx'))


False


In [8]:
#Some other imports, to work with the data and to plot
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# We want to print full tables
pd.set_option("display.max_rows", 1000, "display.max_columns", 1000)

In [21]:
def mutate(pdb_string, mutations, number_of_runs=1):
    """
    Generate a mutated structure
    
    :param pdb_string: a string containing a structure in PDB format
    :param mutations: mutations to be performed in foldx format (example GI1A means to mutate GLY in position 1 of molecule I to ALA)
    
    :return: tuple, where element 0 is the DataFrame of ddGs of each models respect to wildtype structure 
            and element 1 is the array of strings with the mutated model
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            ddGs, mutModels = foldxHandler.getMutant(pdb_string, mutations, number_of_runs) 
    else:
        ddGs, mutModels = foldxHandler.getMutant(pdb_string, mutations, number_of_runs) 
    
    return (pd.DataFrame(ddGs, columns=ENERGY_TERMS), mutModels)

In [35]:
'''
Created on Nov 9, 2020
@author: lradusky
@summary: python bindings for foldx commands
'''

from pyfoldx.structure.misc import OutputGrabber, in_notebook
from pyfoldx.foldx import foldxHandler
from pyfoldx.structure import structure
import pandas as pd
from pyfoldx.foldx.foldxHandler import ENERGY_TERMS
    


def getInterfaceEnergy(pdb_string, consider_waters=False):
    """
    Compute the interface energy of FoldX for molecules within a structure
    
    :param pdb_string: a string containing a structure in PDB format
    :param consider_waters: take waters into account for energy computations
    
    :return: pandas Dataframe with energy terms of the analyzed structure
             with index corresponding to pair of molecules
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            df = pd.DataFrame.from_dict(foldxHandler.getComplexEnergy(pdb_string, consider_waters), orient="index", columns=ENERGY_TERMS)
    else:
            df = pd.DataFrame.from_dict(foldxHandler.getComplexEnergy(pdb_string, consider_waters), orient="index", columns=ENERGY_TERMS)
    
    return df

def getResiduesEnergy(pdb_string, consider_waters=False):
    """
    Compute the interface energy of FoldX for molecules within a structure
    
    :param pdb_string: a string containing a structure in PDB format
    :param consider_waters: take waters into account for energy computations
    
    :return: pandas Dataframe with energy terms of the analyzed structure
             with index corresponding to each residue
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            df = pd.DataFrame.from_dict(foldxHandler.getResiduesEnergy(pdb_string, consider_waters), orient="index", columns=ENERGY_TERMS)
    else:
        df = pd.DataFrame.from_dict(foldxHandler.getResiduesEnergy(pdb_string, consider_waters), orient="index", columns=ENERGY_TERMS)
    
    return df

def mutate(pdb_string, mutations, number_of_runs=1):
    """
    Generate a mutated structure
    
    :param pdb_string: a string containing a structure in PDB format
    :param mutations: mutations to be performed in foldx format (example GI1A means to mutate GLY in position 1 of molecule I to ALA)
    
    :return: tuple, where element 0 is the DataFrame of ddGs of each models respect to wildtype structure 
            and element 1 is the array of strings with the mutated model
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            ddGs, mutModels = foldxHandler.getMutant(pdb_string, mutations, number_of_runs) 
    else:
        ddGs, mutModels = foldxHandler.getMutant(pdb_string, mutations, number_of_runs) 
    
    return (pd.DataFrame(ddGs, columns=ENERGY_TERMS), mutModels)
    
def repair(pdb_string,fix_residues=[]):
    """
    Repair the sidechains of a structure
    
    :param pdb_string: a string containing a structure in PDB format
    :param fix_residues: list of residues to remain fixed in foldx format (example GI1 means GLY in position 1 of molecule I)
    
    :return: repaired model in string format
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            ret = foldxHandler.getRepairedStructure(pdb_string, fix_residues)
    else:
        ret = foldxHandler.getRepairedStructure(pdb_string, fix_residues)
    
    return ret

def getNetworks(pdb_string):
    
    """
    Get networking information of a structure
    
    :param pdb_string: a string containing a structure in PDB format

    :return: dictionary where keys are network types and elements are linked residue names for that network
    """
    
    if type(pdb_string) == type([]):
        pdb_string = "\n".join(pdb_string)
    
    if not in_notebook():
        out = OutputGrabber()
        with out: 
            ret = foldxHandler.getNetworks(pdb_string)
    else:
        ret = foldxHandler.getNetworks(pdb_string)
    
    return ret

if __name__ == "__main__":
    print("started")

    code = "6U6M"
    st = Structure(code)
    print( getTotalEnergy(code, st.toPdb(), False) )
    print( getInterfaceEnergy(st.toPdb(), False) )
    #print( getResiduesEnergy(st.toPdb(), False) )
    #print( mutate(st.toPdb(), "GI29A;",1)[0] )
    #print( mutate(st.toPdb(), "GI29A;",1)[1][0][0] )
    
    #print( repair(st.toPdb(), ["GI29"]) )
    #for k,v in getNetworks(st.toPdb()).items() : print( k , v )
    '''
    code = "5XJL"
    st = structure(code)
    print (getInterfaceEnergy(st.toPdb(), False) )
    '''
    
    
    
    
    

started


NameError: name 'getTotalEnergy' is not defined

In [9]:
# Let's load a small structure with its PDB code from internet to work with
st=Structure("2ci2")

In [10]:
st.getTotalEnergy()

Computing total energy for structure...


TypeError: expected str, bytes or os.PathLike object, not NoneType

In [14]:
print(os.environ['FOLDX_LOCATION'])


/home/labs/rudich/meiray/meirab/foldx/foldx_20241231


In [39]:
import os
import subprocess
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.colors import Normalize
from Bio import SeqIO
from Bio.PDB import PDBParser, PPBuilder

def create_mutation_file(mutation_list, chain, output_file):
    """
    Create a mutation file in the format required by FoldX.

    Args:
        mutation_list (list): List of tuples (residue_number, wt_residue, mutant_residue).
        chain (str): Chain ID where mutations occur.
        output_file (str): Path to the mutation file to save.
    """
    with open(output_file, "w") as f:
        for mutation in mutation_list:
            residue_number, wt_residue, mutant_residue = mutation
            mutation_str = f"{wt_residue}{chain}{residue_number}{mutant_residue};\n"
            f.write(mutation_str)

def run_foldx(pdb_file, mutation_file, output_dir):
    """
    Run FoldX with a given mutation file.

    Args:
        pdb_file (str): Path to the PDB file.
        mutation_file (str): Path to the mutation file.
        output_dir (str): Directory to save FoldX output.

    Returns:
        float: ΔG value or None if FoldX failed.
    """
    foldx_path = "/home/labs/rudich/meiray/meirab/foldx/foldx_20241231" 

    os.makedirs(output_dir, exist_ok=True)
    command = (
        f"{foldx_path} --command=BuildModel --pdb={pdb_file} --mutant-file={mutation_file} --output-dir={output_dir}"
    )
    result = subprocess.run(command, shell=True, capture_output=True, text=True)

    if result.returncode != 0:
        print("Error running FoldX:", result.stderr)
        return None

    # Parse the FoldX output for the ΔG value
    report_file = os.path.join(output_dir, "Average_Stability.fxout")
    if os.path.exists(report_file):
        with open(report_file, "r") as f:
            for line in f:
                if line.strip().startswith("TOTAL"):
                    delta_g = float(line.split()[1])  # ΔG is typically in the second column
                    return delta_g
    else:
        print("FoldX report file not found.")
    return None

def generate_mutations(sequence, chain):
    """
    Generate a list of mutations for all amino acid positions in a chain.

    Args:
        sequence (str): Amino acid sequence of the chain.
        chain (str): Chain ID.

    Returns:
        list: List of tuples (residue_number, wt_residue, mutant_residue).
    """
    mutations = []
    for i, wt_residue in enumerate(sequence):
        for mutant_residue in "ACDEFGHIKLMNPQRSTVWY":  # All 20 amino acids
            if mutant_residue != wt_residue:
                mutations.append((i + 1, wt_residue, mutant_residue))  # PDB uses 1-based indexing
    return mutations

def calculate_delta_G_for_mutations(pdb_file, sequence, chain):
    """
    Calculate ΔG values for all mutations in a chain.

    Args:
        pdb_file (str): Path to the PDB file.
        sequence (str): Amino acid sequence of the chain.
        chain (str): Chain ID.

    Returns:
        list, list: List of mutations and their ΔG values.
    """
    mutations = generate_mutations(sequence, chain)
    delta_g_values = []

    output_dir = "foldx_output"
    mutation_file = os.path.join(output_dir, "mutations.txt")
    os.makedirs(output_dir, exist_ok=True)

    create_mutation_file(mutations, chain, mutation_file)
    delta_g = run_foldx(pdb_file, mutation_file, output_dir)

    if delta_g is not None:
        for _ in mutations:  # Assume the same ΔG for simplicity
            delta_g_values.append(delta_g)

    return mutations, delta_g_values

def generate_heatmap_html(mutations, delta_g_values, sequence):
    """
    Generate an HTML report with a heatmap of ΔG values.

    Args:
        mutations (list): List of mutations.
        delta_g_values (list): List of ΔG values.
        sequence (str): Amino acid sequence.
    """
    mutation_matrix = np.array(delta_g_values).reshape(len(sequence), 20)
    df = pd.DataFrame(
        mutation_matrix,
        index=list(sequence),
        columns=["A", "C", "D", "E", "F", "G", "H", "I", "K", "L", "M", "N", "P", "Q", "R", "S", "T", "V", "W", "Y"]
    )

    # Create the heatmap
    plt.figure(figsize=(12, 8))
    sns.heatmap(df, annot=True, cmap="coolwarm", norm=Normalize(vmin=-10, vmax=10), cbar_kws={"label": "ΔG (kcal/mol)"})
    plt.title("ΔG Change for Mutations")

    # Save the heatmap as an image
    plt.tight_layout()
    plt.savefig("heatmap.png")

    # Create HTML report
    html = f"""
    <html>
        <head><title>Mutation Heatmap</title></head>
        <body>
            <h1>ΔG Heatmap for Mutations</h1>
            <img src="heatmap.png" alt="Mutation Heatmap">
        </body>
    </html>
    """
    with open("mutation_heatmap.html", "w") as f:
        f.write(html)

    print("HTML report generated: mutation_heatmap.html")

# Example pipeline function
def mutation_pipeline(pdb_file, sequence, chain):
    """
    Run the entire mutation pipeline: generate mutations, calculate ΔG values, and create a heatmap.

    Args:
        pdb_file (str): Path to the PDB file.
        sequence (str): Amino acid sequence of the chain.
        chain (str): Chain ID.
    """
    mutations, delta_g_values = calculate_delta_G_for_mutations(pdb_file, sequence, chain)
    generate_heatmap_html(mutations, delta_g_values, sequence)


In [40]:
def extract_chains_and_sequence(pdb_file):
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    chains = {}
    for model in structure:  # Iterate over models (usually only one)
        for chain in model:  # Iterate over chains
            chain_id = chain.id
            sequence = ""
            for residue in chain:
                # Skip non-standard residues
                if residue.id[0] == " ":
                    sequence += residue.resname[0]  # Get the first letter of the 3-letter residue code
            chains[chain_id] = sequence

    return chains


In [41]:
pdb_file = "/home/labs/rudich/meiray/meirab/foldx/1W4F.pdb"

# sequence = extract_sequence_from_pdb(pdb_file)
chains = extract_chains_and_sequence(pdb_file)

for chain_id, sequence in chains.items():
    print(f"Chain {chain_id}: {sequence}")



mutation_pipeline(pdb_file, sequence,chain_id)


Chain A: AVIAMPSVALTAAGLGVAIALVGGTGLAGAVLLGAIAATLA
Error running FoldX: 


ValueError: cannot reshape array of size 0 into shape (41,20)

In [None]:
'/home/labs/rudich/meiray/meirab/foldx/foldx_20241231' --command=BuildModel --pdb={'/home/labs/rudich/meiray/meirab/foldx/1W4F.pdb'} --mutant-file={'/home/labs/rudich/meiray/meirab/foldx/mutatex/foldx_output/mutations.txt'} --output-dir={'/home/labs/rudich/meiray/meirab/foldx/mutatex/foldx_output'} --screen 1 --debug 1
/path/to/foldx --command=BuildModel --pdb=/path/to/input.pdb --mutant-file=/path/to/mutation.txt --output-dir=/path/to/output --screen 1 --debug 1


In [29]:
print("Error running FoldX:", result.stderr)


NameError: name 'result' is not defined

In [30]:
subprocess.run()

TypeError: Popen.__init__() missing 1 required positional argument: 'args'

In [34]:
def run_foldx(pdb_file, mutation_file, output_dir):
    """
    Run FoldX with a given mutation file.

    Args:
        pdb_file (str): Path to the PDB file.
        mutation_file (str): Path to the mutation file.
        output_dir (str): Directory to save FoldX output.

    Returns:
        float: ΔG value or None if FoldX failed.
    """
    foldx_path = "/home/labs/rudich/meiray/meirab/foldx/foldx_20241231" 

    os.makedirs(output_dir, exist_ok=True)
    command = (
        f"{foldx_path} --command=BuildModel --pdb={pdb_file} --mutant-file={mutation_file} --output-dir={output_dir}"
    )
    result = subprocess.run(command, shell=True, capture_output=True, text=True)

    if result.returncode != 0:
        print("Error running FoldX:")
        print("stderr:", result.stderr)
        print("stdout:", result.stdout)
        return None

    # Parse the FoldX output for the ΔG value
    report_file = os.path.join(output_dir, "Average_Stability.fxout")
    if os.path.exists(report_file):
        with open(report_file, "r") as f:
            for line in f:
                if line.strip().startswith("TOTAL"):
                    delta_g = float(line.split()[1])  # ΔG is typically in the second column
                    return delta_g
    else:
        print("FoldX report file not found.")
    return None


In [38]:

mutation_file = '/home/labs/rudich/meiray/meirab/foldx/mutatex/foldx_output/mutations.txt'
output_dir = "foldx_output"
result = run_foldx(pdb_file, mutation_file, output_dir)


Error running FoldX:
stderr: 
stdout:    ********************************************
   ***                                      ***
   ***             FoldX 4 (c)              ***
   ***                                      ***
   ***     code by the FoldX Consortium     ***
   ***                                      ***
   ***     Jesper Borg, Frederic Rousseau   ***
   ***    Joost Schymkowitz, Luis Serrano   ***
   ***    Peter Vanhee, Erik Verschueren    ***
   ***     Lies Baeten, Javier Delgado      ***
   ***       and Francois Stricher          ***
   *** and any other of the 9! permutations ***
   ***   based on an original concept by    ***
   ***   Raphael Guerois and Luis Serrano   ***
   ********************************************

No pdbs for the run found at:
"./"
Foldx will end



In [None]:
/home/labs/rudich/meiray/meirab/foldx/foldx_20241231 --version
