In [2]:
# Read the uploaded FASTA file to extract the protein sequence
fasta_file_path = 'protein/rcsb_pdb_8HR6.fasta'

# Function to read a FASTA file and return the sequence
def read_fasta_file(file_path):
    with open(file_path, 'r') as file:
        lines = file.readlines()
        # Assuming the first line is the header and the rest is the sequence
        sequence = ''.join(line.strip() for line in lines if not line.startswith('>'))
    return sequence

# Extract the sequence from the file
protein_sequence = read_fasta_file(fasta_file_path)
protein_sequence

'MTLEIFEYLEKYDYEQVVFCQDKESGLKAIIAIHDTTLGPALGGTRMWTYDSEEAAIEDALRLAKGMTYKNAAAGLNLGGAKTVIIGDPRKDKSEAMFRALGRYIQGLNGRYITAEDVGTTVDDMDIIHEETDFVTGISPSFGSSGNPSPVTAYGVYRGMKAAAKEAVGTDNLEGKVIAVQGVGNVAYHLCKHLHAEGAKLIVTDINKEAVQRAVEEFGASAVEPNEIYGVECDIYAPCALGATVNDETIPQLKAKVIAGSANNQLKENRHGDIIHEMGIVYAPDYVINAGGVINVADELYGYNRERALKRVESIYDTIAKVIEISKRDGIATYVAADRLAEERIASLKNSRSTYLRNGHDIISRR'

In [None]:
import numpy as np

# Placeholder function for initializing a protein structure
def initialize_protein_structure(sequence):
    # Initialize protein structure based on sequence
    # This is highly simplified and purely illustrative
    structure = np.zeros((len(sequence), 3)) # Example: 3D coordinates for each amino acid
    return structure

# Placeholder for energy calculation of a protein structure
def calculate_energy(structure):
    # Calculate the energy of the given protein structure
    # This is a placeholder and would involve complex physics
    energy = np.random.random() # Simplified example
    return energy

# Placeholder for a Monte Carlo step
def monte_carlo_step(structure):
    # Perform a single Monte Carlo step (e.g., modify the structure slightly)
    new_structure = structure + np.random.normal(0, 0.1, structure.shape)
    return new_structure

# Main simulation function with replica exchange
def simulate_protein_folding(sequence, num_replicas=10, num_steps=1000):
    # Initialize replicas with different temperatures (simplified example)
    replicas = [initialize_protein_structure(sequence) for _ in range(num_replicas)]
    temperatures = np.linspace(1, 10, num_replicas)  # Example temperature range
    
    for step in range(num_steps):
        for i, replica in enumerate(replicas):
            new_structure = monte_carlo_step(replica)
            old_energy = calculate_energy(replica)
            new_energy = calculate_energy(new_structure)
            # Example of a Metropolis criterion for accepting new structure
            if np.exp((old_energy - new_energy) / temperatures[i]) > np.random.random():
                replicas[i] = new_structure
        
        # Example replica exchange step (simplified)
        if step % 100 == 0:  # Attempt exchange every 100 steps
            # Select two random replicas to attempt exchange
            i, j = np.random.choice(range(num_replicas), size=2, replace=False)
            # Simplified exchange criterion
            if np.random.random() > 0.5:
                replicas[i], replicas[j] = replicas[j], replicas[i]

    return replicas

# Example usage
sequence = "protein_sequence"  # Example sequence, replace with your protein sequence
replicas = simulate_protein_folding(sequence)
replicas

In [7]:
import numpy as np
import matplotlib.pyplot as plt
import imageio
import os

In [15]:
# Assuming the previous functions (initialize_protein_structure, calculate_energy, monte_carlo_step)
# are defined in the same script

def simulate_protein_folding_with_energy_tracking(sequence, num_replicas=10, num_steps=1000):
    replicas = [initialize_protein_structure(sequence) for _ in range(num_replicas)]
    temperatures = np.linspace(1, 10, num_replicas)
    energy_records = []

    for step in range(num_steps):
        step_energies = []
        for i, replica in enumerate(replicas):
            new_structure = monte_carlo_step(replica)
            old_energy = calculate_energy(replica)
            new_energy = calculate_energy(new_structure)
            if np.exp((old_energy - new_energy) / temperatures[i]) > np.random.random():
                replicas[i] = new_structure
                step_energies.append(new_energy)
            else:
                step_energies.append(old_energy)

        energy_records.append(step_energies)

        if step % 100 == 0:
            i, j = np.random.choice(range(num_replicas), size=2, replace=False)
            if np.random.random() > 0.5:
                replicas[i], replicas[j] = replicas[j], replicas[i]

    return replicas, energy_records

# Modify sequence as per your requirement
sequence = protein_sequence
replicas, energy_records = simulate_protein_folding_with_energy_tracking(sequence)

print(replicas)


[array([[-1.503052  , -1.5043058 ,  2.42385985],
       [ 3.14708335,  6.12477193,  3.05254443],
       [ 7.53872879, -1.2415743 ,  0.30109001],
       ...,
       [ 3.51427806, -1.65580299, -0.51280979],
       [ 0.04172175, -2.65349708, -6.272713  ],
       [ 3.78696133,  1.62238208,  0.23665786]]), array([[ 1.52524322,  5.7301862 , -2.0965883 ],
       [ 1.97575556, -3.08989637,  1.49672262],
       [ 0.06357012,  0.88161842, -4.35546438],
       ...,
       [ 1.01990975,  2.50009481,  1.55147074],
       [-1.26324321,  3.96982832, -1.22848188],
       [-2.79029009, -1.45829726,  1.4921621 ]]), array([[-4.85892926,  3.55778961,  2.65631665],
       [ 1.73713249,  0.71858067, -0.89956612],
       [ 2.99007345,  4.85246696,  0.7079676 ],
       ...,
       [-2.61235651, -8.02309397,  3.24594916],
       [-1.46749978,  0.15987917, -1.52520852],
       [ 1.92105919, -3.29689495, -3.05660142]]), array([[-2.60506425, -1.49629116,  0.30660643],
       [-2.11777288, -1.1405965 ,  2.93736001

In [None]:
# Function to plot energy changes
def plot_energy_changes(energy_records, step, filename):
    energies = np.array(energy_records)
    min_energies = np.min(energies, axis=1)
    max_energies = np.max(energies, axis=1)
    avg_energies = np.mean(energies, axis=1)

    plt.figure(figsize=(10, 6))
    plt.plot(min_energies[:step+1], label='Min Energy')
    plt.plot(max_energies[:step+1], label='Max Energy')
    plt.plot(avg_energies[:step+1], label='Average Energy', linestyle='--')
    plt.xlabel('Step')
    plt.ylabel('Energy')
    plt.title(f'Energy Profile up to Step {step}')
    plt.legend()
    plt.savefig(filename)
    plt.close()

# Creating a GIF
filenames = []
for step in range(len(energy_records)):
    filename = f'madata/temp_step_{step}.png'
    plot_energy_changes(energy_records, step, filename)
    filenames.append(filename)

with imageio.get_writer('madata/simulation_energy.gif', mode='I') as writer:
    for filename in filenames:
        image = imageio.imread(filename)
        writer.append_data(image)
        os.remove(filename)  # Clean up

'madata/simulation_energy.gif'


In [11]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
import numpy as np


def plot_protein_structure(structure, step, energy, min_energy, max_energy):
    fig = plt.figure(figsize=(10, 7))
    ax = fig.add_subplot(111, projection='3d')
    xs, ys, zs = structure.T
    ax.plot(xs, ys, zs, marker='o', linestyle='-', color='b')
    ax.set_title(f'Step: {step}, Energy: {energy:.2f}')
    plt.savefig(f'madata/structure_step_{step}.png')
    plt.close()

def simulate_and_plot_folding(sequence, num_replicas=10, num_steps=1000, plot_every_n_steps=100):
    replicas = [initialize_protein_structure(sequence) for _ in range(num_replicas)]
    temperatures = np.linspace(1, 10, num_replicas)
    min_energy = float('inf')
    max_energy = float('-inf')

    for step in range(num_steps):
        for i, replica in enumerate(replicas):
            new_structure = monte_carlo_step(replica)
            old_energy = calculate_energy(replica)
            new_energy = calculate_energy(new_structure)
            min_energy = min(min_energy, new_energy, old_energy)
            max_energy = max(max_energy, new_energy, old_energy)
            if np.exp((old_energy - new_energy) / temperatures[i]) > np.random.random():
                replicas[i] = new_structure

        if step % plot_every_n_steps == 0 or step == num_steps - 1:
            plot_protein_structure(replicas[0], step, calculate_energy(replicas[0]), min_energy, max_energy)

# Example usage with a simple protein sequence
sequence = protein_sequence
simulate_and_plot_folding(sequence, num_replicas=10, num_steps=1000, plot_every_n_steps=100)


In [22]:
def write_pdb_from_structure(structure, sequence, file_path):

    with open(file_path, 'w') as pdb:
        pdb.write("HEADER    Simplified Protein Structure\n")
        pdb.write("TITLE     Example Generated Structure\n")
        for i, (x, y, z) in enumerate(structure):
            resName = 'ALA'  # Using Alanine; replace or modify as needed based on actual amino acids
            pdb.write(f"ATOM  {i+1:5d}  CA  {resName} A{i+1:4d}    {x:8.3f}{y:8.3f}{z:8.3f}  1.00 20.00           C\n")
        pdb.write("TER\nEND\n")

# Selecting the first structure from your provided list as an example
selected_structure = replicas  # Replace with the structure of interest

testing_sequence = "MADEUPSEQUENCE"  # This should match the length of your structure array

# Generate PDB file
write_pdb_from_structure(selected_structure, protein_sequence, 'generated_protein_structure.pdb')


In [25]:
import py3Dmol

pdb_file_path = 'generated_protein_structure.pdb'

# Reading the PDB file into a string
with open(pdb_file_path, 'r') as file:
    pdb_contents = file.read()

print(pdb_contents)

view = py3Dmol.view(js='https://3dmol.org/build/3Dmol.js')
view.addModel(pdb_contents, "pdb")
view.setStyle({'cartoon': {'color': 'spectrum'}}) 
view.zoomTo()
view.show()


HEADER    Simplified Protein Structure
TITLE     Example Generated Structure
ATOM      1  CA  ALA A   1      -1.503  -1.504   2.424  1.00 20.00           C
ATOM      2  CA  ALA A   2       3.147   6.125   3.053  1.00 20.00           C
ATOM      3  CA  ALA A   3       7.539  -1.242   0.301  1.00 20.00           C
ATOM      4  CA  ALA A   4       0.929  -1.055   2.427  1.00 20.00           C
ATOM      5  CA  ALA A   5       3.088   1.498   6.140  1.00 20.00           C
ATOM      6  CA  ALA A   6       2.268   1.517  -0.957  1.00 20.00           C
ATOM      7  CA  ALA A   7       4.407  -1.578  -2.856  1.00 20.00           C
ATOM      8  CA  ALA A   8       1.194  -4.775  -1.704  1.00 20.00           C
ATOM      9  CA  ALA A   9       0.841  -2.178   0.229  1.00 20.00           C
ATOM     10  CA  ALA A  10       3.991  -3.290   6.369  1.00 20.00           C
ATOM     11  CA  ALA A  11      -4.935   3.895  -0.384  1.00 20.00           C
ATOM     12  CA  ALA A  12      -2.044  -1.285  -4.130