In [1]:
import numpy as np
import scipy.optimize as opt
import random


In [21]:
# !pip install MDAnalysis


In [22]:
# from google.colab import files
# uploaded = files.upload()


In [23]:
import ipywidgets as widgets
from IPython.display import display

# Create a file-upload widget and render it in the cell output.
# NOTE(review): nothing visible in this notebook reads `uploaded`; the PDB
# parsing cell opens a hard-coded filename instead — confirm whether the
# upload is supposed to supply that file.
uploaded = widgets.FileUpload()
display(uploaded)


FileUpload(value=(), description='Upload')

In [24]:
# !pip install biopython


In [25]:
import Bio.PDB

In [26]:
# import biopython


In [27]:
# !pip install scipy

In [28]:
from Bio import PDB
import numpy as np

def extract_loop_from_pdb(pdb_file, chain_id="A", start_res=100, end_res=110):
    """Extract the C-alpha trace of a residue range from a PDB file.

    Only the first model in the file is read. Iterating over every model
    would return the same residues once per model for multi-model files
    (for example NMR ensembles), which is not a single loop.

    Args:
        pdb_file: File name (or handle) passed to ``PDBParser.get_structure``.
        chain_id: Identifier of the chain to read residues from.
        start_res: Lowest residue number to include (inclusive).
        end_res: Highest residue number to include (inclusive).

    Returns:
        ``numpy.ndarray`` of shape ``(n_residues, 3)`` holding one CA
        coordinate per selected residue, in file order. The shape is
        ``(0, 3)`` when no residue matches, so callers can always rely on a
        two-dimensional array.
    """
    parser = PDB.PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    first_model = next(iter(structure), None)
    if first_model is None:
        return np.empty((0, 3))

    backbone_atoms = ("N", "CA", "C")
    ca_coords = [
        residue["CA"].coord
        for chain in first_model
        if chain.id == chain_id
        for residue in chain
        # residue.id[1] is the residue number; residues lacking any backbone
        # atom (waters, ligands, incomplete residues) are skipped.
        if start_res <= residue.id[1] <= end_res
        and all(atom_name in residue for atom_name in backbone_atoms)
    ]

    # reshape keeps the (n, 3) layout even when the selection is empty.
    return np.array(ca_coords).reshape(-1, 3)

# Structure file to read; it is opened by name, so it must already be present
# in the notebook's working directory.
pdb_filename = "4ge2.pdb"

# NOTE(review): end_res=1100 is far wider than the function's default of 110,
# so this selects every residue of chain A numbered 100 or higher rather than
# a short loop — confirm this is intended and not a typo for 110.
loop_selection = {"chain_id": "A", "start_res": 100, "end_res": 1100}
loop_coords = extract_loop_from_pdb(pdb_filename, **loop_selection)

print("Extracted Loop Coordinates:\n", loop_coords)


Extracted Loop Coordinates:
 [[-8.4550e+00 -5.6490e+00 -1.3642e+01]
 [-5.4530e+00 -7.8880e+00 -1.4192e+01]
 [-6.4250e+00 -9.7950e+00 -1.1053e+01]
 [-9.0990e+00 -1.2224e+01 -1.2217e+01]
 [-9.6560e+00 -1.5636e+01 -1.0619e+01]
 [-7.2800e+00 -1.7959e+01 -1.2444e+01]
 [-6.3340e+00 -1.9425e+01 -1.4804e+01]
 [-5.0590e+00 -2.2261e+01 -1.2591e+01]
 [-5.9630e+00 -2.1023e+01 -9.0870e+00]
 [-5.2300e+00 -2.3337e+01 -6.1390e+00]
 [-7.3390e+00 -2.4641e+01 -3.2650e+00]
 [-5.9330e+00 -2.4616e+01  2.8300e-01]
 [-5.2670e+00 -2.8345e+01 -8.1000e-02]
 [-3.2900e+00 -2.7809e+01 -3.2880e+00]
 [-1.3850e+00 -2.4884e+01 -1.7870e+00]
 [-3.3400e-01 -2.7098e+01  1.1100e+00]
 [ 1.1160e+00 -2.9712e+01 -1.2190e+00]
 [ 2.5720e+00 -2.6946e+01 -3.3870e+00]
 [ 4.5450e+00 -2.5483e+01 -4.8200e-01]
 [ 5.5560e+00 -2.8843e+01  9.6900e-01]
 [ 6.8270e+00 -3.0007e+01 -2.4170e+00]
 [ 9.0060e+00 -2.6909e+01 -2.8610e+00]
 [ 9.8550e+00 -2.6054e+01  7.5000e-01]
 [ 1.1261e+01 -2.2694e+01  1.8400e+00]
 [ 1.4236e+01 -2.2848e+01 -5.2400e-

In [29]:
def apply_loop_sampling_on_pdb(loop_coords, num_samples=100, max_shift=0.5, rng=None):
    """Generate randomly translated copies of a loop.

    For each sample a random unit direction is drawn and scaled by a signed
    step taken uniformly from ``[-max_shift, max_shift)``; that single vector
    is added to every point of ``loop_coords``. Each sample is therefore a
    rigid translation of the original loop, and samples are independent of
    one another (every one starts from ``loop_coords``, not from the previous
    sample), so this is not a Hit-and-Run Markov chain.

    Args:
        loop_coords: Array-like of points, expected as ``(n_points, 3)``.
        num_samples: Number of displaced copies to generate.
        max_shift: Upper bound on the translation length. Defaults to the
            previously hard-coded 0.5.
        rng: Optional ``numpy.random.Generator`` (or any object exposing
            ``standard_normal`` and ``uniform``). When ``None`` the global
            ``np.random`` state is used, so ``np.random.seed`` still controls
            the output as before.

    Returns:
        ``numpy.ndarray`` of shape ``(num_samples, *loop_coords.shape)``.
    """
    coords = np.asarray(loop_coords)
    source = np.random if rng is None else rng

    samples = []
    for _ in range(num_samples):
        # Normalising a Gaussian vector gives a direction uniform on the sphere.
        direction = source.standard_normal(3)
        direction /= np.linalg.norm(direction)

        step = source.uniform(-max_shift, max_shift)
        samples.append(coords + step * direction)

    if not samples:
        # Keep the trailing dimensions so downstream code sees a consistent rank.
        return np.empty((0,) + coords.shape)
    return np.array(samples)

# The sampler draws from NumPy's global random state; seed it so that
# Restart & Run All reproduces the same set of sampled loops and figure.
np.random.seed(42)
sampled_loops = apply_loop_sampling_on_pdb(loop_coords, num_samples=100)


In [30]:
%matplotlib notebook


In [31]:
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

def plot_sampled_loops_3d(real_loop, sampled_loops):
    """Draw a reference loop and its sampled copies on a single 3D axes.

    Args:
        real_loop: Iterable of (x, y, z) points, drawn as a thick red line
            labelled "Real PDB Loop".
        sampled_loops: Iterable of point sequences in the same layout; each
            one is drawn as a thin, semi-transparent line.

    The figure is shown with ``plt.show(block=True)`` after interactive mode
    has been switched on; nothing is returned.
    """
    plt.ion()

    figure = plt.figure(figsize=(8, 6))
    axes3d = figure.add_subplot(111, projection='3d')

    # Reference trace goes first; it is the only line with a legend label.
    ref_x, ref_y, ref_z = zip(*real_loop)
    axes3d.plot(ref_x, ref_y, ref_z, color="red", label="Real PDB Loop", linewidth=4)

    # Every sampled copy shares the same faint styling.
    sample_style = {"linewidth": 0.5, "alpha": 0.3}
    for sample in sampled_loops:
        xs, ys, zs = zip(*sample)
        axes3d.plot(xs, ys, zs, **sample_style)

    axes3d.set_xlabel("X")
    axes3d.set_ylabel("Y")
    axes3d.set_zlabel("Z")
    axes3d.set_title("Comparison of Real PDB Loop vs Sampled Conformations")
    axes3d.legend()

    plt.show(block=True)

# Overlay the sampled copies on the loop extracted from the PDB file.
plot_sampled_loops_3d(real_loop=loop_coords, sampled_loops=sampled_loops)


<IPython.core.display.Javascript object>