In [None]:
import pyrosetta
from pyrosetta import pose_from_pdb
import nglview as nv
from ipywidgets import HBox
from tqdm import tqdm
import mdtraj as md
from Bio.PDB import PDBParser
import os
from foldingdiff.datasets import CathCanonicalAnglesDataset
import scipy.io
import numpy as np



In [None]:
# Initialize PyRosetta
pyrosetta.init()

# Load the PDB file
pdb_filename = "/n/home02/msun415/foldingdiff/data/cath/dompdb/152lA00.pdb"  # Change this to your actual file
pose = pose_from_pdb(pdb_filename)

# Residue index to modify (change as needed)
residue_index = 10  # Change to the residue you want to modify

# Get initial torsion angles
initial_phi = pose.phi(residue_index)
initial_psi = pose.psi(residue_index)

print(f"Before modification - Phi: {initial_phi:.2f}, Psi: {initial_psi:.2f}")

# Save the original structure
before_pdb = "before.pdb"
pose.dump_pdb(before_pdb)

# Modify the torsion angle
pose.set_phi(residue_index, initial_phi + 50)  # Increase phi by 20 degrees
pose.set_psi(residue_index, initial_psi)  # Decrease psi by 15 degrees

# Get modified torsion angles
modified_phi = pose.phi(residue_index)
modified_psi = pose.psi(residue_index)

print(f"After modification - Phi: {modified_phi:.2f}, Psi: {modified_psi:.2f}")

# Save the modified structure
after_pdb = "after.pdb"
pose.dump_pdb(after_pdb)

# Create two separate NGLView widgets
view_before = nv.show_structure_file(before_pdb)
view_after = nv.show_structure_file(after_pdb)

# Set titles
view_before._set_size('400px', '400px')
view_after._set_size('400px', '400px')

# Display side by side
HBox([view_before, view_after])

┌──────────────────────────────────────────────────────────────────────────────┐
│                                 PyRosetta-4                                  │
│              Created in JHU by Sergey Lyskov and PyRosetta Team              │
│              (C) Copyright Rosetta Commons Member Institutions               │
│                                                                              │
│ NOTE: USE OF PyRosetta FOR COMMERCIAL PURPOSES REQUIRE PURCHASE OF A LICENSE │
│         See LICENSE.PyRosetta.md or email license@uw.edu for details         │
└──────────────────────────────────────────────────────────────────────────────┘
PyRosetta-4 2025 [Rosetta PyRosetta4.Release.python38.ubuntu 2025.06+release.029c6a159b896477003a14f78f472d4cd2cead46 2025-02-04T15:14:13] retrieved from: http://www.pyrosetta.org
core.init: Checking for fconfig files in pwd and ./rosetta/flags
core.init: Rosetta version: PyRosetta4.Release.python38.ubuntu r394 2025.06+release.029c6a159b 029c6a159b89

HBox(children=(NGLWidget(), NGLWidget()))

In [None]:
def parse_pdb(pdb_file):
    # Create a PDB parser object
    parser = PDBParser(QUIET=True)

    # Path to your PDB file (e.g., '12asA00.pdb')
    structure = parser.get_structure("protein", pdb_file)

    # We'll store coordinates for each residue as a tuple: (N, CA, C)
    backbone_coords = []

    # Iterate over all residues in all chains
    for model in structure:
        for chain in model:
            for residue in chain:
                # Check that the residue has the backbone atoms we need.
                if all(atom_name in residue for atom_name in ['N', 'CA', 'C']):
                    # Extract coordinates
                    N_coord = residue['N'].get_coord()
                    CA_coord = residue['CA'].get_coord()
                    C_coord = residue['C'].get_coord()
                    backbone_coords.append((N_coord, CA_coord, C_coord))

    # Now, backbone_coords is a list of tuples, each containing three numpy arrays of shape (3,).
    # For a protein with N residues, you have N entries, corresponding to 3 x 3D coordinates.
    for i, (N_coord, CA_coord, C_coord) in enumerate(backbone_coords, start=1):
        print(f"Residue {i}:")
        print(f"  N:  {N_coord}")
        print(f"  CA: {CA_coord}")
        print(f"  C:  {C_coord}")

    return backbone_coords

In [None]:
cath_folder = "/n/home02/msun415/foldingdiff/data/cath/dompdb/"  # Change this to your actual file
all_coords = []
files = os.listdir(cath_folder)
files = sorted(files, key=len)
for f in tqdm(files[:10]):
    if f:
        print(f)
        all_coords.append(parse_pdb(os.path.join(cath_folder, f)))

  0%|          | 0/10 [00:00<?, ?it/s]

 20%|██        | 2/10 [00:00<00:00, 11.28it/s]

1yu0A01.pdb
Residue 1:
  N:  [  4.121 100.689  58.087]
  CA: [  2.653 100.913  57.953]
  C:  [  2.098 100.209  56.713]
Residue 2:
  N:  [ 1.11  99.347 56.933]
  CA: [ 0.456 98.604 55.87 ]
  C:  [-0.97  99.095 55.72 ]
Residue 3:
  N:  [-1.318 99.51  54.511]
  CA: [ -2.633 100.044  54.255]
  C:  [-3.653 98.951 54.169]
Residue 4:
  N:  [-4.896 99.315 54.428]
  CA: [-5.999 98.406 54.224]
  C:  [-6.044 98.026 52.752]
Residue 5:
  N:  [-6.428 96.796 52.492]
  CA: [-6.36  96.285 51.154]
  C:  [-7.398 95.245 50.87 ]
Residue 6:
  N:  [-7.484 94.898 49.604]
  CA: [-8.393 93.862 49.184]
  C:  [-8.313 93.625 47.699]
Residue 7:
  N:  [-9.046 92.624 47.247]
  CA: [-9.105 92.286 45.844]
  C:  [-9.904 93.346 45.084]
Residue 8:
  N:  [-9.91  93.258 43.763]
  CA: [-10.679  94.195  42.961]
  C:  [-12.168  94.011  43.25 ]
Residue 9:
  N:  [-12.587  92.757  43.369]
  CA: [-13.982  92.464  43.666]
  C:  [-14.411  93.02   45.027]
Residue 10:
  N:  [-13.54   92.914  46.022]
  CA: [-13.801  93.498  47.344]
  C

 40%|████      | 4/10 [00:00<00:00, 13.31it/s]

  N:  [12.755 13.607 20.513]
  CA: [11.632 14.497 20.815]
  C:  [11.693 15.891 20.152]
Residue 67:
  N:  [12.814 16.593 20.225]
  CA: [12.858 17.973 19.744]
  C:  [13.97  18.229 18.731]
Residue 68:
  N:  [14.246 17.221 17.928]
  CA: [15.2   17.343 16.856]
  C:  [16.617 17.084 17.308]
Residue 69:
  N:  [17.488 17.019 16.313]
  CA: [18.89  16.752 16.519]
  C:  [19.622 17.999 16.982]
Residue 70:
  N:  [20.611 17.787 17.85 ]
  CA: [21.516 18.84  18.31 ]
  C:  [20.763 20.089 18.758]
Residue 71:
  N:  [ 4.543 23.023 21.491]
  CA: [ 4.757 21.968 20.496]
  C:  [ 3.64  20.94  20.338]
Residue 72:
  N:  [ 2.771 20.81  21.338]
  CA: [ 1.613 19.919 21.244]
  C:  [ 0.632 20.437 20.221]
Residue 73:
  N:  [ 0.257 21.707 20.358]
  CA: [-0.624 22.349 19.397]
  C:  [ 0.024 22.356 18.023]
Residue 74:
  N:  [ 1.339 22.552 17.98 ]
  CA: [ 2.045 22.564 16.707]
  C:  [ 1.963 21.219 16.008]
Residue 75:
  N:  [ 2.076 20.144 16.776]
  CA: [ 1.982 18.806 16.201]
  C:  [ 0.605 18.591 15.573]
Residue 76:
  N:  [-0.

 80%|████████  | 8/10 [00:00<00:00, 11.64it/s]

Residue 1:
  N:  [15.7    0.683 47.706]
  CA: [17.1    1.056 47.859]
  C:  [17.532  1.944 46.692]
Residue 2:
  N:  [16.961  1.69  45.518]
  CA: [17.208  2.519 44.343]
  C:  [16.568  3.894 44.522]
Residue 3:
  N:  [15.349  3.906 45.053]
  CA: [14.611  5.147 45.266]
  C:  [15.336  6.1   46.211]
Residue 4:
  N:  [15.796  5.585 47.348]
  CA: [16.443  6.429 48.349]
  C:  [17.792  6.966 47.87 ]
Residue 5:
  N:  [18.424  6.264 46.934]
  CA: [19.717  6.698 46.415]
  C:  [19.549  7.781 45.356]
Residue 6:
  N:  [18.49   7.669 44.562]
  CA: [18.183  8.679 43.557]
  C:  [17.696  9.961 44.226]
Residue 7:
  N:  [16.819  9.809 45.214]
  CA: [16.283 10.947 45.954]
  C:  [17.392 11.745 46.636]
Residue 8:
  N:  [18.299 11.04  47.304]
  CA: [19.378 11.686 48.045]
  C:  [20.387 12.381 47.131]
Residue 9:
  N:  [20.524 11.884 45.904]
  CA: [21.502 12.427 44.964]
  C:  [20.897 13.396 43.951]
Residue 10:
  N:  [19.577 13.549 43.989]
  CA: [18.872 14.428 43.056]
  C:  [19.322 15.901 43.102]
Residue 11:
  N:  [

100%|██████████| 10/10 [00:00<00:00, 10.85it/s]

Residue 1:
  N:  [52.083  9.364 24.161]
  CA: [52.737 10.032 22.995]
  C:  [53.588 11.241 23.409]
Residue 2:
  N:  [54.688 11.515 22.666]
  CA: [55.543 12.697 22.879]
  C:  [54.855 14.055 22.632]
Residue 3:
  N:  [55.645 15.063 22.264]
  CA: [55.139 16.418 22.028]
  C:  [54.35  16.52  20.718]
Residue 4:
  N:  [53.963 17.738 20.34 ]
  CA: [53.273 17.954 19.061]
  C:  [54.223 18.181 17.878]
Residue 5:
  N:  [55.445 17.67  18.012]
  CA: [56.359 17.509 16.89 ]
  C:  [56.137 16.112 16.303]
Residue 6:
  N:  [55.053 15.471 16.746]
  CA: [54.562 14.213 16.185]
  C:  [53.947 14.47  14.812]
Residue 7:
  N:  [53.693 15.748 14.533]
  CA: [53.312 16.223 13.208]
  C:  [54.485 16.05  12.236]
Residue 8:
  N:  [55.684 16.446 12.668]
  CA: [56.906 16.271 11.875]
  C:  [57.278 14.798 11.721]
Residue 9:
  N:  [57.114 14.027 12.798]
  CA: [57.367 12.587 12.775]
  C:  [56.418 11.894 11.799]
Residue 10:
  N:  [55.195 12.415 11.697]
  CA: [54.195 11.915 10.753]
  C:  [54.38  12.44   9.338]
Residue 11:
  N:  [




In [None]:
dataset = CathCanonicalAnglesDataset('/n/home02/msun415/foldingdiff/data/cath/dompdb', use_cache=False, debug=True)

  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]
  v /= factor[..., np.newaxis]


In [9]:
G = []
for i in range(9):
    n = dataset[i]['lengths'].item()
    coords = dataset[i]['coords'][:n]
    if n%3 != 0:
        pass
    labels = np.tile([0,1,2], n//3)
    edges = [[j, j+1, 0] for j in range(1, n)]
    g = {
        'nodelabels': np.array(labels, dtype=np.uint32)[:, None],
        'nodepos': np.array(coords, dtype=np.float64),
        'edges': np.array(edges, dtype=np.uint32)
    }
    G.append(g)
scipy.io.savemat('/n/home02/msun415/foldingdiff/data/cath/graphs.mat', {"G": G})