In [None]:
import pyrosetta
from pyrosetta import pose_from_pdb
import nglview as nv
from ipywidgets import HBox
from tqdm import tqdm
import mdtraj as md
from Bio.PDB import PDBParser
import os
from foldingdiff.datasets import CathCanonicalAnglesDataset
import scipy.io
import numpy as np

In [None]:
# Initialize PyRosetta
pyrosetta.init()

# Load the PDB file
pdb_filename = "data/cath/dompdb/152lA00.pdb"  # Change this to your actual file
pose = pose_from_pdb(pdb_filename)

# Residue whose backbone torsions are perturbed (change as needed)
residue_index = 10

# Offsets (in degrees) added to the residue's current phi / psi.
# With PSI_DELTA_DEG = 0 psi is written back unchanged, so only phi moves.
PHI_DELTA_DEG = 50
PSI_DELTA_DEG = 0

# Record the torsion angles before any modification
initial_phi = pose.phi(residue_index)
initial_psi = pose.psi(residue_index)

print(f"Before modification - Phi: {initial_phi:.2f}, Psi: {initial_psi:.2f}")

# Save the original structure so it can be shown next to the modified one
before_pdb = "before.pdb"
pose.dump_pdb(before_pdb)

# Apply the offsets to the selected residue
pose.set_phi(residue_index, initial_phi + PHI_DELTA_DEG)
pose.set_psi(residue_index, initial_psi + PSI_DELTA_DEG)

# Read the angles back from the pose to confirm what was actually stored
modified_phi = pose.phi(residue_index)
modified_psi = pose.psi(residue_index)

print(f"After modification - Phi: {modified_phi:.2f}, Psi: {modified_psi:.2f}")

# Save the modified structure
after_pdb = "after.pdb"
pose.dump_pdb(after_pdb)

# Create two separate NGLView widgets, one per saved structure
view_before = nv.show_structure_file(before_pdb)
view_after = nv.show_structure_file(after_pdb)

# Give both viewers the same fixed size so they line up in the HBox
view_before._set_size('400px', '400px')
view_after._set_size('400px', '400px')

# Display side by side (last expression of the cell, so it is rendered)
HBox([view_before, view_after])

In [None]:
def parse_pdb(pdb_file, verbose=True):
    """Collect the N, CA and C atom coordinates of every residue in a PDB file.

    The file is read with ``PDBParser(QUIET=True)`` and every model, chain
    and residue of the resulting structure is visited in iteration order.
    Residues that lack any of the three atoms are skipped.

    Args:
        pdb_file: Path to the PDB file (e.g. '12asA00.pdb').
        verbose: When True (the default, which keeps the previous
            behaviour) the coordinates of every retained residue are
            printed. Pass False when parsing many files to avoid flooding
            the notebook output.

    Returns:
        A list with one ``(N, CA, C)`` tuple per retained residue. Each
        tuple element is the value returned by that atom's ``get_coord()``.
    """
    parser = PDBParser(QUIET=True)
    structure = parser.get_structure("protein", pdb_file)

    # Atoms required for a residue to be kept, in the order they are stored.
    backbone_atoms = ("N", "CA", "C")

    backbone_coords = []
    for model in structure:
        for chain in model:
            for residue in chain:
                # Skip residues that are missing any of the required atoms.
                if all(atom_name in residue for atom_name in backbone_atoms):
                    backbone_coords.append(
                        tuple(residue[atom_name].get_coord() for atom_name in backbone_atoms)
                    )

    if verbose:
        # Residue numbers here are running counters over retained residues,
        # starting at 1; they are not the residue ids stored in the file.
        for i, (N_coord, CA_coord, C_coord) in enumerate(backbone_coords, start=1):
            print(f"Residue {i}:")
            print(f"  N:  {N_coord}")
            print(f"  CA: {CA_coord}")
            print(f"  C:  {C_coord}")

    return backbone_coords

In [None]:
cath_folder = "data/cath/dompdb/"  # Change this to your actual folder
MAX_FILES = 10  # only the first few entries are parsed in this cell

all_coords = []

# os.listdir returns names in arbitrary order, and sorting by length alone
# leaves equal-length names in that arbitrary order. Breaking ties by name
# makes the selected subset reproducible. Non-file entries (sub-directories)
# are dropped because they cannot be parsed as PDB files.
files = sorted(
    (
        name
        for name in os.listdir(cath_folder)
        if os.path.isfile(os.path.join(cath_folder, name))
    ),
    key=lambda name: (len(name), name),
)

for f in tqdm(files[:MAX_FILES]):
    print(f)
    all_coords.append(parse_pdb(os.path.join(cath_folder, f)))

In [None]:
# Build the dataset from the CATH domain PDB folder.
# NOTE(review): use_cache=False presumably bypasses any cached copy and
# debug=True presumably enables a reduced/debug mode - confirm against the
# CathCanonicalAnglesDataset implementation.
dataset = CathCanonicalAnglesDataset(
    'data/cath/dompdb',
    use_cache=False,
    debug=True,
)

In [None]:
NUM_GRAPHS = 9  # upper bound on the number of dataset entries exported

G = []
# Clamp to the dataset size so a short dataset does not raise IndexError.
for i in range(min(NUM_GRAPHS, len(dataset))):
    # Index once so 'lengths' and 'coords' are guaranteed to come from the
    # same sample (and the item is not computed twice).
    item = dataset[i]
    n = item['lengths'].item()
    coords = item['coords'][:n]

    # One label per node, cycling 0, 1, 2. Unlike np.tile([0, 1, 2], n // 3)
    # this always yields exactly n labels, so 'nodelabels' and 'nodepos' stay
    # aligned even when n is not a multiple of 3.
    # NOTE(review): the cycle presumably marks a repeating 3-row pattern in
    # 'coords' - confirm what the rows represent.
    labels = np.arange(n) % 3

    # Chain edges (j, j + 1) for consecutive nodes, numbered from 1, with a
    # constant third column of 0.
    edges = [[j, j + 1, 0] for j in range(1, n)]

    g = {
        'nodelabels': np.array(labels, dtype=np.uint32)[:, None],
        'nodepos': np.array(coords, dtype=np.float64),
        # reshape keeps the (num_edges, 3) layout even when there are no edges
        'edges': np.array(edges, dtype=np.uint32).reshape(-1, 3),
    }
    G.append(g)

scipy.io.savemat('data/cath/graphs.mat', {"G": G})