# Investigate required values

Investigate which values must be predicted in order to fully and correctly specify a protein structure.

In [1]:
import os, sys
import importlib
import tempfile
from pathlib import Path
import json

import numpy as np
import matplotlib.pyplot as plt
import py3Dmol

import torch

# Make the sibling ``protdiff`` source directory importable. This relies on the
# kernel's working directory sitting next to ``protdiff`` (i.e. the notebook is
# run from its own folder).
SRC_DIR = os.path.join(os.path.dirname(os.getcwd()), "protdiff")
assert os.path.isdir(SRC_DIR), f"Source directory not found: {SRC_DIR}"
# Guard the append so re-running this cell does not accumulate duplicate entries.
# NOTE(review): the directory is appended (lowest priority), so an installed
# package that is also named ``datasets`` would be imported instead of the
# project module -- confirm none is present in this environment.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
import datasets
import angles_and_coords as ac
import tmalign  # So we can compare structural similarity

datasets.LOCAL_DATA_DIR

PosixPath('/home/t-kevinwu/projects/protein_diffusion/protdiff/data')

In [2]:
# Define some simple structures
sample_structures = [
    datasets.LOCAL_DATA_DIR / "1CRN.pdb",
]
# Fail early, and name the offending paths, if any sample file is absent.
assert all(s.exists() for s in sample_structures), (
    f"Missing sample structures: {[str(s) for s in sample_structures if not s.exists()]}"
)
len(sample_structures)

1

In [3]:
def view_pdb(fname: "str | os.PathLike", width: int = 400, height: int = 300) -> None:
    """
    View a PDB file in a Jupyter notebook
    See: https://william-dawson.github.io/using-py3dmol.html

    Args:
        fname: Path to the PDB file to display (a string or a path-like object).
        width: Width forwarded to ``py3Dmol.view``; defaults to 400.
        height: Height forwarded to ``py3Dmol.view``; defaults to 300.

    Returns:
        None. The viewer is rendered as a side effect via ``view.show()``.
    """
    # Read the whole file in one call instead of re-joining it line by line.
    with open(fname) as source:
        system = source.read()
    view = py3Dmol.view(width=width, height=height)
    view.addModelsAsFrames(system)
    # Cartoon style with 'spectrum' colouring, applied to the {'model': -1} selection.
    view.setStyle({'model': -1}, {"cartoon": {'color': 'spectrum'}})
    view.zoomTo()
    view.show()

# Render the first sample structure inline.
view_pdb(sample_structures[0])

In [4]:
# Instantiate the project's CathCanonicalAnglesDataset with split='train'.
# test_consistency() later hands this object to ac.create_new_chain as
# `sampled_values_dset`.
all_ft_train_dset = datasets.CathCanonicalAnglesDataset(split='train')
all_ft_train_dset

<datasets.CathCanonicalAnglesDataset at 0x7f885088e310>

In [8]:
# Re-execute the angles_and_coords module so edits made to it on disk are picked
# up without restarting the kernel.
importlib.reload(ac)

def test_consistency(fname:str):
    """Test the consistency of reconstructing a pdb file"""
    # Create the internal coordinates
    angles = ac.canonical_distances_and_dihedrals(fname)
    print(angles.head())
    with tempfile.TemporaryDirectory() as dirname:
        out_fname = os.path.join(dirname, "rebuilt_" + os.path.basename(fname))
        rebuilt = ac.create_new_chain(out_fname, angles, sampled_values_dset=all_ft_train_dset)
        score = tmalign.run_tmalign(fname, out_fname)
        angles_new = ac.canonical_distances_and_dihedrals(out_fname)
        print(angles_new.head())
        print(f"TM-score: {score:.4f}")
        view_pdb(out_fname)

# Round-trip the first sample structure: extract its internal coordinates,
# rebuild a PDB from them, and compare the result against the original.
test_consistency(sample_structures[0])

      0C:1N       phi       psi     omega       tau
0  1.335037  0.000000  2.577154  0.000000  1.889198
1  1.319983 -1.882033  2.519317  3.121976  1.896289
2  1.331092 -2.289355  2.326370 -3.132049  1.861109
3  1.307138 -2.075137  2.638986 -3.105629  1.918830
4  1.339749 -1.329619 -0.331253 -3.086661  2.037471
      0C:1N       phi       psi     omega       tau
0  1.335054  0.000000  2.576943  0.000000  0.840377
1  1.319876 -1.881957  2.518816  3.121253  1.895180
2  1.331101 -2.290207  2.325816 -3.131662  1.861204
3  1.307825 -2.074499  2.638767 -3.105107  1.918857
4  1.339154 -1.329886 -0.330857 -3.086320  2.037343
TM-score: 0.1839
