# Investigate required values

Investigate which values must be predicted in order to fully and correctly specify a protein structure.

In [1]:
import os, sys
import importlib
import tempfile
from pathlib import Path
import json

import numpy as np
import matplotlib.pyplot as plt
import py3Dmol

import torch

# Make the sibling "protdiff" source directory importable from this notebook.
# The path is resolved relative to the current working directory, so the
# notebook has to be run from a directory that sits next to "protdiff".
SRC_DIR = os.path.join(os.path.dirname(os.getcwd()), "protdiff")
assert os.path.isdir(SRC_DIR), f"Expected protdiff source directory at {SRC_DIR}"
# Guard the append so re-running this cell does not stack duplicate entries.
if SRC_DIR not in sys.path:
    sys.path.append(SRC_DIR)
# NOTE(review): SRC_DIR is appended (searched last), so an installed package
# that is also named `datasets` would be imported instead of the local
# module -- confirm this cannot happen in the target environment.
import datasets
import angles_and_coords as ac
import tmalign  # So we can compare structural similarity

# Display the data directory the local `datasets` module resolved to.
datasets.LOCAL_DATA_DIR

PosixPath('/home/t-kevinwu/protdiff/data')

In [2]:
# Reference PDB structures used by the reconstruction checks in this notebook
sample_structures = [
    datasets.LOCAL_DATA_DIR / pdb_name for pdb_name in ("1CRN.pdb",)
]
# Stop here if any of the structure files is missing on disk
assert all(structure.exists() for structure in sample_structures)
len(sample_structures)

1

In [3]:
def view_pdb(fname: str):
    """
    Display the structure stored in a PDB file inside a Jupyter notebook.

    The file is read as text, loaded into a 400x300 py3Dmol viewer, styled
    as a cartoon with the 'spectrum' colouring, zoomed to fit and shown.
    Nothing is returned.

    See: https://william-dawson.github.io/using-py3dmol.html
    """
    # Read the whole file at once; the viewer takes the PDB text, not a path
    with open(fname) as pdb_handle:
        pdb_text = pdb_handle.read()

    viewer = py3Dmol.view(width=400, height=300)
    viewer.addModelsAsFrames(pdb_text)
    viewer.setStyle({"model": -1}, {"cartoon": {"color": "spectrum"}})
    viewer.zoomTo()
    viewer.show()

# Render the first sample structure (1CRN.pdb) as a visual reference
view_pdb(sample_structures[0])

In [4]:
# Build the 'train' split of datasets.CathCanonicalAnglesDataset; the bare
# name on the last line displays the object's repr as the cell output.
all_ft_train_dset = datasets.CathCanonicalAnglesDataset(split='train')
all_ft_train_dset

<datasets.CathCanonicalAnglesDataset at 0x7f8b827dbbb0>

In [28]:
# Reload the local angles_and_coords module (imported as `ac`) so that edits
# made to it on disk take effect without restarting the kernel.
importlib.reload(ac)

# https://arxiv.org/pdf/2205.04676.pdf
# N:CA:C = tau

# Full spec should be angles_to_use=["N:CA:C", "CA:C:1N", "C:1N:1CA", "phi", "psi", "omega"], dists_to_use=["N:CA", "CA:C", "C:1N"]

def test_consistency(fname:str, angles_to_use=["phi", "psi", "omega"], dists_to_use=["N:CA", "CA:C", "C:1N"]):
    """Test the consistency of reconstructing a pdb file"""
    # Create the internal coordinates
    angles = ac.canonical_distances_and_dihedrals(fname, distances=dists_to_use, angles=angles_to_use)
    print(angles.head())
    with tempfile.TemporaryDirectory() as dirname:
        out_fname = os.path.join(dirname, "rebuilt_" + os.path.basename(fname))
        # rebuilt = ac.create_new_chain(
        #     out_fname, angles,
        #     angles_to_set=angles_to_use, distances_to_set=dists_to_use
        # )
        rebuid = ac.create_new_chain_nerf(out_fname, angles)
        score = tmalign.run_tmalign(fname, out_fname)
        angles_new = ac.canonical_distances_and_dihedrals(out_fname, distances=dists_to_use, angles=angles_to_use)
        print(angles_new.head())
        print(f"TM-score: {score:.4f}")
        view_pdb(out_fname)

# Round-trip the first sample structure (1CRN.pdb) with the default angle and distance sets.
# NOTE(review): in the output recorded below the TM-score is only 0.3595, and the rebuilt
# dihedral values look like the originals under different column labels (e.g. rebuilt psi
# in row 1 is close to the original phi in row 1) -- possibly an angle-ordering mismatch
# in the rebuild step; confirm.
test_consistency(sample_structures[0])

       N:CA      CA:C      C:1N       phi       psi     omega
0  1.497996  1.508770  1.335037  0.000000  2.577154  0.000000
1  1.494683  1.512180  1.319983 -1.882033  2.519317  3.121976
2  1.480419  1.516032  1.331092 -2.289355  2.326370 -3.132049
3  1.481381  1.523938  1.307138 -2.075137  2.638986 -3.105629
4  1.455282  1.496306  1.339749 -1.329619 -0.331253 -3.086661
    N:CA      CA:C      C:1N       phi           psi     omega
0  1.355  1.530298  1.324617  0.000000  4.537247e-17  0.000000
1  0.000  1.529704  1.325175 -0.000309 -1.882685e+00  2.577290
2  0.000  1.530932  1.324824  3.123053 -2.289791e+00  2.519505
3  0.000  1.530329  1.324263 -3.132422 -2.074805e+00  2.325685
4  0.000  1.529990  1.325490 -3.105231 -1.330258e+00  2.639346
TM-score: 0.3595
