# Imports

In [1]:
import glob
import os
import random
from typing import Callable, List, Union

import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader, Dataset
from torch.nn.functional import softmax
from Bio.PDB.Polypeptide import index_to_one, one_to_index

from cavity_model import (
    ResidueEnvironment,
    ResidueEnvironmentsDataset,
    ToTensor,
    CavityModel,
)

%load_ext nb_black

<IPython.core.display.Javascript object>

# Cavity Model

Download and process Cavity Model data

In [2]:
# # Run shell script that takes a .txt file with PDBIDs as input.
# !./get_parse_pdbs_cavity_model.sh data/pdbids_010.txt

<IPython.core.display.Javascript object>

Global variables for Cavity Model Training

In [3]:
# Prefer the GPU, but fall back to the CPU so the notebook still runs on
# machines without CUDA.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"  # "cpu" or "cuda"
# Fraction of the parsed PDB files that goes into the training set; the
# remainder is used for validation.
TRAIN_VAL_SPLIT = 0.8
# Batch size handed to every DataLoader in this notebook.
BATCH_SIZE = 100
# Learning rate of the Adam optimizer.
LEARNING_RATE = 3e-4
# Maximum number of training epochs.
EPOCHS = 10
# Early stopping triggers once the number of consecutive epochs without a new
# best validation loss exceeds this value.
PATIENCE_CUTOFF = 1

<IPython.core.display.Javascript object>

Load Parsed PDBs and perform train/val split

In [4]:
# Seed for the train/val split, so the same PDB files land in the same split
# on every "Restart & Run All".
SPLIT_SEED = 0

parsed_pdb_filenames = sorted(glob.glob("data/pdbs/parsed/*coord*"))
# Shuffle with a dedicated, seeded RNG: the split becomes reproducible without
# touching the state of the global `random` module.
random.Random(SPLIT_SEED).shuffle(parsed_pdb_filenames)

# The split is done on PDB files, not on individual residue environments.
n_train_pdbs = int(len(parsed_pdb_filenames) * TRAIN_VAL_SPLIT)
filenames_train = parsed_pdb_filenames[:n_train_pdbs]
filenames_val = parsed_pdb_filenames[n_train_pdbs:]

to_tensor_transformer = ToTensor(DEVICE)

dataset_train = ResidueEnvironmentsDataset(
    filenames_train, transformer=to_tensor_transformer
)
dataset_val = ResidueEnvironmentsDataset(
    filenames_val, transformer=to_tensor_transformer
)

dataloader_train = DataLoader(
    dataset_train,
    batch_size=BATCH_SIZE,
    shuffle=True,
    collate_fn=to_tensor_transformer.collate_cat,
    drop_last=True,
)
# Validation data is not shuffled: together with `drop_last=True` a shuffled
# loader would discard a different trailing batch every epoch, which adds noise
# to the validation loss used for early stopping. `drop_last` stays enabled so
# that all validation batches have the same size.
dataloader_val = DataLoader(
    dataset_val,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=to_tensor_transformer.collate_cat,
    drop_last=True,
)

print(
    f"Training data set includes {len(filenames_train)} pdbs with "
    f"{len(dataset_train)} environments."
)
print(
    f"Validation data set includes {len(filenames_val)} pdbs with "
    f"{len(dataset_val)} environments."
)

Training data set includes 8 pdbs with 4123 environments.
Validation data set includes 2 pdbs with 1135 environments.


<IPython.core.display.Javascript object>

Training helper functions

In [5]:
def _train_step(
    cavity_model_net: CavityModel,
    optimizer: torch.optim.Adam,
    loss_function: torch.nn.CrossEntropyLoss,
) -> (torch.Tensor, float):
    """
    Helper function to take a training step
    """
    cavity_model_net.train()
    optimizer.zero_grad()
    batch_y_pred = cavity_model_net(batch_x)
    loss_batch = loss_function(batch_y_pred, torch.argmax(batch_y, dim=-1))
    loss_batch.backward()
    optimizer.step()
    return (batch_y_pred, loss_batch.detach().cpu().item())


def _eval_loop(
    cavity_model_net: CavityModel,
    data_loader_val,
    loss_function: torch.nn.CrossEntropyLoss,
) -> (float, float):
    """
    Helper function to perform an eval loop
    """
    # Eval loop. Due to memory, we don't pass the whole eval set to the model
    labels_true_val = []
    labels_pred_val = []
    loss_batch_list_val = []
    for batch_x_val, batch_y_val in dataloader_val:
        cavity_model_net.eval()
        batch_y_pred_val = cavity_model_net(batch_x_val)

        loss_batch_val = loss_function(
            batch_y_pred_val, torch.argmax(batch_y_val, dim=-1)
        )
        loss_batch_list_val.append(loss_batch_val.detach().cpu().item())

        labels_true_val.append(torch.argmax(batch_y_val, dim=-1).detach().cpu().numpy())
        labels_pred_val.append(
            torch.argmax(batch_y_pred_val, dim=-1).detach().cpu().numpy()
        )
    acc_val = np.mean(
        (np.reshape(labels_true_val, -1) == np.reshape(labels_pred_val, -1))
    )
    loss_val = np.mean(loss_batch_list_val)
    return acc_val, loss_val

<IPython.core.display.Javascript object>

Train the cavity model

In [6]:
# Define model
cavity_model_net = CavityModel(DEVICE).to(DEVICE)
loss_function = torch.nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(cavity_model_net.parameters(), lr=LEARNING_RATE)

# Create directory for model files (no error if it already exists)
models_dirpath = "cavity_models/"
os.makedirs(models_dirpath, exist_ok=True)

# Train loop
current_best_epoch_idx = -1
# Start from +inf so that the first finite validation loss always counts as an
# improvement, whatever its magnitude.
current_best_loss_val = float("inf")
patience = 0
epoch_idx_to_model_path = {}
for epoch in range(EPOCHS):
    labels_true = []
    labels_pred = []
    loss_batch_list = []
    for batch_x, batch_y in dataloader_train:
        # Take train step. NOTE: `_train_step` reads the current batch from
        # the notebook-level `batch_x` / `batch_y` set by this loop.
        batch_y_pred, loss_batch = _train_step(
            cavity_model_net, optimizer, loss_function
        )
        loss_batch_list.append(loss_batch)

        labels_true.append(torch.argmax(batch_y, dim=-1).detach().cpu().numpy())
        labels_pred.append(torch.argmax(batch_y_pred, dim=-1).detach().cpu().numpy())

    # Train epoch metrics
    acc_train = np.mean((np.reshape(labels_true, -1) == np.reshape(labels_pred, -1)))
    loss_train = np.mean(loss_batch_list)

    # Validation epoch metrics
    acc_val, loss_val = _eval_loop(cavity_model_net, dataloader_val, loss_function)

    print(
        f"Epoch {epoch:2d}. Train loss: {loss_train:5.3f}. "
        f"Train Acc: {acc_train:4.2f}. Val loss: {loss_val:5.3f}. "
        f"Val Acc {acc_val:4.2f}"
    )

    # Save model (one checkpoint per epoch, inside `models_dirpath`)
    model_path = os.path.join(models_dirpath, f"model_epoch_{epoch:02d}.pt")
    epoch_idx_to_model_path[epoch] = model_path
    torch.save(cavity_model_net.state_dict(), model_path)

    # Early stopping: stop once more than PATIENCE_CUTOFF consecutive epochs
    # have failed to improve on the best validation loss.
    if loss_val < current_best_loss_val:
        current_best_loss_val = loss_val
        current_best_epoch_idx = epoch
        patience = 0
    else:
        patience += 1
    if patience > PATIENCE_CUTOFF:
        print("Early stopping activated.")
        break

# Without this check the lookup below fails with an opaque KeyError(-1), e.g.
# when the validation loss is NaN in every epoch or EPOCHS is 0.
if current_best_epoch_idx not in epoch_idx_to_model_path:
    raise RuntimeError(
        "No epoch improved on the initial validation loss, so there is no "
        "best model checkpoint to report."
    )

print(
    f"Best epoch idx: {current_best_epoch_idx} with validation loss: "
    f"{current_best_loss_val:5.3f} and model_path: "
    f"{epoch_idx_to_model_path[current_best_epoch_idx]}"
)

Epoch  0. Train loss: 2.472. Train Acc: 0.31. Val loss: 3.148. Val Acc 0.07
Epoch  1. Train loss: 1.259. Train Acc: 0.77. Val loss: 2.740. Val Acc 0.21
Epoch  2. Train loss: 0.713. Train Acc: 0.93. Val loss: 2.767. Val Acc 0.21
Epoch  3. Train loss: 0.369. Train Acc: 0.99. Val loss: 2.798. Val Acc 0.21
Early stopping activated.
Best epoch idx: 1 with validation loss: 2.740 and model_path: cavity_models/model_epoch_01.pt


<IPython.core.display.Javascript object>

# ddG Prediction

Global variables

In [7]:
# Small constant added to the softmax probabilities before the log is taken
# when the predicted NLLs are computed, so a zero probability cannot yield inf.
EPS = 1e-9

<IPython.core.display.Javascript object>

Parse PDBs for DMS, Guerois and Protein G data sets

In [8]:
# # Parse PDBs for which we have ddG data
# !./get_parse_pdbs_dowstream_task.sh

<IPython.core.display.Javascript object>

Make dict for residue environments for easy look up

In [9]:
# Build, per data set, a dict mapping a residue key to its residue environment
# so that ddG rows can be matched to environments by key.
parsed_pdbs_wildcards = {
    "dms": "data/data_dms/pdbs_parsed/*coord*",
    "protein_g": "data/data_protein_g/pdbs_parsed/*coord*",
    "guerois": "data/data_guerois/pdbs_parsed/*coord*",
    "symmetric": "data/data_symmetric/pdbs_parsed/*coord*",
}

resenv_datasets_look_up = {}
for dataset_key, pdbs_wildcard in parsed_pdbs_wildcards.items():
    parsed_pdb_filenames = sorted(glob.glob(pdbs_wildcard))
    # No transformer: the raw ResidueEnvironment objects are stored.
    dataset = ResidueEnvironmentsDataset(parsed_pdb_filenames, transformer=None)
    # Key layout: <pdb id><chain id>_<pdb residue number><one-letter residue type>
    resenv_datasets_look_up[dataset_key] = {
        (
            f"{resenv.pdb_id}{resenv.chain_id}_{resenv.pdb_residue_number}"
            f"{index_to_one(resenv.restype_index)}"
        ): resenv
        for resenv in dataset
    }

<IPython.core.display.Javascript object>

Load ddG data to dataframe

In [10]:
# Location of the parsed ddG table of every data set.
ddg_csv_paths = {
    "dms": "data/data_dms/ddgs_parsed.csv",
    "protein_g": "data/data_protein_g/ddgs_parsed.csv",
    "guerois": "data/data_guerois/ddgs_parsed.csv",
    "symmetric_direct": "data/data_symmetric/ddgs_parsed_direct.csv",
    "symmetric_inverse": "data/data_symmetric/ddgs_parsed_inverse.csv",
}

# One dataframe per data set, keyed by data set name.
ddg_data_dict = {
    ddg_key: pd.read_csv(csv_path) for ddg_key, csv_path in ddg_csv_paths.items()
}

<IPython.core.display.Javascript object>

Populate dataframes with wt ResidueEnvironment objects and wt and mt restype indices

In [11]:
print(
    "Dropping data points where residue is not defined in structure "
    "or due to missing parsed pdb file"
)
# Add wt residue environments to dataframes
for ddg_data_key, ddg_df in ddg_data_dict.items():
    # Both symmetric data sets (direct / inverse) share the single "symmetric"
    # set of parsed structures. This is resolved once per data set, not per row.
    if "symmetric" in ddg_data_key:
        ddg_data_key_adhoc_fix = "symmetric"
    else:
        ddg_data_key_adhoc_fix = ddg_data_key
    # A missing data set yields an empty look-up, so all of its rows are
    # dropped below (same outcome as a failed look-up for every row).
    resenv_look_up = resenv_datasets_look_up.get(ddg_data_key_adhoc_fix, {})

    # Look-up key: <pdbid><chainid>_<residue number><wt one-letter code>, where
    # `variant` is formatted as <wt letter><residue number><mt letter>.
    # Rows without a matching residue environment get NaN and are dropped.
    resenvs_ddg_data = [
        resenv_look_up.get(f"{pdbid}{chainid}_{variant[1:-1]}{variant[0]}", np.nan)
        for pdbid, chainid, variant in zip(
            ddg_df["pdbid"], ddg_df["chainid"], ddg_df["variant"]
        )
    ]
    ddg_df["resenv"] = resenvs_ddg_data

    n_datapoints_before = ddg_df.shape[0]
    # NOTE: this drops rows with a NaN in *any* column, not only in "resenv".
    ddg_df.dropna(inplace=True)
    n_datapoints_after = ddg_df.shape[0]
    print(
        f"dropped {n_datapoints_before - n_datapoints_after:4d} / "
        f"{n_datapoints_before:4d} data points from dataset {ddg_data_key}"
    )

    # Add wt and mt idxs to df: first / last character of `variant` mapped to
    # the residue type index. Column-wise, so it also works on an empty frame.
    ddg_df["wt_idx"] = ddg_df["variant"].str[0].map(one_to_index)
    ddg_df["mt_idx"] = ddg_df["variant"].str[-1].map(one_to_index)

Dropping data points where residue is not defined in structure or due to missing parsed pdb file
dropped 1187 / 8096 data points from dataset dms
dropped    0 /  907 data points from dataset protein_g
dropped    0 /  911 data points from dataset guerois
dropped    0 /  342 data points from dataset symmetric_direct
dropped    1 /  342 data points from dataset symmetric_inverse


<IPython.core.display.Javascript object>

Populate dataframes with predicted NLLs and isolated WT and MT predicted NLLs as well as NLFs

In [12]:
# Load best performing cavity model from previous training
cavity_model_infer_net = CavityModel(DEVICE).to(DEVICE)
cavity_model_infer_net.load_state_dict(
    # map_location makes loading independent of the device the checkpoint was
    # saved from.
    torch.load(epoch_idx_to_model_path[current_best_epoch_idx], map_location=DEVICE)
)
cavity_model_infer_net.eval()

# Load PDB amino acid frequencies used to approximate unfolded states
pdb_nlfs = -np.log(np.load("data/pdb_frequencies.npz")["frequencies"])

# Add predicted NLLs to ddG dataframes
for ddg_data_key in ddg_data_dict.keys():
    df = ddg_data_dict[ddg_data_key]

    # Perform predictions on matched residue environments
    ddg_resenvs = list(df["resenv"].values)
    ddg_resenv_dataset = ResidueEnvironmentsDataset(
        ddg_resenvs, transformer=ToTensor(DEVICE)
    )

    # Define dataloader for resenvs matched to ddG data. Order must be kept
    # (no shuffling, no dropped batch) so predictions line up with the rows.
    ddg_resenv_dataloader = DataLoader(
        ddg_resenv_dataset,
        batch_size=BATCH_SIZE,
        shuffle=False,
        collate_fn=to_tensor_transformer.collate_cat,
        drop_last=False,
    )

    # Infer NLLs. Gradients are not needed for inference, so autograd is
    # disabled to avoid building a graph for every batch.
    pred_nlls = []
    with torch.no_grad():
        for batch_x, _ in ddg_resenv_dataloader:
            batch_pred_nlls = (
                -torch.log(softmax(cavity_model_infer_net(batch_x), dim=-1) + EPS)
                .detach()
                .cpu()
                .numpy()
            )
            pred_nlls.append(batch_pred_nlls)
    # One NLL vector (one entry per residue type) per dataframe row
    pred_nlls_list = list(np.vstack(pred_nlls))

    # Add NLLs to dataframe
    df["nlls"] = pred_nlls_list

    # Isolate WT and MT NLLs and add to dataframe
    df["wt_nll"] = df.apply(lambda row: row["nlls"][row["wt_idx"]], axis=1)
    df["mt_nll"] = df.apply(lambda row: row["nlls"][row["mt_idx"]], axis=1)

    # Add pdb negative log frequencies to df
    df["wt_nlf"] = df.apply(lambda row: pdb_nlfs[row["wt_idx"]], axis=1)
    df["mt_nlf"] = df.apply(lambda row: pdb_nlfs[row["mt_idx"]], axis=1)

    display(df)

Unnamed: 0,pdbid,chainid,variant,ddg,resenv,wt_idx,mt_idx,nlls,wt_nll,mt_nll,wt_nlf,mt_nlf
120,1D5R,A,R14A,-0.087552,<ResidueEnvironment with 141 atoms. pdb_id: 1D...,14,0,"[3.7604918, 3.5315676, 1.8490579, 2.622114, 3....",3.613005,3.760492,3.035567,2.483982
121,1D5R,A,R14D,-0.142873,<ResidueEnvironment with 141 atoms. pdb_id: 1D...,14,2,"[3.7604918, 3.5315676, 1.8490579, 2.622114, 3....",3.613005,1.849058,3.035567,2.824680
122,1D5R,A,R14E,0.001090,<ResidueEnvironment with 141 atoms. pdb_id: 1D...,14,3,"[3.7604918, 3.5315676, 1.8490579, 2.622114, 3....",3.613005,2.622114,3.035567,2.712832
123,1D5R,A,R14G,-0.046315,<ResidueEnvironment with 141 atoms. pdb_id: 1D...,14,5,"[3.7604918, 3.5315676, 1.8490579, 2.622114, 3....",3.613005,2.538222,3.035567,2.560213
124,1D5R,A,R14I,0.335560,<ResidueEnvironment with 141 atoms. pdb_id: 1D...,14,7,"[3.7604918, 3.5315676, 1.8490579, 2.622114, 3....",3.613005,3.199334,3.035567,2.854018
...,...,...,...,...,...,...,...,...,...,...,...,...
8091,2H11,A,K245R,0.168083,<ResidueEnvironment with 114 atoms. pdb_id: 2H...,8,14,"[3.6974182, 2.9205883, 2.4144835, 3.2665694, 3...",2.878061,3.369781,2.828313,3.035567
8092,2H11,A,K245S,0.334371,<ResidueEnvironment with 114 atoms. pdb_id: 2H...,8,15,"[3.6974182, 2.9205883, 2.4144835, 3.2665694, 3...",2.878061,2.167184,2.828313,2.844502
8093,2H11,A,K245T,0.099750,<ResidueEnvironment with 114 atoms. pdb_id: 2H...,8,16,"[3.6974182, 2.9205883, 2.4144835, 3.2665694, 3...",2.878061,3.840221,2.828313,2.896883
8094,2H11,A,K245V,0.328221,<ResidueEnvironment with 114 atoms. pdb_id: 2H...,8,17,"[3.6974182, 2.9205883, 2.4144835, 3.2665694, 3...",2.878061,2.930480,2.828313,2.623741


Unnamed: 0,pdbid,chainid,variant,ddg,resenv,wt_idx,mt_idx,nlls,wt_nll,mt_nll,wt_nlf,mt_nlf
0,1PGA,A,M1A,0.1407,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,0,"[4.402693, 3.0883029, 3.9090133, 3.6181803, 3....",3.058156,4.402693,3.785957,2.483982
1,1PGA,A,M1D,0.3795,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,2,"[4.402693, 3.0883029, 3.9090133, 3.6181803, 3....",3.058156,3.909013,3.785957,2.824680
2,1PGA,A,M1E,0.6414,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,3,"[4.402693, 3.0883029, 3.9090133, 3.6181803, 3....",3.058156,3.618180,3.785957,2.712832
3,1PGA,A,M1L,0.4573,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,9,"[4.402693, 3.0883029, 3.9090133, 3.6181803, 3....",3.058156,3.879457,3.785957,2.433114
4,1PGA,A,T2E,0.1299,<ResidueEnvironment with 157 atoms. pdb_id: 1P...,16,3,"[3.7131631, 2.6506503, 2.21733, 3.5095992, 3.7...",3.198577,3.509599,2.896883,2.712832
...,...,...,...,...,...,...,...,...,...,...,...,...
902,1PGA,A,M1Q,0.4025,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,13,"[4.365631, 3.1383235, 3.8130198, 3.7375474, 3....",3.036116,2.806325,3.785957,3.338515
903,1PGA,A,M1R,0.5102,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,14,"[4.365631, 3.1383235, 3.8130198, 3.7375474, 3....",3.036116,3.168108,3.785957,3.035567
904,1PGA,A,M1S,0.6800,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,15,"[4.365631, 3.1383235, 3.8130198, 3.7375474, 3....",3.036116,3.386448,3.785957,2.844502
905,1PGA,A,M1T,0.4983,<ResidueEnvironment with 130 atoms. pdb_id: 1P...,10,16,"[4.365631, 3.1383235, 3.8130198, 3.7375474, 3....",3.036116,2.468205,3.785957,2.896883


Unnamed: 0,pdbid,chainid,variant,ddg,resenv,wt_idx,mt_idx,nlls,wt_nll,mt_nll,wt_nlf,mt_nlf
0,171L,A,A45E,0.01,<ResidueEnvironment with 189 atoms. pdb_id: 17...,0,3,"[2.9020095, 2.8835154, 2.1038797, 3.0346339, 3...",2.902009,3.034634,2.483982,2.712832
1,1A2P,A,Y103F,0.00,<ResidueEnvironment with 211 atoms. pdb_id: 1A...,19,4,"[3.3433564, 3.2358327, 2.2314699, 3.9513826, 3...",3.515742,3.086743,3.311580,3.207937
2,1A2P,A,T105V,2.24,<ResidueEnvironment with 157 atoms. pdb_id: 1A...,16,17,"[3.9847224, 3.0204551, 1.5820178, 3.165875, 3....",2.894643,3.967662,2.896883,2.623741
3,1A2P,A,I109A,2.07,<ResidueEnvironment with 182 atoms. pdb_id: 1A...,7,0,"[3.1244717, 3.211868, 3.8327773, 3.0644612, 2....",2.412011,3.124472,2.854018,2.483982
4,1A2P,A,I109V,0.76,<ResidueEnvironment with 182 atoms. pdb_id: 1A...,7,17,"[3.0670598, 3.2441483, 3.6173377, 2.9661434, 2...",2.378492,2.202275,2.854018,2.623741
...,...,...,...,...,...,...,...,...,...,...,...,...
906,4LYZ,A,D101N,-0.04,<ResidueEnvironment with 124 atoms. pdb_id: 4L...,2,11,"[3.360403, 2.6875033, 2.72565, 4.098991, 3.439...",2.725650,4.340140,2.824680,3.127102
907,4LYZ,A,I55A,4.40,<ResidueEnvironment with 320 atoms. pdb_id: 4L...,7,0,"[4.920697, 3.246707, 2.8133168, 3.8427093, 3.7...",4.722183,4.920697,2.854018,2.483982
908,4LYZ,A,I55T,4.96,<ResidueEnvironment with 320 atoms. pdb_id: 4L...,7,16,"[4.920697, 3.246707, 2.8133168, 3.8427093, 3.7...",4.722183,2.784832,2.854018,2.896883
909,4LYZ,A,I55V,0.91,<ResidueEnvironment with 320 atoms. pdb_id: 4L...,7,17,"[4.920697, 3.246707, 2.8133168, 3.8427093, 3.7...",4.722183,2.609725,2.854018,2.623741


Unnamed: 0,pdbid,chainid,variant,ddg,resenv,wt_idx,mt_idx,nlls,wt_nll,mt_nll,wt_nlf,mt_nlf
0,1AMQ,A,C191Y,2.3,<ResidueEnvironment with 278 atoms. pdb_id: 1A...,1,19,"[2.957762, 2.6631494, 3.4392724, 4.1303334, 3....",2.663149,3.032739,4.335140,3.311580
1,1AMQ,A,C191F,1.6,<ResidueEnvironment with 278 atoms. pdb_id: 1A...,1,4,"[2.957762, 2.6631494, 3.4392724, 4.1303334, 3....",2.663149,3.469451,4.335140,3.207937
2,1AMQ,A,C191W,3.9,<ResidueEnvironment with 278 atoms. pdb_id: 1A...,1,18,"[2.957762, 2.6631494, 3.4392724, 4.1303334, 3....",2.663149,3.277944,4.335140,4.250501
3,1AMQ,A,C191S,1.9,<ResidueEnvironment with 278 atoms. pdb_id: 1A...,1,15,"[2.957762, 2.6631494, 3.4392724, 4.1303334, 3....",2.663149,2.806900,4.335140,2.844502
4,1BNI,A,F7L,4.1,<ResidueEnvironment with 238 atoms. pdb_id: 1B...,4,9,"[4.3109593, 3.0232787, 3.6305857, 3.9481006, 2...",2.652364,1.714721,3.207937,2.433114
...,...,...,...,...,...,...,...,...,...,...,...,...
337,5PTI,A,F22A,1.2,<ResidueEnvironment with 262 atoms. pdb_id: 5P...,4,0,"[5.325358, 2.9873857, 2.3314087, 5.3781247, 3....",3.112593,5.325358,3.207937,2.483982
338,5PTI,A,Y23A,5.9,<ResidueEnvironment with 276 atoms. pdb_id: 5P...,19,0,"[4.381645, 2.8840778, 3.1311677, 3.0332594, 2....",2.276411,4.381645,3.311580,2.483982
339,5PTI,A,Y35G,5.0,<ResidueEnvironment with 239 atoms. pdb_id: 5P...,19,5,"[2.8341925, 2.6910563, 3.8256054, 3.234987, 3....",2.732100,3.938275,3.311580,2.560213
340,5PTI,A,N43G,5.7,<ResidueEnvironment with 255 atoms. pdb_id: 5P...,11,5,"[3.248741, 2.6551363, 1.5006171, 3.1649044, 3....",2.777148,4.406559,3.127102,2.560213


Unnamed: 0,pdbid,chainid,variant,ddg,resenv,wt_idx,mt_idx,nlls,wt_nll,mt_nll,wt_nlf,mt_nlf
1,1QIS,A,F191C,-1.6,<ResidueEnvironment with 275 atoms. pdb_id: 1Q...,4,1,"[2.736268, 2.6401606, 2.8751101, 3.456371, 2.5...",2.586200,2.640161,3.207937,4.335140
2,1QIT,A,W191C,-3.9,<ResidueEnvironment with 273 atoms. pdb_id: 1Q...,18,1,"[2.4796748, 2.8166325, 1.9928858, 3.3312378, 3...",4.040677,2.816633,4.250501,4.335140
3,5EAA,A,S191C,-1.9,<ResidueEnvironment with 275 atoms. pdb_id: 5E...,15,1,"[2.5849197, 2.2446847, 2.6969318, 3.619115, 4....",3.066625,2.244685,2.844502,4.335140
4,1BRG,A,L7F,-4.1,<ResidueEnvironment with 217 atoms. pdb_id: 1B...,9,4,"[4.112026, 2.833658, 2.3637228, 3.5993729, 2.8...",1.585139,2.895123,2.433114,3.207937
5,1BRH,A,A14L,-4.5,<ResidueEnvironment with 264 atoms. pdb_id: 1B...,0,9,"[2.3148243, 3.4878488, 3.7286549, 3.098855, 2....",2.314824,2.728974,2.483982,2.433114
...,...,...,...,...,...,...,...,...,...,...,...,...
337,1BTI,A,A22F,-1.2,<ResidueEnvironment with 268 atoms. pdb_id: 1B...,0,4,"[3.3072624, 2.5088875, 3.529256, 4.178809, 2.5...",3.307262,2.505421,2.483982,3.207937
338,1BPT,A,A23Y,-5.9,<ResidueEnvironment with 277 atoms. pdb_id: 1B...,0,19,"[3.4222112, 2.8950775, 3.0501826, 3.0206501, 2...",3.422211,2.430112,2.483982,3.311580
339,8PTI,A,G35Y,-5.0,<ResidueEnvironment with 203 atoms. pdb_id: 8P...,5,19,"[4.1232142, 3.6963515, 1.3021163, 3.171582, 3....",2.849258,4.265238,2.560213,3.311580
340,1NAG,A,G43N,-5.7,<ResidueEnvironment with 243 atoms. pdb_id: 1N...,5,11,"[4.2341886, 2.8076866, 1.9932888, 3.4339652, 3...",3.458206,3.278673,2.560213,3.127102


<IPython.core.display.Javascript object>