In [2]:
import mdtraj as md
import numpy as np
import pandas as pd
import sys

import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib as mpl

from scipy.cluster.hierarchy import linkage, dendrogram, fcluster
from sklearn.cluster import KMeans, DBSCAN
from natsort import natsorted
from pathlib import Path

sys.path.insert(0, '/home/rzhu/Desktop/projects/kinase_analysis/src/')
from funcs_featurise import *
from funcs_db_assign import *
from TrajData import *

In this notebook I look for representative crystal structures of Abl kinase in each of the three DFG conformations: DFG-in, DFG-inter, and DFG-out.

In [3]:
# Kinase to analyse; it selects both the structure directory and the
# residue definitions used by the featurisers.
protein = 'abl'

# Collect the crystal structure files (names matching kinoml*.pdb) in natural sort order.
crystal_dir = Path(f"/arc/human_{protein}/")
crystal_files = natsorted([str(f) for f in crystal_dir.glob("kinoml*.pdb")])
if not crystal_files:
    # Fail here with a clear message rather than passing empty arrays downstream.
    raise FileNotFoundError(f"No kinoml*.pdb structures found in {crystal_dir}")

# Parse each PDB once and feed the same structure to both featurisers
# (previously every file was loaded twice, once per feature set).
# NOTE(review): assumes the featurisers do not modify the structure in place — confirm.
dbdist_per_file = []
dbdihed_per_file = []
for crystal_file in crystal_files:
    structure = md.load(crystal_file)
    dbdist_per_file.append(dbdist_featuriser(structure, protein=protein))
    dbdihed_per_file.append(dbdihed_featuriser(structure, protein=protein))

# Stack the per-file features (same order as crystal_files) and drop singleton axes.
crystal_dbdist = np.array(dbdist_per_file).squeeze()
crystal_dbdihed = np.array(dbdihed_per_file).squeeze()

In [4]:
# Assign every crystal structure a DFG cluster label from its dbdist features.
# NOTE(review): the listing cells below treat 0 as DFG-in, 1 as DFG-inter and
# 2 as DFG-out — confirm this mapping against assign_dfg_spatial.
crystal_clusters = assign_dfg_spatial(crystal_dbdist)


In [None]:
# Display the cluster labels; entry i is the label of the structure in crystal_files[i].
crystal_clusters

array([2, 2, 2, 2, 0, 2, 0, 0, 0, 0, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 2, 2, 2, 2, 0, 0, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 2, 1, 1, 2, 2, 1,
       2, 2, 2, 2, 2, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [9]:
# Positions (into crystal_files) of the structures assigned to cluster 0.
(crystal_clusters == 0).nonzero()[0]

array([ 4,  6,  7,  8,  9, 14, 15, 16, 17, 18, 19, 29, 30, 31, 36, 37, 58,
       59])

In [17]:
# DFG-in: print the paths of the structures in cluster 0, one per line.
print(
    '\n'.join(crystal_files[idx] for idx in (crystal_clusters == 0).nonzero()[0])
)

/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2f4j_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g1t_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g1t_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g1t_chainC_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g1t_chainD_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2i_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2i_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2gqg_chainA_altlocA_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2gqg_chainA_altlocB_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2gqg_chainB_altlocA_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2gqg_chainB_altlocB_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKina

In [18]:
# DFG-inter: print the paths of the structures in cluster 1, one per line.
print(
    '\n'.join(crystal_files[idx] for idx in (crystal_clusters == 1).nonzero()[0])
)

/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2f_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_4xey_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_4xey_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_4zog_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_6bl8_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_6bl8_chainB_altlocA_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_6bl8_chainB_altlocB_protein.pdb


In [None]:
# DFG-out: print the paths of the structures in cluster 2, one per line.
print(
    '\n'.join(crystal_files[idx] for idx in (crystal_clusters == 2).nonzero()[0])
)

/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_1opl_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_1opl_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2e2b_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2e2b_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2fo0_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2f_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2h_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2g2h_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2hiw_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2hiw_chainB_altlocNone_protein.pdb
/arc/human_abl/kinoml_OEKLIFSKinaseApoFeaturizer_ABL1_2hyy_chainA_altlocNone_protein.pdb
/arc/human_abl/kinoml