In [46]:
from collections import defaultdict

import tensorflow as tf
import numpy as np
import sklearn
from tensorflow.contrib import learn

import features_database

In [154]:
sa_threshold = 0.25
distance_threshold = 8

dssp_to_3 = defaultdict(
    lambda : 'C', 
    H='H',
    G='H',
    I='H',
    E='E'
)

ss_encoder = {
    'H': [1, 0, 0],
    'E': [0, 1, 0],
    'C': [0, 0, 1],   
}

In [98]:
def sliding_window(sequence, size=5):
    if size > 0 and len(sequence) >= size:
        for i in range(len(sequence) - size + 1):
            yield (i, i + size // 2, i + size)

In [99]:
list(sliding_window('DFYFNAIDFYFNAI'))

[(0, 2, 5),
 (1, 3, 6),
 (2, 4, 7),
 (3, 5, 8),
 (4, 6, 9),
 (5, 7, 10),
 (6, 8, 11),
 (7, 9, 12),
 (8, 10, 13),
 (9, 11, 14)]

In [90]:
db = features_database.SpatialFeaturesDatabase('spatial_features.db')
structure = db.retrieve('d16vpa_')

In [193]:
def feature_vector(structure, residue):
    features = np.ndarray(shape=(25))
    features[0:20] = structure['pssm'][i]
    features[20:23] = ss_encoder[dssp_to_3[secondary_structure[i]]]
    features[23:25] = [1, 0] if structure['solvent_accessibility'][i] > sa_threshold else [0, 1]
    return features

def feature_vectors(structure, i, j, window_size):
    features_i = []
    features_j = []
    
    for index in range (i - window_size // 2, i + window_size // 2 + 1):
        features_i.append(feature_vector(structure, index))
    
    for index in range (j - window_size // 2, j + window_size // 2 + 1):
         features_j.append(feature_vector(structure, index))
                           
    cmap = structure['distance_map'][i][j] < distance_threshold
    return features_i, features_j, cmap



In [194]:
features_i, features_j, cmap = feature_vectors(structure, 1, 20, 5)
cmap

False

In [195]:
features_i

[array([-1., -2., -3., -4., -1., -2., -3., -4., -3.,  2.,  4., -2.,  2.,
         0., -3., -2., -1., -2., -1.,  1.,  0.,  0.,  1.,  1.,  0.]),
 array([-1., -2., -3., -4., -1., -2., -3., -4., -3.,  2.,  4., -2.,  2.,
         0., -3., -2., -1., -2., -1.,  1.,  0.,  0.,  1.,  1.,  0.]),
 array([-1., -2., -3., -4., -1., -2., -3., -4., -3.,  2.,  4., -2.,  2.,
         0., -3., -2., -1., -2., -1.,  1.,  0.,  0.,  1.,  1.,  0.]),
 array([-1., -2., -3., -4., -1., -2., -3., -4., -3.,  2.,  4., -2.,  2.,
         0., -3., -2., -1., -2., -1.,  1.,  0.,  0.,  1.,  1.,  0.]),
 array([-1., -2., -3., -4., -1., -2., -3., -4., -3.,  2.,  4., -2.,  2.,
         0., -3., -2., -1., -2., -1.,  1.,  0.,  0.,  1.,  1.,  0.])]

In [24]:
astral = sklearn.datasets.base.Bunch()

astral.data = np.array([1,2,3])
astral.target = np.array([1, 0, 1])

'SRMPS'

In [81]:
# Sanity check, every residue should be in contact with itself
print(len(contact_map), sum([contact_map[i][i] for i in range(len(contact_map))]))

311 311


In [12]:
from sklearn import datasets, metrics

iris = datasets.load_iris()
classifier = learn.DNNClassifier(hidden_units=[10, 20, 10], n_classes=3)
classifier.fit(iris.data, iris.target, steps=200, batch_size=32)
score = metrics.accuracy_score(iris.target, classifier.predict(iris.data))
print("Accuracy: %f" % score)

Accuracy: 0.960000


In [22]:
iris.data

array([[ 5.1,  3.5,  1.4,  0.2],
       [ 4.9,  3. ,  1.4,  0.2],
       [ 4.7,  3.2,  1.3,  0.2],
       [ 4.6,  3.1,  1.5,  0.2],
       [ 5. ,  3.6,  1.4,  0.2],
       [ 5.4,  3.9,  1.7,  0.4],
       [ 4.6,  3.4,  1.4,  0.3],
       [ 5. ,  3.4,  1.5,  0.2],
       [ 4.4,  2.9,  1.4,  0.2],
       [ 4.9,  3.1,  1.5,  0.1],
       [ 5.4,  3.7,  1.5,  0.2],
       [ 4.8,  3.4,  1.6,  0.2],
       [ 4.8,  3. ,  1.4,  0.1],
       [ 4.3,  3. ,  1.1,  0.1],
       [ 5.8,  4. ,  1.2,  0.2],
       [ 5.7,  4.4,  1.5,  0.4],
       [ 5.4,  3.9,  1.3,  0.4],
       [ 5.1,  3.5,  1.4,  0.3],
       [ 5.7,  3.8,  1.7,  0.3],
       [ 5.1,  3.8,  1.5,  0.3],
       [ 5.4,  3.4,  1.7,  0.2],
       [ 5.1,  3.7,  1.5,  0.4],
       [ 4.6,  3.6,  1. ,  0.2],
       [ 5.1,  3.3,  1.7,  0.5],
       [ 4.8,  3.4,  1.9,  0.2],
       [ 5. ,  3. ,  1.6,  0.2],
       [ 5. ,  3.4,  1.6,  0.4],
       [ 5.2,  3.5,  1.5,  0.2],
       [ 5.2,  3.4,  1.4,  0.2],
       [ 4.7,  3.2,  1.6,  0.2],
       [ 4