In [1]:
from biotite.structure import distance
import numpy as np  
import os
import sys
from biotite.structure import superimpose

sys.path.append('../../utils/')
import biotite_utils
import dataset_utils

# Example apo/holo identifiers kept at module level for ad-hoc experiments.
apo, holo = '1a4uB', '3rj9C'

In [2]:

def compute_distances(dataset_path, output_path):
    """Compute apo-to-holo distances after superimposition and save them per apo entry.

    For every apo/holo pair returned by
    ``dataset_utils.load_main_apo_holo_pairs(train_set, multichain=False)`` the
    two structures are loaded, their sequences aligned, the aligned backbones
    superimposed, and the distance between each aligned backbone position is
    written to ``{output_path}/{apo}.npy``.

    The saved array is a float array with one entry per element of the
    filtered apo backbone. Positions that have no aligned holo counterpart
    keep the sentinel value ``-1.0``.

    Parameters
    ----------
    dataset_path
        Location handed to ``dataset_utils.load_train_set``.
    output_path
        Directory that receives one ``.npy`` file per apo identifier. It is
        created if it does not exist. Existing result files are not
        recomputed.
    """
    # Pairs that trigger an unresolved biotite error; they are skipped on purpose.
    skipped_pairs = {("8j1kA", "3ouiA"), ("8pfpA", "7gquA")}

    train_set = dataset_utils.load_train_set(dataset_path)
    apo_holo_pairs = dataset_utils.load_main_apo_holo_pairs(train_set, multichain=False)

    # np.save does not create missing directories.
    os.makedirs(output_path, exist_ok=True)

    for apo, holo in apo_holo_pairs.items():
        print(f'Processing {apo}:{holo}')

        if (apo, holo) in skipped_pairs:
            continue

        result_file = f'{output_path}/{apo}.npy'
        if os.path.exists(result_file):
            continue

        # Everything after the first four characters of the identifier is
        # passed to the helpers as their second argument (presumably the
        # chain ID - confirm against biotite_utils).
        apo_chain = apo[4:]
        holo_chain = holo[4:]

        apo_structure = biotite_utils.load_structure(apo)
        apo_sequence = biotite_utils.get_sequence(apo_structure, apo_chain)

        holo_structure = biotite_utils.load_structure(holo)
        holo_sequence = biotite_utils.get_sequence(holo_structure, holo_chain)

        alignment = biotite_utils.align_sequences(apo_sequence, holo_sequence)

        # Restrict both structures to the aligned positions, otherwise the
        # superimpose function will fail.
        # NOTE(review): a biotite trace marks gaps with -1; how
        # get_protein_backbone treats such entries is not visible here -
        # TODO confirm.
        apo_indices = [pair[0] for pair in alignment[0].trace]
        holo_indices = [pair[1] for pair in alignment[0].trace]

        apo_backbone = biotite_utils.get_protein_backbone(apo_structure, apo_chain, indices=apo_indices)
        holo_backbone = biotite_utils.get_protein_backbone(holo_structure, holo_chain, indices=holo_indices)

        holo_backbone, _ = superimpose(apo_backbone, holo_backbone)

        # Re-run the alignment on the filtered backbones: the indices may have
        # shifted because of the filtering done for the superimposition.
        apo_sequence = biotite_utils.get_sequence(apo_backbone, apo_chain, from_backbone=True)
        holo_sequence = biotite_utils.get_sequence(holo_backbone, holo_chain, from_backbone=True)

        alignment = biotite_utils.align_sequences(apo_sequence, holo_sequence)

        # Float fill value: an integer array (np.full(n, -1)) would silently
        # truncate every stored distance to a whole number.
        distances = np.full(len(apo_backbone), -1.0)

        for apo_index, holo_index in alignment[0].trace:
            # -1 marks a gap in the trace; indexing with it would wrap around
            # to the last element, so gapped positions keep the sentinel.
            if apo_index < 0 or holo_index < 0:
                continue
            distances[apo_index] = distance(apo_backbone[apo_index], holo_backbone[holo_index])

        np.save(result_file, distances)

# Residue distances for the CryptoBench dataset.
DATASET_PATH = '/home/vit/Projects/flexibility-analysis/datasets/cryptobench-dataset'
OUTPUT_PATH = '/home/vit/Projects/flexibility-analysis/data/features/residue-distances/cryptobench-dataset'
compute_distances(dataset_path=DATASET_PATH, output_path=OUTPUT_PATH)

Processing 1jpmA:1tkkB
Processing 2yx7A:2e7xA
Processing 2e1cA:2cyyA
Processing 1qhtA:5omqA
Processing 6cjfA:4ighA
Processing 2w6rA:7ac8E
Processing 5x6zD:5x6yC
Processing 1nkoA:2hrlA
Processing 5caeB:2fp4B
Processing 1macB:1u0aA
Processing 4dmzB:4dn0A
Processing 4gvrA:4gvqA
Processing 6kx4A:7d1cA
Processing 4ok2B:4ojzB
Processing 4ljpA:6sc5A
Processing 5v49A:4mwdB
Processing 7l8qA:7u4kA
Processing 4m23B:4m25A
Processing 7se6A:7secA
Processing 4brrE:4uxzA
Processing 1vjuB:3dwrA
Processing 1nwhB:1pquA
Processing 5uzvZ:5v0aZ
Processing 4jycC:4jybA
Processing 1mufA:4jdsA
Processing 6lgyA:6lgzA
Processing 3p08B:4ot5A
Processing 1l0wB:1g51A
Processing 6cy1A:6cy5A
Processing 6ksuA:6ksvB
Processing 1e5lB:1e5qB
Processing 8c3uA:5r8cA
Processing 7btcB:6bsxC
Processing 7oueA:7oliB
Processing 3vskB:3vslA
Processing 4z4lA:7d5gA
Processing 7s5gA:5ocaA
Processing 3l15A:6s6jB
Processing 8jisR:6x19R
Processing 7ciuA:7ciwA
Processing 2zcoA:4ea0A
Processing 1m1zA:1mc1B
Processing 3vjbD:3v66A
Processing 

In [3]:
# Residue distances for the rigid dataset.
DATASET_PATH = '/home/vit/Projects/flexibility-analysis/datasets/rigid-dataset'
OUTPUT_PATH = '/home/vit/Projects/flexibility-analysis/data/features/residue-distances/rigid-dataset'
compute_distances(dataset_path=DATASET_PATH, output_path=OUTPUT_PATH)

UnboundLocalError: cannot access local variable 'holo_structure' where it is not associated with a value

In [2]:
from biotite.structure import distance

# NOTE(review): apo_backbone and holo_backbone are not assigned at notebook
# level in any visible cell (they are locals of compute_distances); they must
# be defined before this cell is run.

# Validate the atom counts first: the check is only useful if it runs before
# superimpose, which is expected to need equally sized inputs.
if len(apo_backbone) != len(holo_backbone):
    # Raise instead of sys.exit(1) so the kernel reports an ordinary error.
    raise ValueError('Different number of atoms')

superimposed, _ = superimpose(apo_backbone, holo_backbone)

# Print the distance for every position whose residue names agree.
for r1, r2 in zip(apo_backbone, superimposed):
    if r1.res_name != r2.res_name:
        continue
    print(distance(r1, r2))

0.22126673
0.08256568
0.19333434
0.29201365
0.5699414
0.16736606
0.021260047
0.09586253
0.15731305
0.21882685
0.15894488
0.2004774
0.15390056
0.253486
0.5540951
1.7572789
0.36263546
0.5049436
0.6313435
0.38191262
0.28834093
0.3453374
0.31606963
0.18958025
0.32565448
0.5999685
0.5935728
0.65430486
1.01148
0.5136668
0.34650543
0.26356962
0.32032958
0.35933712
0.4364145
0.5086713
0.6577395
1.0524162
1.195389
1.2552959
1.355823
1.5284864
1.5004284
1.0722119
0.9936257
1.3263627
0.97789824
0.63274896
0.8898442
1.1757491
0.97282237
0.988415
1.7712477
1.510404
0.6410537
0.46222854
0.45119992
0.4106967
0.34188798
0.46153802
0.58223534
0.6362736
0.90071523
0.29784736
0.20890673
0.27742684
0.31758174
0.29350647
0.5279892
0.5307641
0.5513973
0.5357457
0.53214127
0.5142883
0.42260882
0.439414
0.5535645
0.40352687
0.25844607
0.23821692
0.3110361
0.2828371
0.120319776
0.20238705
0.07999649
0.08980715
0.11824025
0.14510398
0.31823593
0.35781458
0.33304554
0.4391403
0.47895867
0.30347452
0.39654732
0.5

In [7]:
# Display the single-chain apo -> holo mapping of the CryptoBench train set.
DATASET_PATH = '/home/vit/Projects/flexibility-analysis/datasets/cryptobench-dataset'
train_set = dataset_utils.load_train_set(DATASET_PATH)
main_pairs = dataset_utils.load_main_apo_holo_pairs(train_set, multichain=False)
main_pairs

{'1jpmA': '1tkkB',
 '2yx7A': '2e7xA',
 '2e1cA': '2cyyA',
 '1qhtA': '5omqA',
 '6cjfA': '4ighA',
 '2w6rA': '7ac8E',
 '5x6zD': '5x6yC',
 '1nkoA': '2hrlA',
 '5caeB': '2fp4B',
 '1macB': '1u0aA',
 '4dmzB': '4dn0A',
 '4gvrA': '4gvqA',
 '6kx4A': '7d1cA',
 '4ok2B': '4ojzB',
 '4ljpA': '6sc5A',
 '5v49A': '4mwdB',
 '7l8qA': '7u4kA',
 '4m23B': '4m25A',
 '7se6A': '7secA',
 '4brrE': '4uxzA',
 '1vjuB': '3dwrA',
 '1nwhB': '1pquA',
 '5uzvZ': '5v0aZ',
 '4jycC': '4jybA',
 '1mufA': '4jdsA',
 '6lgyA': '6lgzA',
 '3p08B': '4ot5A',
 '1l0wB': '1g51A',
 '6cy1A': '6cy5A',
 '6ksuA': '6ksvB',
 '1e5lB': '1e5qB',
 '8c3uA': '5r8cA',
 '7btcB': '6bsxC',
 '7oueA': '7oliB',
 '3vskB': '3vslA',
 '4z4lA': '7d5gA',
 '7s5gA': '5ocaA',
 '3l15A': '6s6jB',
 '8jisR': '6x19R',
 '7ciuA': '7ciwA',
 '2zcoA': '4ea0A',
 '1m1zA': '1mc1B',
 '3vjbD': '3v66A',
 '1wxeA': '1wxhA',
 '5h61B': '5h62B',
 '5fioB': '5c7rA',
 '5mkbB': '5m28A',
 '4rjzA': '4qsdA',
 '6x84A': '4aq4A',
 '6dtrA': '6dtqA',
 '7nbzB': '8s5bA',
 '2heuC': '6prgA',
 '2fk7A': '7