In [16]:
import os
import pickle
from typing import List, Optional, Tuple

import numpy as np
import pandas as pd
import tensorflow as tf
from plotly import express as px

# Hide every GPU from TensorFlow so the notebook runs on CPU only.
tf.config.set_visible_devices([], 'GPU')

In [17]:
# Root of the patch_to_score dataset. Both directories below live under it, so
# the machine-specific prefix is defined once. Set UBINET_PATCH_TO_SCORE_DIR to
# run the notebook against a copy of the dataset stored elsewhere; when it is
# unset the original absolute location is used unchanged.
patch_to_score_dir_path = os.environ.get(
    'UBINET_PATCH_TO_SCORE_DIR',
    '/home/iscb/wolfson/doririmon/home/order/ubinet/repo/ubinet/datasets/patch_to_score',
)

# Directory tree that is searched for the PDB file of a protein.
pdbs_dir_path = f'{patch_to_score_dir_path}/sources'
# Directory holding the pickled training inputs for this experiment.
data_for_training_dir_path = f'{patch_to_score_dir_path}/data_for_training/03_04_with_pesto_and_coord'

In [18]:
# Load the two pickled inputs from the training-data directory.
# NOTE: pickle.load can execute arbitrary code; only point this at trusted files.
folds_pickle_path = f'{data_for_training_dir_path}/folds_training_dicts.pkl'
with open(folds_pickle_path, 'rb') as folds_file:
    folds_training_dicts = pickle.load(folds_file)

patches_pickle_path = f'{data_for_training_dir_path}/all_patches_ca_coordinates.pkl'
with open(patches_pickle_path, 'rb') as patches_file:
    all_patches_ca_coordinates = pickle.load(patches_file)

In [19]:
# Work with the first fold only and list the entries it provides
# (train / validation / test variants of each field, see the output below).
fold = folds_training_dicts[0]
fold.keys()

dict_keys(['sizes_train', 'components_train', 'num_patches_train', 'uniprots_train', 'labels_train', 'sizes_validation', 'components_validation', 'num_patches_validation', 'uniprots_validation', 'labels_validation', 'sizes_test', 'components_test', 'num_patches_test', 'uniprots_test', 'labels_test', 'coordinates_train', 'coordinates_validation', 'coordinates_test'])

In [20]:
# Pull the training-split entries out of the fold in one unpacking step.
# The order of the keys matches the order of the names on the left-hand side.
features, coordinates, sizes, num_patches, uniprots = (
    fold[key]
    for key in (
        'components_train',
        'coordinates_train',
        'sizes_train',
        'num_patches_train',
        'uniprots_train',
    )
)

In [21]:
# Pick one protein, by its position in the training split, to inspect and plot.
uniprot_index = 0
uniprot_name = uniprots[uniprot_index]
uniprot_name

'A0A060X2W8'

In [22]:
# Select the chosen protein's entry from each per-protein collection.
uniprot_features, uniprot_coordinates, uniprot_sizes = (
    per_protein[uniprot_index]
    for per_protein in (features, coordinates, sizes)
)

In [23]:
# find uniprot in pdbs_dir_path
def find_pdbs_for_uniprot(uniprot_name: str, pdbs_dir_path: str) -> Optional[str]:
    """Locate a file whose name contains ``uniprot_name`` under ``pdbs_dir_path``.

    The directory tree is walked top-down in sorted order, so when several
    files match, the same one is returned on every run and on every machine
    (``os.walk`` itself yields entries in arbitrary order).

    Args:
        uniprot_name: Substring to look for in file names.
        pdbs_dir_path: Root directory of the tree to search.

    Returns:
        The path of the first matching file, or ``None`` when no file name
        under ``pdbs_dir_path`` contains ``uniprot_name``.
    """
    for root, dirs, files in os.walk(pdbs_dir_path):
        # Sorting in place makes os.walk descend into sub-directories in a
        # deterministic order.
        dirs.sort()
        for file_name in sorted(files):
            if uniprot_name in file_name:
                return os.path.join(root, file_name)
    # Explicit "not found" result; callers must check for it.
    return None

In [24]:
# Look up the PDB file of the selected protein.
uniprot_pdb_path = find_pdbs_for_uniprot(uniprot_name, pdbs_dir_path)
# The lookup yields None when nothing matches; stop here with a clear message
# instead of letting the PDB parser fail later on a None path.
if uniprot_pdb_path is None:
    raise FileNotFoundError(
        f'No PDB file containing {uniprot_name!r} was found under {pdbs_dir_path!r}'
    )

In [25]:
# load pdb file with biopython
from Bio.PDB import PDBParser

parser = PDBParser(QUIET=True)
structure = parser.get_structure(uniprot_name, uniprot_pdb_path)
# Use the first model of the structure.
model = structure.child_list[0]
# The cells below work on a single chain, so insist that the model has exactly one.
assert len(model) == 1, (
    f'expected exactly one chain in {uniprot_pdb_path}, found {len(model)}'
)
# Unpack the only chain (also fails loudly if there is not exactly one).
(chain,) = model

In [26]:
# Collect the C-alpha atom position of every residue in the chain and stack
# them into one array (one row per residue).
ca_positions = [residue['CA'].coord for residue in chain.child_list]
all_coordinates = np.array(ca_positions)

In [27]:
# Sanity check: one row per residue of the chain, three columns (x, y, z).
all_coordinates.shape

(655, 3)

In [28]:
# Frame of the C-alpha positions of the whole chain, tagged as 'CA'.
ca_df = pd.DataFrame(all_coordinates, columns=['x', 'y', 'z']).assign(kind='CA')

# Keep only the rows of the per-patch coordinates whose x value is non-zero.
# NOTE(review): presumably the dropped rows are zero padding — confirm against
# the code that builds `coordinates_train`.
has_nonzero_x = uniprot_coordinates[:, 0] != 0
patch_coordinates_df = pd.DataFrame(
    uniprot_coordinates[has_nonzero_x],
    columns=['x', 'y', 'z'],
).assign(kind='patch_average')

# Single long-format frame used for plotting: CA rows first, then patch rows.
coordinates_df = pd.concat([ca_df, patch_coordinates_df], ignore_index=True)

In [29]:
# Add the coordinates of every patch of the selected protein, each patch tagged
# with its own 'patch_<index>' kind so it gets a separate colour in the plot.
# All per-patch frames are built first and concatenated once, which avoids
# re-copying the growing frame on every loop iteration.
# NOTE: this cell appends to `coordinates_df`; re-running it without first
# re-running the cell that creates `coordinates_df` duplicates the patch rows.
patch_frames = [
    pd.DataFrame(patch_coordinates, columns=['x', 'y', 'z']).assign(kind=f'patch_{patch_index}')
    for patch_index, patch_coordinates in enumerate(all_patches_ca_coordinates[uniprot_name])
]
coordinates_df = pd.concat([coordinates_df, *patch_frames], ignore_index=True)

In [30]:
# Interactive 3D scatter of all collected points, one colour per 'kind'.
fig = px.scatter_3d(
    coordinates_df,
    x='x', y='y', z='z',
    color='kind',
    title=f'3D coordinates of {uniprot_name}',
    width=800, height=800,
)
fig.show()