In [8]:
# Import necessary libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import notebook

# GIFs
from PIL import Image

In [9]:
def process_df(data):
    """Split a raw edge table into an interaction table and a supplementary table.

    The input must provide the columns ``NodeId1``, ``NodeId2``,
    ``Interaction``, ``Distance``, ``Angle``, ``Energy``, ``Atom1``, ``Atom2``,
    ``Donor``, ``Positive``, ``Cation`` and ``Orientation``.

    Every ``"_:"`` substring is removed from each node identifier, which is
    then split on ``":"``.  Three fields are stored as ``chain_N`` / ``id_N`` /
    ``aa_N``, two fields as ``id_N`` / ``aa_N`` (``N`` is 1 or 2); any other
    field count makes the column assignment fail.  ``Interaction`` is split on
    ``":"`` into ``int_type`` and ``int_portion``.  The remaining columns are
    renamed to lower case.

    Parameters
    ----------
    data : pandas.DataFrame
        Raw edge table.  It is copied before processing, so the caller's frame
        is left unchanged and the function can be called repeatedly on it.

    Returns
    -------
    rin : pandas.DataFrame
        Optional ``chain_1`` / ``chain_2`` columns (only those that exist),
        followed by ``aa_1, aa_2, id_1, id_2, int_type, int_portion,
        distance, angle, energy, atom1, atom2``.  ``id_1`` and ``id_2`` are
        integers.
    suppl : pandas.DataFrame
        The ``donor, positive, cation, orientation`` columns.

    Raises
    ------
    KeyError
        If one of the required input columns is missing.
    """
    # Work on a copy: mutating the caller's frame (adding the parsed columns,
    # dropping the raw ones) made a second call on the same frame fail.
    data = data.copy()

    for ind, node_col in enumerate(["NodeId1", "NodeId2"], start=1):
        split_cols = data[node_col].str.replace("_:", "", regex=False).str.split(":", expand=True)
        if split_cols.shape[1] > 2:
            targets = ["chain_{}".format(ind), "id_{}".format(ind), "aa_{}".format(ind)]
        else:
            targets = ["id_{}".format(ind), "aa_{}".format(ind)]
        data[targets] = split_cols

    data[["id_1", "id_2"]] = data[["id_1", "id_2"]].astype(int)
    data[["int_type", "int_portion"]] = data["Interaction"].str.split(":", expand=True)

    data = data.drop(columns=["NodeId1", "NodeId2", "Interaction"]).rename(columns={
        "Distance": "distance",
        "Angle": "angle",
        "Energy": "energy",
        "Atom1": "atom1",
        "Atom2": "atom2",
        "Donor": "donor",
        "Positive": "positive",
        "Cation": "cation",
        "Orientation": "orientation",
    })

    # Chain columns only exist when the node identifiers carried a chain field.
    chain_cols = [col for col in ("chain_1", "chain_2") if col in data.columns]
    rin_cols = chain_cols + [
        "aa_1", "aa_2", "id_1", "id_2", "int_type", "int_portion",
        "distance", "angle", "energy", "atom1", "atom2",
    ]

    # Explicit copies so callers can modify the results without pandas
    # chained-assignment warnings.
    rin = data[rin_cols].copy()
    suppl = data[["donor", "positive", "cation", "orientation"]].copy()

    return rin, suppl

def plot_contact_map(data, int_type, path_save):
    """Draw the distance map for one interaction type and save it to disk.

    Rows of ``data`` whose ``int_type`` column equals ``int_type`` are written
    into a square matrix indexed by ``id - min_id``; cells without an entry
    stay NaN.  Each distance is stored at both ``[i, j]`` and ``[j, i]``; when
    a pair occurs more than once the last row wins.

    The function reads four module-level names that must be defined before it
    is called: ``min_id`` / ``max_id`` (inclusive id range covered by the
    matrix) and ``vmin`` / ``vmax`` (colour-scale limits).

    Parameters
    ----------
    data : pandas.DataFrame
        Table with at least the columns ``int_type``, ``id_1``, ``id_2`` and
        ``distance``.
    int_type : str
        Interaction type to plot; also used in the figure title.
    path_save : str, path-like or file-like
        Destination passed to ``savefig``.  When it is a path, its parent
        directory is created if missing.

    Side effects
    ------------
    Switches the global Matplotlib style to ``dark_background``, writes the
    image at 100 dpi and closes the figure afterwards.
    """
    lo, hi = int(min_id), int(max_id)
    size = hi - lo + 1

    selected = data[data["int_type"] == int_type]
    contact_matrix = np.full((size, size), np.nan)

    # Walk the three columns directly: iterrows() builds one Series per row and
    # may upcast integer ids to float, which would break the indexing below.
    for id_1, id_2, distance in zip(selected["id_1"], selected["id_2"], selected["distance"]):
        i, j = int(id_1) - lo, int(id_2) - lo
        contact_matrix[i, j] = distance
        contact_matrix[j, i] = distance

    # Note: this changes the style for every later plot in the session as well.
    plt.style.use("dark_background")
    cmap = plt.cm.inferno.reversed()

    fig, ax = plt.subplots(figsize=(10, 8))
    im = ax.imshow(contact_matrix, cmap=cmap, interpolation="none", vmin=vmin, vmax=vmax)
    # "\u00c5" is the Angstrom sign; the label previously held a mis-encoded "A?".
    fig.colorbar(im, ax=ax, label="Distance (\u00c5)")
    ax.set_title("{} contact map".format(int_type))

    # One tick every 15 ids, labelled with the real id rather than the matrix offset.
    tick_positions = np.arange(0, size, 15)
    tick_labels = np.arange(lo, hi + 1, 15)
    ax.set_xticks(tick_positions)
    ax.set_xticklabels(tick_labels)
    ax.set_yticks(tick_positions)
    ax.set_yticklabels(tick_labels)

    if isinstance(path_save, (str, os.PathLike)):
        out_dir = os.path.dirname(path_save)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)

    try:
        fig.savefig(path_save, dpi=100)
    finally:
        # Release the figure: this function is called once per file in a loop,
        # and unclosed figures accumulate in memory.
        plt.close(fig)

In [10]:
datasets = ['antibody', 'cdk6_p16ink4a', 'frataxin', 'p16', 'stim1', 'vcb', 'vhl']

# How to read the integer index embedded in a file name, per dataset.
# Datasets without an entry here fall back to `default_index_parser`.
index_parsers = {
    "antibody": lambda name: int(name.split(".")[1]),
    "cdk6_p16ink4a": lambda name: int(name.split("A")[1].split(".")[0]),
    "frataxin": lambda name: int(name.split("_")[1]),
}


def default_index_parser(name):
    """Return the integer between the first ``_`` and the following ``.`` (or next ``_``)."""
    return int(name.split("_")[1].split(".")[0])


def order_by_index(names, index_parser):
    """Return ``names`` sorted by parsed index; for a repeated index the later name wins."""
    by_index = {index_parser(name): name for name in names}
    return [by_index[index] for index in sorted(by_index)]


datasets_edges = {}
datasets_pdbs = {}

for dataset in datasets:
    edges_dir = "datasets/{}/edges".format(dataset)
    pdbs_dir = "datasets/{}/pdbs".format(dataset)

    # List both directories before parsing any file name.
    edges_files = [name for name in os.listdir(edges_dir) if name.endswith("Edges")]
    pdbs_files = [name for name in os.listdir(pdbs_dir) if name.endswith("pdb") or name.startswith("fra")]

    index_parser = index_parsers.get(dataset, default_index_parser)
    edges_files = order_by_index(edges_files, index_parser)
    pdbs_files = order_by_index(pdbs_files, index_parser)

    datasets_edges[dataset] = edges_files
    datasets_pdbs[dataset] = pdbs_files

In [21]:
import nglview as nv
from Bio.PDB import PDBParser

# Load the structure from the first PDB file listed for the frataxin dataset
parser = PDBParser()
first_pdb = datasets_pdbs["frataxin"][0]
first_pdb_path = "datasets/frataxin/pdbs/{}".format(first_pdb)
structure = parser.get_structure("frataxin", first_pdb_path)

# Build an NGL view of the protein structure
view = nv.show_biopython(structure)
view.add_representation('cartoon', selection='protein', color_scheme='blue')
view.camera = 'orthographic'

# Display the widget
view





NGLWidget()

In [None]:
# Display the NGL widget stored in `view` again.
view

Defaulting to user installation because normal site-packages is not writeableNote: you may need to restart the kernel to use updated packages.

Collecting nglview
  Downloading nglview-3.1.1.tar.gz (5.5 MB)
     ---------------------------------------- 0.0/5.5 MB ? eta -:--:--
     - -------------------------------------- 0.1/5.5 MB 4.3 MB/s eta 0:00:02
     -- ------------------------------------- 0.3/5.5 MB 4.3 MB/s eta 0:00:02
     --- ------------------------------------ 0.5/5.5 MB 3.5 MB/s eta 0:00:02
     --- ------------------------------------ 0.5/5.5 MB 3.8 MB/s eta 0:00:02
     ----- ---------------------------------- 0.8/5.5 MB 4.3 MB/s eta 0:00:02
     ----- ---------------------------------- 0.8/5.5 MB 4.3 MB/s eta 0:00:02
     ----- ---------------------------------- 0.8/5.5 MB 4.3 MB/s eta 0:00:02
     ------------ --------------------------- 1.7/5.5 MB 4.7 MB/s eta 0:00:01
     -------------- ------------------------- 2.0/5.5 MB 4.7 MB/s eta 0:00:01
     ---------------


[notice] A new release of pip is available: 23.2.1 -> 24.0
[notice] To update, run: python.exe -m pip install --upgrade pip




In [43]:
# Scan every edge file once to collect the limits shared by all contact maps:
#   vmin / vmax     -> smallest / largest distance (colour scale)
#   min_id / max_id -> smallest / largest id in either id column (matrix extent)
# plot_contact_map() reads these four names as module-level globals.
vmin = np.inf
vmax = -np.inf

min_id = np.inf
max_id = -np.inf

# NOTE(review): `main`, `ant`, `edges` and `folder` are not defined in the
# visible cells; they must exist in the kernel before this cell runs.
path = "{}/{}/{}/".format(main, ant, edges)

files = {int(file.split(".")[-2]) : file for file in os.listdir(path) if file.endswith("Edges")}

for key in notebook.tqdm(sorted(files.keys())):
    # NOTE(review): names are listed from `path` but read from `folder` with a
    # hard-coded pattern -- confirm both refer to the same directory.
    file_path = "{}/6J6Y_1_ms_1K0.{}.pdb_ringEdges".format(folder, key)
    data = pd.read_csv(file_path, sep = "\t")
    rin, suppl = process_df(data)
    vmax = max(rin["distance"].max(), vmax)
    vmin = min(rin["distance"].min(), vmin)
    # Accumulate across files and across BOTH id columns.  The previous
    # version overwrote the value on every iteration and compared a column
    # with itself (id_1 only for the minimum, id_2 only for the maximum).
    min_id = min(min_id, rin["id_1"].min(), rin["id_2"].min())
    max_id = max(max_id, rin["id_1"].max(), rin["id_2"].max())

  0%|          | 0/101 [00:00<?, ?it/s]

In [None]:
# Output directory for the per-file contact maps.  Created up front so saving
# the first figure cannot fail on a missing directory.
path = "{}/contact_maps".format(folder)
os.makedirs(path, exist_ok=True)

files = {int(file.split(".")[-2]) : file for file in os.listdir(folder) if file.endswith("Edges")}

for key in notebook.tqdm(sorted(files.keys())):
    # Use the file name that was actually listed instead of rebuilding it from
    # a hard-coded pattern.
    file_path = "{}/{}".format(folder, files[key])
    data = pd.read_csv(file_path, sep = "\t")
    rin, suppl = process_df(data)
    plot_contact_map(rin, 'HBOND', "{}/contact_maps/contact_map_{}.png".format(folder, key))

# Load the PNG frames in index order.  Only ".png" entries are considered so
# that stray files in the directory do not break the integer parsing.
files_indexed = {int(file[:-4].split("_")[-1]) : file for file in os.listdir(path) if file.endswith(".png")}

images = []
for file_ind in sorted(files_indexed.keys()):
    # Copy each frame into memory and close the file handle right away rather
    # than keeping every PNG open until the GIF is written.
    with Image.open("{}/contact_maps/{}".format(folder, files_indexed[file_ind])) as image:
        images.append(image.copy())

# Assemble the GIF: 100 ms per frame, looping forever.
images[0].save('output.gif', save_all=True, append_images=images[1:], optimize=False, duration=100, loop=0)