In [None]:
import BioSimSpace as BSS
import sys
import glob
import csv
import numpy as np
from sklearn.preprocessing import minmax_scale
from pipeline.utils import *

##### <span style="color:teal">Generating the RBFENN</span>  


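Here the network is generated from a custom links file rather than from default scoring: each perturbation is scored by the inverse of its predicted SEM (averaged over all prediction files and scaled to the range 0.03–1), and the resulting links file is passed to the pipeline's network setup.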

In [14]:
# Settings for the predicted-SEM perturbation files.
# Protein target being processed.
protein = "p38"
# Prefix of the prediction files to read. Naming options noted by the author:
# "<protein>_me" or "<protein>_rename" (the latter for tyk2 and p38).
tgt_to_run = "{}_rename".format(protein)
# Folder that holds the per-series prediction files.
cats_files_path = (
    "/home/anna/Documents/benchmark/scripts/RBFENN/ANALYSIS/"
    "perturbation_networks/output/series_predictions"
)

In [15]:
def scaleArray(arr, feature_range=(0.03, 1)):
    """Invert the values of ``arr`` and min-max scale them into ``feature_range``.

    Every value ``x`` is replaced by ``1 / x`` and the reciprocals are then
    rescaled with ``minmax_scale``. For strictly positive input this means the
    smallest value ends up at the upper bound of ``feature_range`` and the
    largest value at the lower bound.

    Parameters
    ----------
    arr : array-like
        Non-zero numeric values (a list, tuple or numpy array).
    feature_range : tuple of (min, max), default (0.03, 1)
        Target range of the scaled values. Note that the default lower bound
        is 0.03, not 0.

    Returns
    -------
    numpy.ndarray
        The inverted and scaled values, in the same order as ``arr``.

    Raises
    ------
    ValueError
        If ``arr`` contains a zero, which has no finite reciprocal.
    """
    # Accept plain sequences as well as arrays; `1 / list` would raise TypeError.
    values = np.asarray(arr)

    # Fail early with a clear message instead of passing `inf` on to the scaler.
    if np.any(values == 0):
        raise ValueError("scaleArray: cannot invert an array that contains zeros.")

    return minmax_scale(1 / values, feature_range=feature_range)

In [16]:
# Get the FEPNN SEM prediction per perturbation.
# Every file matching "<cats_files_path>/<tgt_to_run>_*" is read; the first
# column of each row is the perturbation name and the second its predicted SEM.
# Files are visited in sorted order so the result does not depend on the
# arbitrary ordering returned by glob.
pert_pred_sems = {}
for cats_file in sorted(glob.glob(f"{cats_files_path}/{tgt_to_run}_*")):
    # newline="" is the file mode recommended by the csv module.
    with open(cats_file, "r", newline="") as readfile:
        reader = csv.reader(readfile)
        # Skip the first (header) row; the default avoids StopIteration on an empty file.
        next(reader, None)
        for row in reader:
            if not row:
                # csv.reader yields [] for blank lines; there is nothing to parse.
                continue
            pert = row[0]
            pred_sem = float(row[1])
            pert_pred_sems.setdefault(pert, []).append(pred_sem)

# Without any prediction the scaling below would fail with an unrelated error.
if not pert_pred_sems:
    raise ValueError(
        f"No SEM predictions found for '{tgt_to_run}' in '{cats_files_path}'."
    )

# Compute the mean SEM prediction per perturbation.
pert_names = list(pert_pred_sems)
pert_sems = [float(np.mean(sems)) for sems in pert_pred_sems.values()]

# Invert and scale the mean SEMs (see scaleArray for the target range).
pert_sems = scaleArray(np.array(pert_sems))

# Final mapping used by the following cells: perturbation name -> scaled value.
perts = dict(zip(pert_names, pert_sems))

In [None]:
# Create the pipeline object that the remaining cells configure and use.
pl = initialise_pipeline()
# where the ligands for the pipeline are located. These should all be in the same folder in sdf format
pl.ligands_folder(f"/home/anna/Documents/benchmark/inputs/reruns/{protein}/ligands")
# where the pipeline should be made
pl.main_folder(f"/home/anna/Documents/benchmark/test/{protein}")


# Build the simulation and analysis protocols and validate each one before
# attaching it to the pipeline.
# NOTE(review): both are constructed without arguments, so presumably the
# package defaults are used — confirm against pipeline.utils.
protocol = pipeline_protocol()
protocol.validate()
ana_protocol = analysis_protocol()
ana_protocol.validate()
pl.add_pipeline_protocol(protocol)
pl.add_analysis_protocol(ana_protocol)
# NOTE(review): presumably reads the ligand files from the ligands folder set
# above — confirm; later cells call pl.ligands_folder() and pl.exec_folder().
pl.setup_ligands()

In [18]:
# make folder for the RBFENN network
validate.folder_path(f"{pl.exec_folder()}/RBFENN", create=True)


def _match_ligand_file(ligand, file_stems):
    """Return the ligand file name that ``ligand`` refers to, or None.

    An exact match with a file name (without extension) is preferred. If there
    is none, a file name that contains ``ligand`` is accepted only when it is
    the single such file, so that e.g. "lig_1" is never silently resolved to
    "lig_10".
    """
    if ligand in file_stems:
        return ligand
    partial = [stem for stem in file_stems if ligand in stem]
    return partial[0] if len(partial) == 1 else None


# Names of the available ligand files: the file name up to its first ".".
# Computed once rather than re-globbing the folder for every perturbation, and
# taken from the file name only so that directory names in the path can never
# produce a match.
ligand_file_stems = sorted(
    filename.split("/")[-1].split(".")[0]
    for filename in glob.glob(f"{pl.ligands_folder()}/*.sdf")
)

written = []
unmatched = []
# newline="" is the file mode recommended by the csv module.
with open(f"{pl.exec_folder()}/RBFENN/links_file.in", "w", newline="") as writefile:
    writer = csv.writer(writefile, delimiter=" ")

    for pert_name, value in perts.items():
        # Perturbation names have the form "<ligand A>~<ligand B>".
        ligands = pert_name.split("~")
        if len(ligands) < 2:
            unmatched.append(pert_name)
            continue

        # find the lomap filename for both ligs.
        liga_lomap_name = _match_ligand_file(ligands[0], ligand_file_stems)
        ligb_lomap_name = _match_ligand_file(ligands[1], ligand_file_stems)
        if liga_lomap_name is None or ligb_lomap_name is None:
            unmatched.append(pert_name)
            continue

        # Write each ligand pair at most once: "<ligand A> <ligand B> <value>".
        if [liga_lomap_name, ligb_lomap_name] not in written:
            writer.writerow([liga_lomap_name, ligb_lomap_name, value])
            written.append([liga_lomap_name, ligb_lomap_name])

# Report perturbations that were left out instead of dropping them silently.
if unmatched:
    print(
        f"Skipped {len(unmatched)} perturbation(s) without an unambiguous ligand file: {unmatched}"
    )

In [None]:
# Build the network in the "RBFENN" folder from the links file written to
# <exec_folder>/RBFENN/links_file.in.
# NOTE(review): the original note says the ligands and ligand names already
# exist from an earlier LOMAP step that is not shown in this notebook — confirm.
# NOTE(review): the original note also says that if the folder name is changed,
# it will be used as the default in the execution model — confirm.
pl.setup_network(folder="RBFENN", links_file=f"{pl.exec_folder()}/RBFENN/links_file.in")

# Per the original author, setup_network updates the pipeline's existing
# network, so the RBFENN is written to its own network file here.
pl.write_network(file_path=f"{pl.exec_folder()}/rbfenn_network.dat")