In [2]:
from pyspark.sql import SparkSession
import pyspark.sql.functions as f
from pyspark.sql import DataFrame
from pyspark.sql.functions import col

In [3]:
import os
# Show the kernel's working directory; the relative "../data/..." paths read later resolve against it.
print(os.getcwd())

/home/polina/bioactivity/code


In [4]:
# Reuse the active Spark session if there is one, otherwise create it.
spark = SparkSession.builder.getOrCreate()

Setting default log level to "WARN".
To adjust logging level use sc.setLogLevel(newLevel). For SparkR, use setLogLevel(newLevel).
23/12/11 11:17:28 INFO SparkEnv: Registering MapOutputTracker
23/12/11 11:17:28 INFO SparkEnv: Registering BlockManagerMaster
23/12/11 11:17:28 INFO SparkEnv: Registering BlockManagerMasterHeartbeat
23/12/11 11:17:28 INFO SparkEnv: Registering OutputCommitCoordinator


# Data parsing

In [5]:
def join_dataframes(initial_df: DataFrame,
                    second_df: DataFrame,
                    initial_key_column: str,
                    second_key_column: str,
                    columns_to_join: list) -> DataFrame:
    """
    Left-joins selected columns of a second PySpark DataFrame onto an initial one.

    Every row of ``initial_df`` is kept; rows without a match in ``second_df``
    get nulls in the joined columns. If a key value occurs more than once in
    ``second_df``, the matching rows of ``initial_df`` are repeated once per
    match, so the result can contain more rows than ``initial_df``.

    Args:
    initial_df (DataFrame): The initial (left) PySpark DataFrame.
    second_df (DataFrame): The second (right) PySpark DataFrame to join with.
    initial_key_column (str): The key column name in the initial DataFrame.
    second_key_column (str): The key column name in the second DataFrame.
    columns_to_join (list): Column names from the second DataFrame to add.
        Repeated names, and the second key column itself, are selected only once.

    Returns:
    DataFrame: The columns of ``initial_df`` followed by the requested columns
    of ``second_df``; the second DataFrame's key column is dropped.
    """

    # Key first, then the requested columns, de-duplicated in order. Selecting the
    # key twice would create two identically named columns and make the key
    # lookup below ambiguous.
    selected_columns = list(dict.fromkeys([second_key_column, *columns_to_join]))
    second_df_selected = second_df.select(selected_columns)

    # Build the condition from column references of each side so the join also
    # works when both key columns share the same name.
    join_condition = initial_df[initial_key_column] == second_df_selected[second_key_column]
    joined_df = initial_df.join(second_df_selected, join_condition, how='left')

    # Drop the right-hand key by column reference (not by name), so an identically
    # named key column of the initial DataFrame is preserved.
    return joined_df.drop(second_df_selected[second_key_column])


In [6]:
def count_unique_values(df: DataFrame, column_name: str) -> int:
    """
    Return how many distinct values a column of a PySpark DataFrame holds.

    Args:
    df (DataFrame): The PySpark DataFrame to inspect.
    column_name (str): Name of the column whose distinct values are counted.

    Returns:
    int: Number of distinct values found in the column.
    """
    # Narrow to the one column first so distinct() only compares that column
    single_column = df.select(column_name)
    distinct_rows = single_column.distinct()
    return distinct_rows.count()

# Example usage
# unique_count = count_unique_values(your_dataframe, 'your_column_name')
# print(f"Number of unique values: {unique_count}")


In [7]:
def show_unique_values_and_counts(df: DataFrame, column_name: str):
    """
    Print each distinct value of a column together with the number of rows holding it.

    Args:
    df (DataFrame): The Spark DataFrame to analyze.
    column_name (str): Name of the column to group by.

    Raises:
    ValueError: If ``column_name`` is not one of ``df.columns``.
    """
    column_exists = column_name in df.columns
    if not column_exists:
        raise ValueError(f"Column {column_name} not found in DataFrame")

    # One row per distinct value with its count, printed via show()
    df.groupBy(column_name).count().show()


In [8]:
# Load the list of unique drugs (obtained from targets with evidence in the Platform or chemProbes)
import pandas as pd

# The CSV is read with pandas from a path relative to the working directory and
# then converted to a Spark DataFrame (a single `drugId` column, see output below).
unique_drugs_pd_df = pd.read_csv("../data/drug_to_target_unique_drugs.csv")
drug_list = spark.createDataFrame(unique_drugs_pd_df)
# Alternative kept for reference: read the CSV directly with Spark.
# NOTE(review): `drug_list_dir` is not defined anywhere in the visible notebook.
# drug_list = spark.read.csv(drug_list_dir, header=True, inferSchema=True)
drug_list.show()

[Stage 0:>                                                          (0 + 1) / 1]

+-------------+
|       drugId|
+-------------+
|CHEMBL1200632|
|   CHEMBL1231|
|CHEMBL1233511|
|   CHEMBL1637|
|CHEMBL1743017|
| CHEMBL185885|
|CHEMBL1949708|
|CHEMBL2105675|
|CHEMBL2107826|
|CHEMBL2109673|
|CHEMBL2346976|
| CHEMBL279115|
|CHEMBL3181832|
|CHEMBL3545096|
|CHEMBL3545103|
|CHEMBL3545145|
|CHEMBL3545312|
| CHEMBL363648|
|CHEMBL3707249|
|CHEMBL3989766|
+-------------+
only showing top 20 rows



                                                                                

### For each drug find a max phase of clinical trial

In [9]:
# Open Targets 23.12 molecule dataset (JSON)
molecule_path = "gs://open-targets-data-releases/23.12/output/etl/json/molecule"
# persist() returns the same DataFrame, so it can be chained onto the read;
# the cached table is reused by the join in the next cell.
molecule = spark.read.json(molecule_path).persist()
molecule.show()

[Stage 3:>                                                          (0 + 1) / 1]

+---------------+--------------------+--------------------+--------------------+--------------------+--------------+----------------+-------------+--------------------+----------+--------------------+--------------------+-------------------------+--------------------+-------------+--------------------+--------------------+-------------------+
+---------------+--------------------+--------------------+--------------------+--------------------+--------------+----------------+-------------+--------------------+----------+--------------------+--------------------+-------------------------+--------------------+-------------+--------------------+--------------------+-------------------+
|          false|Cc1cc(CN2CCN(c3c(...|                null|                null|Small molecule drug.|Small molecule|           false|CHEMBL1086582|UUGWPYPNRZQDFO-UH...|      null|                null|                null|                     null|       CHEMBL1086582|         null|                  []|       

                                                                                

In [10]:
# Molecule attributes to attach to every drug in the list
list_molecule = [
    "drugType",
    "maximumClinicalTrialPhase",
    "isApproved",
    "linkedTargets",
    "linkedDiseases",
]

# Left-join the molecule attributes onto the drug list (drugId <-> molecule id) and cache the result
drug_list_phase = join_dataframes(
    drug_list, molecule, "drugId", "id", list_molecule
).persist()
drug_list_phase.show()

                                                                                

+-------------+--------------+-------------------------+----------+--------------------+--------------------+
|       drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|
+-------------+--------------+-------------------------+----------+--------------------+--------------------+
|CHEMBL1200632|Small molecule|                      4.0|      true|             {0, []}|{3, [EFO_0003894,...|
|   CHEMBL1231|Small molecule|                      4.0|      true|{2, [ENSG00000133...|{15, [EFO_1000781...|
|CHEMBL1233511|Small molecule|                      3.0|     false|                null|  {1, [EFO_0002950]}|
|   CHEMBL1637|Small molecule|                      4.0|      true|{14, [ENSG0000010...|{54, [MONDO_00081...|
|CHEMBL1743017|      Antibody|                      2.0|     false|{1, [ENSG00000127...|{3, [EFO_0000676,...|
| CHEMBL185885|Small molecule|                      2.0|     false|                null|{2, [EFO_0009444,...|
|CHEMBL194

### For each unique drug find bioactivity data from chembl_33_activity

In [11]:
# ChEMBL 33 activity records (JSON lines)
activity_path = "gs://open-targets-pre-data-releases/chembl-columns/chembl-inputs/chembl_33_activity.jsonl"
# persist() returns the same DataFrame, so it can be chained onto the read
activity = spark.read.json(activity_path).persist()
activity.show()

[Stage 7:>                                                          (0 + 1) / 1]

+--------------------+-----------+---------------+----------+-----------------------+----------------------+---------------------+-------------------------+------------------+-------------+-----------------+------------------+-------------+-------------+-----------------+-------------------+-------------+--------------+--------------+----------------+--------------------+--------------------+
|           _metadata|action_type|assay_chembl_id|assay_type|assay_variant_accession|assay_variant_mutation|data_validity_comment|data_validity_description|document_chembl_id|document_year|ligand_efficiency|molecule_chembl_id|pchembl_value|standard_flag|standard_relation|standard_text_value|standard_type|standard_units|standard_value|target_chembl_id|     target_organism|    target_pref_name|
+--------------------+-----------+---------------+----------+-----------------------+----------------------+---------------------+-------------------------+------------------+-------------+-----------------+-

                                                                                

In [12]:
# Columns of the activity table to carry over to each drug
list_activity = ["assay_chembl_id",
                "assay_type",
                "action_type",
                "pchembl_value",
                "standard_type",
                "standard_units",
                "standard_value",
                "standard_relation",
                "target_organism",
                "target_pref_name",
                "target_chembl_id"]
                # Other activity columns that are currently NOT joined
                # (move them above the closing bracket to include them):
                # "standard_flag",
                # "ligand_efficiency",
                # "assay_variant_mutation"
                # "assay_variant_accession",
                # "data_validity_comment",
                # "data_validity_description"]

In [13]:
# Attach ChEMBL 33 activity records to each drug. The helper performs a left join, so drugs
# without any activity row end up with a null assay_chembl_id; the filter removes those rows.
drug_to_activity = (
    join_dataframes(drug_list_phase, activity, "drugId", "molecule_chembl_id", list_activity)
    .filter(col("assay_chembl_id").isNotNull())
    .persist()
)
drug_to_activity.show()



+------------+--------------+-------------------------+----------+-------------+------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+
|      drugId|      drugType|maximumClinicalTrialPhase|isApproved|linkedTargets|    linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|     target_organism|    target_pref_name|target_chembl_id|
+------------+--------------+-------------------------+----------+-------------+------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+
|CHEMBL110739|Small molecule|                      3.0|     false|         null|{1, [EFO_1000786]}|  CHEMBL1737902|         F|       null|         null|      Potency|            

                                                                                

In [14]:
# Coverage check: how many drugs and targets have any bioactivity data.
# `drug_list` is the full input list; `drug_to_activity` only keeps rows with an activity record.

drug_list_count = count_unique_values(drug_list, 'drugId')
drug_to_activity_count = count_unique_values(drug_to_activity, 'drugId')
drug_to_activity_count_targets = count_unique_values(drug_to_activity, 'target_chembl_id')

print("Number of unique drugs from targets dataset: ", drug_list_count)
print("Number of unique drugs with any bioactivities: ", drug_to_activity_count)
print("Number of unique targets with any drug bioactivities: ", drug_to_activity_count_targets)



Number of unique drugs from targets dataset:  12835
Number of unique drugs with any bioactivities:  6215
Number of unique targets with any drug bioactivities:  4848


                                                                                

### For each bioactivity assay find parameters from chembl_33_assay

In [15]:
# ChEMBL 33 assay table (JSON lines)
assay_path = "gs://open-targets-pre-data-releases/chembl-columns/chembl-inputs/chembl_33_assay.jsonl"
# persist() returns the same DataFrame, so it can be chained onto the read
assay = spark.read.json(assay_path).persist()
assay.show()

[Stage 37:>                                                         (0 + 1) / 1]

+--------------------+--------------+---------------+--------------------+---------------+----------+----------------------+----------------+----------------+
|           _metadata|assay_category|assay_chembl_id|      assay_organism|assay_test_type|assay_type|confidence_description|confidence_score|variant_sequence|
+--------------------+--------------+---------------+--------------------+---------------+----------+----------------------+----------------+----------------+
|{{0 - Default val...|          null|   CHEMBL688540|                null|           null|         F|  Default value - T...|               0|            null|
|{{8 - Homologous ...|  confirmatory|  CHEMBL2114731|Mycobacterium tub...|           null|         F|  Homologous single...|               8|            null|
|{{8 - Homologous ...|          null|   CHEMBL643834|                null|           null|         B|  Homologous single...|               8|            null|
|{{8 - Homologous ...|  confirmatory|  CHEMBL2

                                                                                

In [16]:
# Columns of the assay table to attach to each drug-activity row
list_assay = ["confidence_score",
            "confidence_description",
            "assay_category"]

In [17]:
# Attach the assay parameters to each drug-activity row (left join on assay_chembl_id) and cache the result
drug_to_assay = join_dataframes(drug_to_activity, assay, "assay_chembl_id", "assay_chembl_id", list_assay).persist()
drug_to_assay.show()



+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+----------------------+--------------+
|      drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|     target_organism|    target_pref_name|target_chembl_id|confidence_score|confidence_description|assay_category|
+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+----------------------+--

                                                                                

In [18]:
# Number of drug-activity rows after attaching the assay parameters
drug_to_assay.count()

                                                                                

168729

### Protein classification

####  check if it's possible to map target_chembl_id to uniprots via accession

In [39]:
# ChEMBL 33 target table (JSON lines); `target_components` carries the per-component accessions.
# NOTE(review): `target_path` and `target` are assigned again further down in the notebook
# (Open Targets targets dataset), so cells that use `target` depend on which load ran last.
target_path = "gs://open-targets-pre-data-releases/chembl-columns/chembl-inputs/chembl_33_target.jsonl"
target = spark.read.json(target_path)
target.persist()
target.show()

                                                                                

+--------------------+--------------------+----------------+--------------------+---------------+
|           _metadata|           pref_name|target_chembl_id|   target_components|    target_type|
+--------------------+--------------------+----------------+--------------------+---------------+
|{[{Sodium channel...|Sodium channel pr...|   CHEMBL4630763|[{Q14524, Sodium ...|PROTEIN COMPLEX|
|{[{A121, 100}, {H...|                A121|    CHEMBL613106|                  []|      CELL-LINE|
|{[{Spermatozoa, 1...|         Spermatozoa|    CHEMBL614870|                  []|      CELL-LINE|
|{[{Thioredoxin re...|Thioredoxin reduc...|      CHEMBL2403|[{Q9NNW7, Thiored...| SINGLE PROTEIN|
|{[{Fusarium oxysp...|  Fusarium oxysporum|    CHEMBL612648|                  []|       ORGANISM|
|{[{A375-SM, 100},...|             A375-SM|   CHEMBL4513121|                  []|      CELL-LINE|
|{[{Receptor-inter...|Receptor-interact...|   CHEMBL3784911|[{Q60855, Recepto...| SINGLE PROTEIN|
|{[{HCC44, 100}, {..

                                                                                

In [43]:
# Columns of the ChEMBL target table to attach to each row
list_target = ["target_components",
            "target_type"]

In [44]:
# Attach the target components and target type to every row (left join on target_chembl_id)
target_to_uniprot = join_dataframes(
    drug_to_assay,
    target,
    "target_chembl_id",
    "target_chembl_id",
    list_target,
).persist()
target_to_uniprot.show()

+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+----------------------+--------------+--------------------+--------------+
|      drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|     target_organism|    target_pref_name|target_chembl_id|confidence_score|confidence_description|assay_category|   target_components|   target_type|
+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------

In [45]:
# Extract the accession of the first entry in target_components (no explode is performed)
from pyspark.sql.functions import udf
from pyspark.sql.types import StringType
from pyspark.sql import Row

# Define a UDF to extract the 'accession' field
def extract_accession(rows):
    """Return the ``accession`` attribute of the first element of ``rows``.

    Only the first element is inspected; any further elements are ignored.
    Returns None when ``rows`` is None or empty.
    """
    if not rows:
        return None
    first_component = rows[0]
    return first_component.accession

# Wrap the Python function as a Spark UDF that returns a string column
extract_accession_udf = udf(extract_accession, StringType())

In [46]:
# Add an `accession` column holding the accession of the first target component of each row
target_to_uniprot_extr = target_to_uniprot.withColumn(
    "accession",
    extract_accession_udf(col("target_components")),
)

target_to_uniprot_extr.show()

+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+----------------------+--------------+--------------------+--------------+---------+
|      drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|     target_organism|    target_pref_name|target_chembl_id|confidence_score|confidence_description|assay_category|   target_components|   target_type|accession|
+------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+---------

In [47]:
# Row count after adding the accession column
target_to_uniprot_extr.count()

                                                                                

168729

#### Temporary solution - get accession IDs from the UniProt website

In [19]:
# # Obtain the list of unique targets to map them to uniprots

# from pyspark.sql import SparkSession
# import pandas as pd

# def save_column_as_csv(df, column_name, output_path):
#     """
#     This function takes a Spark DataFrame, a column name, and an output path.
#     It will remove duplicates based on the specified column, convert the unique values of that column to a Pandas DataFrame,
#     and save it as a CSV file.

#     :param df: Spark DataFrame
#     :param column_name: The name of the column to process
#     :param output_path: The path to save the CSV file
#     """
#     # Ensure the column exists in the DataFrame
#     if column_name not in df.columns:
#         raise ValueError(f"Column '{column_name}' does not exist in the DataFrame.")

#     # Select the specified column and drop duplicates
#     unique_values_df = df.select(column_name).distinct()

#     # Convert the Spark DataFrame to a Pandas DataFrame
#     unique_values_pd_df = unique_values_df.toPandas()

#     # Save the Pandas DataFrame as a CSV file
#     unique_values_pd_df.to_csv(output_path, index=False)


# save_column_as_csv(drug_to_assay, "target_chembl_id", "files/target_chembl_id.csv")

                                                                                

#### Temporary solution - get accession IDs from Barbara's table

In [20]:
# # Get accession for target_chembl_id from Barbara's table

# barbara_table_path = pd.read_csv("../data/drug2target_bioactivities_chembl_33_grouped.csv")
# columns = ['accession', 'target_chembl_id']
# filtered_table = barbara_table_path[columns].astype(str).drop_duplicates()
# barbara_table = spark.createDataFrame(filtered_table)
# barbara_table.show()


+---------+----------------+
|accession|target_chembl_id|
+---------+----------------+
|   Q96FL8|   CHEMBL1743126|
|   P08684|       CHEMBL340|
|   Q12809|       CHEMBL240|
|   Q9Y6L6|   CHEMBL1697668|
|   P35367|       CHEMBL231|
|   P02763|      CHEMBL4285|
|   P02768|      CHEMBL3253|
|   Q02763|      CHEMBL4128|
|   O94956|   CHEMBL1743124|
|   Q9NPD5|   CHEMBL1743121|
|   O15245|      CHEMBL5685|
|   P07550|       CHEMBL210|
|   P03372|       CHEMBL206|
|   Q92731|       CHEMBL242|
|   Q9HA47|      CHEMBL5682|
|   O75762|      CHEMBL6007|
|   O43868|      CHEMBL5780|
|   Q96SW2|   CHEMBL3763008|
|   P32320|      CHEMBL4502|
|   P27708|      CHEMBL3093|
+---------+----------------+
only showing top 20 rows



In [21]:
# # Join target_chembl_id and accession from Barbara's table

# list_protein = ["accession"]
              
# target_to_uniprot = join_dataframes(drug_to_assay, barbara_table, "target_chembl_id", "target_chembl_id", list_protein).persist()

# # from pyspark.sql.functions import col
# # target_to_uniprot = target_to_uniprot.filter(col("accession") != "null")

# target_to_uniprot.show()



+-------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+----------------------+--------------+---------+
|       drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|     target_organism|    target_pref_name|target_chembl_id|confidence_score|confidence_description|assay_category|accession|
+-------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+--------------------+--------------------+----------------+----------------+--

                                                                                

In [22]:
# target_to_uniprot.count()

                                                                                

171911

### Protein classification by uniprot from SwissProt

In [48]:
# Accession -> protein class table, read from a local CSV.
# NOTE(review): despite its name, `proteinclass_path` holds the pandas DataFrame, not a path.
proteinclass_path = pd.read_csv("../data/uniprot2family.csv")
# Cast every column to str and drop duplicate rows before handing the table to Spark.
# NOTE(review): entries without a class show up as the literal string "None" (see the output),
# not as Spark nulls — presumably from the CSV content or the str cast; TODO confirm that
# downstream filters treat "None" as intended.
proteinclass_str = proteinclass_path.astype(str).drop_duplicates()
proteinclass = spark.createDataFrame(proteinclass_str)
proteinclass.show()

+---------+------------+
|accession|proteinClass|
+---------+------------+
|   P32929|      Enzyme|
|   A4D0Y5|        None|
|   Q49A92|        None|
|   Q9UFW8|        None|
|   Q96K31|        None|
|   O14646|  Epigenetic|
|   Q8IWX8|        None|
|   Q99653|        None|
|   O94983|          TF|
|   Q8NA66|        None|
|   Q96M20|        None|
|   Q86VU5|      Enzyme|
|   P42695|        None|
|   Q8IYT2|      Enzyme|
|   Q9NSA3|        None|
|   Q96KP4|      Enzyme|
|   Q13956|      Enzyme|
|   O95476|      Enzyme|
|   Q9BYD5|        None|
|   Q969H4|      Enzyme|
+---------+------------+
only showing top 20 rows



In [49]:
# Annotate every row with the protein class of its accession (left join on accession)
proteinclass_list = ["proteinClass"]
uniprot_to_class = join_dataframes(
    target_to_uniprot_extr,
    proteinclass,
    "accession",
    "accession",
    proteinclass_list,
).persist()
uniprot_to_class.show()



+-------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+-----------------+---------------+--------------------+----------------+----------------+----------------------+--------------+--------------------+--------------+---------+------------+
|       drugId|      drugType|maximumClinicalTrialPhase|isApproved|       linkedTargets|      linkedDiseases|assay_chembl_id|assay_type|action_type|pchembl_value|standard_type|standard_units|standard_value|standard_relation|target_organism|    target_pref_name|target_chembl_id|confidence_score|confidence_description|assay_category|   target_components|   target_type|accession|proteinClass|
+-------------+--------------+-------------------------+----------+--------------------+--------------------+---------------+----------+-----------+-------------+-------------+--------------+--------------+--------

                                                                                

In [50]:
# Distribution of rows per protein class; null marks rows whose accession had no match in the class table
show_unique_values_and_counts(uniprot_to_class, 'proteinClass')



+--------------+------+
|  proteinClass| count|
+--------------+------+
|        Enzyme|  8866|
|          GPCR| 13084|
|          null|123239|
|        Kinase| 14469|
|            TF|   285|
|          None|  3567|
|            IC|   801|
|            NR|  1845|
|    Epigenetic|   964|
|   Transporter|  1577|
|TF; Epigenetic|    32|
+--------------+------+



                                                                                

In [51]:
# Row count after the protein-class join
uniprot_to_class.count()

168729

In [27]:
# uniprot_to_class.write.parquet("data/analysis/v2/uniprot_to_class_temp_v1")

# Data processing

### Drug-target pairs: Target is in MoA of a drug (targetInMoA, boolean)

In [28]:
# Targets dataset to map accession to Ensembl
# NOTE(review): this reassigns `target_path` and `target`, which earlier in the notebook refer to
# the ChEMBL 33 target table; any cell using `target` depends on which of the two loads ran last.
target_path = "gs://open-targets-data-releases/23.12/output/etl/json/targets"
target = spark.read.json(target_path)
target.persist()
target.show()


23/12/11 11:18:37 WARN package: Truncated the string representation of a plan since it was too large. This behavior can be adjusted by setting 'spark.sql.debug.maxToStringFields'.
[Stage 138:>                                                        (0 + 1) / 1]

+----------------+--------------------+--------------+--------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+---------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+--------------------+----+--------------------+--------------------+
|alternativeGenes|        approvedName|approvedSymbol|       biotype|      canonicalExons| canonicalTranscript|      chemicalProbes|          constraint|             dbXrefs|functionDescriptions|     genomicLocation|                  go|           hallmarks|          homologues|             id|        nameSynonyms|       obsoleteNames|     obsoleteSymbols|            pathways|          proteinIds|   safetyLiabilities|subcellularLocations|     

                                                                                

### Targets

In [29]:
# Evidence type: sources + classification for GE, clinical_GE, probes (boolean)



## Assay type filtering

In [30]:
# assay_type ≠ P, U

# confidence_score = 9,7

# assay_organism = human


## Activity threshold

In [31]:
# Where pchembl_value is available for a T-D pair, make new columns:
#     1. max_pchembl_value
#     2. median_pchembl_value

In [32]:
# Where pchembl_value is not available for a T-D pair, make new columns:
#     1. Calculate how much data is this
#     2. Think about what to do with different standard_units
#     3. Ideally:
#         1. max_standard_value_n
#         2. median_standard_value_n
#         3. Cutoff for every n

In [33]:
# Make column with activity of molecule: drugActive = TRUE/FALSE
#     1. based on protein type and:
#         1. max_pchembl_value
#         2. median_pchembl_value
#     2. based on cutoffs for other experiment types

# Data coverage

### Drugs

In [34]:
# Filtering by:
#         1. max_phase ≠ 4 | max_phase = 4 | probes = TRUE
#         2. moa = NaN | moa ≠ NaN
#         3. drugActive = TRUE
#         4. GE = TRUE | clinical_GE = TRUE

In [35]:
# Number of clinical candidates/approved drugs/chemical probes for which:
#     1. there is no MoA and they are bioactive against some targets:
#         1. which have GE/GE+clinical evidence for any disease
#     2. there is MoA but they are bioactive against some other targets
#         1. which have GE/GE+clinical evidence for any disease

### Targets

In [36]:
# For which action type is available

### Drug-Target pairs

In [37]:
# With non pharmacological action
