# Termite Linking Demo

## TERMite in action

In [74]:
%load_ext jupyternotify
import requests
import json
# Sample input text submitted to TERMite by the call at the end of this cell.
text = 'Iressa'
def call_termite(text,
                 url='https://termite.scibite-mvp.nonprod.entellect.com/termite',
                 entity_type='DRUG',
                 timeout=30):
    """POST ``text`` to a TERMite endpoint and pretty-print the entities it returns.

    Args:
        text: The free text to annotate.
        url: TERMite endpoint to call. Defaults to the demo (non-prod) instance.
        entity_type: Key looked up under ``RESP_MULTIDOC_PAYLOAD -> _document``
            in the JSON response. Defaults to ``'DRUG'``.
        timeout: Seconds to wait for the service before giving up.

    Returns:
        None. The selected entities are printed as indented, key-sorted JSON.

    Raises:
        requests.exceptions.HTTPError: If the service answers with an error status.
        requests.exceptions.Timeout: If the service does not answer within ``timeout``.
        KeyError: If the response lacks ``RESP_MULTIDOC_PAYLOAD`` or ``_document``.
    """
    payload = {'text': text}
    # requests has no default timeout; without one a stalled service hangs the kernel.
    r = requests.post(url, data=payload, timeout=timeout)
    # Surface HTTP errors directly instead of failing later while parsing an error page.
    r.raise_for_status()
    document = r.json()['RESP_MULTIDOC_PAYLOAD']['_document']
    # NOTE(review): presumably TERMite omits the entity-type key when nothing
    # matched -- confirm. In that case print an empty list rather than raising.
    hits = document.get(entity_type, [])
    print(json.dumps(hits, indent=4, sort_keys=True))
# Run the lookup for the sample text; the matched DRUG entities are printed as JSON.
call_termite(text)

<IPython.core.display.Javascript object>

[
    {
        "dependencyMet": true,
        "dictSynList": [
            "iressa"
        ],
        "docID": "_document",
        "docTitle": "",
        "entityMeta": {
            "_ext_name": "ChEMBL",
            "_ext_uri": "https://www.ebi.ac.uk/chembl/compound/inspect/CHEMBL939",
            "_termite_id": "TDG155845",
            "parent_canonical_smiles": "COc1cc2ncnc(Nc3ccc(F)c(Cl)c3)c2cc1OCCCN4CCOCC4",
            "parent_std_inchi": "InChI=1S/C22H24ClFN4O3/c1-29-20-13-19-16(12-21(20)31-8-2-5-28-6-9-30-10-7-28)22(26-14-25-19)27-15-3-4-18(24)17(23)11-15/h3-4,11-14H,2,5-10H2,1H3,(H,25,26,27)",
            "parent_std_inchi_key": "XGALLCVXEZPNRQ-UHFFFAOYSA-N"
        },
        "entityType": "DRUG",
        "exact_array": [
            {
                "byteEnd": 0,
                "byteStart": 0,
                "end": 6,
                "sentence": 1,
                "start": 0,
                "subsumed": false
            }
        ],
        "exact_string": "1#0-6",
 

## The processed data in Entellect

In [2]:
import spark_session_builder
# Obtain the Spark handles from the local helper module.
# NOTE(review): presumably session() returns (SparkSession, SparkContext) -- confirm
# against spark_session_builder.
sparkSession, sc = spark_session_builder.session()
# Bare expression so the notebook renders the context object as this cell's output.
sc

The linking process generates 3 topics:
- Resnet to Reaxys
- Pharmapendium (PP) to Reaxys
- Pharmapendium (PP) to Resnet

In [3]:
# Root of the HDFS cluster that holds the linking output.
hdfs_path = "hdfs://demo-full-load-hdfs-namenode:8020"

# Resnet -> Reaxys links: topic directory and CSV glob.
r2rx_data_path = '{}/links/resnet-reaxys'.format(hdfs_path)
r2rx_filename = '{}/*.csv'.format(r2rx_data_path)

# Pharmapendium -> Reaxys links: topic directory and CSV glob.
pp2rx_data_path = '{}/links/pp-reaxys'.format(hdfs_path)
pp2rx_filename = '{}/*.csv'.format(pp2rx_data_path)

# Pharmapendium -> Resnet links: topic directory and CSV glob.
pp2rs_data_path = '{}/links/pp-resnet'.format(hdfs_path)
pp2rs_filename = '{}/*.csv'.format(pp2rs_data_path)

In [136]:
def show_top2(df, records=10):
    """
    Render the first `records` rows of `df` as an HTML table.

    The rows are collected with limit()/toPandas(), converted with to_html(),
    and handed to display() as a raw 'text/html' mime bundle.
    """
    table_markup = df.limit(records).toPandas().to_html()
    mime_bundle = {'text/html': table_markup}
    display(mime_bundle, raw=True)

In [137]:
from IPython.core.display import HTML
def show_top(df, records=10):
    """
    Render the first `records` rows of `df` as an HTML table.

    The rows are collected with limit()/toPandas() and converted with
    to_html(escape=False), so cell values are emitted without HTML escaping.
    """
    preview = df.limit(records).toPandas()
    table_markup = preview.to_html(escape=False)
    display(HTML(table_markup))

## Resnet to Reaxys

In [138]:
# Load every Resnet-to-Reaxys link CSV, taking column names from the header row.
r2rx_df = (
    sparkSession.read
    .option("header", "true")
    .csv(r2rx_filename)
)
show_top(r2rx_df)

Unnamed: 0,hdfsIngestTime,offset,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,provenanceId
0,2021-02-24T11:55:24.301359,95505,iri:uniprot/CHEMBL561481,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3138034,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2733435,
1,2021-02-24T11:55:24.301359,95505,iri:uniprot/CHEMBL561481,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2733435,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3138034,
2,2021-02-24T11:55:24.301369,95506,iri:uniprot/CHEMBL561481,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2733435,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3138034,
3,2021-02-24T11:55:24.301369,95506,iri:uniprot/CHEMBL561481,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3138034,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2733435,
4,2021-02-24T11:55:24.304186,150196,iri:uniprot/CHEMBL42228,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/6164356,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038181225,
5,2021-02-24T11:55:24.305224,155785,iri:uniprot/CHEMBL276334,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/774966,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038182021,
6,2021-02-24T11:55:24.304651,156616,iri:uniprot/CHEMBL2323854,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3195859,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038199220,
7,2021-02-24T11:55:24.307341,156647,iri:uniprot/CHEMBL3337531,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2041500,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038113731,
8,2021-02-24T11:55:24.307588,157382,iri:uniprot/CHEMBL416578,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/24032549,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038094213,
9,2021-02-24T11:55:24.308715,157510,iri:uniprot/CHEMBL2108765,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/11465497,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038135438,


## Pharmapendium to Reaxys

In [139]:
# Load every Pharmapendium-to-Reaxys link CSV, taking column names from the header row.
pp2rx_df = (
    sparkSession.read
    .option("header", "true")
    .csv(pp2rx_filename)
)
show_top(pp2rx_df)

Unnamed: 0,hdfsIngestTime,offset,fingerprint,ppplus_drug,reaxys_chemicalcompoundname,provenanceId
0,2021-02-24T11:55:22.99909,24097,iri:uniprot/CHEMBL3218576,https://data.elsevier.com/lifescience/entity/ppplus/drug/jO4qcQ5aUr9,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038135647,
1,2021-02-24T11:55:23.004095,24098,iri:uniprot/CHEMBL3218576,https://data.elsevier.com/lifescience/entity/ppplus/drug/jO4qcQ5aUr9,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038135647,
2,2021-02-24T11:55:23.010593,28542,iri:uniprot/CHEMBL841,https://data.elsevier.com/lifescience/entity/ppplus/drug/5G3oKDOTrf_,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038102779,
3,2021-02-24T11:55:23.01143,28543,iri:uniprot/CHEMBL841,https://data.elsevier.com/lifescience/entity/ppplus/drug/5G3oKDOTrf_,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038102779,
4,2021-02-24T11:55:23.051009,34198,iri:uniprot/CHEMBL170797,https://data.elsevier.com/lifescience/entity/ppplus/drug/LmZQWquqxuF,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038125122,
5,2021-02-24T11:55:23.054978,34199,iri:uniprot/CHEMBL170797,https://data.elsevier.com/lifescience/entity/ppplus/drug/LmZQWquqxuF,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038125122,
6,2021-02-24T11:55:23.057858,35278,iri:uniprot/CHEMBL486174,https://data.elsevier.com/lifescience/entity/ppplus/drug/FNJ8ldAiEOA,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038103204,
7,2021-02-24T11:55:23.05872,35279,iri:uniprot/CHEMBL486174,https://data.elsevier.com/lifescience/entity/ppplus/drug/FNJ8ldAiEOA,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038103204,
8,2021-02-24T11:55:23.060412,35283,iri:uniprot/CHEMBL1201213,https://data.elsevier.com/lifescience/entity/ppplus/drug/mBTPxofGut7,https://data.elsevier.com/lifescience/entity/ppplus/drug/ADA-62DMlw9,
9,2021-02-24T11:55:23.060412,35283,iri:uniprot/CHEMBL1201213,https://data.elsevier.com/lifescience/entity/ppplus/drug/ADA-62DMlw9,https://data.elsevier.com/lifescience/entity/ppplus/drug/mBTPxofGut7,


## Pharmapendium to Resnet

In [143]:
# Load every Pharmapendium-to-Resnet link CSV, taking column names from the header row.
pp2rs_df = (
    sparkSession.read
    .option("header", "true")
    .csv(pp2rs_filename)
)
show_top(pp2rs_df)

Unnamed: 0,hdfsIngestTime,offset,fingerprint,ppplus_drug,resnet_smallmol,provenanceId
0,2021-02-24T11:55:24.559873,45395,iri:uniprot/CHEMBL1201346,https://data.elsevier.com/lifescience/entity/ppplus/drug/DH4u_TrfagG,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038093101,
1,2021-02-24T11:55:24.559863,45512,iri:uniprot/CHEMBL2043437,https://data.elsevier.com/lifescience/entity/ppplus/drug/j9sCxvbs8NA,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038136039,
2,2021-02-24T11:55:24.568017,45514,iri:uniprot/CHEMBL516,https://data.elsevier.com/lifescience/entity/ppplus/drug/aZ1m_BKrhL8,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076869,
3,2021-02-24T11:55:24.567722,45546,iri:uniprot/CHEMBL312448,https://data.elsevier.com/lifescience/entity/ppplus/drug/3LL9lyH0tN-,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038099059,
4,2021-02-24T11:55:24.572663,45606,iri:uniprot/CHEMBL631,https://data.elsevier.com/lifescience/entity/ppplus/drug/gTokRyMybs4,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038088761,
5,2021-02-24T11:55:24.572248,45752,iri:uniprot/CHEMBL1201471,https://data.elsevier.com/lifescience/entity/ppplus/drug/qo6FO7aGFJF,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038096035,
6,2021-02-24T11:55:24.575978,45754,iri:uniprot/CHEMBL1200709,https://data.elsevier.com/lifescience/entity/ppplus/drug/P9DmldiDXr9,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038095378,
7,2021-02-24T11:55:24.575333,45787,iri:uniprot/CHEMBL3137342,https://data.elsevier.com/lifescience/entity/ppplus/drug/LY7jJYb6ia-,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038135495,
8,2021-02-24T11:55:24.579929,45798,iri:uniprot/CHEMBL2107448,https://data.elsevier.com/lifescience/entity/ppplus/drug/MZuYA_lTIz9,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038095367,
9,2021-02-24T11:55:24.578934,46186,iri:uniprot/CHEMBL2108638,https://data.elsevier.com/lifescience/entity/ppplus/drug/Yg5ymXArLR6,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038135105,


# Let's join the linking topics to the data topics so we can see what these things are called

In [144]:
from PWBdata import PWBdata
# Project helper for reading data topics, constructed with the HDFS root.
pwb = PWBdata(hdfs_path)
# Fetch the Resnet 'Smallmol' topic; it is joined to the link table in the next cell.
# NOTE(review): presumably the arguments are (session, source, entity) -- confirm against PWBdata.
resnet_Smallmol_df = pwb.get_dataframe(sparkSession, 'resnet', 'Smallmol')

In [145]:
# Attach the Resnet display name to each Resnet-to-Reaxys link by joining on the
# small-molecule ID, keeping only the link columns plus the renamed name column.
r2rx_with_resnet_names = (
    r2rx_df
    .join(resnet_Smallmol_df, r2rx_df.resnet_smallmol == resnet_Smallmol_df.SmallMol)
    .select('fingerprint', 'reaxys_chemicalcompoundname', 'resnet_smallmol', 'hasName')
    .withColumnRenamed('hasName', 'resnet_name')
)

In [146]:
# Preview the first 10 Resnet-to-Reaxys links with the Resnet name attached.
show_top(r2rx_with_resnet_names)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name
0,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/20487708,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
1,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/30985940,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
2,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/15494998,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
3,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/34095599,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
4,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/32058082,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
5,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/8101020,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
6,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/19732180,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
7,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/29555266,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
8,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/8003908,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate
9,iri:uniprot/CHEMBL1229,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/19882123,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038079849,oseltamivir carboxylate


In [147]:
# RMC compound-name topic, trimmed to the name ID, its compound ID and the name text.
rmc_compoundname_df = (
    pwb.get_dataframe(sparkSession, 'rmc', 'chemicalcompoundname')
    .select('ChemicalCompoundName', 'hasChemicalCompound', 'hasName')
)
show_top(rmc_compoundname_df)

Unnamed: 0,ChemicalCompoundName,hasChemicalCompound,hasName
0,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100000890,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9616311,poly(ethylene glycol) MW = 3400 d = 1.204
1,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/10000288,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3365544,4-amino-benzoic acid-(isopentylamino-tert-butyl ester)
2,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100004786,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691282,HSDGTFTSELSRLREGARLQRLLQGLV
3,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100004839,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691295,sodium diclofenac transcutol(R) oleic acid d-limonene water; mixture of
4,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100006951,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691697,4.2 percent 1 4-cis and 95.8 percent 1 2-polybutadiene Mw= 444000 Mw/Mn= 2.05 crystallinity 21.0 percent; monomer(s): 1 3-butadiene
5,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100007139,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691725,isotactic polypropylene from polymerization with 2-arylindenyl metallocene catalyst with syn-like conformation pentad content [mmmm] = 61 percent Mn= 53000 MWD= 1.9 Tm= 35-110 deg C; monomer(s): propylene
6,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100007602,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691819,polypropylene/clay 4 wt percent clay
7,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100007641,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691825,polymer poly(ester imide)
8,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/100007954,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/9691878,polystyrene resin with [(5-aminopentyl)amino]methyl group ring substitution cross-linked with divinylbenzene full trifluoroacetic acid salt
9,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/10000804,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/3366780,4-Butylamino-2-hydroxy-benzoesaeure-(3-diaethylamino-propylester)


### NB: although the linking table labels the RMC ID column `reaxys_chemicalcompoundname`, it's actually a chemicalcompound ID

In [148]:
# Add the Reaxys (RMC) names: the link column holds a chemicalcompound ID, so it is
# matched against hasChemicalCompound. The redundant join key is dropped and exact
# duplicate rows are removed.
r2rx_with_resnet_and_RMC_names = (
    r2rx_with_resnet_names
    .join(
        rmc_compoundname_df,
        rmc_compoundname_df.hasChemicalCompound == r2rx_with_resnet_names.reaxys_chemicalcompoundname,
    )
    .withColumnRenamed('hasName', 'reaxys_name')
    .drop('hasChemicalCompound')
    .dropDuplicates()
)
show_top(r2rx_with_resnet_and_RMC_names)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name,ChemicalCompoundName,reaxys_name
0,iri:uniprot/CHEMBL447565,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10053679,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038104380,alpha-hederin,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/93103122,kalopanaxsaponin A
1,iri:uniprot/CHEMBL447565,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10053679,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038104380,alpha-hederin,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1728890821,alpha-hederin
2,iri:uniprot/CHEMBL447565,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10053679,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038104380,alpha-hederin,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/93103123,hederagenin 3-O-[α-L-rhamnopyranosyl-(1->2)-α-L-arabinopyranoside]
3,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/181699184,diamino ethene
4,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1928402251,diaminoethylene
5,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/181699187,Ethylendiamine
6,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1440374261,urea
7,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1937189765,1 1-diaminoethylene
8,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/181699185,guanidine
9,iri:uniprot/CHEMBL816,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/10151656,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038076374,ethylenediamine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/181699186,ethylendiamine


## Now we have the basis of an Upper Ontology class, for Drugs
We can query for a drug of interest and ask what Entellect "knows" about it

In [149]:
import pyspark.sql.functions as f
# Keep only the linked rows whose Reaxys name is exactly 'carvacrol'.
query_df = r2rx_with_resnet_and_RMC_names.where(
    f.col('reaxys_name') == 'carvacrol'
)
show_top(query_df)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name,ChemicalCompoundName,reaxys_name
0,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol
1,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol
2,iri:uniprot/CHEMBL281202,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/2139818,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038096597,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1908524134,carvacrol


## The table in RMC that tells us what parameter is being measured is MeasureSettings.  The display values of those parameters are in MeasuredParameter
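We need to link that back to the chemical compound through BiologicalActivity, but each MeasureSettings row holds many BiologicalActivity IDs in one `||`-separated string, so we split that column and use the `explode` function to get one row per ID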

In [150]:
from pyspark.sql.functions import split, explode
# MeasureSettings rows carry their BiologicalActivity IDs as one '||'-delimited
# string; split it and explode so each (settings, activity) pair gets its own row.
measure_df = (
    pwb.get_dataframe(sparkSession, 'rmc', 'measuresettings')
    # split() takes a regex, so the pipes must be escaped. The raw string keeps the
    # backslashes literal; "\|" in a normal string is an invalid escape sequence.
    .withColumn("hasBiologicalActivity", explode(split("hasBiologicalActivity", r"\|\|")))
    .select('MeasureSettings', 'hasMeasuredParameter', 'hasBioAssay', 'hasChemicalCompound', 'hasBiologicalActivity')
)

# Lookup of measured-parameter IDs to their display labels.
parameter_df = (
    pwb.get_dataframe(sparkSession, 'rmc', 'measuredparameter')
    .select('MeasuredParameter', 'hasLabel')
)

# Attach the display label to every measure-settings row.
rmc_data = measure_df.join(parameter_df, parameter_df.MeasuredParameter == measure_df.hasMeasuredParameter)
show_top(rmc_data)

Unnamed: 0,MeasureSettings,hasMeasuredParameter,hasBioAssay,hasChemicalCompound,hasBiologicalActivity,MeasuredParameter,hasLabel
0,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1000053,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/751524,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5237933,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,IC50
1,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1000480,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/105,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/751709,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5226216,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/105,qualitative
2,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1000791,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/751796,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5239789,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,Fold-increase
3,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1000975,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/751875,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240575,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,Fold-increase
4,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1001911,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/744,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752138,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5234580,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/744,eosinophil count
5,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1002397,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752342,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240009,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,Fold-increase
6,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1002397,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752342,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240007,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,Fold-increase
7,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1002397,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752342,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240008,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/86,Fold-increase
8,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1002539,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/2,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752427,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240639,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/2,% Inhibition
9,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/1002539,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/2,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/752427,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/5240637,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/2,% Inhibition


## The route from ChemicalCompound to MeasureSettings is via AgentConfiguration and BiologicalActivity

In [151]:
# AgentConfiguration topic, reduced to the compound ID and the configuration ID.
agent_df = (
    pwb.get_dataframe(sparkSession, 'rmc', 'agentconfiguration')
    .select('hasChemicalCompound', 'AgentConfiguration')
)

# Find the agent configurations that use the compounds returned by the query.
agent_for_query = query_df.join(
    agent_df,
    query_df.reaxys_chemicalcompoundname == agent_df.hasChemicalCompound,
)
show_top(agent_for_query)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name,ChemicalCompoundName,reaxys_name,hasChemicalCompound,AgentConfiguration
0,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/11988564
1,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/1682687
2,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/8980938
3,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19687997
4,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/3645431
5,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/6047262
6,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/10205357
7,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/15874091
8,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/20019435
9,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/2140898


In [152]:
# BiologicalActivity topic, reduced to its ID and its links to MeasureSettings
# and AgentConfiguration.
bioact_df = (
    pwb.get_dataframe(sparkSession, 'rmc', 'biologicalactivity')
    .select('BiologicalActivity', 'hasMeasureSettings', 'hasAgentConfiguration')
)

# Follow each agent configuration of the queried compound to its biological activities.
bioact_for_query = agent_for_query.join(
    bioact_df,
    agent_for_query.AgentConfiguration == bioact_df.hasAgentConfiguration,
)
show_top(bioact_for_query)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name,ChemicalCompoundName,reaxys_name,hasChemicalCompound,AgentConfiguration,BiologicalActivity,hasMeasureSettings,hasAgentConfiguration
0,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307254,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286138,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
1,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307244,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286163,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
2,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307292,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286173,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
3,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307295,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286177,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
4,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307238,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286139,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
5,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307247,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286170,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
6,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307262,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286162,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
7,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307300,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286174,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
8,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307231,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286127,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653
9,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/47307240,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/6286154,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/19544653


## Next, we join the bioactivity table for our query to the RMC data derived from measuresettings

In [153]:
# Pair every bioactivity row found for the query with the RMC rows whose
# hasBiologicalActivity value equals that row's BiologicalActivity value.
rmc_for_query = bioact_for_query.join(
    rmc_data,
    on=(rmc_data.hasBiologicalActivity == bioact_for_query.BiologicalActivity),
)
# Preview the joined table with the notebook's show_top helper.
show_top(rmc_for_query)

Unnamed: 0,fingerprint,reaxys_chemicalcompoundname,resnet_smallmol,resnet_name,ChemicalCompoundName,reaxys_name,hasChemicalCompound,AgentConfiguration,BiologicalActivity,hasMeasureSettings,hasAgentConfiguration,MeasureSettings,hasMeasuredParameter,hasBioAssay,hasChemicalCompound.1,hasBiologicalActivity,MeasuredParameter,hasLabel
0,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/1651909,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/2922368,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/525194,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/1651909,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/525194,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/106,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/403147,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/2922368,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/106,MIC
1,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/1651909,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/2922368,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/525194,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/1651909,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/525194,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/106,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/403147,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/2922368,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/106,MIC
2,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/12656148,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/29794070,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4430732,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/12656148,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4430732,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/444,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/3955080,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/29794070,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/444,LDLo
3,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/12656148,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/29794070,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4430732,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/12656148,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4430732,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/444,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/3955080,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/29794070,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/444,LDLo
4,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/13279788,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/31390517,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4647668,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/13279788,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4647668,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/59,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/4184550,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/31390517,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/59,concentration
5,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/13279788,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/31390517,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4647668,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/13279788,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/4647668,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/59,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/4184550,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/31390517,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/59,concentration
6,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/15619535,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/36737797,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5470866,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/15619535,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5470866,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/109,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/4935864,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/36737797,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/109,number
7,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/15619535,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/36737797,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5470866,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/15619535,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5470866,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/109,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/4935864,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/36737797,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/109,number
8,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038176084,Listerine,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/16085253,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/37937829,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5636025,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/16085253,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5636025,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/5040221,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/37937829,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,IC50
9,iri:uniprot/CHEMBL29411,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/resnet/smallmol/72057594038201312,thymol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompoundname/1570713044,carvacrol,https://data.elsevier.com/lifescience/entity/reaxys/chemicalcompound/1860514,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/16085253,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/37937829,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5636025,https://data.elsevier.com/lifescience/entity/reaxys/agentconfiguration/16085253,https://data.elsevier.com/lifescience/entity/reaxys/measuresettings/5636025,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,https://data.elsevier.com/lifescience/entity/reaxys/bioassay/5040221,,https://data.elsevier.com/lifescience/entity/reaxys/biologicalactivity/37937829,https://data.elsevier.com/lifescience/entity/reaxys/measuredparameter/900,IC50


In [155]:
# Count the joined rows per hasLabel value and list the most frequent labels first.
show_top(
    rmc_for_query
    .groupBy('hasLabel')
    .count()
    .orderBy('count', ascending=False),
    20,
)

Unnamed: 0,hasLabel,count
0,MIC,908
1,qualitative,358
2,% Inhibition,250
3,IC50,158
4,concentration,150
5,number,140
6,% Decrease,108
7,fractional inhibitory concentration index,98
8,MBC,96
9,percentage increase,86
