## Make lists of drugs ranked using MLP link prediction
Here we create three lists of roughly 30k drugs each, ranked by their MLP link prediction score against nodes that represent important concepts in COVID-19.

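This notebook simply parses the file `mlp_link_pred_ranked_drug_compound_list.csv`, which is produced by the notebook `Graph embedding and Link Prediction (SkipGram 20201012 chembl validation).ipynb`:
http://localhost:8888/notebooks/Graph%20embedding%20and%20Link%20Prediction%20(SkipGram%2020201012%20chembl%20validation).ipynb
(this is a local Jupyter link and only resolves on a machine that is serving that notebook).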

The three lists are drugs ranked by their link prediction score with:
1. SARS-CoV-2 (`NCBITaxon:2697049`)

2. Coronavirus Infections (`MESH:D018352`)

3. Respiratory Distress Syndrome, Adult (`MESH:D012128`)

In [53]:
import os

# Remote locations of the 20201012 embedding artifacts.
s3_path = "s3://kg-hub-public-data/embeddings/20201012/"  # keep trailing slash
base_url = "https://kg-hub.berkeleybop.io/embeddings/20201012/"


def _remote_url(local_path):
    """Return the download URL for a file stored at `local_path`.

    The remote layout mirrors the local download layout underneath
    `base_url`. URLs always use "/" as the separator, so the local OS
    separator is translated instead of reusing `os.path.join`, which would
    put backslashes into the URL on Windows.
    """
    return base_url.rstrip("/") + "/" + local_path.replace(os.sep, "/")


# Local download layout (relative to the notebook's working directory).
base_dl_dir = "downloaded_data"
pos_neg_data_dir = os.path.join(base_dl_dir, "pos_neg_data_dir")
ranked_drug_dir = os.path.join(base_dl_dir, "ranked-drug-lists")

# pos/neg nodes for better pos/neg edge set
pos_node_file = os.path.join(pos_neg_data_dir, "positive_nodes.tsv")
pos_node_url = _remote_url(pos_node_file)
neg_node_file = os.path.join(pos_neg_data_dir, "negative_nodes.tsv")
neg_node_url = _remote_url(neg_node_file)

# ranked list stuff
mlp_link_pred_outdir = os.path.join(ranked_drug_dir, "mlp_link_pred")
# Create the output directory up front so later cells can download into it.
os.makedirs(mlp_link_pred_outdir, exist_ok=True)
ranked_drug_file = os.path.join(mlp_link_pred_outdir, "mlp_link_pred_ranked_drug_compound_list.csv")
ranked_drug_url = _remote_url(ranked_drug_file)

#### get positive node info

In [43]:
import pandas as pd
import os

# Download the positive-node table once; later runs reuse the local copy.
if not os.path.exists(pos_node_file):
    os.makedirs(pos_neg_data_dir, exist_ok=True)
    wget_status = os.system(" ".join(["wget", "-O", pos_node_file, pos_node_url]))
    if wget_status != 0:
        # `wget -O` creates the output file even when the transfer fails.
        # Remove it so the existence check above retries the download next
        # time instead of treating an empty/partial file as a valid cache.
        if os.path.exists(pos_node_file):
            os.remove(pos_node_file)
        raise RuntimeError(
            "wget exited with status {} while fetching {}".format(wget_status, pos_node_url)
        )

# Tab-separated file; lines starting with '#' are treated as comments.
pos_nodes = pd.read_csv(pos_node_file, sep="\t", comment="#")
pos_nodes

wget -O downloaded_data/pos_neg_data_dir/positive_nodes.tsv https://kg-hub.berkeleybop.io/embeddings/20201012/downloaded_data/pos_neg_data_dir/positive_nodes.tsv


Unnamed: 0,curie,description
0,NCBITaxon:2697049,SARS-CoV-2
1,MESH:D018352,Coronavirus Infections
2,MESH:D012128,"Respiratory Distress Syndrome, Adult"
3,MESH:D045169,Severe Acute Respiratory Syndrome
4,MESH:D017934,Coronavirus
5,MESH:D045473,SARS Virus
6,NCBITaxon:227859,Severe acute respiratory syndrome-related coro...


#### Get ranked list output from graph embedding notebook

In [86]:
import pandas as pd
import os

# Download the ranked drug list once; later runs reuse the local copy.
if not os.path.exists(ranked_drug_file):
    # Make sure the target directory exists even if this cell is run on its own.
    os.makedirs(mlp_link_pred_outdir, exist_ok=True)
    wget_status = os.system(" ".join(["wget", "-O", ranked_drug_file, ranked_drug_url]))
    if wget_status != 0:
        # `wget -O` creates the output file even when the transfer fails.
        # Remove it so the existence check above retries the download next
        # time instead of treating an empty/partial file as a valid cache.
        if os.path.exists(ranked_drug_file):
            os.remove(ranked_drug_file)
        raise RuntimeError(
            "wget exited with status {} while fetching {}".format(wget_status, ranked_drug_url)
        )

# Comma-separated file whose first column is used as the row index;
# lines starting with '#' are treated as comments.
ranked_drugs = pd.read_csv(ranked_drug_file, sep=",", comment="#", index_col=0)

### Get ranked list of biolink:Drugs --> SARS-CoV-2 links

In [101]:
# Rows whose category is anything other than exactly 'biolink:ChemicalSubstance'
# and whose description is 'SARS-CoV-2'; show the first 30 in the frame's
# existing row order.
not_plain_chemical = ranked_drugs["category"].ne('biolink:ChemicalSubstance')
targets_sars_cov_2 = ranked_drugs["description"].eq('SARS-CoV-2')
ranked_drugs.loc[not_plain_chemical & targets_sars_cov_2].head(30)

Unnamed: 0,pred,subject,name,category,object,description
179,0.731687,CHEBI:16243,quercetin,biolink:Drug|biolink:ChemicalSubstance,NCBITaxon:2697049,SARS-CoV-2
312,0.724675,CHEBI:9588,ticlopidine,biolink:Drug|biolink:ChemicalSubstance,NCBITaxon:2697049,SARS-CoV-2
423,0.720082,DrugCentral:1775,methysergide,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2
570,0.716496,CHEBI:2904,atenolol,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2
745,0.712406,CHEBI:15365,aspirin,biolink:Drug|biolink:ChemicalSubstance,NCBITaxon:2697049,SARS-CoV-2
794,0.711467,CHEBI:5001,fenofibrate,biolink:Drug|biolink:ChemicalSubstance,NCBITaxon:2697049,SARS-CoV-2
980,0.707394,ttd.drug:D0P9BM,3-hydroxybenzylhydrazine,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2
1032,0.706292,ttd.drug:D0X7EQ,Resorcinol compound 19,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2
1038,0.706205,ttd.drug:D0Y6ON,Pyrrolo-pyrazine derivative 4,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2
1135,0.704123,ttd.drug:D02KKU,Oleanonic acid,biolink:Drug,NCBITaxon:2697049,SARS-CoV-2


In [95]:
# Rows whose category is anything other than exactly 'biolink:ChemicalSubstance'
# and whose description is 'Coronavirus Infections'; show the first 30 in the
# frame's existing row order.
not_plain_chemical = ranked_drugs["category"].ne('biolink:ChemicalSubstance')
targets_cov_infections = ranked_drugs["description"].eq('Coronavirus Infections')
ranked_drugs.loc[not_plain_chemical & targets_cov_infections].head(30)

Unnamed: 0,pred,subject,name,category,object,description
1990,0.691101,CHEBI:5001,fenofibrate,biolink:Drug|biolink:ChemicalSubstance,MESH:D018352,Coronavirus Infections
2436,0.685987,CHEBI:16243,quercetin,biolink:Drug|biolink:ChemicalSubstance,MESH:D018352,Coronavirus Infections
2485,0.685391,DrugCentral:1775,methysergide,biolink:Drug,MESH:D018352,Coronavirus Infections
2974,0.680028,ttd.drug:D0C3LW,ISIS 19649,biolink:Drug,MESH:D018352,Coronavirus Infections
3012,0.67962,CHEBI:46081,fluconazole,biolink:Drug|biolink:ChemicalSubstance,MESH:D018352,Coronavirus Infections
3119,0.678474,ttd.drug:D0G7XW,"Beta-phenyl-alpha,beta-unsaturated carbonyl de...",biolink:Drug,MESH:D018352,Coronavirus Infections
3317,0.67653,ttd.drug:D06YMZ,N-(6-phenethyl-1H-indazol-3-yl)butyramide,biolink:Drug,MESH:D018352,Coronavirus Infections
3353,0.676207,CHEBI:46345,fluorouracil,biolink:Drug|biolink:ChemicalSubstance,MESH:D018352,Coronavirus Infections
3441,0.675237,ttd.drug:D0T2EW,Cyclohexylglycine-(2S)-cyanopyrrolidine,biolink:Drug,MESH:D018352,Coronavirus Infections
3535,0.674072,ttd.drug:D0R3BV,Inotersen,biolink:Drug,MESH:D018352,Coronavirus Infections


In [96]:
# Rows whose category is anything other than exactly 'biolink:ChemicalSubstance'
# and whose description is 'Respiratory Distress Syndrome, Adult'; show the
# first 30 in the frame's existing row order.
not_plain_chemical = ranked_drugs["category"].ne('biolink:ChemicalSubstance')
targets_ards = ranked_drugs["description"].eq('Respiratory Distress Syndrome, Adult')
ranked_drugs.loc[not_plain_chemical & targets_ards].head(30)

Unnamed: 0,pred,subject,name,category,object,description
3307,0.676658,CHEBI:5001,fenofibrate,biolink:Drug|biolink:ChemicalSubstance,MESH:D012128,"Respiratory Distress Syndrome, Adult"
4937,0.659971,DrugCentral:1775,methysergide,biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
5264,0.656671,ttd.drug:D0X7EQ,Resorcinol compound 19,biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
6206,0.647861,CHEBI:9588,ticlopidine,biolink:Drug|biolink:ChemicalSubstance,MESH:D012128,"Respiratory Distress Syndrome, Adult"
6396,0.646036,CHEBI:16243,quercetin,biolink:Drug|biolink:ChemicalSubstance,MESH:D012128,"Respiratory Distress Syndrome, Adult"
6510,0.64509,ttd.drug:D02YIW,"5-(N,N-hexamethylene)-amiloride",biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
6904,0.641131,ttd.drug:D07GJZ,CQA 206-291,biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
7393,0.636044,ttd.drug:D0Z0QD,LVVYPWT,biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
7525,0.634902,ttd.drug:D0Z8HX,"6,7-dichloro-1H-indole-2,3-dione",biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
7790,0.632281,ttd.drug:D02GTU,Benzoyl-piperidine derivative 2,biolink:Drug,MESH:D012128,"Respiratory Distress Syndrome, Adult"
