In [1]:
import pandas as pd
import itertools

In [2]:
# Relative directories (with trailing slash, so file names can be appended
# by plain string concatenation) used by the cells below.
prediction_dir, triplets_dir, drugbank_dir = (
    '../predictions/',
    '../data/triplets/',
    '../data/drugbank/',
)

In [3]:
# Locations of the three lookup tables used to translate IDs into names.
drug_map_path = drugbank_dir + 'drug_id_name_map.csv'
food_map_path = triplets_dir + 'food_name.tsv'
compound_map_path = triplets_dir + 'compounds_names.tsv'

# In every file the first column is consumed as the DataFrame index.
# The DrugBank table is comma-separated, the two triplet tables are tab-separated.
drug_name_map = pd.read_csv(drug_map_path, index_col=[0], sep=',')
food_name_map = pd.read_csv(food_map_path, index_col=[0], sep='\t')
food_compound_map = pd.read_csv(compound_map_path, index_col=[0], sep='\t')

In [4]:
# Build one plain dict per entity type mapping an ID to its name.
# If an ID occurs more than once, the last occurrence wins.

# drugs: column 'id' -> column 'drug_name'
drug_ids = drug_name_map['id']
drug_names = drug_name_map['drug_name']
drug_id_map_dict = {key: label for key, label in zip(drug_ids, drug_names)}

# foods: column 'public_id' -> column 'name'
food_ids = food_name_map['public_id']
food_names = food_name_map['name']
food_id_map_dict = {key: label for key, label in zip(food_ids, food_names)}

# food compounds: column 'compound_id' -> column 'name'
compound_ids = food_compound_map['compound_id']
compound_names = food_compound_map['name']
compound_id_map_dict = {key: label for key, label in zip(compound_ids, compound_names)}

In [8]:
# Load the semicolon-separated table of entities of interest and pull out
# its 'DrugBank_id' column as a NumPy array for positional access later on.
common_drugs = pd.read_csv('../data/common_drugs.csv', sep=';')
common_drugs_ids = common_drugs['DrugBank_id'].values

# Show the full table (plain text) followed by the extracted ID array.
print(common_drugs)
common_drugs_ids

                    drug  DrugBank_name DrugBank_id
0               lexaurin     Bromazepam     DB01558
1            fraxiparine     Nadroparin     DB08813
2               novalgin     Metamizole     DB04817
3              dithiaden      Diltiazem     DB00343
4               diazepam       Diazepam     DB00829
5              tamoxifen      Tamoxifen     DB00675
6   paracetamol(paralen)  Acetaminophen     DB00316
7              metamizol     Metamizole     DB04817
8              neurontin     Gabapentin     DB00996
9                ketonal      Ketoprofe     DB01009
10               ibalgin      Ibuprofen     DB01050
11               calcium        Calcium     DB01373
12              euthyrox  Levothyroxine     DB00451
13            filgrastim     Filgrastim     DB00099
14                ananas      pineapple   FOOD00012
15                   mak          poppy   FOOD00127
16               merunka       appricot   FOOD00144
17                  grep     grapefruit   FOOD00256
18          

array(['DB01558', 'DB08813', 'DB04817', 'DB00343', 'DB00829', 'DB00675',
       'DB00316', 'DB04817', 'DB00996', 'DB01009', 'DB01050', 'DB01373',
       'DB00451', 'DB00099', 'FOOD00012', 'FOOD00127', 'FOOD00144',
       'FOOD00256', 'FOOD00206', 'FOOD00178'], dtype=object)

In [27]:
# Query entity whose predictions are inspected. Position 14 corresponds to
# 'FOOD00012' in the ID array displayed earlier in this notebook.
drug_id = common_drugs_ids[14]

# Predictions file for the selected entity; its first column becomes the index.
predictions = pd.read_csv(
    prediction_dir + 'complex_' + drug_id + '_negative_best_pipeline.csv',
    sep=',',
    index_col=[0],
)

# keep just drug/food/food compound predictions
# Every row starts with the placeholder type 'xxx'; rows still carrying it
# after the three pattern checks are dropped.
tail_labels = predictions['tail_label']
predictions['node_type'] = 'xxx'

# NOTE: order matters. str.contains() searches anywhere in the label, so the
# drug pattern can also hit a label of the form 'FDB<digits>'; the later
# 'FDB' assignment overrides that. na=False treats a missing label as
# "no match" instead of producing an NA mask that .loc would reject.
predictions.loc[tail_labels.str.contains(r"DB\d+", regex=True, na=False), 'node_type'] = "drug"
predictions.loc[tail_labels.str.contains("FDB", na=False), 'node_type'] = "food_compound"
predictions.loc[tail_labels.str.contains("FOOD", na=False), 'node_type'] = "food"

predictions = predictions.loc[predictions['node_type'] != 'xxx']
predictions

Unnamed: 0,tail_id,score,tail_label,in_validation,in_testing,node_type
3071,3071,7.048107,DB01168,False,False,drug
2678,2678,6.327956,DB00752,False,False,drug
3149,3149,6.048725,DB01247,False,False,drug
3319,3319,6.003144,DB01626,False,False,drug
4493,4493,5.924361,DB12612,False,False,drug
3074,3074,5.851902,DB01171,False,False,drug
3227,3227,5.732646,DB01367,False,False,drug
2533,2533,5.526481,DB00601,False,False,drug
2648,2648,5.474874,DB00721,False,False,drug
2949,2949,5.452534,DB01037,False,False,drug


In [28]:
# Print every remaining prediction as "<tail id> <entity name>".
# The lookup table is chosen by node type; anything that is neither 'drug'
# nor 'food' is resolved through the food-compound map.
name_lookup_by_type = {
    'drug': drug_id_map_dict,
    'food': food_id_map_dict,
}

for _, prediction in predictions.iterrows():
    entity_id = prediction.tail_label
    id_to_name = name_lookup_by_type.get(prediction.node_type, compound_id_map_dict)
    # An ID absent from the chosen map raises KeyError.
    print(entity_id, id_to_name[entity_id])

DB01168 Procarbazine
DB00752 Tranylcypromine
DB01247 Isocarboxazid
DB01626 Pargyline
DB12612 Ozanimod
DB01171 Moclobemide
DB01367 Rasagiline
DB00601 Linezolid
DB00721 Procaine
DB01037 Selegiline
DB06654 Safinamide
DB00780 Phenelzine
DB00859 Penicillamine
DB00242 Cladribine
DB04820 Nialamide
DB00688 Mycophenolate mofetil
DB00805 Minaprine
DB04832 Zimelidine
DB09241 Methylene blue
DB00544 Fluorouracil


In [30]:
# Check whether the pair (query entity, one predicted entity) already occurs
# in the train / validation / test triplets.


def relation_exists(triplets, head, tail):
    """Tell whether `triplets` holds at least one row for the given pair.

    Parameters
    ----------
    triplets : pandas.DataFrame
        Frame with a 'tail' column, whose index labels are compared to `head`.
    head : object
        Value that a row's index label must equal.
    tail : object
        Value that the row's 'tail' column must equal.

    Returns
    -------
    bool
        True if some row satisfies both conditions, otherwise False
        (including for an empty frame).
    """
    row_matches = (triplets['tail'] == tail) & (triplets.index == head)
    return bool(row_matches.any())


idx = drug_id
# Hard-coded partner entity; 'DB00752' is the second entry of the prediction
# listing shown earlier in this notebook.
snd_idx = 'DB00752'

# The first column of each TSV becomes the index (presumably the head entity
# of the triplet -- TODO confirm against the file layout).
train_triplets = pd.read_csv(triplets_dir + 'train.tsv', sep='\t', index_col=[0])
valid_triplets = pd.read_csv(triplets_dir + 'valid.tsv', sep='\t', index_col=[0])
test_triplets = pd.read_csv(triplets_dir + 'test.tsv', sep='\t', index_col=[0])

# NOTE(review): only the direction idx -> snd_idx is checked; a triplet stored
# the other way round would not be found. Confirm whether that is intended.
in_train = relation_exists(train_triplets, idx, snd_idx)
in_valid = relation_exists(valid_triplets, idx, snd_idx)
in_test = relation_exists(test_triplets, idx, snd_idx)

print('Relation in triplets:')
print('- train:', 'yes' if in_train else 'no')
print('- valid:', 'yes' if in_valid else 'no')
print('- test:', 'yes' if in_test else 'no')
      

Relation in triplets:
- train: no
- valid: no
- test: no
