In [1]:
import pandas as pd
import itertools

In [2]:
# Directory (relative to this notebook) holding the triplet TSV files read and written below.
triplets_dir = '../data/triplets/'

In [28]:
# Raw drug-food interaction triplets (drug1, interaction, drug2);
# the first column of the TSV is used as the row index.
dfi_path = triplets_dir + 'dfi.tsv'
dfi = pd.read_csv(dfi_path, sep='\t', index_col=[0])
dfi

Unnamed: 0,drug1,interaction,drug2
0,DB00001,increase_antiplatelet_activities,herbs and supplements with anticoagulant/antip...
1,DB00006,decrease_effectiveness,echinacea
2,DB00006,increase_antiplatelet_activities,herbs and supplements with anticoagulant/antip...
3,DB00008,decrease_adverse_effects,water
4,DB00009,increase_antiplatelet_activities,herbs and supplements with anticoagulant/antip...
...,...,...,...
1506,DB15982,increase_effectiveness,food
1507,DB16165,decrease_effectiveness,grapefruit
1508,DB16261,increase_effectiveness,a high fat meal
1509,DB16390,decrease_effectiveness,St. John's Wort


In [4]:
# Herbs with antiplatelet properties.
# Source: https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6459456/
herbs = [
    'aloe', 'cranberry', 'chamomile', 'feverfew',
    'garlic', 'ginger', 'ginkgo', 'meadowsweet',
    'turmeric', 'white willow', 'fenugreek', 'red clover',
    'dong quai', 'evening primrose', 'ginseng', 'flaxseed',
    'grapefruit', 'green tea', 'oregano', 'saw palmetto',
]

In [5]:
# Histamine-containing foods.
# Source: https://www.healthline.com/health/histamine-intolerance#diet
histamine_food = [
    'alcohol',
    'yoghurt',
    'fermented food',
    'avocado',
    'eggplant',
    'spinach',
    'shellfish',
]

In [6]:
# Tyramine-containing foods and supplements.
# Source: https://www.webmd.com/diet/foods-high-in-tyramine
#
# Spelling matters here: these names are later looked up verbatim in the
# food/compound name maps, so a misspelled entry can never be resolved.
tyramine_food = ['cheese', 'fermented food', 'orange', 'grapefruit', 'lemon', 'lime', 'tangerine',
                 'banana', 'pineapple', 'avocado', 'alcohol']

In [7]:
# Build a lookup from lower-cased food name to its public id.
food_name_map = pd.read_csv(f'{triplets_dir}food_name.tsv', sep='\t', index_col=[0])
food_ids = food_name_map['public_id']
food_names = food_name_map['name'].str.lower()
# If a name occurs more than once, the id that appears last in the file wins.
food_id_map_dict = {name: public_id for name, public_id in zip(food_names, food_ids)}
food_id_map_dict

{'angelica': 'FOOD00001',
 'savoy cabbage': 'FOOD00002',
 'silver linden': 'FOOD00003',
 'kiwi': 'FOOD00004',
 'allium': 'FOOD00005',
 'garden onion': 'FOOD00006',
 'leek': 'FOOD00007',
 'garlic': 'FOOD00008',
 'chives': 'FOOD00009',
 'lemon verbena': 'FOOD00010',
 'cashew nut': 'FOOD00011',
 'pineapple': 'FOOD00012',
 'dill': 'FOOD00013',
 'custard apple': 'FOOD00014',
 'wild celery': 'FOOD00015',
 'peanut': 'FOOD00016',
 'burdock': 'FOOD00017',
 'horseradish': 'FOOD00018',
 'tarragon': 'FOOD00019',
 'mugwort': 'FOOD00020',
 'asparagus': 'FOOD00021',
 'oat': 'FOOD00022',
 'star fruit': 'FOOD00023',
 'brazil nut': 'FOOD00024',
 'common beet': 'FOOD00025',
 'borage': 'FOOD00026',
 'chinese mustard': 'FOOD00027',
 'swede': 'FOOD00028',
 'rape': 'FOOD00029',
 'common cabbage': 'FOOD00030',
 'cauliflower': 'FOOD00031',
 'brussel sprouts': 'FOOD00032',
 'kohlrabi': 'FOOD00033',
 'broccoli': 'FOOD00034',
 'chinese cabbage': 'FOOD00035',
 'turnip': 'FOOD00036',
 'pigeon pea': 'FOOD00037',
 't

In [24]:
# Author's notes on interaction names that are matched against compounds
# rather than foods:
#   caffeine                   -> compounds
#   potassium                  -> compounds
#   folic acid                 -> compounds
#   antacids                   -> compounds Ketoconazole
#   histamine-containing foods -> compounds Histamine
#   xanthines                  -> compounds xanthine
#   iodine                     -> compounds
#   iron supplements           -> iron

# Build a lookup from lower-cased compound name to its compound id.
compounds = pd.read_csv(f'{triplets_dir}compounds_names.tsv', sep='\t', index_col=[0])
compound_ids = compounds['compound_id']
compound_names = compounds['name'].str.lower()
# If a name occurs more than once, the id that appears last in the file wins.
compound_id_map_dict = {name: compound_id for name, compound_id in zip(compound_names, compound_ids)}
compound_id_map_dict

{'d-galactose': 'FDB012703',
 'syringic acid': 'FDB000514',
 'acutissimin a': 'FDB018737',
 'l-cysteine': 'FDB012678',
 'silver': 'FDB004188',
 'l-chicoric acid': 'FDB002580',
 'selenium': 'FDB013400',
 'lycopene': 'FDB014534',
 'butein': 'FDB000082',
 'kaempferol': 'FDB000633',
 'lutein': 'FDB015471',
 'linalool': 'FDB014940',
 'pentoses': 'FDB005965',
 'papaverine': 'FDB000447',
 'ecdysone': 'FDB007162',
 'phellandrene': 'FDB003865',
 'zinc': 'FDB003729',
 'estrone': 'FDB012798',
 'capsaicin': 'FDB012411',
 '2,6-dihydroxybenzoic acid': 'FDB000845',
 'l-aspartic acid': 'FDB012567',
 '4-methylphenol': 'FDB008789',
 'vanillin': 'FDB000838',
 'apigeninidin': 'FDB001565',
 'l-methionine': 'FDB012683',
 'ferulic acid': 'FDB012801',
 'diosmetin': 'FDB000861',
 'retinol': 'FDB013828',
 'hydroquinone': 'FDB000885',
 'lormetazepam': 'FDB007119',
 'myricetin': 'FDB012724',
 'glucosamine': 'FDB022668',
 'cyanidin': 'FDB002602',
 '(-)-matairesinol': 'FDB014417',
 'sinigrin': 'FDB012319',
 '5-isop

In [29]:
# Substitute general groups of food with specific foods.
#
# A row whose drug2 names one of the groups below is replaced by one row per
# member food (same drug1 and interaction). Every other row is kept as is.
food_groups = {
    'herbs and supplements with anticoagulant/antiplatelet activity': herbs,
    'histamine-containing foods': histamine_food,
    'tyramine-containing foods and supplements': tyramine_food,
}

# Every record is a plain (drug1, interaction, drug2) tuple. Mixing Series
# rows with tuples makes the DataFrame constructor depend on the type of the
# first element and on positional access into label-indexed Series.
new_rows = []
for drug, interaction, food_name in zip(dfi['drug1'], dfi['interaction'], dfi['drug2']):
    # A name that is not a known group expands to just itself.
    for food in food_groups.get(food_name, [food_name]):
        new_rows.append((drug, interaction, food))

new_dfi = pd.DataFrame(new_rows, columns=['drug1', 'interaction', 'drug2'])
new_dfi

Unnamed: 0,drug1,interaction,drug2
0,DB00001,increase_antiplatelet_activities,aloe
1,DB00001,increase_antiplatelet_activities,cranberry
2,DB00001,increase_antiplatelet_activities,chamomile
3,DB00001,increase_antiplatelet_activities,feverfew
4,DB00001,increase_antiplatelet_activities,garlic
...,...,...,...
2785,DB15982,increase_effectiveness,food
2786,DB16165,decrease_effectiveness,grapefruit
2787,DB16261,increase_effectiveness,a high fat meal
2788,DB16390,decrease_effectiveness,St. John's Wort


In [None]:
# Substitute food names with ids.
#
# For each row, drug2 is resolved with this precedence:
#   1. a food id from food_id_map_dict;
#   2. a hand-written alias (see food_aliases);
#   3. a compound id from compound_id_map_dict;
#   4. the name itself, when it is listed in food_names2keep.
# Rows matching none of these keep their name and are flagged changed=False.
#
# The result is built as a NEW frame: new_dfi is not modified, so re-running
# this cell always starts from the original names and gives the same answer.

food_names2keep = ['alcohol', 'food', "St. John's Wort", 'hypertensive herbs', 'calcium', 'vitamin supplements']

# Names without an entry of their own that correspond to a known food.
# NOTE: .get() yields None if the target food is absent from food_id_map_dict;
# such rows are still flagged as changed.
food_aliases = {
    'dairy product': food_id_map_dict.get('milk and milk products'),
    'natural licorice': food_id_map_dict.get('liquorice'),
    'meal': 'food',
}

# TODO:
# foods rich in vitamin k -> vitamin K -> keep
# foods containing vitamin c -> vitamin C -> keep

resolved_foods = []
changed_flags = []

for food_name in new_dfi['drug2']:
    resolved, changed = food_name, True
    food_id = food_id_map_dict.get(food_name)
    if food_id is not None:
        resolved = food_id
    elif food_name in food_aliases:
        resolved = food_aliases[food_name]
    elif compound_id_map_dict.get(food_name) is not None:
        resolved = compound_id_map_dict[food_name]
    elif food_name not in food_names2keep:
        # Nothing matched: leave the name in place and mark the row for removal.
        changed = False
    resolved_foods.append(resolved)
    changed_flags.append(changed)

dfi_with_ids = new_dfi.assign(drug2=resolved_foods, changed=changed_flags)
        

In [32]:
# Discard the rows whose food could not be resolved, then remove the helper flag column.
unresolved_idx = dfi_with_ids.index[dfi_with_ids['changed'].eq(False)]
dfi_with_ids = dfi_with_ids.drop(index=unresolved_idx).drop(columns='changed')

In [33]:
# Show the processed triplets that remain after unresolved rows were dropped.
dfi_with_ids

Unnamed: 0,drug1,interaction,drug2
62,DB00011,decrease_effectiveness,alcohol
105,DB00017,decrease_adverse_effects,calcium
106,DB00017,decrease_adverse_effects,vitamin supplements
112,DB00029,increase_antiplatelet_activities,FOOD00008
113,DB00029,increase_antiplatelet_activities,FOOD00206
...,...,...,...
2784,DB15873,increase_effectiveness,FOOD00664
2785,DB15982,increase_effectiveness,food
2786,DB16165,decrease_effectiveness,FOOD00256
2788,DB16390,decrease_effectiveness,St. John's Wort


In [34]:
# Write the processed triplets next to the input files as a tab-separated file.
processed_path = f'{triplets_dir}dfi_processed.tsv'
dfi_with_ids.to_csv(processed_path, sep='\t')