## Part I: find those ingredients that are UNSAFE when taken by mouth

In [1]:
import html

import pandas as pd

In [2]:
# Read the CSV holding, per ingredient, the first sentences of its side-effects text.
fpath = 'ingredient_side_effects_first_sentences.csv'
df = pd.read_csv(filepath_or_buffer=fpath)
print(df.columns)
df.head(n=5)

Index(['ingredient', 'first_sentences'], dtype='object')


Unnamed: 0,ingredient,first_sentences
0,abscess-root,There isn’t enough information to know if absc...
1,abuta,There isn't enough reliable information availa...
2,acacia,Acacia is LIKELY SAFE for most adults when t...
3,acacia-rigidula,There isn’t enough reliable information availa...
4,acai,Acai is POSSIBLY SAFE when taken by mouth an...


In [3]:
# Inspect the current column-width limit used when rendering DataFrames.
pd.get_option("display.max_colwidth")

50

In [4]:
# Widen the rendered column width so the full first sentences are visible.
pd.set_option("display.max_colwidth", 500)

In [5]:
# Show the first ten rows, now untruncated.
df.head(10)

Unnamed: 0,ingredient,first_sentences
0,abscess-root,There isn’t enough information to know if abscess root is safe.
1,abuta,There isn't enough reliable information available about abuta to know if it is safe.
2,acacia,Acacia is LIKELY SAFE for most adults when taken by mouth in amounts commonly found in food.
3,acacia-rigidula,There isn’t enough reliable information available to know if Acacia rigidula is safe.
4,acai,"Acai is POSSIBLY SAFE when taken by mouth and appropriately, short-term."
5,acerola,Acerola is POSSIBLY SAFE for most adults.
6,acetyl-l-carnitine,Acetyl-L-carnitine is LIKELY SAFE for most adults and POSSIBLY SAFE for most children when taken by mouth.
7,ackee,The ripe fruit of ackee is LIKELY SAFE when eaten as a food.
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.
9,activated-charcoal,"Activated charcoal is LIKELY SAFE for most adults when taken by mouth, short-term, or when applied to wounds."


In [6]:
# Flag rows whose text contains "unsafe" in any letter case.
# The vectorised `.str` accessor replaces the per-row lambda; `na=False` makes a
# missing sentence count as "no match" instead of raising AttributeError on NaN.
# regex=False keeps this a literal substring test, like the original `find`.
df['has_unsafe'] = df.first_sentences.str.lower().str.contains('unsafe', regex=False, na=False)
df[:10]

Unnamed: 0,ingredient,first_sentences,has_unsafe
0,abscess-root,There isn’t enough information to know if abscess root is safe.,False
1,abuta,There isn't enough reliable information available about abuta to know if it is safe.,False
2,acacia,Acacia is LIKELY SAFE for most adults when taken by mouth in amounts commonly found in food.,False
3,acacia-rigidula,There isn’t enough reliable information available to know if Acacia rigidula is safe.,False
4,acai,"Acai is POSSIBLY SAFE when taken by mouth and appropriately, short-term.",False
5,acerola,Acerola is POSSIBLY SAFE for most adults.,False
6,acetyl-l-carnitine,Acetyl-L-carnitine is LIKELY SAFE for most adults and POSSIBLY SAFE for most children when taken by mouth.,False
7,ackee,The ripe fruit of ackee is LIKELY SAFE when eaten as a food.,False
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True
9,activated-charcoal,"Activated charcoal is LIKELY SAFE for most adults when taken by mouth, short-term, or when applied to wounds.",False


In [7]:
# Flag rows whose text contains "likely" in any letter case (literal substring,
# so longer words that contain it match as well).
# The vectorised `.str` accessor replaces the per-row lambda; `na=False` makes a
# missing sentence count as "no match" instead of raising AttributeError on NaN.
df['has_likely'] = df.first_sentences.str.lower().str.contains('likely', regex=False, na=False)
df[:10]

Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely
0,abscess-root,There isn’t enough information to know if abscess root is safe.,False,False
1,abuta,There isn't enough reliable information available about abuta to know if it is safe.,False,False
2,acacia,Acacia is LIKELY SAFE for most adults when taken by mouth in amounts commonly found in food.,False,True
3,acacia-rigidula,There isn’t enough reliable information available to know if Acacia rigidula is safe.,False,False
4,acai,"Acai is POSSIBLY SAFE when taken by mouth and appropriately, short-term.",False,False
5,acerola,Acerola is POSSIBLY SAFE for most adults.,False,False
6,acetyl-l-carnitine,Acetyl-L-carnitine is LIKELY SAFE for most adults and POSSIBLY SAFE for most children when taken by mouth.,False,True
7,ackee,The ripe fruit of ackee is LIKELY SAFE when eaten as a food.,False,True
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False
9,activated-charcoal,"Activated charcoal is LIKELY SAFE for most adults when taken by mouth, short-term, or when applied to wounds.",False,True


In [8]:
# Flag rows whose text contains "possibly" in any letter case.
# The vectorised `.str` accessor replaces the per-row lambda; `na=False` makes a
# missing sentence count as "no match" instead of raising AttributeError on NaN.
df['has_possibly'] = df.first_sentences.str.lower().str.contains('possibly', regex=False, na=False)
df[:10]

Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly
0,abscess-root,There isn’t enough information to know if abscess root is safe.,False,False,False
1,abuta,There isn't enough reliable information available about abuta to know if it is safe.,False,False,False
2,acacia,Acacia is LIKELY SAFE for most adults when taken by mouth in amounts commonly found in food.,False,True,False
3,acacia-rigidula,There isn’t enough reliable information available to know if Acacia rigidula is safe.,False,False,False
4,acai,"Acai is POSSIBLY SAFE when taken by mouth and appropriately, short-term.",False,False,True
5,acerola,Acerola is POSSIBLY SAFE for most adults.,False,False,True
6,acetyl-l-carnitine,Acetyl-L-carnitine is LIKELY SAFE for most adults and POSSIBLY SAFE for most children when taken by mouth.,False,True,True
7,ackee,The ripe fruit of ackee is LIKELY SAFE when eaten as a food.,False,True,False
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False
9,activated-charcoal,"Activated charcoal is LIKELY SAFE for most adults when taken by mouth, short-term, or when applied to wounds.",False,True,False


In [9]:
# Boolean mask: mentions "unsafe" and contains neither "likely" nor "possibly".
df['has_unsafe'] & ~(df['has_likely'] | df['has_possibly'])

0       False
1       False
2       False
3       False
4       False
5       False
6       False
7       False
8        True
9       False
10      False
11       True
12      False
13      False
14       True
15      False
16      False
17      False
18      False
19      False
20      False
21      False
22      False
23      False
24      False
25      False
26      False
27      False
28      False
29      False
        ...  
1170    False
1171     True
1172     True
1173     True
1174    False
1175    False
1176     True
1177    False
1178    False
1179    False
1180     True
1181    False
1182    False
1183    False
1184    False
1185    False
1186    False
1187    False
1188    False
1189    False
1190    False
1191    False
1192    False
1193    False
1194    False
1195    False
1196    False
1197    False
1198    False
1199    False
Length: 1200, dtype: bool

In [10]:
# Keep the rows that mention "unsafe" with neither "likely" nor "possibly" in
# the text. The result is copied so that columns added to `dff` never touch `df`.
condition = df['has_unsafe'] & ~(df['has_likely'] | df['has_possibly'])
dff = df.loc[condition].copy()
print(dff.shape)
dff

(119, 5)


Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False
11,adrenal-extract,Adrenal extract is UNSAFE when injected.,True,False,False
14,aga,Aga is UNSAFE when taken by mouth.,True,False,False
58,androstenetrione,Androstenetrione might be UNSAFE for anyone.,True,False,False
60,angels-trumpet,Angel's trumpet is UNSAFE for everyone.,True,False,False
73,aristolochia,Aristolochia is UNSAFE.,True,False,False
80,arum,Arum is UNSAFE when taken by mouth.,True,False,False
96,autumn-crocus,Autumn crocus is UNSAFE.,True,False,False
129,beth-root,Beth root might be UNSAFE to take by mouth.,True,False,False
143,bitter-yam,"Wild, uncooked bitter yam can be UNSAFE to eat or take by mouth as medicine.",True,False,False


In [11]:
# Flag rows whose text contains "might" in any letter case.
# The vectorised `.str` accessor replaces the per-row lambda; `na=False` makes a
# missing sentence count as "no match" instead of raising AttributeError on NaN.
# NOTE(review): this is a literal substring test, so it also fires when "might"
# qualifies something other than UNSAFE (e.g. "... is UNSAFE when taken by mouth
# because it contains cyanide and might cause cyanide poisoning"). Confirm that
# such rows are really meant to be treated as hedged.
dff['has_might'] = dff.first_sentences.str.lower().str.contains('might', regex=False, na=False)
print(dff[dff.has_might].shape)
dff[dff.has_might]

(18, 6)


Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly,has_might
58,androstenetrione,Androstenetrione might be UNSAFE for anyone.,True,False,False,True
129,beth-root,Beth root might be UNSAFE to take by mouth.,True,False,False,True
163,blackthorn,Blackthorn might be UNSAFE when swallowed.,True,False,False,True
173,bog-bilberry,Fresh bog bilberry fruit might be UNSAFE.,True,False,False,True
176,boldo,Boldo might be UNSAFE when used for medicinal purposes.,True,False,False,True
235,canaigre,"Canaigre seems to be safe for most people in usual amounts, but taking large amounts might be UNSAFE.",True,False,False,True
272,cesium,High doses of cesium might be UNSAFE.,True,False,False,True
279,chaulmoogra,Chaulmoogra is UNSAFE when taken by mouth because it contains cyanide and might cause cyanide poisoning.,True,False,False,True
346,contrayerva,Contrayerva might be UNSAFE for use.,True,False,False,True
415,dyers-broom,Dyer's broom might be UNSAFE when taken by mouth.,True,False,False,True


In [12]:
# Flag rows whose text contains "mouth" in any letter case (e.g. "by mouth").
# The vectorised `.str` accessor replaces the per-row lambda; `na=False` makes a
# missing sentence count as "no match" instead of raising AttributeError on NaN.
dff['has_mouth'] = dff.first_sentences.str.lower().str.contains('mouth', regex=False, na=False)
print(dff[dff.has_mouth].shape)
dff[dff.has_mouth]

(42, 7)


Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly,has_might,has_mouth
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False,False,True
14,aga,Aga is UNSAFE when taken by mouth.,True,False,False,False,True
80,arum,Arum is UNSAFE when taken by mouth.,True,False,False,False,True
129,beth-root,Beth root might be UNSAFE to take by mouth.,True,False,False,True,True
143,bitter-yam,"Wild, uncooked bitter yam can be UNSAFE to eat or take by mouth as medicine.",True,False,False,False,True
154,black-nightshade,Black nightshade is UNSAFE to take by mouth.,True,False,False,False,True
208,butanediol-bd,Butanediol is UNSAFE when taken by mouth.,True,False,False,False,True
234,canadian-hemp,Canadian hemp is UNSAFE to take by mouth because of its effects on the heart and other side effects.,True,False,False,False,True
279,chaulmoogra,Chaulmoogra is UNSAFE when taken by mouth because it contains cyanide and might cause cyanide poisoning.,True,False,False,True,True
306,ciguatoxins,Ciguatoxins are UNSAFE when taken by mouth.,True,False,False,False,True


In [13]:
# Final selection: text mentions "mouth" and does not contain "might".
df_unsafe = dff.loc[dff['has_mouth'] & ~dff['has_might']]
print(df_unsafe.shape)
df_unsafe

(37, 7)


Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly,has_might,has_mouth
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False,False,True
14,aga,Aga is UNSAFE when taken by mouth.,True,False,False,False,True
80,arum,Arum is UNSAFE when taken by mouth.,True,False,False,False,True
143,bitter-yam,"Wild, uncooked bitter yam can be UNSAFE to eat or take by mouth as medicine.",True,False,False,False,True
154,black-nightshade,Black nightshade is UNSAFE to take by mouth.,True,False,False,False,True
208,butanediol-bd,Butanediol is UNSAFE when taken by mouth.,True,False,False,False,True
234,canadian-hemp,Canadian hemp is UNSAFE to take by mouth because of its effects on the heart and other side effects.,True,False,False,False,True
306,ciguatoxins,Ciguatoxins are UNSAFE when taken by mouth.,True,False,False,False,True
315,clematis,Fresh clematis is UNSAFE to take by mouth.,True,False,False,False,True
355,corkwood-tree,Corkwood tree is UNSAFE when taken by mouth.,True,False,False,False,True


## Part II: generate an HTML page, with links to WebMD, for those UNSAFE ingredients

Right now we only focus on these 37 UNSAFE ingredients (when taken by mouth). 

The others — those that are LIKELY/POSSIBLY UNSAFE, or UNSAFE when injected — are left for future work.

In [14]:
# Load the id / name / WebMD href table covering all ingredients.
df_href = pd.read_csv(filepath_or_buffer='ingredient_id_name_url.csv')
print(df_href.shape)
df_href.head(n=5)

(1214, 3)


Unnamed: 0,id,name,href
0,abscess-root,ABSCESS ROOT,https://www.webmd.com/vitamins/ai/ingredientmono-266/abscess-root
1,abuta,ABUTA,https://www.webmd.com/vitamins/ai/ingredientmono-267/abuta
2,acacia,ACACIA,https://www.webmd.com/vitamins/ai/ingredientmono-268/acacia
3,acacia-rigidula,ACACIA RIGIDULA,https://www.webmd.com/vitamins/ai/ingredientmono-1411/acacia-rigidula
4,acai,ACAI,https://www.webmd.com/vitamins/ai/ingredientmono-1109/acai


In [15]:
# Peek at the first five unsafe rows before joining.
df_unsafe.iloc[:5]

Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly,has_might,has_mouth
8,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False,False,True
14,aga,Aga is UNSAFE when taken by mouth.,True,False,False,False,True
80,arum,Arum is UNSAFE when taken by mouth.,True,False,False,False,True
143,bitter-yam,"Wild, uncooked bitter yam can be UNSAFE to eat or take by mouth as medicine.",True,False,False,False,True
154,black-nightshade,Black nightshade is UNSAFE to take by mouth.,True,False,False,False,True


In [16]:
# Attach name and href to each unsafe ingredient by matching `ingredient`
# against `id` (inner join: only rows with a match on both sides are kept).
df_final = df_unsafe.merge(df_href, how='inner', left_on='ingredient', right_on='id')
print(df_final.shape)
df_final.head(n=5)

(37, 10)


Unnamed: 0,ingredient,first_sentences,has_unsafe,has_likely,has_possibly,has_might,has_mouth,id,name,href
0,aconite,Do not use aconite. Aconite root is UNSAFE when taken by mouth.,True,False,False,False,True,aconite,ACONITE,https://www.webmd.com/vitamins/ai/ingredientmono-609/aconite
1,aga,Aga is UNSAFE when taken by mouth.,True,False,False,False,True,aga,AGA,https://www.webmd.com/vitamins/ai/ingredientmono-190/aga
2,arum,Arum is UNSAFE when taken by mouth.,True,False,False,False,True,arum,ARUM,https://www.webmd.com/vitamins/ai/ingredientmono-209/arum
3,bitter-yam,"Wild, uncooked bitter yam can be UNSAFE to eat or take by mouth as medicine.",True,False,False,False,True,bitter-yam,BITTER YAM,https://www.webmd.com/vitamins/ai/ingredientmono-1221/bitter-yam
4,black-nightshade,Black nightshade is UNSAFE to take by mouth.,True,False,False,False,True,black-nightshade,BLACK NIGHTSHADE,https://www.webmd.com/vitamins/ai/ingredientmono-821/black-nightshade


In [17]:
# create HTML page from df_final

# Declare the charset the file is written in so browsers decode it correctly.
out = '<meta charset="utf-8"><table>'
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # Values come from CSV files, so escape them before embedding in markup;
    # html.escape also escapes quotes, which protects the href attribute.
    row = (
        f'<tr> <td> <a href="{html.escape(str(href))}">{html.escape(str(ingredient))}</a></td>'
        f' <td>{html.escape(str(first_sentences))}</tr>'
    )
    out += row
# The </table> end tag is mandatory in HTML; the row/cell end tags are not.
out += "</table>"

html_outfile = '/tmp/a.html'
# The context manager closes the file deterministically; the explicit encoding
# avoids depending on the platform default.
with open(html_outfile, 'w', encoding='utf-8') as fh:
    fh.write(out)
html_outfile

'/tmp/a.html'

In [18]:
# create HTML page from df_final
# Same table, with right/top-aligned name cells and links opening in a new tab.

# Declare the charset the file is written in so browsers decode it correctly.
out = '<meta charset="utf-8"><table>'
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # Values come from CSV files, so escape them before embedding in markup;
    # html.escape also escapes quotes, which protects the href attribute.
    row = (
        f'<tr><td align=right valign=top><a target=_blank href="{html.escape(str(href))}">'
        f'{html.escape(str(ingredient))}</a><td>{html.escape(str(first_sentences))}</tr>'
    )
    out += row
# The </table> end tag is mandatory in HTML; the row/cell end tags are not.
out += "</table>"

html_outfile = '/tmp/a.html'
# The context manager closes the file deterministically; the explicit encoding
# avoids depending on the platform default.
with open(html_outfile, 'w', encoding='utf-8') as fh:
    fh.write(out)
html_outfile

'/tmp/a.html'

In [19]:
# Styled version of the page: fixed-width centred body, undecorated larger links.
# The charset declaration matches the encoding used when the file is written.
out = """
<meta charset="utf-8">
<style>
body {width: 800px; margin:auto}
a {text-decoration: none; font-size:120%}
</style>
<table cellpadding=5>
"""
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # Values come from CSV files, so escape them before embedding in markup;
    # html.escape also escapes quotes, which protects the href attribute.
    row = (
        f'<tr><td align=right valign=top><a target=_blank href="{html.escape(str(href))}">'
        f'{html.escape(str(ingredient))}</a><td>{html.escape(str(first_sentences))}</tr>'
    )
    out += row
# The </table> end tag is mandatory in HTML; the row/cell end tags are not.
out += "</table>"

html_outfile = '/tmp/a.html'
# The context manager closes the file deterministically; the explicit encoding
# avoids depending on the platform default.
with open(html_outfile, 'w', encoding='utf-8') as fh:
    fh.write(out)
html_outfile

'/tmp/a.html'