## Part I: find those ingredients that are UNSAFE when taken by mouth

In [None]:
import html

import pandas as pd

In [None]:
# The first one or two sentences of each ingredient's side-effects section
# have already been extracted into this CSV; how to do that extraction is
# shown later because it is a bit complicated.
# The purpose here is to keep practising pandas.

# Plain string literal: there is nothing to interpolate, so no f-prefix.
fpath = 'ingredient_side_effects_first_sentences.csv'
df = pd.read_csv(fpath)
print(df.columns)
df.head()

In [None]:
# Show the current limit on how many characters of a cell pandas displays
pd.get_option('display.max_colwidth')

In [None]:
# Raise the display limit so long sentences are not truncated in the output
pd.set_option('display.max_colwidth', 500)

In [None]:
# Rows at positions 5..9, now shown with the wider column setting
df.iloc[5:10]

In [None]:
# Creating a new feature (column) on the table "df".
# As a first example, add "num_words" filled with the same placeholder value
# in every row.

df.loc[:, 'num_words'] = 10
df.head(5)

In [None]:
# The first row of the table, as a Series indexed by column name
df.iloc[0, :]

In [None]:
# The first_sentences value of the first row
df.first_sentences.iloc[0]

In [None]:
# Take the first row's text as a sample and break it on whitespace
s = df['first_sentences'].iloc[0]
s.split()

In [None]:
# Number of whitespace-separated tokens in the sample text s
len(s.split())

In [None]:
def get_num_of_words(s):
    """Return the number of whitespace-separated words in ``s``.

    Parameters
    ----------
    s : str
        Text to count words in. Any non-string value (for example the
        NaN that pandas uses for an empty CSV cell, or None) is treated
        as having no words.

    Returns
    -------
    int
        Word count; 0 for an empty string or a non-string value.
    """
    # Missing cells arrive as float NaN, which has no .split(); count them as 0
    # instead of raising in the middle of a DataFrame.apply.
    if not isinstance(s, str):
        return 0
    return len(s.split())

In [None]:
# Replace the placeholder with the real word count of each row's text
df['num_words'] = df['first_sentences'].map(get_num_of_words)
df.head(5)

### Create a new feature: whether the first sentences contain the word "unsafe"

In [None]:
def has_word_unsafe(s):
    """Return True if ``s`` contains "unsafe", ignoring case.

    This is a substring match, so text such as "POSSIBLY UNSAFE" also
    matches.

    Parameters
    ----------
    s : str
        Text to search. Any non-string value (for example the NaN that
        pandas uses for an empty CSV cell, or None) yields False.

    Returns
    -------
    bool
    """
    # Missing cells arrive as float NaN, which has no .lower(); treat as no match.
    if not isinstance(s, str):
        return False
    return 'unsafe' in s.lower()

# One boolean per row: does the text mention "unsafe"?
df['has_unsafe'] = df['first_sentences'].map(has_word_unsafe)
df.iloc[5:10]

The two functions above,
- `get_num_of_words` and
- `has_word_unsafe`,

are just one-line functions.

In Python, we can use `lambda` as a quick way to define such one-line functions inline.

In [None]:
# Same feature as before, with the one-line function written inline as a lambda
df['has_unsafe'] = df.first_sentences.apply(lambda text: 'unsafe' in text.lower())
df.iloc[5:10]

In [None]:
# Flag rows whose text mentions "likely" (case-insensitive substring match)
df['has_likely'] = df.first_sentences.apply(lambda text: 'likely' in text.lower())
df.head(10)

In [None]:
# Flag rows whose text mentions "possibly" (case-insensitive substring match)
df['has_possibly'] = df.first_sentences.apply(lambda text: 'possibly' in text.lower())
df.head(10)

In [None]:
# Boolean mask: "unsafe" is mentioned, and neither "likely" nor "possibly" is
df.has_unsafe & ~(df.has_likely | df.has_possibly)

In [None]:
# Keep only the rows flagged "unsafe" that carry neither the "likely" nor the
# "possibly" flag. The .copy() makes dff an independent frame, so new columns
# can be added to it below.
condition = df.has_unsafe & ~(df.has_likely | df.has_possibly)
dff = df.loc[condition].copy()
print(dff.shape)
dff

In [None]:
# Flag rows whose text mentions "might", then look at just those rows
dff['has_might'] = dff.first_sentences.apply(lambda text: 'might' in text.lower())
might_rows = dff[dff.has_might]
print(might_rows.shape)
might_rows

In [None]:
# Flag rows whose text mentions "mouth", then look at just those rows
dff['has_mouth'] = dff.first_sentences.apply(lambda text: 'mouth' in text.lower())
mouth_rows = dff[dff.has_mouth]
print(mouth_rows.shape)
mouth_rows

In [None]:
# Final selection: the text mentions "mouth" and does not mention "might"
df_unsafe = dff.loc[dff.has_mouth & ~dff.has_might]
print(df_unsafe.shape)
df_unsafe

## Part II: generate an HTML page, with links to WebMD, for those UNSAFE ingredients

Right now we only focus on these 37 UNSAFE ingredients (when taken by mouth). 

The others — ingredients that are LIKELY/POSSIBLY UNSAFE, or UNSAFE when injected — are left for future work.

In [None]:
# Load the table holding the WebMD href of every ingredient
href_csv = 'ingredient_id_name_url.csv'
df_href = pd.read_csv(href_csv)
print(df_href.shape)
df_href.head(5)

In [None]:
# Quick look at the left-hand table before merging
df_unsafe.head(5)

In [None]:
# Attach the WebMD link to each unsafe ingredient by matching
# df_unsafe.ingredient against df_href.id.
# NOTE(review): merge defaults to an inner join, so an ingredient without a
# matching id is dropped — compare the shape printed here with df_unsafe.shape.

df_final = df_unsafe.merge(df_href, left_on='ingredient', right_on='id')
print(df_final.shape)
df_final.head(5)

In [None]:
# Create an HTML page from df_final: one table row per ingredient, with the
# name linking to its href and the extracted sentences in the second cell.

rows = []
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # The values come from CSV files, so escape them: a stray <, > or & would
    # break the markup, and a quote inside href would end the attribute early.
    link = f"<a href=\"{html.escape(str(href), quote=True)}\">{html.escape(str(ingredient))}</a>"
    rows.append(f"<tr> <td> {link}</td> <td>{html.escape(str(first_sentences))}</td> </tr>")

# Declare the charset so it matches the encoding used when writing the file,
# and close the table (the </table> end tag is not optional in HTML).
out = "<meta charset=\"utf-8\">\n<table>" + "".join(rows) + "</table>"

html_outfile = '/tmp/a.html'
# The context manager flushes and closes the file even if the write fails.
with open(html_outfile, 'w', encoding='utf-8') as fout:
    fout.write(out)
html_outfile

In [None]:
# Same page as before, with extra attributes:
# - <td align=right valign=top> right-aligns the ingredient name and pins it to the top of the row
# - <a target=_blank> opens the link in a new tab/window

rows = []
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # The values come from CSV files, so escape them: a stray <, > or & would
    # break the markup, and a quote inside href would end the attribute early.
    link = f"<a target=_blank href=\"{html.escape(str(href), quote=True)}\">{html.escape(str(ingredient))}</a>"
    rows.append(f"<tr><td align=right valign=top>{link}</td><td>{html.escape(str(first_sentences))}</td></tr>")

# Declare the charset so it matches the encoding used when writing the file,
# and close the table (the </table> end tag is not optional in HTML).
out = "<meta charset=\"utf-8\">\n<table>" + "".join(rows) + "</table>"

html_outfile = '/tmp/a.html'
# The context manager flushes and closes the file even if the write fails.
with open(html_outfile, 'w', encoding='utf-8') as fout:
    fout.write(out)
html_outfile

In [None]:
# Same page again, now with a small stylesheet: a fixed-width centred body,
# and larger, non-underlined links.

header = """
<meta charset="utf-8">
<style>
body {width: 800px; margin:auto}
a {text-decoration: none; font-size:120%}
</style>
<table cellpadding=5>
"""

rows = []
for ingredient, first_sentences, href in zip(df_final.ingredient, df_final.first_sentences, df_final.href):
    # The values come from CSV files, so escape them: a stray <, > or & would
    # break the markup, and a quote inside href would end the attribute early.
    link = f"<a target=_blank href=\"{html.escape(str(href), quote=True)}\">{html.escape(str(ingredient))}</a>"
    rows.append(f"<tr><td align=right valign=top>{link}</td><td>{html.escape(str(first_sentences))}</td></tr>")

# Close the table explicitly (the </table> end tag is not optional in HTML).
out = header + "\n".join(rows) + "\n</table>\n"

html_outfile = '/tmp/a.html'
# Write as UTF-8 to match the <meta charset> above; the context manager
# flushes and closes the file even if the write fails.
with open(html_outfile, 'w', encoding='utf-8') as fout:
    fout.write(out)
html_outfile