In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import requests
from requests import get
from matplotlib.ticker import MaxNLocator
import matplotlib.pyplot as plt
from bs4 import BeautifulSoup # for web scraping
import warnings
warnings.filterwarnings('ignore') # to ignore the warnings and make the notebook more presentable

# map functions
import os 
import folium
from folium import plugins
import rasterio as rio
from rasterio.warp import calculate_default_transform, reproject, Resampling
import earthpy as et

# Defining Functions

In [2]:
def Get_Links_from_Page(Pages):
    """Download each listing page and collect every anchor tag found on it.

    Parameters
    ----------
    Pages : iterable of str
        URLs of the listing pages to download.

    Returns
    -------
    list
        One entry per page, in input order. Each entry is the result of
        ``find_all('a')`` on that page's parsed HTML.
    """
    list_links = []

    for Page in Pages:
        # progress trace: one line per page being fetched
        print(Page)

        # A single GET is all that is needed to read the page. The timeout
        # keeps a stalled connection from hanging the notebook forever.
        response = get(Page, timeout=30)

        # Parse the HTML so that the tags can be queried
        soup = BeautifulSoup(response.text, 'html.parser')

        # find_all('a') returns every anchor tag of the page
        list_links.append(soup.find_all('a'))

    return list_links

In [148]:
def _parse_report_counts(post):
    """Return the integer tokens of the reports / sick-persons summary row."""
    summary = post.find(class_ = 'row justify-content-start text-muted').get_text()
    return [int(token) for token in summary.split() if token.isdigit()]


def _parse_lat_lon(post, map_column_class):
    """Return (latitude, longitude) as strings read from the map image URL.

    The coordinates sit between a "center" marker and '&zoom=' in the
    ``data-src`` attribute of the lazy-loaded map image.
    """
    src = post.find(class_ = map_column_class).find(class_ = 'img-fluid lazyload')['data-src']

    # NOTE(review): the first spelling is the one this notebook was written
    # against; presumably the HTML parser decoded '&cent' inside '&center='
    # into the cent sign. The plain '&center=' form is accepted as well so the
    # lookup keeps working if the attribute is not entity-decoded.
    begin = -1
    for marker in ('=en¢er=', '=en&center='):
        found_at = src.find(marker)
        if found_at != -1:
            begin = found_at + len(marker)
            break
    stop = src.rfind('&zoom=')
    if begin == -1 or stop == -1:
        # fail loudly instead of slicing garbage out of an unexpected URL
        raise ValueError('could not locate coordinates in map URL: ' + src)

    loc = src[begin:stop]
    # exactly two comma-separated values are expected: latitude, longitude
    lat, lon = [x.strip() for x in loc.split(',')]
    return lat, lon


def Create_df_(Links):
    """Scrape every complaint page in ``Links`` into one DataFrame.

    Two page layouts are handled: business pages (URL contains '/biz/') and
    all other pages, which are parsed as single-incident pages.

    Parameters
    ----------
    Links : iterable of str
        URLs of the complaint pages.

    Returns
    -------
    pandas.DataFrame
        One row per link with the columns Name, Address, Zip,
        Latest_Report_Date, Total_all_time_reports,
        Total_all_time_sick_persons, Latitude and Longitude.
        Address is the list of comma-separated title parts after the name.
        Zip is the raw text fragment (it is trimmed in a later cell).
        Latitude and Longitude are strings.

    Raises
    ------
    ValueError
        If the summary row does not hold exactly two integers, or the map
        URL does not contain the expected coordinates.
    """
    columns = ['Name','Address','Zip', 'Latest_Report_Date', 'Total_all_time_reports', 'Total_all_time_sick_persons','Latitude','Longitude']
    # Rows are gathered in a plain list and turned into a DataFrame once:
    # DataFrame.append copied the whole frame per row and no longer exists
    # in pandas 2.x.
    records = []

    for link in Links:
        response = get(link, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')
        # progress trace: one line per complaint page
        print(link)

        if '/biz/' in link:
            # business page layout
            post = soup.find(class_ = 'col-12 single-post single-incident')

            reports, sick = _parse_report_counts(post)

            # the second 'text-muted my-2' node carries the latest report date
            date_text = post.find_all(class_ = 'text-muted my-2')[1].get_text()
            date_text = date_text.replace('  ', '')
            date_text = date_text.replace('\n', '')
            report_date = date_text.replace('Latest report:', '')

            lat, lon = _parse_lat_lon(post, 'col-12 col-md-12 col-lg-5 mt-3 mt-md-0')

            # title is "<name>, <street>, <city>, <zip state>, <country>"
            name_address = post.find(class_ = 'h1 post-title').get_text().split(",")
            zip_code = name_address[3][:6]

        else:
            # single-incident page layout

            # Start from None for every link so that a page without a date
            # node cannot inherit the date of the previously scraped page.
            report_date = None
            # only the first 10 nodes are inspected; slicing tolerates pages
            # that have fewer than 10 of them (the last match wins)
            for node in soup.find_all(class_ = 'text-muted')[:10]:
                if 'date' in str(node):
                    date_text = node.get_text()
                    date_text = date_text.replace('  ', '')
                    date_text = date_text.replace('\n', '')
                    report_date = date_text.replace('Reported:', '')

            post = soup.find(class_ = 'col-12 page-content mt-4 location-post single-post card py-3')

            reports, sick = _parse_report_counts(post)

            lat, lon = _parse_lat_lon(post, 'col-12 col-md-12 col-lg-4 mt-3 mt-md-0')

            name_address = post.find(class_ = 'h1 post-title').get_text().split(",")

            # the third 'my-2' node holds the address line; its third
            # comma-separated part is the zip code fragment
            zip_text = soup.find_all(class_ = 'my-2')[2].get_text()
            zip_text = zip_text.replace('\n', '')
            zip_text = zip_text.replace('  ', '')
            zip_code = zip_text.split(',')[2]

        records.append({'Name': name_address[0], 'Address': name_address[1:], 'Zip': zip_code,
                        'Latest_Report_Date': report_date,
                        'Total_all_time_reports': reports,
                        'Total_all_time_sick_persons': sick,
                        'Latitude': lat, 'Longitude': lon})

    return pd.DataFrame(records, columns = columns)


# Webscraping

In [126]:
# Build the URLs of listing pages 1 to 12 of the Chicago location feed
# (the upper bound of range is exclusive, hence 13)
Pages = [
    'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=' + str(page_number) + '#emailscroll'
    for page_number in range(1, 13)
]
Pages

['https://iwaspoisoned.com/location/united-states/illinois/chicago?page=1#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=2#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=3#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=4#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=5#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=6#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=7#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=8#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=9#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=10#emailscroll',
 'https://iwaspoisoned.com/location/united-states/illinois/chicago?page=11#emailscroll',
 'https://iwaspoisoned.com/loc

In [127]:
# Get_Links_from_Page returns one entry per listing page, each holding every <a> tag
# found on that page; the complaint links are filtered out of these tags in a later cell
list_links = Get_Links_from_Page(Pages)

https://iwaspoisoned.com/location/united-states/illinois/chicago?page=1#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=2#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=3#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=4#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=5#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=6#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=7#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=8#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=9#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=10#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=11#emailscroll
https://iwaspoisoned.com/location/united-states/illinois/chicago?page=12#e

In [128]:
# Collect the href of every anchor and keep only the Chicago complaint pages
List = []
for link in list_links:
    for l in link:
        url = l.get('href')
        # an anchor without an href yields None, which cannot be searched with `in`;
        # every other link that is not a Chicago complaint is dropped as well
        if url and '-chicago-' in url:
            List.append(url)
# delete duplicates while keeping first-seen order, so that every run
# visits the links (and numbers the rows) in the same order
List = list(dict.fromkeys(List))

In [129]:
# show the de-duplicated complaint URLs that will be scraped
List

['https://iwaspoisoned.com/biz/domino-s-pizza-143-west-division-street-chicago-60610-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/harmony-restaurant-6525-west-archer-avenue-chicago-60638-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/incident/swiss-air-11601-west-touhy-avenue-chicago-il-usa-279958#emailscroll',
 'https://iwaspoisoned.com/biz/del-seoul-2568-north-clark-street-chicago-60614-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/poke-poke-802-west-belmont-avenue-chicago-60657-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/subway-2412-north-lincoln-avenue-chicago-60614-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/fat-willy-s-rib-shack-2416-west-schubert-avenue-chicago-60647-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/mekato-s-colombian-bakery-5423-north-lincoln-avenue-chicago-60625-illinois-united-states#emailscroll',
 'https://iwaspoisoned.com/biz/lao-s

In [232]:
# Scrape every complaint page in List into one DataFrame (one row per link);
# Create_df_ prints each link as it is processed
df = Create_df_(List)


https://iwaspoisoned.com/biz/domino-s-pizza-143-west-division-street-chicago-60610-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/harmony-restaurant-6525-west-archer-avenue-chicago-60638-illinois-united-states#emailscroll
https://iwaspoisoned.com/incident/swiss-air-11601-west-touhy-avenue-chicago-il-usa-279958#emailscroll
https://iwaspoisoned.com/biz/del-seoul-2568-north-clark-street-chicago-60614-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/poke-poke-802-west-belmont-avenue-chicago-60657-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/subway-2412-north-lincoln-avenue-chicago-60614-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/fat-willy-s-rib-shack-2416-west-schubert-avenue-chicago-60647-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/mekato-s-colombian-bakery-5423-north-lincoln-avenue-chicago-60625-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/lao-sze-chuan-4832-north-broadway-chica

https://iwaspoisoned.com/biz/dunkin-150-west-63rd-street-chicago-60621-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/conte-s-pizza-8512-south-pulaski-road-chicago-60652-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/kabobi-persian-and-mediterranean-grill-4748-north-kedzie-avenue-chicago-60625-illinois-united-states#emailscroll
https://iwaspoisoned.com/incident/popeyes-louisiana-kitchen-chicago-il-usa-272490#emailscroll
https://iwaspoisoned.com/biz/loyola-university-simpson-living-learning-center-6333-north-winthrop-avenue-chicago-60660-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/university-club-of-chicago-76-east-monroe-street-chicago-60603-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/palermo-s-of-63rd-pizza-and-restaurant-3751-west-63rd-street-chicago-60629-illinois-united-states#emailscroll
https://iwaspoisoned.com/biz/top-dog-restaurant-north-sauganash-avenue-chicago-60646-illinois-united-states#emailscroll
ht

In [233]:
# display the scraped table
df

Unnamed: 0,Name,Address,Zip,Latest_Report_Date,Total_all_time_reports,Total_all_time_sick_persons,Latitude,Longitude
0,Domino's Pizza,"[ 143 West Division Street, Chicago, 60610 I...",60610,Oct 15 2019 at 10:11 PM,1,1,41.9037635,-87.63317540000003
1,Harmony Restaurant,"[ 6525 West Archer Avenue, Chicago, 60638 Il...",60638,Oct 22 2019 at 7:04 PM,2,0,41.7920435,-87.78526399999998
2,SWISS AIR,"[ 11601 West Touhy Avenue, Chicago, IL, USA ]",60631Illinois,Nov 22 2019 at 1:02 AM,1,0,42.0113823,-87.81998920000001
3,Del Seoul,"[ 2568 North Clark Street, Chicago, 60614 Il...",60614,Oct 14 2019 at 9:26 AM,1,2,41.92944199999999,-87.64301499999999
4,Poke Poké,"[ 802 West Belmont Avenue, Chicago, 60657 Il...",60657,Oct 8 2019 at 1:46 PM,2,1,41.94008649999999,-87.64953559999998
...,...,...,...,...,...,...,...,...
101,D'Nuez,"[ 4125 South Archer Avenue, Chicago, 60632 I...",60632,Oct 24 2019 at 7:00 AM,1,0,41.81840709999999,-87.69671590000002
102,Stelios Bottles & Bites,"[ 19 South Morgan Street, Chicago, 60607 Ill...",60607,Oct 24 2019 at 7:01 AM,1,0,41.8809528,-87.65180509999999
103,Baha Restaurant LLC,"[ 4846 West Diversey Avenue, Chicago, IL, U...",60639Illinois,Nov 16 2019 at 7:01 AM,1,0,41.93175100000001,-87.74852870000001
104,Roti Modern Mediterranean,"[ 1012 West Randolph Street, Chicago, IL, U...",60607Illinois,Oct 8 2019 at 1:31 AM,1,0,41.88459580000001,-87.65281729999998


In [234]:
# Keep characters at positions 1 through 5 of each raw Zip string:
# position 0 is skipped and anything after the 5 following characters is dropped
new_zip = [raw_zip[1:6] for raw_zip in df.Zip.values]

In [237]:
# Write the trimmed zip codes back into the Zip column only.
# new_zip is aligned row-by-row with df, so a positional assignment is enough;
# a frame-wide value replace could also alter matching cells in other columns.
df = df.assign(Zip=new_zip)

In [238]:
# inspect the Zip column after trimming
df.Zip.values

array(['60610', '60638', '60631', '60614', '60657', '60614', '60647',
       '60625', '60640', '60639', '60629', '60647', '60647', '60634',
       '60666', '60707', '60601', '60614', '60614', '60646', '60642',
       '60607', '60647', '60666', '60608', '60638', '60647', '60707',
       '60622', '60647', '60601', '60630', '60605', '60622', '60603',
       '60634', '60625', '60622', '60647', '60634', '60614', '60637',
       '60647', '60707', '60630', '60655', '60629', '60630', '60623',
       '60647', '60647', '60659', '60611', 'Unite', '60634', '60630',
       '60611', '60647', '60707', '60707', '60625', '60647', '60656',
       '60647', 'Unite', '60647', '60657', '60625', 'Unite', '60638',
       '60639', '60621', '60652', '60625', 'Unite', '60660', '60603',
       '60629', '60646', '60657', '60639', '60622', '60620', '60611',
       '60609', 'Illin', '60614', '60608', '60647', '60656', '60630',
       '60647', '60643', '60643', '60607', '60622', '60647', '60607',
       '60647', '606

In [243]:
new_zip_ = list(map(lambda x: np.nan if (x[0] != '6') else x, list(df.Zip.values)))

In [245]:
df = df.replace(list(df.Zip.values),new_zip_)

In [246]:
# Persist the cleaned table next to the notebook. index=False is not passed, so
# the row index is written too and shows up as an unnamed first column on re-read.
df.to_csv('Food_Poisoning.csv')

# Mapping

In [201]:
# Base map: folium.Map() centred on the [latitude, longitude] pair used for Chicago
chicago_center = [41.714168, -87.655291]
m = folium.Map(location=chicago_center)
# Helper that pins one facility onto a folium map
def Adding_Marker(map_,longitude, latitude, popup, colour):
    """Add a single coloured marker to ``map_``.

    Note the argument order: longitude comes first, although folium itself
    is given the location as [latitude, longitude].

    Parameters
    ----------
    map_ : the folium map the marker is added to
    longitude, latitude : coordinates of the marker
    popup : label shown in the marker's pop-up
    colour : colour name handed to folium.Icon
    """
    coordinates = [latitude, longitude]
    marker_icon = folium.Icon(color=colour)
    marker = folium.Marker(location=coordinates, popup=popup, icon=marker_icon)
    marker.add_to(map_)

# Add one marker per row of df. Iterating over the columns themselves (instead
# of a hard-coded row count) keeps the loop valid whatever the number of
# scraped facilities is.
for name, sick, reports, longitude, latitude in zip(df.Name.values,
                                                    df.Total_all_time_sick_persons.values,
                                                    df.Total_all_time_reports.values,
                                                    df.Longitude.values,
                                                    df.Latitude.values):
    popup = str(name) + '\n'+'#Sick Persons :'+ str(sick) + '\n' +'#Reports :'+ str(reports)

    # colour encodes the combined number of reports and sick persons
    total = sick + reports
    if total < 2:
        colour = 'blue'
    elif total == 2:
        colour = 'orange'
    elif total > 2:
        colour = 'red'
    else:
        # total is not comparable (e.g. NaN): use a neutral colour rather than
        # silently reusing the colour of the previous facility
        colour = 'gray'

    Adding_Marker(m, longitude, latitude, popup, colour)

m