# Webscrape restaurant addresses from Michelin website

#### Import libraries

In [175]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import time
import re

#### Get Michelin restaurants and their corresponding weblinks

In [176]:
# get first page of restaurants in Michelin guide website
page = requests.get("https://guide.michelin.com/sg/en/restaurants")
# page

In [177]:
soup = BeautifulSoup(page.content, 'html.parser')
# print(soup.prettify())

In [178]:
# find all restaurants and their web addresses in page 1
restaurants = soup.find_all('a', href= True, class_ = 'link')

In [179]:
# check the restaurants
restaurants

[<a aria-label="Open Poh Cheu" class="link" href="/sg/en/singapore-region/singapore/restaurant/poh-cheu"></a>,
 <a aria-label="Open New Rong Liang Ge Cantonese Roast Duck Double Boiled Soup" class="link" href="/sg/en/singapore-region/singapore/restaurant/new-rong-liang-ge-cantonese-roast-duck-double-boiled-soup"></a>,
 <a aria-label="Open Chef Kang's Noodle House" class="link" href="/sg/en/singapore-region/singapore/restaurant/chef-kang-s-noodle-house"></a>,
 <a aria-label="Open Guan Kee Fried Kway Teow" class="link" href="/sg/en/singapore-region/singapore/restaurant/guan-kee-fried-kway-teow"></a>,
 <a aria-label="Open Shi Le Yuan" class="link" href="/sg/en/singapore-region/singapore/restaurant/shi-le-yuan570556"></a>,
 <a aria-label="Open San Bao Soya Sauce Chicken" class="link" href="/sg/en/singapore-region/singapore/restaurant/san-bao-soya-sauce-chicken"></a>,
 <a aria-label="Open Hua Kee Chicken Rice" class="link" href="/sg/en/singapore-region/singapore/restaurant/hua-kee-chicken-r

#### Create dictionary of Michelin restaurants and their html webpages

In [180]:
restaurants_dict = {}

In [181]:
for i in restaurants:
    restaurant_name = i['aria-label'][5:]
    webadd = i['href']
    restaurants_dict[restaurant_name]=webadd

In [182]:
print(restaurants_dict)

{'Poh Cheu': '/sg/en/singapore-region/singapore/restaurant/poh-cheu', 'New Rong Liang Ge Cantonese Roast Duck Double Boiled Soup': '/sg/en/singapore-region/singapore/restaurant/new-rong-liang-ge-cantonese-roast-duck-double-boiled-soup', "Chef Kang's Noodle House": '/sg/en/singapore-region/singapore/restaurant/chef-kang-s-noodle-house', 'Guan Kee Fried Kway Teow': '/sg/en/singapore-region/singapore/restaurant/guan-kee-fried-kway-teow', 'Shi Le Yuan': '/sg/en/singapore-region/singapore/restaurant/shi-le-yuan570556', 'San Bao Soya Sauce Chicken': '/sg/en/singapore-region/singapore/restaurant/san-bao-soya-sauce-chicken', 'Hua Kee Chicken Rice': '/sg/en/singapore-region/singapore/restaurant/hua-kee-chicken-rice', 'Fu Ming Cooked Food': '/sg/en/singapore-region/singapore/restaurant/fu-ming-cooked-food', 'Koka Wanton Noodles': '/sg/en/singapore-region/singapore/restaurant/koka-wanton-noodles', 'R&B Express': '/sg/en/singapore-region/singapore/restaurant/r-b-express', 'Hock Hai Curry Chicken B

In [183]:
pages_souped = []

# find all the other pages of restaurants and BS them

for k in soup.find_all('a', class_="btn btn-outline-secondary btn-sm"):
    webadd_extension = k['href']
    page_new = requests.get("https://guide.michelin.com"+str(webadd_extension))
    soupy = BeautifulSoup(page_new.content, 'html.parser')
    pages_souped.append(soupy)
    time.sleep(5)

In [184]:
for i in pages_souped:
    restaurants = i.find_all('a', href= True, class_ = 'link')
    for j in restaurants:
        restaurant_name = j['aria-label'][5:]
        webadd = j['href']
        restaurants_dict[restaurant_name]=webadd

In [185]:
len(restaurants_dict)

213

#### Create dictionary of restaurant and their postcodes

In [186]:
restaurant_postcodes = {}

for key, value in restaurants_dict.items():
    rest_page = requests.get("https://guide.michelin.com"+value)
    soup = BeautifulSoup(rest_page.content, 'html.parser')
    address = soup.find("li", class_ = None).contents[1]
    postcode = re.findall(r', (\d{6})', address)
    # extract michelin class
    michelin_class_tag = soup.find_all("li", class_ = None)[2]
    michelin_class = re.search('</i>\n(.+?)\n', str(michelin_class_tag)).group(1).lstrip()
    if len(postcode) !=0:
        restaurant_postcodes[key] = [postcode[0], michelin_class]
    else:
        restaurant_postcodes[key] = None
    time.sleep(5)

In [187]:
restaurant_postcodes

{'Poh Cheu': ['150127', 'The MICHELIN Plate: Good cooking'],
 'New Rong Liang Ge Cantonese Roast Duck Double Boiled Soup': ['182269',
  'The MICHELIN Plate: Good cooking'],
 "Chef Kang's Noodle House": ['319579',
  'Bib Gourmand: good quality, good value cooking'],
 'Guan Kee Fried Kway Teow': ['270020',
  'Bib Gourmand: good quality, good value cooking'],
 'Shi Le Yuan': ['150085', 'The MICHELIN Plate: Good cooking'],
 'San Bao Soya Sauce Chicken': ['150085', 'The MICHELIN Plate: Good cooking'],
 'Hua Kee Chicken Rice': ['150085', 'The MICHELIN Plate: Good cooking'],
 'Fu Ming Cooked Food': ['150085',
  'Bib Gourmand: good quality, good value cooking'],
 'Koka Wanton Noodles': ['198783', 'The MICHELIN Plate: Good cooking'],
 'R&B Express': ['229495', 'The MICHELIN Plate: Good cooking'],
 'Hock Hai Curry Chicken Bee Hoon Noodle': ['180270',
  'Bib Gourmand: good quality, good value cooking'],
 'Zén': ['089855', 'Two MICHELIN Stars: Excellent cooking, worth a detour!'],
 'Yen Yakiniku':

In [188]:
# Manual override for Heng, which doesn't have postcode in the webpage
restaurant_postcodes['Majestic'] = ['018935','The MICHELIN Plate: Good cooking']
restaurant_postcodes['Hawker Chan Soya Sauce Chicken Rice & Noodle'] = ['058972', 'Bib Gourmand: good quality, good value cooking']
restaurant_postcodes['Preludio'] = ['069547', 'The MICHELIN Plate: Good cooking'] # Preludio's postcode is wrong in the Michelin website

In [209]:
restaurant_prep = []

for k,v in restaurant_postcodes.items():
    dict_container = {}
    dict_container['restaurant'] = k
    dict_container['postcode'] = v[0]
    dict_container['michelin_grade'] = v[1]
    restaurant_prep.append(dict_container)    

In [210]:
restaurant_postcodes_df = pd.DataFrame.from_dict(restaurant_prep)

In [211]:
restaurant_postcodes_df

Unnamed: 0,michelin_grade,postcode,restaurant
0,The MICHELIN Plate: Good cooking,150127,Poh Cheu
1,The MICHELIN Plate: Good cooking,182269,New Rong Liang Ge Cantonese Roast Duck Double ...
2,"Bib Gourmand: good quality, good value cooking",319579,Chef Kang's Noodle House
3,"Bib Gourmand: good quality, good value cooking",270020,Guan Kee Fried Kway Teow
4,The MICHELIN Plate: Good cooking,150085,Shi Le Yuan
5,The MICHELIN Plate: Good cooking,150085,San Bao Soya Sauce Chicken
6,The MICHELIN Plate: Good cooking,150085,Hua Kee Chicken Rice
7,"Bib Gourmand: good quality, good value cooking",150085,Fu Ming Cooked Food
8,The MICHELIN Plate: Good cooking,198783,Koka Wanton Noodles
9,The MICHELIN Plate: Good cooking,229495,R&B Express


In [214]:
cols = list(restaurant_postcodes_df.columns.values)
cols

['michelin_grade', 'postcode', 'restaurant']

In [216]:
restaurant_postcodes_df= restaurant_postcodes_df[['restaurant', 'postcode', 'michelin_grade']]
restaurant_postcodes_df

Unnamed: 0,restaurant,postcode,michelin_grade
0,Poh Cheu,150127,The MICHELIN Plate: Good cooking
1,New Rong Liang Ge Cantonese Roast Duck Double ...,182269,The MICHELIN Plate: Good cooking
2,Chef Kang's Noodle House,319579,"Bib Gourmand: good quality, good value cooking"
3,Guan Kee Fried Kway Teow,270020,"Bib Gourmand: good quality, good value cooking"
4,Shi Le Yuan,150085,The MICHELIN Plate: Good cooking
5,San Bao Soya Sauce Chicken,150085,The MICHELIN Plate: Good cooking
6,Hua Kee Chicken Rice,150085,The MICHELIN Plate: Good cooking
7,Fu Ming Cooked Food,150085,"Bib Gourmand: good quality, good value cooking"
8,Koka Wanton Noodles,198783,The MICHELIN Plate: Good cooking
9,R&B Express,229495,The MICHELIN Plate: Good cooking


In [218]:
#write to csv
restaurant_postcodes_df.to_csv("restaurant_postcodes.csv")

In [165]:
# Appendix: text for extracting Michelin grade

x = soup.find_all("li", class_ = None)[2]
y = re.search('</i>\n(.+?)\n', str(x)).group(1).lstrip
print(y)

<built-in method lstrip of str object at 0x000002D6C3D243F0>
