# A Simple Lightweight Script to Scrape McDelivery (Singapore) Menu

---

### Import required libraries

- **Requests** to send HTTP/1.1 requests
- **BeautifulSoup** to parse the received content
- **Pandas** to tabulate the filtered items
- **DateTime** to add current date and time values
- **re** to enable regular expressions

In [8]:
import requests as r
from bs4 import BeautifulSoup as BS
import pandas as pd
import datetime as dt
import re

### Declaring Request Headers to be Used
Sending a browser-like `User-Agent` and `Accept` header makes each request look like it comes from a regular web browser.

In [9]:
# Request headers sent with every page fetch.
# Only genuine *request* headers are listed. The Access-Control-* fields that
# were previously included are CORS *response* headers: a server sends them to
# a browser, and a browser never puts them on an outgoing request, so they did
# nothing to make the scraper look like one.
my_headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_14_3) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36",
    "Accept": "text/html,application/xhtml+xml,application/xml; q=0.9,image/webp,image/apng,*/*;q=0.8",
}

# A single session is shared by all requests made in this notebook
session = r.Session()

# Captured once so that everything derived from it uses the same moment in time
current_date = dt.datetime.now()

### Getting the Live Exchange Rate from XE

In [10]:
# Fetch the XE converter page for 1 SGD -> USD (all elements).
# The timeout stops a stalled connection from hanging the notebook, and
# raise_for_status() fails fast on an HTTP error instead of parsing an error page.
xe_response = session.get(
    "https://www.xe.com/currencyconverter/convert/?Amount=1&From=SGD&To=USD",
    headers=my_headers,
    timeout=30,
)
xe_response.raise_for_status()
XE = BS(xe_response.content, "lxml")

# Locate the element that holds the converted amount (CSS selector).
# select() returns a list, so an empty list means the selector matched nothing.
# NOTE(review): the class name looks auto-generated and may change when the
# site is rebuilt — confirm it still matches if the error below is raised.
rate_elements = XE.select("p.result__BigRate-sc-1bsijpp-1.iGrAod")
if not rate_elements:
    raise RuntimeError(
        "Exchange rate element not found on the XE page; the CSS selector may be out of date."
    )

# Extract only the number from the text string and convert it to a float.
# findall() also returns a list; index [0] takes the first number found.
rate_numbers = re.findall(r"[-+]?(?:\d*\.\d+|\d+)", rate_elements[0].text)
if not rate_numbers:
    raise ValueError(
        f"No number found in the exchange rate text: {rate_elements[0].text!r}"
    )
exchange_rate = float(rate_numbers[0])

print(exchange_rate)
print(type(exchange_rate))

### Parsing Items from Different Menus
    
Further reading:

- https://data36.com/scrape-multiple-web-pages-beautiful-soup-tutorial/
- https://hackersandslackers.com/scraping-urls-with-beautifulsoup/

In [11]:
# Every category page shares one address and differs only in two query
# parameters: daypartId picks the menu and catId picks the category within it.
_MENU_PAGE = "https://www.mcdelivery.com.sg/sg/browse/menu.html?daypartId={daypart}&catId={category}"

# (daypartId, catIds) pairs, listed in the order the pages are scraped
_MENU_CATEGORIES = (
    # Regular Menu
    (21, (
        29,  # Promotion Meals
        63,  # Chicken McCrispy®
        66,  # Sharing
        30,  # Ala Carte & Value Meals
        31,  # Sides
        32,  # Desserts
        33,  # Beverages
        34,  # For the Family
    )),
    # Breakfast Menu
    (22, (
        29,  # Promotion Meals
        30,  # Breakfast & Value Meals
        31,  # Sides
        32,  # Desserts
        33,  # Beverages
        34,  # For the Family
    )),
)

# Expand the table above into the flat list of page addresses
URL_list = [
    _MENU_PAGE.format(daypart=daypart_id, category=category_id)
    for daypart_id, category_ids in _MENU_CATEGORIES
    for category_id in category_ids
]



# ---------------------------------------------
# Parsing the data into a list of dictionaries
# ---------------------------------------------

# One dictionary per menu item; filled in by the loops below
product_list = []

# Values that are the same for every row are computed once, outside the loops
scrape_date = current_date.strftime("%Y/%m/%d")
scrape_day = current_date.strftime("%a")

# Matches an optionally signed integer or decimal number inside a price label
price_pattern = re.compile(r"[-+]?(?:\d*\.\d+|\d+)")

# Outer loop iterates through the list of webpages
for url in URL_list:
    # The timeout keeps one unresponsive page from hanging the whole run, and
    # raise_for_status() stops an HTTP error page from being parsed as a menu.
    response = session.get(url, headers=my_headers, timeout=30)
    response.raise_for_status()
    soup = BS(response.content, "lxml")

    product_cards = soup.select("div.product-card")
    if not product_cards:
        # No items on this page, so there is nothing to record for it
        continue

    # Category and menu names are read from the page itself, not from a card,
    # so they are looked up once per page rather than once per item.
    category = soup.select("ol.breadcrumb > li.active")[0].text
    menu = soup.select("li.primary-menu-item.selected > a > span")[0].text

    # Inner loop iterates through the product cards on each webpage
    for card in product_cards:
        # select() and findall() return lists; [0] takes the first match
        price_text = card.select("span.starting-price")[0].text
        price_sgd = float(price_pattern.findall(price_text)[0])

        # Key order is kept as-is because it becomes the column order of the table
        product_list.append({
            "Date": scrape_date,
            "Day": scrape_day,
            "Territory": "Singapore",
            "Menu Item": card.select("h5.product-title")[0].text,
            "Price (SGD)": price_sgd,
            "Price (USD)": round(price_sgd * exchange_rate, 2),
            "Category": category,
            "Menu": menu,
        })
        
# print(product_list)

### Constructing the Dataframe (Table) and Saving to File

Further reading: https://pbpython.com/currency-cleanup.html

In [12]:
# Each dictionary in product_list becomes one row of the table
product_list_df = pd.DataFrame(product_list)

print(product_list_df)

# The file name starts with the scrape time in square brackets.
# NOTE(review): the timestamp contains colons, which Windows does not allow in
# file names — confirm this only needs to run on macOS/Linux.
timestamp = current_date.strftime("[%Y-%m-%d %H:%M:%S]")
csv_name = timestamp + " mcd-bs4-sg.csv"

# index=False leaves the DataFrame's row index out of the file
product_list_df.to_csv(csv_name, index=False, encoding="utf-8")

In [13]:
# name = h5.product-title
# price = span.starting-price
# category = div.clearfix li.active
# menu = li.primary-menu-item.selected > a > span


In [14]:
#     name = soup.find_all("h5", class_="product-title") # returns list of elements
#     name = [name.text for name in name] # for loop strips the tags off each element
#     df_name = df_name.append(name, ignore_index=True) # returns dataframe of elements listed in a single column
    
#     price = soup.find_all("span", class_="starting-price")
#     price = [price.text for price in price]
#     df_price = df_price.append(price, ignore_index=True)
    
#     category = soup.select("ol.breadcrumb > li.active")
#     category = [category.text for category in category]
#     df_category = df_category.append(category, ignore_index=True)
    
#     menu = soup.select("li.primary-menu-item.selected > a > span")
#     menu = [menu.text for menu in menu]
#     df_menu = df_menu.append(menu, ignore_index=True)