In [1]:
import re
import requests
from bs4 import BeautifulSoup

In [2]:
def strip_name(name):
    """Return a dish name without its trailing list of numbers.

    A name may end with a whitespace-separated, comma-joined run of
    numbers (e.g. ``"Soup 1,3,7"`` -- presumably allergen codes; confirm
    against the menu page).  That suffix is removed.  Surrounding
    whitespace is trimmed first, so a stray space after the numbers no
    longer keeps the suffix in place, and an empty or whitespace-only name
    yields ``""`` instead of failing with ``TypeError``.

    Args:
        name: Raw dish name text.

    Returns:
        The name without surrounding whitespace and without the trailing
        number list, if there was one.
    """
    trimmed = name.strip()
    # The suffix must begin with whitespace, and ``trimmed`` never starts
    # with whitespace, so this can never swallow the whole name.
    return re.sub(r"\s+\d+(?:,\d+)*$", "", trimmed)


def parse_price(price_str):
    """Convert a price cell such as ``"42,-"`` into an integer.

    The text must consist of digits followed by ``,-`` and optional
    trailing whitespace.  Anything else leaves the match as ``None`` and
    the subscript below raises ``TypeError``.
    """
    price_match = re.fullmatch(r"^(\d+),-\s*$", price_str)
    digits = price_match[1]
    return int(digits)


class Dish:
    """A single menu entry together with the cafeteria that serves it.

    The constructor receives raw text values.  Only the first character of
    ``kind`` is kept, both names are passed through ``strip_name`` and the
    three prices are converted with ``parse_price``.
    """

    def __init__(
        self,
        cafeteria: str,
        address: str,
        kind: str,
        name_czech: str,
        name_english: str,
        price_student: str,
        price_employee: str,
        price_guest: str,
    ):
        # Where the dish is served.
        self.cafeteria = cafeteria
        self.address = address

        # Single-character category code taken from the start of the text.
        self.kind = kind[0]

        # Names in both languages, cleaned by strip_name.
        self.name_czech = strip_name(name_czech)
        self.name_english = strip_name(name_english)

        # Prices per customer group, converted by parse_price.
        self.price_student = parse_price(price_student)
        self.price_employee = parse_price(price_employee)
        self.price_guest = parse_price(price_guest)

    def __repr__(self):
        """Render as ``Dish(...)`` with the repr of every field in constructor order."""
        fields = (
            self.cafeteria,
            self.address,
            self.kind,
            self.name_czech,
            self.name_english,
            self.price_student,
            self.price_employee,
            self.price_guest,
        )
        return f"Dish({', '.join(map(repr, fields))})"
        

In [3]:
# Seconds to wait for the server; without a timeout a stalled connection
# would block the kernel indefinitely.
REQUEST_TIMEOUT = 10

available_dishes = []

for num in ["5", "10"]:
    resp = requests.get("http://www.kam.vutbr.cz/?p=menu&provoz=" + num, timeout=REQUEST_TIMEOUT)

    assert resp.status_code == 200, f"unexpected HTTP status {resp.status_code} for provoz={num}"
    # Charset labels are case-insensitive, so accept e.g. "UTF-8" as well.
    assert (resp.encoding or "").lower() == "utf-8", f"unexpected encoding {resp.encoding!r} for provoz={num}"

    # Name the parser explicitly: otherwise BeautifulSoup picks whichever
    # parser happens to be installed and emits GuessedAtParserWarning.
    soup = BeautifulSoup(resp.text, "html.parser")

    sa2 = soup.find("div", {"id": "sa2"})
    assert sa2 is not None, f"content container #sa2 not found for provoz={num}"

    # The heading is matched on its serialised markup: cafeteria name,
    # <br/>, then the address inside two nested <small> tags.
    cafeteria_match = re.fullmatch(r"^<h1>([^<>]+)<br/><small><small>([^<>]+)</small></small>.*</h1>$", str(sa2.find("h1")))
    assert cafeteria_match is not None, f"unexpected <h1> layout for provoz={num}"
    cafeteria_name = cafeteria_match[1]
    cafeteria_address = cafeteria_match[2]

    # The last <small> in the container carries the data-update timestamp.
    last_update = re.fullmatch(r"^Čas aktualizace dat: (\d{1,2}\. \d{1,2}\. \d{4} \d{1,2}:\d{2}:\d{2})\.$", sa2.find_all("small")[-1].text)[1]

    rows = soup.find_all("tr", class_="wn0")

    print(f"[{last_update}] {cafeteria_name} -- {len(rows)} dish options available")

    # Each row's cell texts are passed positionally as the remaining Dish
    # arguments; non-breaking spaces are replaced with plain spaces first.
    available_dishes.extend(Dish(cafeteria_name, cafeteria_address, *(td.text.replace("\xA0", " ") for td in r.find_all("td"))) for r in rows)

available_dishes

[28. 1. 2020 18:32:23] Menza Starý pivovar -- 0 dish options available
[28. 1. 2020 18:32:23] Menza Purkyňova -- 11 dish options available


[Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Bramborové knedlíky se švestkami a mákem', 'Potato dumplings with plums', 42, 16, 78),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Kuřecí plátek 150g v omáčce pepřové', 'Kuřecí plátek 150g v omáčce pepřové', 50, 24, 85),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Rizoto s angl.slaninou a kuřecím masem, okurek', 'Risotto with bacon and chicken meat, pickle', 47, 22, 83),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Těstoviny s olivami,zeleninou a mozzarellou', 'Pasta with olives, vegetables and mozzarella', 38, 12, 74),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Uzená krkovička, omáčka česneková', 'Smoked pork neck, garlic sauce', 42, 16, 77),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'P', 'P čočková s uzeninou', 'Lentile soup with sausage', 12, 12, 12),
 Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'P'

In [4]:
# Keep only the main dishes (kind "H") and sort them by student price, lowest first.
cheapest = sorted([x for x in available_dishes if x.kind == "H"], key=lambda x: x.price_student)

# The cheapest main dish. When no main dish is on offer the list is empty and
# indexing it would abort the run with IndexError, so show nothing (None) instead.
cheapest[0] if cheapest else None

Dish('Menza Purkyňova', 'Purkyňova 93, Brno-Královo Pole', 'H', 'Těstoviny s olivami,zeleninou a mozzarellou', 'Pasta with olives, vegetables and mozzarella', 38, 12, 74)

In [5]:
# List every main dish as a (Czech name, student price) pair, in the price order of `cheapest`.
[(dish.name_czech, dish.price_student) for dish in cheapest]

[('Těstoviny s olivami,zeleninou a mozzarellou', 38),
 ('Bramborové knedlíky se švestkami a mákem', 42),
 ('Uzená krkovička, omáčka česneková', 42),
 ('Rizoto s angl.slaninou a kuřecím masem, okurek', 47),
 ('Kuřecí plátek 150g v omáčce pepřové', 50)]