# Selecting elements with XPath using lxml

In [1]:
import requests
import xml.etree.ElementTree as ET
from lxml import etree
from lxml import html
from inspect import getmembers, isclass, isfunction

### Creating an xml file

In [2]:
# create xml file
url = "https://fhnw.sv-restaurant.ch/de/menuplan/"
res = requests.get(url)
if res.status_code == 200:
    with open('menuplan.xml', 'w', encoding="utf-8") as f:
        f.write(res.text)

### Using etree to parse the xml file

In [3]:
# get html from website
mensaPlan = requests.get("https://fhnw.sv-restaurant.ch/de/menuplan/")

In [4]:
# Put data in a decent structure and use xpath
mensaTree=etree.HTML(str(mensaPlan.content.decode())[2:])

In [5]:
# printing in a ugly way
# print(etree.tostring(mensaTree, pretty_print = True))

In [6]:
menu = []

for i in range(1,6):
    xpath_expr = f'//div[@id = "menu-plan-tab{i}"]/div/div/h2[@class ="menu-title"]'
    results = [tk.text for tk in mensaTree.xpath(xpath_expr)]
    for j in range(len(results)):
        results[j] = results[j].replace('\xad', '')
    if (len(results)) != 0:
        menu.append(results)
    
print(menu)

[['Frische Pasta', 'Rindfleischvogel', 'Quorn Southern Fried Bites', 'Pilzcreme Suppe'], ['Schweizer Pouletbrust', 'Kalbshacksteak', 'Gemüselasagne', 'Geflügel-Kerbel Suppe'], ['Fish and Chips', 'Truthahnschnitzel', 'Frühlingsrollen', 'Spargel Suppe'], ['Vegi Kebab aus Seitan', 'MSC-Schlemmerfilet Bordelaise', 'Medaglioni', 'Gersten Suppe']]


In [7]:
menu_dict = {}

for i in range(1,len(menu) + 1):
    xpath_expr = f'//div[@class = "day-nav"]/ul/li[{i}]/label/span[2]'
    results = [tk.text for tk in mensaTree.xpath(xpath_expr)]
    daily_menu = {results[0]: menu[i - 1]}
    menu_dict.update(daily_menu)

In [8]:
# print tomorrows menu
print(menu_dict['13.07.'])

['Frische Pasta', 'Rindfleischvogel', 'Quorn Southern Fried Bites', 'Pilzcreme Suppe']


### Get links to other pages

use /@href

In [9]:
i = 1
running = True
while running == True:
    xpath_expr = f'//div[@class = "footer-navigation"]/nav[@class = "footer-mainnav"]/ul/li[{i}]/a/@href'
    results = [tk for tk in mensaTree.xpath(xpath_expr)]
    if len(results) > 0:
        print(results)
        i += 1
    else:
        running = False

['/de/menuplan/']
['/de/catering/']
['/de/frisch-gesund/']
['/de/ueber-uns/']
