Python program to read the XML file and display a table with the title of each
recipe, the names of its ingredients, and the number of calories.

In [1]:
import xml.etree.ElementTree as ET
import pandas as pd
from lxml import etree

In [2]:
tree = ET.parse('recipes.xml')
root = tree.getroot()

In [3]:
recipes = []

for child in root[1:]:
    name = child.findtext('title')
    ingredients = "||".join(map(lambda tag: tag.attrib.get('name'),child.findall('ingredient')))
    calorie = child.find('nutrition').attrib.get('calories')
    recipes.append([name, ingredients, calorie])

In [4]:
recipes_data = pd.DataFrame(recipes, columns=['Title', 'Ingredients', 'Calories'])

In [5]:
recipes_data.head()

Unnamed: 0,Title,Ingredients,Calories
0,Beef Parmesan with Garlic Angel Hair Pasta,"beef cube steak||onion, sliced into thin rings...",1167
1,Ricotta Pie,filling||dough||milk,349
2,Linguine Pescadoro,linguini pasta||sauce,532
3,Zuppa Inglese,egg yolks||milk||Savoiardi biscuits||sugar||Al...,612
4,Cailles en Sarcophages,pastry||filling||package phyllo dough||egg whi...,8892


Working with XPath

In [6]:
tree = etree.parse('recipes.xml')

In [7]:
# Find the titles of all recipes.
tree.xpath('/collection/recipe/title/text()')

['Beef Parmesan with Garlic Angel Hair Pasta',
 'Ricotta Pie',
 'Linguine Pescadoro',
 'Zuppa Inglese',
 'Cailles en Sarcophages']

In [8]:
# Find the titles of recipes that use olive oil.
xpath_Ingredients = '/collection/recipe[%s]/ingredient/@name'
xpath_title = '/collection/recipe[%s]/title/text()'
count = len(tree.xpath('/collection/recipe'))

for i in range(count):
    if any(['OLIVE OIL' == item.upper() for item in tree.xpath(xpath_Ingredients.replace('%s', str(i)))]):
            print(tree.xpath(xpath_title.replace('%s', str(i))))

['Beef Parmesan with Garlic Angel Hair Pasta']


In [9]:
# Find the titles of all recipes with less than 500 calories.
title = tree.xpath('/collection/recipe/title/text()')
calories = tree.xpath('/collection/recipe/nutrition/@calories')

for i in range(len(calories)):
    if int(calories[i])<500:
        print(title[i])

Ricotta Pie


In [10]:
# Find the amount of sugar needed for Zuppa Inglese.
get_amt = "//*/title[text()='Zuppa Inglese']/following-sibling::ingredient[@name='sugar']/@amount"
get_unit = "//*/title[text()='Zuppa Inglese']/following-sibling::ingredient[@name='sugar']/@unit"
tree.xpath(f"{get_amt} | {get_unit}")

['0.75', 'cup']

In [11]:
# Find the titles of all recipes that require 4 steps.
for recipe in tree.xpath("//*/recipe"):
    if len(recipe.xpath(".//step"))==4:
        print(recipe.xpath("./title/text()")[0])

Beef Parmesan with Garlic Angel Hair Pasta


In [12]:
# Find the names of all item that are used to make other ingredients.
",".join(tree.xpath("//*/recipe/ingredient//ingredient/@name"))

'ricotta cheese,eggs,white sugar,vanilla extract,semisweet chocolate chips,flour,baking powder,white sugar,shortening,eggs, lightly beaten,vanilla extract,olive oil,minced cloves of garlic,Italian seasoning,dried thyme,crushed red pepper flakes,crushed tomatoes,black olives, drained,whole baby clams,minced clams, with juice,small salad shrimp,scallops,lemon zest,salt,ground black pepper,chilled unsalted butter,flour,salt,ice water,baked chicken,marinated chicken,small chickens, cut up,Herbes de Provence,dry white wine,orange juice,minced garlic,truffle oil,stock,chicken wings, giblets, and kidney,onions, peeled,carrots, peeled and cut lengthwise,celery, cut lengthwise,bay leaf,small bunch parsley,whole peppercorns,salt,sauteed mushrooms,white button mushrooms,butter,dry white wine,minced garlic,minced shallots,sauce,chicken juices,mushroom juices,sherry,flour,butter'

In [13]:
# Find the names of all item for which you need other ingredients.
tree.xpath("//ingredient[ingredient]/@name")

['filling',
 'dough',
 'sauce',
 'pastry',
 'filling',
 'baked chicken',
 'marinated chicken',
 'stock',
 'sauteed mushrooms',
 'sauce']

In [14]:
# Find the names of the first three ingredients in each recipe.
for recipe in tree.xpath("//*/recipe"):
    print(recipe.xpath("./ingredient/@name")[:3])

['beef cube steak', 'onion, sliced into thin rings', 'green bell pepper, sliced in rings']
['filling', 'dough', 'milk']
['linguini pasta', 'sauce']
['egg yolks', 'milk', 'Savoiardi biscuits']
['pastry', 'filling', 'package phyllo dough']
