# National Dishes List from Wikipedia

In [None]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

In [None]:
def parse_all_dishes(page):
    """Collect article links from one page of the "National dishes" category.

    Args:
        page: Suffix appended to ``page`` to form the pagination query key of
            the category URL (e.g. ``'until'`` -> ``pageuntil=Polenta``,
            ``'from'`` -> ``pagefrom=Polenta``).

    Returns:
        A list of ``href`` strings taken from the ``mw-category-group``
        sections, excluding links that contain ``'Category'`` and with the
        first remaining link dropped; the string ``"ConErr"`` if the request
        fails with a connection error; ``None`` on any other failure.
    """
    url = (
        "https://en.wikipedia.org/w/index.php"
        f"?title=Category:National_dishes&page{page}=Polenta#mw-pages"
    )
    try:
        response = requests.get(url)
        soup = BeautifulSoup(response.text, 'html.parser')
        dishes_links = []
        for group in soup.find_all(class_="mw-category-group"):
            for item in group.find_all('li'):
                # Skip entries without an anchor/href instead of failing the
                # whole page on a single malformed list item.
                href = item.a.get('href') if item.a else None
                if href and 'Category' not in href:
                    dishes_links.append(href)
        # NOTE(review): the first link is discarded, presumably because it is
        # not a dish article -- confirm against the live category page.
        return dishes_links[1:]
    except (requests.exceptions.ConnectionError, ConnectionError):
        # requests raises its own ConnectionError type, which is not a
        # subclass of the builtin one, so both are listed.
        return "ConErr"
    except Exception:
        return None

In [None]:
# The category listing is fetched as two pages ("until" and "from" Polenta).
# parse_all_dishes signals failure with a non-list value (None or "ConErr"),
# so check each result instead of letting list concatenation fail with an
# unrelated-looking TypeError.
all_dishes_links = []
for direction in ('until', 'from'):
    page_links = parse_all_dishes(direction)
    if not isinstance(page_links, list):
        raise RuntimeError(
            f"Could not fetch category page 'page{direction}' "
            f"(parse_all_dishes returned {page_links!r})"
        )
    all_dishes_links.extend(page_links)

In [None]:
def parse_dish_info(link):
    """Scrape the infobox of one Wikipedia dish article.

    Args:
        link: Path of the article relative to ``https://en.wikipedia.org/``
            (a leading ``/`` is tolerated).

    Returns:
        A dict with the keys ``"Name"``, ``"Country"``, ``"Course"``,
        ``"Serving temperature"`` and ``"Main ingredients"`` (fields missing
        from the page stay ``None``; ingredients are a list of capitalized
        link texts); the string ``"ConErr"`` if the request fails with a
        connection error; ``None`` on any other failure.
    """
    try:
        # Strip leading slashes so a root-relative href ("/wiki/...") does not
        # produce a double slash after the host name.
        response = requests.get(f"https://en.wikipedia.org/{link.lstrip('/')}")
        soup = BeautifulSoup(response.text, 'html.parser')
        dish_info = {
            # Cut only the trailing " - <site>" suffix so hyphenated names
            # (e.g. "Pot-au-feu") are kept whole.
            # NOTE(review): assumes the title ends in " - Wikipedia" -- confirm.
            "Name": soup.title.text.rsplit(' - ', 1)[0].strip(),
            "Country": None,
            "Course": None,
            "Serving temperature": None,
            "Main ingredients": None,
        }
        for row in soup.find_all('tr'):
            # Only rows carrying an infobox label are of interest.
            if not row.find('th', class_="infobox-label"):
                continue
            cell = row.td
            if cell is None:
                # A label without a value cell: nothing to read from this row.
                continue
            label = row.th.text
            if label == "Place of origin" or (
                label == "Region or state" and not dish_info["Country"]
            ):
                # "Place of origin" always wins; "Region or state" is only a
                # fallback while no country has been recorded yet.
                dish_info["Country"] = cell.a.text if cell.a else cell.text
            elif label == "Course":
                dish_info["Course"] = cell.a.text if cell.a else cell.text
            elif label == "Serving temperature":
                dish_info["Serving temperature"] = cell.text
            elif label == "Main ingredients":
                dish_info["Main ingredients"] = [
                    anchor.text.capitalize() for anchor in cell.find_all('a')
                ]
        return dish_info
    except (requests.exceptions.ConnectionError, ConnectionError):
        # requests raises its own ConnectionError type, which is not a
        # subclass of the builtin one, so both are listed.
        return "ConErr"
    except Exception:
        return None

In [None]:
# parse_dish_info returns a non-dict value when a page could not be scraped;
# keep only real records so the DataFrame constructor receives a uniform
# list of dicts.
all_dishes_info = [
    info
    for info in map(parse_dish_info, all_dishes_links)
    if isinstance(info, dict)
]
dishes_df = pd.DataFrame(all_dishes_info)

In [None]:
# Persist the scraped table as CSV in the working directory.
dishes_df.to_csv(path_or_buf="national_dishes.csv")