In [5]:
import requests
from bs4 import BeautifulSoup
import gzip
import functools

In [6]:
def get_soup(html_str, parser='html.parser'):
    """Parse markup into a BeautifulSoup tree.

    Args:
        html_str: Markup to parse (text or bytes).
        parser: Name of the parser backend handed to BeautifulSoup;
            defaults to ``'html.parser'``.

    Returns:
        The ``BeautifulSoup`` object built from ``html_str``.
    """
    soup = BeautifulSoup(html_str, features=parser)
    return soup

In [7]:
# Root URL of the site; sitemap paths are appended to it directly.
BASE = 'https://www.allrecipes.com/'
# Scratch file that holds a downloaded gzip sitemap before it is decompressed.
TEMP = "temp.gz"

In [8]:
def get_request(endpoint, timeout=30):
    """Issue an HTTP GET and return the response.

    Args:
        endpoint: URL to fetch.
        timeout: Seconds to wait for the server before giving up. Passed
            straight through to ``requests.get``; ``None`` disables the
            limit.

    Returns:
        The response object returned by ``requests.get``.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    # requests applies no timeout unless asked to, so without this a single
    # unresponsive host would stall the whole crawl indefinitely.
    return requests.get(endpoint, timeout=timeout)

In [9]:
# Download the sitemap index and parse it with the XML parser.
gs_index = get_soup(
    get_request('%sgsindex.xml' % BASE).text,
    parser='xml',
)

In [10]:
# Take the first child of each <sitemap> entry's <loc> element.
sitemaps = [
    entry.loc.contents[0]
    for entry in gs_index.find_all('sitemap')
]

In [11]:
# Scratch cell: dump the first sitemap to disk so it can be inspected in VS Code.
sitemap = get_request(sitemaps[0])

# Save the gzip payload exactly as downloaded.
with open(TEMP, 'wb') as gz_out:
    gz_out.write(sitemap.content)

# Decompress it ...
with gzip.open(TEMP, 'rb') as gz_in:
    file_content = gz_in.read()

# ... and write the decompressed bytes to text.xml.
with open("text.xml", "wb") as xml_out:
    xml_out.write(file_content)

In [12]:
def get_sitemap_xml(sitemap):
    """Download a gzip-compressed sitemap and parse it as XML.

    Args:
        sitemap: URL of the gzip-compressed sitemap file.

    Returns:
        The result of ``get_soup`` applied to the decompressed bytes with
        ``parser='xml'``.

    Raises:
        OSError: If the downloaded body is not valid gzip data.
    """
    response = get_request(sitemap)
    # Decompress in memory rather than round-tripping through a shared
    # scratch file: nothing is left behind on disk, and calls cannot clobber
    # each other's data (or the file written by the inspection cell).
    xml_bytes = gzip.decompress(response.content)
    return get_soup(xml_bytes, parser='xml')
    
        

In [13]:
def get_recipes_from_xml(sitemap):
    """List the page URLs contained in one sitemap.

    Args:
        sitemap: URL of the sitemap, forwarded to ``get_sitemap_xml``.

    Returns:
        A list with the first child of the ``<loc>`` element of every
        ``<url>`` entry, in document order.
    """
    parsed = get_sitemap_xml(sitemap)
    recipe_urls = []
    for url_tag in parsed.find_all('url'):
        recipe_urls.append(url_tag.loc.contents[0])
    return recipe_urls

In [14]:
# Work on a copy so `sitemaps` itself stays intact when the cell is re-run.
sitemaps_excluded = [*sitemaps]
# NOTE(review): the sitemap at index 4 is skipped without explanation — confirm why.
del sitemaps_excluded[4]
# One list of URLs per remaining sitemap.
recipes = list(map(get_recipes_from_xml, sitemaps_excluded))

In [15]:
# Flatten the per-sitemap URL lists into one list. The nested comprehension
# is linear in the total number of URLs and yields [] when `recipes` is
# empty; reduce() with `+` re-copies the accumulator at every step and raises
# TypeError on an empty sequence.
recipe_list = [url for sitemap_urls in recipes for url in sitemap_urls]

In [16]:
# Preview only the head of the list; echoing every URL floods the notebook.
recipe_list[:15]

['https://www.allrecipes.com/recipe/6663/crispy-cheese-twists/',
 'https://www.allrecipes.com/recipe/6664/basil-roasted-peppers-and-monterey-jack-cornbread/',
 'https://www.allrecipes.com/recipe/6665/moms-yeast-rolls/',
 'https://www.allrecipes.com/recipe/6666/sweet-potato-bread-i/',
 'https://www.allrecipes.com/recipe/6667/cornish-splits/',
 'https://www.allrecipes.com/recipe/6668/orange-buns/',
 'https://www.allrecipes.com/recipe/6669/jalapeno-cheese-bread/',
 'https://www.allrecipes.com/recipe/6670/dees-health-bread/',
 'https://www.allrecipes.com/recipe/6671/oatmeal-bread-i/',
 'https://www.allrecipes.com/recipe/6672/strawberry-bread-i/',
 'https://www.allrecipes.com/recipe/6673/sun-dried-tomato-and-asiago-cheese-bread/',
 'https://www.allrecipes.com/recipe/6674/hawaiian-sweet-bread/',
 'https://www.allrecipes.com/recipe/6675/the-best-corn-bread-youll-ever-eat/',
 'https://www.allrecipes.com/recipe/6676/no-knead-refrigerator-rolls/',
 'https://www.allrecipes.com/recipe/6677/sourdou

In [17]:
# Fetch and parse a single recipe page to develop the extractor against.
soup = get_soup(
    get_request('https://www.allrecipes.com/recipe/7038/panettone-ii/').text
)

In [18]:
def get_v1(soup):
    """Extract the recipe fields from a parsed recipe page.

    Args:
        soup: Parsed page exposing ``find`` / ``find_all``.

    Returns:
        A dict with the keys ``title``, ``rating`` (float),
        ``review_count`` (first whitespace-separated token of the review
        count text, kept as a string), ``ingredients`` and ``directions``
        (both lists of strings).

    Raises:
        AttributeError, TypeError: When an expected element is missing from
            the page, because ``find`` then returns ``None``.
    """
    title = soup.find(id="recipe-main-content").text

    stars_div = soup.find("div", class_="rating-stars")
    rating = float(stars_div['data-ratingstars'])

    review_span = soup.find("span", class_="review-count")
    review_count = review_span.text.split()[0]

    # Keep only the spans that are tagged as real ingredients.
    ingredients = [
        span.text
        for span in soup.find_all('span', class_="recipe-ingred_txt")
        if span.get('itemprop') == 'recipeIngredient'
    ]

    # The last direction item is dropped.
    step_spans = soup.find_all('span', class_='recipe-directions__list--item')
    directions = [span.text.strip() for span in step_spans[:-1]]

    return {
        'title': title,
        'rating': rating,
        'review_count': review_count,
        "ingredients": ingredients,
        "directions": directions,
    }

In [19]:
def func(x):
    """Fetch one recipe URL and extract its fields with ``get_v1``.

    Args:
        x: URL of the recipe page.

    Returns:
        The dict produced by ``get_v1``, or ``None`` when fetching or
        parsing the page raised an exception.
    """
    try:
        soup = get_soup(get_request(x).text)
        # Return the record explicitly: a bare expression here would make
        # every successful call fall off the end and yield None.
        return get_v1(soup)
    except Exception:
        # Deliberate best-effort: one bad page must not abort the batch.
        # NOTE(review): this also swallows network errors, so the message
        # below can be misleading — consider logging the exception.
        print("OTHER ALLRECIPES VERSION %s" %x)
        return None
# Try the extractor on the first 100 URLs; pages that raise come back as None.
[func(url) for url in recipe_list[:100]]

OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6689/nut-and-fruit-bread/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6703/strawberry-bread-ii/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6704/yummy-lemon-bread/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6711/babka-i/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6725/honey-wheat-bread-i/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6752/poppy-seed-bread-ii/
OTHER ALLRECIPES VERSION https://www.allrecipes.com/recipe/6755/sourdough-bread-ii/


[{'title': 'Crispy Cheese Twists',
  'rating': 4.17333316802979,
  'review_count': '51',
  'ingredients': ['1/2 cup Parmesan cheese',
   '3/4 teaspoon ground black pepper',
   '1/2 teaspoon garlic powder',
   '1 (17.5 ounce) package frozen puff pastry, thawed',
   '1 egg white'],
  'directions': ['Combine parmesan cheese, pepper and garlic powder. Unfold pastry sheets onto cutting board.  Brush lightly with egg white; sprinkle each sheet with 1/4 of the cheese mixture.  Lightly press into pastry, turn over; repeat.  Cut each sheet into 12 (1-inch) strips; twist.',
   'Place on ungreased cookie sheet and bake in 350 degrees F (175 degrees C) oven for 15 minutes or until golden brown.']},
 {'title': 'Basil, Roasted Peppers and Monterey Jack Cornbread',
  'rating': 4.32758617401123,
  'review_count': '47',
  'ingredients': ['1/2 cup unsalted butter, chilled and cubed',
   '1 cup chopped onion',
   '1 3/4 cups cornmeal',
   '1 1/4 cups all-purpose flour',
   '1/4 cup white sugar',
   '1 ta

In [26]:
# Re-fetch one of the URLs listed as "OTHER ALLRECIPES VERSION" in the batch
# output so it can be examined on its own.
soup = get_soup(
    get_request("https://www.allrecipes.com/recipe/6703/strawberry-bread-ii/").text
)

In [27]:
# Run the v1 extractor on the page currently held in `soup` and display the result.
get_v1(soup)

{'title': 'Strawberry Bread II',
 'rating': 4.27777767181396,
 'review_count': '17',
 'ingredients': ['1/2 cup butter, softened',
  '1 cup white sugar',
  '2 eggs, separated',
  '2 cups all-purpose flour',
  '1 teaspoon baking powder',
  '1 teaspoon baking soda',
  '1/2 teaspoon almond extract',
  '1 (10 ounce) package frozen strawberries - thawed, drained and juice reserved'],
 'directions': ['Lightly grease a 9 x 5 inch glass loaf pan, and line with greased waxed paper.  Preheat oven to 350 degrees F (175 Degrees C).',
  'In a large bowl, cream together butter or margarine, sugar, and almond extract.  Separate eggs, and beat in egg yolks one at a time until light and fluffy.  Sift flour, baking powder and soda into creamed mixture, and mix thoroughly. Stir in 1/4 cup strawberry juice. Fold in strawberries.',
  'In another bowl, beat egg whites until stiff.  Fold into strawberry batter. Turn batter into prepared pan.  Lightly drop pan to pop any air bubbles.',
  'Bake for 50 to 60 min