Note: the user must paste the desired search URL when prompted.

What the code does:
1. extracts all recipe links from the search page
2. scrapes the title, stars, and number of ratings from each extracted link
3. sorts the recipes by number of ratings (descending), then by stars (descending)
4. converts the list of recipe dictionaries into a data frame for organization

In [None]:
import requests 
import pandas as pd
from bs4 import BeautifulSoup
from urllib.parse import parse_qs, urlparse

# function that extracts each recipe link from search page (url)
def extractlinks(url, timeout=30):
    """Return the unique recipe links found on the search page at *url*.

    Every anchor whose ``href`` contains ``/recipe/`` is kept. Relative hrefs
    are resolved against *url* so each returned link can be requested
    directly. Duplicates are dropped and first-seen page order is preserved.

    *timeout* is the number of seconds passed to ``requests.get``.
    """
    # local import: urljoin is not among the file-level imports
    from urllib.parse import urljoin

    # a timeout keeps a stalled connection from hanging the script forever
    response = requests.get(url, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    #extract anchor tags w/ links
    anchors = soup.find_all('a', href=True)
    links = [
        urljoin(url, anchor['href'])
        for anchor in anchors
        if '/recipe/' in anchor['href']
    ]

    # dict.fromkeys de-duplicates while keeping order, unlike set()
    return list(dict.fromkeys(links))

# function that scrapes title, stars, number of ratings
def scrapedetails(rurl, searchq, timeout=30):
    """Scrape the title, star rating and rating count from one recipe page.

    Returns a dict with the keys "title", "stars" and "ratings", or None when
    *searchq* does not occur (case-insensitively) in the title.

    A missing element falls back to "n/a" (title), 0.0 (stars) or 0
    (ratings). Text that cannot be parsed as a number uses the same numeric
    fallbacks instead of aborting the whole scrape.

    *timeout* is the number of seconds passed to ``requests.get``.
    """
    # a timeout keeps a stalled connection from hanging the script forever
    response = requests.get(rurl, timeout=timeout)
    soup = BeautifulSoup(response.content, 'html.parser')

    #scrape title
    telement = soup.select_one("h1.article-heading")
    title = telement.get_text(strip=True) if telement else "n/a"

    #matching search keywords with title; checked before the numbers are
    #parsed so pages that get filtered out cannot fail on their rating text
    if searchq.lower() not in title.lower():
        return None

    #scrape stars (0 - 5)
    selement = soup.select_one("div.mm-recipes-review-bar__rating.mntl-text-block.text-label-300")
    try:
        stars = float(selement.get_text(strip=True)) if selement else 0.0
    except ValueError:
        # non-numeric rating text is treated like a missing rating
        stars = 0.0

    #scrape number of ratings
    relement = soup.select_one("div.mm-recipes-review-bar__rating-count.mntl-text-block.text-label-300")
    ratingnum = 0
    if relement:
        #drop the surrounding parentheses and thousands separators before converting
        ratingtext = relement.get_text(strip=True)
        try:
            ratingnum = int(ratingtext.strip("()").replace(",", ""))
        except ValueError:
            # unexpected count format is treated like a missing count
            ratingnum = 0

    #dictionary for details scraped
    return {
        "title": title,
        "stars": stars,
        "ratings": ratingnum
    }
    
# function that collects the recipe links from the search page and scrapes each one
def scrapeall(url, searchq):
    """Return the scraped details of every recipe linked from *url*.

    Each link from extractlinks() is handed to scrapedetails() together with
    *searchq*; falsy results (None) are left out of the returned list.
    """
    # the generator calls scrapedetails() lazily, one link at a time, in link order
    scraped = (scrapedetails(link, searchq) for link in extractlinks(url))
    return [details for details in scraped if details]

# orders recipes by most ratings first, breaking ties by most stars
def sort(recipes):
    """Return a new list of *recipes* ranked by ratings, then stars, both descending."""
    def ranking_key(recipe):
        # negated values turn the ascending sort into a descending one
        return (-recipe['ratings'], -recipe['stars'])

    ordered = list(recipes)
    ordered.sort(key=ranking_key)
    return ordered

# extract search query from url
def extractsearchq(url):
    """Return the decoded value of the ``q`` query parameter of *url*.

    Returns an empty string when the URL has no ``q`` parameter. If ``q``
    appears more than once, the first value is used.
    """
    query = parse_qs(urlparse(url).query)
    # parse_qs already percent-decodes the value and turns '+' into a space;
    # replacing '+' again would corrupt literal plus signs encoded as %2B
    return query.get("q", [""])[0]

# main function
def main():
    """Prompt for a search URL, scrape the matching recipes and print them ranked."""
    # strip the stray whitespace that tends to come along with a pasted URL;
    # otherwise it ends up inside the search query used to filter titles
    url = input("paste search url here:").strip()
    # specifically https://www.allrecipes.com/search?q=chocolate+chip+cookies for our project
    searchq = extractsearchq(url)
    recipes = scrapeall(url, searchq)
    rsorted = sort(recipes)

    # nothing scraped: say so instead of printing an empty data frame
    if not rsorted:
        print("no matching recipes found")
        return

    #convert to data frame for organization purposes
    df = pd.DataFrame(rsorted)
    # 1-based rank column that follows the sorted order
    df.insert(0, "rank", range(1, 1 + len(df)))
    print(df.to_string(index=False))

# run main() only when this file is executed as a script, not when it is imported
if __name__ == '__main__':
    main()