In [37]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.edge.service import Service
from webdriver_manager.microsoft import EdgeChromiumDriverManager
from selenium.common.exceptions import NoSuchElementException, ElementClickInterceptedException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.support import expected_conditions as EC
import time
import csv
# Set up Edge WebDriver
driver = webdriver.Edge(service=Service(EdgeChromiumDriverManager().install()))
# Open the top beers page
driver.get('https://www.ratebeer.com/top-beers')
# Allow the page to load
time.sleep(3)
# Find all the beer entries on the page
beer_elements = driver.find_elements(By.CSS_SELECTOR, 'a.CompactList___StyledLink-dcQTlx')
# Collect beer names and links
beers = []
all_reviews_and_ratings = []
for beer in beer_elements:
    beer_name = beer.text  # Get the text of the beer (beer name)
    beer_link = beer.get_attribute('href')  # Get the link associated with the beer
    beers.append((beer_name, beer_link))
# Select top 4, middle 3 (27-30), and last 3 ranked beers
selected_beers = beers[:4] + beers[26:30] + beers[-3:]
# Create CSV file to store the reviews
csv_filename = "beer_reviews_ratings.csv"
# Function to scrape reviews from a beer page
def scrape_reviews(beer_name, beer_link):
    driver.get(beer_link)
    time.sleep(3)  # Allow the page to load
    review_count = 0  # Counter to keep track of the number of reviews scraped
    reviews_and_ratings = []
    # Close any cookie pop-up that appears on the first page
    try:
        close_button = driver.find_element(By.XPATH, "//button[@class='onetrust-close-btn-handler onetrust-close-btn-ui banner-close-button ot-close-icon']")
        close_button.click()
        time.sleep(1)  # Allow the click to register
    except NoSuchElementException:
        pass  # If the button is not there, move on
    # Keep scraping until there are no more pages or the review count reaches 200
    while review_count < 200:
        try:
            # Find the beer title using XPath to locate the div with the specific class
            beer_name = driver.find_element(By.XPATH, "//div[contains(@class, 'MuiTypography-h4') and contains(@class, 'mt-3')]").text
            review_elements = driver.find_elements(By.CSS_SELECTOR, '.MuiTypography-root.Text___StyledTypographyTypeless-bukSfn.pzIrn.colorized__WrappedComponent-hrwcZr.hwjOn.BeerReviewListItem___StyledText-kMbsdb.gCtEHi.pre-wrap.MuiTypography-body1 .LinesEllipsis')
            rating_elements = driver.find_elements(By.CSS_SELECTOR, '.MuiTypography-root.Text___StyledTypographyTypeless-bukSfn.pzIrn.text-500.colorized__WrappedComponent-hrwcZr.bRPQdN.MuiTypography-subtitle1')
            # Pair each review with the corresponding rating
            for review, rating in zip(review_elements, rating_elements):
                if review_count >= 200:
                    break
                reviews_and_ratings.append({
                    "name": beer_name,
                    "review": review.text,
                    "rating": rating.text
                })
                review_count += 1
            # Check if the "Next page" button is available and interactable
            # next_button = WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//button[@aria-label='Next page']")))
            next_button = driver.find_element(By.XPATH, "//button[@aria-label='Next page']")
            # Check if the button is disabled (i.e., we have reached the last page)
            if next_button.get_attribute("disabled"):
                break
            # Scroll to the "Next page" button and click it
            actions = ActionChains(driver)
            actions.move_to_element(next_button).perform()
            next_button.click()
            time.sleep(3)  # Allow time for the next page to load
        except NoSuchElementException:
            print("No 'Next page' button found or no more pages available.")
            break
        except ElementClickInterceptedException:
            print("Element click was intercepted. Attempting to retry.")
            time.sleep(2)  # Give time for any pop-ups to disappear
            continue
    return reviews_and_ratings
# Scrape reviews for selected beers
for beer_name, beer_link in selected_beers:
    print(f"Scraping reviews for {beer_name}...")
    reviews_and_ratings = scrape_reviews(beer_name, beer_link)
    all_reviews_and_ratings.extend(reviews_and_ratings)
# Write reviews and ratings to the CSV file
with open(csv_filename, 'w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=["Name", "Review", "Rating"])
    writer.writeheader()  # CSV header
    for data in all_reviews_and_ratings:
        writer.writerow({"Name": data["name"], "Review": data["review"], "Rating": data["rating"]})
# Close the browser
driver.quit()
print(f"Scraping completed. Reviews saved in {csv_filename}.")

Scraping reviews for 1
Toppling Goliath Kentucky Brunch
🇺🇸Stout - Imperial Flavored / Pastry · 12.0%
100
100
4.55
(1,058)...
Scraping reviews for 2
Närke Kaggen Stormaktsporter
🇸🇪Stout - Imperial · 12.0%
100
100
4.47
(882)...
Scraping reviews for 3
Schramm's The Heart of Darkness
🇺🇸Mead - Melomel / Fruited · 14.0%
100
100
4.43
(293)...
Scraping reviews for 4
Westvleteren 12
🇧🇪Quadrupel / Abt · 10.2%
100
100
4.42
(7,575)...
Scraping reviews for 27
Bell's Black Note Stout
🇺🇸Stout - Imperial · 10.8%
100
100
4.30
(1,324)...
Scraping reviews for 28
Goose Island Bourbon County Stout - Proprietor's 2013
🇺🇸Stout - Imperial Flavored / Pastry · 13.9%
100
100
4.30
(285)...
Scraping reviews for 29
Superstition Straw Berry White
🇺🇸Mead - Melomel / Fruited · 13.5%
100
99
4.30
(234)...
Scraping reviews for 30
Toppling Goliath SR-71 Blackbird (2015 Bottling / Draft)
🇺🇸Stout - Imperial · 14.0%
100
100
4.30
(109)...
Scraping reviews for 48
Schramm's Madeline
🇺🇸Mead - Melomel / Fruited · 14.0%
100
99
4.2