In [None]:
from selenium import webdriver
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
from webdriver_manager.chrome import ChromeDriverManager
from bs4 import BeautifulSoup
import csv
import time
import random


# Start a Chrome session for the scraper.
# --no-sandbox / --disable-dev-shm-usage are the flags commonly needed when
# Chrome runs inside containers or other restricted environments.
chrome_flags = ("--no-sandbox", "--disable-dev-shm-usage")

options = webdriver.ChromeOptions()
for flag in chrome_flags:
    options.add_argument(flag)

# webdriver_manager resolves a chromedriver binary and hands back its path,
# which the Service wrapper then uses to launch the driver process.
chromedriver_path = ChromeDriverManager().install()
service = Service(chromedriver_path)
driver = webdriver.Chrome(options=options, service=service)


# Scrape listing pages 1-4 of the "user highest rated" chart, open every album
# linked from each listing, and write one CSV row per album with its title,
# artist, critic score and user score. Missing fields fall back to placeholder
# strings so a row is always written.
BASE_URL = "https://www.albumoftheyear.org"
ALBUM_LINK_SELECTOR = "a[itemprop='url']"

try:
    with open("album_data.csv", "w", newline="", encoding="utf-8") as file:
        writer = csv.writer(file)
        writer.writerow(["Album Name", "Artist", "Critic Score", "User Score"])

        # loop through pages
        for page_num in range(1, 5):
            page_url = f"{BASE_URL}/ratings/user-highest-rated/all/{page_num}/"
            driver.get(page_url)
            print(f"going to page {page_num}: {page_url}") #delete after finished

            WebDriverWait(driver, 10).until(
                EC.presence_of_element_located((By.CSS_SELECTOR, ALBUM_LINK_SELECTOR))
            )
            # still trying to fool captcha + wait until everything is loaded onto page
            time.sleep(random.uniform(2, 4))

            album_count = len(driver.find_elements(By.CSS_SELECTOR, ALBUM_LINK_SELECTOR))
            visited_urls = set()  # album URLs already scraped from this listing page

            # Iterate by index and re-locate the links every time: WebElement
            # references go stale as soon as the browser navigates away, so the
            # list captured before the first click cannot be reused after back().
            for index in range(album_count):
                albums = driver.find_elements(By.CSS_SELECTOR, ALBUM_LINK_SELECTOR)
                if index >= len(albums):
                    # Listing came back with fewer links than before; nothing left to visit.
                    break
                album = albums[index]

                # Selenium normally returns the resolved absolute URL for href;
                # only prefix the site root when a relative path comes back.
                album_url = album.get_attribute("href") or ""
                if album_url.startswith("/"):
                    album_url = BASE_URL + album_url
                if not album_url or album_url in visited_urls:
                    # Skip links without a target and repeated links to the same
                    # album so each album yields exactly one CSV row per page.
                    continue
                visited_urls.add(album_url)
                print(f"Navigating to album page: {album_url}")

                # Scroll to the link and click it with a short pause, mimicking a user.
                driver.execute_script("arguments[0].scrollIntoView();", album)
                ActionChains(driver).move_to_element(album).pause(random.uniform(1, 2)).click().perform()

                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, "h1.albumTitle"))
                )
                time.sleep(random.uniform(2, 4))  # more delays

                # make soup
                album_soup = BeautifulSoup(driver.page_source, "html.parser")

                # extract album data
                album_title = album_soup.select_one("h1.albumTitle span[itemprop='name']")
                album_title = album_title.text.strip() if album_title else "Unknown Album"

                artist_name = album_soup.select_one("span[itemprop='name'] a")
                artist_name = artist_name.text.strip() if artist_name else "Unknown Artist"

                critic_score = album_soup.select_one("span[itemprop='ratingValue']")
                critic_score = critic_score.text.strip() if critic_score else "No Critic Score"

                user_score = album_soup.select_one("div.albumUserScore a")
                user_score = user_score.text.strip() if user_score else "No User Score"

                # write data
                writer.writerow([album_title, artist_name, critic_score, user_score])
                print(f"Scraped: {album_title} by {artist_name} | Critic: {critic_score}, User: {user_score}")

                # Return to the listing and make sure its links exist again
                # before the next iteration tries to re-locate them.
                driver.back()
                WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, ALBUM_LINK_SELECTOR))
                )
                time.sleep(random.uniform(3, 5))
finally:
    # Always shut the browser down, even when a wait times out or a click
    # fails, so no Chrome/chromedriver processes are left behind.
    driver.quit()
