In [19]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import time
import re
import csv

# Search page listing feature films released during 2024.
_IMDB_SEARCH_URL = (
    "https://www.imdb.com/search/title/"
    "?title_type=feature&release_date=2024-01-01,2024-12-31"
)

# NOTE(review): absolute path copied from the rendered page; it will stop
# matching as soon as the page layout changes, at which point pagination
# silently ends after the first page of results.
_MORE_BUTTON_XPATH = (
    "/html/body/div[2]/main/div[2]/div[3]/section/section/div/section/section"
    "/div[2]/div/section/div[2]/div[2]/div[2]/div/span/button"
)

_TITLE_XPATH = "//h3[@class='ipc-title__text']"

# Relative XPaths, evaluated inside the list item that holds one movie.
_DURATION_XPATH = (
    ".//span[contains(@class, 'dli-title-metadata-item') and contains(text(), 'h')]"
)
_RATING_XPATH = ".//span[contains(@class, 'ipc-rating-star--rating')]"
_VOTES_XPATH = ".//span[contains(@class, 'ipc-rating-star--voteCount')]"

# Multipliers for abbreviated vote counts such as "44K" or "1.2M".
_VOTE_SUFFIX_MULTIPLIERS = {"": 1, "K": 1_000, "M": 1_000_000, "B": 1_000_000_000}
_VOTE_COUNT_PATTERN = re.compile(r"-?(\d+(?:\.\d+)?)([KMB]?)", re.IGNORECASE)


def _normalize_vote_count(raw_text):
    """Return the vote count in *raw_text* as a plain integer string.

    Parentheses and whitespace are always removed. Shorthand such as
    ``"(44K)"`` or ``"1.2M"`` is expanded (``"44000"``, ``"1200000"``), and
    thousands separators and a leading minus sign are dropped. Text that is
    not a recognisable number is returned with only the parentheses and
    whitespace removed.
    """
    cleaned = re.sub(r"[()\s]", "", raw_text)
    match = _VOTE_COUNT_PATTERN.fullmatch(cleaned.replace(",", ""))
    if match is None:
        return cleaned
    number, suffix = match.groups()
    if not suffix and "." not in number:
        # Plain integers bypass float so large counts keep full precision.
        return str(int(number))
    return str(round(float(number) * _VOTE_SUFFIX_MULTIPLIERS[suffix.upper()]))


def _first_text(scope, xpath, default="N/A"):
    """Return the text of the first element under *scope* matching *xpath*, or *default*."""
    matches = scope.find_elements(By.XPATH, xpath)
    return matches[0].text if matches else default


def _select_genre(driver, genre_name):
    """Open the Genre accordion and click the button labelled *genre_name*."""
    genre_dropdown = WebDriverWait(driver, 10).until(
        EC.visibility_of_element_located((By.XPATH, "//*[@id='genreAccordion']/div[1]/label"))
    )
    # A fresh ActionChains per gesture, so previously queued actions can
    # never be replayed by a later perform().
    ActionChains(driver).move_to_element(genre_dropdown).perform()
    genre_dropdown.click()
    time.sleep(2)

    genre_button = WebDriverWait(driver, 10).until(
        EC.element_to_be_clickable((By.XPATH, f"//button[span[text()='{genre_name}']]"))
    )
    ActionChains(driver).move_to_element(genre_button).click().perform()
    time.sleep(3)


def _load_all_results(driver, genre_name):
    """Click the "50 more" button until it stops appearing."""
    from selenium.common.exceptions import TimeoutException, WebDriverException

    while True:
        try:
            more_button = WebDriverWait(driver, 5).until(
                EC.element_to_be_clickable((By.XPATH, _MORE_BUTTON_XPATH))
            )
        except TimeoutException:
            # The button did not become clickable in time: treat as the end
            # of the result list.
            print(f"No more movies to load for {genre_name}.")
            return

        try:
            ActionChains(driver).move_to_element(more_button).perform()
            more_button.click()
        except WebDriverException as exc:
            # Best effort: keep what is already loaded, but say why loading
            # stopped instead of reporting it as a normal end of list.
            print(f"Stopped loading more movies for {genre_name}: {exc}")
            return
        time.sleep(3)  # Wait for new data to load


def _collect_movie_rows(driver, genre_name):
    """Return one ``[title, duration, rating, votes, genre]`` row per listed movie.

    Duration, rating and votes are looked up inside the nearest ``<li>``
    ancestor of each title, so a movie that lacks one of them gets ``"N/A"``
    for that field only and later rows stay aligned with their own titles.
    """
    # Optional fields are frequently absent; with a non-zero implicit wait
    # every absent field would block for the full wait period.
    driver.implicitly_wait(0)

    rows = []
    for heading in driver.find_elements(By.XPATH, _TITLE_XPATH):
        title = heading.text
        if "Recently viewed" in title:
            continue

        containers = heading.find_elements(By.XPATH, "./ancestor::li[1]")
        # A heading outside any list item has no metadata; searching inside
        # the heading itself simply yields the defaults.
        scope = containers[0] if containers else heading

        duration = _first_text(scope, _DURATION_XPATH)
        rating = _first_text(scope, _RATING_XPATH)
        raw_votes = _first_text(scope, _VOTES_XPATH, default=None)
        votes = "N/A" if raw_votes is None else _normalize_vote_count(raw_votes)

        rows.append([title, duration, rating, votes, genre_name])
    return rows


def scrape_imdb_movies(genre_name, csv_filename):
    """Scrape the IMDb 2024 feature-film search results for one genre into a CSV.

    Opens the search page in Chrome, selects *genre_name* in the Genre
    filter, loads further results via the "50 more" button for as long as it
    is offered, then writes one row per movie to *csv_filename* with the
    columns ``Title, Duration, Rating, Votes, Genre``.

    Args:
        genre_name: Genre label exactly as shown on the filter button,
            e.g. ``"Action"``. Also written to the Genre column.
        csv_filename: Path of the CSV file to create or overwrite.

    Raises:
        selenium.common.exceptions.TimeoutException: If the Genre accordion
            or the requested genre button does not appear within 10 seconds.
    """
    driver = webdriver.Chrome()
    try:
        driver.implicitly_wait(5)
        driver.get(_IMDB_SEARCH_URL)
        driver.maximize_window()

        _select_genre(driver, genre_name)
        _load_all_results(driver, genre_name)
        movie_data = _collect_movie_rows(driver, genre_name)
    finally:
        # Always release the browser, including when a step above fails.
        driver.quit()

    with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(["Title", "Duration", "Rating", "Votes", "Genre"])
        writer.writerows(movie_data)

    print(f"Data saved to {csv_filename}")

# Scrape each genre in turn into its own CSV file; every row written also
# carries the genre name in the Genre column.
for genre, output_csv in (
    ("Action", "imdb_action_movies.csv"),
    ("Comedy", "imdb_comedy_movies.csv"),
    ("Crime", "imdb_crime_movies.csv"),
    ("Fantasy", "imdb_fantasy.csv"),
    ("Romance", "imdb_romance_movies.csv"),
):
    scrape_imdb_movies(genre, output_csv)

No more movies to load for Action.
Data saved to imdb_action_movies.csv
No more movies to load for Comedy.
Data saved to imdb_comedy_movies.csv
No more movies to load for Crime.
Data saved to imdb_crime_movies.csv
No more movies to load for Fantasy.
Data saved to imdb_fantasy.csv
No more movies to load for Romance.
Data saved to imdb_romance_movies.csv
