In [3]:
# Importing all libraries
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.common.exceptions import NoSuchElementException, TimeoutException
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import csv
import time

# Listing URL template; the `{}` placeholder is filled with a genre slug
base_url = 'https://www.rottentomatoes.com/browse/movies_at_home/genres:{}'

# Genre slugs to scrape, visited in the order listed
genres = [
    'action', 'adventure', 'animation', 'anime',
    'biography', 'comedy', 'crime', 'documentary',
    'drama', 'entertainment', 'faith_and_spirituality', 'fantasy',
    'game_show', 'horror', 'romance', 'sci_fi',
    'mystery_and_thriller', 'travel', 'war', 'kids_and_family',
    'sports', 'reality', 'music', 'special_interest',
    'short', 'variety', 'western', 'musical',
    'nature', 'holiday', 'lgbtq', 'history',
]

# Upper bound on the total number of movies written, across all genres
max_movies = 500

# Shared Chrome session; the chromedriver path comes from webdriver_manager
driver = webdriver.Chrome(
    service=Service(ChromeDriverManager().install()),
)
# Shared explicit wait with a 30-second timeout, used for the listing pages
wait = WebDriverWait(driver, 30)

# Function to get synopsis from the driver details page
def fetch_synopsis(driver):
    """Return the synopsis text of the details page loaded in *driver*.

    Waits up to 10 seconds for the last ``rt-text`` descendant-child of
    ``div.synopsis-wrap`` to become visible and returns its stripped text.
    If the wait times out, a message is printed and ``'N/A'`` is returned.
    """
    locator = (By.CSS_SELECTOR, "div.synopsis-wrap rt-text:last-child")
    try:
        element = WebDriverWait(driver, 10).until(
            EC.visibility_of_element_located(locator)
        )
        return element.text.strip()
    except TimeoutException:
        print("Failed to load the synopsis.")
        return 'N/A'

# Function to get director name from the driver details page
def fetch_director(driver):
    """Return the director name(s) of the details page loaded in *driver*.

    Waits up to 10 seconds for the ``div.category-wrap`` blocks to become
    visible, then looks for the block whose key label reads "director"
    (case-insensitive) and joins the text of its ``dd rt-link`` elements
    with ``', '``.

    Returns:
        The comma-separated director names, or ``'N/A'`` when the page did
        not load in time, no director block exists, or the director block
        contains no non-empty names.
    """
    try:
        category_elements = WebDriverWait(driver, 10).until(
            EC.visibility_of_all_elements_located((By.CSS_SELECTOR, "div.category-wrap"))
        )
    except TimeoutException:
        print("Failed to load the director's name.")
        return 'N/A'

    for category in category_elements:
        try:
            key_element = category.find_element(By.CSS_SELECTOR, "dt.key rt-text.key")
        except NoSuchElementException:
            # A block without a key label cannot be the director row; keep
            # scanning instead of abandoning the remaining blocks.
            continue

        if key_element.text.strip().lower() != "director":
            continue

        director_links = category.find_elements(By.CSS_SELECTOR, "dd rt-link")
        names = [link.text.strip() for link in director_links]
        names = [name for name in names if name]
        if names:
            return ', '.join(names)
        # Director row exists but holds no usable names: fall through to the
        # placeholder rather than returning an empty string.
        break

    print("Director section not found.")
    return 'N/A'

# Function for extracting movies
def movie_extract(genre, writer, movie_count):
    """Scrape the listing page of *genre* and write one CSV row per movie.

    Loads the genre listing with the module-level ``driver``, then for each
    ``div.js-tile-link`` tile reads the title, audience score, critics score
    and details link, opens the details page in a new tab to collect the
    synopsis and director(s), and writes
    ``[genre, title, audience_score, critic_score, synopsis, directors]``
    through *writer*.

    Args:
        genre: Genre slug substituted into ``base_url``.
        writer: Object with a ``writerow`` method (a ``csv.writer`` in this
            script).
        movie_count: Number of movies already written before this call.

    Returns:
        The updated movie count. Processing stops early once the count
        reaches ``max_movies``.
    """
    driver.get(base_url.format(genre))
    wait.until(EC.presence_of_element_located((By.CSS_SELECTOR, 'div.flex-container')))
    time.sleep(2)  # Allow some time for dynamic content to load initially

    movie_elements = driver.find_elements(By.CSS_SELECTOR, 'div.js-tile-link')
    for movie in movie_elements:
        if movie_count >= max_movies:
            return movie_count  # Return the count to stop processing in other genres

        # Reset per tile so a failure never reports the previous movie's title.
        title = 'N/A'
        try:
            title = movie.find_element(
                By.CSS_SELECTOR, 'span[data-qa="discovery-media-list-item-title"]'
            ).text

            # Both scores are attributes of the same element; look it up once.
            score_element = movie.find_element(By.CSS_SELECTOR, 'score-pairs-deprecated')
            audience_score = score_element.get_attribute('audiencescore')
            critic_score = score_element.get_attribute('criticsscore')

            detail_link = movie.find_element(
                By.CSS_SELECTOR, 'a[data-qa="discovery-media-list-item-caption"]'
            ).get_attribute('href')
        except NoSuchElementException:
            print(f"Data extraction failed for movie: {title}")
            continue

        # Remember the listing tab so it can always be restored afterwards.
        listing_handle = driver.current_window_handle
        driver.execute_script("window.open(arguments[0]);", detail_link)
        try:
            detail_handles = [
                handle for handle in driver.window_handles if handle != listing_handle
            ]
            driver.switch_to.window(detail_handles[-1])
            time.sleep(2)

            synopsis = fetch_synopsis(driver)
            directors = fetch_director(driver)
        finally:
            # Close every tab except the listing, even when scraping the
            # details page raised, so the next iteration starts clean.
            for handle in driver.window_handles:
                if handle != listing_handle:
                    driver.switch_to.window(handle)
                    driver.close()
            driver.switch_to.window(listing_handle)

        writer.writerow([genre, title, audience_score, critic_score, synopsis, directors])
        movie_count += 1
        print(movie_count)
        print(f"Processed movie: {title}")

    return movie_count

# Scrape every genre into one CSV file, stopping once max_movies rows are written.
# The try/finally guarantees the browser is shut down even if scraping fails
# part-way through; otherwise the Chrome session would be left running.
try:
    with open('movies_data_all_genres.csv', 'w', newline='', encoding='utf-8') as csv_file:
        writer = csv.writer(csv_file)
        writer.writerow(['Genre', 'Title', 'Audience Score', 'Critics Score', 'Synopsis', 'Director'])

        movie_count = 0
        for genre in genres:
            movie_count = movie_extract(genre, writer, movie_count)
            if movie_count >= max_movies:
                break
finally:
    # Cleanup
    driver.quit()


1
Processed movie: Kitty the Killer
2
Processed movie: The Ministry of Ungentlemanly Warfare
3
Processed movie: Monkey Man
4
Processed movie: Arcadian
5
Processed movie: Love Lies Bleeding
6
Processed movie: War for the Planet of the Apes
7
Processed movie: Dune: Part Two
8
Processed movie: The Beekeeper
9
Processed movie: Dawn of the Planet of the Apes
10
Processed movie: In the Land of Saints and Sinners
11
Processed movie: Argylle
12
Processed movie: Kung Fu Panda 4
13
Processed movie: The Great Wall
14
Processed movie: Star Wars: Episode I - The Phantom Menace
15
Processed movie: Bullet Train
16
Processed movie: Rebel Moon: Part Two - The Scargiver
17
Processed movie: Madame Web
18
Processed movie: Rebel Moon: Part One - A Child of Fire
19
Processed movie: The Iron Claw
20
Processed movie: Planet of the Apes
21
Processed movie: Land of Bad
22
Processed movie: Chief of Station
23
Processed movie: Road House
24
Processed movie: 3 Days in Malay
25
Processed movie: Mad Max: Fury Road
2

213
Processed movie: Ricky Stanicky
214
Processed movie: Star Wars: The Rise of Skywalker
215
Processed movie: Star Wars: The Last Jedi
216
Processed movie: Star Wars: Episode III - Revenge of the Sith
217
Processed movie: Aquaman and the Lost Kingdom
218
Processed movie: Marvel's the Avengers
219
Processed movie: Avengers: Endgame
220
Processed movie: Spider-Man: No Way Home
221
Processed movie: Spirited Away
222
Processed movie: Spider-Man: Across the Spider-Verse
223
Processed movie: Mortal Kombat
224
Processed movie: Transformers: Rise of the Beasts
225
Processed movie: Make Me Scream
226
Processed movie: Abigail
227
Processed movie: Founders Day
228
Processed movie: Mind Body Spirit
229
Processed movie: Late Night with the Devil
230
Processed movie: Arcadian
231
Processed movie: New Life
232
Processed movie: Infested
233
Processed movie: Immaculate
234
Processed movie: Rust Creek
235
Processed movie: Barbarian
236
Processed movie: Dream Scenario
237
Processed movie: Talk to Me
238

427
Processed movie: Seraphim Falls
428
Processed movie: The Magnificent Seven
429
Processed movie: Strange Way of Life
430
Processed movie: In a Valley of Violence
431
Processed movie: The Old Way
432
Processed movie: The Ridiculous 6
433
Processed movie: Blood & Gold
434
Processed movie: Old Henry
435
Processed movie: Wish
436
Processed movie: Wonka
437
Processed movie: Mean Girls
438
Processed movie: This Is Me... Now: A Love Story
439
Processed movie: La La Land
440
Processed movie: The Little Mermaid
441
Processed movie: The Greatest Hits
442
Processed movie: Cinderella
443
Processed movie: The Color Purple
444
Processed movie: The Greatest Showman
445
Processed movie: Moana
446
Processed movie: Leo
447
Processed movie: Aladdin
448
Processed movie: Encanto
449
Processed movie: The Lion King
450
Processed movie: Mamma Mia!
451
Processed movie: Rocketman
452
Processed movie: West Side Story
453
Processed movie: The Wizard of Oz
454
Processed movie: Tangled
455
Processed movie: Froze