# Whole scraper (end-to-end)

In [4]:
import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep
from selenium.common.exceptions import ElementClickInterceptedException, StaleElementReferenceException, TimeoutException, WebDriverException
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options

# Function to safely get the text of an element or return None if it doesn't exist
def safe_get_text(driver, xpath):
    """Return the text of the first element matching ``xpath``, or None.

    Args:
        driver: Object exposing ``find_element(by, value)`` (a Selenium WebDriver).
        xpath: XPath expression locating the element to read.

    Returns:
        The element's ``text`` value, or None when that text is exactly
        "N/A" or empty, or when anything goes wrong during the lookup
        (the error is printed, never raised).
    """
    try:
        content = driver.find_element(By.XPATH, xpath).text
        # "N/A" and the empty string both count as "no value"
        return None if content in ("N/A", "") else content
    except Exception as e:
        print(f"Error retrieving text: {e}")
        return None  # best-effort lookup: any failure yields None

# Initialize the Chrome WebDriver with options
chrome_options = Options()
chrome_options.add_argument('--disable-gpu')    # Disable GPU rendering
chrome_options.add_argument('--disable-images') # Disable images to improve performance
chrome_options.add_argument('--blink-settings=imagesEnabled=false') # Another way to disable images
# Headless mode is intentionally left off so the browser window stays visible;
# uncomment the next line to run without a window.
# chrome_options.add_argument('--headless')

driver = webdriver.Chrome(options=chrome_options)

# Set the page load timeout in seconds (increase if needed)
driver.set_page_load_timeout(600)

# Retry mechanism in case of WebDriverException
max_retries = 3
start_url = "https://myflixerz.to/filter?type=tv&quality=all&release_year=all&genre=all&country=101-135-57"

for attempt in range(max_retries):
    try:
        driver.get(start_url)
        break  # Exit the loop if the page loads successfully
    except WebDriverException as e:
        print(f"Attempt {attempt + 1} failed: {e}")
        if attempt == max_retries - 1:
            # Out of attempts: close the browser before propagating the error,
            # otherwise the Chrome process is left running with nothing to drive it.
            driver.quit()
            raise  # Reraise the error after all attempts fail
        sleep(5)  # Wait for 5 seconds before retrying

# Set implicit wait time for elements: every find_element(s) call polls for up
# to 10 seconds before giving up.
# NOTE(review): this notebook also uses explicit WebDriverWait calls; Selenium's
# documentation advises against combining implicit and explicit waits because
# the effective timeout becomes unpredictable. Consider using only one kind.
driver.implicitly_wait(10)

# CSV file setup (make sure the file is created)
csv_filename = 'dramas_data.csv'

# Poster links on a listing page; clicking one opens that drama's detail page
drama_link_xpath = "//div[@class='flw-item']/div//a[@class='film-poster-ahref flw-item-tip']"

# Walk the listing pages; for every drama open its detail page, record one CSV
# row, then navigate back. The try/finally guarantees the browser is closed
# even when an unexpected error (or a manual interrupt) stops the run early.
try:
    with open(csv_filename, mode='w', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow(['Title', 'Country', 'Genre', 'Release Date'])  # CSV headers

        for i in range(61):  # Assuming 61 pages
            print("Scraping Page: ", i + 1)

            try:
                # Wait for the dramas list to load
                drama_elements = WebDriverWait(driver, 60).until(
                    EC.presence_of_all_elements_located((By.XPATH, drama_link_xpath))
                )
                drama_count = len(drama_elements)
                print(f"{drama_count} dramas found on page {i + 1}.")
            except TimeoutException:
                # Nothing to scrape here, but still fall through to the
                # pagination step below; skipping it would leave the browser
                # on this same page for every remaining iteration.
                print(f"Page {i + 1} took too long to load. Skipping this page.")
                drama_count = 0

            for drama_index in range(drama_count):
                try:
                    # Re-locate the links on every pass: references obtained
                    # before the click/back round trip are no longer valid.
                    drama_elements = driver.find_elements(By.XPATH, drama_link_xpath)
                    if drama_index >= len(drama_elements):
                        # The listing is not showing the expected items (for
                        # example the browser did not return to it); stop this
                        # page instead of crashing with an IndexError.
                        print(f"Only {len(drama_elements)} dramas visible while looking for drama {drama_index + 1}. Moving on to the next page.")
                        break
                    drama_element = drama_elements[drama_index]

                    # Scroll into view and click the drama element
                    driver.execute_script("arguments[0].scrollIntoView();", drama_element)
                    sleep(1)
                    driver.execute_script("arguments[0].click();", drama_element)
                    sleep(2)

                    # Use the safe_get_text function to get values
                    name = safe_get_text(driver, "//h2[@class='heading-name']/a")
                    country = safe_get_text(driver, "//strong[text()='Country: ']/ancestor::span/following-sibling::a")
                    genre = safe_get_text(driver, "//strong[text()='Genre: ']/ancestor::span/following-sibling::a")
                    # NOTE(review): in recorded runs this text includes the
                    # "Released: " label, so the CSV column carries it too.
                    released = safe_get_text(driver, "//div[@class='row-line']")

                    print(f"Title: {name}, Genre: {genre}, Country: {country}, Release Date: {released}")

                    # Append to the CSV file
                    writer.writerow([name, country, genre, released])

                    # Go back to the main page after extracting drama details
                    driver.back()
                    sleep(2)

                except (StaleElementReferenceException, ElementClickInterceptedException) as e:
                    # Both failures happen before the detail page is opened, so
                    # reload the listing and move on to the next drama. Clicking
                    # again here would open the detail page without scraping it
                    # or navigating back, stranding the loop on the wrong page.
                    print(f"{type(e).__name__} encountered for drama {drama_index + 1}. Skipping it.")
                    driver.refresh()
                    sleep(2)

            try:
                # Click the "Next" button to go to the next page
                next_button = WebDriverWait(driver, 30).until(
                    EC.element_to_be_clickable((By.XPATH, "//a[@title='Next']"))
                )
                driver.execute_script("arguments[0].click();", next_button)
                sleep(2)
            except TimeoutException:
                print("Next button not found or page took too long to load. Ending scraping.")
                break
finally:
    # Quit the driver after completion, or after a failure
    driver.quit()

# Confirm that the CSV file was created
print(f"Data has been written to {csv_filename}")

Scraping Page:  1
32 dramas found on page 1.
Title: Spice Up Our Love, Genre: Comedy, Country: South Korea, Release Date: Released: 2024-10-03
Title: Love in the Desert, Genre: Drama, Country: China, Release Date: Released: 2024-09-29
Title: Iron Family, Genre: Family, Country: South Korea, Release Date: Released: 2024-09-28
Title: What Comes After Love, Genre: Drama, Country: South Korea, Release Date: Released: 2024-09-27
Title: Dual Love, Genre: Drama, Country: China, Release Date: Released: 2024-09-27
Title: The Princess, Genre: Drama, Country: China, Release Date: Released: 2024-09-26
Title: The Limbo, Genre: Drama, Country: China, Release Date: Released: 2024-09-26
Title: A Talented Girl Grows Up, Genre: Drama, Country: China, Release Date: Released: 2024-09-24
Title: Hero Is Back, Genre: Drama, Country: China, Release Date: Released: 2024-09-23
Title: Dear Hyeri, Genre: Drama, Country: South Korea, Release Date: Released: 2024-09-23
Title: You Are My Lover Friend, Genre: Drama, 

##### Extract Drama Names and Navigate (individual step)

In [None]:
# Extract drama names: collect the `title` attribute of every poster link on
# each listing page, following the "Next" link until it disappears.
# Expects `driver` to be an open session already showing a listing page.
dramas = []

try:
    for i in range(61):
        print("Scraping Page: ", i+1)
        drama = driver.find_elements(By.XPATH, "//div[@class='flw-item']/div//a[@class='film-poster-ahref flw-item-tip']")
        print(f"{len(drama)} items found.")

        dramas.extend(item.get_attribute('title') for item in drama)

        # Locate the "Next" button
        next_button = driver.find_elements(By.XPATH, "//a[@title='Next']")
        if not next_button:
            # Without this stop, the remaining iterations would re-read the
            # same page and append its titles again.
            print("No Next button found. Reached the last page.")
            break

        # Scroll to the bottom of the page to ensure no elements are blocking the click
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(2)  # Allow some time for scrolling

        try:
            # Attempt to click the next button
            next_button[0].click()
        except ElementClickInterceptedException:
            # If click is intercepted, use JavaScript click as a fallback
            print("Next button click intercepted. Retrying with JavaScript click.")
            driver.execute_script("arguments[0].click();", next_button[0])
finally:
    # Close the browser even if the loop is interrupted or fails part-way
    driver.quit()

##### Extract Details from individual pages

In [2]:
## Extract Details from individual pages

import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from time import sleep

# Launch a default Chrome session and load a single show's detail page
driver = webdriver.Chrome()
driver.get("https://myflixerz.to/tv/personal-taste-26996")

# Title: visible text of the link inside the page heading
title_link = driver.find_element(By.XPATH, "//h2[@class='heading-name']/a")
name = title_link.text

# Country and genre: first link following the matching bold label; the value
# is read through the link's "text" attribute rather than `.text`
country_link = driver.find_element(By.XPATH, "//strong[text()='Country: ']/ancestor::span/following-sibling::a")
country = country_link.get_attribute("text")
genre_link = driver.find_element(By.XPATH, "//strong[text()='Genre: ']/ancestor::span/following-sibling::a")
genre = genre_link.get_attribute("text")

# First "row-line" block on the page (the recorded run shows it holding the
# "Released: ..." line — presumably always the first one; verify on other pages)
released_row = driver.find_element(By.XPATH, "//div[@class='row-line']")
released = released_row.text

# Report the extracted fields; `released` already carries its own label
for label, value in (("Title: ", name), ("Genre: ", genre), ("Country: ", country)):
    print(label, value)
print(released)

Title:  Personal Taste
Genre:  Comedy
Country:  South Korea
Released: 2010-03-31


# Practice

In [23]:
# Extract drama names: collect the `title` attribute of every poster link on
# each listing page, following the "Next" link until it disappears.
# Expects `driver` to be an open session already showing a listing page.
dramas = []

try:
    for i in range(61):
        print("Scraping Page: ", i+1)
        drama = driver.find_elements(By.XPATH, "//div[@class='flw-item']/div//a[@class='film-poster-ahref flw-item-tip']")

        print(f"{len(drama)} items found.")

        dramas.extend(item.get_attribute('title') for item in drama)

        # Wait for any possible overlay to disappear
        # try:
        #     WebDriverWait(driver, 10).until(
        #         EC.invisibility_of_element_located((By.XPATH, "//div[contains(@style, 'animation: 0.3s')]"))  # Adjust XPATH if necessary
        #     )
        # except:
        #     print("No blocking overlay found.")

        # Locate the "Next" button
        next_button = driver.find_elements(By.XPATH, "//a[@title='Next']")

        if not next_button:
            # Without this stop, the remaining iterations would re-read the
            # same page and append its titles again.
            print("No Next button found. Reached the last page.")
            break

        # Scroll to the bottom of the page to ensure no elements are blocking the click
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        sleep(2)  # Allow some time for scrolling

        try:
            # Attempt to click the next button
            next_button[0].click()
        except ElementClickInterceptedException:
            # If click is intercepted, use JavaScript click as a fallback
            print("Next button click intercepted. Retrying with JavaScript click.")
            driver.execute_script("arguments[0].click();", next_button[0])
finally:
    # Close the browser even if the loop is interrupted or fails part-way
    driver.quit()


Scraping Page:  1
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  2
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  3
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  4
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  5
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  6
32 items found.
Next button click intercepted. Retrying with JavaScript click.
Scraping Page:  7
32 items found.
Next button click intercepted. Retrying with JavaScript click.


KeyboardInterrupt: 

In [24]:
# Number of titles collected into `dramas` so far
len(dramas)

224

In [25]:
# Display the collected titles
dramas

['Love in the Desert',
 'Iron Family',
 'What Comes After Love',
 'Dual Love',
 'The Princess',
 'The Limbo',
 'A Talented Girl Grows Up',
 'Dear Hyeri',
 'Hero Is Back',
 'You Are My Lover Friend',
 'Echo of Her Voice',
 'Fate of Beauty',
 'The Judge from Hell',
 'Fall in Love with a Fox',
 'Please Remember Me',
 'In Between',
 'Dark Night and Dawn',
 'Culinary Class Wars',
 'Love of Nirvana',
 'Be Your Knight',
 'The Time of Fever',
 'Seoul Busters',
 'No One But You',
 'Wind Direction',
 'Fragile',
 'The First Shot',
 'Their Wonderful Time',
 'Born to be the One',
 'Original Sin',
 'Fateful Love',
 'A-List to Playlist',
 'Fairy Charge Forward',
 'Debit Queen',
 'Unspeakable Longing',
 'Queen Woo',
 'We All Lie',
 'No Gain No Love',
 'Adventure Behind the Bronze Door',
 'Melody of Golden Age',
 'Cinderella at 2AM',
 'The Frog',
 'Go East',
 'Romantic Boyfriend',
 'Pop Star Academy: KATSEYE',
 'Terror Tuesday: Extreme',
 'Dawn is Breaking',
 'Love & Bid Farewell',
 'Love Next Door',
 