In [6]:
import time

import pandas as pd
from selenium import webdriver
from selenium.common.exceptions import (
    NoSuchElementException,
    TimeoutException,
    WebDriverException,
)
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

def _scroll_to_bottom(driver, pause=2):
    """Scroll down repeatedly until the document height stops growing."""
    last_height = driver.execute_script("return document.body.scrollHeight")
    while True:
        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        # Give the page time to append more content before measuring again.
        time.sleep(pause)
        new_height = driver.execute_script("return document.body.scrollHeight")
        if new_height == last_height:
            return
        last_height = new_height


def _parse_bus_entry(entry, route_name, route_link):
    """Build one result row from a single bus listing element.

    Raises:
        NoSuchElementException: a mandatory field is missing from the entry.
        ValueError: the price text cannot be converted to a number.
        IndexError: the fallback seat text is empty.
    """
    bus_name = entry.find_element(By.CSS_SELECTOR, 'div.travels.lh-24.f-bold.d-color').text
    bus_type = entry.find_element(By.CSS_SELECTOR, 'div.bus-type.f-12.m-top-16.l-color').text
    departing_time = entry.find_element(By.CSS_SELECTOR, 'div.dp-time.f-19.d-color.f-bold').text
    duration = entry.find_element(By.CSS_SELECTOR, 'div.dur.l-color.lh-24').text
    reaching_time = entry.find_element(By.CSS_SELECTOR, 'div.bp-time.f-19.d-color.disp-Inline').text

    # The rating is optional. Look it up inside *this* entry (not the whole
    # page), otherwise every bus would report the first rating on the page.
    try:
        star_rating = float(entry.find_element(By.CSS_SELECTOR, 'div.rating-sec.lh-24').text)
    except (NoSuchElementException, ValueError):
        star_rating = None

    price_text = entry.find_element(By.CSS_SELECTOR, 'span.f-19.f-bold').text
    price = float(price_text.replace("₹", "").replace(",", ""))

    # NOTE(review): the primary selector keeps the full text while the
    # fallback keeps only the first token; preserved as-is, confirm intent.
    try:
        seats_available = entry.find_element(By.CSS_SELECTOR, 'div.seat-left.m-top-16').text
    except NoSuchElementException:
        seats_available = entry.find_element(By.CSS_SELECTOR, 'div.seat-left.m-top-30').text.split()[0]

    # Key order defines the column order of the resulting CSV.
    return {
        'bus_name': bus_name,
        'bus_type': bus_type,
        'departing_time': departing_time,
        'duration': duration,
        'reaching_time': reaching_time,
        'star_rating': star_rating,
        'price': price,
        'route_name': route_name,
        'route_link': route_link,
        'seats_available': seats_available,
    }


def _scrape_route(driver, route_name, route_link):
    """Open a route in a new tab, collect its bus rows, and close the tab.

    Returns the list of rows scraped for the route (possibly empty). The
    driver is always switched back to the window it started on.
    """
    main_handle = driver.current_window_handle
    known_handles = set(driver.window_handles)
    # Pass the URL as a script argument instead of splicing it into the
    # JavaScript source, so quotes in the URL cannot break the script.
    driver.execute_script("window.open(arguments[0]);", route_link)
    new_handles = [h for h in driver.window_handles if h not in known_handles]
    if not new_handles:
        print(f"An error occurred while extracting data from {route_link}: no new window was opened")
        return []
    driver.switch_to.window(new_handles[0])

    rows = []
    try:
        try:
            view_buses_button = WebDriverWait(driver, 40).until(
                EC.element_to_be_clickable((By.CLASS_NAME, 'button'))
            )
            view_buses_button.click()
            time.sleep(5)
        except TimeoutException:
            # WebDriverWait signals a missing button with TimeoutException;
            # carry on and scrape whatever listings the page already shows.
            pass

        # Scroll to load all buses
        _scroll_to_bottom(driver)

        for entry in driver.find_elements(By.CSS_SELECTOR, 'div.clearfix.bus-item-details'):
            try:
                rows.append(_parse_bus_entry(entry, route_name, route_link))
            except (NoSuchElementException, ValueError, IndexError) as e:
                # Skip only the malformed entry, keep the rest of the route.
                print(f"An error occurred while extracting data from a bus entry in {route_link}: {e}")
    except WebDriverException as e:
        print(f"An error occurred while extracting data from {route_link}: {e}")
    finally:
        # Always close the route tab and return to the listing window, even
        # when scraping failed part-way through.
        driver.close()
        driver.switch_to.window(main_handle)
    return rows


def _scrape_listing_page(driver, wait):
    """Scrape every route linked from the currently displayed listing page."""
    rows = []
    try:
        route_elements = wait.until(EC.presence_of_all_elements_located((By.CLASS_NAME, 'route')))
    except TimeoutException:
        print("Failed to load routes container.")
        route_elements = []

    # Read name/link pairs up front so later tab switching cannot invalidate
    # the element references mid-loop.
    routes = []
    for element in route_elements:
        try:
            routes.append((element.text, element.get_attribute('href')))
        except WebDriverException as e:
            print(f"An error occurred while processing route: {e}")

    for route_name, route_link in routes:
        if not route_link:
            print(f"An error occurred while processing route: no link found for {route_name!r}")
            continue
        try:
            rows.extend(_scrape_route(driver, route_name, route_link))
        except WebDriverException as e:
            print(f"An error occurred while processing route: {e}")
    time.sleep(5)
    return rows


def _go_to_page(driver, wait, target_page):
    """Click the pagination tab labelled *target_page*.

    Returns True once that tab is reported as active, False when the tab is
    missing or the navigation fails.
    """
    try:
        pagination_container = wait.until(EC.presence_of_element_located(
            (By.XPATH, '//div[contains(@class, "DC_117_paginationTable")]')
        ))
        next_page_button = pagination_container.find_element(
            By.XPATH, f'.//div[contains(@class, "DC_117_pageTabs") and text()="{target_page}"]'
        )
        # Bring the tab into view before clicking it.
        ActionChains(driver).move_to_element(next_page_button).perform()
        time.sleep(1)
        next_page_button.click()
        wait.until(EC.text_to_be_present_in_element(
            (By.XPATH, '//div[contains(@class, "DC_117_pageTabs DC_117_pageActive")]'), str(target_page)))
        time.sleep(3)
    except NoSuchElementException:
        # No tab with that number: the previous page was the last one.
        return False
    except WebDriverException as e:
        print(f"Could not navigate to page {target_page}: {e}")
        return False
    return True


def redbus(links, max_pages=5, output_csv='usrtc.csv'):
    """Scrape bus listings for every route linked from a redBus operator page.

    Args:
        links: URL of the listing page whose route links are followed.
        max_pages: maximum number of pagination pages to visit.
        output_csv: path of the CSV file the collected rows are written to.

    Returns:
        A pandas DataFrame with one row per scraped bus; the same data is
        written to *output_csv*.
    """
    driver = webdriver.Chrome()
    all_data = []
    try:
        wait = WebDriverWait(driver, 30)
        driver.get(links)
        for page_number in range(1, max_pages + 1):
            all_data.extend(_scrape_listing_page(driver, wait))
            if page_number >= max_pages:
                break
            if not _go_to_page(driver, wait, page_number + 1):
                break
    finally:
        # Release the browser even if scraping raised unexpectedly.
        driver.quit()

    df_T = pd.DataFrame(all_data)
    df_T.to_csv(output_csv, index=False)
    return df_T


In [None]:
# Listing page whose route links are scraped (the KSRTC Kerala page on redBus).
links = (
    "https://www.redbus.in/online-booking/ksrtc-kerala/"
    "?utm_source=rtchometile"
)
# Run the scraper; the call returns the collected rows as a DataFrame.
bus_full_details = redbus(links)
