In [1]:
#### url : https://www.redbus.in/online-booking/astc/?utm_source=rtchometile
### Web scraping of bus route details from redBus: Assam (ASTC)

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
import time
import pandas as pd

# Start a Chrome session, maximise its window and load the redBus ASTC booking page
ASTC_ROUTES_URL = 'https://www.redbus.in/online-booking/astc/?utm_source=rtchometile'
driver = webdriver.Chrome()
driver.maximize_window()
driver.get(ASTC_ROUTES_URL)

# Accumulates one dict per bus across every scraped route
bus_route_data = []

# Function to collect route details from the page
def route_details():
    """Collect the name and link of every route listed on the current page.

    Waits up to 10 seconds for an element with class ``route_link`` to be
    present, then reads the ``href`` and ``title`` attributes of each element
    with class ``route``.

    Returns:
        list[dict]: One ``{'route_name': ..., 'route_link': ...}`` dict per
        route element that has an ``href``. An empty list is returned when the
        wait or the lookup raises (the error is printed, not re-raised).
    """
    try:
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route_link")))
        route_elements = driver.find_elements(By.CLASS_NAME, "route")

        page_routes = []
        for route in route_elements:
            link = route.get_attribute("href")
            if not link:
                # Without a link the route cannot be opened later, so skip it.
                continue
            # get_attribute returns None when the attribute is absent; fall back
            # to the visible text so that a single untitled element does not
            # raise and discard every route found on this page.
            name = (route.get_attribute("title") or route.text or "").strip()
            page_routes.append({'route_name': name, 'route_link': link})
        return page_routes

    except Exception as e:
        print(f"An error occurred: {e}")
        return []

# Function to navigate through all the pages and scrape route details
def scrape_all_pages(max_pages=5):
    """Walk the paginated route list and gather route details from each page.

    Page 1 is read as already loaded. For every later page the matching
    pagination tab is located, scrolled into view and clicked through
    JavaScript, followed by a fixed 5-second pause before the routes are read.

    Args:
        max_pages (int): Number of pages to visit, starting at page 1.
            Defaults to 5, the count that was previously hard-coded.

    Returns:
        list[dict]: Route dicts from ``route_details()`` for every page that
        was reached. If navigating to a page fails, the error is printed,
        the remaining pages are skipped and what was collected so far is
        returned.
    """
    all_routes = []

    for page in range(1, max_pages + 1):
        try:
            if page > 1:
                # Wait until the tab for this page number is present in the DOM
                # (presence only; the click below is done via JavaScript).
                pagination_tab = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.XPATH, f"//div[contains(@class, 'DC_117_pageTabs')][text()='{page}']"))
                )
                # Scroll to the pagination tab and click it
                driver.execute_script("arguments[0].scrollIntoView();", pagination_tab)
                driver.execute_script("arguments[0].click();", pagination_tab)
                time.sleep(5)  # Wait for the new page to load

            # Collect the routes of the page that is now displayed
            all_routes.extend(route_details())
        except Exception as e:
            print(f"Error occurred while accessing page {page}: {str(e)}")
            # Later tabs are reached from this one, so stop at the first failure.
            break

    return all_routes

# Function to scrape bus details for each route
def scrape_bus_details(route_name, route_link):
    """Open a route page and return one record per bus listed on it.

    The page is loaded, the first clickable element with class ``button``
    (treated as the "View Buses" button) is clicked if one appears within
    10 seconds, and the page is scrolled to the bottom once. Fixed 5-second
    pauses follow the page load, the click and the scroll.

    Args:
        route_name (str): Route label copied into every record.
        route_link (str): URL of the route page to open.

    Returns:
        list[dict]: One dict per bus-name element with the keys
        ``Route Name``, ``Route Link``, ``Bus Name``, ``Bus Type``,
        ``Departure Time``, ``Duration``, ``Reaching Time``, ``Rating``,
        ``Price`` and ``Seats Available``. A field is ``"N/A"`` when its
        element list is shorter than the bus-name list. An empty list is
        returned if loading or extraction raises (the error is printed).
    """
    # Output column -> CSS selector of the element holding that value.
    field_selectors = (
        ("Bus Name", "div.travels.lh-24.f-bold.d-color"),
        ("Bus Type", "div.bus-type.f-12.m-top-16.l-color.evBus"),
        ("Departure Time", "div.dp-time.f-19.d-color.f-bold"),
        ("Duration", "div.dur.l-color.lh-24"),
        ("Reaching Time", "div.bp-time.f-19.d-color.disp-Inline"),
        ("Rating", "div.lh-18.rating.rat-red"),
        ("Price", "div.fare.d-block"),
        ("Seats Available", "div.seat-left.m-top-16"),
    )

    try:
        driver.get(route_link)
        time.sleep(5)

        # Try to click the "View Buses" button if available
        try:
            view_buses = WebDriverWait(driver, 10).until(
                EC.element_to_be_clickable((By.CLASS_NAME, "button"))
            )
            driver.execute_script("arguments[0].click();", view_buses)
            time.sleep(5)
        except Exception:
            # Deliberately not a bare `except:` so Ctrl-C / SystemExit still
            # stop a long-running scrape instead of being reported as a
            # missing button.
            print(f"No 'View Buses' button found for {route_link}")

        driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
        time.sleep(5)

        # Extract data from the page: one page-wide element list per column.
        columns = {
            label: driver.find_elements(By.CSS_SELECTOR, selector)
            for label, selector in field_selectors
        }

        # NOTE(review): values are paired with buses purely by list position.
        # If a bus has no element for some column (e.g. no rating), the values
        # of later buses shift up by one. Scoping the lookups to each bus's own
        # container would avoid this; the container selector is not visible
        # here — TODO confirm against the page markup.
        bus_route_details = []
        for i in range(len(columns["Bus Name"])):
            bus_detail = {"Route Name": route_name, "Route Link": route_link}
            for label, elements in columns.items():
                bus_detail[label] = elements[i].text if i < len(elements) else "N/A"
            bus_route_details.append(bus_detail)

        return bus_route_details
    except Exception as e:
        print(f"Error occurred while accessing {route_link}: {str(e)}")
        return []

# Run the whole scrape. The browser is closed in the `finally` clause so that
# it is released even if a step raises or the run is interrupted.
try:
    # Scrape all route details
    routes = scrape_all_pages()

    # For each route, scrape bus details and add to the bus_route_data list
    for route in routes:
        bus_details = scrape_bus_details(route["route_name"], route["route_link"])
        if bus_details:
            bus_route_data.extend(bus_details)  # Add bus details to the bus_route_data list

    # Convert bus_route_data to a pandas DataFrame
    df = pd.DataFrame(bus_route_data)

    # Save the DataFrame to a CSV file
    df.to_csv('Assam_bus_details.csv', index=False)

    # Confirm where the data was written
    print("Data saved to Assam_bus_details.csv")
finally:
    # Close the driver after scraping, whether or not it succeeded
    driver.quit()


No 'View Buses' button found for https://www.redbus.in/bus-tickets/nagaon-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/goalpara-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/sibsagar-to-north-lakhimpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/north-lakhimpur-to-sibsagar
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-north-lakhimpur
No 'View Buses' button found for https://www.redbus.in/bus-tickets/dhekiajuli-to-guwahati
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-dibrugarh
No 'View Buses' button found for https://www.redbus.in/bus-tickets/north-lakhimpur-to-jorhat
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-dhemaji
No 'View Buses' button found for https://www.redbus.in/bus-tickets/dhemaji-to-jorhat
No 'View Buses' button found for https://www.redbus.in/bus-tickets/jorhat-to-tinsukia
No 'View Buses' b

In [3]:
# Display the DataFrame of scraped bus details.
# NOTE(review): the execution counter jumps from In [1] to In [3]; confirm that
# `df` is still the frame built from bus_route_data in the scraping cell.
df

Unnamed: 0,Route Name,Route Link,Bus Name,Bus Type,Departure Time,Duration,Reaching Time,Rating,Price,Seats Available
0,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,BLUE HILL TRAVELS INDIA LIMITED,NON A/C Seater Semi Sleeper (2+1),21:15,05h 45m,03:00,2.4,INR 500,
1,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Shopuris Travels,Non AC Seater (2+1),21:15,05h 45m,03:00,2.4,INR 500,
2,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,DESTINY TRAVELS,NON A/C Seater (2+1),23:00,05h 15m,04:15,,INR 488.78,
3,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...,DESTINY TRAVELS,NON A/C Seater (2+1),22:15,0-19h 0-30m,03:45,,445,
4,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...,Yatra Travels,NON A/C Seater (2+1),18:00,05h 30m,23:30,1.6,450,11 Seats available
...,...,...,...,...,...,...,...,...,...,...
173,North Lakhimpur to Nagaon (Assam),https://www.redbus.in/bus-tickets/north-lakhim...,Yatra Travels,NON A/C Seater (2+1),20:40,06h 20m,03:00,,540,
174,North Lakhimpur to Nagaon (Assam),https://www.redbus.in/bus-tickets/north-lakhim...,Island Travels (Under ASTC),NON AC Seater / Sleeper 2+1,20:40,06h 20m,03:00,,601,
175,North Lakhimpur to Nagaon (Assam),https://www.redbus.in/bus-tickets/north-lakhim...,PARIJAT TRAVELS(UNDER ASTC),NON A/C Seater (2+1),20:56,06h 04m,03:00,,INR 590,
176,North Lakhimpur to Nagaon (Assam),https://www.redbus.in/bus-tickets/north-lakhim...,DESTINY TRAVELS,NON A/C Seater (2+1),18:30,07h 00m,01:30,,INR 498.75,
