In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/tsrtc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_1 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_1.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_1)

# Save the DataFrame to a CSV file
df.to_csv("redbus_tsrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [2]:
import pandas as pd
df_tsrtc = pd.read_csv("redbus_tsrtc.csv")
df_tsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,FRESHBUS,Electric A/C Seater (2+2),11:05,06h 35m,17:40,4.6,Starts from\nINR 550 449,11 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
1,FRESHBUS,Electric A/C Seater (2+2),11:30,06h 35m,18:05,4.6,Starts from\nINR 600 499,14 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
2,FRESHBUS,Electric A/C Seater (2+2),13:10,06h 15m,19:25,4.5,Starts from\nINR 650 549,9 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
3,AdSitara Travels,NON A/C Push Back (2+2),22:00,06h 00m,04:00,4.6,Starts from\nINR 610,23 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
4,Zingbus Plus,A/C Seater / Sleeper (2+1),23:38,06h 27m,06:05,4.5,Starts from\nINR 751 657,25 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
...,...,...,...,...,...,...,...,...,...,...
501,PMR Express,A/C Sleeper (2+1),21:15,05h 55m,03:10,2.1,INR 1650,25 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
502,Sri Laxmi Sravanthi Siva Tours And Travels,Non A/C Seater / Sleeper (2+1),21:50,07h 30m,05:20,1.0,Starts from\nINR 799,23 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
503,Star Bus,Volvo Multi-Axle A/C Seater/Sleeper (2+1),23:30,10h 00m,09:30,1.0,INR 1500 1425,47 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
504,Sri Laxmi Sravanthi Siva Tours And Travels,Non A/C Seater / Sleeper (2+1),21:40,07h 35m,05:15,0.0,Starts from\nINR 755,15 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...


In [34]:
df_tsrtc['Price'] = df_tsrtc['Price'].str.extract(r'(\d+)', expand=False)

In [35]:
df_tsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,FRESHBUS,Electric A/C Seater (2+2),11:05,06h 35m,17:40,4.6,550,11 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
1,FRESHBUS,Electric A/C Seater (2+2),11:30,06h 35m,18:05,4.6,600,14 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
2,FRESHBUS,Electric A/C Seater (2+2),13:10,06h 15m,19:25,4.5,650,9 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
3,AdSitara Travels,NON A/C Push Back (2+2),22:00,06h 00m,04:00,4.6,610,23 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
4,Zingbus Plus,A/C Seater / Sleeper (2+1),23:38,06h 27m,06:05,4.5,751,25 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
...,...,...,...,...,...,...,...,...,...,...
501,PMR Express,A/C Sleeper (2+1),21:15,05h 55m,03:10,2.1,1650,25 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
502,Sri Laxmi Sravanthi Siva Tours And Travels,Non A/C Seater / Sleeper (2+1),21:50,07h 30m,05:20,1.0,799,23 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
503,Star Bus,Volvo Multi-Axle A/C Seater/Sleeper (2+1),23:30,10h 00m,09:30,1.0,1500,47 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...
504,Sri Laxmi Sravanthi Siva Tours And Travels,Non A/C Seater / Sleeper (2+1),21:40,07h 35m,05:15,0.0,755,15 Seats available,Hyderabad to Ongole,https://www.redbus.in/bus-tickets/hyderabad-to...


In [5]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/ksrtc-kerala/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_2 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_2.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_2)

# Save the DataFrame to a CSV file
df.to_csv("redbus_ksrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [7]:
df_ksrtc = pd.read_csv("redbus_ksrtc.csv")
df_ksrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,National travels and logistics,A/C Sleeper (2+1),22:45,08h 15m,07:00,4.1,Starts from\nINR 1090 990,10 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
1,P K Travels,A/C Seater / Sleeper (2+1),21:30,08h 30m,06:00,4.0,Starts from\nINR 900,18 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
2,DLT BUS,A/C Sleeper (2+1),22:30,09h 00m,07:30,4.1,INR 1099,1 Seat available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
3,AdSuraj Holidays,VE A/C Sleeper (2+1),22:30,09h 00m,07:30,3.9,Starts from\nINR 1250,6 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
4,SKS Tours & Travels,A/C Sleeper (2+1),21:15,08h 30m,05:45,4.3,INR 1099,4 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
...,...,...,...,...,...,...,...,...,...,...
169,P K Travels,A/C Seater / Sleeper (2+1),21:30,06h 30m,04:00,5.0,Starts from\nINR 900,19 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
170,National travels and logistics,NON A/C Sleeper (2+1),09:45,06h 15m,16:00,3.4,Starts from\nINR 549,13 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
171,National travels and logistics,NON A/C Sleeper (2+1),09:45,06h 30m,16:15,1.0,Starts from\nINR 549,13 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
172,National travels and logistics,A/C Sleeper (2+1),09:45,06h 30m,16:15,1.0,Starts from\nINR 699,16 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...


In [8]:
df_ksrtc['Price'] = df_ksrtc['Price'].str.extract(r'(\d+)', expand=False)
df_ksrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,National travels and logistics,A/C Sleeper (2+1),22:45,08h 15m,07:00,4.1,1090,10 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
1,P K Travels,A/C Seater / Sleeper (2+1),21:30,08h 30m,06:00,4.0,900,18 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
2,DLT BUS,A/C Sleeper (2+1),22:30,09h 00m,07:30,4.1,1099,1 Seat available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
3,AdSuraj Holidays,VE A/C Sleeper (2+1),22:30,09h 00m,07:30,3.9,1250,6 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
4,SKS Tours & Travels,A/C Sleeper (2+1),21:15,08h 30m,05:45,4.3,1099,4 Seats available,Bangalore to Kozhikode,https://www.redbus.in/bus-tickets/bangalore-to...
...,...,...,...,...,...,...,...,...,...,...
169,P K Travels,A/C Seater / Sleeper (2+1),21:30,06h 30m,04:00,5.0,900,19 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
170,National travels and logistics,NON A/C Sleeper (2+1),09:45,06h 15m,16:00,3.4,549,13 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
171,National travels and logistics,NON A/C Sleeper (2+1),09:45,06h 30m,16:15,1.0,549,13 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...
172,National travels and logistics,A/C Sleeper (2+1),09:45,06h 30m,16:15,1.0,699,16 Seats available,Bangalore to Kalpetta (kerala),https://www.redbus.in/bus-tickets/bangalore-to...


In [9]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/uttar-pradesh-state-road-transport-corporation-upsrtc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_3 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_3.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_3)

# Save the DataFrame to a CSV file
df.to_csv("redbus_upsrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [10]:
df_upsrtc = pd.read_csv("redbus_upsrtc.csv")
df_upsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,New Khaira Transport,A/C Seater / Sleeper (2+1),20:40,06h 50m,03:30,4.2,INR 700 630,1 Seat available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
1,RS YADAV SMART BUS,A/C Seater / Sleeper (2+1),21:40,06h 55m,04:35,5.0,Starts from\nINR 699,26 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
2,FlixBus,Bharat Benz A/C Seater (2+2),23:35,05h 25m,05:00,3.0,Starts from\nINR 616.07,8 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
3,SHRI RAM TRAVELS,A/C Seater (2+2),23:59,05h 16m,05:15,4.0,INR 740,16 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
4,FlixBus,AC Seater (2+2),17:15,05h 10m,22:25,4.0,Starts from\nINR 239.29,20 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
...,...,...,...,...,...,...,...,...,...,...
300,Sethi Yatra Company,A/C Seater / Sleeper (2+1),18:30,05h 20m,23:50,3.6,Starts from\nINR 600 555,25 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
301,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),19:30,04h 15m,23:45,3.0,Starts from\nINR 799,23 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
302,ARORA TRAVELS,Volvo AC Seater (2+2),21:01,04h 40m,01:41,4.7,INR 703,25 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
303,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),21:15,04h 15m,01:30,3.4,Starts from\nINR 799,26 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...


In [11]:
df_upsrtc['Price'] = df_upsrtc['Price'].str.extract(r'(\d+)', expand=False)
df_upsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,New Khaira Transport,A/C Seater / Sleeper (2+1),20:40,06h 50m,03:30,4.2,700,1 Seat available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
1,RS YADAV SMART BUS,A/C Seater / Sleeper (2+1),21:40,06h 55m,04:35,5.0,699,26 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
2,FlixBus,Bharat Benz A/C Seater (2+2),23:35,05h 25m,05:00,3.0,616,8 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
3,SHRI RAM TRAVELS,A/C Seater (2+2),23:59,05h 16m,05:15,4.0,740,16 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
4,FlixBus,AC Seater (2+2),17:15,05h 10m,22:25,4.0,239,20 Seats available,Delhi to Bareilly,https://www.redbus.in/bus-tickets/delhi-to-bar...
...,...,...,...,...,...,...,...,...,...,...
300,Sethi Yatra Company,A/C Seater / Sleeper (2+1),18:30,05h 20m,23:50,3.6,600,25 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
301,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),19:30,04h 15m,23:45,3.0,799,23 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
302,ARORA TRAVELS,Volvo AC Seater (2+2),21:01,04h 40m,01:41,4.7,703,25 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...
303,Mahalaxmi Travels,Bharat Benz A/C Seater /Sleeper (2+1),21:15,04h 15m,01:30,3.4,799,26 Seats available,Allahabad to Lucknow,https://www.redbus.in/bus-tickets/allahabad-to...


In [12]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/rsrtc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_4 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_4.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_4)

# Save the DataFrame to a CSV file
df.to_csv("redbus_rsrtctc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [13]:
df_rsrtc = pd.read_csv("redbus_rsrtctc.csv")
df_rsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Raj Travel,A/C Sleeper (2+1),22:30,07h 00m,05:30,4.6,Starts from\nINR 550,7 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
1,Shree Devnarayan Travels,Bharat Benz A/C Seater (2+1),17:00,04h 30m,21:30,4.5,INR 599,6 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
2,M R Travels,Volvo Multi-Axle I-Shift B11R Semi Sleeper (2+2),17:10,04h 45m,21:55,4.4,Starts from\nINR 485,27 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
3,AdKalpana Travels,NON AC Seater / Sleeper 2+1,22:00,06h 54m,04:54,4.1,Starts from\nINR 300,26 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
4,Jain travels regd,A/C Seater / Sleeper (2+1),21:30,07h 00m,04:30,4.1,Starts from\nINR 460,15 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
...,...,...,...,...,...,...,...,...,...,...
432,RR Travels,A/C Seater / Sleeper (2+1),22:35,02h 20m,00:55,1.9,Starts from\nINR 500 475,41 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
433,Deepak Travels,NON A/C Seater/ Sleeper (2+1),23:15,02h 45m,02:00,2.6,Starts from\nINR 1050,17 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
434,Kothari Travels,NON A/C Sleeper (2+1),23:45,02h 30m,02:15,2.8,INR 750,28 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
435,Om shree ji travles,NON A/C Sleeper (2+1),23:45,01h 30m,01:15,0.0,Starts from\nINR 800 760,31 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...


In [14]:
df_rsrtc['Price'] = df_rsrtc['Price'].str.extract(r'(\d+)', expand=False)
df_rsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Raj Travel,A/C Sleeper (2+1),22:30,07h 00m,05:30,4.6,550,7 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
1,Shree Devnarayan Travels,Bharat Benz A/C Seater (2+1),17:00,04h 30m,21:30,4.5,599,6 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
2,M R Travels,Volvo Multi-Axle I-Shift B11R Semi Sleeper (2+2),17:10,04h 45m,21:55,4.4,485,27 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
3,AdKalpana Travels,NON AC Seater / Sleeper 2+1,22:00,06h 54m,04:54,4.1,300,26 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
4,Jain travels regd,A/C Seater / Sleeper (2+1),21:30,07h 00m,04:30,4.1,460,15 Seats available,Udaipur to Jodhpur,https://www.redbus.in/bus-tickets/udaipur-to-j...
...,...,...,...,...,...,...,...,...,...,...
432,RR Travels,A/C Seater / Sleeper (2+1),22:35,02h 20m,00:55,1.9,500,41 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
433,Deepak Travels,NON A/C Seater/ Sleeper (2+1),23:15,02h 45m,02:00,2.6,1050,17 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
434,Kothari Travels,NON A/C Sleeper (2+1),23:45,02h 30m,02:15,2.8,750,28 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...
435,Om shree ji travles,NON A/C Sleeper (2+1),23:45,01h 30m,01:15,0.0,800,31 Seats available,Kishangarh to Jaipur (Rajasthan),https://www.redbus.in/bus-tickets/kishangarh-t...


In [15]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/south-bengal-state-transport-corporation-sbstc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_5 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_5.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_5)

# Save the DataFrame to a CSV file
df.to_csv("redbus_sbstc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Timeout occurred at https://www.redbus.in/bus-tickets/midnapore-to-kolkata.
Scraping completed. Data saved to redbus_details.csv


In [16]:
df_sbstc = pd.read_csv("redbus_sbstc.csv")
df_sbstc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,WBTC (CTC) Habra - Barasat - Durgapur - Asanso...,Non AC Seater (2+3),14:25,02h 35m,17:00,5.0,INR 135,20 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
1,SHYAMOLI Pvt. Ltd.(Karunamoyee),Volvo A/C Seater (2+2),11:00,04h 00m,15:00,4.0,INR 486,4 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
2,Shyamoli Paribahan Pvt Ltd,Volvo A/C Seater (2+2),12:55,03h 50m,16:45,4.4,INR 486,2 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
3,Pammi Travels,A/C Sleeper (2+1),12:15,04h 15m,16:30,2.4,Starts from\nINR 999,26 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
4,Express Line,Volvo 9600 A/C Seater (2+2),10:25,02h 10m,12:35,4.3,INR 429,29 Seats available,Kolkata to Burdwan,https://www.redbus.in/bus-tickets/kolkata-to-b...
...,...,...,...,...,...,...,...,...,...,...
263,SBSTC-KARUNAMOYEE - BANKURA - VIA - DURGAPUR -...,Non AC Seater (2+3),14:30,05h 30m,20:00,3.8,INR 185,46 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
264,SBSTC-KOLKATA - BANKURA - 14:30 (Bankura Depot...,Non AC Seater (2+3),14:30,04h 00m,18:30,2.2,INR 180,52 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
265,SBSTC-KARUNAMOYEE - BANKURA - A/C - 15:15 (BNK...,AC Seater (2+2),15:15,05h 30m,20:45,2.4,INR 360,23 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
266,SBSTC-KOLKATA - BANKURA - 15:30 (Bankura Depot...,Non AC Seater (2+3),15:30,04h 00m,19:30,2.7,INR 180,52 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...


In [17]:
df_sbstc['Price'] = df_sbstc['Price'].str.extract(r'(\d+)', expand=False)
df_sbstc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,WBTC (CTC) Habra - Barasat - Durgapur - Asanso...,Non AC Seater (2+3),14:25,02h 35m,17:00,5.0,135,20 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
1,SHYAMOLI Pvt. Ltd.(Karunamoyee),Volvo A/C Seater (2+2),11:00,04h 00m,15:00,4.0,486,4 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
2,Shyamoli Paribahan Pvt Ltd,Volvo A/C Seater (2+2),12:55,03h 50m,16:45,4.4,486,2 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
3,Pammi Travels,A/C Sleeper (2+1),12:15,04h 15m,16:30,2.4,999,26 Seats available,Durgapur to Calcutta,https://www.redbus.in/bus-tickets/durgapur-to-...
4,Express Line,Volvo 9600 A/C Seater (2+2),10:25,02h 10m,12:35,4.3,429,29 Seats available,Kolkata to Burdwan,https://www.redbus.in/bus-tickets/kolkata-to-b...
...,...,...,...,...,...,...,...,...,...,...
263,SBSTC-KARUNAMOYEE - BANKURA - VIA - DURGAPUR -...,Non AC Seater (2+3),14:30,05h 30m,20:00,3.8,185,46 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
264,SBSTC-KOLKATA - BANKURA - 14:30 (Bankura Depot...,Non AC Seater (2+3),14:30,04h 00m,18:30,2.2,180,52 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
265,SBSTC-KARUNAMOYEE - BANKURA - A/C - 15:15 (BNK...,AC Seater (2+2),15:15,05h 30m,20:45,2.4,360,23 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...
266,SBSTC-KOLKATA - BANKURA - 15:30 (Bankura Depot...,Non AC Seater (2+3),15:30,04h 00m,19:30,2.7,180,52 Seats available,Kolkata to Bankura,https://www.redbus.in/bus-tickets/kolkata-to-b...


In [18]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/astc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_6 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_6.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_6)

# Save the DataFrame to a CSV file
df.to_csv("redbus_hrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Timeout occurred at https://www.redbus.in/bus-tickets/jorhat-to-north-lakhimpur.
Timeout occurred at https://www.redbus.in/bus-tickets/north-lakhimpur-to-sibsagar.
Scraping completed. Data saved to redbus_details.csv


In [19]:
df_hrtc = pd.read_csv("redbus_hrtc.csv")
df_hrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Chartered Bus - ASTC,Volvo AC Seater Pushback 2+2,13:00,04h 00m,17:00,4.6,INR 389.13,1 Seat available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
1,SHIVAM TRAVELS,Bharat Benz A/C Seater (2+1),15:00,04h 15m,19:15,4.3,Starts from\nINR 370 333,15 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
2,Chartered Bus - ASTC,Volvo AC Seater Pushback 2+2,15:30,03h 45m,19:15,4.5,Starts from\nINR 389.13,7 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
3,Maya Travels,Tata A/C Seater (2+2),13:00,04h 30m,17:30,1.6,INR 300,39 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
4,Himalayan Travels,NON AC Seater / Sleeper 2+1,23:40,04h 20m,04:00,2.1,INR 500,24 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
5,Shopuris Travels,Non AC Seater (2+1),22:15,05h 45m,04:00,0.0,INR 600,1 Seat available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
6,Orient Transline,A/C Seater (2+1),11:30,04h 20m,15:50,4.1,INR 380.95,1 Seat available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
7,Orient Transline,A/C Seater (2+1),13:00,04h 10m,17:10,4.3,INR 380.95,1 Seat available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
8,WARISPIYA TRAVELS,AC Seater (2+2),15:00,03h 40m,18:40,4.4,INR 360,2 Seats available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
9,WARISPIYA TRAVELS,AC Seater (2+2),15:35,03h 40m,19:15,3.9,INR 360,18 Seats available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...


In [20]:
df_hrtc['Price'] = df_hrtc['Price'].str.extract(r'(\d+)', expand=False)
df_hrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Chartered Bus - ASTC,Volvo AC Seater Pushback 2+2,13:00,04h 00m,17:00,4.6,389,1 Seat available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
1,SHIVAM TRAVELS,Bharat Benz A/C Seater (2+1),15:00,04h 15m,19:15,4.3,370,15 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
2,Chartered Bus - ASTC,Volvo AC Seater Pushback 2+2,15:30,03h 45m,19:15,4.5,389,7 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
3,Maya Travels,Tata A/C Seater (2+2),13:00,04h 30m,17:30,1.6,300,39 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
4,Himalayan Travels,NON AC Seater / Sleeper 2+1,23:40,04h 20m,04:00,2.1,500,24 Seats available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
5,Shopuris Travels,Non AC Seater (2+1),22:15,05h 45m,04:00,0.0,600,1 Seat available,Tezpur to Guwahati,https://www.redbus.in/bus-tickets/tezpur-to-gu...
6,Orient Transline,A/C Seater (2+1),11:30,04h 20m,15:50,4.1,380,1 Seat available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
7,Orient Transline,A/C Seater (2+1),13:00,04h 10m,17:10,4.3,380,1 Seat available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
8,WARISPIYA TRAVELS,AC Seater (2+2),15:00,03h 40m,18:40,4.4,360,2 Seats available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...
9,WARISPIYA TRAVELS,AC Seater (2+2),15:35,03h 40m,19:15,3.9,360,18 Seats available,Guwahati to Tezpur,https://www.redbus.in/bus-tickets/guwahati-to-...


In [21]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_7 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_7.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_7)

# Save the DataFrame to a CSV file
df.to_csv("redbus_ctu.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [22]:
df_ctu = pd.read_csv("redbus_ctu.csv")
df_ctu

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Zimindara Travels,AC Sleeper (2+1),20:30,04h 00m,00:30,4.1,Starts from\nINR 799,23 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
1,City Land Travels,A/C Seater/Sleeper (2+1),22:20,02h 00m,00:20,3.5,Starts from\nINR 948,16 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
2,Om Sidh Express,Volvo A/C Semi Sleeper (2+2),20:20,02h 40m,23:00,3.4,Starts from\nINR 1099 1044,33 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
3,SHRI KRISHNA TRAVELS (JAI SHREE GANESH YATRA CO.),VE A/C Seater / Sleeper (2+1),20:40,02h 50m,23:30,3.7,Starts from\nINR 999,19 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
4,SHRI KRISHNA TRAVELS (JAI SHREE GANESH YATRA CO.),A/C Sleeper (2+2),20:00,01h 45m,21:45,3.7,Starts from\nINR 1199,29 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
...,...,...,...,...,...,...,...,...,...,...
434,RK Holiday,A/C Seater / Sleeper (2+2),23:20,04h 20m,03:40,0.0,Starts from\nINR 855,28 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
435,Khurana Bus Service,Volvo A/C (2+2),14:25,05h 05m,19:30,0.0,INR 4000,27 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
436,Comfort Tour and Travels,A/C Seater / Sleeper (2+2),20:20,04h 20m,00:40,0.0,INR 3000,54 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
437,Laxmi holidays,Volvo 9600 Multi-Axle A/C Sleeper (2+1),23:50,04h 10m,04:00,0.0,Starts from\nINR 2010 1910,9 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...


In [23]:
df_ctu['Price'] = df_ctu['Price'].str.extract(r'(\d+)', expand=False)
df_ctu

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Zimindara Travels,AC Sleeper (2+1),20:30,04h 00m,00:30,4.1,799,23 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
1,City Land Travels,A/C Seater/Sleeper (2+1),22:20,02h 00m,00:20,3.5,948,16 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
2,Om Sidh Express,Volvo A/C Semi Sleeper (2+2),20:20,02h 40m,23:00,3.4,1099,33 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
3,SHRI KRISHNA TRAVELS (JAI SHREE GANESH YATRA CO.),VE A/C Seater / Sleeper (2+1),20:40,02h 50m,23:30,3.7,999,19 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
4,SHRI KRISHNA TRAVELS (JAI SHREE GANESH YATRA CO.),A/C Sleeper (2+2),20:00,01h 45m,21:45,3.7,1199,29 Seats available,Yamuna Nagar to Chandigarh,https://www.redbus.in/bus-tickets/yamuna-nagar...
...,...,...,...,...,...,...,...,...,...,...
434,RK Holiday,A/C Seater / Sleeper (2+2),23:20,04h 20m,03:40,0.0,855,28 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
435,Khurana Bus Service,Volvo A/C (2+2),14:25,05h 05m,19:30,0.0,4000,27 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
436,Comfort Tour and Travels,A/C Seater / Sleeper (2+2),20:20,04h 20m,00:40,0.0,3000,54 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...
437,Laxmi holidays,Volvo 9600 Multi-Axle A/C Sleeper (2+1),23:50,04h 10m,04:00,0.0,2010,9 Seats available,Chandigarh to Pathankot,https://www.redbus.in/bus-tickets/chandigarh-t...


In [24]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/pepsu/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_8 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_8.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_8)

# Save the DataFrame to a CSV file
df.to_csv("redbus_pepsu.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [25]:
df_pepsu = pd.read_csv("redbus_pepsu.csv")
df_pepsu

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,PEPSU (Punjab) - 66665,Volvo AC Seater (2+2),14:01,04h 59m,19:00,4.1,INR 711,8 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
1,PEPSU (Punjab) - 66689,Volvo AC Seater (2+2),15:26,04h 59m,20:25,3.9,INR 711,30 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
2,PEPSU (Punjab) - 67140,AC Seater Hvac 2+2,16:00,05h 30m,21:30,2.7,INR 454,38 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
3,PEPSU (Punjab) - 66667,Volvo AC Seater (2+2),17:31,04h 59m,22:30,4.5,INR 711,36 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
4,PEPSU (Punjab) - 67103,AC Seater Hvac 2+2,22:31,05h 29m,04:00,3.2,INR 454,30 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
...,...,...,...,...,...,...,...,...,...,...
402,Bhawani Travels,A/C Seater / Sleeper (2+2),23:00,07h 10m,06:10,1.4,Starts from\nINR 700,2 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
403,SHREE SATGURU TRAVELS,A/C Seater / Sleeper (2+2),23:50,06h 10m,06:00,1.4,Starts from\nINR 999,38 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
404,SHREE SATGURU TRAVELS,A/C Seater / Sleeper (2+2),23:40,06h 35m,06:15,1.0,Starts from\nINR 999,41 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
405,Deltin Travels,Bharat Benz A/C Seater /Sleeper (2+1),21:05,07h 55m,05:00,1.4,Starts from\nINR 949,28 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...


In [26]:
df_pepsu['Price'] = df_pepsu['Price'].str.extract(r'(\d+)', expand=False)
df_pepsu

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,PEPSU (Punjab) - 66665,Volvo AC Seater (2+2),14:01,04h 59m,19:00,4.1,711,8 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
1,PEPSU (Punjab) - 66689,Volvo AC Seater (2+2),15:26,04h 59m,20:25,3.9,711,30 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
2,PEPSU (Punjab) - 67140,AC Seater Hvac 2+2,16:00,05h 30m,21:30,2.7,454,38 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
3,PEPSU (Punjab) - 66667,Volvo AC Seater (2+2),17:31,04h 59m,22:30,4.5,711,36 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
4,PEPSU (Punjab) - 67103,AC Seater Hvac 2+2,22:31,05h 29m,04:00,3.2,454,30 Seats available,Delhi to Patiala,https://www.redbus.in/bus-tickets/delhi-to-pat...
...,...,...,...,...,...,...,...,...,...,...
402,Bhawani Travels,A/C Seater / Sleeper (2+2),23:00,07h 10m,06:10,1.4,700,2 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
403,SHREE SATGURU TRAVELS,A/C Seater / Sleeper (2+2),23:50,06h 10m,06:00,1.4,999,38 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
404,SHREE SATGURU TRAVELS,A/C Seater / Sleeper (2+2),23:40,06h 35m,06:15,1.0,999,41 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...
405,Deltin Travels,Bharat Benz A/C Seater /Sleeper (2+1),21:05,07h 55m,05:00,1.4,949,28 Seats available,Jalandhar to Delhi Airport,https://www.redbus.in/bus-tickets/jalandhar-to...


In [27]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/bihar-state-road-transport-corporation-bsrtc/?utm_source=rtchometile"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_9 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_9.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_9)

# Save the DataFrame to a CSV file
df.to_csv("redbus_bsrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Scraping completed. Data saved to redbus_details.csv


In [28]:
df_bsrtc = pd.read_csv("redbus_bsrtc.csv")
df_bsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,ROYAL TRAVELS AND CARGO,A/C Seater / Sleeper (2+2),17:30,15h 00m,08:30,2.2,Starts from\nINR 896,35 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
1,Travel Point World LLP,A/C Seater / Sleeper (2+2),20:20,14h 30m,10:50,1.9,Starts from\nINR 1999 1898,38 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
2,Baba Khatushyam Travels Pvt Ltd,A/C Seater / Sleeper (3+1),19:00,16h 49m,11:49,1.3,Starts from\nINR 1050,58 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
3,Rajdhani Express,A/C Seater / Sleeper (2+2),15:00,16h 00m,07:00,1.0,Starts from\nINR 1199 1109,29 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
4,Singh Express Bus Service,A/C Seater / Sleeper (2+2),15:00,19h 00m,10:00,1.0,Starts from\nINR 999,20 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
...,...,...,...,...,...,...,...,...,...,...
68,Shivam Travels,NON A/C Seater / Sleeper (2+2),21:00,09h 30m,06:30,1.2,Starts from\nINR 450,40 Seats available,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea
69,Sri Krishna Rath,A/C Seater / Sleeper (2+2),21:00,09h 15m,06:15,1.9,Starts from\nINR 500 450,23 Seats available,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea
70,Bihar state road transport corporation (BSRTC)...,AC Seater (2+2),21:25,09h 05m,06:30,3.2,INR 468,24 Seats available,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...
71,Sri Krishna Rath,A/C Seater / Sleeper (2+2),21:00,09h 05m,06:05,1.4,Starts from\nINR 800 720,21 Seats available,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...


In [29]:
df_bsrtc['Price'] = df_bsrtc['Price'].str.extract(r'(\d+)', expand=False)
df_bsrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,ROYAL TRAVELS AND CARGO,A/C Seater / Sleeper (2+2),17:30,15h 00m,08:30,2.2,896,35 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
1,Travel Point World LLP,A/C Seater / Sleeper (2+2),20:20,14h 30m,10:50,1.9,1999,38 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
2,Baba Khatushyam Travels Pvt Ltd,A/C Seater / Sleeper (3+1),19:00,16h 49m,11:49,1.3,1050,58 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
3,Rajdhani Express,A/C Seater / Sleeper (2+2),15:00,16h 00m,07:00,1.0,1199,29 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
4,Singh Express Bus Service,A/C Seater / Sleeper (2+2),15:00,19h 00m,10:00,1.0,999,20 Seats available,Gopalganj (Bihar) to Delhi,https://www.redbus.in/bus-tickets/gopalganj-to...
...,...,...,...,...,...,...,...,...,...,...
68,Shivam Travels,NON A/C Seater / Sleeper (2+2),21:00,09h 30m,06:30,1.2,450,40 Seats available,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea
69,Sri Krishna Rath,A/C Seater / Sleeper (2+2),21:00,09h 15m,06:15,1.9,500,23 Seats available,Patna (Bihar) to Purnea,https://www.redbus.in/bus-tickets/patna-to-purnea
70,Bihar state road transport corporation (BSRTC)...,AC Seater (2+2),21:25,09h 05m,06:30,3.2,468,24 Seats available,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...
71,Sri Krishna Rath,A/C Seater / Sleeper (2+2),21:00,09h 05m,06:05,1.4,800,21 Seats available,Patna (Bihar) to Katihar,https://www.redbus.in/bus-tickets/patna-to-kat...


In [30]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException, TimeoutException
import time
import pandas as pd

# URL of the website
URL = "https://www.redbus.in/online-booking/jksrtc"

# Set up the Chrome driver
driver = webdriver.Chrome()
driver.get(URL)
driver.maximize_window()

# Wait for the page to load
WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "route")))

# Function to scrape bus routes
def scrape_bus_routes():
    route_elements = driver.find_elements(By.CLASS_NAME, "route")
    bus_routes_link = [route.get_attribute("href") for route in route_elements]
    bus_routes_name = [route.text.strip() for route in route_elements]
    return bus_routes_link, bus_routes_name

# Scrape the first page
all_bus_routes_link, all_bus_routes_name = scrape_bus_routes()

# Retry mechanism for stale element exceptions
def safe_find_elements(by, value, retries=3):
    attempts = 0
    while attempts < retries:
        try:
            return driver.find_elements(by, value)
        except StaleElementReferenceException:
            time.sleep(1)  # Wait and retry
            attempts += 1
    return []  # Return an empty list if not found after retries

# Function to scrape bus details and include route name and link
def scrape_bus_details(url, route_name):
    try:
        driver.get(url)
        WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.CLASS_NAME, "bus-item")))

        # Scroll down to load all bus items
        last_height = driver.execute_script("return document.body.scrollHeight")
        while True:
            driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
            time.sleep(2)
            new_height = driver.execute_script("return document.body.scrollHeight")
            if new_height == last_height:
                break
            last_height = new_height

        # Use safe find to avoid stale element issues
        bus_names = safe_find_elements(By.CLASS_NAME, "travels")
        bus_types = safe_find_elements(By.CLASS_NAME, "bus-type")
        departing_time_elements = safe_find_elements(By.CLASS_NAME, "dp-time")
        duration_elements = safe_find_elements(By.CLASS_NAME, "dur")
        reaching_time_elements = safe_find_elements(By.CLASS_NAME, "bp-time")
        star_rating_elements = safe_find_elements(By.CLASS_NAME, "rating")
        price_elements = safe_find_elements(By.CLASS_NAME, "seat-fare")
        seat_availability_elements = safe_find_elements(By.CLASS_NAME, "seat-left")

        bus_details = []
        num_items = min(
            len(bus_names), len(bus_types), len(departing_time_elements),
            len(duration_elements), len(reaching_time_elements),
            len(price_elements)
        )  # Ensure the minimum length to avoid index out of range errors

        for i in range(num_items):
            bus_detail = {
                "bus_name": bus_names[i].text,
                "bus_type": bus_types[i].text,
                "Departing_Time": departing_time_elements[i].text,
                "Duration": duration_elements[i].text,
                "Reaching_Time": reaching_time_elements[i].text,
                "Star_Rating": star_rating_elements[i].text if i < len(star_rating_elements) else '0',
                "Price": price_elements[i].text,
                "Seat_Availability": seat_availability_elements[i].text if i < len(seat_availability_elements) else '0',
                "bus_routes_name": route_name,
                "bus_routes_link": url          
            }
            bus_details.append(bus_detail)

        return bus_details

    except StaleElementReferenceException:
        print(f"Stale element error occurred at {url}. Retrying...")
        time.sleep(2)
        return scrape_bus_details(url, route_name)  # Retry the entire function

    except TimeoutException:
        print(f"Timeout occurred at {url}.")
        return []

    except Exception as e:
        print(f"Error occurred while accessing {url}: {str(e)}")
        return []

# List to hold all bus details
all_bus_details_10 = []

# Iterate over each bus route link and scrape the details
for link, name in zip(all_bus_routes_link, all_bus_routes_name):
    bus_details = scrape_bus_details(link, name)
    if bus_details:
        all_bus_details_10.extend(bus_details)

# Convert the list of dictionaries to a DataFrame
df = pd.DataFrame(all_bus_details_10)

# Save the DataFrame to a CSV file
df.to_csv("redbus_jksrtc.csv", index=False)

# Close the driver
driver.quit()

print("Scraping completed. Data saved to redbus_details.csv")

Timeout occurred at https://www.redbus.in/bus-tickets/jammu-to-poonch.
Timeout occurred at https://www.redbus.in/bus-tickets/mendhar-j-k-to-jammu.
Timeout occurred at https://www.redbus.in/bus-tickets/kishtwar-to-jammu.
Scraping completed. Data saved to redbus_details.csv


In [31]:
df_jksrtc = pd.read_csv("redbus_jksrtc.csv")
df_jksrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Jamna Travels-Jammu,Non AC Seater (2+2),19:00,11h 45m,06:45,2.3,Starts from\nINR 649,28 Seats available,Srinagar to Jammu (j and k),https://www.redbus.in/bus-tickets/srinagar-to-...
1,New Pal Travels,NON A/C Seater / Sleeper (2+2),19:00,10h 00m,05:00,1.8,Starts from\nINR 799 719,41 Seats available,Srinagar to Jammu (j and k),https://www.redbus.in/bus-tickets/srinagar-to-...
2,Zingbus Plus,Bharat Benz A/C Seater /Sleeper (2+1),20:00,19h 20m,15:20,5.0,Starts from\nINR 3783 3310,29 Seats available,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...
3,Zingbus Plus,A/C Sleeper (2+1),21:35,18h 55m,16:30,5.0,Starts from\nINR 3943 3450,22 Seats available,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...
4,Jamna Travels-Jammu,Non AC Seater (2+2),21:30,10h 30m,08:00,1.3,Starts from\nINR 449,27 Seats available,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...
...,...,...,...,...,...,...,...,...,...,...
118,Khurana Bus Service,Volvo A/C (2+2),17:00,06h 00m,23:00,0.0,INR 750,5 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
119,Maheshwaram Travels,A/C Seater / Sleeper (2+2),21:00,10h 35m,07:35,0.0,Starts from\nINR 1026 926,30 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
120,SL Choudhary travels,Bharath Benz A/C Sleeper (2+2),21:25,07h 35m,05:00,0.0,INR 1000,36 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
121,SWAGATAM TOUR HOLIDAY,A/C Sleeper (2+2),23:59,07h 01m,07:00,0.0,INR 1599 1439,42 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...


In [32]:
df_jksrtc['Price'] = df_jksrtc['Price'].str.extract(r'(\d+)', expand=False)
df_jksrtc

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,Jamna Travels-Jammu,Non AC Seater (2+2),19:00,11h 45m,06:45,2.3,649,28 Seats available,Srinagar to Jammu (j and k),https://www.redbus.in/bus-tickets/srinagar-to-...
1,New Pal Travels,NON A/C Seater / Sleeper (2+2),19:00,10h 00m,05:00,1.8,799,41 Seats available,Srinagar to Jammu (j and k),https://www.redbus.in/bus-tickets/srinagar-to-...
2,Zingbus Plus,Bharat Benz A/C Seater /Sleeper (2+1),20:00,19h 20m,15:20,5.0,3783,29 Seats available,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...
3,Zingbus Plus,A/C Sleeper (2+1),21:35,18h 55m,16:30,5.0,3943,22 Seats available,Delhi to Srinagar,https://www.redbus.in/bus-tickets/delhi-to-sri...
4,Jamna Travels-Jammu,Non AC Seater (2+2),21:30,10h 30m,08:00,1.3,449,27 Seats available,Jammu (j and k) to Srinagar,https://www.redbus.in/bus-tickets/jammu-to-sri...
...,...,...,...,...,...,...,...,...,...,...
118,Khurana Bus Service,Volvo A/C (2+2),17:00,06h 00m,23:00,0.0,750,5 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
119,Maheshwaram Travels,A/C Seater / Sleeper (2+2),21:00,10h 35m,07:35,0.0,1026,30 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
120,SL Choudhary travels,Bharath Benz A/C Sleeper (2+2),21:25,07h 35m,05:00,0.0,1000,36 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
121,SWAGATAM TOUR HOLIDAY,A/C Sleeper (2+2),23:59,07h 01m,07:00,0.0,1599,42 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...


In [36]:
redbus_all=pd.concat([df_tsrtc,df_ksrtc,df_upsrtc,df_rsrtc,df_sbstc,df_hrtc,df_ctu,df_pepsu,df_bsrtc,df_jksrtc],ignore_index=True)
redbus_all

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,FRESHBUS,Electric A/C Seater (2+2),11:05,06h 35m,17:40,4.6,550,11 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
1,FRESHBUS,Electric A/C Seater (2+2),11:30,06h 35m,18:05,4.6,600,14 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
2,FRESHBUS,Electric A/C Seater (2+2),13:10,06h 15m,19:25,4.5,650,9 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
3,AdSitara Travels,NON A/C Push Back (2+2),22:00,06h 00m,04:00,4.6,610,23 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
4,Zingbus Plus,A/C Seater / Sleeper (2+1),23:38,06h 27m,06:05,4.5,751,25 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
...,...,...,...,...,...,...,...,...,...,...
2770,Khurana Bus Service,Volvo A/C (2+2),17:00,06h 00m,23:00,0.0,750,5 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2771,Maheshwaram Travels,A/C Seater / Sleeper (2+2),21:00,10h 35m,07:35,0.0,1026,30 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2772,SL Choudhary travels,Bharath Benz A/C Sleeper (2+2),21:25,07h 35m,05:00,0.0,1000,36 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2773,SWAGATAM TOUR HOLIDAY,A/C Sleeper (2+2),23:59,07h 01m,07:00,0.0,1599,42 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...


In [37]:
redbus_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2775 entries, 0 to 2774
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   bus_name           2775 non-null   object 
 1   bus_type           2775 non-null   object 
 2   Departing_Time     2775 non-null   object 
 3   Duration           2775 non-null   object 
 4   Reaching_Time      2775 non-null   object 
 5   Star_Rating        2775 non-null   float64
 6   Price              2775 non-null   object 
 7   Seat_Availability  2775 non-null   object 
 8   bus_routes_name    2775 non-null   object 
 9   bus_routes_link    2775 non-null   object 
dtypes: float64(1), object(9)
memory usage: 216.9+ KB


In [38]:
redbus_all['Price'] = pd.to_numeric(redbus_all['Price'], errors='coerce').astype(float)
redbus_all

Unnamed: 0,bus_name,bus_type,Departing_Time,Duration,Reaching_Time,Star_Rating,Price,Seat_Availability,bus_routes_name,bus_routes_link
0,FRESHBUS,Electric A/C Seater (2+2),11:05,06h 35m,17:40,4.6,550.0,11 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
1,FRESHBUS,Electric A/C Seater (2+2),11:30,06h 35m,18:05,4.6,600.0,14 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
2,FRESHBUS,Electric A/C Seater (2+2),13:10,06h 15m,19:25,4.5,650.0,9 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
3,AdSitara Travels,NON A/C Push Back (2+2),22:00,06h 00m,04:00,4.6,610.0,23 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
4,Zingbus Plus,A/C Seater / Sleeper (2+1),23:38,06h 27m,06:05,4.5,751.0,25 Seats available,Hyderabad to Vijayawada,https://www.redbus.in/bus-tickets/hyderabad-to...
...,...,...,...,...,...,...,...,...,...,...
2770,Khurana Bus Service,Volvo A/C (2+2),17:00,06h 00m,23:00,0.0,750.0,5 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2771,Maheshwaram Travels,A/C Seater / Sleeper (2+2),21:00,10h 35m,07:35,0.0,1026.0,30 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2772,SL Choudhary travels,Bharath Benz A/C Sleeper (2+2),21:25,07h 35m,05:00,0.0,1000.0,36 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...
2773,SWAGATAM TOUR HOLIDAY,A/C Sleeper (2+2),23:59,07h 01m,07:00,0.0,1599.0,42 Seats available,Chandigarh to Jammu (j and k),https://www.redbus.in/bus-tickets/chandigarh-t...


In [39]:
redbus_all.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2775 entries, 0 to 2774
Data columns (total 10 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   bus_name           2775 non-null   object 
 1   bus_type           2775 non-null   object 
 2   Departing_Time     2775 non-null   object 
 3   Duration           2775 non-null   object 
 4   Reaching_Time      2775 non-null   object 
 5   Star_Rating        2775 non-null   float64
 6   Price              2775 non-null   float64
 7   Seat_Availability  2775 non-null   object 
 8   bus_routes_name    2775 non-null   object 
 9   bus_routes_link    2775 non-null   object 
dtypes: float64(2), object(8)
memory usage: 216.9+ KB


In [40]:
pip install mysql-connector-python

Note: you may need to restart the kernel to use updated packages.


In [41]:
import mysql.connector
con= mysql.connector.connect(
    host="localhost",
    user="root",
    password="AKSBRSt@2000",
    auth_plugin="mysql_native_password"
    )
print(con)

<mysql.connector.connection_cext.CMySQLConnection object at 0x0000023C20FBDDF0>


In [42]:
my_cursor=con.cursor()
my_cursor.execute("USE RED_BUS")
my_cursor.execute('''
CREATE TABLE IF NOT EXISTS bus_details_3(
ID INT AUTO_INCREMENT PRIMARY KEY,
Bus_name VARCHAR(255) NOT NULL, 
Bus_type VARCHAR(255) NOT NULL, 
Departing_Time VARCHAR(255) NOT NULL,
Reaching_Time VARCHAR(255) NOT NULL,
Total_duration VARCHAR(255) NOT NULL,
Star_Rating FLOAT NULL,                  
Price FLOAT NULL, 
Seats_Available VARCHAR(255) NULL,
bus_routes_name VARCHAR(255) NULL,
bus_routes_link VARCHAR(255) NULL
)''')

print("Table Created successfully")

Table Created successfully


In [43]:
insert_query = '''INSERT INTO bus_details_3 (
    Bus_name,
    Bus_type,
    Departing_Time,
    Total_duration,
    Reaching_Time,
    Star_Rating,
    Price,
    Seats_Available,
    bus_routes_name,
    bus_routes_link)
    VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s)'''

data_redbus_3 = redbus_all.values.tolist()
my_cursor.executemany(insert_query, data_redbus_3)
con.commit()

print("Values inserted successfully")

Values inserted successfully


In [44]:
data_redbus_3

[['FRESHBUS',
  'Electric A/C Seater (2+2)',
  '11:05',
  '06h 35m',
  '17:40',
  4.6,
  550.0,
  '11 Seats available',
  'Hyderabad to Vijayawada',
  'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada'],
 ['FRESHBUS',
  'Electric A/C Seater (2+2)',
  '11:30',
  '06h 35m',
  '18:05',
  4.6,
  600.0,
  '14 Seats available',
  'Hyderabad to Vijayawada',
  'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada'],
 ['FRESHBUS',
  'Electric A/C Seater (2+2)',
  '13:10',
  '06h 15m',
  '19:25',
  4.5,
  650.0,
  '9 Seats available',
  'Hyderabad to Vijayawada',
  'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada'],
 ['AdSitara Travels',
  'NON A/C Push Back (2+2)',
  '22:00',
  '06h 00m',
  '04:00',
  4.6,
  610.0,
  '23 Seats available',
  'Hyderabad to Vijayawada',
  'https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada'],
 ['Zingbus Plus',
  'A/C Seater / Sleeper (2+1)',
  '23:38',
  '06h 27m',
  '06:05',
  4.5,
  751.0,
  '25 Seats available',
  'Hyderabad to Vi