In [1]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.action_chains import ActionChains
import pandas as pd

### Goa Bus Routes - KTCL 

In [2]:
# Start a Chrome WebDriver session for the KTCL scrape (assumes a working
# Chrome/WebDriver setup on this machine). The name is module-level because
# both move_to_page() and the scraping loop further down use it directly.
browser_driver = webdriver.Chrome()

def move_to_page(page_num):
    """Switch the listing open in ``browser_driver`` to pagination tab ``page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(page_num)``, clicks it through ``ActionChains``
    and then waits (up to 10 s) for an ``a.route[href]`` element to be present.

    Args:
        page_num: Page label to look for; compared as ``str(page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(browser_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # Page number not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(browser_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(browser_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True  # Successful navigation
    except Exception as ex:
        print(f"Error navigating to page {page_num}: {str(ex)}")
        return False

try:
    # Load the KTCL operator page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    browser_driver.get('https://www.redbus.in/online-booking/ktcl')
    WebDriverWait(browser_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    bus_urls, route_names = [], []

    # Inclusive bounds of the pagination tabs to visit.
    first_page, last_page = 1, 4

    for current_page in range(first_page, last_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not move_to_page(current_page):
            print(f"Failed to move to page {current_page}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        bus_elements = browser_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for bus_element in bus_elements:
            link = bus_element.get_attribute('href')
            route_name = bus_element.get_attribute('title')
            bus_urls.append(link)
            route_names.append(route_name)
            print(f"Bus URL: {link}, Route Name: {route_name}")

    # One row per collected link, in scrape order.
    ktcl_df = pd.DataFrame({'route_link': bus_urls, 'route_name': route_names})

    # Echo the collected table for a quick sanity check.
    print(ktcl_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\ktcl.csv"
    ktcl_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    browser_driver.quit()


Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


Bus URL: https://www.redbus.in/bus-tickets/pune-to-goa, Route Name: Pune to Goa
Bus URL: https://www.redbus.in/bus-tickets/goa-to-pune, Route Name: Goa to Pune
Bus URL: https://www.redbus.in/bus-tickets/mumbai-to-goa, Route Name: Mumbai to Goa
Bus URL: https://www.redbus.in/bus-tickets/bangalore-to-goa, Route Name: Bangalore to Goa
Bus URL: https://www.redbus.in/bus-tickets/goa-to-bangalore, Route Name: Goa to Bangalore
Bus URL: https://www.redbus.in/bus-tickets/goa-to-mumbai, Route Name: Goa to Mumbai
Bus URL: https://www.redbus.in/bus-tickets/pandharpur-to-goa, Route Name: Pandharpur to Goa
Bus URL: https://www.redbus.in/bus-tickets/goa-to-pandharpur, Route Name: Goa to Pandharpur
Bus URL: https://www.redbus.in/bus-tickets/solapur-to-goa, Route Name: Solapur to Goa
Bus URL: https://www.redbus.in/bus-tickets/calangute-goa-to-goa-airport, Route Name: Calangute (goa) to Goa Airport
Bus URL: https://www.redbus.in/bus-tickets/goa-to-kolhapur-maharashtra, Route Name: Goa to Kolhapur(Mahara

In [4]:
# Last-expression display of the DataFrame assembled by the KTCL scraping cell.
ktcl_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
1,https://www.redbus.in/bus-tickets/goa-to-pune,Goa to Pune
2,https://www.redbus.in/bus-tickets/mumbai-to-goa,Mumbai to Goa
3,https://www.redbus.in/bus-tickets/bangalore-to...,Bangalore to Goa
4,https://www.redbus.in/bus-tickets/goa-to-banga...,Goa to Bangalore
5,https://www.redbus.in/bus-tickets/goa-to-mumbai,Goa to Mumbai
6,https://www.redbus.in/bus-tickets/pandharpur-t...,Pandharpur to Goa
7,https://www.redbus.in/bus-tickets/goa-to-pandh...,Goa to Pandharpur
8,https://www.redbus.in/bus-tickets/solapur-to-goa,Solapur to Goa
9,https://www.redbus.in/bus-tickets/calangute-go...,Calangute (goa) to Goa Airport


### Telangana Bus Routes - TSRTC

In [5]:
# Start a separate Chrome WebDriver session for the TSRTC scrape (assumes a
# working Chrome/WebDriver setup). Module-level because navigate_to_ts_page()
# and the scraping loop further down both use this name directly.
ts_driver = webdriver.Chrome()

def navigate_to_ts_page(ts_page_num):
    """Switch the TSRTC listing open in ``ts_driver`` to tab ``ts_page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(ts_page_num)``, clicks it through
    ``ActionChains`` and then waits (up to 10 s) for an ``a.route[href]``
    element to be present.

    Args:
        ts_page_num: Page label to look for; compared as ``str(ts_page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(ts_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(ts_page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # If page is not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(ts_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(ts_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True
    except Exception as ts_error:
        print(f"Error navigating to page {ts_page_num}: {str(ts_error)}")
        return False

try:
    # Load the TSRTC operator page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    ts_driver.get('https://www.redbus.in/online-booking/tsrtc')
    WebDriverWait(ts_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    ts_bus_links, ts_bus_routes = [], []

    # Inclusive bounds of the pagination tabs to visit (adjust as needed).
    ts_start_page, ts_end_page = 1, 3

    for ts_page_num in range(ts_start_page, ts_end_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not navigate_to_ts_page(ts_page_num):
            print(f"Failed to navigate to page {ts_page_num}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        ts_bus_elements = ts_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for ts_element in ts_bus_elements:
            ts_link = ts_element.get_attribute('href')
            ts_route = ts_element.get_attribute('title')
            ts_bus_links.append(ts_link)
            ts_bus_routes.append(ts_route)
            print(f"TS Bus Link: {ts_link}, TS Bus Route: {ts_route}")

    # One row per collected link, in scrape order.
    tsrtc_df = pd.DataFrame({'route_link': ts_bus_links, 'route_name': ts_bus_routes})

    # Echo the collected table for a quick sanity check.
    print(tsrtc_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\tsrtc.csv"
    tsrtc_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    ts_driver.quit()


Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada, TS Bus Route: Hyderabad to Vijayawada
TS Bus Link: https://www.redbus.in/bus-tickets/khammam-to-hyderabad, TS Bus Route: Khammam to Hyderabad
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-khammam, TS Bus Route: Hyderabad to Khammam
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-srisailam, TS Bus Route: Hyderabad to Srisailam
TS Bus Link: https://www.redbus.in/bus-tickets/karimnagar-to-hyderabad, TS Bus Route: Karimnagar to Hyderabad
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-karimnagar, TS Bus Route: Hyderabad to Karimnagar
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-mancherial, TS Bus Route: Hyderabad to Mancherial
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-nirmal, TS Bus Route: Hyderabad to Nirmal
TS Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-adilabad, TS Bus Route: Hyderabad to Adilabad
TS Bus Link: https://www.redbus.in/b

In [6]:
# Last-expression display of the DataFrame assembled by the TSRTC scraping cell.
tsrtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Vijayawada
1,https://www.redbus.in/bus-tickets/khammam-to-h...,Khammam to Hyderabad
2,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Khammam
3,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Srisailam
4,https://www.redbus.in/bus-tickets/karimnagar-t...,Karimnagar to Hyderabad
5,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Karimnagar
6,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Mancherial
7,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Nirmal
8,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Adilabad
9,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Ongole


### Karbi Anglong Autonomous Council - KAAC

In [7]:
# Start a separate Chrome WebDriver session for the KAAC Transport scrape
# (assumes a working Chrome/WebDriver setup). Module-level because
# navigate_to_kaac_page() and the scraping loop below both use this name.
kaac_driver = webdriver.Chrome()

def navigate_to_kaac_page(kaac_page_num):
    """Switch the KAAC listing open in ``kaac_driver`` to tab ``kaac_page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(kaac_page_num)``, clicks it through
    ``ActionChains`` and then waits (up to 10 s) for an ``a.route[href]``
    element to be present.

    Args:
        kaac_page_num: Page label to look for; compared as
            ``str(kaac_page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(kaac_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(kaac_page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # If page is not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(kaac_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(kaac_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True
    except Exception as kaac_error:
        print(f"Error navigating to page {kaac_page_num}: {str(kaac_error)}")
        return False

try:
    # Load the KAAC Transport page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    kaac_driver.get('https://www.redbus.in/online-booking/kaac-transport')
    WebDriverWait(kaac_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    kaac_bus_links, kaac_bus_routes = [], []

    # Inclusive bounds of the pagination tabs to visit (two pages).
    kaac_start_page, kaac_end_page = 1, 2

    for kaac_page_num in range(kaac_start_page, kaac_end_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not navigate_to_kaac_page(kaac_page_num):
            print(f"Failed to navigate to page {kaac_page_num}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        kaac_bus_elements = kaac_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for kaac_element in kaac_bus_elements:
            kaac_link = kaac_element.get_attribute('href')
            kaac_route = kaac_element.get_attribute('title')
            kaac_bus_links.append(kaac_link)
            kaac_bus_routes.append(kaac_route)
            print(f"KAAC Bus Link: {kaac_link}, KAAC Bus Route: {kaac_route}")

    # One row per collected link, in scrape order.
    kaac_df = pd.DataFrame({'route_link': kaac_bus_links, 'route_name': kaac_bus_routes})

    # Echo the collected table for a quick sanity check.
    print(kaac_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\kaac.csv"
    kaac_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    kaac_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


KAAC Bus Link: https://www.redbus.in/bus-tickets/diphu-to-guwahati, KAAC Bus Route: Diphu to Guwahati
KAAC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-diphu, KAAC Bus Route: Guwahati to Diphu
KAAC Bus Link: https://www.redbus.in/bus-tickets/dokmoka-to-guwahati, KAAC Bus Route: Dokmoka to Guwahati
KAAC Bus Link: https://www.redbus.in/bus-tickets/bokolia-assam-to-guwahati, KAAC Bus Route: Bokolia (assam) to Guwahati
KAAC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-bokolia-assam, KAAC Bus Route: Guwahati to Bokolia (assam)
KAAC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-dokmoka, KAAC Bus Route: Guwahati to Dokmoka
KAAC Bus Link: https://www.redbus.in/bus-tickets/langhin-assam-to-guwahati, KAAC Bus Route: Langhin (assam) to Guwahati
KAAC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-langhin-assam, KAAC Bus Route: Guwahati to Langhin (assam)
KAAC Bus Link: https://www.redbus.in/bus-tickets/diphu-to-hamren, KAAC Bus Route: Diphu to Hamren
KAAC B

In [8]:
# Last-expression display of the DataFrame assembled by the KAAC scraping cell.
kaac_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/diphu-to-guw...,Diphu to Guwahati
1,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Diphu
2,https://www.redbus.in/bus-tickets/dokmoka-to-g...,Dokmoka to Guwahati
3,https://www.redbus.in/bus-tickets/bokolia-assa...,Bokolia (assam) to Guwahati
4,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Bokolia (assam)
5,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Dokmoka
6,https://www.redbus.in/bus-tickets/langhin-assa...,Langhin (assam) to Guwahati
7,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Langhin (assam)
8,https://www.redbus.in/bus-tickets/diphu-to-hamren,Diphu to Hamren
9,https://www.redbus.in/bus-tickets/manja-assam-...,Manja (assam) to Guwahati


### Meghalaya Transport Corporation - MTC

In [9]:
# Start a separate Chrome WebDriver session for the Meghalaya Transport
# Corporation scrape. Module-level because navigate_to_mtc_page() and the
# scraping loop below both use this name directly.
mtc_driver = webdriver.Chrome()

def navigate_to_mtc_page(mtc_page_num):
    """Switch the MTC listing open in ``mtc_driver`` to tab ``mtc_page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(mtc_page_num)``, clicks it through
    ``ActionChains`` and then waits (up to 10 s) for an ``a.route[href]``
    element to be present.

    Args:
        mtc_page_num: Page label to look for; compared as
            ``str(mtc_page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(mtc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(mtc_page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # If page is not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(mtc_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(mtc_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True
    except Exception as mtc_error:
        print(f"Error navigating to page {mtc_page_num}: {str(mtc_error)}")
        return False

try:
    # Load the MTC operator page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    mtc_driver.get('https://www.redbus.in/online-booking/meghalaya-transport-corporation-mtc')
    WebDriverWait(mtc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    mtc_bus_links, mtc_bus_routes = [], []

    # Inclusive bounds of the pagination tabs to visit (two pages).
    mtc_start_page, mtc_end_page = 1, 2

    for mtc_page_num in range(mtc_start_page, mtc_end_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not navigate_to_mtc_page(mtc_page_num):
            print(f"Failed to navigate to page {mtc_page_num}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        mtc_bus_elements = mtc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for mtc_element in mtc_bus_elements:
            mtc_link = mtc_element.get_attribute('href')
            mtc_route = mtc_element.get_attribute('title')
            mtc_bus_links.append(mtc_link)
            mtc_bus_routes.append(mtc_route)
            print(f"MTC Bus Link: {mtc_link}, MTC Bus Route: {mtc_route}")

    # One row per collected link, in scrape order.
    mtc_df = pd.DataFrame({'route_link': mtc_bus_links, 'route_name': mtc_bus_routes})

    # Echo the collected table for a quick sanity check.
    print(mtc_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\mtc.csv"
    mtc_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    mtc_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


MTC Bus Link: https://www.redbus.in/bus-tickets/shillong-to-tura-meghalaya, MTC Bus Route: Shillong to Tura (Meghalaya)
MTC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-shillong, MTC Bus Route: Guwahati to Shillong
MTC Bus Link: https://www.redbus.in/bus-tickets/tura-meghalaya-to-shillong, MTC Bus Route: Tura (Meghalaya) to Shillong
MTC Bus Link: https://www.redbus.in/bus-tickets/shillong-to-guwahati, MTC Bus Route: Shillong to Guwahati
MTC Bus Link: https://www.redbus.in/bus-tickets/shillong-to-silchar, MTC Bus Route: Shillong to Silchar
MTC Bus Link: https://www.redbus.in/bus-tickets/silchar-to-shillong, MTC Bus Route: Silchar to Shillong
MTC Bus Link: https://www.redbus.in/bus-tickets/shillong-to-williamnagar-meghalaya, MTC Bus Route: Shillong to Williamnagar (Meghalaya)
MTC Bus Link: https://www.redbus.in/bus-tickets/williamnagar-meghalaya-to-shillong, MTC Bus Route: Williamnagar (Meghalaya) to Shillong
MTC Bus Link: https://www.redbus.in/bus-tickets/shillong-to-karimgan

In [10]:
# Last-expression display of the DataFrame assembled by the MTC scraping cell.
mtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/shillong-to-...,Shillong to Tura (Meghalaya)
1,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Shillong
2,https://www.redbus.in/bus-tickets/tura-meghala...,Tura (Meghalaya) to Shillong
3,https://www.redbus.in/bus-tickets/shillong-to-...,Shillong to Guwahati
4,https://www.redbus.in/bus-tickets/shillong-to-...,Shillong to Silchar
5,https://www.redbus.in/bus-tickets/silchar-to-s...,Silchar to Shillong
6,https://www.redbus.in/bus-tickets/shillong-to-...,Shillong to Williamnagar (Meghalaya)
7,https://www.redbus.in/bus-tickets/williamnagar...,Williamnagar (Meghalaya) to Shillong
8,https://www.redbus.in/bus-tickets/shillong-to-...,Shillong to Karimganj
9,https://www.redbus.in/bus-tickets/karimganj-to...,Karimganj to Shillong


### Himachal Road Transport Corporation - HRTC

In [11]:
# Start a separate Chrome WebDriver session for the HRTC scrape. Module-level
# because navigate_to_hrtc_page() and the scraping loop below both use this
# name directly.
hrtc_driver = webdriver.Chrome()

def navigate_to_hrtc_page(hrtc_page_num):
    """Switch the HRTC listing open in ``hrtc_driver`` to tab ``hrtc_page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(hrtc_page_num)``, clicks it through
    ``ActionChains`` and then waits (up to 10 s) for an ``a.route[href]``
    element to be present.

    Args:
        hrtc_page_num: Page label to look for; compared as
            ``str(hrtc_page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(hrtc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(hrtc_page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # If page is not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(hrtc_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(hrtc_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True
    except Exception as hrtc_error:
        print(f"Error navigating to page {hrtc_page_num}: {str(hrtc_error)}")
        return False

try:
    # Load the HRTC operator page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    hrtc_driver.get('https://www.redbus.in/online-booking/hrtc')
    WebDriverWait(hrtc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    hrtc_bus_links, hrtc_bus_routes = [], []

    # Inclusive bounds of the pagination tabs to visit (four pages).
    hrtc_start_page, hrtc_end_page = 1, 4

    for hrtc_page_num in range(hrtc_start_page, hrtc_end_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not navigate_to_hrtc_page(hrtc_page_num):
            print(f"Failed to navigate to page {hrtc_page_num}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        hrtc_bus_elements = hrtc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for hrtc_element in hrtc_bus_elements:
            hrtc_link = hrtc_element.get_attribute('href')
            hrtc_route = hrtc_element.get_attribute('title')
            hrtc_bus_links.append(hrtc_link)
            hrtc_bus_routes.append(hrtc_route)
            print(f"HRTC Bus Link: {hrtc_link}, HRTC Bus Route: {hrtc_route}")

    # One row per collected link, in scrape order.
    hrtc_df = pd.DataFrame({'route_link': hrtc_bus_links, 'route_name': hrtc_bus_routes})

    # Echo the collected table for a quick sanity check.
    print(hrtc_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\hrtc.csv"
    hrtc_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    hrtc_driver.quit()


Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


HRTC Bus Link: https://www.redbus.in/bus-tickets/delhi-to-shimla, HRTC Bus Route: Delhi to Shimla
HRTC Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himachal-pradesh, HRTC Bus Route: Chandigarh to Hamirpur (Himachal Pradesh)
HRTC Bus Link: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh, HRTC Bus Route: Hamirpur (Himachal Pradesh) to Chandigarh
HRTC Bus Link: https://www.redbus.in/bus-tickets/shimla-to-delhi, HRTC Bus Route: Shimla to Delhi
HRTC Bus Link: https://www.redbus.in/bus-tickets/delhi-to-chandigarh, HRTC Bus Route: Delhi to Chandigarh
HRTC Bus Link: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-delhi, HRTC Bus Route: Hamirpur (Himachal Pradesh) to Delhi
HRTC Bus Link: https://www.redbus.in/bus-tickets/chamba-himachal-pradesh-to-chandigarh, HRTC Bus Route: Chamba (Himachal Pradesh) to Chandigarh
HRTC Bus Link: https://www.redbus.in/bus-tickets/delhi-to-hamirpur-himachal-pradesh, HRTC Bus Route: Delhi to Hamirpur (

In [12]:
# Last-expression display of the DataFrame assembled by the HRTC scraping cell.
hrtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/delhi-to-shimla,Delhi to Shimla
1,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Hamirpur (Himachal Pradesh)
2,https://www.redbus.in/bus-tickets/hamirpur-him...,Hamirpur (Himachal Pradesh) to Chandigarh
3,https://www.redbus.in/bus-tickets/shimla-to-delhi,Shimla to Delhi
4,https://www.redbus.in/bus-tickets/delhi-to-cha...,Delhi to Chandigarh
5,https://www.redbus.in/bus-tickets/hamirpur-him...,Hamirpur (Himachal Pradesh) to Delhi
6,https://www.redbus.in/bus-tickets/chamba-himac...,Chamba (Himachal Pradesh) to Chandigarh
7,https://www.redbus.in/bus-tickets/delhi-to-ham...,Delhi to Hamirpur (Himachal Pradesh)
8,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Dharamshala (Himachal Pradesh)
9,https://www.redbus.in/bus-tickets/delhi-to-cha...,Delhi to Chamba (Himachal Pradesh)


### West Bengal Transport Corporation - WBTC

In [13]:
# Start a separate Chrome WebDriver session for the WBTC scrape. Module-level
# because navigate_to_wbtc_page() and the scraping loop below both use this
# name directly.
wbtc_driver = webdriver.Chrome()

def navigate_to_wbtc_page(wbtc_page_num):
    """Switch the WBTC listing open in ``wbtc_driver`` to tab ``wbtc_page_num``.

    Locates the pagination container, picks the first tab whose stripped
    visible text equals ``str(wbtc_page_num)``, clicks it through
    ``ActionChains`` and then waits (up to 10 s) for an ``a.route[href]``
    element to be present.

    Args:
        wbtc_page_num: Page label to look for; compared as
            ``str(wbtc_page_num)``.

    Returns:
        True once the click and the follow-up wait have both completed.
        False when no tab carries the requested label, or when any exception
        is raised on the way (the error is printed, not propagated).

    NOTE(review): ``a.route[href]`` is the same selector the caller uses to
    read the route links of every page, so this wait may already be satisfied
    by the links that were on screen before the click — confirm that it really
    guarantees the new page has rendered.
    """
    try:
        # Block (max 10 s) until the pagination container exists in the DOM.
        pager = WebDriverWait(wbtc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # First tab whose label matches; None when the page number is absent.
        target_tab = next(
            (
                tab
                for tab in pager.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')
                if tab.text.strip() == str(wbtc_page_num)
            ),
            None,
        )
        if target_tab is None:
            return False  # If page is not found

        # Move the pointer onto the tab and click it as one composed action.
        ActionChains(wbtc_driver).move_to_element(target_tab).click().perform()

        # Require route links to be present before reporting success.
        WebDriverWait(wbtc_driver, 10).until(
            EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
        )
        return True
    except Exception as wbtc_error:
        print(f"Error navigating to page {wbtc_page_num}: {str(wbtc_error)}")
        return False

try:
    # Load the WBTC operator page, then block (max 10 s) until at least one
    # route link (an <a> with class "route" and an href) is present in the DOM.
    wbtc_driver.get('https://www.redbus.in/online-booking/wbtc-ctc')
    WebDriverWait(wbtc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: position i in both lists belongs to the same route.
    wbtc_bus_links, wbtc_bus_routes = [], []

    # Inclusive bounds of the pagination tabs to visit (four pages).
    wbtc_start_page, wbtc_end_page = 1, 4

    for wbtc_page_num in range(wbtc_start_page, wbtc_end_page + 1):
        # NOTE(review): a page that cannot be reached is only reported on
        # stdout; the loop carries on and the CSV is written without its rows.
        if not navigate_to_wbtc_page(wbtc_page_num):
            print(f"Failed to navigate to page {wbtc_page_num}")
            continue

        # Each anchor with class "route" and an href is one listed route.
        wbtc_bus_elements = wbtc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        for wbtc_element in wbtc_bus_elements:
            wbtc_link = wbtc_element.get_attribute('href')
            wbtc_route = wbtc_element.get_attribute('title')
            wbtc_bus_links.append(wbtc_link)
            wbtc_bus_routes.append(wbtc_route)
            print(f"WBTC Bus Link: {wbtc_link}, WBTC Bus Route: {wbtc_route}")

    # One row per collected link, in scrape order.
    wbtc_df = pd.DataFrame({'route_link': wbtc_bus_links, 'route_name': wbtc_bus_routes})

    # Echo the collected table for a quick sanity check.
    print(wbtc_df)

    # NOTE(review): absolute, machine-specific location — presumably only
    # valid on the author's machine; consider making the folder configurable.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\wbtc.csv"
    wbtc_df.to_csv(output_file_path, index=False)

finally:
    # Runs on success and on error alike, so the Chrome session is always closed.
    wbtc_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


WBTC Bus Link: https://www.redbus.in/bus-tickets/digha-to-barasat-west-bengal, WBTC Bus Route: Digha to Barasat (West Bengal)
WBTC Bus Link: https://www.redbus.in/bus-tickets/durgapur-to-kolkata, WBTC Bus Route: Durgapur to Calcutta
WBTC Bus Link: https://www.redbus.in/bus-tickets/digha-to-kolkata, WBTC Bus Route: Digha to Calcutta
WBTC Bus Link: https://www.redbus.in/bus-tickets/kolkata-to-digha, WBTC Bus Route: Kolkata to Digha
WBTC Bus Link: https://www.redbus.in/bus-tickets/barasat-west-bengal-to-digha, WBTC Bus Route: Barasat (West Bengal) to Digha
WBTC Bus Link: https://www.redbus.in/bus-tickets/kolkata-to-suri, WBTC Bus Route: Kolkata to Suri
WBTC Bus Link: https://www.redbus.in/bus-tickets/barasat-west-bengal-to-midnapore, WBTC Bus Route: Barasat (West Bengal) to Midnapore
WBTC Bus Link: https://www.redbus.in/bus-tickets/midnapore-to-kolkata, WBTC Bus Route: Midnapore to Kolkata
WBTC Bus Link: https://www.redbus.in/bus-tickets/barasat-west-bengal-to-kolaghat, WBTC Bus Route: Ba

In [14]:
# Last-expression display of the DataFrame assembled by the WBTC scraping cell.
wbtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/digha-to-bar...,Digha to Barasat (West Bengal)
1,https://www.redbus.in/bus-tickets/durgapur-to-...,Durgapur to Calcutta
2,https://www.redbus.in/bus-tickets/digha-to-kol...,Digha to Calcutta
3,https://www.redbus.in/bus-tickets/kolkata-to-d...,Kolkata to Digha
4,https://www.redbus.in/bus-tickets/barasat-west...,Barasat (West Bengal) to Digha
5,https://www.redbus.in/bus-tickets/kolkata-to-suri,Kolkata to Suri
6,https://www.redbus.in/bus-tickets/barasat-west...,Barasat (West Bengal) to Midnapore
7,https://www.redbus.in/bus-tickets/midnapore-to...,Midnapore to Kolkata
8,https://www.redbus.in/bus-tickets/barasat-west...,Barasat (West Bengal) to Kolaghat
9,https://www.redbus.in/bus-tickets/barasat-west...,Barasat (West Bengal) to Contai (Kanthi)


### Assam State Transport Corporation - ASTC

In [15]:
# Start a separate Chrome WebDriver session for the ASTC scrape. Module-level
# because navigate_to_astc_page() and the scraping loop below both use this
# name directly.
astc_driver = webdriver.Chrome()

def navigate_to_astc_page(astc_page_num):
    """Click the ASTC pagination button for a page and wait for its routes.

    Operates on the module-level ``astc_driver`` session, which must already
    have the operator page loaded.

    Args:
        astc_page_num: Page number to open. It is compared, as
            ``str(astc_page_num)``, with the stripped text of each pagination
            button.

    Returns:
        True once the matching button was clicked and route anchors are
        present. False when no button carries that number, or when any error
        is raised while navigating (the error is printed, not re-raised).
    """
    # Imported locally because only this function needs these exception types.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    def astc_route_links(driver):
        """Return the href of every route anchor currently in the DOM."""
        return [
            anchor.get_attribute('href')
            for anchor in driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        ]

    try:
        # Wait for the pagination section specific to ASTC to be present
        astc_pagination_section = WebDriverWait(astc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # Find all page buttons in the pagination section
        astc_page_buttons = astc_pagination_section.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')

        # Click on the correct page number
        for astc_button in astc_page_buttons:
            if astc_button.text.strip() == str(astc_page_num):
                # Snapshot the routes shown *before* the click. Route anchors
                # already exist at this point, so a plain presence wait after
                # the click would succeed immediately, even if the list has
                # not been replaced yet.
                astc_links_before = astc_route_links(astc_driver)

                # Use ActionChains to click the page button
                astc_actions = ActionChains(astc_driver)
                astc_actions.move_to_element(astc_button).click().perform()

                def astc_routes_changed(driver):
                    astc_links_now = astc_route_links(driver)
                    return bool(astc_links_now) and astc_links_now != astc_links_before

                try:
                    # Anchors can be detached mid-render; ignore that and poll again.
                    WebDriverWait(
                        astc_driver, 5, ignored_exceptions=(StaleElementReferenceException,)
                    ).until(astc_routes_changed)
                except TimeoutException:
                    # Best effort: the list does not change when the requested
                    # page is the one already displayed (e.g. page 1 right
                    # after load), so fall through to the presence check.
                    pass

                # Wait for the page to load routes with href attributes
                WebDriverWait(astc_driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
                )
                return True
        return False  # If page is not found
    except Exception as astc_error:
        # Deliberate best-effort: report the problem and let the caller skip this page.
        print(f"Error navigating to page {astc_page_num}: {str(astc_error)}")
        return False

try:
    # Open the ASTC operator listing
    astc_driver.get('https://www.redbus.in/online-booking/assam-state-transport-corporation-astc')

    # Block for up to 10 s until at least one route anchor is in the DOM
    WebDriverWait(astc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: entry i of each list comes from the same anchor
    astc_bus_links, astc_bus_routes = [], []

    # Inclusive page range to scrape (pages 1 through 5)
    astc_start_page, astc_end_page = 1, 5

    for astc_page_num in range(astc_start_page, astc_end_page + 1):
        # Skip pages whose pagination button could not be opened
        if not navigate_to_astc_page(astc_page_num):
            print(f"Failed to navigate to page {astc_page_num}")
            continue

        # Every route anchor currently shown for this page
        astc_bus_elements = astc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')

        for astc_element in astc_bus_elements:
            # href carries the route URL, title carries the route name
            astc_link = astc_element.get_attribute('href')
            astc_route = astc_element.get_attribute('title')
            astc_bus_links.append(astc_link)
            astc_bus_routes.append(astc_route)
            print(f"ASTC Bus Link: {astc_link}, ASTC Bus Route: {astc_route}")

    # Assemble the two lists into a two-column frame
    astc_df = pd.DataFrame(dict(route_link=astc_bus_links, route_name=astc_bus_routes))

    # Echo the frame so the scrape can be eyeballed in the cell output
    print(astc_df)

    # Persist without the row index
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\astc.csv"
    astc_df.to_csv(output_file_path, index=False)

finally:
    # Always release the browser, whether or not the scrape succeeded
    astc_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


ASTC Bus Link: https://www.redbus.in/bus-tickets/tezpur-to-guwahati, ASTC Bus Route: Tezpur to Guwahati
ASTC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-tezpur, ASTC Bus Route: Guwahati to Tezpur
ASTC Bus Link: https://www.redbus.in/bus-tickets/guwahati-to-nagaon, ASTC Bus Route: Guwahati to Nagaon (Assam)
ASTC Bus Link: https://www.redbus.in/bus-tickets/nagaon-to-guwahati, ASTC Bus Route: Nagaon (Assam) to Guwahati
ASTC Bus Link: https://www.redbus.in/bus-tickets/goalpara-to-guwahati, ASTC Bus Route: Goalpara to Guwahati
ASTC Bus Link: https://www.redbus.in/bus-tickets/jorhat-to-north-lakhimpur, ASTC Bus Route: Jorhat to North Lakhimpur
ASTC Bus Link: https://www.redbus.in/bus-tickets/dhubri-to-guwahati, ASTC Bus Route: Dhubri to Guwahati
ASTC Bus Link: https://www.redbus.in/bus-tickets/jorhat-to-dibrugarh, ASTC Bus Route: Jorhat to Dibrugarh
ASTC Bus Link: https://www.redbus.in/bus-tickets/north-lakhimpur-to-jorhat, ASTC Bus Route: North Lakhimpur to Jorhat
ASTC Bus Link:

In [16]:
# Show the ASTC routes DataFrame (columns: route_link, route_name) as the cell's rich output.
astc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/tezpur-to-gu...,Tezpur to Guwahati
1,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Tezpur
2,https://www.redbus.in/bus-tickets/guwahati-to-...,Guwahati to Nagaon (Assam)
3,https://www.redbus.in/bus-tickets/nagaon-to-gu...,Nagaon (Assam) to Guwahati
4,https://www.redbus.in/bus-tickets/goalpara-to-...,Goalpara to Guwahati
5,https://www.redbus.in/bus-tickets/jorhat-to-no...,Jorhat to North Lakhimpur
6,https://www.redbus.in/bus-tickets/dhubri-to-gu...,Dhubri to Guwahati
7,https://www.redbus.in/bus-tickets/jorhat-to-di...,Jorhat to Dibrugarh
8,https://www.redbus.in/bus-tickets/north-lakhim...,North Lakhimpur to Jorhat
9,https://www.redbus.in/bus-tickets/north-lakhim...,North Lakhimpur to Sibsagar


### Andhra Pradesh State Road Transport Corporation - APSRTC

In [17]:
# Initialize WebDriver for APSRTC
# Starts a Chrome session with default options. navigate_to_apsrtc_page() and the
# scraping block later in this cell both use this module-level driver, and that
# block's `finally` clause quits it.
apsrtc_driver = webdriver.Chrome()

def navigate_to_apsrtc_page(apsrtc_page_num):
    """Click the APSRTC pagination button for a page and wait for its routes.

    Operates on the module-level ``apsrtc_driver`` session, which must already
    have the operator page loaded.

    Args:
        apsrtc_page_num: Page number to open. It is compared, as
            ``str(apsrtc_page_num)``, with the stripped text of each
            pagination button.

    Returns:
        True once the matching button was clicked and route anchors are
        present. False when no button carries that number, or when any error
        is raised while navigating (the error is printed, not re-raised).
    """
    # Imported locally because only this function needs these exception types.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    def apsrtc_route_links(driver):
        """Return the href of every route anchor currently in the DOM."""
        return [
            anchor.get_attribute('href')
            for anchor in driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        ]

    try:
        # Wait for the pagination section specific to APSRTC to be present
        apsrtc_pagination_section = WebDriverWait(apsrtc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # Find all page buttons in the pagination section
        apsrtc_page_buttons = apsrtc_pagination_section.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')

        # Click on the correct page number
        for apsrtc_button in apsrtc_page_buttons:
            if apsrtc_button.text.strip() == str(apsrtc_page_num):
                # Snapshot the routes shown *before* the click. Route anchors
                # already exist at this point, so a plain presence wait after
                # the click would succeed immediately, even if the list has
                # not been replaced yet.
                apsrtc_links_before = apsrtc_route_links(apsrtc_driver)

                # Use ActionChains to click the page button
                apsrtc_actions = ActionChains(apsrtc_driver)
                apsrtc_actions.move_to_element(apsrtc_button).click().perform()

                def apsrtc_routes_changed(driver):
                    apsrtc_links_now = apsrtc_route_links(driver)
                    return bool(apsrtc_links_now) and apsrtc_links_now != apsrtc_links_before

                try:
                    # Anchors can be detached mid-render; ignore that and poll again.
                    WebDriverWait(
                        apsrtc_driver, 5, ignored_exceptions=(StaleElementReferenceException,)
                    ).until(apsrtc_routes_changed)
                except TimeoutException:
                    # Best effort: the list does not change when the requested
                    # page is the one already displayed (e.g. page 1 right
                    # after load), so fall through to the presence check.
                    pass

                # Wait for the page to load routes with href attributes
                WebDriverWait(apsrtc_driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
                )
                return True
        return False  # If page is not found
    except Exception as apsrtc_error:
        # Deliberate best-effort: report the problem and let the caller skip this page.
        print(f"Error navigating to page {apsrtc_page_num}: {str(apsrtc_error)}")
        return False

try:
    # Open the APSRTC operator listing
    apsrtc_driver.get('https://www.redbus.in/online-booking/apsrtc')

    # Block for up to 10 s until at least one route anchor is in the DOM
    WebDriverWait(apsrtc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: entry i of each list comes from the same anchor
    apsrtc_bus_links, apsrtc_bus_routes = [], []

    # Inclusive page range to scrape (pages 1 through 5)
    apsrtc_start_page, apsrtc_end_page = 1, 5

    for apsrtc_page_num in range(apsrtc_start_page, apsrtc_end_page + 1):
        # Skip pages whose pagination button could not be opened
        if not navigate_to_apsrtc_page(apsrtc_page_num):
            print(f"Failed to navigate to page {apsrtc_page_num}")
            continue

        # Every route anchor currently shown for this page
        apsrtc_bus_elements = apsrtc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')

        for apsrtc_element in apsrtc_bus_elements:
            # href carries the route URL, title carries the route name
            apsrtc_link = apsrtc_element.get_attribute('href')
            apsrtc_route = apsrtc_element.get_attribute('title')
            apsrtc_bus_links.append(apsrtc_link)
            apsrtc_bus_routes.append(apsrtc_route)
            print(f"APSRTC Bus Link: {apsrtc_link}, APSRTC Bus Route: {apsrtc_route}")

    # Assemble the two lists into a two-column frame
    apsrtc_df = pd.DataFrame(dict(route_link=apsrtc_bus_links, route_name=apsrtc_bus_routes))

    # Echo the frame so the scrape can be eyeballed in the cell output
    print(apsrtc_df)

    # Persist without the row index.
    # NOTE(review): the file name is spelled "apstrc", not "apsrtc". It is kept
    # as-is because other code may read this exact path; confirm before renaming.
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\apstrc.csv"
    apsrtc_df.to_csv(output_file_path, index=False)

finally:
    # Always release the browser, whether or not the scrape succeeded
    apsrtc_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


APSRTC Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-vijayawada, APSRTC Bus Route: Hyderabad to Vijayawada
APSRTC Bus Link: https://www.redbus.in/bus-tickets/vijayawada-to-hyderabad, APSRTC Bus Route: Vijayawada to Hyderabad
APSRTC Bus Link: https://www.redbus.in/bus-tickets/hyderabad-to-ongole, APSRTC Bus Route: Hyderabad to Ongole
APSRTC Bus Link: https://www.redbus.in/bus-tickets/kakinada-to-visakhapatnam, APSRTC Bus Route: Kakinada to Visakhapatnam
APSRTC Bus Link: https://www.redbus.in/bus-tickets/bangalore-to-tirupathi, APSRTC Bus Route: Bangalore to Tirupati
APSRTC Bus Link: https://www.redbus.in/bus-tickets/bangalore-to-kadapa, APSRTC Bus Route: Bangalore to Kadapa
APSRTC Bus Link: https://www.redbus.in/bus-tickets/ongole-to-hyderabad, APSRTC Bus Route: Ongole to Hyderabad
APSRTC Bus Link: https://www.redbus.in/bus-tickets/kadapa-to-bangalore, APSRTC Bus Route: Kadapa to Bangalore
APSRTC Bus Link: https://www.redbus.in/bus-tickets/chittoor-andhra-pradesh-to-bangalore

In [18]:
# Show the APSRTC routes DataFrame (columns: route_link, route_name) as the cell's rich output.
apsrtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Vijayawada
1,https://www.redbus.in/bus-tickets/vijayawada-t...,Vijayawada to Hyderabad
2,https://www.redbus.in/bus-tickets/hyderabad-to...,Hyderabad to Ongole
3,https://www.redbus.in/bus-tickets/kakinada-to-...,Kakinada to Visakhapatnam
4,https://www.redbus.in/bus-tickets/bangalore-to...,Bangalore to Tirupati
5,https://www.redbus.in/bus-tickets/bangalore-to...,Bangalore to Kadapa
6,https://www.redbus.in/bus-tickets/ongole-to-hy...,Ongole to Hyderabad
7,https://www.redbus.in/bus-tickets/kadapa-to-ba...,Kadapa to Bangalore
8,https://www.redbus.in/bus-tickets/chittoor-and...,Chittoor (Andhra Pradesh) to Bangalore
9,https://www.redbus.in/bus-tickets/visakhapatna...,Visakhapatnam to Kakinada


### Rajasthan State Road Transport Corporation - RSRTC

In [19]:
# Initialize WebDriver for RSRTC
# Starts a Chrome session with default options. navigate_to_rsrtc_page() and the
# scraping block later in this cell both use this module-level driver, and that
# block's `finally` clause quits it.
rsrtc_driver = webdriver.Chrome()

def navigate_to_rsrtc_page(rsrtc_page_num):
    """Click the RSRTC pagination button for a page and wait for its routes.

    Operates on the module-level ``rsrtc_driver`` session, which must already
    have the operator page loaded.

    Args:
        rsrtc_page_num: Page number to open. It is compared, as
            ``str(rsrtc_page_num)``, with the stripped text of each
            pagination button.

    Returns:
        True once the matching button was clicked and route anchors are
        present. False when no button carries that number, or when any error
        is raised while navigating (the error is printed, not re-raised).
    """
    # Imported locally because only this function needs these exception types.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    def rsrtc_route_links(driver):
        """Return the href of every route anchor currently in the DOM."""
        return [
            anchor.get_attribute('href')
            for anchor in driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        ]

    try:
        # Wait for the pagination section specific to RSRTC to be present
        rsrtc_pagination_section = WebDriverWait(rsrtc_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # Find all page buttons in the pagination section
        rsrtc_page_buttons = rsrtc_pagination_section.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')

        # Click on the correct page number
        for rsrtc_button in rsrtc_page_buttons:
            if rsrtc_button.text.strip() == str(rsrtc_page_num):
                # Snapshot the routes shown *before* the click. Route anchors
                # already exist at this point, so a plain presence wait after
                # the click would succeed immediately, even if the list has
                # not been replaced yet.
                rsrtc_links_before = rsrtc_route_links(rsrtc_driver)

                # Use ActionChains to click the page button
                rsrtc_actions = ActionChains(rsrtc_driver)
                rsrtc_actions.move_to_element(rsrtc_button).click().perform()

                def rsrtc_routes_changed(driver):
                    rsrtc_links_now = rsrtc_route_links(driver)
                    return bool(rsrtc_links_now) and rsrtc_links_now != rsrtc_links_before

                try:
                    # Anchors can be detached mid-render; ignore that and poll again.
                    WebDriverWait(
                        rsrtc_driver, 5, ignored_exceptions=(StaleElementReferenceException,)
                    ).until(rsrtc_routes_changed)
                except TimeoutException:
                    # Best effort: the list does not change when the requested
                    # page is the one already displayed (e.g. page 1 right
                    # after load), so fall through to the presence check.
                    pass

                # Wait for the page to load routes with href attributes
                WebDriverWait(rsrtc_driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
                )
                return True
        return False  # If page is not found
    except Exception as rsrtc_error:
        # Deliberate best-effort: report the problem and let the caller skip this page.
        print(f"Error navigating to page {rsrtc_page_num}: {str(rsrtc_error)}")
        return False

try:
    # Open the RSRTC operator listing
    rsrtc_driver.get('https://www.redbus.in/online-booking/rajasthan-state-road-transport-corporation')

    # Block for up to 10 s until at least one route anchor is in the DOM
    WebDriverWait(rsrtc_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: entry i of each list comes from the same anchor
    rsrtc_bus_links, rsrtc_bus_routes = [], []

    # Inclusive page range to scrape (pages 1 through 2)
    rsrtc_start_page, rsrtc_end_page = 1, 2

    for rsrtc_page_num in range(rsrtc_start_page, rsrtc_end_page + 1):
        # Skip pages whose pagination button could not be opened
        if not navigate_to_rsrtc_page(rsrtc_page_num):
            print(f"Failed to navigate to page {rsrtc_page_num}")
            continue

        # Every route anchor currently shown for this page
        rsrtc_bus_elements = rsrtc_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')

        for rsrtc_element in rsrtc_bus_elements:
            # href carries the route URL, title carries the route name
            rsrtc_link = rsrtc_element.get_attribute('href')
            rsrtc_route = rsrtc_element.get_attribute('title')
            rsrtc_bus_links.append(rsrtc_link)
            rsrtc_bus_routes.append(rsrtc_route)
            print(f"RSRTC Bus Link: {rsrtc_link}, RSRTC Bus Route: {rsrtc_route}")

    # Assemble the two lists into a two-column frame
    rsrtc_df = pd.DataFrame(dict(route_link=rsrtc_bus_links, route_name=rsrtc_bus_routes))

    # Echo the frame so the scrape can be eyeballed in the cell output
    print(rsrtc_df)

    # Persist without the row index
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\rsrtc.csv"
    rsrtc_df.to_csv(output_file_path, index=False)

finally:
    # Always release the browser, whether or not the scrape succeeded
    rsrtc_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


RSRTC Bus Link: https://www.redbus.in/bus-tickets/udaipur-to-jodhpur, RSRTC Bus Route: Udaipur to Jodhpur
RSRTC Bus Link: https://www.redbus.in/bus-tickets/jodhpur-to-ajmer, RSRTC Bus Route: Jodhpur to Ajmer
RSRTC Bus Link: https://www.redbus.in/bus-tickets/beawer-to-jaipur, RSRTC Bus Route: Beawar (Rajasthan) to Jaipur (Rajasthan)
RSRTC Bus Link: https://www.redbus.in/bus-tickets/sikar-to-jaipur, RSRTC Bus Route: Sikar to Jaipur (Rajasthan)
RSRTC Bus Link: https://www.redbus.in/bus-tickets/jaipur-to-jodhpur, RSRTC Bus Route: Jaipur (Rajasthan) to Jodhpur
RSRTC Bus Link: https://www.redbus.in/bus-tickets/aligarh-uttar-pradesh-to-jaipur, RSRTC Bus Route: Aligarh (uttar pradesh) to Jaipur (Rajasthan)
RSRTC Bus Link: https://www.redbus.in/bus-tickets/jaipur-to-aligarh-uttar-pradesh, RSRTC Bus Route: Jaipur (Rajasthan) to Aligarh (uttar pradesh)
RSRTC Bus Link: https://www.redbus.in/bus-tickets/jodhpur-to-beawer, RSRTC Bus Route: Jodhpur to Beawar (Rajasthan)
RSRTC Bus Link: https://www.re

In [20]:
# Show the RSRTC routes DataFrame (columns: route_link, route_name) as the cell's rich output.
rsrtc_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/udaipur-to-j...,Udaipur to Jodhpur
1,https://www.redbus.in/bus-tickets/jodhpur-to-a...,Jodhpur to Ajmer
2,https://www.redbus.in/bus-tickets/beawer-to-ja...,Beawar (Rajasthan) to Jaipur (Rajasthan)
3,https://www.redbus.in/bus-tickets/sikar-to-jaipur,Sikar to Jaipur (Rajasthan)
4,https://www.redbus.in/bus-tickets/jaipur-to-jo...,Jaipur (Rajasthan) to Jodhpur
5,https://www.redbus.in/bus-tickets/aligarh-utta...,Aligarh (uttar pradesh) to Jaipur (Rajasthan)
6,https://www.redbus.in/bus-tickets/jaipur-to-al...,Jaipur (Rajasthan) to Aligarh (uttar pradesh)
7,https://www.redbus.in/bus-tickets/jodhpur-to-b...,Jodhpur to Beawar (Rajasthan)
8,https://www.redbus.in/bus-tickets/jaipur-to-pi...,Jaipur (Rajasthan) to Pilani
9,https://www.redbus.in/bus-tickets/kishangarh-t...,Kishangarh to Jaipur (Rajasthan)


### Chandigarh Transport Undertaking - CTU

In [21]:
# Initialize WebDriver for CTU
# Starts a Chrome session with default options. navigate_to_ctu_page() and the
# scraping block later in this cell both use this module-level driver, and that
# block's `finally` clause quits it.
ctu_driver = webdriver.Chrome()

def navigate_to_ctu_page(ctu_page_num):
    """Click the CTU pagination button for a page and wait for its routes.

    Operates on the module-level ``ctu_driver`` session, which must already
    have the operator page loaded.

    Args:
        ctu_page_num: Page number to open. It is compared, as
            ``str(ctu_page_num)``, with the stripped text of each pagination
            button.

    Returns:
        True once the matching button was clicked and route anchors are
        present. False when no button carries that number, or when any error
        is raised while navigating (the error is printed, not re-raised).
    """
    # Imported locally because only this function needs these exception types.
    from selenium.common.exceptions import StaleElementReferenceException, TimeoutException

    def ctu_route_links(driver):
        """Return the href of every route anchor currently in the DOM."""
        return [
            anchor.get_attribute('href')
            for anchor in driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')
        ]

    try:
        # Wait for the pagination section specific to CTU to be present
        ctu_pagination_section = WebDriverWait(ctu_driver, 10).until(
            EC.presence_of_element_located((By.CLASS_NAME, 'DC_117_paginationTable'))
        )

        # Find all page buttons in the pagination section
        ctu_page_buttons = ctu_pagination_section.find_elements(By.CLASS_NAME, 'DC_117_pageTabs')

        # Click on the correct page number
        for ctu_button in ctu_page_buttons:
            if ctu_button.text.strip() == str(ctu_page_num):
                # Snapshot the routes shown *before* the click. Route anchors
                # already exist at this point, so a plain presence wait after
                # the click would succeed immediately, even if the list has
                # not been replaced yet.
                ctu_links_before = ctu_route_links(ctu_driver)

                # Use ActionChains to click the page button
                ctu_actions = ActionChains(ctu_driver)
                ctu_actions.move_to_element(ctu_button).click().perform()

                def ctu_routes_changed(driver):
                    ctu_links_now = ctu_route_links(driver)
                    return bool(ctu_links_now) and ctu_links_now != ctu_links_before

                try:
                    # Anchors can be detached mid-render; ignore that and poll again.
                    WebDriverWait(
                        ctu_driver, 5, ignored_exceptions=(StaleElementReferenceException,)
                    ).until(ctu_routes_changed)
                except TimeoutException:
                    # Best effort: the list does not change when the requested
                    # page is the one already displayed (e.g. page 1 right
                    # after load), so fall through to the presence check.
                    pass

                # Wait for the page to load routes with href attributes
                WebDriverWait(ctu_driver, 10).until(
                    EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
                )
                return True
        return False  # If page is not found
    except Exception as ctu_error:
        # Deliberate best-effort: report the problem and let the caller skip this page.
        print(f"Error navigating to page {ctu_page_num}: {str(ctu_error)}")
        return False

try:
    # Open the CTU operator listing
    ctu_driver.get('https://www.redbus.in/online-booking/chandigarh-transport-undertaking-ctu')

    # Block for up to 10 s until at least one route anchor is in the DOM
    WebDriverWait(ctu_driver, 10).until(
        EC.presence_of_element_located((By.CSS_SELECTOR, 'a.route[href]'))
    )

    # Parallel accumulators: entry i of each list comes from the same anchor
    ctu_bus_links, ctu_bus_routes = [], []

    # Inclusive page range to scrape (pages 1 through 5)
    ctu_start_page, ctu_end_page = 1, 5

    for ctu_page_num in range(ctu_start_page, ctu_end_page + 1):
        # Skip pages whose pagination button could not be opened
        if not navigate_to_ctu_page(ctu_page_num):
            print(f"Failed to navigate to page {ctu_page_num}")
            continue

        # Every route anchor currently shown for this page
        ctu_bus_elements = ctu_driver.find_elements(By.CSS_SELECTOR, 'a.route[href]')

        for ctu_element in ctu_bus_elements:
            # href carries the route URL, title carries the route name
            ctu_link = ctu_element.get_attribute('href')
            ctu_route = ctu_element.get_attribute('title')
            ctu_bus_links.append(ctu_link)
            ctu_bus_routes.append(ctu_route)
            print(f"CTU Bus Link: {ctu_link}, CTU Bus Route: {ctu_route}")

    # Assemble the two lists into a two-column frame
    ctu_df = pd.DataFrame(dict(route_link=ctu_bus_links, route_name=ctu_bus_routes))

    # Echo the frame so the scrape can be eyeballed in the cell output
    print(ctu_df)

    # Persist without the row index
    output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\ctu.csv"
    ctu_df.to_csv(output_file_path, index=False)

finally:
    # Always release the browser, whether or not the scrape succeeded
    ctu_driver.quit()

Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)
Cache folder (C:\Users\abdpa\.cache\selenium) cannot be created: Cannot create a file when that file already exists. (os error 183)


CTU Bus Link: https://www.redbus.in/bus-tickets/yamuna-nagar-to-chandigarh, CTU Bus Route: Yamuna Nagar to Chandigarh
CTU Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-delhi, CTU Bus Route: Chandigarh to Delhi
CTU Bus Link: https://www.redbus.in/bus-tickets/delhi-to-chandigarh, CTU Bus Route: Delhi to Chandigarh
CTU Bus Link: https://www.redbus.in/bus-tickets/ludhiana-to-chandigarh, CTU Bus Route: Ludhiana to Chandigarh
CTU Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-yamuna-nagar, CTU Bus Route: Chandigarh to Yamuna Nagar
CTU Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-ludhiana, CTU Bus Route: Chandigarh to Ludhiana
CTU Bus Link: https://www.redbus.in/bus-tickets/hamirpur-himachal-pradesh-to-chandigarh, CTU Bus Route: Hamirpur (Himachal Pradesh) to Chandigarh
CTU Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-vrindavan, CTU Bus Route: Chandigarh to Vrindavan
CTU Bus Link: https://www.redbus.in/bus-tickets/chandigarh-to-hamirpur-himacha

In [22]:
# Show the CTU routes DataFrame (columns: route_link, route_name) as the cell's rich output.
ctu_df 

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/yamuna-nagar...,Yamuna Nagar to Chandigarh
1,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Delhi
2,https://www.redbus.in/bus-tickets/delhi-to-cha...,Delhi to Chandigarh
3,https://www.redbus.in/bus-tickets/ludhiana-to-...,Ludhiana to Chandigarh
4,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Yamuna Nagar
5,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Ludhiana
6,https://www.redbus.in/bus-tickets/hamirpur-him...,Hamirpur (Himachal Pradesh) to Chandigarh
7,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Vrindavan
8,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Hamirpur (Himachal Pradesh)
9,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Pathankot


## Concatenating all the dataframes into one

In [23]:
# Per-operator route frames, in the order they should appear in the combined table
dataframes = [
    ktcl_df,
    tsrtc_df,
    kaac_df,
    mtc_df,
    hrtc_df,
    wbtc_df,
    astc_df,
    apsrtc_df,
    rsrtc_df,
    ctu_df,
]

# Stack the frames row-wise; ignore_index=True renumbers the rows from 0
# instead of repeating each frame's own index.
all_bus_routes_df = pd.concat(objs=dataframes, axis=0, ignore_index=True)

# Plain-text dump of the combined frame as a sanity check
print(all_bus_routes_df)

                                            route_link  \
0        https://www.redbus.in/bus-tickets/pune-to-goa   
1        https://www.redbus.in/bus-tickets/goa-to-pune   
2      https://www.redbus.in/bus-tickets/mumbai-to-goa   
3    https://www.redbus.in/bus-tickets/bangalore-to...   
4    https://www.redbus.in/bus-tickets/goa-to-banga...   
..                                                 ...   
319  https://www.redbus.in/bus-tickets/chandigarh-t...   
320  https://www.redbus.in/bus-tickets/ferozpur-to-...   
321  https://www.redbus.in/bus-tickets/hansi-to-cha...   
322  https://www.redbus.in/bus-tickets/chandigarh-t...   
323  https://www.redbus.in/bus-tickets/chandigarh-t...   

                        route_name  
0                      Pune to Goa  
1                      Goa to Pune  
2                    Mumbai to Goa  
3                 Bangalore to Goa  
4                 Goa to Bangalore  
..                             ...  
319  Chandigarh to Jammu (j and k)  
320    

In [24]:
# Show the combined routes DataFrame as the cell's rich output.
all_bus_routes_df

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
1,https://www.redbus.in/bus-tickets/goa-to-pune,Goa to Pune
2,https://www.redbus.in/bus-tickets/mumbai-to-goa,Mumbai to Goa
3,https://www.redbus.in/bus-tickets/bangalore-to...,Bangalore to Goa
4,https://www.redbus.in/bus-tickets/goa-to-banga...,Goa to Bangalore
...,...,...
319,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Jammu (j and k)
320,https://www.redbus.in/bus-tickets/ferozpur-to-...,Firozpur to Chandigarh
321,https://www.redbus.in/bus-tickets/hansi-to-cha...,Hansi to Chandigarh
322,https://www.redbus.in/bus-tickets/chandigarh-t...,Chandigarh to Haldwani


In [25]:
# Save the combined DataFrame to a CSV file in the specified folder.
# NOTE(review): this is a hard-coded absolute Windows path, so the cell only
# works on a machine where that folder exists.
output_file_path = r"P:\Capstone Guvi\Redbus Data Scrapping\all_bus_routes.csv"
# index=False keeps the row index out of the file, so only the DataFrame's own columns are written.
all_bus_routes_df.to_csv(output_file_path, index=False)

In [26]:
# Preview the first rows of the combined frame (head() defaults to five rows).
all_bus_routes_df.head()

Unnamed: 0,route_link,route_name
0,https://www.redbus.in/bus-tickets/pune-to-goa,Pune to Goa
1,https://www.redbus.in/bus-tickets/goa-to-pune,Goa to Pune
2,https://www.redbus.in/bus-tickets/mumbai-to-goa,Mumbai to Goa
3,https://www.redbus.in/bus-tickets/bangalore-to...,Bangalore to Goa
4,https://www.redbus.in/bus-tickets/goa-to-banga...,Goa to Bangalore
