In [9]:
import time
import pandas as pd
from pathlib import Path

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException, TimeoutException, ElementClickInterceptedException

# CSV file the scraped daily record is written to; later cells append to it,
# then re-read it, drop duplicate dates and rewrite it.
filename = "NammaMetro_Ridership_Dataset.csv"

In [10]:
# Chrome command-line switches used to create the 'headless' browser,
# applied to the Options object in the order listed.
options = Options()
for chrome_flag in (
    "--blink-settings=imagesEnabled=false",
    "--headless=new",
    "--disable-gpu",
    "--no-sandbox",
    "--disable-dev-shm-usage",
    "--window-size=1920,1080",
    "--incognito",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.77 Safari/537.37",
):
    options.add_argument(chrome_flag)

# One browser session plus a shared explicit wait (20 second timeout)
# that the following cells reuse.
driver = webdriver.Chrome(options=options)
wait = WebDriverWait(driver, 20)

In [11]:
# Open the ridership page on the BMRCL website
driver.get("https://english.bmrc.co.in/ridership/")

# Wait for the Kannada toggle button to become clickable, then click it
# to load English results
toggle_locator = (By.CSS_SELECTOR, ".link.top-navcustom-text")
toggle_button = wait.until(EC.element_to_be_clickable(toggle_locator))
time.sleep(10)  # fixed pause before the click, mirroring the script's timing
toggle_button.click()

In [12]:
# The site publishes results with a lag of about one day, so read the date
# from the page's first visible <h3> heading instead of using date.today()
heading_locator = (By.TAG_NAME, "h3")
record_date = wait.until(EC.visibility_of_element_located(heading_locator)).text
record_date

'Passenger Flow as on 11-10-2025'

In [13]:
# Start the ridership record with the date, taken as the last
# whitespace-separated token of the heading text
date_token = record_date.split()[-1]
day_record = {'Record Date': [date_token]}
day_record

{'Record Date': ['11-10-2025']}

In [14]:
# Parse html for remaining data points and store in pandas dataframe.
# The browser is closed in a `finally` block so that a timeout or a parsing
# error does not leave the headless Chrome process running.
try:
    data_points = wait.until(EC.presence_of_all_elements_located((By.CSS_SELECTOR, ".features-card.achivement-area.bg-color")))

    for card in data_points:
        # Split each card's text into lines and keep those that are exactly
        # one "label: value" pair; every other line is skipped.
        for line in card.text.split('\n'):
            data = line.split(': ')
            if len(data) == 2:
                key, value = data
                try:
                    # Strip any comma separators before converting to int
                    day_record[key] = [int(value.replace(',', ''))]
                except ValueError:
                    # Keep non-numeric values as text rather than dropping them
                    day_record[key] = [value]
finally:
    driver.quit()

# One-row DataFrame; 'Tokens' is renamed to 'Total Tokens'.
# rename() returns a new frame, so no in-place mutation is needed.
day_record = pd.DataFrame(day_record).rename(columns={'Tokens': 'Total Tokens'})
day_record

Unnamed: 0,Record Date,Total Smart Cards,Stored Value Card,One Day Pass,Three Day Pass,Five Day Pass,Total Tokens,Total NCMC,Group Ticket,Total QR,QR NammaMetro,QR WhatsApp,QR Paytm
0,11-10-2025,297891,297354,330,79,128,311567,40890,343,143606,86498,44928,7815


In [15]:
# Store data in csv file - create file if necessary.
# Appending with header=False writes values purely by position, so the new
# row is appended as-is only when its columns match the file's header.
# Otherwise the rows are merged by column name and the whole file is
# rewritten, leaving cells blank where a column is missing on either side.
filePath = Path(filename)
if filePath.is_file() and filePath.stat().st_size > 0:
    existing_columns = pd.read_csv(filePath, nrows=0).columns
    if list(existing_columns) == list(day_record.columns):
        day_record.to_csv(filePath, mode='a', header=False, index=False, lineterminator='\n')
    else:
        merged = pd.concat([pd.read_csv(filePath), day_record], ignore_index=True)
        merged.to_csv(filePath, mode='w', header=True, index=False, lineterminator='\n')
    print('Appended '+filename)
else:
    # No file yet (or a zero-byte one): write the header with the first row
    day_record.to_csv(filePath, mode='w', header=True, index=False, lineterminator='\n')
    print('Created '+filename)

Appended NammaMetro_Ridership_Dataset.csv


In [16]:
# Reload the dataset, keep only the last row for each 'Record Date',
# and rewrite the file
df = pd.read_csv(filename).drop_duplicates(
    subset=['Record Date'],
    keep='last',
    ignore_index=True,
)

# Every column from 'Total Smart Cards' onward is cast to nullable Int64
count_columns = df.loc[:, 'Total Smart Cards':].columns
df[count_columns] = df[count_columns].astype('Int64')

df.to_csv(filename, mode='w', header=True, index=False, lineterminator='\n')
df.tail()

Unnamed: 0,Record Date,Total Smart Cards,Stored Value Card,One Day Pass,Three Day Pass,Five Day Pass,Total Tokens,Total NCMC,Group Ticket,Total QR,QR NammaMetro,QR WhatsApp,QR Paytm
261,04-10-2025,244249,243426,631,99,93,270822,28446,280,98542,60465,31538,4977
262,06-10-2025,454943,453961,199,29,754,277752,53855,490,112825,73443,31251,5411
263,09-10-2025,453055,452011,175,44,825,247329,63691,166,93815,67335,84416,5369
264,10-10-2025,471904,470843,145,61,855,248303,61216,230,100450,69750,38647,5470
265,11-10-2025,297891,297354,330,79,128,311567,40890,343,143606,86498,44928,7815
