In [1]:
import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
from pathlib import Path
import time

In [2]:
# Build the Chrome options for a 'headless' browser session, then start the driver.
# NOTE(review): the user-agent ends in "Safari/537.37" while its AppleWebKit
# token reads 537.36 -- confirm whether the mismatch is intentional.
options = Options()
for chrome_flag in (
    "--blink-settings=imagesEnabled=false",
    "--headless=new",
    "--disable-gpu",
    "--no-sandbox",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.77 Safari/537.37",
):
    options.add_argument(chrome_flag)

driver = webdriver.Chrome(options=options)

In [3]:
# Load ridership page from BMRCL website
driver.get("https://english.bmrc.co.in/ridership/")

# Fixed pause that allows extra time for JavaScript to load translated data
PAGE_LOAD_WAIT_SECONDS = 10
time.sleep(PAGE_LOAD_WAIT_SECONDS)

# Click on Kannada toggle button to load English results.
# The button carries two classes, so it is matched with an explicit CSS
# selector rather than passing a compound name to By.CLASS_NAME.
# click() returns None, so its result is not stored.
driver.find_element(By.CSS_SELECTOR, ".link.top-navcustom-text").click()

In [4]:
# Results are published with a lag of about one day, so the date is read from
# the page's first <h3> heading rather than taken from date.today().
date_heading = driver.find_element(By.TAG_NAME, "h3")
record_date = date_heading.text
record_date

'Passenger Flow as on 14-02-2025'

In [5]:
# Initialize the dict that stores the day's ridership data.
# Only the last whitespace-separated token of the heading (the date part) is
# kept; it is wrapped in a one-element list like every other value stored here.
day_record = {'Record Date': [record_date.split()[-1]]}
day_record

{'Record Date': ['14-02-2025']}

In [6]:
# Parse html for remaining data points and store in pandas dataframe.
# Each card's text is read as one "Label: count" pair per line.
# The cards carry several classes, so they are matched with an explicit CSS
# selector rather than passing a compound name to By.CLASS_NAME.
try:
    data_points = driver.find_elements(
        By.CSS_SELECTOR, ".features-card.achivement-area.bg-color"
    )

    for card in data_points:
        for line in card.text.split('\n'):
            if not line.strip():
                continue  # a card without text yields a single empty line
            label, separator, count = line.partition(': ')
            if not separator:
                # Fail loudly with the offending text instead of an IndexError
                raise ValueError(f"Unexpected ridership line: {line!r}")
            day_record[label] = [int(count)]
finally:
    # Always shut the browser down, even when parsing fails, so that no
    # orphaned headless Chrome process is left behind.
    driver.quit()

# One-row frame for the day; 'Tokens' is renamed to 'Total Tokens'
day_record = pd.DataFrame(day_record).rename(columns={'Tokens': 'Total Tokens'})
day_record

Unnamed: 0,Record Date,Total Smart Cards,Stored Value Card,One Day Pass,Three Day Pass,Five Day Pass,Total Tokens,Total NCMC,Group Ticket,Total QR,QR NammaMetro,QR WhatsApp,QR Paytm
0,14-02-2025,381182,380410,293,66,413,187060,16023,130,178855,48653,95356,34846


In [7]:
# Store data in csv file - create file if necessary
filename = "NammaMetro_Ridership_Dataset.csv"
filePath = Path(filename)

# Append only to a file that already has content (and therefore a header row).
# A missing or zero-byte file is written from scratch with the header, because
# appending a header-less row to an empty file would make a later read_csv
# treat that data row as the column names.
# NOTE(review): the header-less append writes values positionally -- confirm
# that the scraped columns always match the existing file's column order.
if filePath.is_file() and filePath.stat().st_size > 0:
    day_record.to_csv(filename, mode='a', header=False)
    print('Appended '+filename)
else:
    day_record.to_csv(filename, mode='w', header=True)
    print('Created '+filename)

Appended NammaMetro_Ridership_Dataset.csv


In [8]:
# Optimize dataset by removing duplicates and rewrite to file.
# NOTE(review): duplicates are detected on whole rows, so two rows for the same
# date with different counts would both be kept -- confirm this is intended.
df = pd.read_csv(filename, index_col=0)
df = df.drop_duplicates(keep='last', ignore_index=True)

# Every column from 'Total Smart Cards' onward is cast to nullable Int64
count_columns = df.loc[:, 'Total Smart Cards':].columns
df[count_columns] = df[count_columns].astype('Int64')

df.to_csv(filename, mode='w', header=True)
df.tail()

Unnamed: 0,Record Date,Total Smart Cards,Stored Value Card,One Day Pass,Three Day Pass,Five Day Pass,Total Tokens,Total NCMC,Group Ticket,Total QR,QR NammaMetro,QR WhatsApp,QR Paytm
91,10-02-2025,420427,419799,262,41,325,210464,16275,86,180897,49598,97916,33383
92,11-02-2025,420648,419995,230,57,366,182897,17151,293,157785,42958,84715,30112
93,12-02-2025,416380,415686,196,108,390,173176,17341,31,155883,43062,82608,30213
94,13-02-2025,406654,405914,274,94,372,179196,17176,62,148163,40588,78334,29241
95,14-02-2025,381182,380410,293,66,413,187060,16023,130,178855,48653,95356,34846
