In [1]:
# pip install selenium

In [1]:
import csv
import datetime
import logging
import os

import pandas as pd
from selenium import webdriver
from selenium.webdriver.common.by import By

In [2]:
# CSV file that the scraped table is appended to and that the later cells
# read back and overwrite.
# NOTE(review): "tool" looks like a typo for "toll"; left unchanged because
# renaming would change where the output lands.
OUTPUT_FILE_NAME = 'tool_plazas.csv'

# Set up the logger

In [3]:
# Configure logging in a single call so the level and the format are always
# applied together. force=True removes handlers that are already attached to
# the root logger (for example from an earlier run of this cell); without it
# basicConfig() does nothing when a handler exists and the INFO level would
# silently not be set.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s - %(levelname)s - %(message)s",
    datefmt="%Y-%m-%d %H:%M:%S",
    force=True,
)

In [4]:
# Wall-clock start of the run; the timing cell further down subtracts this
# from the end time to report the total duration.
start_time = datetime.datetime.now()

# Set up Selenium WebDriver 

In [5]:
# Start Chrome without a visible window and keep the session in `driver`,
# which the following cells use to load and query the page.
options = webdriver.ChromeOptions()
options.add_argument("--headless")  # Run in headless mode
driver = webdriver.Chrome(options=options)

# Open the website

In [6]:
# Page the toll plaza table is scraped from.
url = "https://tis.nhai.gov.in/tollplazasataglance.aspx?language=en#"
logging.info("Launching the webpage")
driver.get(url)
# Implicit wait: element lookups made after this call keep retrying for up to
# 10 seconds before they give up.
driver.implicitly_wait(10)

2025-03-18 11:55:14 - INFO - Launching the webpage


# Locate the table containing the list of toll plazas on the webpage

In [8]:
# Container element with id "tollList"; the table rows are looked up from it
# in the next cell.
table_element=driver.find_element(By.ID, 'tollList')

# Get all rows of the table containing data

In [9]:
# "./" keeps the search relative to table_element. An XPath that starts with
# "//" is evaluated against the whole document even when it is called on an
# element, so the previous expression ignored table_element entirely.
rows = table_element.find_elements(By.XPATH, "./table/tbody/tr")

# Split the table into the header row and the data rows

In [10]:
# The first row is kept (as a one-element list) for the header; every row
# after it is treated as data.
table_header, table_data = rows[:1], rows[1:]

# Function to write data to the file line by line

In [11]:
def write_to_file(data, file_name=None):
    """Append one record to the CSV output file as a single row.

    Args:
        data: Iterable with the field values of one row.
        file_name: Path of the file to append to. When omitted, the
            module-level OUTPUT_FILE_NAME is used.

    The row is written with csv.writer, so a field that contains a comma, a
    double quote or a line break is quoted instead of shifting the columns
    of that row, and non-string values are converted automatically.

    The file is opened in append mode (it is created when missing) with the
    platform default encoding.
    """
    target = OUTPUT_FILE_NAME if file_name is None else file_name
    # newline="" hands control of line endings to the csv module, as its
    # documentation requires; each row is terminated with a single "\n".
    with open(target, "a", newline="") as file:
        csv.writer(file, lineterminator="\n").writerow(data)

# Get the header of the table

In [12]:
# Progress message emitted before the header row is read.
logging.info("Getting the table header")

2025-02-21 17:35:35 - INFO - Getting the table header


In [13]:
# Start from an empty output file. write_to_file() only appends, so without
# this a second run of the notebook would add another header line and another
# copy of every row to the file left by the previous run.
open(OUTPUT_FILE_NAME, "w").close()

for header_row in table_header:
    header_cells = header_row.find_elements(By.TAG_NAME, 'th')
    temp_rows = [cell.text for cell in header_cells]
    # Extra column for the id that the row-extraction cell appends to each row.
    temp_rows.append('TollPlazaID')
    print(temp_rows)
    write_to_file(temp_rows)

['Sr No.', 'State', 'NH-No.', 'Toll Plaza Name', 'Toll Plaza Location', 'Section / Stretch', 'TollPlazaID']


# Extract each row of the table one at a time

In [14]:
# Progress message emitted before the data rows are read.
logging.info("Getting the table rows and writing to the file.")

2025-02-21 17:35:35 - INFO - Getting the table rows and writing to the file.


In [15]:
# Collect every data row as a list of strings, with the toll plaza id added
# as the last element.
extracted_rows = []
for row_element in table_data:
    cells = row_element.find_elements(By.TAG_NAME, 'td')
    # The id is the text between the first "(" and the following ")" of the
    # onclick attribute on the row's link.
    onclick_value = row_element.find_element(By.XPATH, "td/a").get_dom_attribute('onclick')
    toll_plaza_id = onclick_value.split("(")[1].split(")")[0]
    # Commas inside a cell would break the comma-separated line that is
    # written later, so they are stored as ";" for now.
    temp_rows = [cell.text.replace(',', ';') for cell in cells]
    temp_rows.append(toll_plaza_id)
    extracted_rows.append(temp_rows)

# Write rows to the output file

In [16]:
# Append each extracted record to the output file, in table order.
for record in extracted_rows:
    write_to_file(record)

In [17]:
# Scraping is done and every value has been copied into plain Python strings,
# so end the WebDriver session. Without this the headless browser keeps
# running after the scrape, and each new run of the setup cell starts another.
driver.quit()
logging.info("Collecting list of all toll plazas complete")

2025-02-21 17:37:18 - INFO - Collecting list of all toll plazas complete


# Replace all occurrences of `;` with `,` (the `;` was introduced as a stand-in for commas while preparing the comma-separated output file)

In [102]:
# The scraper stored commas inside cell text as ";" so that every line kept
# the same number of fields. Turn them back into commas now that pandas takes
# care of quoting. Note that regex=True replaces every ";" inside string
# cells, including any that were part of the scraped text.
df=pd.read_csv(OUTPUT_FILE_NAME,encoding='cp1252')
df = df.replace(";", ",", regex=True)
# Write with the same encoding the file is read with here and further down.
# to_csv() defaults to UTF-8, and reading that back as cp1252 would garble
# every non-ASCII character.
df.to_csv(OUTPUT_FILE_NAME,index=False,encoding='cp1252')

In [None]:
# Report how long the scrape took, measured from start_time.
end_time = datetime.datetime.now()
time_taken = end_time - start_time
# Named TIME_FORMAT rather than `format` so the built-in format() is not
# shadowed for the rest of the notebook session.
TIME_FORMAT = "%y-%m-%d %H:%M:%S"
print(f"Start Time = {start_time.strftime(TIME_FORMAT)}")
print(f"End Time = {end_time.strftime(TIME_FORMAT)}")
print(f"Total Execution time = {round(time_taken.total_seconds(),2)} sec")

# Add latitude and longitude for each toll plaza using the Google Maps Geocoding API

In [1]:
import requests

In [2]:
# Read the key from the GOOGLE_API_KEY environment variable so the real
# secret never has to be saved in the notebook. The placeholder is used only
# when the variable is not set.
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY", "ENTER_GOOGLE_API_KEY")

In [98]:
def get_coordinates(address):
    """Geocode an address with the Google Maps Geocoding API.

    Args:
        address: Free-text address, e.g. "Aganampudi toll plaza, Andhra Pradesh".

    Returns:
        A tuple (latitude, longitude, place_id, address_partial_match) taken
        from the first result of the response. address_partial_match is
        False when that result carries no "partial_match" entry.
        (None, None, None, None) is returned when the response contains no
        results, whether the "results" key is missing or its list is empty.

    Raises:
        Exceptions raised by requests (connection errors, timeouts) and by
        JSON decoding are not caught here and reach the caller.
    """
    url = "https://maps.googleapis.com/maps/api/geocode/json"
    # Passing the values through `params` lets requests percent-encode them,
    # so characters such as "&", "#" or "+" in an address cannot corrupt the
    # query string.
    query = {"address": address, "key": GOOGLE_API_KEY}
    # The timeout keeps one stalled request from blocking the notebook forever.
    response = requests.get(url, params=query, timeout=30).json()

    # An empty "results" list (for example when nothing matched) used to raise
    # IndexError on results[0]; it is now treated the same as a missing key.
    results = response.get("results")
    if not results:
        return None, None, None, None

    best_match = results[0]
    location = best_match["geometry"]["location"]
    address_partial_match = best_match.get("partial_match", False)
    return location["lat"], location["lng"], best_match["place_id"], address_partial_match


In [96]:
# get_coordinates1("Aganampudi toll plaza,Andhra Pradesh") #response['results'][0]['geometry']['location']['lat']

In [97]:
# def get_coordinates_org(address):
#     api_key = GOOGLE_API_KEY # Replace with your Google API key
#     url = f"https://maps.googleapis.com/maps/api/geocode/json?address={address}&key={api_key}"
#     response = requests.get(url).json()
#     if 'results' in response:
#         latitude = response['results'][0]['geometry']['location']['lat']
#         longitude = response['results'][0]['geometry']['location']['lng']
#         return latitude, longitude
#     else:
#         return None,None

In [106]:
# Geocode every toll plaza and store the four returned values on its row.
geo_columns = ('latitude', 'longitude', 'place_id', 'address_partial_match')
for idx in range(len(df)):
    record = df.iloc[idx]
    toll_name = record['Toll Plaza Name']
    # Names that do not already mention "toll" get " toll plaza" appended
    # before the state is added to form the query.
    if "toll" not in toll_name.lower():
        toll_name = toll_name + " toll plaza"
    address = toll_name + ", " + record['State']
    geocoded = get_coordinates(address)
    print(idx, address, *geocoded)
    for column, value in zip(geo_columns, geocoded):
        df.loc[idx, column] = value
    

0 Aganampudi toll plaza, Andhra Pradesh 17.6854173 83.14995069999999 ChIJyaj65ttuOToRH26fq-li1fY False
1 Amakathadu toll plaza, Andhra Pradesh 15.4864765 77.9009433 ChIJG6ZfoqN_tjsR6hw94uU86QQ False
2 Annampalli toll plaza, Andhra Pradesh 16.6722283 82.1473895 ChIJcw8uBaz1NzoRMva9Ii5taRE False
3 Badava toll plaza, Andhra Pradesh 16.8504745 80.63358439999999 ChIJr0zTpNLaNToReLaGbihNfUU False
4 Bandaplli toll plaza, Andhra Pradesh 14.1311987 78.75668209999999 ChIJ2T3B-NwFszsRs_e_vnu63yw True
5 Bandlapalli toll plaza, Andhra Pradesh 14.1311987 78.75668209999999 ChIJ2T3B-NwFszsRs_e_vnu63yw False
6 Basapuram toll plaza, Andhra Pradesh 14.7083358 78.8723771 ChIJ4_R8zQRjszsRkEOmZw1aqqk False
7 Bathalapalli toll plaza, Andhra Pradesh 14.5001074 77.7966071 ChIJid-qWD5bsTsRNqTOBrQzN6Q False
8 Bellupada toll plaza, Andhra Pradesh 19.1113097 84.6992649 ChIJVyVbW_1pPToRAqCixwnTQPg False
9 Bollapalli toll plaza, Andhra Pradesh 15.8865647 80.07081560000002 ChIJ823aWrD1SjoRnBgUBxXInl4 True
10 Brahmana

In [107]:
# Preview the first rows, now including the geocoding columns.
df.head()

Unnamed: 0,Sr No.,State,NH-No.,Toll Plaza Name,Toll Plaza Location,Section / Stretch,TollPlazaID,latitude,longitude,place_id,address_partial_match
0,1,Andhra Pradesh,16,Aganampudi,Km 728.055,Vishakhapatnam - Ankapalli [Km 2.837 to &Km 39...,236,17.685417,83.149951,ChIJyaj65ttuOToRH26fq-li1fY,False
1,2,Andhra Pradesh,7 (new 44),Amakathadu,Km 250.700,Hyderabad Bangalore (km 211.000 to km 462.164),258,15.486477,77.900943,ChIJG6ZfoqN_tjsR6hw94uU86QQ,False
2,3,Andhra Pradesh,NH-216,Annampalli,Annampalli,Gurajanapalli To pasarlapudi,5977,16.672228,82.14739,ChIJcw8uBaz1NzoRMva9Ii5taRE,False
3,4,Andhra Pradesh,221,Badava,35.8,Imbrahimpatnam to AP Telangana Border,4486,16.850475,80.633584,ChIJr0zTpNLaNToReLaGbihNfUU,False
4,5,Andhra Pradesh,NH40,Bandaplli,119.945 Bandaplli,Rayachoty Kadapa Section,5697,14.131199,78.756682,ChIJ2T3B-NwFszsRs_e_vnu63yw,True


In [108]:
# Save the geocoded table. The file is read back with encoding='cp1252' in
# the next cell, so it is written with that encoding too. to_csv() defaults
# to UTF-8, which would garble non-ASCII characters on that read.
df.to_csv(OUTPUT_FILE_NAME,index=False,encoding='cp1252')

In [109]:
# Reload the saved file from disk, decoding it as cp1252.
df=pd.read_csv(OUTPUT_FILE_NAME,encoding='cp1252')


In [110]:
# Preview the first rows of the reloaded file.
df.head()

Unnamed: 0,Sr No.,State,NH-No.,Toll Plaza Name,Toll Plaza Location,Section / Stretch,TollPlazaID,latitude,longitude,place_id,address_partial_match
0,1,Andhra Pradesh,16,Aganampudi,Km 728.055,Vishakhapatnam - Ankapalli [Km 2.837 to &Km 39...,236,17.685417,83.149951,ChIJyaj65ttuOToRH26fq-li1fY,False
1,2,Andhra Pradesh,7 (new 44),Amakathadu,Km 250.700,Hyderabad Bangalore (km 211.000 to km 462.164),258,15.486477,77.900943,ChIJG6ZfoqN_tjsR6hw94uU86QQ,False
2,3,Andhra Pradesh,NH-216,Annampalli,Annampalli,Gurajanapalli To pasarlapudi,5977,16.672228,82.14739,ChIJcw8uBaz1NzoRMva9Ii5taRE,False
3,4,Andhra Pradesh,221,Badava,35.8,Imbrahimpatnam to AP Telangana Border,4486,16.850475,80.633584,ChIJr0zTpNLaNToReLaGbihNfUU,False
4,5,Andhra Pradesh,NH40,Bandaplli,119.945 Bandaplli,Rayachoty Kadapa Section,5697,14.131199,78.756682,ChIJ2T3B-NwFszsRs_e_vnu63yw,True


In [91]:
# Spot check a single address. The helper defined in this notebook is
# get_coordinates; get_coordinates1 is not defined anywhere here, so the
# original call raises NameError on a fresh kernel.
get_coordinates("Sarsawa toll plaza,Uttarakhand")

(30.066753, 79.01929969999999, 'ChIJCZwnAsLcCTkRBcCcaGM7xAc', True)

In [92]:
# Spot check: compare the geocoder's answer with the manually looked-up
# reference in the trailing comment.
get_coordinates("Sarsawa toll plaza,Uttarakhand") # from latlong.net=41.707617,-73.960606

(30.066753, 79.01929969999999)

In [66]:
# Spot check: this query places "74 (Old 734)" between the plaza name and
# the state, unlike the "name, state" form used by the geocoding loop.
get_coordinates("Rawason Bridge (MoRTH) toll plaza,74 (Old 734),Uttarakhand")

(30.066753, 79.01929969999999)

In [67]:
# Spot check for Bandlapalli, which received the same coordinates as
# Bandaplli in the geocoding loop output.
get_coordinates("Bandlapalli toll plaza,Andhra Pradesh")

(14.1311987, 78.75668209999999)

In [68]:
# Spot check: compare the geocoder's answer with the manually looked-up
# reference in the trailing comment.
get_coordinates("Danamaiahgari palli toll plaza,Andhra Pradesh")  # from latlong.net =13.805130,78.893375

(15.9128998, 79.7399875)

In [69]:
# Spot check: compare the geocoder's answer with the manually looked-up
# reference in the trailing comment.
get_coordinates("Goshtani toll plaza,Andhra Pradesh") # from latlong.net = 17.734291,83.302480

(15.9128998, 79.7399875)

In [113]:
# Rows for which the geocoder reported a partial match.
df[df['address_partial_match']==True]

Unnamed: 0,Sr No.,State,NH-No.,Toll Plaza Name,Toll Plaza Location,Section / Stretch,TollPlazaID,latitude,longitude,place_id,address_partial_match
4,5,Andhra Pradesh,NH40,Bandaplli,119.945 Bandaplli,Rayachoty Kadapa Section,5697,14.131199,78.756682,ChIJ2T3B-NwFszsRs_e_vnu63yw,True
9,10,Andhra Pradesh,5,Bollapalli,Km 1200.000,Chilkaluripet - Nellore (Km 1182.802 - Km 1366...,252,15.886565,80.070816,ChIJ823aWrD1SjoRnBgUBxXInl4,True
10,11,Andhra Pradesh,71,Brahmanapalli,348.72,Telangana Border to Erpedu,4495,16.516835,79.830608,ChIJSQ-ifCtvNToRdv35c16W5t8,True
13,14,Andhra Pradesh,40,Chagalmarri,Km 228.350,Kadapa - Kurnool(Km 167.750 - km 356.502),452,14.979577,78.574807,ChIJTYeL94JjtDsRSg8ZJxVi4xg,True
17,18,Andhra Pradesh,NH 216,Chinaganjam Toll Plaza,Chinaganjam,Eepuripalem to Ongole section,6632,15.692996,80.241858,ChIJP3vrpg5RSjoR2BSv_n7mrm4,True
...,...,...,...,...,...,...,...,...,...,...,...
1046,1047,West Bengal,19 (OLD NH 02),Rajchandrapur (Howrah),SECOND VIVEKANANDA BRIDGE TOLLWAY COMPANY PVT....,6 LANNING,6612,22.653594,88.327250,ChIJ2aG_jSud-DkRDdwGmGBV-4Q,True
1047,1048,West Bengal,AH48,Rangalibazna,Km. 74.900,May0guri Changrabandha to Jargaon Hasimara Dhu...,5662,26.684893,89.230772,ChIJuTv66la74zkROB1fM_EHQOc,True
1048,1049,West Bengal,NH34 (Old) NH12 (New),Sali Bamandanga,141.14,Krish0garBaharampore section of NH34 from Km 1...,5634,23.629036,88.360083,ChIJe54D9x0R-TkR-Iaq_ZIrVjc,True
1049,1050,West Bengal,31C,Satbhaiya,Km. 23.400,Satbhaiya,5686,26.670762,88.219338,ChIJd31jym5M5DkR1eu8qsUHxNk,True


In [111]:
# Rows whose (latitude, longitude) pair already appeared on an earlier row;
# duplicated() leaves the first occurrence of each pair unmarked.
df[df[['latitude','longitude']].duplicated()]

Unnamed: 0,Sr No.,State,NH-No.,Toll Plaza Name,Toll Plaza Location,Section / Stretch,TollPlazaID,latitude,longitude,place_id,address_partial_match
5,6,Andhra Pradesh,NH42,Bandlapalli,Bandlapalli village,Madanapalli to punganur to palmner,5952,14.131199,78.756682,ChIJ2T3B-NwFszsRs_e_vnu63yw,False
21,22,Andhra Pradesh,42,Danamaiahgari palli,Danamaiahgari palli,KrishnagiriPalamaneru,6607,15.912900,79.739987,ChIJf9STrvhGNToRg82tlb670TM,True
32,33,Andhra Pradesh,516 C,Goshtani,KM 0.00 For a length of 10.336km On NH16,VISAKHAPATNAM PORT CONNECTIVTY PROJECT (2.262 KM),6606,15.912900,79.739987,ChIJf9STrvhGNToRg82tlb670TM,True
34,35,Andhra Pradesh,42,Jallipalli,KM 47.400,A.P Boarder to Antapur,5670,15.912900,79.739987,ChIJf9STrvhGNToRg82tlb670TM,True
45,46,Andhra Pradesh,5,Main Toll (Panchvati),Km 9.158,Vishakhapatnam Port Connectivity Project,240,15.912900,79.739987,ChIJf9STrvhGNToRg82tlb670TM,True
...,...,...,...,...,...,...,...,...,...,...,...
1016,1017,Uttarakhand,NH58 (new NH334),Chhapar,Ch. 138.552 (near Vill. Chhapar),Muzaffar0gar to Haridwar,5713,30.066753,79.019300,ChIJCZwnAsLcCTkRBcCcaGM7xAc,True
1021,1022,Uttarakhand,74,Puraini,Puraini Distt Bijnor UP,,5752,30.066753,79.019300,ChIJCZwnAsLcCTkRBcCcaGM7xAc,True
1022,1023,Uttarakhand,74 (Old 734),Rawason Bridge (MoRTH),Km 20,Bridge of Rawason river in Km. 19,364,30.066753,79.019300,ChIJCZwnAsLcCTkRBcCcaGM7xAc,True
1023,1024,Uttarakhand,NH73 (new NH344),Sarsawa,Ch. 81.155 (near Sarsawa),4Laning of GagalheriSaharanpurYamu00gar (UPHar...,5715,30.066753,79.019300,ChIJCZwnAsLcCTkRBcCcaGM7xAc,True


In [49]:
# df[df['Toll Plaza Name'] =='Husludanga']

In [50]:
# df[df['TollPlazaID'] ==5774]['Toll Plaza Name']

In [51]:
# toll_plaza_id=5774
# location_name = df[df['TollPlazaID'] ==toll_plaza_id]['Toll Plaza Name'] + " toll plaza , " + df[df['TollPlazaID'] ==toll_plaza_id]['State']
# location_name.values[0]

In [16]:
# # Step 1: Open the URL
# driver.get("https://developers.google.com/maps/documentation/geocoding/overview")
# time.sleep(3)  # Allow the page to load

# # Step 2: Locate the search input box and enter the location
# search_box = driver.find_element(By.ID, "query-input")  # Google documentation has a search box with name 'q'
# search_box.send_keys(location_name)
# search_box.send_keys(Keys.RETURN)
# time.sleep(3)  # Allow search results to load

# # Step 3: Extract the Location Value
# result = driver.find_element(By.CLASS_NAME, "devsite-search-result-item-title").text
# print("Extracted Location Value:", result)

In [44]:
# l=[name for name in df['Toll Plaza Name'] if 'toll' in name.lower()]

In [45]:
# l

In [46]:
# m=[name for name in df['Toll Plaza Name'] if 'toll' not in name.lower()]
# m