In [68]:
import numpy as np
import pandas as pd
import os, time, re, urllib.parse
from datetime import datetime
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

In [69]:
from openlocationcode import openlocationcode as olc
# Notebook configuration.
# The locations file name embeds the reference point used to expand short
# plus codes, in the form "<prefix>_<latitude>_<longitude>.csv".
locations_data = "csv-locations_12.9514242_77.6590212.csv"

# Remove the ".csv" suffix explicitly. str.strip(".csv") would strip any of the
# characters '.', 'c', 's', 'v' from BOTH ends of the name, which only happens
# to work for this particular file name.
_locations_stem = locations_data[:-len(".csv")] if locations_data.endswith(".csv") else locations_data
_locations_fields = _locations_stem.split("_")
referenceLatitude = float(_locations_fields[1])
referenceLongitude = float(_locations_fields[2])

# Lookup tables: locations are read by their "plus_code" / "location" columns
# and routes by their "route_code" column further down the notebook.
locations_df = pd.read_csv(locations_data)
routes_df = pd.read_csv("csv-routes.csv")

# Base name (without extension) of the CSV files this notebook writes.
out_file = "csv-bangalore_traffic"

In [70]:
# Command-line switches for a 'headless' Chrome session, applied in this order.
chrome_flags = (
    "--blink-settings=imagesEnabled=false",
    "--headless=new",
    "--disable-gpu",
    "--no-sandbox",
    "--window-size=1280,800",
    "user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/110.0.5481.77 Safari/537.37",
)

options = Options()
for chrome_flag in chrome_flags:
    options.add_argument(chrome_flag)

# One shared browser instance; the scraping cell closes it when it is done.
driver = webdriver.Chrome(options=options)

In [71]:
def is_plus_code(code):
    """Return True if ``code`` looks like a Plus Code (Open Location Code).

    Accepts both full codes such as "7FG9V3F5+X2" (eight characters before
    the '+') and shortened codes such as "2HVW+G8".

    Args:
        code: Candidate value. Anything that is not a ``str`` is rejected.

    Returns:
        bool: True when the whole string matches the Plus Code shape.
    """
    if not isinstance(code, str):
        return False
    # 4-8 characters before '+': the previous upper bound of 7 rejected full
    # codes like the documented "7FG9V3F5+X2".
    # NOTE(review): the official Open Location Code alphabet is narrower
    # (23456789CFGHJMPQRVWX); the original character class is kept so that no
    # input accepted today starts being rejected.
    pattern = r'[2-9A-HJ-NP-Z]{4,8}\+[2-9A-HJ-NP-Z]{2,}'
    # fullmatch (rather than match + '$') also rejects a trailing newline.
    return re.fullmatch(pattern, code) is not None

def get_maps_url(origin, destination):
    """Build the Google Maps driving-directions URL between two places.

    Both place strings are percent-encoded with ``urllib.parse.quote`` before
    being placed in the query string.
    """
    encoded_origin, encoded_destination = (
        urllib.parse.quote(place) for place in (origin, destination)
    )
    return (
        "https://www.google.com/maps/dir/?api=1"
        f"&origin={encoded_origin}"
        f"&destination={encoded_destination}"
        "&travelmode=driving"
    )

def avg_speed(duration, distance):
    """Compute the average speed in km/h from display strings.

    Args:
        duration: Text such as "25 min", "1 hr 5 min" or "2 hr". Each unit
            token is paired with the number immediately before it.
        distance: Text such as "9.9 km". A value followed by "m" is treated
            as metres; any other (or missing) unit is treated as kilometres.
            Thousands separators (",") are ignored.

    Returns:
        float: distance divided by duration, in km/h.

    Raises:
        ZeroDivisionError: if no duration could be read (total of 0 minutes).
        ValueError: if a number in either string cannot be parsed.
    """
    # --- distance -> kilometres ---
    distance_tokens = distance.split()
    distance_km = float(distance_tokens[0].replace(",", ""))
    if len(distance_tokens) > 1 and distance_tokens[1] == "m":
        # Short trips are shown in metres; reading them as km inflates the speed.
        distance_km /= 1000

    # --- duration -> minutes ---
    # NOTE(review): "hr" and "min" are the tokens seen in this notebook's
    # output; the plural and "day" spellings are accepted defensively.
    minutes_per_unit = {"day": 1440, "days": 1440, "hr": 60, "hrs": 60, "min": 1, "mins": 1}
    duration_tokens = duration.split()
    minutes = 0
    for position, token in enumerate(duration_tokens):
        if position > 0 and token in minutes_per_unit:
            minutes += int(duration_tokens[position - 1]) * minutes_per_unit[token]

    return distance_km / minutes * 60

def get_route_points(route_code, format="short"):
    """Split a route code into its origin and destination points.

    Args:
        route_code: Two plus codes joined by "|", e.g. "2HM2+P8|XJV5+RG".
        format: How to express each point:
            * "short" (default, and any unrecognised value) - the codes
              exactly as written in ``route_code``;
            * "long" - codes expanded with ``olc.recoverNearest`` around the
              notebook's reference latitude/longitude;
            * "latlong" - a ``(latitude, longitude)`` tuple for each point,
              taken from the centre of the decoded code area.

    Returns:
        tuple: ``(origin, destination)`` in the requested format.

    Raises:
        ValueError: if ``route_code`` does not contain exactly one "|".
    """
    origin, destination = route_code.split("|")

    if format in ("long", "latlong"):
        origin = olc.recoverNearest(origin, referenceLatitude, referenceLongitude)
        destination = olc.recoverNearest(destination, referenceLatitude, referenceLongitude)

    if format == "latlong":
        # Return BOTH coordinates for each point. Previously the origin was
        # reduced to its latitude only and the destination to its longitude only.
        origin_area = olc.decode(origin)
        destination_area = olc.decode(destination)
        origin = (origin_area.latitudeCenter, origin_area.longitudeCenter)
        destination = (destination_area.latitudeCenter, destination_area.longitudeCenter)

    return origin, destination

def get_traffic_report(origin, destination, mode='car', max_retries=3, retry_delay=10, wait_timeout=10):
    """Read travel time and distance for a route from Google Maps directions.

    Loads the directions page in the shared ``driver`` and reads the route
    cards (``div[data-trip-index]``). The first card supplies the default
    values; if a card containing the glyph for ``mode`` is found, its values
    are used instead.

    Args:
        origin: Start place, as text accepted by ``get_maps_url``.
        destination: End place, as text accepted by ``get_maps_url``.
        mode: 'bike', 'car' or 'transit'; any other value falls back to 'car'.
        max_retries: Total number of attempts before the last error is re-raised.
        retry_delay: Seconds to sleep between attempts.
        wait_timeout: Seconds to wait for route cards to appear after the page
            loads. If none appear in time the route is reported as missing.

    Returns:
        tuple: ``(time_taken, distance)`` as the text shown on the route card
        (line 2 and line 3 of the card text), or ``None`` for a value that
        could not be read.

    Raises:
        Exception: whatever the last attempt raised, once ``max_retries``
        attempts have failed.
    """
    # Local import: the notebook's import cell does not bring this name in.
    from selenium.common.exceptions import TimeoutException

    # Private-use glyphs matched against the card text.
    # NOTE(review): presumably the icon-font characters Google Maps renders for
    # each travel mode - confirm against the live page.
    modes = {'bike': "\ue9f9", 'car': "\ue531", 'transit': "\ue535"}
    mode = modes.get(mode, modes['car'])

    # Loop-invariant: the URL only depends on the two endpoints.
    maps_url = get_maps_url(origin, destination)

    attempts = 0
    while True:
        try:
            print(f"From {origin} to {destination}\n{maps_url}")
            driver.get(maps_url)

            # The route cards are looked up only once they are present in the
            # DOM; querying immediately after get() can run before they exist.
            try:
                routes = WebDriverWait(driver, wait_timeout).until(
                    EC.presence_of_all_elements_located(
                        (By.CSS_SELECTOR, "div[data-trip-index]")
                    )
                )
            except TimeoutException:
                # No route cards: keep the original "missing values" result
                # rather than treating it as an error.
                routes = []

            # Initialize defaults from the first route; set None on IndexError
            try:
                first_parts = routes[0].text.split("\n")
                time_taken = first_parts[1]
                distance = first_parts[2]
            except IndexError:
                time_taken = None
                distance = None

            # Try to refine using the mode-specific route; a missing line
            # becomes None for that value only.
            for route in routes:
                route_text = route.text  # read once: each access queries the browser
                if mode in route_text:
                    parts = route_text.split("\n")
                    time_taken = parts[1] if len(parts) > 1 else None
                    distance = parts[2] if len(parts) > 2 else None
                    break

            return time_taken, distance

        except Exception:
            # Any failure (navigation error, stale element, ...) counts as one
            # attempt; re-raise once the attempt budget is used up.
            attempts += 1
            if attempts >= max_retries:
                raise
            time.sleep(retry_delay)


In [72]:
# Scrape every route once and collect the raw results in `df`.

# Take a single clock reading so the date and time of a run always agree
# (two separate now() calls could straddle midnight).
run_started = datetime.now()
date_now = run_started.date()
time_now = run_started.strftime("%H:%M")

# Rows are accumulated in a list and turned into a DataFrame once at the end;
# concatenating inside the loop copies the whole frame on every iteration.
traffic_records = []

try:
    for _, route in routes_df.iterrows():
        origin, destination = get_route_points(route["route_code"])
        # Translate each plus code into the place name searched on Google Maps.
        origin = locations_df[locations_df["plus_code"] == origin]["location"].values[0]
        destination = locations_df[locations_df["plus_code"] == destination]["location"].values[0]
        travel_time, travel_distance = get_traffic_report(origin, destination)

        traffic_records.append({
            "date": date_now,
            "time": time_now,
            "route_code": route["route_code"],
            "duration": travel_time,
            "distance": travel_distance,
        })
finally:
    # Always close the browser, even if a lookup or page load fails part-way.
    driver.quit()

# Fixing the column list keeps the expected columns present even when no
# route was scraped.
df = pd.DataFrame(
    traffic_records,
    columns=["date", "time", "route_code", "duration", "distance"],
)

From Jaya Prakash Narayana Park to Coles Park, Fraser Town
https://www.google.com/maps/dir/?api=1&origin=Jaya%20Prakash%20Narayana%20Park&destination=Coles%20Park%2C%20Fraser%20Town&travelmode=driving
From MG Road Metro Station to Kempegowda International Airport, Bengaluru
https://www.google.com/maps/dir/?api=1&origin=MG%20Road%20Metro%20Station&destination=Kempegowda%20International%20Airport%2C%20Bengaluru&travelmode=driving
From Kudlu Gate Metro Station to Biocon Campus
https://www.google.com/maps/dir/?api=1&origin=Kudlu%20Gate%20Metro%20Station&destination=Biocon%20Campus&travelmode=driving
From Jaya Prakash Nagar Metro Station to Hemavathi Park, HSR Layout
https://www.google.com/maps/dir/?api=1&origin=Jaya%20Prakash%20Nagar%20Metro%20Station&destination=Hemavathi%20Park%2C%20HSR%20Layout&travelmode=driving
From Swami Vivekananda Road Metro Station to Christ University, Hosur Main Road
https://www.google.com/maps/dir/?api=1&origin=Swami%20Vivekananda%20Road%20Metro%20Station&desti

In [73]:
# Show the raw scrape results; duration and distance are still the text
# strings returned by get_traffic_report (or None when nothing was read).
display(df)

Unnamed: 0,date,time,route_code,duration,distance
0,2025-09-23,13:25,2HM2+P8|XJV5+RG,31 min,9.9 km
1,2025-09-23,13:25,XJG4+7J|5PX4+HQ,54 min,34.3 km
2,2025-09-23,13:25,VJRQ+2M|RMJJ+F4,27 min,16.1 km
3,2025-09-23,13:25,WH5F+26|WJ8X+F5W,25 min,10.2 km
4,2025-09-23,13:25,XJPW+92|WJP4+FF,36 min,10.7 km
5,2025-09-23,13:25,2HVW+G8|XJXR+WG,32 min,9.9 km
6,2025-09-23,13:25,XHFC+WMF|WGGC+MG,39 min,11.4 km
7,2025-09-23,13:25,XPC7+72|XM33+J3,24 min,9.4 km
8,2025-09-23,13:25,WHCJ+J4|XGCP+8W,,
9,2025-09-23,13:25,XMW9+G8|WMJR+V4,32 min,10.1 km


In [74]:
def get_duration(s):
    """Convert a duration label into minutes.

    Understands "25 min", "1 hr 5 min", "2 hr" and "7 min". A string with
    neither unit is read as a bare number via ``float`` of its first token.
    Non-string, blank or unparseable input yields ``np.nan``.
    """
    if not (isinstance(s, str) and s.strip()):
        return np.nan

    tokens = s.split()
    # (unit token, minutes per unit), evaluated in this order.
    unit_minutes = (("hr", 60), ("min", 1))

    try:
        if not any(unit in tokens for unit, _ in unit_minutes):
            # Fallback for a bare number with no unit (rare).
            return float(tokens[0])

        total = 0
        for unit, factor in unit_minutes:
            if unit in tokens:
                # The amount is the token immediately before the unit.
                total += int(tokens[tokens.index(unit) - 1]) * factor
        return total
    except Exception:
        return np.nan

# Clean the scraped strings into numbers, then persist the raw table.

# Distance: drop the " km" suffix and coerce to numeric (invalid -> NaN).
distance_values = pd.to_numeric(
    df["distance"].str.replace(" km", "", regex=False), errors="coerce"
)
# Duration: parse to minutes (invalid -> NaN).
duration_values = df["duration"].apply(get_duration)

# Replace both columns, discard rows missing either value, and pin the dtypes.
df = (
    df.assign(distance=distance_values, duration=duration_values)
    .dropna(subset=["distance", "duration"])
    .astype({"distance": float, "duration": int})
)

# Append to an existing file (no header), otherwise create it with a header row.
raw_csv_path = out_file + ".csv"
raw_csv_exists = os.path.exists(raw_csv_path)
df.to_csv(
    raw_csv_path,
    mode="a" if raw_csv_exists else "w",
    header=False if raw_csv_exists else df.columns,
    index=False,
)

In [75]:
# Build the human-readable table: place names instead of plus codes, plus the
# average speed in km/h, ordered from slowest to fastest route.
location_by_plus_code = locations_df.set_index('plus_code')['location']
route_endpoints = df['route_code'].str.split('|')

df_traffic = (
    df.assign(
        avg_speed=(df['distance'] / (df['duration'] / 60)).round(2),
        origin=route_endpoints.str[0].map(location_by_plus_code),
        destination=route_endpoints.str[1].map(location_by_plus_code),
    )
    .sort_values('avg_speed', ascending=True)
    .reset_index(drop=True)
    [['date', 'time', 'origin', 'destination', 'duration', 'distance', 'avg_speed']]
)
display(df_traffic)

# Append to an existing processed file (no header), otherwise create it with a header row.
processed_csv_path = out_file + "_processed.csv"
processed_csv_exists = os.path.exists(processed_csv_path)
df_traffic.to_csv(
    processed_csv_path,
    mode="a" if processed_csv_exists else "w",
    header=False if processed_csv_exists else df_traffic.columns,
    index=False,
)

Unnamed: 0,date,time,origin,destination,duration,distance,avg_speed
0,2025-09-23,13:25,"Big Bull Temple, Basavanagudi","Shri Someshwara Swamy Temple, Halasuru",40,10.7,16.05
1,2025-09-23,13:25,"Kempegowda Bus Station, Majestic","Sparsh Hospital, RR Nagar",39,11.4,17.54
2,2025-09-23,13:25,Swami Vivekananda Road Metro Station,"Christ University, Hosur Main Road",36,10.7,17.83
3,2025-09-23,13:25,Lulu Mall Bengaluru,Nexus Mall Koramangala,37,11.2,18.16
4,2025-09-23,13:25,"Bethel AG Church, Hebbal",SMVT Railway Station,32,9.9,18.56
5,2025-09-23,13:25,Benniganahalli Metro Station,"Embassy TechVillage, Devarabisanahalli",32,10.1,18.94
6,2025-09-23,13:25,Jaya Prakash Narayana Park,"Coles Park, Fraser Town",31,9.9,19.16
7,2025-09-23,13:25,The Rameshwaram Cafe @ Brookfield,"Gawky Goose, Wind Tunnel Rd",24,9.4,23.5
8,2025-09-23,13:25,Jaya Prakash Nagar Metro Station,"Hemavathi Park, HSR Layout",25,10.2,24.48
9,2025-09-23,13:25,Kudlu Gate Metro Station,Biocon Campus,27,16.1,35.78
