In [46]:
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
from bs4 import BeautifulSoup
import pandas as pd
import json

# Scraping List Items

In [33]:
# Setup Selenium with Chrome
options = Options()
options.headless = True  # Run in headless mode (without GUI)
driver = webdriver.Chrome()

try:
    # Load the page
    driver.get("https://ev.plugndrive.ca/vehicles")

    # Wait for vehicle cards to load
    WebDriverWait(driver, 15).until(
        EC.presence_of_all_elements_located((By.CLASS_NAME, "EVCard"))
    )

    # Get the fully rendered page source
    html_content = driver.page_source

finally:
    driver.quit()

In [34]:
# Parse the HTML content using BeautifulSoup
soup = BeautifulSoup(html_content, 'html.parser')

In [35]:
soup

<html data-react-helmet="lang" lang="en"><head><meta content="IE=edge" http-equiv="X-UA-Compatible"/><link href="https://fonts.googleapis.com/css2?family=Titillium+Web:wght@300;400;600;700&amp;display=swap" rel="stylesheet"/><meta content="PlugNDrive's guide to electric vehicles, including incentives and charging stations." property="og:description"/><link href="/manifest.json" rel="manifest"/><title>Electric Vehicles | PlugNDrive Buyer's Guide</title><link href="/static/css/2.bdbc5ff3.chunk.css" rel="stylesheet"/><link href="/static/css/main.e747bc37.chunk.css" rel="stylesheet"/><link data-react-helmet="true" href="/static/media/favicon.008bdda2.ico" rel="shortcut icon"/><link data-react-helmet="true" href="https:///vehicles" rel="canonical"/><meta content="width=device-width, initial-scale=1" data-react-helmet="true" name="viewport"/><meta content="#000000" data-react-helmet="true" name="theme-color"/><meta content="Electric Vehicles | PlugNDrive Buyer's Guide" data-react-helmet="tru

In [4]:
# Find all the vehicle cards on the page
vehicle_cards = soup.find_all('a', class_='evc-card EVCard')

In [5]:
# Extracting data from the vehicle cards
vehicles_data = []

In [6]:
# Scraping each list iteam
for i, card in enumerate(vehicle_cards):
    if i >= 5:  # stop after 5 items 
        break
    # if i > 0 and i % 10 == 0:
    #     # Pause for 5 seconds after scraping 10 items
    #     time.sleep(5)

    # Extract data from the card
    href = card['href']
    vehicle_url = f"https://ev.plugndrive.ca{href}"
    make = card.find('p', class_='h2').text.strip()
    model_version = card.find('p', class_='h3 mt-1').text.strip()

    # Extracting data from the `renderRowOfData` div
    render_data = card.find('div', class_='renderRowOfData')
    electric_range = render_data.find('p').find_next('span').text.strip() if render_data else 'N/A'
    total_range = render_data.find_all('p')[1].find('span').text.strip() if len(render_data.find_all('p')) > 1 else 'N/A'
    incentives = render_data.find_all('p')[2].find('span').text.strip() if len(render_data.find_all('p')) > 2 else 'N/A'

    # Extracting MSRP and Match Score
    ev_card_bottom = card.find('div', class_='EVCardBottom')
    msrp = ev_card_bottom.find('p', class_='RenderItemValue').text.strip() if ev_card_bottom else 'N/A'
    match_score_tag = ev_card_bottom.find('p', class_='RenderItemValue ScoreValueGreaterThanEightyFive')
    match_score = match_score_tag.text.strip() if match_score_tag else 'N/A'

    # Append the extracted data to the list
    vehicles_data.append({
        'Make': make,
        'Model_Version': model_version,
        'Electric_Range': electric_range,
        'Total_Range': total_range,
        'Incentives': incentives,
        'MSRP': msrp,
        'Match_Score': match_score,
        'Vehicle_URL': vehicle_url
    })

In [7]:
# Create a DataFrame from the collected data
df = pd.DataFrame(vehicles_data)

In [8]:
df

Unnamed: 0,Make,Model_Version,Electric_Range,Total_Range,Incentives,MSRP,Match_Score,Vehicle_URL
0,Kia,Niro Plug-In Hybrid EX,55 km,836 km,"$5,000","$38,595",94 /100,https://ev.plugndrive.ca/vehicles/Kia_Niro_EX_...
1,Fiat,500e La Prima EV,,227 km,"$5,000","$39,995",92 /100,https://ev.plugndrive.ca/vehicles/Fiat_500e_La...
2,Fiat,500e Red Edition EV,,227 km,"$5,000","$39,995",92 /100,https://ev.plugndrive.ca/vehicles/Fiat_500e_Re...
3,Toyota,Prius Prime SE,72 km,962 km,"$5,000","$39,550",92 /100,https://ev.plugndrive.ca/vehicles/Toyota_Prius...
4,Nissan,LEAF SV,,240 km,"$5,000","$41,748",89 /100,https://ev.plugndrive.ca/vehicles/Nissan_LEAF_...


In [None]:
csv_file_path = 'plugndrive_electric_car_listings.csv'
df.to_csv(csv_file_path, index=False)

# Vehicle Data API Scraping

## Vehicles

In [68]:
# Enable Performance Logging in Chrome
options = Options()
options.headless = False  # Disable headless mode for debugging
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

# Setup Selenium
driver = webdriver.Chrome(options=options)

# Open the page
url = "https://ev.plugndrive.ca/vehicles"
driver.get(url)

# Wait for the page to load
driver.implicitly_wait(10)

# Capture network logs
logs = driver.get_log("performance")

# Filter API Calls
api_url_fragment = "https://api.production.zappyride.com/vehicles?postcode=L4K5V"
api_responses = []

for log in logs:
    log_json = json.loads(log["message"])  # Parse log message as JSON
    message = log_json.get("message", {})

    # Check if this is a network response
    if message.get("method") == "Network.responseReceived":
        response_url = message["params"]["response"]["url"]
        if api_url_fragment in response_url:  # Match target API endpoint
            print(f"API URL: {response_url}")

            # Get response body
            request_id = message["params"]["requestId"]
            try:
                response_body = driver.execute_cdp_cmd("Network.getResponseBody", {"requestId": request_id})
                api_responses.append(json.loads(response_body["body"]))  # Parse JSON response
            except Exception as e:
                print(f"Error retrieving response body: {e}")

# Close the driver
driver.quit()

# Process API Responses
if api_responses:
    print("API Responses:")
    for response in api_responses:
        print(json.dumps(response, indent=2))  # Pretty-print the JSON

    # Convert first response to DataFrame (adjust as needed)
    if "vehicles" in api_responses[0]:
        df = pd.DataFrame(api_responses[0]["vehicles"])
        print("DataFrame:")
        print(df.head())

        # Save to CSV
        df.to_csv("vehicles_data.csv", index=False)
        print("Data saved to vehicles_data.csv")
else:
    print("No matching API responses found.")

API URL: https://api.production.zappyride.com/vehicles?postcode=L4K5V9&household_size=1&household_income=75000
Error retrieving response body: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No resource with given identifier found"}
  (Session info: chrome=131.0.6778.86)
Stacktrace:
0   chromedriver                        0x000000010d1e7e82 chromedriver + 6696578
1   chromedriver                        0x000000010d1dfc9a chromedriver + 6663322
2   chromedriver                        0x000000010cbe7e3e chromedriver + 405054
3   chromedriver                        0x000000010cbcecb9 chromedriver + 302265
4   chromedriver                        0x000000010cbcdbf3 chromedriver + 297971
5   chromedriver                        0x000000010cbcdf14 chromedriver + 298772
6   chromedriver                        0x000000010cbcde74 chromedriver + 298612
7   chromedriver                        0x000000010cbeb9be chromedriver + 420286
8   chromedriver                     

## Chargers

In [65]:
# Enable Performance Logging in Chrome
options = Options()
options.headless = False  # Disable headless mode for debugging
options.set_capability("goog:loggingPrefs", {"performance": "ALL"})

# Setup Selenium
driver = webdriver.Chrome(options=options)

# Open the page
url = "https://ev.plugndrive.ca/vehicles?postcode=l4e4z6"
driver.get(url)

# Wait for the page to load
driver.implicitly_wait(10)

# Capture network logs
logs = driver.get_log("performance")

# Filter API Calls
api_url_fragment = "https://api.production.zappyride.com/chargers"
api_responses = []

for log in logs:
    log_json = json.loads(log["message"])  # Parse log message as JSON
    message = log_json.get("message", {})

    # Check if this is a network response
    if message.get("method") == "Network.responseReceived":
        response_url = message["params"]["response"]["url"]
        if api_url_fragment in response_url:  # Match target API endpoint
            print(f"API URL: {response_url}")

            # Get response body
            request_id = message["params"]["requestId"]
            try:
                response_body = driver.execute_cdp_cmd("Network.getResponseBody", {"requestId": request_id})
                api_responses.append(json.loads(response_body["body"]))  # Parse JSON response
            except Exception as e:
                print(f"Error retrieving response body: {e}")

# Close the driver
driver.quit()

# Process API Responses
if api_responses:
    print("API Responses:")
    for response in api_responses:
        print(json.dumps(response, indent=2))  # Pretty-print the JSON

    # Convert first response to DataFrame (adjust as needed)
    if "chargers" in api_responses[0]:
        df = pd.DataFrame(api_responses[0]["chargers"])
        print("DataFrame:")
        print(df.head())

        # Save to CSV
        df.to_csv("chargers_data.csv", index=False)
        print("Data saved to chargers_data.csv")
else:
    print("No matching API responses found.")

API URL: https://api.production.zappyride.com/chargers?postcode=L4K5V9
Error retrieving response body: Message: unknown error: unhandled inspector error: {"code":-32000,"message":"No resource with given identifier found"}
  (Session info: chrome=131.0.6778.86)
Stacktrace:
0   chromedriver                        0x000000010bbe9e82 chromedriver + 6696578
1   chromedriver                        0x000000010bbe1c9a chromedriver + 6663322
2   chromedriver                        0x000000010b5e9e3e chromedriver + 405054
3   chromedriver                        0x000000010b5d0cb9 chromedriver + 302265
4   chromedriver                        0x000000010b5cfbf3 chromedriver + 297971
5   chromedriver                        0x000000010b5cff14 chromedriver + 298772
6   chromedriver                        0x000000010b5cfe74 chromedriver + 298612
7   chromedriver                        0x000000010b5ed9be chromedriver + 420286
8   chromedriver                        0x000000010b68ec3a chromedriver + 108