In [28]:
import requests
from bs4 import BeautifulSoup
import pandas as pd
import time

In [33]:
# Part 1: Fetch the content of the page
base_url = "https://ev.plugndrive.ca/vehicles"
# A timeout keeps the cell from hanging indefinitely on an unresponsive server.
response = requests.get(base_url, timeout=30)
# Fail fast on 4xx/5xx so an error page is never saved and parsed as data.
response.raise_for_status()

In [51]:
# Persist the fetched page to disk so a later cell can read it back for parsing
with open("response.html", mode="w", encoding="utf-8") as html_file:
    html_file.write(response.text)

In [52]:
# Inspect the raw HTML returned by the server.
# NOTE(review): the body shown below contains only an empty <div id="root"> and a
# <noscript> "enable JavaScript" notice, which suggests the vehicle listing is
# rendered client-side — confirm before relying on static HTML parsing.
response.text

'<!doctype html><html lang="en"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link href="https://fonts.googleapis.com/css2?family=Titillium+Web:wght@300;400;600;700&display=swap" rel="stylesheet"><meta property="og:description" content="PlugNDrive\'s guide to electric vehicles, including incentives and charging stations."/><link rel="manifest" href="/manifest.json"/><title>PlugNDrive EV Buyers Guide</title><link href="/static/css/2.bdbc5ff3.chunk.css" rel="stylesheet"><link href="/static/css/main.e747bc37.chunk.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><script>!function(l){function e(e){for(var r,t,n=e[0],o=e[1],u=e[2],f=0,i=[];f<n.length;f++)t=n[f],p[t]&&i.push(p[t][0]),p[t]=0;for(r in o)Object.prototype.hasOwnProperty.call(o,r)&&(l[r]=o[r]);for(s&&s(e);i.length;)i.shift()();return c.push.apply(c,u||[]),a()}function a(){for(var e,r=0;r<c.length;r++){for(var t=c[r],n=!0,o=1;o<t.length;o++){v

In [36]:
# Part 2: Parsing and extracting data (to be run in Jupyter Notebook)
# Read the previously saved page back from disk
with open("response.html", encoding="utf-8") as saved_page:
    html_content = saved_page.read()

In [37]:
# Display the HTML string that was read back from response.html
html_content

'<!doctype html><html lang="en"><head><meta http-equiv="X-UA-Compatible" content="IE=edge"/><link href="https://fonts.googleapis.com/css2?family=Titillium+Web:wght@300;400;600;700&display=swap" rel="stylesheet"><meta property="og:description" content="PlugNDrive\'s guide to electric vehicles, including incentives and charging stations."/><link rel="manifest" href="/manifest.json"/><title>PlugNDrive EV Buyers Guide</title><link href="/static/css/2.bdbc5ff3.chunk.css" rel="stylesheet"><link href="/static/css/main.e747bc37.chunk.css" rel="stylesheet"></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><script>!function(l){function e(e){for(var r,t,n=e[0],o=e[1],u=e[2],f=0,i=[];f<n.length;f++)t=n[f],p[t]&&i.push(p[t][0]),p[t]=0;for(r in o)Object.prototype.hasOwnProperty.call(o,r)&&(l[r]=o[r]);for(s&&s(e);i.length;)i.shift()();return c.push.apply(c,u||[]),a()}function a(){for(var e,r=0;r<c.length;r++){for(var t=c[r],n=!0,o=1;o<t.length;o++){v

In [38]:
# Build a BeautifulSoup tree from the loaded HTML using Python's built-in parser
soup = BeautifulSoup(markup=html_content, features="html.parser")

In [39]:
# Display the parsed document tree
soup

<!DOCTYPE html>
<html lang="en"><head><meta content="IE=edge" http-equiv="X-UA-Compatible"/><link href="https://fonts.googleapis.com/css2?family=Titillium+Web:wght@300;400;600;700&amp;display=swap" rel="stylesheet"/><meta content="PlugNDrive's guide to electric vehicles, including incentives and charging stations." property="og:description"/><link href="/manifest.json" rel="manifest"><title>PlugNDrive EV Buyers Guide</title><link href="/static/css/2.bdbc5ff3.chunk.css" rel="stylesheet"/><link href="/static/css/main.e747bc37.chunk.css" rel="stylesheet"/></link></head><body><noscript>You need to enable JavaScript to run this app.</noscript><div id="root"></div><script>!function(l){function e(e){for(var r,t,n=e[0],o=e[1],u=e[2],f=0,i=[];f<n.length;f++)t=n[f],p[t]&&i.push(p[t][0]),p[t]=0;for(r in o)Object.prototype.hasOwnProperty.call(o,r)&&(l[r]=o[r]);for(s&&s(e);i.length;)i.shift()();return c.push.apply(c,u||[]),a()}function a(){for(var e,r=0;r<c.length;r++){for(var t=c[r],n=!0,o=1;o<t.l

In [40]:
# Find all the vehicle cards on the page
# A CSS selector matches <a> tags carrying both classes regardless of their
# order or of extra classes; a multi-word `class_` string would only match
# that exact attribute value.
# NOTE(review): the fetched HTML appears to hold only an empty <div id="root">,
# so no cards are present in the static markup — confirm how the page is rendered.
vehicle_cards = soup.select('a.evc-card.EVCard')

In [41]:
# Show the matched cards; an empty list means no matching <a> tags were found
vehicle_cards

[]

In [24]:
# Accumulator for the scraped rows: one dict per vehicle card
vehicles_data: list = []

In [25]:
# Loop through the vehicle cards (scrape 10 items at a time)
def _text_or_na(node):
    """Return the stripped text of ``node``, or 'N/A' when ``node`` is None."""
    return node.text.strip() if node is not None else 'N/A'


def _row_value(render_data, index):
    """Return the <span> text of the ``index``-th <p> inside ``render_data``.

    Falls back to 'N/A' when the container, the paragraph or its <span> is
    missing, instead of raising IndexError/AttributeError.
    """
    if render_data is None:
        return 'N/A'
    paragraphs = render_data.find_all('p')
    if index >= len(paragraphs):
        return 'N/A'
    return _text_or_na(paragraphs[index].find('span'))


# Start from an empty list so re-running this cell does not append duplicates.
vehicles_data.clear()

for i, card in enumerate(vehicle_cards):
    if i > 0 and i % 10 == 0:
        # Pause for 5 seconds after scraping 10 items
        time.sleep(5)

    # Extract data from the card; a missing href yields 'N/A' rather than KeyError
    href = card.get('href')
    vehicle_url = f"https://ev.plugndrive.ca{href}" if href else 'N/A'
    make = _text_or_na(card.find('p', class_='h2'))
    # CSS selector: matches both classes regardless of their order in the attribute
    model_version = _text_or_na(card.select_one('p.h3.mt-1'))

    # Extracting data from the `renderRowOfData` div
    render_data = card.find('div', class_='renderRowOfData')
    electric_range = _row_value(render_data, 0)
    total_range = _row_value(render_data, 1)
    incentives = _row_value(render_data, 2)

    # Extracting MSRP and Match Score
    ev_card_bottom = card.find('div', class_='EVCardBottom')
    if ev_card_bottom is None:
        msrp = 'N/A'
        match_score = 'N/A'
    else:
        msrp = _text_or_na(ev_card_bottom.find('p', class_='RenderItemValue'))
        # Only the 'ScoreValueGreaterThanEightyFive' element is matched; cards
        # without it now yield 'N/A' instead of raising AttributeError.
        # NOTE(review): presumably other scores use a different class — confirm
        # against the rendered markup and widen the selector if so.
        match_score = _text_or_na(
            ev_card_bottom.select_one('p.RenderItemValue.ScoreValueGreaterThanEightyFive')
        )

    # Append the extracted data to the list
    vehicles_data.append({
        'Make': make,
        'Model_Version': model_version,
        'Electric_Range': electric_range,
        'Total_Range': total_range,
        'Incentives': incentives,
        'MSRP': msrp,
        'Match_Score': match_score,
        'Vehicle_URL': vehicle_url
    })

In [26]:
# Create a DataFrame from the collected data
# Explicit columns keep the schema stable even when nothing was scraped
# (an empty list would otherwise produce a frame with no columns at all).
df = pd.DataFrame(
    vehicles_data,
    columns=[
        'Make',
        'Model_Version',
        'Electric_Range',
        'Total_Range',
        'Incentives',
        'MSRP',
        'Match_Score',
        'Vehicle_URL',
    ],
)

In [27]:
# Display the resulting DataFrame of scraped vehicles
df