In [1]:
# import the necessary libraries
import requests
import time
import pandas as pd
from bs4 import BeautifulSoup

In [2]:
def extract_listing_data(listing):
    """Pull the fields of interest out of a single listing card.

    Parameters
    ----------
    listing : bs4.element.Tag
        Element for one listing. It is only queried through ``find``.

    Returns
    -------
    dict
        Keys ``ad_link``, ``title``, ``price``, ``description``, ``mileage``,
        ``transmission``, ``location`` and ``time_posted``. A field is ``None``
        whenever its element cannot be found, including a link without an
        ``href`` attribute or a mileage/transmission container without an
        inner ``<p>``; the function never raises for a missing piece.
    """
    def text_of(element):
        # Stripped text of an element, or None when the lookup found nothing.
        return element.text.strip() if element is not None else None

    def paragraph_text_of(container):
        # Mileage and transmission keep their value in a nested <p>;
        # either the container or the <p> may be absent.
        if container is None:
            return None
        return text_of(container.find('p'))

    def by_testid(tag_name, testid):
        # Look up a child element by tag name and data-testid attribute.
        return listing.find(tag_name, {'data-testid': testid})

    # The listing link doubles as the title element: its text is the ad title.
    link_element = by_testid('a', 'listing-link')

    return {
        # .get() yields None for an anchor without href instead of raising KeyError.
        'ad_link': link_element.get('href') if link_element is not None else None,
        'title': text_of(link_element),
        'price': text_of(by_testid('p', 'autos-listing-price')),
        'description': text_of(by_testid('p', 'listing-description')),
        'mileage': paragraph_text_of(by_testid('div', 'autos-attribute-kilometers')),
        'transmission': paragraph_text_of(by_testid('div', 'autos-attribute-transmission')),
        'location': text_of(by_testid('p', 'listing-location')),
        # Matched by CSS class rather than data-testid; adjust the class name as necessary.
        'time_posted': text_of(listing.find('div', {'class': 'sc-24a49435-12'})),
    }

In [3]:
def scrape_kijiji_cars(base_url, url_suffix, total_pages, delay_seconds=10, timeout=30):
    """Scrape consecutive result pages and collect the data of every listing.

    Page 1 is requested at ``base_url + url_suffix``; page ``n`` (n > 1) at
    ``base_url + "page-n/" + url_suffix``.

    Parameters
    ----------
    base_url : str
        URL prefix that precedes the optional ``page-n/`` segment.
    url_suffix : str
        URL part that follows the optional ``page-n/`` segment.
    total_pages : int
        Number of pages to request, starting at page 1.
    delay_seconds : float, optional
        Pause between two consecutive page requests (default 10).
    timeout : float, optional
        Seconds to wait for the server before giving up on a page (default 30).

    Returns
    -------
    list of dict
        One ``extract_listing_data`` result per listing found, in page order.
        Pages that fail (network error or non-200 status) are reported on
        stdout and skipped, so data from the other pages is still returned.
    """
    all_data = []

    for page_number in range(1, total_pages + 1):
        # Construct the URL for the current page
        if page_number == 1:
            url = f"{base_url}{url_suffix}"
        else:
            url = f"{base_url}page-{page_number}/{url_suffix}"

        print(f"Scraping URL: {url}")

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, timeout=timeout)
        except requests.RequestException as error:
            # Keep what has been collected so far and move on to the next page.
            print(f"Failed to retrieve page {page_number}. Error: {error}")
        else:
            if response.status_code == 200:
                soup = BeautifulSoup(response.text, "html.parser")

                # Listing cards are <li> elements whose data-testid starts with
                # 'listing-card-list-item'.
                listings = soup.find_all(
                    'li',
                    {'data-testid': lambda x: x and x.startswith('listing-card-list-item')},
                )
                all_data.extend(extract_listing_data(listing) for listing in listings)
            else:
                print(f"Failed to retrieve page {page_number}. Status code: {response.status_code}")

        # Pause between requests to avoid overwhelming the server; there is
        # nothing left to wait for after the final page.
        if page_number < total_pages:
            time.sleep(delay_seconds)

    return all_data

In [4]:
# URL layout that scrape_kijiji_cars builds for the Kijiji electric-car search:
#   page 1: base_url + url_suffix
#   page n: base_url + "page-n/" + url_suffix
# base_url ends with "/" and url_suffix has no leading "/", so the joined URL
# contains no doubled slash.
# Long-running cell: the scraper sleeps about 10 s per page, so 108 pages take
# roughly 18 minutes plus request time.

base_url = "https://www.kijiji.ca/b-cars-trucks/canada/electric/"
url_suffix = "c174l0a166?sort=dateDesc"
total_pages = 108  # number of result pages to request
all_listings = scrape_kijiji_cars(base_url, url_suffix, total_pages)  # list with one dict per listing

Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-2/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-3/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-4/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-5/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-6/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-7/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-8/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-9/c174l0a166?sort=dateDesc
Scraping URL: https://www.kijiji.ca/b-cars-trucks/canada/electric/page-10/c174l0a166?sort=dateDesc
Scraping URL: https://www.

In [1]:
# Build a DataFrame with one row per scraped listing and display it.
# Requires `pd` from the import cell and `all_listings` from the scraping cell
# to exist in the current kernel; the NameError recorded for this cell indicates
# it was run in a kernel where the import cell had not been executed.
df = pd.DataFrame(all_listings)
df

NameError: name 'pd' is not defined

In [7]:
# Write the listings to a CSV file (relative path, so it lands in the current
# working directory); index=False leaves the DataFrame's row index out of the file.
csv_file_path = 'kijiji_electric_car_listings.csv'
df.to_csv(csv_file_path, index=False)