# Part 2 - Scrape TradeMe motors for data on listings for key brands

### Import the required packages

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import csv
import time

### Create list of URL addresses to retrieve listing data from, using the list of manufacturers above.

This script only needs to be run once for each manufacturer, with the only necessary changes being:

- Updating the name of the brand being searched

- Specifying the number of pages of listings to be searched. As the data is live, this was done manually by reviewing the search results in the browser; the last full page being deliberately omitted to avoid errors in scraping non-existent pages.

In [2]:
# Manufacturer whose listings are scraped (becomes part of the URL path):
brand = "toyota"

# Search-result pages follow the pattern:
#   "https://www.trademe.co.nz/a/motors/cars/bmw?page=2"
# Build one URL per results page, for pages 1 through 671 inclusive:
url_list = [
    f"https://www.trademe.co.nz/a/motors/cars/{brand}?page={page_number}"
    for page_number in range(1, 672)
]

# Sanity check: show the first 5 URLs
print(url_list[:5])

['https://www.trademe.co.nz/a/motors/cars/toyota?page=1', 'https://www.trademe.co.nz/a/motors/cars/toyota?page=2', 'https://www.trademe.co.nz/a/motors/cars/toyota?page=3', 'https://www.trademe.co.nz/a/motors/cars/toyota?page=4', 'https://www.trademe.co.nz/a/motors/cars/toyota?page=5']


### Create tab-separated (.tsv) file to store results

In [3]:
# Column names of the output file; the scraping loop writes one row per
# listing using these same keys.
fieldnames = ['listing_id', 'location', 'title', 'sub_title', 'mileage', 'engine_details', 'asking_price', 'vendor', 'sale_type']

# Append mode never truncates a file that already holds scraped rows.
# The encoding is explicit so the output does not depend on the platform's
# default encoding.
with open(f'csv_files/{brand}.tsv', 'a', newline='', encoding='utf-8') as csvfile:

    writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')

    # A file opened for appending starts positioned at its end, so position 0
    # means the file is new or empty. Writing the header only in that case
    # stops a re-run from inserting a second header row in the middle of
    # existing data.
    if csvfile.tell() == 0:
        writer.writeheader()

### Create loop to extract data for each page of results for brand

In [4]:
def _value_or_missing(getter, field):
    """Return ``getter()``, or ``"<field> missing"`` if an element is absent.

    BeautifulSoup's ``find`` returns ``None`` when nothing matches, so chaining
    ``.text`` / ``.get`` / ``.div`` onto a missing element raises
    ``AttributeError``. Only that case is turned into the placeholder; other
    exceptions (including ``KeyboardInterrupt``) propagate.
    """
    try:
        return getter()
    except AttributeError:
        return f"{field} missing"


def _parse_card(card):
    """Build one output row (dict keyed by column name) from a listing card."""
    return {
        # NOTE(review): the id is read from the tmid="location" element -
        # confirm that element really carries the listing id.
        'listing_id': _value_or_missing(
            lambda: card.find('div', tmid="location").get('id'),
            'listing_id'),
        'location': _value_or_missing(
            lambda: card.find('div', tmid="location").text.strip(),
            'location'),
        'title': _value_or_missing(
            lambda: card.find('div', tmid="title").text.strip(),
            'title'),
        # sub_title is deliberately left unstripped, as before.
        'sub_title': _value_or_missing(
            lambda: card.find('div', tmid="subtitle").text,
            'sub_title'),
        'mileage': _value_or_missing(
            lambda: card.find('span', tmid="odometer").text.strip(),
            'mileage'),
        'engine_details': _value_or_missing(
            lambda: card.find('span', tmid="engineDetails").text.strip(),
            'engine_details'),
        'asking_price': _value_or_missing(
            lambda: card.find('span', class_="tm-motors-search-card__price").text.strip(),
            'asking_price'),
        # vendor text comes from the first nested <div> and is left unstripped.
        'vendor': _value_or_missing(
            lambda: card.find('div', tmid="dealer").div.text,
            'vendor'),
        'sale_type': _value_or_missing(
            lambda: card.find('div', class_="tm-motors-search-card__asking-price ng-star-inserted").text.strip(),
            'sale_type'),
    }


# Fetch each results page, extract every listing card and append the rows to
# the brand's .tsv file.
for url in url_list:

    # Pause 3 seconds before every request (presumably to rate-limit the
    # scraper against the live site).
    time.sleep(3)

    # A timeout prevents one stalled connection from hanging the whole run;
    # requests raises an exception if it expires.
    response = requests.get(url, timeout=30)
    if not response.ok:
        # An error page contains no listing cards; report it instead of
        # silently producing zero rows for this page.
        print(f"Skipping {url}: HTTP {response.status_code}")
        continue

    # Parse source data
    soup = BeautifulSoup(response.text, 'lxml')

    # Each listing sits in its own wrapper <div>; build one row per card.
    rows = [
        _parse_card(card)
        for card in soup.find_all('div', class_="tm-motors-search-card__wrapper ng-star-inserted")
    ]

    # Append the page's rows in a single open/close rather than reopening the
    # file for every listing. Opening per page still persists results page by
    # page if a later request fails.
    if rows:
        with open(f'csv_files/{brand}.tsv', 'a', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames, delimiter='\t')
            writer.writerows(rows)