# Part 1 - Determine which brands require further analysis

### Import the required packages

In [1]:
import pandas as pd
from bs4 import BeautifulSoup
import requests
import time

### Import list of car manufacturers listed on TradeMe:

This list was created by copying the inner HTML of the car-make dropdown menu on the TradeMe website, removing the surrounding markup, and saving the result to a .csv file.
Only small formatting changes were needed so that each name matches the form used in TradeMe URLs, e.g. 'Aston Martin' became 'Aston-Martin'.

In [2]:
# Load the manufacturer names saved from the TradeMe dropdown menu
car_makes_df = pd.read_csv(filepath_or_buffer='csv_files/CarManufacturers.csv')

### Check list of manufacturers:

In [3]:
# Preview the first five manufacturers
car_makes_df.head(n=5)

Unnamed: 0,Make
0,Alfa-Romeo
1,Aston-Martin
2,Audi
3,Austin
4,Bentley


### Create list of URL addresses to retrieve listing data from, using the list of manufacturers above.

In [4]:
# Base URL shared by every manufacturer's search page:
url = 'https://www.trademe.co.nz/a/motors/cars/'

# One search URL per manufacturer: the base URL followed by the manufacturer name
url_list = [url + make for make in car_makes_df['Make']]

In [5]:
# Sanity check: show the first three URLs that were built
print(url_list[:3])

['https://www.trademe.co.nz/a/motors/cars/Alfa-Romeo', 'https://www.trademe.co.nz/a/motors/cars/Aston-Martin', 'https://www.trademe.co.nz/a/motors/cars/Audi']


### Create empty dictionary to hold results from webscraper

In [6]:
# Maps brand name -> listing-count text scraped for that brand (or 'Error')
results = dict()

### Create loop to request each URL from the list, retrieve the number of listings from the first `<h3>` heading on the page, and save the result to the results dictionary

In [7]:
# Seconds to wait between requests, to avoid spamming the TradeMe server
REQUEST_DELAY_SECONDS = 3.5
# Give up on a request that has not responded after this many seconds,
# so a single unresponsive page cannot hang the whole run
REQUEST_TIMEOUT_SECONDS = 30

# Scrape the listing-count heading for every brand URL.
# For each URL the brand name and the text of the page's first <h3> are printed
# and stored in `results`; if the request fails or the page has no <h3>,
# 'Error' is printed and stored instead and the loop moves on to the next brand.
# The loop variable is named `listing_url` so that the base `url` defined
# earlier in the notebook is not overwritten.
for listing_url in url_list:
    # Brand name is the final path segment of the URL (e.g. '.../cars/Audi' -> 'Audi')
    brand = listing_url.rsplit('/', 1)[-1]
    try:
        # Fetch the page source
        source = requests.get(listing_url, timeout=REQUEST_TIMEOUT_SECONDS).text
        # Parse source data
        soup = BeautifulSoup(source, 'lxml')
        # First <h3> on the page; on the pages seen so far it reads e.g. ' Showing 136 results'
        h3 = soup.find('h3')
        # soup.find returns None when there is no <h3>, so .text raises AttributeError
        listing_text = h3.text
    except (requests.RequestException, AttributeError):
        # Only network failures and a missing heading are treated as a per-brand
        # error; anything else (including KeyboardInterrupt) still propagates.
        print(brand + ': Error')
        results[brand] = 'Error'
    else:
        # Print result to terminal and save it to the dictionary created above
        print(brand + " : " + listing_text)
        results[brand] = listing_text
    # Pause, to avoid spamming TradeMe server
    time.sleep(REQUEST_DELAY_SECONDS)

Alfa-Romeo :  Showing 136 results

Aston-Martin :  Showing 42 results

Audi :  Showing 2,837 results

Austin: Error
Bentley :  Showing 57 results

BMW :  Showing 3,951 results

BYD :  Showing 32 results

Cadillac :  Showing 42 results

Chery :  Showing 9 results

Chevrolet :  Showing 415 results

Chrysler :  Showing 172 results

Citroen :  Showing 154 results

Cupra :  Showing 44 results

Daewoo :  Showing 1 result

Daihatsu :  Showing 113 results

Daimler :  Showing 14 results

Dodge :  Showing 257 results

DS-Automobiles :  Showing 0 results

Ferrari :  Showing 33 results

Fiat :  Showing 132 results

Ford :  Showing 4,603 results

Foton :  Showing 40 results

Geely :  Showing 0 results

GMC :  Showing 45 results

GWM :  Showing 242 results

HAVAL :  Showing 389 results

Holden :  Showing 3,631 results

Honda :  Showing 2,930 results

Hummer :  Showing 9 results

Hyundai :  Showing 2,224 results

INFINITI :  Showing 17 results

Isuzu :  Showing 666 results

Iveco :  Showing 4 results

### Create a dataframe with the listing count, from the dictionary created by the loop above.

In [8]:
# Build a dataframe from the scraped results: one row per brand, indexed by brand name
count_listings_df = pd.DataFrame.from_dict(data=results, orient='index')

In [9]:
# Label the dataframe's only column 'Results' (assigning a one-item list
# requires the frame to have exactly one column)
count_listings_df.columns = ['Results']

### Check result:

In [10]:
# Preview the first five rows of the listing counts
count_listings_df.head(n=5)

Unnamed: 0,Results
Alfa-Romeo,Showing 136 results\n
Aston-Martin,Showing 42 results\n
Audi,"Showing 2,837 results\n"
Austin,Error
Bentley,Showing 57 results\n


### Save the result to a .csv file for safekeeping:

In [12]:
# Persist the scraped counts (index = brand name) so the scrape need not be repeated
count_listings_df.to_csv(path_or_buf='csv_files/brand_results.csv')