# autotrader scraper

In this notebook, I'll be testing out the code provided in this blog post: https://www.shedloadofcode.com/blog/building-an-autotrader-scraper-with-python-to-search-for-multiple-makes-and-models

*Make sure you are in the `webscraper` virtual environment!*

In [None]:
# One-off dependency install, kept commented out so it does not run on every
# execution of the notebook. Uncomment and run once in a fresh environment:
# !python -m pip install numpy pandas requests cloudscraper bs4 xlsxwriter openpyxl

In [None]:
# Run autotrader-searcher.py as a shell command (IPython `!` escape); its output
# is shown below the cell.
!python autotrader-searcher.py

In [None]:
from autotrader_scraper import get_cars
import pandas as pd

In [None]:
# Shared search settings. The location, year and scraper options are handed to
# every get_cars() call; the price, fuel, transmission and mileage values are
# applied afterwards when the shortlist is filtered.
criteria = dict(
    postcode="LS6 3DT",
    min_year=2015,
    max_year=2022,
    radius=1000,
    min_price=0,
    max_price=100000,
    fuel="Hybrid – Petrol/Electric",
    transmission="Automatic",
    max_mileage=1000000,
    max_attempts_per_page=1,
    verbose=True,
)

In [None]:
# Toyota Prius search. The location, year range and scraper options are copied
# straight from the shared `criteria` dict; include_writeoff is fixed to "exclude".
prius = get_cars(
    make="Toyota",
    model="Prius",
    include_writeoff="exclude",
    **{
        option: criteria[option]
        for option in (
            "postcode",
            "radius",
            "min_year",
            "max_year",
            "max_attempts_per_page",
            "verbose",
        )
    },
)

print("Prius search done.")

In [None]:
# Toyota Corolla search.
#
# Per the original note on this cell, there are no 2015 Corolla listings (they
# start at 2019) and searching the empty years makes the scraper keep requesting
# non-existent pages. The note described an exception that was never actually
# applied -- the shared 2015 lower bound was still being passed -- so clamp the
# lower bound here. max() keeps any later shared min_year intact.
COROLLA_FIRST_YEAR = 2019

corolla = get_cars(
    make = "Toyota",
    model = "Corolla",
    postcode = criteria["postcode"],
    radius = criteria["radius"],
    min_year = max(criteria["min_year"], COROLLA_FIRST_YEAR),
    max_year = criteria["max_year"],
    include_writeoff = "exclude",
    max_attempts_per_page = criteria["max_attempts_per_page"],
    verbose = criteria["verbose"]
)

print("Corolla search done.")

In [None]:
# Hyundai Ioniq search, driven by the shared `criteria` settings with
# include_writeoff fixed to "exclude".
ioniq = get_cars(
    make="Hyundai",
    model="Ioniq",
    include_writeoff="exclude",
    **{
        setting: criteria[setting]
        for setting in (
            "postcode",
            "radius",
            "min_year",
            "max_year",
            "max_attempts_per_page",
            "verbose",
        )
    },
)

print("Ioniq search done.")

In [None]:
# Hyundai Ioniq 5 search, driven by the shared `criteria` settings with
# include_writeoff fixed to "exclude".
ioniq_5 = get_cars(
    make="Hyundai",
    model="Ioniq 5",
    include_writeoff="exclude",
    **{
        field: criteria[field]
        for field in (
            "postcode",
            "radius",
            "min_year",
            "max_year",
            "max_attempts_per_page",
            "verbose",
        )
    },
)

print("Ioniq 5 search done.")

In [None]:
def _search_model(make, model):
    """Run one get_cars() search with the shared `criteria` and report completion.

    Args:
        make: Manufacturer name passed to get_cars (e.g. "Kia").
        model: Model name passed to get_cars (e.g. "Niro"); also used in the
            "<model> search done." progress message.

    Returns:
        Whatever get_cars returns for this make/model.
    """
    cars = get_cars(
        make=make,
        model=model,
        postcode=criteria["postcode"],
        radius=criteria["radius"],
        min_year=criteria["min_year"],
        max_year=criteria["max_year"],
        include_writeoff="exclude",
        max_attempts_per_page=criteria["max_attempts_per_page"],
        verbose=criteria["verbose"],
    )
    print(f"{model} search done.")
    return cars


# One search per model. The variable names are kept as they were because the
# later cells combine them by name.
civic = _search_model("Honda", "Civic")
niro = _search_model("Kia", "Niro")
EV6 = _search_model("Kia", "EV6")
XC40 = _search_model("Volvo", "XC40")
astra = _search_model("Vauxhall", "Astra")
ID4 = _search_model("Volkswagen", "ID.4")
ID5 = _search_model("Volkswagen", "ID.5")

In [None]:
# Scratch expression: concatenates the three literals into the string "make model".
"make" + " " + "model"

In [None]:
# Spot-check: display the item at index 1 of the Astra search results.
astra[1]

In [None]:
# Pool the per-model search results into a single collection of listings.
results = (
    prius +
    corolla +
    ioniq +
    ioniq_5 +
    civic +
    niro +
    EV6 +
    XC40 +
    astra +
    ID4 +
    ID5
)

print(f"Found {len(results)} total results.")

df = pd.DataFrame.from_records(results)

# Strip the pound sign and thousands separators, then convert to integers.
# regex=False makes the replacements literal regardless of the pandas version's
# default for `regex`.
df["price"] = (
    df["price"]
    .str.replace("£", "", regex=False)
    .str.replace(",", "", regex=False)
    .astype(int)
)

# Pull the number that precedes " mile" out of the seller text; rows without a
# match become <NA> in the nullable integer column.
df["distance"] = df["seller"].str.extract(r"(\d+) mile", expand=False)
df["distance"] = pd.to_numeric(df["distance"], errors="coerce").astype("Int64")

# Drop the trailing " (NN reg)" suffix from the year before converting it.
df["year"] = df["year"].str.replace(r"\s(\(\d\d reg\))", "", regex=True)
df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")

# No filtering has happened yet, so report the full count and the file that is
# actually written (the old message named 'autotrader-df-corolla.csv').
print(f"Saving all {len(df)} cars to 'autotrader-df.csv'")

# Nearest listings first in the saved file.
df2 = df.sort_values(by="distance")
df2.to_csv("autotrader-df.csv")

In [None]:
# Keep only the listings that sit inside the price band, match the wanted fuel
# type and transmission, stay under the mileage cap, and do not have a 1.0L or
# 1.2L engine.
shortlist = df.loc[
    df["price"].between(criteria["min_price"], criteria["max_price"])
    & df["fuel"].eq(criteria["fuel"])
    & df["mileage"].le(criteria["max_mileage"])
    & df["transmission"].eq(criteria["transmission"])
    & ~df["engine"].isin(["1.0L", "1.2L"])
]

print(f"{len(shortlist)} cars met the criteria. Saving to 'autotrader-shortlist.csv'")

# Nearest cars first, then write the shortlist to disk.
shortlist = shortlist.sort_values("distance")
shortlist.to_csv("autotrader-shortlist.csv")