# autotrader scraper

In this notebook, I'll be testing out the code provided in this blog post: https://www.shedloadofcode.com/blog/building-an-autotrader-scraper-with-python-to-search-for-multiple-makes-and-models

*Make sure you are in the `webscraper` virtual environment!*

In [None]:
# !python -m pip install numpy pandas requests cloudscraper bs4 xlsxwriter openpyxl 

In [None]:
# !python autotrader-searcher.py

In [1]:
from autotrader_scraper import get_cars
import pandas as pd

In [2]:
# Search and shortlist settings read by the cells that follow.
criteria = dict(
    postcode="LS6 3DT",
    min_year=2016,
    max_year=2023,
    radius=1000,
    min_price=0,
    max_price=100000,
    fuel="Hybrid – Petrol/Electric",
    transmission="Automatic",
    max_mileage=1000000,
    max_attempts_per_page=1,
    verbose=True,
)

In [3]:
# Run the Hyundai Ioniq search. Only the postcode, radius, year range and the
# scraper settings are taken from `criteria` here; write-offs are excluded.
ioniq_search = {
    "make": "Hyundai",
    "model": "Ioniq",
    "postcode": criteria["postcode"],
    "radius": criteria["radius"],
    "min_year": criteria["min_year"],
    "max_year": criteria["max_year"],
    "include_writeoff": "exclude",
    "max_attempts_per_page": criteria["max_attempts_per_page"],
    "verbose": criteria["verbose"],
}
ioniq = get_cars(**ioniq_search)

print("Ioniq search done.")

Year:  2016,      Page:  1,      Response:  <Response [200]>
Year:  2016,      Page:  2,      Response:  <Response [200]>
Year:  2016,      Page:  3,      Response:  <Response [200]>
Found total 12 results for year 2016 across 2 pages
Moving on to year 2017
---------------------------------
Year:  2017,      Page:  1,      Response:  <Response [200]>
Year:  2017,      Page:  2,      Response:  <Response [200]>
Year:  2017,      Page:  3,      Response:  <Response [200]>
Year:  2017,      Page:  4,      Response:  <Response [200]>
Year:  2017,      Page:  5,      Response:  <Response [200]>
Year:  2017,      Page:  6,      Response:  <Response [200]>
Year:  2017,      Page:  7,      Response:  <Response [200]>
Year:  2017,      Page:  8,      Response:  <Response [200]>
Year:  2017,      Page:  9,      Response:  <Response [200]>
Year:  2017,      Page:  10,      Response:  <Response [200]>
Year:  2017,      Page:  11,      Response:  <Response [200]>
Found total 100 results for year 20

In [None]:
# Peek at a single entry (index 10) of the Ioniq search results.
ioniq[10]

In [None]:
# Take `results` from the Corolla search only.
# NOTE(review): `corolla` is not assigned anywhere in the visible part of this
# notebook — confirm the cell that produces it exists and runs before this one.
results = (
    corolla 
)

In [None]:
# Combine the per-model search results into a single `results` collection with `+`.
# NOTE(review): of these names only `ioniq` is assigned in the visible part of this
# notebook; the others presumably come from further get_cars(...) searches — confirm
# they are defined on a fresh kernel before this cell runs.
# NOTE(review): this cell and the Corolla-only cell before it both assign `results`,
# so whichever ran last decides what the following cells process.
results = (
    prius + 
    corolla + 
    ioniq +
    ioniq_5 +
    civic +
    niro +
    EV6 +
    XC40 +
    astra +
    ID4 +
    ID5    
)

In [None]:
# Turn the scraped listings into a typed DataFrame and save it sorted by distance.
print(f"Found {len(results)} total results.")

# Single source of truth for the output path, so the message below cannot drift
# away from the file that is actually written.
output_csv = "autotrader-corolla-df.csv"

df = pd.DataFrame.from_records(results)

# Strip the "£" sign and thousands separators (literal replacements, not regex),
# then convert to a nullable integer. A price that is not a plain number becomes
# <NA> instead of aborting the cell, matching how distance and year are handled.
price_text = (
    df["price"]
    .str.replace("£", "", regex=False)
    .str.replace(",", "", regex=False)
)
df["price"] = pd.to_numeric(price_text, errors="coerce").astype("Int64")

# Pull the number that precedes " mile" out of the seller text; rows without a
# match end up as <NA>.
df["distance"] = df["seller"].str.extract(r'(\d+ mile)', expand=False)
df["distance"] = df["distance"].str.replace(" mile", "")
df["distance"] = pd.to_numeric(df["distance"], errors="coerce").astype("Int64")

# Remove a registration suffix of the form " (NN reg)" so only the year remains.
df["year"] = df["year"].str.replace(r"\s(\(\d\d reg\))", "", regex=True)
df["year"] = pd.to_numeric(df["year"], errors="coerce").astype("Int64")

print(f"{len(df)} cars met the criteria. Saving to '{output_csv}'")

df2 = df.sort_values(by="distance")
df2.to_csv(output_csv)

In [None]:
# Narrow the full table down to listings that satisfy every remaining criterion.
price_in_range = (df["price"] >= criteria["min_price"]) & (df["price"] <= criteria["max_price"])
fuel_matches = df["fuel"] == criteria["fuel"]
mileage_ok = df["mileage"] <= criteria["max_mileage"]
transmission_matches = df["transmission"] == criteria["transmission"]
# Exclude the 1.0L and 1.2L engines (De Morgan form of "not 1.0L and not 1.2L").
engine_ok = ~((df["engine"] == "1.0L") | (df["engine"] == "1.2L"))

matching_rows = df[price_in_range & fuel_matches & mileage_ok & transmission_matches & engine_ok]

print(f"{len(matching_rows)} cars met the criteria. Saving to 'autotrader-shortlist.csv'")

# Nearest listings first, then write the shortlist to disk.
shortlist = matching_rows.sort_values(by="distance")
shortlist.to_csv("autotrader-shortlist.csv")

# Manually adapting the script to scrape additional info from the URL

In [None]:
import cloudscraper
import requests
from bs4 import BeautifulSoup
# Fetch one car-details page so its HTML can be inspected by hand.
# (`url` used to be assigned the same value twice in this cell; once is enough.)
url = 'https://www.autotrader.co.uk/car-details/202303165299047'
# Alternative target (a car-search results page), kept for reference:
# url = 'https://www.autotrader.co.uk/car-search?journey=YOU_MAY_ALSO_LIKE_JOURNEY&advertising-location=at_cars&fuel-type=Electric&include-delivery-option=on&make=Hyundai&model=IONIQ&postcode=LS6%203DT&sort=price-asc&year-from=2020&year-to=2020&flrfc=1#202302164339789'

scraper = cloudscraper.create_scraper()
response = scraper.get(url)

# Raw, undecoded response body (bytes) — not a parsed document.
article = response.content

In [None]:
# Display the raw response body (bytes) taken from `response.content`.
article

In [None]:
# Decode the raw response bytes into text, reading them as UTF-8.
article_decode = str(response.content, 'utf-8')

In [None]:
# Display the page source after decoding it to text.
article_decode

In [None]:
import cloudscraper
from bs4 import BeautifulSoup

# Download the car-details page again, parse it, and look for the listing title.
url = 'https://www.autotrader.co.uk/car-details/202303165299047'
scraper = cloudscraper.create_scraper()
response = scraper.get(url)
html_content = str(response.content, 'utf-8')

soup = BeautifulSoup(html_content, features='html.parser')
title_element = soup.find(name="h3", attrs={"class": "product-card-details__title"})

# Report a missing element instead of failing on it.
if not title_element:
    print("Title element not found")
else:
    title_text = title_element.get_text()
    print(title_text)

In [None]:
# Display the parsed BeautifulSoup document.
soup

In [None]:
# Search the parsed document rather than the decoded text: `article_decode` is a
# plain str, and str.find() expects a substring plus integer offsets, so giving it
# a tag name and an attribute dict raises TypeError.
soup.find("h3", {"class": "product-card-details__title"})

In [None]:
# `article` holds the raw response bytes, and bytes.find() is a substring search
# that rejects a tag name plus attribute dict, so the lookup goes through the
# parsed `soup` instead. A missing element yields None rather than an AttributeError.
title_tag = soup.find("h3", {"class": "product-card-details__title"})
title_tag.text.strip() if title_tag is not None else None

In [None]:
# Build one `car` record (a dict) from a single listing element.
# NOTE(review): `make`, `model` and `keywords` are not defined in the visible part of
# this notebook, and `article` was last assigned raw bytes (`response.content`),
# whereas the lookups below need a parsed BeautifulSoup element — confirm what this
# cell is meant to run against.
car = {}
car["model"] = make + " " + model
car["name"] = article.find("h3", {"class": "product-card-details__title"}).text.strip()

# Keep only the part of the listing link before any query string. Splitting on "?"
# leaves the href intact when it has no "?", whereas slicing with str.find() cut
# off its last character in that case (find() returns -1 when nothing is found).
listing_href = article.find("a", {"class": "listing-fpa-link"})["href"]
car["link"] = "https://www.autotrader.co.uk" + listing_href.split("?", 1)[0]
car["price"] = article.find("div", {"class": "product-card-pricing__price"}).text.strip()

# Collapse every run of whitespace in the seller block into a single space.
seller_info = article.find("ul", {"class": "product-card-seller-info__specs"}).text.strip()
car["seller"] = " ".join(seller_info.split())

key_specs_bs_list = article.find("ul", {"class": "listing-key-specs"}).find_all("li")

# Classify each key-spec entry by the first keyword group whose marker it contains.
# Presumably `keywords` maps a field name to a list of marker substrings — verify
# against where it is defined.
for key_spec_bs_li in key_specs_bs_list:

    key_spec_bs = key_spec_bs_li.text

    if any(keyword in key_spec_bs for keyword in keywords["mileage"]):
        # Number before " miles", with thousands separators removed.
        car["mileage"] = int(key_spec_bs[:key_spec_bs.find(" miles")].replace(",",""))
    elif any(keyword in key_spec_bs for keyword in keywords["BHP"]):
        car["BHP"] = int(key_spec_bs[:key_spec_bs.find("BHP")])
    elif any(keyword in key_spec_bs for keyword in keywords["transmission"]):
        car["transmission"] = key_spec_bs
    elif any(keyword in key_spec_bs for keyword in keywords["fuel"]):
        car["fuel"] = key_spec_bs
    elif any(keyword in key_spec_bs for keyword in keywords["owners"]):
        car["owners"] = int(key_spec_bs[:key_spec_bs.find(" owners")])
    elif any(keyword in key_spec_bs for keyword in keywords["body"]):
        car["body"] = key_spec_bs
    elif any(keyword in key_spec_bs for keyword in keywords["ULEZ"]):
        car["ULEZ"] = key_spec_bs
    elif any(keyword in key_spec_bs for keyword in keywords["year"]):
        car["year"] = key_spec_bs
    # Engine size: "." as second character and "L" as fourth. The length guard keeps
    # shorter or empty entries from raising IndexError.
    elif len(key_spec_bs) > 3 and key_spec_bs[1] == "." and key_spec_bs[3] == "L":
        car["engine"] = key_spec_bs