### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### HTTP Request

#### store website in variable

In [2]:
website = 'https://www.cars.com/shopping/results/?stock_type=cpo&makes%5B%5D=mercedes_benz&models%5B%5D=&list_price_max=&maximum_distance=20&zip='

#### Get Request

In [3]:
response = requests.get(website)

#### Status Code

In [4]:
response.status_code

200

### Soup Object

In [5]:
soup = BeautifulSoup(response.content, 'html.parser')

### Results

In [6]:
results = soup.find_all('div', {'class' : 'vehicle-card'})

In [7]:
len(results)

21

### Target necessary data

In [8]:
# Name
# Mileage
# Dealer Name
# Rating
# Rating Count
# Price

#### Name

In [9]:
results[0].find('h2').get_text()

'2020 Mercedes-Benz AMG C 43 Base 4MATIC'

#### Mileage

In [10]:
results[0].find('div', {'class':'mileage'}).get_text()

'29,790 mi.'

#### Dealer Name

In [11]:
results[0].find('div', {'class':'dealer-name'}).get_text().strip()

'Mercedes-Benz of Kansas City'

#### Rating

In [12]:
rating_count_element = results[0].find('span', {'class': 'sds-rating__count'})
rating_count = rating_count_element.get_text() if rating_count_element else "N/A"

#### Review Count

In [13]:
rating_link_element = results[0].find('span', {'class': 'sds-rating__link'})
rating_link = rating_link_element.get_text() if rating_link_element else "N/A"

#### Price

In [14]:
results[0].find('span', {'class':'primary-price'}).get_text()

'$47,698'

### Put everything together inside a For-Loop

In [15]:
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for result in results:
    
    # name
    try:
        name.append(result.find('h2').get_text()) 
    except:
        name.append('n/a')
    
    # mileage
    try:
        mileage.append(result.find('div', {'class':'mileage'}).get_text())
    except:
        mileage.append('n/a')
    
    # dealer_name
    try:
        dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
    except:
        dealer_name.append('n/a')
        
    # rating
    try:
        rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
    except:
        rating.append('n/a')
    
    # review_count
    try:
        review_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
    except:
        review_count.append('n/a')
    
    #price 
    try:
        price.append(result.find('span', {'class':'primary-price'}).get_text())
    except:
        price.append('n/a')

### Create Pandas Dataframe

In [16]:
# dictionary
car_dealer = pd.DataFrame({'Name': name, 'Mileage':mileage, 'Dealer Name':dealer_name,
                                'Rating': rating, 'Review Count': review_count, 'Price': price})

In [17]:
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG C 43 Base 4MATIC,"29,790 mi.",Mercedes-Benz of Kansas City,,,"$47,698"
1,2020 Mercedes-Benz GLS 450 Base 4MATIC,"38,260 mi.",Mercedes-Benz of White Plains,4.3,"(1,647 reviews)","$54,924"
2,2023 Mercedes-Benz GLS 450 4MATIC,"10,456 mi.",Mercedes-Benz of Silver Spring,4.9,"(2,002 reviews)","$83,900"
3,2019 Mercedes-Benz AMG GT 63 S 4-Door,"4,321 mi.",Mercedes-Benz of Hoffman Estates,4.6,"(1,210 reviews)","$125,999"
4,2020 Mercedes-Benz GLE 580 AWD 4MATIC,"33,365 mi.",Mercedes-Benz of Manchester,4.4,(761 reviews),"$56,989"
5,2021 Mercedes-Benz AMG C 43 Base 4MATIC,"18,182 mi.",Mercedes-Benz of Edison,5.0,"(4,416 reviews)","$54,599"
6,2023 Mercedes-Benz AMG C 43 Base 4MATIC,"2,410 mi.",Mercedes-Benz of Hunt Valley,4.7,(771 reviews),"$60,479"
7,2019 Mercedes-Benz A-Class A 220 4MATIC,"28,983 mi.",Mercedes-Benz of Arrowhead,2.7,(235 reviews),"$25,988"
8,2023 Mercedes-Benz AMG C 43 Base 4MATIC,799 mi.,Mercedes-Benz of Barrington,4.8,(488 reviews),"$62,955"
9,2022 Mercedes-Benz GLE 350 Base 4MATIC,"18,335 mi.",Mercedes-Benz of Barrington,4.8,(488 reviews),"$56,999"


#### Data Cleaning

In [18]:
car_dealer['Review Count'] = car_dealer['Review Count'].apply(lambda x: x.strip('reviews)').strip('('))

In [19]:
# dataframe updated
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG C 43 Base 4MATIC,"29,790 mi.",Mercedes-Benz of Kansas City,,,"$47,698"
1,2020 Mercedes-Benz GLS 450 Base 4MATIC,"38,260 mi.",Mercedes-Benz of White Plains,4.3,1647.0,"$54,924"
2,2023 Mercedes-Benz GLS 450 4MATIC,"10,456 mi.",Mercedes-Benz of Silver Spring,4.9,2002.0,"$83,900"
3,2019 Mercedes-Benz AMG GT 63 S 4-Door,"4,321 mi.",Mercedes-Benz of Hoffman Estates,4.6,1210.0,"$125,999"
4,2020 Mercedes-Benz GLE 580 AWD 4MATIC,"33,365 mi.",Mercedes-Benz of Manchester,4.4,761.0,"$56,989"
5,2021 Mercedes-Benz AMG C 43 Base 4MATIC,"18,182 mi.",Mercedes-Benz of Edison,5.0,4416.0,"$54,599"
6,2023 Mercedes-Benz AMG C 43 Base 4MATIC,"2,410 mi.",Mercedes-Benz of Hunt Valley,4.7,771.0,"$60,479"
7,2019 Mercedes-Benz A-Class A 220 4MATIC,"28,983 mi.",Mercedes-Benz of Arrowhead,2.7,235.0,"$25,988"
8,2023 Mercedes-Benz AMG C 43 Base 4MATIC,799 mi.,Mercedes-Benz of Barrington,4.8,488.0,"$62,955"
9,2022 Mercedes-Benz GLE 350 Base 4MATIC,"18,335 mi.",Mercedes-Benz of Barrington,4.8,488.0,"$56,999"


### Output in Excel

In [20]:
!pip3 install openpyxl
car_dealer.to_excel('./assets/car_dealer_single_page.xlsx', index=False)

Defaulting to user installation because normal site-packages is not writeable


### Part 2 - Pagination 

In [21]:
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for i in range (1,11):
    
    # website in variable
    website = 'https://www.cars.com/shopping/results/?page='+ str(i) +'&page_size=20&dealer_id=&list_price_max=&list_price_min=&makes[]=mercedes_benz&maximum_distance=20&mileage_max=&sort=best_match_desc&stock_type=cpo&year_max=&year_min=&zip=' 
    
    # request to website
    response = requests.get(website)
    
    # soup object
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # results
    results = soup.find_all('div', {'class' : 'vehicle-card'})
    
    # loop through results
    for result in results:
    
        # name
        try:
            name.append(result.find('h2').get_text()) 
        except:
            name.append('n/a')

        # mileage
        try:
            mileage.append(result.find('div', {'class':'mileage'}).get_text())
        except:
            mileage.append('n/a')

        # dealer_name
        try:
            dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
        except:
            dealer_name.append('n/a')

        # rating
        try:
            rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
        except:
            rating.append('n/a')

        # review_count
        try:
            review_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
        except:
            review_count.append('n/a')

        #price 
        try:
            price.append(result.find('span', {'class':'primary-price'}).get_text())
        except:
            price.append('n/a')

In [22]:
# dictionary
car_dealer = pd.DataFrame({'Name': name, 'Mileage':mileage, 'Dealer Name':dealer_name,
                                'Rating': rating, 'Review Count': review_count, 'Price': price})

In [23]:
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG C 43 Base 4MATIC,"29,790 mi.",Mercedes-Benz of Kansas City,,,"$47,698"
1,2023 Mercedes-Benz GLS 450 4MATIC,"10,456 mi.",Mercedes-Benz of Silver Spring,4.9,"(2,002 reviews)","$83,900"
2,2020 Mercedes-Benz GLS 450 Base 4MATIC,"38,260 mi.",Mercedes-Benz of White Plains,4.3,"(1,647 reviews)","$54,924"
3,2019 Mercedes-Benz AMG GT 63 S 4-Door,"4,321 mi.",Mercedes-Benz of Hoffman Estates,4.6,"(1,210 reviews)","$125,999"
4,2021 Mercedes-Benz AMG C 43 Base 4MATIC,"18,182 mi.",Mercedes-Benz of Edison,5.0,"(4,416 reviews)","$54,599"
...,...,...,...,...,...,...
205,2022 Mercedes-Benz A-Class A 220,"12,678 mi.",Mercedes-Benz of West Houston,4.6,(75 reviews),"$33,893"
206,2023 Mercedes-Benz AMG GLB 35 Base,"4,048 mi.",Mercedes-Benz of Littleton,4.8,"(1,216 reviews)","$54,426"
207,2023 Mercedes-Benz AMG GLB 35 Base,"3,166 mi.",Mercedes-Benz of Hoffman Estates,4.6,"(1,210 reviews)","$50,999"
208,2023 Mercedes-Benz AMG C 43 Base 4MATIC,"2,585 mi.",Mercedes-Benz of Raleigh,5.0,"(1,766 reviews)","$77,197"


In [24]:
car_dealer['Review Count'] = car_dealer['Review Count'].apply(lambda x: x.strip('reviews)').strip('('))

In [25]:
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2020 Mercedes-Benz AMG C 43 Base 4MATIC,"29,790 mi.",Mercedes-Benz of Kansas City,,,"$47,698"
1,2023 Mercedes-Benz GLS 450 4MATIC,"10,456 mi.",Mercedes-Benz of Silver Spring,4.9,2002,"$83,900"
2,2020 Mercedes-Benz GLS 450 Base 4MATIC,"38,260 mi.",Mercedes-Benz of White Plains,4.3,1647,"$54,924"
3,2019 Mercedes-Benz AMG GT 63 S 4-Door,"4,321 mi.",Mercedes-Benz of Hoffman Estates,4.6,1210,"$125,999"
4,2021 Mercedes-Benz AMG C 43 Base 4MATIC,"18,182 mi.",Mercedes-Benz of Edison,5.0,4416,"$54,599"
...,...,...,...,...,...,...
205,2022 Mercedes-Benz A-Class A 220,"12,678 mi.",Mercedes-Benz of West Houston,4.6,75,"$33,893"
206,2023 Mercedes-Benz AMG GLB 35 Base,"4,048 mi.",Mercedes-Benz of Littleton,4.8,1216,"$54,426"
207,2023 Mercedes-Benz AMG GLB 35 Base,"3,166 mi.",Mercedes-Benz of Hoffman Estates,4.6,1210,"$50,999"
208,2023 Mercedes-Benz AMG C 43 Base 4MATIC,"2,585 mi.",Mercedes-Benz of Raleigh,5.0,1766,"$77,197"
