## Import libraries

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

## Data Extracting with BeautifulSoup

In [2]:
# storing the website in a variable
url = "https://www.cars.com/shopping/results/?stock_type=all&makes%5B%5D=mercedes_benz&models%5B%5D=&list_price_max=&maximum_distance=all&zip="

In [3]:
# http requests
response = requests.get(url)

In [4]:
# status code
response.status_code

200

In [5]:
# soup object
soup = BeautifulSoup(response.content, 'html.parser')

In [6]:
soup

<!DOCTYPE html>

<html lang="en">
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<meta content="width=device-width, initial-scale=1.0" name="viewport"/>
<title data-suffix=" | Cars.com">New and used Mercedes-Benz for Sale Near Me | Cars.com</title>
<meta content="Shop Mercedes-Benz vehicles for sale at Cars.com. Research, compare, and save listings, or contact sellers directly from 10,000+ Mercedes-Benz models nationwide." name="description"/>
<meta content="noindex, nofollow" name="robots"/>
<meta content="Cars.com" property="og:site_name"/>
<meta content="website" property="og:type"/>
<meta content="New and used Mercedes-Benz for Sale Near Me | Cars.com" property="og:title"/>
<meta content="https://www.cars.com/shopping/results/" property="og:url"/>
<meta content="Shop Mercedes-Benz vehicles for sale at Cars.com. Research, compare, and save listings, or contact sellers directly from 10,000+ Mercedes-Benz models nationwide." property="og:descript

In [7]:
# results
results = soup.find_all('div', {'class': 'vehicle-card'})

len(results)

20

In [8]:
results[0]

<div class="vehicle-card" data-listing-id="d7b4bb17-74f9-4b0a-9e85-a3754c1bf697" data-tracking-id="1" data-tracking-type="srp-vehicle-card" id="d7b4bb17-74f9-4b0a-9e85-a3754c1bf697" phx-hook="VehicleCard" phx-target="5">
<a class="vehicle-card-visited-tracking-link" href="/vehicledetail/d7b4bb17-74f9-4b0a-9e85-a3754c1bf697/" rel="nofollow" tabindex="-1"></a>
<div class="vehicle-card-main js-gallery-click-card">
<div class="vehicle-card-photos js-gallery-click-gallery">
<div class="gallery-wrap">
<div aria-label="Photo 1 of 32 of 2016 Mercedes-Benz S-Class S 550" class="image-wrap" data-index="0">
<img alt="Photo 1 of 32 of 2016 Mercedes-Benz S-Class S 550" class="vehicle-image" loading="lazy" src="https://platform.cstatic-images.com/large/in/v2/08bc0c06-1396-5466-b09f-0927de420796/0df05db9-8222-43a5-817c-251646d39a40/S7IQi-ep6L308PUwh6-Nfb6A-oY.jpg"/>
</div>
<div aria-label="Photo 2 of 32 of 2016 Mercedes-Benz S-Class S 550" class="image-wrap" data-index="1">
<img alt="Photo 2 of 32 of

## Data to be extracted

- Name
- Year
- Mileage
- Rating
- Reviews count
- Price
- Dealer name

### Name

In [18]:
results[0].find('h2', {'class': 'title'}).get_text()

'2016 Mercedes-Benz S-Class S 550'

### Mileage

In [31]:
results[0].find('div', {'class': 'mileage'}).get_text()

'33,311 mi.'

### Rating

In [11]:
results[0].find('span', {'class': 'sds-rating__count'}).get_text()

'4.4'

### Reviews count

In [30]:
results[0].find('span', {'class': 'sds-rating__link sds-button-link'}).get_text()

'(20 reviews)'

### Price

In [13]:
results[0].find('span', {'class': 'primary-price'}).get_text()

'$84,996'

### Dealer name

In [14]:
results[0].find('div', {'class': 'dealer-name'}).get_text().strip()

'McLaren Charlotte'

## Merging the data

In [32]:
name = []
mileage = []
rating = []
reviews_count = []
dealer_name = []
price = []

for result in results:
    
    # name
    try:
        name.append(result.find('h2', {'class': 'title'}).get_text())
    except:
        name.append('n/a')
        
    # mileage
    try:
        mileage.append(result.find('div', {'class':'mileage'}).get_text())
    except:
        mileage.append('n/a')
    
    # rating
    try:
        rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
    except:
        rating.append('n/a')
                      
    # review_count
    try:
        reviews_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
    except:
        reviews_count.append('n/a')
    
    # dealer_name
    try:
        dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
    except:
        dealer_name.append('n/a')
                           
    #price 
    try:
        price.append(result.find('span', {'class':'primary-price'}).get_text())
    except:
        price.append('n/a')

## Web scraping multiple pages

In [36]:
name = []
mileage = []
rating = []
reviews_count = []
dealer_name = []
price = []

for i in range(1,61):
    
    # storing the website in a variable
    url = "https://www.cars.com/shopping/results/?stock_type=all&makes%5B%5D=mercedes_benz&models%5B%5D=&list_price_max=&maximum_distance=all&zip="

    # http requests
    response = requests.get(url)
    
    # soup object
    soup = BeautifulSoup(response.content, 'html.parser')
    
    # results
    results = soup.find_all('div', {'class': 'vehicle-card'})
    
    # loop through results
    for result in results:
        
        # name
        try:
            name.append(result.find('h2', {'class': 'title'}).get_text())
        except:
            name.append('n/a')
            
        # mileage
        try:
            mileage.append(result.find('div', {'class':'mileage'}).get_text())
        except:
            mileage.append('n/a')
            
        # rating
        try:
            rating.append(result.find('span', {'class':'sds-rating__count'}).get_text())
        except:
            rating.append('n/a')
            
       # review_count
        try:
            reviews_count.append(result.find('span', {'class':'sds-rating__link'}).get_text())
        except:
            reviews_count.append('n/a') 
            
        # dealer_name
        try:
            dealer_name.append(result.find('div', {'class':'dealer-name'}).get_text().strip())
        except:
            dealer_name.append('n/a')
            
        #price 
        try:
            price.append(result.find('span', {'class':'primary-price'}).get_text())
        except:
            price.append('n/a')

## Creating the Mercedes-Benz Dataframe

In [37]:
df_mercedes = df = pd.DataFrame({'Name': name, 'Mileage': mileage, 'Rating': rating, 'Reviews Count': reviews_count, 'Dealer Name': dealer_name,
                  'Price': price})

df

Unnamed: 0,Name,Mileage,Rating,Reviews Count,Dealer Name,Price
0,2016 Mercedes-Benz S-Class S 550,"33,311 mi.",4.4,(20 reviews),McLaren Charlotte,"$84,996"
1,2015 Mercedes-Benz C-Class C 300,"59,970 mi.",4.6,(642 reviews),Koons Toyota Of Westminster,"$24,500"
2,2015 Mercedes-Benz S-Class S 550 4MATIC,"49,919 mi.",5.0,(6 reviews),Platinum Motorsports,"$46,991"
3,2019 Mercedes-Benz AMG GT 53 Base,"19,070 mi.",4.9,"(1,714 reviews)",eCarOne,"$108,950"
4,2017 Mercedes-Benz AMG GT AMG GT,"23,367 mi.",4.5,"(1,090 reviews)",Mercedes-Benz of Houston North,"$92,999"
...,...,...,...,...,...,...
1195,2017 Mercedes-Benz GLE 350 Base 4MATIC,"49,548 mi.",4.8,"(1,548 reviews)",Koons Ford of Baltimore,"$33,500"
1196,2019 Mercedes-Benz GLC 300 Base 4MATIC,"29,949 mi.",4.8,"(1,569 reviews)",Mercedes-Benz of Massapequa,"$41,495"
1197,2021 Mercedes-Benz GLC 300 Base,"6,236 mi.",4.6,(309 reviews),Mercedes-Benz of Tucson,"$50,551"
1198,2015 Mercedes-Benz S-Class S 550 4MATIC,"109,684 mi.",3.8,(532 reviews),Merlex Auto Group,"$30,966"


## Saving the dataframe in a csv file

In [41]:
df_mercedes.to_csv('mercedes.csv').replace()