### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd

### HTTP Request

#### Store website in variable

In [2]:
# Search-results URL to scrape. The query string sets stock_type=cpo
# (presumably "certified pre-owned" - confirm) and makes[]=mercedes_benz,
# with maximum_distance=all and the model, max-price, and zip filters left empty.
website = 'https://www.cars.com/shopping/results/?stock_type=cpo&makes%5B%5D=mercedes_benz&models%5B%5D&list_price_max&maximum_distance=all&zip'

#### Get Request

In [3]:
# Download the results page. requests applies no timeout by default, so an
# unresponsive server would block this cell forever; give up after 30 seconds.
response = requests.get(website, timeout=30)

#### Status Code

In [4]:
# Show the HTTP status code of the response; 200 means the request succeeded.
response.status_code

200

### Soup Object

In [5]:
# Parse the raw response bytes with Python's built-in HTML parser.
soup = BeautifulSoup(markup=response.content, features='html.parser')

### Results

In [7]:
# Collect every <div> whose CSS class is "vehicle-card".
results = soup.find_all(name='div', attrs={'class': 'vehicle-card'})

In [8]:
# Number of vehicle cards found on this results page.
len(results)

20

### Target necessary data

In [9]:
# Fields to extract from each vehicle card:
#   - Name
#   - Mileage
#   - Rating
#   - Rating Count
#   - Price
#   - Dealer

#### Name

In [10]:
# The listing title is the text of the first card's <h2> element.
name = results[0].find(name='h2').text
print(name)

2020 Mercedes-Benz GLE 450 AWD 4MATIC


#### Mileage

In [11]:
# Mileage text comes from the first card's <div class="mileage">.
mileage = results[0].find(name='div', attrs={'class': 'mileage'}).text
print(mileage)

50,171 mi.


#### Rating

In [12]:
# The rating value is the text of <span class="sds-rating__count">.
rating = results[0].find(name='span', attrs={'class': 'sds-rating__count'}).text
print(rating)

2.5


#### Rating Count

In [13]:
# The review count is the text of <span class="sds-rating__link">.
rating_count = results[0].find(name='span', attrs={'class': 'sds-rating__link'}).text
print(rating_count)

(55 reviews)


#### Price

In [14]:
# The asking price is the text of <span class="primary-price">.
price = results[0].find(name='span', attrs={'class': 'primary-price'}).text
print(price)

$55,985


#### Dealer

In [15]:
# The dealer name is the text of <div class="dealer-name">.
dealer_name = results[0].find(name='div', attrs={'class': 'dealer-name'}).text

In [16]:
# str.strip() returns a new string rather than modifying the original, so the
# result must be assigned back for the surrounding whitespace to be removed.
dealer_name = dealer_name.strip()
print(dealer_name)


Mercedes-Benz of Pleasanton



### Put everything together inside a For-loop

In [17]:
# One list per output column. Every vehicle card appends exactly one entry to
# each list, so the lists stay aligned row-for-row.
name = []
mileage = []
dealer = []
rating = []
rating_count = []
price = []

# How to locate each field inside a vehicle card:
# (target list, tag name, attribute filter, strip surrounding whitespace?)
field_specs = [
    (name, 'h2', {}, False),
    (mileage, 'div', {'class': 'mileage'}, False),
    (dealer, 'div', {'class': 'dealer-name'}, True),
    (rating, 'span', {'class': 'sds-rating__count'}, False),
    (rating_count, 'span', {'class': 'sds-rating__link'}, False),
    (price, 'span', {'class': 'primary-price'}, False),
]

for result in results:
    for target, tag, attrs, strip_text in field_specs:
        element = result.find(tag, attrs)
        if element is None:
            # find() returns None when the card has no such element. Record a
            # placeholder for that case only, instead of a bare `except:` that
            # would also hide unrelated errors (and KeyboardInterrupt).
            target.append('n/a')
            continue
        text = element.get_text()
        target.append(text.strip() if strip_text else text)

### Create Pandas Dataframe

In [18]:
# Assemble the scraped lists into a table with one row per vehicle card.
car_dealer = pd.DataFrame({
    'Name': name,
    'Mileage (mi.)': mileage,
    'Dealer': dealer,
    'Rating': rating,
    'Rating Count': rating_count,
    'Price': price,  # label was previously misspelled as 'Pirce'
})

In [19]:
# Preview the first rows of the scraped table before cleaning.
car_dealer.head()

Unnamed: 0,Name,Mileage (mi.),Dealer,Rating,Rating Count,Pirce
0,2020 Mercedes-Benz GLE 450 AWD 4MATIC,"50,171 mi.",Mercedes-Benz of Pleasanton,2.5,(55 reviews),"$55,985"
1,2019 Mercedes-Benz AMG C 63 S,"34,922 mi.",Mercedes-Benz of San Jose,4.6,(655 reviews),"$67,963"
2,2019 Mercedes-Benz CLA 250 Base 4MATIC,"32,763 mi.",Mercedes-Benz of Tysons Corner,4.5,(834 reviews),"$32,495"
3,2019 Mercedes-Benz GLS 450 Base 4MATIC,"27,964 mi.",Mercedes-Benz of Fort Washington,4.6,"(2,328 reviews)","$59,222"
4,2021 Mercedes-Benz C-Class C 300,"17,331 mi.",Mercedes-Benz of South Orlando,4.7,"(1,229 reviews)","$58,477"


#### Data Cleaning

In [20]:
# Reduce "(55 reviews)" to "55". str.strip('reviews)') removes a *set of
# characters* from both ends, not a suffix, so it stops at the space and leaves
# "55 " behind. Remove the opening "(" and the " review(s))" suffix explicitly,
# then trim whitespace. Values without that pattern (e.g. 'n/a') pass through.
car_dealer['Rating Count'] = (
    car_dealer['Rating Count']
    .str.replace(r'^\s*\(|\s*reviews?\)\s*$', '', regex=True)
    .str.strip()
)

In [21]:
# Reduce "50,171 mi." to "50,171". str.strip('mi.') removes a *set of
# characters* from both ends, not a suffix, so the space before "mi." was left
# behind. Drop the unit suffix explicitly, then trim whitespace. Values without
# the suffix (e.g. 'n/a') pass through unchanged.
car_dealer['Mileage (mi.)'] = (
    car_dealer['Mileage (mi.)']
    .str.replace(r'\s*mi\.\s*$', '', regex=True)
    .str.strip()
)

In [22]:
# Preview the first rows again to check the cleaned columns.
car_dealer.head()

Unnamed: 0,Name,Mileage (mi.),Dealer,Rating,Rating Count,Pirce
0,2020 Mercedes-Benz GLE 450 AWD 4MATIC,50171,Mercedes-Benz of Pleasanton,2.5,55,"$55,985"
1,2019 Mercedes-Benz AMG C 63 S,34922,Mercedes-Benz of San Jose,4.6,655,"$67,963"
2,2019 Mercedes-Benz CLA 250 Base 4MATIC,32763,Mercedes-Benz of Tysons Corner,4.5,834,"$32,495"
3,2019 Mercedes-Benz GLS 450 Base 4MATIC,27964,Mercedes-Benz of Fort Washington,4.6,2328,"$59,222"
4,2021 Mercedes-Benz C-Class C 300,17331,Mercedes-Benz of South Orlando,4.7,1229,"$58,477"


### Output in Excel

In [25]:
# Write the single-page table to an Excel workbook, omitting the row index.
car_dealer.to_excel(excel_writer='single_page_car.xlsx', index=False)

### Pagination

In [27]:
# One list per output column, filled across all result pages. Every vehicle
# card appends exactly one entry to each list so the lists stay aligned.
name = []
mileage = []
dealer = []
rating = []
rating_count = []
price = []

# How to locate each field inside a vehicle card:
# (target list, tag name, attribute filter, strip surrounding whitespace?)
field_specs = [
    (name, 'h2', {}, False),
    (mileage, 'div', {'class': 'mileage'}, False),
    (dealer, 'div', {'class': 'dealer-name'}, True),
    (rating, 'span', {'class': 'sds-rating__count'}, False),
    (rating_count, 'span', {'class': 'sds-rating__link'}, False),
    (price, 'span', {'class': 'primary-price'}, False),
]

# Walk result pages 1 through 20 (20 listings requested per page).
for page in range(1, 21):

    website = 'https://www.cars.com/shopping/results/?page=' + str(page) + '&page_size=20&list_price_max=&makes[]=mercedes_benz&maximum_distance=all&models[]=&stock_type=cpo&zip='
    # requests applies no timeout by default; without one a single stalled
    # page would hang the whole scrape.
    response = requests.get(website, timeout=30)
    if not response.ok:
        # Report failed pages instead of silently parsing an error page.
        print('Skipping page {}: HTTP {}'.format(page, response.status_code))
        continue

    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('div', {'class':'vehicle-card'})

    for result in results:
        for target, tag, attrs, strip_text in field_specs:
            element = result.find(tag, attrs)
            if element is None:
                # find() returns None when the card has no such element.
                # Record a placeholder for that case only, instead of a bare
                # `except:` that would also hide unrelated errors.
                target.append('n/a')
                continue
            text = element.get_text()
            target.append(text.strip() if strip_text else text)
    

In [28]:
# Assemble the lists gathered from all pages into one table (one row per card).
car_dealer = pd.DataFrame({
    'Name': name,
    'Mileage (mi.)': mileage,
    'Dealer': dealer,
    'Rating': rating,
    'Rating Count': rating_count,
    'Price': price,  # label was previously misspelled as 'Pirce'
})

In [29]:
# str.strip(chars) removes a *set of characters* from both ends, not a
# prefix/suffix, so strip('reviews)') and strip('mi.') stop at the space and
# leave it behind ("55 ", "50,171 "). Remove the decorations explicitly and
# trim whitespace instead. Values without the pattern (e.g. 'n/a') pass through.

# "(55 reviews)" -> "55"
car_dealer['Rating Count'] = (
    car_dealer['Rating Count']
    .str.replace(r'^\s*\(|\s*reviews?\)\s*$', '', regex=True)
    .str.strip()
)

# "50,171 mi." -> "50,171"
car_dealer['Mileage (mi.)'] = (
    car_dealer['Mileage (mi.)']
    .str.replace(r'\s*mi\.\s*$', '', regex=True)
    .str.strip()
)

In [30]:
# Write the multi-page table to an Excel workbook, omitting the row index.
car_dealer.to_excel(excel_writer='car_listing.xlsx', index=False)