# Web Scraping a Car Dealer Website



### Imports

In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### HTTP Request

#### Store the website URL in a variable

In [2]:
# Search URL for certified pre-owned Mercedes-Benz listings, one query parameter per line
website = (
    'https://www.cars.com/shopping/results/'
    '?stock_type=cpo'
    '&makes%5B%5D=mercedes_benz'
    '&models%5B%5D='
    '&list_price_max='
    '&maximum_distance=20'
    '&zip='
)

#### Get Request

In [3]:
# Fetch the search page. A timeout is passed explicitly because requests
# otherwise waits indefinitely when the server stops responding.
response = requests.get(website, timeout=30)

#### Status Code

In [4]:
# HTTP status of the request above (200 means the page was returned successfully)
response.status_code

200

### Soup Object

In [5]:
# Parse the raw response bytes with Python's built-in HTML parser
soup = BeautifulSoup(response.content, features='html.parser')

### Results

In [6]:
# Every listing on the page is a <div> carrying the 'vehicle-card' class
results = soup.find_all('div', class_='vehicle-card')

In [7]:
# Number of vehicle cards found on the page
len(results)

20

### Target necessary data

In [8]:
# Name
# Mileage
# Dealer Name
# Rating
# Review Count
# Price

#### Name

In [9]:
# The listing title is the first <h2> inside the card
results[0].h2.get_text()

'2019 Mercedes-Benz C-Class C 300'

#### Mileage

In [10]:
# Mileage text of the first card
results[0].find('div', class_='mileage').get_text()

'15,174 mi.'

#### Dealer Name

In [11]:
# Dealer name of the first card, with surrounding whitespace removed
results[0].find('div', class_='dealer-name').get_text().strip()

'Mercedes-Benz of San Jose'

#### Rating

In [12]:
# Dealer rating of the first card
results[0].find('span', class_='sds-rating__count').get_text()

'4.6'

#### Review Count

In [13]:
# Review-count text of the first card, e.g. '(599 reviews)'
results[0].find('span', class_='sds-rating__link').get_text()

'(599 reviews)'

#### Price

In [14]:
# Listed price of the first card
results[0].find('span', class_='primary-price').get_text()

'$34,952'

### Put everything together inside a For-Loop

In [15]:
def text_or_na(card, tag, css_class=None):
    """Return the text of the first ``tag`` inside ``card``, or 'n/a' if absent.

    card      -- BeautifulSoup element for one listing
    tag       -- HTML tag name to look for, e.g. 'h2'
    css_class -- optional CSS class the tag must carry
    """
    attrs = {'class': css_class} if css_class else {}
    node = card.find(tag, attrs)
    # find() returns None when nothing matches. Checking for that replaces
    # the bare ``except:`` blocks, which also hid unrelated errors.
    return node.get_text() if node is not None else 'n/a'


# One list per output column; position k in every list describes listing k.
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for result in results:
    name.append(text_or_na(result, 'h2'))
    mileage.append(text_or_na(result, 'div', 'mileage'))
    # strip surrounding whitespace from the dealer name ('n/a' is unaffected)
    dealer_name.append(text_or_na(result, 'div', 'dealer-name').strip())
    rating.append(text_or_na(result, 'span', 'sds-rating__count'))
    review_count.append(text_or_na(result, 'span', 'sds-rating__link'))
    price.append(text_or_na(result, 'span', 'primary-price'))

### Create Pandas Dataframe

In [16]:
# Assemble the per-column lists into one table, one row per listing
car_dealer = pd.DataFrame(
    data={
        'Name': name,
        'Mileage': mileage,
        'Dealer Name': dealer_name,
        'Rating': rating,
        'Review Count': review_count,
        'Price': price,
    }
)

In [17]:
# Display the scraped listings
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz C-Class C 300,"15,174 mi.",Mercedes-Benz of San Jose,4.6,(599 reviews),"$34,952"
1,2020 Mercedes-Benz GLE 450 AWD 4MATIC,"35,245 mi.",Mercedes-Benz of Plano,4.5,"(1,222 reviews)","$69,955"
2,2020 Mercedes-Benz GLE 350 Base 4MATIC,"39,176 mi.",Mercedes-Benz of Easton,4.7,(667 reviews),"$57,493"
3,2019 Mercedes-Benz AMG GLE 43 4MATIC Coupe,"33,825 mi.",Mercedes-Benz of Plano,4.5,"(1,222 reviews)","$71,955"
4,2021 Mercedes-Benz GLC 300 Base 4MATIC,"11,386 mi.",Mercedes-Benz of Burlington,4.7,(261 reviews),"$50,491"
5,2018 Mercedes-Benz GLE 350 Base 4MATIC,"38,595 mi.",Mercedes-Benz of Cherry Hill,4.3,(495 reviews),"$41,795"
6,2020 Mercedes-Benz GLE 350 Base 4MATIC,"22,822 mi.",Mercedes-Benz of Fredericksburg,4.6,(308 reviews),"$59,000"
7,2019 Mercedes-Benz C-Class C 300 4MATIC,"49,431 mi.",Mercedes-Benz of Houston Greenway,4.7,"(1,882 reviews)","$36,992"
8,2020 Mercedes-Benz GLE 350 Base 4MATIC,"13,570 mi.",Mercedes-Benz of Naperville,4.7,(557 reviews),"$64,000"
9,2021 Mercedes-Benz GLS 450 4MATIC,"18,710 mi.",Mercedes-Benz of Pompano,4.5,(614 reviews),"$89,998"


#### Data Cleaning

In [18]:
# Reduce strings like '(1,222 reviews)' to their digits ('1222').
# str.strip('reviews)') treats its argument as a set of characters, not a
# suffix, so it left a trailing space ('599 ') and kept thousands separators.
# Values without any digit (the 'n/a' placeholder) are kept unchanged.
car_dealer['Review Count'] = car_dealer['Review Count'].apply(
    lambda x: ''.join(ch for ch in x if ch.isdigit()) or x
)

In [19]:
# Display the frame again after cleaning the 'Review Count' column
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz C-Class C 300,"15,174 mi.",Mercedes-Benz of San Jose,4.6,599,"$34,952"
1,2020 Mercedes-Benz GLE 450 AWD 4MATIC,"35,245 mi.",Mercedes-Benz of Plano,4.5,1222,"$69,955"
2,2020 Mercedes-Benz GLE 350 Base 4MATIC,"39,176 mi.",Mercedes-Benz of Easton,4.7,667,"$57,493"
3,2019 Mercedes-Benz AMG GLE 43 4MATIC Coupe,"33,825 mi.",Mercedes-Benz of Plano,4.5,1222,"$71,955"
4,2021 Mercedes-Benz GLC 300 Base 4MATIC,"11,386 mi.",Mercedes-Benz of Burlington,4.7,261,"$50,491"
5,2018 Mercedes-Benz GLE 350 Base 4MATIC,"38,595 mi.",Mercedes-Benz of Cherry Hill,4.3,495,"$41,795"
6,2020 Mercedes-Benz GLE 350 Base 4MATIC,"22,822 mi.",Mercedes-Benz of Fredericksburg,4.6,308,"$59,000"
7,2019 Mercedes-Benz C-Class C 300 4MATIC,"49,431 mi.",Mercedes-Benz of Houston Greenway,4.7,1882,"$36,992"
8,2020 Mercedes-Benz GLE 350 Base 4MATIC,"13,570 mi.",Mercedes-Benz of Naperville,4.7,557,"$64,000"
9,2021 Mercedes-Benz GLS 450 4MATIC,"18,710 mi.",Mercedes-Benz of Pompano,4.5,614,"$89,998"


### Output in Excel

In [20]:
# Save the single-page results to an Excel workbook, leaving out the index column
car_dealer.to_excel(excel_writer='car_dealer_single_page.xlsx', index=False)

# Pagination 

In [21]:
def text_or_na(card, tag, css_class=None):
    """Return the text of the first ``tag`` inside ``card``, or 'n/a' if absent.

    card      -- BeautifulSoup element for one listing
    tag       -- HTML tag name to look for, e.g. 'h2'
    css_class -- optional CSS class the tag must carry
    """
    attrs = {'class': css_class} if css_class else {}
    node = card.find(tag, attrs)
    # find() returns None when nothing matches. Checking for that replaces
    # the bare ``except:`` blocks, which also hid unrelated errors.
    return node.get_text() if node is not None else 'n/a'


# One list per output column, filled across all pages.
name = []
mileage = []
dealer_name = []
rating = []
review_count = []
price = []

for i in range(1, 11):  # result pages 1 through 10

    # same search as before, with the page number spliced into the query string
    website = (
        'https://www.cars.com/shopping/results/?page=' + str(i)
        + '&page_size=20&dealer_id=&list_price_max=&list_price_min='
        + '&makes[]=mercedes_benz&maximum_distance=20&mileage_max='
        + '&sort=best_match_desc&stock_type=cpo&year_max=&year_min=&zip='
    )

    # explicit timeout so one unresponsive page cannot stall the whole loop
    response = requests.get(website, timeout=30)

    # A failed page used to be parsed as if it were empty, silently dropping
    # its listings. Still skip it, but say so.
    if not response.ok:
        print('Skipping page', i, '- HTTP status', response.status_code)
        continue

    soup = BeautifulSoup(response.content, 'html.parser')
    results = soup.find_all('div', {'class': 'vehicle-card'})

    for result in results:
        name.append(text_or_na(result, 'h2'))
        mileage.append(text_or_na(result, 'div', 'mileage'))
        # strip surrounding whitespace from the dealer name ('n/a' is unaffected)
        dealer_name.append(text_or_na(result, 'div', 'dealer-name').strip())
        rating.append(text_or_na(result, 'span', 'sds-rating__count'))
        review_count.append(text_or_na(result, 'span', 'sds-rating__link'))
        price.append(text_or_na(result, 'span', 'primary-price'))

In [22]:
# Assemble the per-column lists from all pages into one table, one row per listing
car_dealer = pd.DataFrame(
    data={
        'Name': name,
        'Mileage': mileage,
        'Dealer Name': dealer_name,
        'Rating': rating,
        'Review Count': review_count,
        'Price': price,
    }
)

In [23]:
# Display the listings collected across all pages
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz C-Class C 300,"15,174 mi.",Mercedes-Benz of San Jose,4.6,(599 reviews),"$34,952"
1,2020 Mercedes-Benz GLE 450 AWD 4MATIC,"35,245 mi.",Mercedes-Benz of Plano,4.5,"(1,222 reviews)","$69,955"
2,2020 Mercedes-Benz GLE 350 Base 4MATIC,"39,176 mi.",Mercedes-Benz of Easton,4.7,(667 reviews),"$57,493"
3,2019 Mercedes-Benz AMG GLE 43 4MATIC Coupe,"33,825 mi.",Mercedes-Benz of Plano,4.5,"(1,222 reviews)","$71,955"
4,2021 Mercedes-Benz GLC 300 Base 4MATIC,"11,386 mi.",Mercedes-Benz of Burlington,4.7,(261 reviews),"$50,491"
...,...,...,...,...,...,...
195,2019 Mercedes-Benz CLA 250 Base,"11,042 mi.",Jackie Cooper Imports,4.9,"(3,278 reviews)","$34,900"
196,2021 Mercedes-Benz C-Class C 300,"1,730 mi.",W.I. Simonson Inc.,4.5,(773 reviews),"$40,999"
197,2021 Mercedes-Benz GLE 580 AWD 4MATIC,"3,553 mi.",Mercedes-Benz of Chesterfield,4.4,(98 reviews),"$96,811"
198,2019 Mercedes-Benz C-Class C 300 4MATIC,"15,437 mi.",Mercedes-Benz of Rochester,2.5,(3 reviews),"$56,790"


In [24]:
# Reduce strings like '(1,222 reviews)' to their digits ('1222').
# str.strip('reviews)') treats its argument as a set of characters, not a
# suffix, so it left a trailing space ('599 ') and kept thousands separators.
# Values without any digit (the 'n/a' placeholder) are kept unchanged.
car_dealer['Review Count'] = car_dealer['Review Count'].apply(
    lambda x: ''.join(ch for ch in x if ch.isdigit()) or x
)

In [25]:
# Display the frame again after cleaning the 'Review Count' column
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz C-Class C 300,"15,174 mi.",Mercedes-Benz of San Jose,4.6,599,"$34,952"
1,2020 Mercedes-Benz GLE 450 AWD 4MATIC,"35,245 mi.",Mercedes-Benz of Plano,4.5,1222,"$69,955"
2,2020 Mercedes-Benz GLE 350 Base 4MATIC,"39,176 mi.",Mercedes-Benz of Easton,4.7,667,"$57,493"
3,2019 Mercedes-Benz AMG GLE 43 4MATIC Coupe,"33,825 mi.",Mercedes-Benz of Plano,4.5,1222,"$71,955"
4,2021 Mercedes-Benz GLC 300 Base 4MATIC,"11,386 mi.",Mercedes-Benz of Burlington,4.7,261,"$50,491"
...,...,...,...,...,...,...
195,2019 Mercedes-Benz CLA 250 Base,"11,042 mi.",Jackie Cooper Imports,4.9,3278,"$34,900"
196,2021 Mercedes-Benz C-Class C 300,"1,730 mi.",W.I. Simonson Inc.,4.5,773,"$40,999"
197,2021 Mercedes-Benz GLE 580 AWD 4MATIC,"3,553 mi.",Mercedes-Benz of Chesterfield,4.4,98,"$96,811"
198,2019 Mercedes-Benz C-Class C 300 4MATIC,"15,437 mi.",Mercedes-Benz of Rochester,2.5,3,"$56,790"
