# Extracting Data from a Car Dealer Website

### Importing necessary libraries

In [54]:
from bs4 import BeautifulSoup
import requests
import pandas as pd 

### The features we are interested in:
- Name
- Mileage
- Dealer Name
- Rating
- Review Count
- Price

### Creating individual lists to store each feature

In [55]:
# One independent accumulator list per scraped feature; the generator
# yields a fresh list for every target, so no two names share an object.
name, mileage, dealer_name, rating, review_count, price = (
    [] for _ in range(6)
)

### Scraping data for the first 10 pages

In [56]:
def _text_or_na(card, tag, css_class=None, strip=False):
    """Return the text of the first matching element inside ``card``, or 'NA'.

    Parameters
    ----------
    card : bs4 element
        The listing element to search in.
    tag : str
        Tag name to look for (e.g. ``'h2'``, ``'div'``, ``'span'``).
    css_class : str, optional
        Value of the ``class`` attribute to match; when omitted, only the
        tag name is used.
    strip : bool, default False
        Whether to strip surrounding whitespace from the extracted text.

    Returns
    -------
    str
        The element's text, or the placeholder ``'NA'`` when the element
        is not present in ``card``.
    """
    attrs = {'class': css_class} if css_class is not None else {}
    try:
        text = card.find(tag, attrs).get_text()
    except AttributeError:
        # find() returned None, i.e. this card has no such element.
        # Only this case is treated as "missing"; anything else (including
        # KeyboardInterrupt) is allowed to propagate.
        return 'NA'
    return text.strip() if strip else text


# NOTE: this loop appends to the module-level lists, so running the cell a
# second time without re-initialising them duplicates every row.
for i in range(1, 11):

    # store website in variable
    website = 'https://www.cars.com/shopping/results/?page='+ str(i) +'&page_size=20&dealer_id=&list_price_max=&list_price_min=&makes[]=mercedes_benz&maximum_distance=20&mileage_max=&sort=best_match_desc&stock_type=cpo&year_max=&year_min=&zip='

    # request to website; without a timeout requests waits indefinitely
    # on an unresponsive server
    response = requests.get(website, timeout=30)

    # soup object
    soup = BeautifulSoup(response.content, 'html.parser')

    # results: one element per listing on the page
    results = soup.find_all('div', {'class' : 'vehicle-card'})

    # loop through results to get data for each car on a page; every list
    # receives exactly one entry per card so the columns stay aligned
    for result in results:
        name.append(_text_or_na(result, 'h2'))
        mileage.append(_text_or_na(result, 'div', 'mileage'))
        dealer_name.append(_text_or_na(result, 'div', 'dealer-name', strip=True))
        rating.append(_text_or_na(result, 'span', 'sds-rating__count'))
        review_count.append(_text_or_na(result, 'span', 'sds-rating__link'))
        price.append(_text_or_na(result, 'span', 'primary-price'))

### Creating a DataFrame with all the data

In [57]:
# Pair each column label with its accumulator list (in display order) and
# build the frame in a single call.
car_dealer = pd.DataFrame(
    dict(
        zip(
            ('Name', 'Mileage', 'Dealer Name', 'Rating', 'Review Count', 'Price'),
            (name, mileage, dealer_name, rating, review_count, price),
        )
    )
)

In [58]:
# Display the assembled frame to inspect the raw scraped values before cleaning.
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz AMG GLS 63 Base 4MATIC,"37,071 mi.",Mercedes-Benz of Pompano,4.5,(607 reviews),"$89,998"
1,2020 Mercedes-Benz GLE 350 Base 4MATIC,"35,197 mi.",Mercedes-Benz of Rochester,2.5,(3 reviews),"$59,987"
2,2018 Mercedes-Benz GLE 350 Base 4MATIC,"40,294 mi.",Mercedes-Benz of Rochester,4.7,(101 reviews),"$43,940"
3,2020 Mercedes-Benz GLE 350 Base 4MATIC,"55,853 mi.",Mercedes-Benz of Des Moines,4.5,(300 reviews),"$55,881"
4,2016 Mercedes-Benz GLE-Class GLE 350 4MATIC,"51,375 mi.",Mercedes-Benz of Manchester,4.4,(482 reviews),"$34,588"
...,...,...,...,...,...,...
195,2021 Mercedes-Benz AMG G 63 Base,"8,619 mi.",Mercedes-Benz of South Charlotte,4.8,"(1,184 reviews)","$268,914"
196,2020 Mercedes-Benz GLE 350 Base 4MATIC,"24,803 mi.",Mercedes-Benz of Fairfield,4.6,(289 reviews),"$58,000"
197,2018 Mercedes-Benz C-Class C 300 4MATIC,"38,901 mi.",Mercedes-Benz of South Orlando,4.7,"(1,157 reviews)","$34,701"
198,2020 Mercedes-Benz AMG GLC 43 Base 4MATIC,"14,762 mi.",Mercedes-Benz of Catonsville,4.2,(398 reviews),"$66,563"


### Cleaning the data

In [59]:
# Reduce strings such as '(1,184 reviews)' to the bare number '1184'.
# str.strip(chars) treats its argument as a *set of characters* rather than
# a suffix, so the previous strip('reviews)').strip('(') chain left a
# trailing space and kept the thousands separator ('1,184 ').
# Keeping only the digit characters avoids both problems; values with no
# digits at all (the 'NA' placeholder) are returned unchanged.
car_dealer['Review Count'] = car_dealer['Review Count'].apply(
    lambda x: ''.join(ch for ch in x if ch.isdigit()) or x
)

In [60]:
# Display the frame again to check the cleaned 'Review Count' column.
car_dealer

Unnamed: 0,Name,Mileage,Dealer Name,Rating,Review Count,Price
0,2019 Mercedes-Benz AMG GLS 63 Base 4MATIC,"37,071 mi.",Mercedes-Benz of Pompano,4.5,607,"$89,998"
1,2020 Mercedes-Benz GLE 350 Base 4MATIC,"35,197 mi.",Mercedes-Benz of Rochester,2.5,3,"$59,987"
2,2018 Mercedes-Benz GLE 350 Base 4MATIC,"40,294 mi.",Mercedes-Benz of Rochester,4.7,101,"$43,940"
3,2020 Mercedes-Benz GLE 350 Base 4MATIC,"55,853 mi.",Mercedes-Benz of Des Moines,4.5,300,"$55,881"
4,2016 Mercedes-Benz GLE-Class GLE 350 4MATIC,"51,375 mi.",Mercedes-Benz of Manchester,4.4,482,"$34,588"
...,...,...,...,...,...,...
195,2021 Mercedes-Benz AMG G 63 Base,"8,619 mi.",Mercedes-Benz of South Charlotte,4.8,1184,"$268,914"
196,2020 Mercedes-Benz GLE 350 Base 4MATIC,"24,803 mi.",Mercedes-Benz of Fairfield,4.6,289,"$58,000"
197,2018 Mercedes-Benz C-Class C 300 4MATIC,"38,901 mi.",Mercedes-Benz of South Orlando,4.7,1157,"$34,701"
198,2020 Mercedes-Benz AMG GLC 43 Base 4MATIC,"14,762 mi.",Mercedes-Benz of Catonsville,4.2,398,"$66,563"


In [61]:
# Persist the cleaned frame as CSV without the index column.
# The Windows path is written as a raw string: in a normal literal the
# sequences '\p', '\C', '\d' and '\c' are invalid escapes that only work by
# accident and trigger a DeprecationWarning/SyntaxWarning on recent Pythons.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory.
car_dealer.to_csv(r'C:\projects\Cars\data\cars_data.csv', index=False)