# Get CarGurus Expected Price

In [15]:
import requests
import pandas as pd
import os
import numpy as np
import bs4

In [58]:
# Load the scraped listings from ./data relative to the working directory
data_file = os.path.join(os.getcwd(), "data", "all_cars.csv")  # change to updated scraped data file
# Show every column when a frame is rendered
pd.set_option("display.max_columns", None)
# Cast mileage and year to the nullable integer dtype in one pass
cars = pd.read_csv(data_file).astype({'mileage': 'Int64', 'year': 'Int64'})
cars.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34456 entries, 0 to 34455
Data columns (total 26 columns):
post_date        34456 non-null object
lastpull_ts      34456 non-null int64
link             34456 non-null object
price            34456 non-null object
year             34454 non-null Int64
make             34454 non-null object
model            34454 non-null object
body             34380 non-null object
mileage          34382 non-null Int64
title_type       17236 non-null object
city             34308 non-null object
state            34308 non-null object
seller           34456 non-null object
trim             29217 non-null object
ext_color        32509 non-null object
int_color        29105 non-null object
transmission     31444 non-null object
liters           2523 non-null float64
cylinders        34191 non-null float64
fuel_type        34353 non-null object
n_doors          26498 non-null float64
ext_condition    8610 non-null object
int_condition    8600 non-null obje

# Create Zip Codes

In [22]:
# Create dictionary of zip codes for all towns to look up expected price
# Website to look up zipcodes
# http://localistica.com/usa/ut/salt%20lake%20city/zipcodes/all-zipcodes/

def get_most_populated_zip_code(city):
    """Scrape localistica.com for a zip code belonging to ``city``.

    Searches the site for ``city``, follows the zip-codes link on the result
    page (anchor id ``ctl09_hlZipCodesCount``) and returns the zip code found
    in the first data row of the ``dgZipCodes`` table.
    NOTE(review): the function name implies that first row is the most
    populated zip code; that depends on how the site orders the table — confirm.

    Parameters
    ----------
    city : str
        Free-text search string, e.g. ``"Salt Lake City UT"``.

    Returns
    -------
    int or None
        The zip code, or ``None`` when ``city`` is not a non-empty string or
        the lookup fails for any reason (network error, changed page layout,
        no search result). Callers must check for ``None``.
    """
    # stdlib helpers, imported locally so this cell does not depend on an
    # edit to the notebook's import cell
    from urllib.parse import quote_plus, urljoin

    # NaN / None / blank city names cannot be searched for
    if not isinstance(city, str) or not city.strip():
        return None
    try:
        # quote_plus turns spaces into "+" and escapes characters such as
        # "&" or "#" that would otherwise corrupt the query string
        search_page = requests.get(
            f'http://localistica.com/search.aspx?q={quote_plus(city.lower())}',
            timeout=30,
        )
        # Name the parser explicitly so the result does not depend on which
        # optional parsers happen to be installed
        zip_link = bs4.BeautifulSoup(search_page.text, "html.parser").find("a", id="ctl09_hlZipCodesCount")['href']
        # urljoin leaves absolute links untouched and resolves relative ones
        zip_page = requests.get(urljoin(search_page.url, zip_link), timeout=30)
        # Row 0 of the table is skipped; row 1 is the first row read for data
        first_row = bs4.BeautifulSoup(zip_page.text, "html.parser").find(id="dgZipCodes").find_all("tr")[1]
        return int(first_row.td.a.text)
    except Exception:
        # Deliberately best-effort: any failure means "no zip code found".
        # Unlike a bare ``except:``, this still lets KeyboardInterrupt through.
        return None
    

# get unique cities in dataframe (NaN and other non-string entries are skipped)
all_cities = [city for city in cars.city.unique() if isinstance(city, str)]
# state of the first listing seen for each city, computed once instead of
# re-filtering the whole dataframe for every city
first_state_by_city = (
    cars.dropna(subset=['city'])
    .drop_duplicates(subset='city')
    .set_index('city')['state']
    .to_dict()
)
keyList = [city + ", " + first_state_by_city[city] for city in all_cities]
# look up zipcode
# NOTE: the dictionary is keyed by city name only, so two cities that share a
# name in different states share one entry (the state of the first listing wins)
zip_codes = {city: get_most_populated_zip_code(city + " " + first_state_by_city[city]) for city in all_cities}
# hard code the ones it missed
zip_codes.update({'St. Anthony': 83445, 'Provo Canyon': 84604})
# check for missing zip codes
print(len([k for k, v in zip_codes.items() if v is None]))
zip_codes

0


{'St. George': 84770,
 'Lindon': 84042,
 'Boise': 83701,
 'Twin Falls': 83301,
 'Ogden': 84201,
 'Layton': 84040,
 'Draper': 84020,
 'Murray': 84101,
 'Orem': 84057,
 'Salt Lake City': 84101,
 'Provo': 84601,
 'Farmington': 84025,
 'Centerville': 84014,
 'Midvale': 84047,
 'Sandy': 84070,
 'Morgan': 84050,
 'Millcreek': 84101,
 'Rexburg': 83440,
 'Springville': 84663,
 'West Valley City': 84081,
 'American Fork': 84003,
 'Elko': 89801,
 'Delta': 84624,
 'Bountiful': 84010,
 'Spanish Fork': 84660,
 'Riverton': 84065,
 'Pleasant Grove': 84062,
 'Brigham City': 84302,
 'Kaysville': 84037,
 'Woods Cross': 84087,
 'South Jordan': 84095,
 'Lehi': 84005,
 'Nephi': 84648,
 'Magna': 84044,
 'Malad City': 83252,
 'Pocatello': 83201,
 'Manti': 84642,
 'Roosevelt': 84066,
 'Highland': 84003,
 'Burley': 83318,
 'Lewiston': 84320,
 'North Salt Lake': 84054,
 'Logan': 84321,
 'Idaho Falls': 83631,
 'Herriman': 84065,
 'Blackfoot': 83221,
 'Taylorsville': 84101,
 'Rock Springs': 82901,
 'Wellsville': 

# CarGurus Pricing
Terms of Use: https://www.cargurus.com/Cars/TermsOfUse.html

Use each listing's make, model, mileage, year, and zip code to request the private-listing price from CarGurus, so that listed prices can be compared against an expected price.

In [79]:
# list of all cars in CarGurus database
# Check the HTTP status before parsing so a failed download is reported as an
# HTTP error rather than a JSON decode failure, and bound the wait with a timeout.
all_cars_response = requests.get("https://www.cargurus.com/Cars/getCarPickerReferenceDataAJAX.action?showInactive=false&useInventoryService=false&quotableCarsOnly=false&localCountryCarsOnly=true&outputFormat=REACT", timeout=30)
all_cars_response.raise_for_status()
all_cars = all_cars_response.json()

# gets CarGuru make and model id to find price for individual make and model
def get_cargurus_maker_and_model_ids(all_cars, car_make, car_model):
    """Find the CarGurus maker id and model id for a make/model name pair.

    Parameters
    ----------
    all_cars : dict
        Reference data. This function reads ``all_cars['allMakerModels']['makers']``
        and expects a list of dicts with ``'name'``, ``'id'`` and ``'models'``
        keys, where ``'models'`` is a list of dicts with ``'name'`` and ``'id'``.
    car_make : str
        Maker name; must equal the reference ``'name'`` exactly.
    car_model : str
        Model name; must equal the reference ``'name'`` exactly.

    Returns
    -------
    tuple
        ``(maker_id, model_id)`` for the first matching maker and model, or
        ``(None, None)`` when either is not found or the reference data does
        not have the expected structure.
    """
    try:
        makers = all_cars.get('allMakerModels').get('makers')
        # next() stops at the first match instead of building a full list
        maker = next(entry for entry in makers if entry.get('name') == car_make)
        model = next(entry for entry in maker.get('models') if entry.get('name') == car_model)
        return (maker.get('id'), model.get('id'))
    except (StopIteration, IndexError, AttributeError, TypeError):
        # StopIteration: no maker/model with that name.
        # AttributeError/TypeError: a level of the reference data is missing
        # or None (e.g. no 'makers' list), which previously escaped as TypeError.
        return (None, None)
    
# Per-maker CarGurus payloads already downloaded in this session, keyed by
# maker id, so each maker is requested once instead of once per car row.
MAKER_ENTITIES_CACHE = {}

# gets the entity id which includes the make, model, and year of car
def get_entity_id(maker_id, model_id, car_year):
    """Return the CarGurus entity id for a maker/model/year combination.

    Downloads the maker's model listing (or reuses the copy in
    ``MAKER_ENTITIES_CACHE``), finds the entry whose ``'id'`` equals
    ``model_id`` and, within its ``'cars'`` list, the entry whose ``'year'``
    equals ``car_year``.

    Parameters
    ----------
    maker_id, model_id
        Ids as returned by ``get_cargurus_maker_and_model_ids``.
    car_year
        Model year; compared with ``==`` against each entry's ``'year'``.

    Returns
    -------
    The matching entity id, or ``None`` when an argument is missing, no entry
    matches, or the payload does not have the expected structure.

    Raises
    ------
    requests.HTTPError
        If CarGurus answers with an error status.
    """
    # A missing id or year can never match. Comparing pd.NA with == would also
    # make the year filter raise, so bail out before any network traffic.
    if maker_id is None or model_id is None or pd.isna(car_year):
        return None

    all_entities = MAKER_ENTITIES_CACHE.get(maker_id)
    if all_entities is None:
        response = requests.get(f"https://www.cargurus.com/Cars/getSelectedMakerModelCarsAJAX.action?showInactive=false&useInventoryService=false&quotableCarsOnly=false&localCountryCarsOnly=true&outputFormat=REACT&maker={maker_id}", timeout=30)
        response.raise_for_status()
        all_entities = response.json()
        # Only successful responses are cached; failures are retried next call
        MAKER_ENTITIES_CACHE[maker_id] = all_entities

    try:
        model_entity_ids = [car for car in all_entities.get('models') if car.get('id') == model_id][0]
        return [ids for ids in model_entity_ids.get('cars') if ids.get('year') == car_year][0].get('id')
    except (IndexError, AttributeError, TypeError):
        # IndexError: no matching model/year.
        # AttributeError/TypeError: the 'models' or 'cars' list is missing or None.
        return None
    
# gets the estimated listing price of the car based on entity id and the mileage
def get_price(car_make, car_model, car_year, car_mileage, car_zip_code, all_cars):
    """Request the CarGurus private-listing price for one car.

    Resolves make/model/year to a CarGurus entity id, then posts that id with
    the mileage and zip code to the CarGurus report endpoint.

    Parameters
    ----------
    car_make, car_model : str
        Names looked up in ``all_cars`` by ``get_cargurus_maker_and_model_ids``.
    car_year
        Model year, used to select the entity.
    car_mileage
        Mileage sent with the report request.
    car_zip_code
        Postal code sent with the report request.
    all_cars : dict
        CarGurus reference data passed through to the maker/model lookup.

    Returns
    -------
    The ``privateListingPrice`` value from the report, or ``None`` when the
    mileage or year is missing or the car cannot be matched to an entity.

    Raises
    ------
    requests.HTTPError
        If the report request returns an error status.
    RuntimeError
        If the report has no usable ``priceDetails``; the decoded response is
        included in the message.
    """
    # Missing mileage or year: nothing to price. A pd.NA year would otherwise
    # make the year comparison in the entity lookup raise.
    if pd.isna(car_mileage) or pd.isna(car_year):
        return None

    maker_id, model_id = get_cargurus_maker_and_model_ids(all_cars, car_make, car_model)
    if not model_id:
        return None

    entity_id = get_entity_id(maker_id, model_id, car_year)
    if not entity_id:
        return None

    # data needed to request CarGurus report
    data = {
        'carDescription.radius': 75,
        'selectedEntity': entity_id,
        'carDescription.transmission': "",
        'carDescription.mileage': car_mileage,
        'carDescription.postalCode': car_zip_code,
        'carDescription.engineId': "",
        'carDescription.vin': "",
        'carDescription.vinType': "",
        'forPrivateListing': True,
        'inventoryListingId' : ""
    }

    res = requests.post("https://www.cargurus.com/Cars/generateReportJsonAjax.action", data=data, timeout=30)
    res.raise_for_status()
    report = res.json()  # parse once; reused in the error path below
    try:
        return report.get("priceDetails").get("privateListingPrice") #private listing price from CarGurus report
    except AttributeError:
        # "priceDetails" is absent or None: surface the payload for debugging
        # without the unhelpful chained AttributeError traceback.
        raise RuntimeError(f"CarGurus report has no priceDetails: {report!r}") from None


# Only rows with an index greater than this are priced by this cell.
# NOTE(review): presumably earlier rows were priced in a previous run of this
# cell — set to -1 to price every row from a fresh kernel.
RESUME_AFTER_INDEX = 31995

# Select the rows to price up front rather than walking every skipped row
# through iterrows().
for index, row in cars[cars.index > RESUME_AFTER_INDEX].iterrows():
    expected_price = None
    if row["city"] in zip_codes:
        expected_price = get_price(row["make"], row["model"], row["year"], row["mileage"], zip_codes.get(row["city"]), all_cars)
        # change expected prices that are 0 to none
        if expected_price is None or expected_price < 1:
            expected_price = None
            # log the row index of every listing that could not be priced
            print(index)
    cars.loc[index, "expected_price"] = expected_price
        

32008
32012
32020
32023
32028
32029
32044
32045
32046
32047
32053
32062
32068
32072
32075
32076
32077
32078
32088
32092
32093
32094
32095
32097
32102
32105
32106
32107
32110
32112
32113
32116
32119
32126
32127
32133
32135
32138
32139
32141
32142
32143
32144
32145
32146
32147
32149
32151
32153
32156
32160
32161
32162
32163
32167
32170
32174
32176
32181
32186
32187
32188
32190
32193
32195
32197
32198
32203
32204
32205
32211
32213
32217
32223
32226
32229
32232
32233
32234
32241
32244
32246
32248
32250
32251
32255
32262
32267
32268
32272
32273
32276
32277
32278
32280
32283
32286
32287
32288
32289
32290
32293
32294
32296
32299
32301
32304
32305
32306
32308
32310
32313
32315
32322
32325
32331
32332
32333
32334
32335
32342
32346
32350
32357
32359
32360
32361
32362
32364
32366
32371
32372
32374
32375
32378
32381
32382
32386
32387
32388
32390
32391
32392
32395
32397
32398
32401
32402
32406
32407
32412
32415
32431
32443
32444
32445
32447
32449
32458
32459
32460
32464
32470
32472
32473
32481
3248

In [83]:
# Render the priced listings for a visual check
display(cars)
# Write the cars dataframe out as a pickle file
cars.to_pickle(path="./cars.pkl")

Unnamed: 0,post_date,lastpull_ts,link,price,year,make,model,body,mileage,title_type,city,state,seller,trim,ext_color,int_color,transmission,liters,cylinders,fuel_type,n_doors,ext_condition,int_condition,drive_type,VIN,n_pics,expected_price
0,2020-03-27,1585374184,https://cars.ksl.com/listing/6204647,8995,2014,Ford,Focus,Hatchback,111050,Clean Title,St. George,UT,Dealer,Titanium,black,black,Automatic,,4.0,Flex Fuel,,,,FWD,1FADP3N24EL463439,12,4877.0
1,2020-03-27,1585374185,https://cars.ksl.com/listing/6253659,10995,2015,Toyota,Corolla,Sedan,55044,Clean Title,Lindon,UT,Dealer,S Plus,white,black,Automatic,,4.0,Gasoline,,,,FWD,5YFBURHE2FP238514,48,10230.0
2,2020-03-27,1585374186,https://cars.ksl.com/listing/6320317,34869,2019,Jeep,Grand Cherokee,Sport Utility,22192,,Boise,ID,Dealer,Limited,diamond black crystal pearlcoat,black,Automatic,,6.0,Gasoline,4.0,,,4-Wheel Drive,1C4RJFBG8KC812665,27,32949.0
3,2020-03-27,1585374187,https://cars.ksl.com/listing/6203357,38495,2017,Chevrolet,Silverado 1500,Truck,21747,,Twin Falls,ID,Dealer,LTZ,gray,jet black,Automatic,,8.0,Gasoline,4.0,,,4-Wheel Drive,3GCUKSEC2HG500663,58,29138.0
4,2020-03-27,1585374188,https://cars.ksl.com/listing/6225558,6199,2007,Mitsubishi,Raider,Truck,119643,Clean Title,St. George,UT,Dealer,LS,electric blue,,Manual,,6.0,Gasoline,,,,RWD,1Z7HC22K87S213334,20,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
34451,2020-03-20,1584833057,https://cars.ksl.com/listing/6269151,20148,2019,Ford,Fusion Hybrid,Sedan,20303,,Price,UT,Dealer,SE,silver,,,,4.0,Hybrid,,,,FWD,3FA6P0LU0KR206951,0,15249.0
34452,2020-03-20,1584833058,https://cars.ksl.com/listing/5968149,15995,2009,Jeep,Grand Cherokee,Sport Utility,122000,,,,Dealer,SRT-8,red,charcoal gray,Automatic,,8.0,Gasoline,4.0,,,4-Wheel Drive,1J8HR78W89C505718,19,
34453,2020-03-20,1584833058,https://cars.ksl.com/listing/6287241,52900,2020,Acura,MDX,Sport Utility,2080,,Salt Lake City,UT,Dealer,with Advance/Entertainment Package,white,black,Automatic,,6.0,Gasoline,,,,AWD,5J8YD4H99LL011505,50,38464.0
34454,2020-03-20,1584833059,https://cars.ksl.com/listing/6269149,22325,2019,Hyundai,Santa Fe,Sport Utility,17195,,Price,UT,Dealer,SE,quartz white,espresso/gray,Automatic,,4.0,Gasoline,,,,AWD,5NMS2CAD0KH106182,21,20557.0


In [85]:
# Check read out worked: reload the pickle file and print the frame summary
# so the column list and non-null counts can be inspected.
pickle_cars = pd.read_pickle('cars.pkl')
pickle_cars.info() # 24,315 expected prices

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 34456 entries, 0 to 34455
Data columns (total 27 columns):
post_date         34456 non-null object
lastpull_ts       34456 non-null int64
link              34456 non-null object
price             34456 non-null object
year              34454 non-null Int64
make              34454 non-null object
model             34454 non-null object
body              34380 non-null object
mileage           34382 non-null Int64
title_type        17236 non-null object
city              34308 non-null object
state             34308 non-null object
seller            34456 non-null object
trim              29217 non-null object
ext_color         32509 non-null object
int_color         29105 non-null object
transmission      31444 non-null object
liters            2523 non-null float64
cylinders         34191 non-null float64
fuel_type         34353 non-null object
n_doors           26498 non-null float64
ext_condition     8610 non-null object
int_condition