In [None]:
from bs4 import BeautifulSoup as bs
import requests
import os
import pandas as pd
import json

In [2]:
# USA Today article listing the most popular vehicle in each state.
url = "https://www.usatoday.com/story/news/nation-now/2017/08/30/these-most-popular-cars-and-trucks-every-state/478537001/"

# The timeout keeps the cell from hanging forever on a stalled connection, and
# raise_for_status() stops here on an HTTP error instead of parsing an error page.
response = requests.get(url, timeout=30)
response.raise_for_status()

soup = bs(response.text,'html.parser')

In [3]:
# Collect every <p> element carrying the "p-text" class.
results = soup.find_all('p', attrs={'class': 'p-text'})

# Peek at the paragraph at index 6 to see what a single entry looks like.
results[6].get_text()

'Alaska:\xa0F150'

In [4]:
# Gather the text of every scraped paragraph, echoing each one as it is stored.
states_cars = []
for paragraph in results:
    try:
        paragraph_text = paragraph.text
    except AttributeError as err:
        # Report an element that exposes no text and move on to the next one.
        print(err)
    else:
        states_cars.append(paragraph_text)
        print(paragraph_text)

Ford's F150 is America's best selling vehicle, but the pick-up truck doesn't reign supreme in all 50 states.
According to Kelley Blue Book data, more than 10 states choose SUVs over pickups. Compact cars (Honda's Civic, Toyota's Corolla and Yaris) are the top picks in California, Ohio, Florida, Washington, D.C. and Puerto Rico. 
Still, Kelley Blue Book's list of the most popular vehicles sold in each state based on new car registrations in 2016 shows Americans love their trucks. 
This year, the F-Series remains most popular, but Tim Fleming, analyst for Kelley Blue Book said 2017 consumers could shift last year's data. 
"Sedans are rapidly falling out of favor," Fleming said in an email. "The Camry, Civic and Corolla have dropped from the fourth, fifth and sixth places to seventh, eighth and ninth this year. We don’t see this trend stopping any time soon, although the new Camry could boost sales in the short-term."
Here's a look at the 2016 data:
Alaska: F150
Alabama: F150
Arizona: F15

In [5]:
# Keep paragraphs 6 through 57; index 6 is the first "State: Model" entry and
# the paragraphs before it are the article's introduction.
# Slicing `results` directly (rather than re-slicing `states_cars`) means
# running this cell twice still yields the same list.
states_cars = [paragraph.text for paragraph in results[6:58]]

In [6]:
# Wrap the raw "State: Model" strings in a single-column frame.
df = pd.DataFrame(states_cars, columns=["States_and_Cars"])

df.head()

Unnamed: 0,States_and_Cars
0,Alaska: F150
1,Alabama: F150
2,Arizona: F150
3,Arkansas: Sierra 1500
4,California: Civic


In [7]:
# Split each "State: Model" string at the first colon only (n=1), so a colon
# inside the model text cannot spill into a third, unnamed column.
# NOTE: this rebinds `df`, so the cell that builds `df` must be re-run before
# this one is run a second time.
df = (
    df['States_and_Cars']
    .str.split(":", n=1, expand=True)
    .rename(columns={0: 'State', 1: 'Model'})
)

df.head()

Unnamed: 0,State,Model
0,Alaska,F150
1,Alabama,F150
2,Arizona,F150
3,Arkansas,Sierra 1500
4,California,Civic


In [8]:
# Normalise each model name into the hyphenated form used later in URLs:
#   1. turn non-breaking spaces into plain spaces and trim both ends,
#   2. join the remaining words with hyphens,
#   3. spell "F150" as "F-150".
# Iterating over the values (instead of df['Model'][i]) avoids label-based
# lookups, so the result does not depend on the frame's index labels.
car_list = [
    model.replace('\xa0', ' ').strip().replace(' ', '-').replace('F150', 'F-150')
    for model in df['Model']
]

# Show the distinct cleaned names.
set(car_list)

{'Accord',
 'CR-V',
 'Civic',
 'Corolla',
 'Escape',
 'F-150',
 'Outback',
 'RAV4',
 'Rogue',
 'Sierra-1500',
 'Silverado-1500',
 'Tacoma',
 'Yaris'}

In [9]:
# Swap the raw Model column for the cleaned names held in car_list.
df = df.assign(Model=car_list)

df.head()

Unnamed: 0,State,Model
0,Alaska,F-150
1,Alabama,F-150
2,Arizona,F-150
3,Arkansas,Sierra-1500
4,California,Civic


In [10]:
# Manufacturer -> models it builds; inverted below into a model -> make lookup.
models_by_make = {
    'Honda': ['Accord', 'CR-V', 'Civic'],
    'Toyota': ['Corolla', 'RAV4', 'Tacoma', 'Yaris'],
    'Ford': ['Escape', 'F-150'],
    'Subaru': ['Outback'],
    'Nissan': ['Rogue'],
    'GMC': ['Sierra-1500'],
    'Chevrolet': ['Silverado-1500'],
}
make_by_model = {
    model: make
    for make, models in models_by_make.items()
    for model in models
}

# Models missing from the lookup come out as NaN in the Make column.
df['Make'] = df['Model'].map(make_by_model)

df.head()

Unnamed: 0,State,Model,Make
0,Alaska,F-150,Ford
1,Alabama,F-150,Ford
2,Arizona,F-150,Ford
3,Arkansas,Sierra-1500,GMC
4,California,Civic,Honda


In [11]:
# One row per distinct model (first occurrence kept), renumbered from zero.
unique_cars_df = (
    df[['Model', 'Make']]
    .drop_duplicates(subset='Model')
    .reset_index(drop=True)
)

unique_cars_df

Unnamed: 0,Model,Make
0,F-150,Ford
1,Sierra-1500,GMC
2,Civic,Honda
3,Outback,Subaru
4,Rogue,Nissan
5,Silverado-1500,Chevrolet
6,Corolla,Toyota
7,Tacoma,Toyota
8,CR-V,Honda
9,RAV4,Toyota


# Scrape Motor Trend for Vehicle Information

In [13]:
car_response = []
car_soup = []

# Example of the URL pattern:
# https://www.motortrend.com/cars/chevrolet/silverado-1500/2019/

# Pair Make and Model by position so the loop does not rely on index labels.
for make, model in zip(unique_cars_df['Make'], unique_cars_df['Model']):
    car_url = "https://www.motortrend.com/cars/" + make + "/" + model + "/2016/"
    print(car_url)
    # Bound the wait per page; without a timeout one stalled request hangs the cell.
    page_response = requests.get(car_url, timeout=30)
    print(page_response)
    # Stop on an HTTP error here rather than later, when the error page would
    # be parsed as if it were a vehicle page.
    page_response.raise_for_status()
    car_soup.append(bs(page_response.text,'html.parser'))
    car_response.append(page_response)

https://www.motortrend.com/cars/Ford/F-150/2016/
<Response [200]>
https://www.motortrend.com/cars/GMC/Sierra-1500/2016/
<Response [200]>
https://www.motortrend.com/cars/Honda/Civic/2016/
<Response [200]>
https://www.motortrend.com/cars/Subaru/Outback/2016/
<Response [200]>
https://www.motortrend.com/cars/Nissan/Rogue/2016/
<Response [200]>
https://www.motortrend.com/cars/Chevrolet/Silverado-1500/2016/
<Response [200]>
https://www.motortrend.com/cars/Toyota/Corolla/2016/
<Response [200]>
https://www.motortrend.com/cars/Toyota/Tacoma/2016/
<Response [200]>
https://www.motortrend.com/cars/Honda/CR-V/2016/
<Response [200]>
https://www.motortrend.com/cars/Toyota/RAV4/2016/
<Response [200]>
https://www.motortrend.com/cars/Ford/Escape/2016/
<Response [200]>
https://www.motortrend.com/cars/Honda/Accord/2016/
<Response [200]>
https://www.motortrend.com/cars/Toyota/Yaris/2016/
<Response [200]>


In [14]:
# Preview the spec labels (positions 1-11) on the first scraped page.
car_soup[0].find_all('div', attrs={'class': 'key'})[1:12]

[<div class="key">Value Rating</div>,
 <div class="key">Engine Name</div>,
 <div class="key">Transmission Name</div>,
 <div class="key">Trim</div>,
 <div class="key">Class</div>,
 <div class="key">Horsepower</div>,
 <div class="key">Standard MPG</div>,
 <div class="key">Body Style</div>,
 <div class="key">Drivetrain</div>,
 <div class="key">Fuel Type</div>,
 <div class="key">Seating Capacity (Std/Max)</div>]

In [15]:
# Preview the spec values (positions 1-11) on the first scraped page.
car_soup[0].find_all('div', attrs={'class': 'value'})[1:12]

[<div class="value">Below Average</div>,
 <div class="value">Engine: V6 Flex Fuel 3.5 Liter</div>,
 <div class="value"><span itemprop="vehicleTransmission">SelectShift® 6 Speed Automatic Mode Select with Manual Mode (6R80E)</span></div>,
 <div class="value"><span itemprop="vehicleConfiguration">XL 2WD Short Bed Regular Cab</span></div>,
 <div class="value">Full-Size Pickup LD</div>,
 <div class="value">283@6500</div>,
 <div class="value"><div itemprop="fuelEfficiency" itemscope="" itemtype="//schema.org/QuantitativeValue">
 <span itemprop="minValue">18</span> City /
 									  <span itemprop="maxValue">24</span> Hwy</div></div>,
 <div class="value"><span itemprop="bodyType">Truck</span></div>,
 <div class="value"><span itemprop="driveWheelConfiguration">RWD</span></div>,
 <div class="value"><span itemprop="fuelType">Unleaded Regular</span></div>,
 <div class="value"><span itemprop="vehicleSeatingCapacity">3/3</span></div>]

In [16]:
# Text of the third "price" div on the first scraped page.
car_soup[0].find_all('div', class_="price")[2].get_text()

'$21,610'

In [17]:
# One accumulator list per scraped attribute; every list receives exactly one
# entry per page, in the same order as car_soup.
MSRP = []
CPO = []
UCV = []
VR = []
EN = []
TN = []
T = []
C = []
HP = []
MPG = []
BSt = []
DT = []
FT = []
SC = []
IMGURL = []

# Same order as the `row` tuple assembled inside the loop.
all_columns = (MSRP, CPO, UCV, VR, EN, TN, T, C, HP, MPG, BSt, DT, FT, SC, IMGURL)


def _text_or_na(tags, position):
    """Return the text of tags[position], or "N/A" when the page has no such tag."""
    try:
        return tags[position].text
    except IndexError:
        return "N/A"


for item in car_soup:
    # Query each tag family once per page instead of once per field.
    price_tags = item.find_all('div', class_="price")
    value_tags = item.find_all('div', class_="value")

    # Retrieve pricing information from the price tags. The first price is
    # required (IndexError if absent); the second and third fall back to "N/A".
    msr = price_tags[0].text
    cp = _text_or_na(price_tags, 1)
    uc = _text_or_na(price_tags, 2)

    # Retrieve specification information from the value tags at positions 1-11
    # (position 0 is not used). Explicit indexing raises IndexError on a page
    # with fewer tags instead of silently producing a short row.
    (aV, aE, aTN, aT, aC, aHP, aMPG, aBSt, aDT, aFT, aSC) = [
        value_tags[position].text for position in range(1, 12)
    ]
    # Strip the newlines and the tab run embedded in the MPG markup.
    aMPG = aMPG.replace('\n', '').replace('\t\t\t\t\t\t\t\t\t ', '')
    IU = item.find('img', class_ = "attachment-full-width")['data-base']

    # Append only after every field has been read, so a failure part-way
    # through a page cannot leave the lists with unequal lengths.
    row = (msr, cp, uc, aV, aE, aTN, aT, aC, aHP, aMPG, aBSt, aDT, aFT, aSC, IU)
    for column, value in zip(all_columns, row):
        column.append(value)

    # Progress output: every accumulated list so far, then this page's image URL.
    for column in all_columns[:-1]:
        print(column)
    print(IU)

['$26,540']
['$23,900']
['$21,610']
['Below Average']
['Engine: V6 Flex Fuel 3.5 Liter']
['SelectShift® 6 Speed Automatic Mode Select with Manual Mode (6R80E)']
['XL 2WD Short Bed Regular Cab']
['Full-Size Pickup LD']
['283@6500']
['18 City / 24 Hwy']
['Truck']
['RWD']
['Unleaded Regular']
['3/3']
https://st.motortrend.com/uploads/sites/10/2017/11/2018-ford-f-150-xlt-supercab-pick-up-angular-front.png
['$26,540', '$27,815']
['$23,900', '$20,350']
['$21,610', '$18,685']
['Below Average', 'Excellent']
['Engine: V6 Flex Fuel 3.5 Liter', 'Engine: V6 Flex Fuel 4.3 Liter EcoTec3']
['SelectShift® 6 Speed Automatic Mode Select with Manual Mode (6R80E)', 'Hydra-Matic 6 Speed Automatic Mode Select (6L80)']
['XL 2WD Short Bed Regular Cab', 'Base 2WD Standard Bed Regular Cab']
['Full-Size Pickup LD', 'Full-Size Pickup LD']
['283@6500', '285@5300']
['18 City / 24 Hwy', '18 City / 23 Hwy']
['Truck', 'Truck']
['RWD', 'RWD']
['Unleaded Regular', 'Unleaded Regular']
['3/3', '3/2']
https://st.motortrend

['$26,540', '$27,815', '$18,640', '$24,995', '$23,330', '$27,095', '$17,300', '$23,660', '$23,845']
['$23,900', '$20,350', '$18,640', '$19,175', '$16,225', '$20,000', '$17,300', '$20,000', '$17,075']
['$21,610', '$18,685', 'N/A', '$17,890', '$15,115', '$18,435', 'N/A', '$18,501', '$15,876']
['Below Average', 'Excellent', 'N/A', 'Average', 'Average', 'Excellent', 'N/A', 'Excellent', 'Excellent']
['Engine: V6 Flex Fuel 3.5 Liter', 'Engine: V6 Flex Fuel 4.3 Liter EcoTec3', 'Engine: 4-Cyl 2.0 Liter', 'Engine: 4-Cyl 2.5 Liter BOXER', 'Engine: 4-Cyl 2.5 Liter', 'Engine: V6 Flex Fuel 4.3 Liter EcoTec3', 'Engine: 4-Cyl 1.8 Liter', 'Engine: 4-Cyl 2.7 Liter', 'Engine: 4-Cyl 2.4 Liter Earth Dreams Technology™']
['SelectShift® 6 Speed Automatic Mode Select with Manual Mode (6R80E)', 'Hydra-Matic 6 Speed Automatic Mode Select (6L80)', '6 Speed Manual', 'Lineartronic CVT Continuously Variable Automatic Mode Select with Manual Mode', 'Xtronic Continuously Variable Automatic Mode Select', 'Hydra-Matic

['$26,540', '$27,815', '$18,640', '$24,995', '$23,330', '$27,095', '$17,300', '$23,660', '$23,845', '$24,350', '$23,100', '$22,205', '$14,895']
['$23,900', '$20,350', '$18,640', '$19,175', '$16,225', '$20,000', '$17,300', '$20,000', '$17,075', '$18,675', '$14,425', '$22,205', '$14,895']
['$21,610', '$18,685', 'N/A', '$17,890', '$15,115', '$18,435', 'N/A', '$18,501', '$15,876', '$17,401', '$13,140', 'N/A', 'N/A']
['Below Average', 'Excellent', 'N/A', 'Average', 'Average', 'Excellent', 'N/A', 'Excellent', 'Excellent', 'Excellent', 'Excellent', 'N/A', 'N/A']
['Engine: V6 Flex Fuel 3.5 Liter', 'Engine: V6 Flex Fuel 4.3 Liter EcoTec3', 'Engine: 4-Cyl 2.0 Liter', 'Engine: 4-Cyl 2.5 Liter BOXER', 'Engine: 4-Cyl 2.5 Liter', 'Engine: V6 Flex Fuel 4.3 Liter EcoTec3', 'Engine: 4-Cyl 1.8 Liter', 'Engine: 4-Cyl 2.7 Liter', 'Engine: 4-Cyl 2.4 Liter Earth Dreams Technology™', 'Engine: 4-Cyl 2.5 Liter', 'Engine: 4-Cyl 2.5 Liter Duratec', 'Engine: 4-Cyl 2.4 Liter Earth Dreams Technology™', 'Engine: 4-C

In [18]:
# (column name, values) pairs in display order: the model names followed by
# the per-model lists collected from the scraped pages.
prelim_columns = [
    ('Model', unique_cars_df['Model']),
    ('Manufacturer_Suggested_Retail_Price', MSRP),
    ('Certified_Pre-Owned_Price', CPO),
    ('Used_Car_Value', UCV),
    ('Value_Rating', VR),
    ('Engine_Name', EN),
    ('Transmission_Name', TN),
    ('Trim', T),
    ('Class', C),
    ('Horsepower', HP),
    ('Standard_MPG', MPG),
    ('Body_Style', BSt),
    ('Drivetrain', DT),
    ('Fuel_Type', FT),
    ('Seating_Capacity', SC),
    ('Image_URL', IMGURL),
]
df_prelim = pd.DataFrame(dict(prelim_columns))

df_prelim.head()

Unnamed: 0,Model,Manufacturer_Suggested_Retail_Price,Certified_Pre-Owned_Price,Used_Car_Value,Value_Rating,Engine_Name,Transmission_Name,Trim,Class,Horsepower,Standard_MPG,Body_Style,Drivetrain,Fuel_Type,Seating_Capacity,Image_URL
0,F-150,"$26,540","$23,900","$21,610",Below Average,Engine: V6 Flex Fuel 3.5 Liter,SelectShift® 6 Speed Automatic Mode Select wit...,XL 2WD Short Bed Regular Cab,Full-Size Pickup LD,283@6500,18 City / 24 Hwy,Truck,RWD,Unleaded Regular,3/3,https://st.motortrend.com/uploads/sites/10/201...
1,Sierra-1500,"$27,815","$20,350","$18,685",Excellent,Engine: V6 Flex Fuel 4.3 Liter EcoTec3,Hydra-Matic 6 Speed Automatic Mode Select (6L80),Base 2WD Standard Bed Regular Cab,Full-Size Pickup LD,285@5300,18 City / 23 Hwy,Truck,RWD,Unleaded Regular,3/2,https://st.motortrend.com/uploads/sites/10/201...
2,Civic,"$18,640","$18,640",,,Engine: 4-Cyl 2.0 Liter,6 Speed Manual,LX (Manual) Sedan,Compact Passenger Car,158@6500,27 City / 38 Hwy,Sedan,FWD,Unleaded Regular,5/5,https://st.motortrend.com/uploads/sites/10/201...
3,Outback,"$24,995","$19,175","$17,890",Average,Engine: 4-Cyl 2.5 Liter BOXER,Lineartronic CVT Continuously Variable Automat...,2.5i Wagon,Mid-Size Utility,175@5800,25 City / 32 Hwy,Wagon,AWD,Unleaded Regular,5/5,https://st.motortrend.com/uploads/sites/10/201...
4,Rogue,"$23,330","$16,225","$15,115",Average,Engine: 4-Cyl 2.5 Liter,Xtronic Continuously Variable Automatic Mode S...,S 2WD,Compact Utility,170@6000,26 City / 32 Hwy,SUV/Crossover,FWD,Unleaded Regular,5/7,https://st.motortrend.com/uploads/sites/10/201...


In [19]:
# Re-display the per-state frame (first five rows) ahead of the merge.
df.head(n=5)

Unnamed: 0,State,Model,Make
0,Alaska,F-150,Ford
1,Alabama,F-150,Ford
2,Arizona,F-150,Ford
3,Arkansas,Sierra-1500,GMC
4,California,Civic,Honda


In [20]:
# Left-join the per-model specs onto every state row, matching on Model.
final_df = pd.merge(df, df_prelim, on='Model', how='left')

final_df.head()

Unnamed: 0,State,Model,Make,Manufacturer_Suggested_Retail_Price,Certified_Pre-Owned_Price,Used_Car_Value,Value_Rating,Engine_Name,Transmission_Name,Trim,Class,Horsepower,Standard_MPG,Body_Style,Drivetrain,Fuel_Type,Seating_Capacity,Image_URL
0,Alaska,F-150,Ford,"$26,540","$23,900","$21,610",Below Average,Engine: V6 Flex Fuel 3.5 Liter,SelectShift® 6 Speed Automatic Mode Select wit...,XL 2WD Short Bed Regular Cab,Full-Size Pickup LD,283@6500,18 City / 24 Hwy,Truck,RWD,Unleaded Regular,3/3,https://st.motortrend.com/uploads/sites/10/201...
1,Alabama,F-150,Ford,"$26,540","$23,900","$21,610",Below Average,Engine: V6 Flex Fuel 3.5 Liter,SelectShift® 6 Speed Automatic Mode Select wit...,XL 2WD Short Bed Regular Cab,Full-Size Pickup LD,283@6500,18 City / 24 Hwy,Truck,RWD,Unleaded Regular,3/3,https://st.motortrend.com/uploads/sites/10/201...
2,Arizona,F-150,Ford,"$26,540","$23,900","$21,610",Below Average,Engine: V6 Flex Fuel 3.5 Liter,SelectShift® 6 Speed Automatic Mode Select wit...,XL 2WD Short Bed Regular Cab,Full-Size Pickup LD,283@6500,18 City / 24 Hwy,Truck,RWD,Unleaded Regular,3/3,https://st.motortrend.com/uploads/sites/10/201...
3,Arkansas,Sierra-1500,GMC,"$27,815","$20,350","$18,685",Excellent,Engine: V6 Flex Fuel 4.3 Liter EcoTec3,Hydra-Matic 6 Speed Automatic Mode Select (6L80),Base 2WD Standard Bed Regular Cab,Full-Size Pickup LD,285@5300,18 City / 23 Hwy,Truck,RWD,Unleaded Regular,3/2,https://st.motortrend.com/uploads/sites/10/201...
4,California,Civic,Honda,"$18,640","$18,640",,,Engine: 4-Cyl 2.0 Liter,6 Speed Manual,LX (Manual) Sedan,Compact Passenger Car,158@6500,27 City / 38 Hwy,Sedan,FWD,Unleaded Regular,5/5,https://st.motortrend.com/uploads/sites/10/201...


# Update JSON File

In [27]:
# Read the boundaries file as UTF-8 explicitly; without an encoding argument
# open() uses the platform default, which differs between systems.
with open('state_boundaries.json', 'r', encoding='utf-8') as f:
    datastore = json.load(f)

In [28]:
# Columns carried into the output JSON for each state.
panel_columns = [
    'State',
    'Model',
    'Make',
    'Manufacturer_Suggested_Retail_Price',
    'Class',
    'Standard_MPG',
    'Body_Style',
    'Image_URL',
]
panel_df = final_df.loc[:, panel_columns]

# One plain dict per row (despite the name, this is a list of dicts).
car_info_dict = panel_df.to_dict(orient='records')

car_info_dict

[{'State': 'Alaska',
  'Model': 'F-150',
  'Make': 'Ford',
  'Manufacturer_Suggested_Retail_Price': '$26,540',
  'Class': 'Full-Size Pickup LD',
  'Standard_MPG': '18 City / 24 Hwy',
  'Body_Style': 'Truck',
  'Image_URL': 'https://st.motortrend.com/uploads/sites/10/2017/11/2018-ford-f-150-xlt-supercab-pick-up-angular-front.png'},
 {'State': 'Alabama',
  'Model': 'F-150',
  'Make': 'Ford',
  'Manufacturer_Suggested_Retail_Price': '$26,540',
  'Class': 'Full-Size Pickup LD',
  'Standard_MPG': '18 City / 24 Hwy',
  'Body_Style': 'Truck',
  'Image_URL': 'https://st.motortrend.com/uploads/sites/10/2017/11/2018-ford-f-150-xlt-supercab-pick-up-angular-front.png'},
 {'State': 'Arizona',
  'Model': 'F-150',
  'Make': 'Ford',
  'Manufacturer_Suggested_Retail_Price': '$26,540',
  'Class': 'Full-Size Pickup LD',
  'Standard_MPG': '18 City / 24 Hwy',
  'Body_Style': 'Truck',
  'Image_URL': 'https://st.motortrend.com/uploads/sites/10/2017/11/2018-ford-f-150-xlt-supercab-pick-up-angular-front.png'},

In [29]:
# Index the vehicle records by state name once, so each feature needs a single
# dictionary lookup instead of a scan over every record. If a state appears
# more than once, the last record wins, as it did with the nested loops.
vehicle_by_state = {entry['State']: entry for entry in car_info_dict}

for feature in datastore['features']:
    properties = feature['properties']
    state_name = properties.get('NAME')
    # Features whose NAME has no vehicle record are left untouched.
    if state_name in vehicle_by_state:
        properties['vehicle'] = vehicle_by_state[state_name]

datastore['features'][0]

{'type': 'Feature',
 'properties': {'GEO_ID': '0400000US23',
  'STATE': '23',
  'NAME': 'Maine',
  'LSAD': '',
  'CENSUSAREA': 30842.923,
  'vehicle': {'State': 'Maine',
   'Model': 'Silverado-1500',
   'Make': 'Chevrolet',
   'Manufacturer_Suggested_Retail_Price': '$27,095',
   'Class': 'Full-Size Pickup LD',
   'Standard_MPG': '18 City / 23 Hwy',
   'Body_Style': 'Truck',
   'Image_URL': 'https://st.motortrend.com/uploads/sites/10/2018/01/2019-Chevrolet-Silverado-1500-front-side-view-1.jpg'}},
 'geometry': {'type': 'MultiPolygon',
  'coordinates': [[[[-67.619761, 44.519754],
     [-67.61541, 44.521973],
     [-67.587738, 44.516196],
     [-67.582113, 44.513459],
     [-67.589259, 44.50084],
     [-67.590627, 44.49415],
     [-67.580288, 44.488068],
     [-67.562651, 44.472104],
     [-67.569189, 44.455531],
     [-67.571774, 44.453403],
     [-67.574206, 44.45173],
     [-67.588346, 44.449754],
     [-67.592755, 44.458572],
     [-67.604919, 44.502056],
     [-67.607199, 44.503576],


In [31]:
# Persist the augmented feature collection as a single JSON document.
with open('state_car_info.json', 'w') as fp:
    serialized = json.dumps(datastore)
    fp.write(serialized)