In [1]:
from bs4 import BeautifulSoup
import requests
import json
import re

import pandas as pd

import os

# Get safety data from IIHS.org

In [2]:
def get_vehicle_info (soup):
    data_rows = []
    for row in soup.find_all('td', class_="Vehicle"):
        for td in row:
            iihs_url = url_base + td.get('href')
            names = [x.text for x in td.children]
            output = {}
            if ('Safety' in names[0]):
                make_model = names[1]
                years = ' '.join(names[2:])
                iihs_ranking = names[0]
            else:
                make_model = names[0]
                years = ' '.join(names[1:])
                iihs_ranking = ''
            make = make_model.split(' ')[0]
            model = ' '.join(make_model.split(' ')[1:])
            if (make=="Alfa"):
                make = "Alfa Romeo"
                model = model.replace("Romeo ", "")
            data_rows = data_rows + [{'make': make, 'model': model, 'iihs_years': years, "iihs_ranking": iihs_ranking, 'iihs_url': iihs_url}]

    df = pd.DataFrame(data_rows)
    return (df)

In [3]:
def extract_table (url):
    page = requests.get(url)
    df_raw = pd.read_html(page.text)[0]
    soup = BeautifulSoup(page.text, 'html.parser')
    df_id = get_vehicle_info(soup).assign(iihs_type=os.path.basename(url))
    df = pd.concat([df_id, df_raw.drop(columns="Vehicle Name")], axis=1)
    return (df)

In [4]:
car_types = ["small-cars", "midsize-cars", "midsize-luxury-cars", "large-cars", "large-luxury-cars", 
             "small-suvs", "midsize-suvs", "midsize-luxury-suvs", "large-suvs", "minivans"]
url_base = "https://www.iihs.org"
urls = [url_base + "/ratings/class-summary/" + x for x in car_types]

In [5]:
car_table = pd.concat([extract_table(x) for x in urls]).reset_index(drop=True)

# Get fuel economy data from fueleconomy.gov

In [6]:
epadata_vehicles = pd.read_csv("https://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip")

  epadata_vehicles = pd.read_csv("https://www.fueleconomy.gov/feg/epadata/vehicles.csv.zip")


In [75]:
def find_fuelecon_ids (make, model, year, db_full):
    if (make=="Volvo"):
        model = model.replace("Cross Country", "CC")
    db = db_full[db_full["year"]>=(year-1)]
    criterion1 = db["make"].str.match(make, case=False)
    criterion2 = db["model"].str.contains(model, case=False)   
    id_list = db[criterion1 & criterion2]["id"].to_list()
    if (len(id_list)==0):
        criterion2 = True
        if ((make=="BMW") & ('series' in model)):
            criterion2 = db["model"].str.startswith(model.split(' ')[0])
            model = model.replace(' '.join(model.split(' ')[0:2]), '')
        if (' ' in model):
            for x in model.split(' '):
                criterion2 = criterion2 & db["model"].str.contains(x, case=False)
        if (make=="Mercedes-Benz"):
            model = model.replace('-Class', '')
            criterion2 = db["model"].str.replace('\d+', '').str.split().str.get(0).str.match(model)
        id_list = db[criterion1 & criterion2]["id"].to_list()
    return (id_list)

In [76]:
fueleconomy_ids = car_table[["make", "model"]].\
apply(lambda x: find_fuelecon_ids(make=x[0], model=x[1], year=2022, db_full=epadata_vehicles), axis=1)

  criterion2 = db["model"].str.replace('\d+', '').str.split().str.get(0).str.match(model)


In [81]:
car_table_fuel = car_table.assign(fuelecon_ids=fueleconomy_ids)

In [87]:
car_table_fuel

Unnamed: 0,make,model,iihs_years,iihs_ranking,iihs_url,iihs_type,Small overlap front,Moderate overlap front,Side,Roof,Head restraints & seats,Front crash prevention: vehicle-to-vehicle,Front crash prevention: vehicle-to-pedestrian,Headlights,Seat belt reminders,LATCH ease of use,fuelecon_ids
0,Honda,Civic,4-door sedan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/honda/civ...,small-cars,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,G,Not tested,G +,"[43055, 43056, 43057, 43355, 43356, 43357, 433..."
1,Honda,Insight,4-door sedan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/honda/ins...,small-cars,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,G,Not tested,G +,"[42537, 42542, 43947, 43948]"
2,Mazda,3,4-door hatchback | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/mazda/3-4...,small-cars,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,GA,Not tested,G +,"[42852, 42857, 43040, 43041, 43042, 43043, 430..."
3,Mazda,3,4-door sedan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/mazda/3-4...,small-cars,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,GA,Not tested,G +,"[42852, 42857, 43040, 43041, 43042, 43043, 430..."
4,Subaru,Crosstrek Hybrid,4-door wagon | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/subaru/cr...,small-cars,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,G,Not tested,G +,[43687]
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
164,Toyota,Sienna,minivan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/toyota/si...,minivans,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Advanced Standard system,G,Not tested,G +,"[43470, 43471, 44551, 44552]"
165,Honda,Odyssey,minivan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/honda/ody...,minivans,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,A,Not tested,G +,"[42848, 43697, 45203]"
166,Chrysler,Pacifica,minivan | 2022 models,Top Safety Pick +,https://www.iihs.org/ratings/vehicle/chrysler/...,minivans,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Advanced Standard system,A,Not tested,A,"[43417, 43419, 43497, 44645, 44647, 44930]"
167,Kia,Carnival,minivan | 2022 models built after March 2021,Top Safety Pick,https://www.iihs.org/ratings/vehicle/kia/carni...,minivans,G Driver-side G Passenger-side,G,G original test,G,G,Superior Standard system,Superior Standard system,GP,Not tested,A,[43702]


In [86]:
car_table_fuel["fuelecon_ids"].apply(len).value_counts()

2     26
4     22
8     19
6     18
12    16
7      8
9      8
3      7
1      6
16     5
5      4
10     4
11     4
15     3
14     3
13     3
18     2
26     2
28     2
24     2
19     2
25     1
0      1
89     1
Name: fuelecon_ids, dtype: int64

In [None]:
vehicle_cols = ["atvtype", "charge120", "charge240", "city08", "cityA08", "co2", "co2A", "comb08", "combA08", "cylinders", "drive", "emissionsList", "feScore", "fuelType", "highway08", "highwayA08", "hlv", "hpv", "id", "make", "mfrCode", "model", "mpgData", "rangeA", "rangeCityA", "rangeHwyA", "trany", "VClass", "year", "startStop", "phevCity", "phevHwy", "phevComb"]

# Save to file

In [None]:
outdir = "./"

In [None]:
car_table.to_csv(os.path.join(outdir, "2022_iihs_car_rankings_raw_DO_NOT_EDIT.csv"))

In [None]:
car_table.to_excel(os.path.join(outdir, "2022_iihs_car_rankings.xlsx"), sheet_name="iihs car ranking")