In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import datetime
import time
from fake_useragent import UserAgent 
ua = UserAgent()
headers = {'user-agent': ua.random}

#data = requests.get(url, headers=headers)

In [2]:
def get_monthly_sales_df(url, year_str, tbl_num):
    '''
    Return a time series df of monthly US car sales for every make and model 
    '''
    response = requests.get(url, headers=headers)
    page= response.text
    soup = BeautifulSoup(page, "lxml")
    
    #find car sales data and turn it into a list
    tbl_str = soup.find_all('tbody')[tbl_num].text
    tbl_list = tbl_str.split('\n')
    tbl_list = [i for i in tbl_list if i] 
    
    #create empty time series df for specified year
    date_time_str = year_str + '-01'
    start_date = datetime.datetime.strptime(date_time_str, '%Y-%m')
    index = pd.date_range(start_date, periods=12, freq='m')
    df = pd.DataFrame(index=index)
    
    #fill empty df with monthly sales for all makes and models
    col_name = ''
    idx = 0
    for x in range(0,len(tbl_list)//13):
        col_name = tbl_list[x+idx]
        list_vals = []
        for val in range(x+1,x+13):
            list_vals.append(tbl_list[idx+val])
        df[col_name]=list_vals
        idx = idx+12
    return df

In [3]:
url = 'https://www.goodcarbadcar.net/2019-us-vehicle-sales-figures-by-model/'
df_sales = get_monthly_sales_df(url, '2019', 2)
url = 'https://www.goodcarbadcar.net/2020-us-vehicle-sales-figures-by-model/'
df = get_monthly_sales_df(url, '2020', 1)
monthly_sales_df = pd.concat((df_sales, df))
#monthly_sales_df.columns

In [4]:
def get_model_sales_df(url, year):
    response = requests.get(url, headers=headers)
    page = response.text
    df_list = pd.read_html(page)
    
    if len(df_list) == 1 or year == '2019':
        df = pd.DataFrame(df_list[0])
    elif len(df_list) == 2:
        df = pd.DataFrame(df_list[1])
    else:
        df = pd.DataFrame(df_list[len(df_list)-1])
    
    #special case for 2020 data because most recent
    if year == '2020':  
        soup = BeautifulSoup(page, "lxml")
    
        #find car sales data and turn it into a list
        tbl_str = soup.find_all('tbody')[1].text
        tbl_list = tbl_str.split('\n')
        tbl_list = [i for i in tbl_list if i] 
        
        columns = ('Model',year)
        df = pd.DataFrame(columns=columns)

        #fill empty df with monthly sales for all makes and models
        model_name = ''
        idx = 0
        index = 0
        for x in range(0,len(tbl_list)//13):
            model_name = tbl_list[x+idx]
            year_sum = 0
            for val in range(x+1,x+13):
                month_total = tbl_list[idx+val].replace(',','')
                month_total = int(month_total)
                year_sum = year_sum + month_total
            #df1 = pd.DataFrame([model_name, year_sum], columns = ['Model', year])
            df = df.append(pd.DataFrame({'Model': model_name, year: year_sum}, index=[index]), ignore_index=True)
            index = index+1
            #df.append(df1)
            idx = idx+12
        return df
    
    if year == '2012':
        df = df.iloc[:, [2,3]]
    elif year == '2005':
        df = df.iloc[:, [1,3]]
    elif year in ['2017','2018', '2019', '2020']:
        df = df.iloc[:, [0,4]]
    else:
        df = df.iloc[:, [1,2]]
    df.columns = ['Model', year]
    
    #* indicate further breakdown of sum totals, overall totals be removed further down when duplicates are removed
    #other symbols refer to subnotes in the tables and are not apart of model names
    symbols = ['*', '²', '¹', '^', '†', '‡']
    
    for s in symbols:
        df['Model'] = df['Model'].str.replace(s,'')

    df.dropna(subset=['Model'], inplace=True)
    
    df['Model'] = df['Model'].str.rstrip()
    df['Model'] = df['Model'].str.lstrip()
    
    #remove first in set of duplicates b/c first is a sum of a car and the hybrid model
    
    df.drop_duplicates(subset='Model', keep='last', inplace=True)
    
    df[year] = df[year].apply(pd.to_numeric, errors='coerce')
    
    return df

In [5]:
url = 'https://www.goodcarbadcar.net/2006-usa-auto-sales-rankings-by-mode/'
yearly_sales_df= get_model_sales_df(url = url, year = '2005')

  df['Model'] = df['Model'].str.replace(s,'')


In [6]:
url_dict = {'2006': 'https://www.goodcarbadcar.net/2006-usa-auto-sales-rankings-by-mode/',
            '2007': 'https://www.goodcarbadcar.net/usa-2007-vehicle-sales-rankings-by-mode/',
            '2008': 'https://www.goodcarbadcar.net/2008-america-auto-sales-rankings-by-mode/',
            '2009': 'https://www.goodcarbadcar.net/usa-auto-sales-rankings-by-model-2009/',
            '2010': 'https://www.goodcarbadcar.net/2010-america-auto-sales-rankings-by-mode/',
            '2011': 'https://www.goodcarbadcar.net/top-268-best-selling-vehicles-2011-year/',
            '2012': 'https://www.goodcarbadcar.net/2012-usa-auto-sales-rankings-by-model7/',
            '2013': 'https://www.goodcarbadcar.net/usa-vehicle-sales-rankings-by-model-december-2013-year-end/',
            '2014': 'https://www.goodcarbadcar.net/usa-all-cars-sales-figures-2014-december-year-end/',
            '2015': 'https://www.goodcarbadcar.net/usa-car-sales-by-model-2015-year-end-december/',
            '2016': 'https://www.goodcarbadcar.net/usa-2016-vehicle-sales-by-model-manufacturer-brand/',
            '2017': 'https://www.goodcarbadcar.net/december-2017-year-end-u-s-passenger-car-sales-rankings-top-171-best-selling-cars-america-every-car-ranked/',
            '2018': 'https://www.goodcarbadcar.net/december-2018-the-best-selling-vehicles-in-america-every-vehicle-ranked/',
            '2019': 'https://www.goodcarbadcar.net/2019-us-vehicle-sales-figures-by-model/',
            '2020': 'https://www.goodcarbadcar.net/2020-us-vehicle-sales-figures-by-model/'
           }

years = ['2006']
for key, value in url_dict.items():
    #print(key)
    years.append(key)
    df= get_model_sales_df(url = value, year = key)
    #print(df)
    yearly_sales_df = pd.merge(yearly_sales_df, df, on='Model', how='outer')
    #print(yearly_sales_df[yearly_sales_df.Model.str.contains("Toyota")])



  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')


In [7]:
columns = ('Model','Total_Sales')
total_sales_df = pd.DataFrame(columns=columns)
total_sales_df = total_sales_df.fillna(0)
total_sales_df['Model'] = yearly_sales_df['Model']

total_sales_df['Total_Sales'] = yearly_sales_df[years].sum(axis=1)
#print(total_sales_df)

In [8]:
total_sales_df.sort_values('Total_Sales', ascending=False, inplace=True)
total_sales_df = total_sales_df[total_sales_df.Total_Sales != 0]

remove_strings = ["Market",'Total','Family','Brand','Passenger Cars, SUVs, Crossovers','Minivans','Pickup Trucks',
                 'Commercial Vans', 'COMPANY', 'MOTOR', 'GROUP', 'AMERICAN', 'AUTOMOBILES', 'JAGUAR', 'DAIMLER']

for string in remove_strings:
    total_sales_df = total_sales_df[~total_sales_df.Model.str.contains(string)]

#total_sales_df = total_sales_df[total_sales_df.Model != '*Market*']
total_sales_df

Unnamed: 0,Model,Total_Sales
0,Ford F-Series,9147816.0
1,Chevrolet Silverado,6685079.0
2,Toyota Camry,6221548.0
6,Honda Civic,5009981.0
5,Honda Accord,4943796.0
...,...,...
301,Mitsubishi Montero Sport,2.0
300,Mazda Protege,2.0
299,Land Rover Discovery,2.0
302,Suzuki Esteem,2.0


In [9]:
def get_brand_links(org_url):
    #org_url = 'https://www.carspecs.us/'
    response = requests.get(org_url, headers=headers)
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    #tbl = soup.find_all(class_='modelnamesandmakes_item')
    #link_list = soup.find_all(id_ = 'homepage-browsemakes')
    brand_link_dict = dict()
    link_list = soup.find_all('li')
    for x in link_list[7:]:
        brand = x.text
        link = 'https://www.carspecs.us' + x.find('a')['href']
        brand_link_dict[brand] = link
        time.sleep(1)
    return brand_link_dict

In [10]:
def get_model_links(brand, url):
    #url = 'https://www.carspecs.us/cars/acura'
    response = requests.get(url, headers=headers)
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    div_list = soup.find_all('div', class_='pure-u-1 pure-u-md-1-2')
    #print(div_list[1].find_all('li'))
    model_link_dict = dict()
    for x in div_list[1].find_all('li'):
        model = brand + ' ' + x.text
        link = org_url + x.find('a')['href']
        response = requests.get(link)
        page = response.text
        soup = BeautifulSoup(page, "lxml")
        year_links = soup.find_all('li')
        year = int(year_links[7].text)
        if year > 2004:
            #print(year)
            model_link = 'https://www.carspecs.us' + year_links[7].find('a')['href']

            model_link_dict[model] = model_link
            time.sleep(1)

    return model_link_dict

In [11]:
def get_model_specs(url):
#url = 'https://www.carspecs.us//cars/2020/acura/mdx'
    response = requests.get(url, headers=headers)
    page = response.text
    soup = BeautifulSoup(page, "lxml")

    doors =passengers =speed =horsepower =drive =mpg =engine =tank =volume =length =width =height = float('NaN')
    
    try:
        price = soup.find(text=re.compile('starting from'))
        price = price.findNext().text
    
    except:
        price = float('NaN')
    
    div_list = soup.find('div', class_='car-details').find_all('div')
    #print(div_list)

    for div in div_list[1:]:
        spec_list = div.text.split('\n')
        spec_list = [i for i in spec_list if i]
        #print(spec_list)
        if spec_list:
            if 'RPM' not in spec_list[0]:
                if 'Passenger Doors' in spec_list[0]:
                    doors = int(spec_list[-1])
                if 'Passenger Capacity' in spec_list[0]:
                    passengers = int(spec_list[-1])
                if 'mph' in spec_list[0]:
                    speed = spec_list[-1]
                if 'Horsepower' in spec_list[0]:
                    horsepower = spec_list[-1]
                if 'Drive type' in spec_list[0]:
                    drive = spec_list[-1]
                if 'combined' in spec_list[0]:
                    mpg = spec_list[-1]
                if 'Combined' in spec_list[0]:
                    mpg = spec_list[-1]
                if 'Engine type' in spec_list[0]:
                    engine = spec_list[-1]
                    engine = engine.replace('\t', '')
                if 'tank capacity' in spec_list[0]:
                    tank = spec_list[-1]
                if 'EPA interior' in spec_list[0]:
                    volume = spec_list[-1]
                if 'Length' in spec_list[0]:
                    length = spec_list[-1]
                if 'Width' in spec_list[0]:
                    width = spec_list[-1]
                if 'Height' in spec_list[0]:
                    height = spec_list[-1]
        
        if mpg == '':
            mpg = soup.find(text=re.compile('highway mpg'))
            try:
                mpg_list = mpg.split('/ ')
                city = mpg_list[0].lstrip('\r\n ')
                highway = mpg_list[1]
                mpg = (int(city[0:2])+int(highway[0:2]))/2
            except:
                mpg = 0
    
    return [price, doors, passengers, speed, horsepower, drive, mpg, engine, tank, volume, length, width, height]

In [12]:
org_url = 'https://www.carspecs.us/'
brand_links = get_brand_links(org_url)
#print(brand_links)
all_model_links = dict()
for key, value in brand_links.items():
    all_model_links.update(get_model_links(key, brand_links[key]))
    #print(all_model_links)
#rint(all_model_links)

In [13]:
columns = ('Model', 'url', 'price', 'doors', 'passengers', 'speed_sec', 'horsepower_hp', 'drive', 'mpg', 'engine', 
           'tank_gal', 'volume_cuft', 'length_in', 'width_in', 'height_in')
model_spec_df = pd.DataFrame(columns=columns) 
idx = 0
for key, value in all_model_links.items():
    specs = get_model_specs(value)
    print(specs)
    print(value)
    model_spec_df = model_spec_df.append(pd.DataFrame({'Model': key, 'url': value, 'price': specs[0],'doors': specs[1],
                                                      'passengers': specs[2],'speed_sec': specs[3], 'horsepower_hp': specs[4],
                                                       'drive': specs[5],'mpg': specs[6],'engine': specs[7],
                                                       'tank_gal': specs[8], 'volume_cuft': specs[9], 'length_in': specs[10],
                                                      'width_in': specs[11],'height_in': specs[12]}, index=[idx]), ignore_index=True)
    idx = idx +1
    #print(model_spec_df)

['$25,900', 4, 5, '6.63 sec', '201 hp', 'Front Wheel Drive ', '28 mpg', 'Gas', '13.20 gal.', '89.30 cu.ft.', '182.20 in.', '70.60 ', '55.60 in.']
https://www.carspecs.us/cars/2020/acura/ilx
[nan, nan, nan, '11.40 sec', '111 hp', ' Front wheel drive  ', nan, 'Hybrid', '13.20 gal.', '99.30 cu.ft.', '179.10 in.', '70.60 ', '55.60 in.']
https://www.carspecs.us/cars/2014/acura/ilx-hybrid
['$46,500', 4, 7, '6.43 sec', '290 hp', 'All Wheel Drive ', '22 mpg', 'Gas', '19.50 gal.', '132.70 cu.ft.', '196.20 in.', '77.70 ', '67.40 in.']
https://www.carspecs.us/cars/2020/acura/mdx
['$157,500', 2, 2, '3.17 sec', '573 hp', 'All Wheel Drive ', '21 mpg', 'Gas', '15.60 gal.', '44.00 cu.ft.', '176.00 in.', '76.30 ', '47.80 in.']
https://www.carspecs.us/cars/2020/acura/nsx
['$46,000', 4, 5, '6.48 sec', '272 hp', 'All Wheel Drive ', '23 mpg', 'Gas', '17.10 gal.', '104.00 cu.ft.', '186.70 in.', '74.80 ', '65.70 in.']
https://www.carspecs.us/cars/2020/acura/rdx
['$19,280', nan, nan, '6.10 sec', '300 hp', ' A

['$83,800', 4, 5, '5.82 sec', '335 hp', 'All Wheel Drive ', nan, 'Gas', '21.70 gal.', nan, '208.80 in.', '76.60 ', '58.60 in.']
https://www.carspecs.us/cars/2019/audi/a8-l
['$34,587', 4, 5, '6.91 sec', '220 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '16.10 gal.', nan, '185.90 in.', '72.50 ', '58.00 in.']
https://www.carspecs.us/cars/2016/audi/allroad
['$34,587', 4, 5, '6.91 sec', '220 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '16.10 gal.', nan, '185.90 in.', '72.50 ', '58.00 in.']
https://www.carspecs.us/cars/2016/audi/allroad
['$6,037', nan, nan, '6.62 sec', '250 hp', ' All wheel drive  ', nan, 'Gas', '18.50 gal.', '99.30 cu.ft.', '189.40 in.', nan, '62.00 in.']
https://www.carspecs.us/cars/2005/audi/allroad-quattro
['$74,800', 4, 5, '6.95 sec', '355 hp', 'All Wheel Drive ', '74 mpg', 'Electric', nan, nan, '193.00 in.', '76.30 ', '65.50 in.']
https://www.carspecs.us/cars/2019/audi/e-tron
['$34,700', 4, 5, '7.24 sec', '228 hp', 'All Wheel Drive ', '22 mpg', 'Gas', '15.90 gal.', nan, '17

['$115,188', nan, nan, '4.39 sec', '626 hp', ' All wheel drive  ', nan, 'Flex Fuel', '24.00 gal.', '93.00 cu.ft.', '189.20 in.', '76.50 ', '54.80 in.']
https://www.carspecs.us/cars/2014/bentley/continental-gt-speed-convertible
['$146,625', 2, 4, '5.16 sec', '500 hp', 'All Wheel Drive ', '17 mpg', 'Gas', '23.80 gal.', nan, '189.10 in.', '76.50 ', '55.20 in.']
https://www.carspecs.us/cars/2015/bentley/continental-gt-v8
['$155,940', 2, 4, '5.00 sec', '521 hp', 'All Wheel Drive ', '17 mpg', 'Gas', '23.80 gal.', nan, '189.10 in.', '76.50 ', '55.20 in.']
https://www.carspecs.us/cars/2015/bentley/continental-gt-v8-s
[nan, nan, nan, '4.27 sec', '572 hp', ' All wheel drive  ', nan, 'Gas', '24.00 gal.', nan, '189.20 in.', '76.50 ', '55.30 in.']
https://www.carspecs.us/cars/2015/bentley/continental-gt3-r
['$113,216', nan, nan, '5.00 sec', '521 hp', ' All wheel drive  ', nan, 'Gas', '24.00 gal.', nan, '189.20 in.', '76.50 ', '55.20 in.']
https://www.carspecs.us/cars/2014/bentley/continental-gtc
[n

['$66,700', nan, nan, '5.03 sec', '445 hp', ' Rear wheel drive  ', nan, 'Gas', '18.50 gal.', '109.00 cu.ft.', '197.20 in.', '74.60 ', '54.80 in.']
https://www.carspecs.us/cars/2014/bmw/7-series-gran-coupe
[nan, nan, nan, '5.73 sec', '315 hp', 'rear-wheel ', nan, nan, '21.70 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2013/bmw/740
[nan, nan, nan, '6.31 sec', '325 hp', 'rear-wheel ', nan, nan, '23.30 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2005/bmw/745
['$64,239', nan, nan, '4.51 sec', '443 hp', 'xDrive full-time all wheel ', nan, nan, '21.70 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2013/bmw/750
[nan, nan, nan, '4.22 sec', '536 hp', 'rear-wheel ', nan, nan, '21.70 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2013/bmw/760
[nan, nan, nan, '6.19 sec', '320 hp', ' Rear wheel drive  ', nan, 'Gas', '20.60 gal.', nan, '206.60 in.', '74.90 ', '58.20 in.']
https://www.carspecs.us/cars/2018/bmw/7-series
[nan, nan, nan, '6.06 sec', '300 hp', 're

['$29,070', 4, 5, '6.50 sec', '250 hp', 'All Wheel Drive ', nan, 'Gas', '16.30 gal.', nan, '196.30 in.', '73.30 ', '58.40 in.']
https://www.carspecs.us/cars/2019/buick/regal-tourx
['$4,534', 4, 5, '9.50 sec', '196 hp', 'Front Wheel Drive ', nan, 'Gas', '18.00 gal.', nan, '186.50 in.', '73.60 ', '68.90 in.']
https://www.carspecs.us/cars/2007/buick/rendezvous
['$4,798', 4, 7, '8.71 sec', '240 hp', 'Front Wheel Drive ', nan, 'Gas', '25.00 gal.', nan, '205.00 in.', '75.20 ', '72.00 in.']
https://www.carspecs.us/cars/2007/buick/terraza
[nan, 4, 5, '8.61 sec', '180 hp', 'Front Wheel Drive ', '24 mpg', 'Gas', '15.60 gal.', '91.00 cu.ft.', '183.90 in.', '71.40 ', '58.10 in.']
https://www.carspecs.us/cars/2017/buick/verano
['$44,795', 2, 4, '5.74 sec', '272 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '16.00 gal.', '91.00 cu.ft.', '183.60 in.', '72.50 ', '54.80 in.']
https://www.carspecs.us/cars/2019/cadillac/ats
['$41,395', 2, 4, '5.74 sec', '272 hp', 'All Wheel Drive ', nan, 'Gas', '16.00 gal.',

['$36,620', 4, 5, nan, '200 hp', 'Front Wheel Drive ', '118 mpg', 'Electric', nan, '94.40 cu.ft.', '164.00 in.', '69.50 ', '62.80 in.']
https://www.carspecs.us/cars/2020/chevrolet/bolt-ev
['$25,000', 2, 4, '5.45 sec', '275 hp', 'Rear Wheel Drive ', '23 mpg', 'Gas', '19.00 gal.', '93.00 cu.ft.', '188.30 in.', '74.70 ', '53.10 in.']
https://www.carspecs.us/cars/2020/chevrolet/camaro
['$20,358', nan, nan, '9.60 sec', '180 hp', ' Front wheel drive  ', nan, 'Flex Fuel', '19.00 gal.', '126.90 cu.ft.', '180.10 in.', '72.80 ', '67.00 in.']
https://www.carspecs.us/cars/2015/chevrolet/captiva-sport
['$2,191', 4, 5, '8.27 sec', '140 hp', 'Front Wheel Drive ', nan, 'Gas', '14.00 gal.', nan, '180.90 in.', '67.90 ', '26.20 in.']
https://www.carspecs.us/cars/2005/chevrolet/cavalier
[nan, nan, nan, '10.83 sec', '131 hp', ' Front wheel drive  ', nan, 'Gas', '14.50 gal.', '178.30 cu.ft.', '186.30 in.', '68.10 ', '73.70 in.']
https://www.carspecs.us/cars/2017/chevrolet/city-express
['$22,855', 4, 2, nan,

['$29,300', 4, 6, nan, '360 hp', 'Rear Wheel Drive ', nan, 'Gas', '36.00 gal.', nan, nan, '51.02 in.', '78.35 in.']
https://www.carspecs.us/cars/2015/chevrolet/silverado-3500hd-built-after-aug-14
['$5,268', 2, 6, nan, '345 hp', 'Rear Wheel Drive ', nan, 'Gas', '26.00 gal.', nan, nan, '50.00 in.', '73.90 in.']
https://www.carspecs.us/cars/2006/chevrolet/silverado-ss
['$5,990', 2, 6, nan, '345 hp', 'Rear Wheel Drive ', nan, 'Gas', '26.00 gal.', nan, nan, '50.00 in.', '73.90 in.']
https://www.carspecs.us/cars/2007/chevrolet/silverado-ss-classic
['$16,720', 4, 5, '9.43 sec', '138 hp', 'Front Wheel Drive ', '29 mpg', 'Gas', '12.20 gal.', '90.30 cu.ft.', '173.90 in.', '68.30 ', '59.70 in.']
https://www.carspecs.us/cars/2020/chevrolet/sonic
['$16,800', 4, 4, '9.54 sec', '98 hp', 'Front Wheel Drive ', '33 mpg', 'Gas', '9.00 gal.', '83.00 cu.ft.', '143.10 in.', '62.80 ', '58.40 in.']
https://www.carspecs.us/cars/2020/chevrolet/spark
[nan, nan, nan, nan, nan, ' Front wheel drive  ', nan, 'Electr

['$11,260', nan, nan, '6.91 sec', '345 hp', 'automatic full-time four-wheel ', nan, nan, '27.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2009/dodge/durango-hybrid
['$27,530', 4, 7, '7.51 sec', '283 hp', 'Front Wheel Drive ', '20 mpg', 'Gas', '20.00 gal.', '163.50 cu.ft.', '202.80 in.', '78.70 ', '67.90 in.']
https://www.carspecs.us/cars/2020/dodge/grand-caravan
['$28,595', 4, 7, '10.35 sec', '173 hp', 'Front Wheel Drive ', '21 mpg', 'Gas', '20.50 gal.', '123.70 cu.ft.', '192.40 in.', '41.40 in.', '30.80 in.']
https://www.carspecs.us/cars/2020/dodge/journey
['$4,918', nan, nan, '8.99 sec', '178 hp', ' Rear wheel drive  ', nan, 'Gas', '18.00 gal.', '133.10 cu.ft.', '197.70 in.', '74.10 ', '58.30 in.']
https://www.carspecs.us/cars/2008/dodge/magnum
['$2,416', 4, 5, '8.52 sec', '132 hp', 'Front Wheel Drive ', nan, 'Gas', '12.50 gal.', nan, '174.40 in.', '67.40 ', '31.10 in.']
https://www.carspecs.us/cars/2005/dodge/neon
['$9,263', 4, 5, '7.03 sec', '260 hp', 'Rear Wheel Driv

[nan, nan, nan, nan, '532 hp', ' Rear wheel drive  ', nan, 'Gas', '27.70 gal.', nan, '179.10 in.', '76.20 ', '50.30 in.']
https://www.carspecs.us/cars/2005/ferrari/superamerica
['$29,390', 2, 2, '6.41 sec', '164 hp', 'Rear Wheel Drive ', '30 mpg', 'Gas', '11.90 gal.', nan, '159.60 in.', '68.50 ', '48.50 in.']
https://www.carspecs.us/cars/2020/fiat/124-spider
['$22,240', 2, 4, '7.85 sec', '157 hp', 'Front Wheel Drive ', '27 mpg', 'Gas', '10.50 gal.', '76.20 cu.ft.', '144.40 in.', '64.10 ', '59.20 in.']
https://www.carspecs.us/cars/2019/fiat/500
['$12,740', nan, nan, '9.77 sec', '101 hp', 'front-wheel ', nan, nan, '10.50 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2013/fiat/500c
['$33,210', 2, 4, '11.46 sec', '111 hp', 'Front Wheel Drive ', '112 mpg', 'Electric', nan, '78.60 cu.ft.', '142.40 in.', '64.10 ', '60.10 in.']
https://www.carspecs.us/cars/2019/fiat/500e
['$24,645', 4, 5, '9.31 sec', '160 hp', 'Front Wheel Drive ', '25 mpg', 'Gas', '13.20 gal.', '98.80 cu.ft.', '167.

['$36,595', 4, 5, nan, '188 hp', 'Front Wheel Drive ', '97 mpg', 'Gas', '14.00 gal.', nan, '191.80 in.', '75.20 ', '58.00 in.']
https://www.carspecs.us/cars/2019/ford/fusion-energi
['$27,555', 4, 5, nan, '188 hp', 'Front Wheel Drive ', nan, 'Gas', '14.00 gal.', nan, '191.80 in.', '75.20 ', '58.00 in.']
https://www.carspecs.us/cars/2019/ford/fusion-hybrid
['$19,649', 4, 5, nan, '188 hp', 'Front Wheel Drive ', '38 mpg', 'Gas', '13.50 gal.', nan, '191.80 in.', '72.90 ', '58.00 in.']
https://www.carspecs.us/cars/2013/ford/fusion-plug-in-hybrid
['$23,999', 2, 2, nan, '550 hp', 'Rear Wheel Drive ', nan, 'Gas', '17.50 gal.', nan, '182.80 in.', '76.90 ', '44.30 in.']
https://www.carspecs.us/cars/2006/ford/gt
['$26,670', 2, 4, '5.18 sec', '310 hp', 'Rear Wheel Drive ', '24 mpg', 'Gas', '16.00 gal.', '84.50 cu.ft.', '188.30 in.', '75.40 ', '54.40 in.']
https://www.carspecs.us/cars/2020/ford/mustang
['$24,410', 4, 4, nan, '270 hp', 'Rear Wheel Drive ', '23 mpg', 'Gas', '18.00 gal.', nan, '210.80 

['$40,300', 4, 6, nan, '355 hp', 'Rear Wheel Drive ', '17 mpg', 'Gas', '26.00 gal.', nan, nan, '51.03 in.', '73.87 in.']
https://www.carspecs.us/cars/2019/gmc/sierra-1500-limited
['$5,844', 4, 5, nan, '300 hp', 'Four Wheel Drive ', nan, 'Gas', '26.00 gal.', nan, nan, '50.00 in.', '77.00 in.']
https://www.carspecs.us/cars/2006/gmc/sierra-1500hd
['$6,637', 4, 5, nan, '300 hp', 'Four Wheel Drive ', nan, 'Gas', '26.00 gal.', nan, nan, '50.00 in.', '77.00 in.']
https://www.carspecs.us/cars/2007/gmc/sierra-1500hd-classic
['$40,400', 2, 3, nan, '401 hp', 'Rear Wheel Drive ', nan, 'Gas', '36.00 gal.', nan, '235.50 in.', '50.55 in.', '79.94 in.']
https://www.carspecs.us/cars/2020/gmc/sierra-2500hd
['$36,811', 4, 5, nan, '360 hp', 'Rear Wheel Drive ', nan, 'Gas', '36.00 gal.', nan, nan, '51.02 in.', '78.27 in.']
https://www.carspecs.us/cars/2015/gmc/sierra-2500hd-built-after-aug-14
[nan, nan, nan, '7.85 sec', '300 hp', 'rear-wheel ', nan, nan, '26.00 gal.', nan, nan, nan, nan]
https://www.carspe

['$31,990', 4, 5, '7.09 sec', '280 hp', 'Front Wheel Drive ', '22 mpg', 'Gas', '19.50 gal.', '115.90 cu.ft.', '190.50 in.', '78.60 ', '71.60 in.']
https://www.carspecs.us/cars/2020/honda/passport
['$33,650', 4, 8, '6.58 sec', '280 hp', 'All Wheel Drive ', '21 mpg', 'Gas', '19.50 gal.', '152.90 cu.ft.', '196.50 in.', '78.60 ', '70.60 in.']
https://www.carspecs.us/cars/2020/honda/pilot
['$33,900', 4, 5, nan, '280 hp', 'Front Wheel Drive ', '22 mpg', 'Gas', '19.50 gal.', nan, '210.00 in.', '50.00 in.', '70.20 in.']
https://www.carspecs.us/cars/2020/honda/ridgeline
['$9,248', 2, 2, '5.35 sec', '237 hp', 'Rear Wheel Drive ', '21 mpg', 'Gas', '13.20 gal.', nan, '162.10 in.', '68.90 ', '50.70 in.']
https://www.carspecs.us/cars/2009/honda/s2000
['$22,363', 4, 4, nan, '300 hp', 'Four Wheel Drive ', nan, 'Gas', '28.50 gal.', nan, '44.00 in.', '86.50 ', '77.00 in.']
https://www.carspecs.us/cars/2006/hummer/h1
[nan, nan, nan, nan, '300 hp', ' Four wheel drive  ', nan, nan, '51.50 gal.', nan, '184.

[nan, nan, nan, '8.78 sec', '194 hp', 'Front wheel drive ', nan, nan, '18.50 gal.', '102.00 cu.ft.', '191.90 in.', '71.90 ', '55.90 in.']
https://www.carspecs.us/cars/2005/hyundai/xg350
[nan, nan, nan, '5.67 sec', '325 hp', ' Rear wheel drive  ', nan, 'Gas', '20.00 gal.', '110.30 cu.ft.', '182.30 in.', '71.00 ', '61.90 in.']
https://www.carspecs.us/cars/2013/infiniti/ex
['$15,240', nan, nan, '5.29 sec', '297 hp', 'ATTESA E-TS full-time all wheel ', nan, nan, '20.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2012/infiniti/ex35
[nan, nan, nan, '4.95 sec', '325 hp', 'ATTESA E-TS full-time all wheel ', nan, nan, '20.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2013/infiniti/ex37
[nan, nan, nan, '6.13 sec', '325 hp', ' Rear wheel drive  ', nan, nan, '23.80 gal.', nan, '191.30 in.', '75.90 ', '65.00 in.']
https://www.carspecs.us/cars/2013/infiniti/fx
['$18,060', 4, 5, nan, nan, 'All Wheel Drive ', '18 mpg', 'Gas', '23.80 gal.', nan, '35.30 in.', '41.70 in.', '31.10

[nan, nan, nan, '6.54 sec', '400 hp', ' Rear wheel drive  ', nan, 'Gas', '26.00 gal.', '169.10 cu.ft.', '208.90 in.', '79.90 ', '75.80 in.']
https://www.carspecs.us/cars/2017/infiniti/qx80
[nan, nan, nan, '15.77 sec', '300 hp', 'part-time four-wheel ', nan, nan, '70.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2007/international/cxt
[nan, nan, nan, nan, '230 hp', 'rear-wheel ', nan, nan, '65.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2007/international/rxt
['$5,701', 4, 5, nan, '285 hp', 'Rear Wheel Drive ', nan, 'Gas', '21.70 gal.', nan, '39.70 in.', '76.10 ', '71.90 in.']
https://www.carspecs.us/cars/2008/isuzu/ascender
['$3,038', 2, 4, nan, '175 hp', 'Rear Wheel Drive ', nan, 'Gas', '19.60 gal.', nan, nan, '42.60 in.', '64.90 in.']
https://www.carspecs.us/cars/2006/isuzu/i-280
['$3,737', 2, 3, nan, '185 hp', 'Rear Wheel Drive ', nan, 'Gas', '19.60 gal.', nan, nan, '42.60 in.', '64.90 in.']
https://www.carspecs.us/cars/2008/isuzu/i-290
['$4,377', 4, 4, n

['$23,390', 4, 5, '8.30 sec', '185 hp', 'Front Wheel Drive ', '27 mpg', 'Gas', '18.50 gal.', '120.70 cu.ft.', '191.10 in.', '73.20 ', '57.70 in.']
https://www.carspecs.us/cars/2020/kia/optima
['$30,990', 4, 5, '8.64 sec', '192 hp', 'Front Wheel Drive ', nan, 'Gas', '15.90 gal.', nan, '191.10 in.', '73.20 ', '57.50 in.']
https://www.carspecs.us/cars/2018/kia/optima-hybrid
['$35,210', 4, 5, '8.76 sec', '202 hp', 'Front Wheel Drive ', '40 mpg', 'Gas', '14.50 gal.', nan, '191.10 in.', '73.20 ', '57.50 in.']
https://www.carspecs.us/cars/2018/kia/optima-plug-in-hybrid
['$15,850', 4, 5, nan, '120 hp', 'Front Wheel Drive ', '36 mpg', 'Gas', '11.90 gal.', '89.90 cu.ft.', '172.60 in.', '67.90 ', '57.10 in.']
https://www.carspecs.us/cars/2020/kia/rio
['$18,700', 4, 5, '9.49 sec', '130 hp', 'Front Wheel Drive ', '32 mpg', 'Gas', '11.90 gal.', nan, '160.00 in.', '67.90 ', '57.10 in.']
https://www.carspecs.us/cars/2018/kia/rio-5-door
['$5,132', nan, nan, '10.08 sec', '110 hp', 'front-wheel ', nan, n

['$11,903', nan, nan, '7.68 sec', '263 hp', 'full-time four-wheel ', nan, nan, '23.00 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2009/lexus/gx-470
['$14,812', nan, nan, '9.08 sec', '187 hp', ' Front wheel drive  ', nan, 'Hybrid', '14.50 gal.', '102.30 cu.ft.', '184.80 in.', '70.30 ', '59.30 in.']
https://www.carspecs.us/cars/2012/lexus/hs
['$14,812', nan, nan, '10.88 sec', '147 hp', 'front-wheel ', nan, nan, '14.50 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2012/lexus/hs-250h
['$38,560', 4, 5, '6.80 sec', '241 hp', 'Rear Wheel Drive ', '24 mpg', 'Gas', '17.40 gal.', '90.20 cu.ft.', '184.30 in.', '71.30 ', '56.30 in.']
https://www.carspecs.us/cars/2020/lexus/is
['$30,233', 4, 5, '6.80 sec', '241 hp', 'Rear Wheel Drive ', '26 mpg', 'Gas', '17.40 gal.', nan, '183.70 in.', '71.30 ', '56.30 in.']
https://www.carspecs.us/cars/2016/lexus/is-200t
['$26,968', 4, 5, '7.48 sec', '204 hp', 'All Wheel Drive ', '23 mpg', 'Gas', '17.40 gal.', nan, '183.70 in.', '71.30 ', '56

['$41,040', 4, 5, '7.01 sec', '250 hp', 'Front Wheel Drive ', '23 mpg', 'Gas', '18.40 gal.', '108.30 cu.ft.', '190.00 in.', '76.10 ', '66.20 in.']
https://www.carspecs.us/cars/2020/lincoln/nautilus
['$76,185', 4, 7, nan, '450 hp', 'Rear Wheel Drive ', '19 mpg', 'Gas', '23.00 gal.', nan, '210.00 in.', '79.90 ', '76.40 in.']
https://www.carspecs.us/cars/2020/lincoln/navigator
['$80,755', 4, 7, nan, '450 hp', 'Rear Wheel Drive ', nan, 'Gas', '27.00 gal.', nan, '221.90 in.', '79.90 ', '76.20 in.']
https://www.carspecs.us/cars/2019/lincoln/navigator-l
[nan, nan, nan, '8.14 sec', '239 hp', ' Rear wheel drive  ', nan, 'Gas', '19.00 gal.', '120.50 cu.ft.', '221.40 in.', '78.20 ', '58.70 in.']
https://www.carspecs.us/cars/2011/lincoln/town-car
['$4,639', 4, 5, '7.56 sec', '221 hp', 'Front Wheel Drive ', nan, 'Gas', '17.50 gal.', nan, '190.50 in.', '72.20 ', '55.39 in.']
https://www.carspecs.us/cars/2006/lincoln/zephyr
[nan, nan, nan, '4.88 sec', '189 hp', ' Rear wheel drive  ', nan, 'Gas', '10.

['$12,333', 4, 5, '8.41 sec', '170 hp', 'Front Wheel Drive ', '25 mpg', 'Gas', '18.50 gal.', nan, '193.70 in.', '72.40 ', '57.90 in.']
https://www.carspecs.us/cars/2013/mazda/mazda6
['$12,333', 4, 5, '8.41 sec', '170 hp', 'Front Wheel Drive ', '25 mpg', 'Gas', '18.50 gal.', nan, '193.70 in.', '72.40 ', '57.90 in.']
https://www.carspecs.us/cars/2013/mazda/mazda6
[nan, nan, nan, '6.01 sec', '263 hp', ' Front wheel drive  ', nan, 'Gas', '15.90 gal.', '111.60 cu.ft.', '177.60 in.', '69.70 ', '57.50 in.']
https://www.carspecs.us/cars/2013/mazda/mazdaspeed-3
['$4,174', nan, nan, '5.51 sec', '274 hp', ' All wheel drive  ', nan, 'Gas', '15.90 gal.', '108.60 cu.ft.', '186.80 in.', '70.10 ', '56.30 in.']
https://www.carspecs.us/cars/2007/mazda/mazdaspeed-6
['$3,987', nan, nan, '6.12 sec', '178 hp', ' Rear wheel drive  ', nan, 'Gas', '12.70 gal.', '51.10 cu.ft.', '155.70 in.', '66.10 ', '48.00 in.']
https://www.carspecs.us/cars/2005/mazda/mazdaspeed-mx-5-miata
[nan, nan, nan, '6.01 sec', '263 hp'

['$12,025', nan, nan, '6.28 sec', '268 hp', ' Rear wheel drive  ', nan, 'Gas', '16.40 gal.', '91.90 cu.ft.', '183.20 in.', '68.50 ', '56.90 in.']
https://www.carspecs.us/cars/2009/mercedes-benz/clk-class
['$12,300', nan, nan, '7.69 sec', '215 hp', 'rear-wheel ', nan, nan, '16.40 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2005/mercedes-benz/clk320
[nan, nan, nan, '6.62 sec', '268 hp', 'rear-wheel ', nan, nan, '16.40 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2009/mercedes-benz/clk350
['$8,696', nan, nan, '5.74 sec', '302 hp', 'rear-wheel ', nan, nan, '16.40 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2006/mercedes-benz/clk500
[nan, nan, nan, '4.74 sec', '362 hp', 'rear-wheel ', nan, nan, '16.40 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2006/mercedes-benz/clk55-amg
['$14,200', nan, nan, '4.95 sec', '382 hp', 'rear-wheel ', nan, nan, '16.40 gal.', nan, nan, nan, nan]
https://www.carspecs.us/cars/2009/mercedes-benz/clk550
[nan, nan, nan, 

[nan, nan, nan, '6.47 sec', '362 hp', ' All wheel drive  ', nan, 'Gas', '26.40 gal.', nan, '201.60 in.', '84.30 ', '72.80 in.']
https://www.carspecs.us/cars/2018/mercedes-benz/gls-class
['$67,896', nan, nan, '6.32 sec', '329 hp', ' All wheel drive  ', nan, 'Gas', '24.60 gal.', nan, '189.10 in.', '84.30 ', '70.70 in.']
https://www.carspecs.us/cars/2015/mercedes-benz/m-class
[nan, nan, nan, '5.14 sec', '463 hp', ' All wheel drive  ', nan, 'Gas', '21.00 gal.', nan, '214.70 in.', nan, '58.70 in.']
https://www.carspecs.us/cars/2018/mercedes-benz/maybach
['$67,896', nan, nan, '6.32 sec', '329 hp', ' All wheel drive  ', nan, 'Gas', '24.60 gal.', nan, '189.10 in.', '84.30 ', '70.70 in.']
https://www.carspecs.us/cars/2015/mercedes-benz/m-class
['$31,390', 3, 2, nan, '208 hp', 'Rear Wheel Drive ', '22 mpg', 'Gas', '18.50 gal.', nan, '105.40 in.', '50.00 in.', '75.20 in.']
https://www.carspecs.us/cars/2020/mercedes-benz/metris
['$9,450', nan, nan, '7.78 sec', '215 hp', 'full-time 4MATIC four-whee

[nan, nan, nan, nan, '241 hp', ' Rear wheel drive  ', nan, 'Gas', '18.50 gal.', nan, '162.80 in.', '71.50 ', '51.20 in.']
https://www.carspecs.us/cars/2016/mercedes-benz/slk-class
['$123,750', nan, nan, '3.55 sec', '617 hp', ' Rear wheel drive  ', nan, 'Gas', '25.60 gal.', nan, '183.30 in.', '75.10 ', '49.30 in.']
https://www.carspecs.us/cars/2009/mercedes-benz/slr-mclaren
['$152,890', 2, 2, '3.17 sec', '583 hp', 'Rear Wheel Drive ', '15 mpg', 'Gas', '22.50 gal.', nan, '182.60 in.', '76.30 ', '49.30 in.']
https://www.carspecs.us/cars/2015/mercedes-benz/sls-amg
[nan, nan, nan, nan, '583 hp', ' Rear wheel drive  ', nan, nan, '22.50 gal.', nan, '182.60 in.', '76.30 ', '49.70 in.']
https://www.carspecs.us/cars/2014/mercedes-benz/sls-amg-gt
['$55,010', 3, 5, nan, '188 hp', 'Four Wheel Drive ', nan, 'Gas', '24.50 gal.', nan, '178.20 in.', '53.10 in.', '108.00 in.']
https://www.carspecs.us/cars/2018/mercedes-benz/sprinter
['$12,452', nan, nan, '10.85 sec', '188 hp', 'rear-wheel ', nan, nan, '

['$5,785', nan, nan, '8.82 sec', '215 hp', ' Four wheel drive  ', nan, 'Gas', '23.80 gal.', nan, '190.20 in.', '74.80 ', '74.20 in.']
https://www.carspecs.us/cars/2006/mitsubishi/montero
['$24,895', 4, 7, nan, '166 hp', 'Front Wheel Drive ', '27 mpg', 'Gas', '16.60 gal.', '128.20 cu.ft.', '184.80 in.', '71.30 ', '67.30 in.']
https://www.carspecs.us/cars/2020/mitsubishi/outlander
['$40,295', 4, 5, nan, nan, 'Four Wheel Drive ', '25 mpg', 'Gas', '11.30 gal.', nan, '184.80 in.', '70.80 ', '67.30 in.']
https://www.carspecs.us/cars/2018/mitsubishi/outlander-phev
['$22,595', 4, 5, nan, '148 hp', 'Front Wheel Drive ', '27 mpg', 'Gas', '16.60 gal.', '97.50 cu.ft.', '171.90 in.', '71.30 ', '64.80 in.']
https://www.carspecs.us/cars/2020/mitsubishi/outlander-sport
['$5,283', 2, 5, nan, '210 hp', 'Rear Wheel Drive ', '18 mpg', 'Gas', '22.00 gal.', nan, nan, '71.70 ', '68.60 in.']
https://www.carspecs.us/cars/2009/mitsubishi/raider
[nan, nan, nan, '3.75 sec', '376 hp', 'rear-wheel ', nan, nan, '14.

['$7,062', nan, nan, '6.90 sec', '256 hp', ' Rear wheel drive  ', nan, 'Gas', '19.00 gal.', '124.50 cu.ft.', '196.10 in.', '74.80 ', '57.70 in.']
https://www.carspecs.us/cars/2009/pontiac/g8
['$3,562', 2, 5, '8.53 sec', '175 hp', 'Front Wheel Drive ', nan, 'Gas', '14.00 gal.', nan, '186.30 in.', '70.40 ', '55.10 in.']
https://www.carspecs.us/cars/2005/pontiac/grand-am
['$4,664', nan, nan, '8.27 sec', '200 hp', ' Front wheel drive  ', nan, 'Gas', '17.00 gal.', '114.00 cu.ft.', '198.30 in.', '71.60 ', '55.90 in.']
https://www.carspecs.us/cars/2008/pontiac/grand-prix
['$5,006', 2, 4, '4.79 sec', '400 hp', 'Rear Wheel Drive ', nan, 'Gas', '18.00 gal.', nan, '189.80 in.', '72.50 ', '54.90 in.']
https://www.carspecs.us/cars/2006/pontiac/gto
['$4,440', 4, 7, '9.22 sec', '196 hp', 'All Wheel Drive ', nan, 'Gas', '25.00 gal.', nan, '205.60 in.', '72.00 ', '72.00 in.']
https://www.carspecs.us/cars/2006/pontiac/montana
['$3,880', nan, nan, '9.79 sec', '201 hp', ' Front wheel drive  ', nan, 'Gas',

['$320,500', 2, 4, '4.52 sec', '624 hp', 'Rear Wheel Drive ', '14 mpg', 'Gas', '21.90 gal.', nan, '208.10 in.', '76.70 ', '59.30 in.']
https://www.carspecs.us/cars/2019/rolls-royce/wraith
[nan, nan, nan, '6.28 sec', '300 hp', 'All wheel drive ', nan, nan, '18.50 gal.', '114.40 cu.ft.', '197.20 in.', '73.30 ', '57.80 in.']
https://www.carspecs.us/cars/2011/saab/900
['$4,312', 4, 5, '5.78 sec', '230 hp', 'All Wheel Drive ', nan, 'Gas', '15.90 gal.', nan, '175.60 in.', '66.70 ', '57.70 in.']
https://www.carspecs.us/cars/2006/saab/9-2x
['$14,846', 4, 4, '6.67 sec', '220 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '16.00 gal.', '96.00 cu.ft.', '182.90 in.', '70.90 ', '57.10 in.']
https://www.carspecs.us/cars/2012/saab/9-3
[nan, nan, nan, '7.04 sec', '220 hp', ' Front wheel drive  ', nan, 'Flex Fuel', '16.00 gal.', '108.40 cu.ft.', '182.90 in.', '70.90 ', '57.10 in.']
https://www.carspecs.us/cars/2012/saab/9-3-griffin
[nan, nan, nan, '6.13 sec', '220 hp', 'automatic full-time all wheel ', nan,

['$22,745', 4, 5, '7.90 sec', '182 hp', 'All Wheel Drive ', '30 mpg', 'Gas', '18.50 gal.', '105.50 cu.ft.', '190.60 in.', '72.40 ', '59.10 in.']
https://www.carspecs.us/cars/2020/subaru/legacy
['$6,676', 4, 5, '6.21 sec', '250 hp', 'All Wheel Drive ', nan, 'Gas', '16.90 gal.', nan, '188.70 in.', '69.70 ', '26.60 in.']
https://www.carspecs.us/cars/2008/subaru/legacy-sedan
['$4,266', 4, 5, '6.12 sec', '250 hp', 'All Wheel Drive ', nan, 'Gas', '16.90 gal.', nan, '43.50 in.', '42.20 in.', '23.10 in.']
https://www.carspecs.us/cars/2005/subaru/legacy-wagon-natl
['$26,645', 4, 5, '8.13 sec', '182 hp', 'All Wheel Drive ', '29 mpg', 'Gas', '18.50 gal.', '109.00 cu.ft.', '42.00 in.', '43.30 in.', '28.40 in.']
https://www.carspecs.us/cars/2020/subaru/outback
['$19,775', 4, 7, '6.59 sec', '256 hp', 'All Wheel Drive ', nan, 'Gas', '16.90 gal.', nan, '44.50 in.', '43.30 in.', '29.20 in.']
https://www.carspecs.us/cars/2014/subaru/tribeca
['$27,495', 4, 5, '5.26 sec', '268 hp', 'All Wheel Drive ', '23

[nan, 4, 5, nan, '99 hp', 'Front Wheel Drive ', '46 mpg', 'Gas', '9.50 gal.', '87.40 cu.ft.', '159.80 in.', '67.50 ', '58.70 in.']
https://www.carspecs.us/cars/2018/toyota/prius-c
['$20,693', nan, nan, '10.48 sec', '134 hp', ' Front wheel drive  ', nan, 'Hybrid', '10.60 gal.', '115.30 cu.ft.', '176.40 in.', '68.70 ', '58.70 in.']
https://www.carspecs.us/cars/2015/toyota/prius-plug-in
[nan, nan, nan, '11.77 sec', '121 hp', ' Front wheel drive  ', nan, 'Hybrid', '11.30 gal.', '111.30 cu.ft.', '182.90 in.', '69.30 ', '57.90 in.']
https://www.carspecs.us/cars/2017/toyota/prius-prime
[nan, 4, 5, nan, '134 hp', 'Front Wheel Drive ', '41 mpg', 'Gas', '11.90 gal.', '97.20 cu.ft.', '182.90 in.', '69.90 ', '62.00 in.']
https://www.carspecs.us/cars/2017/toyota/prius-v
['$25,950', 4, 5, '7.99 sec', '203 hp', 'Front Wheel Drive ', '30 mpg', 'Gas', '14.50 gal.', '98.90 cu.ft.', '180.90 in.', '73.00 ', '67.00 in.']
https://www.carspecs.us/cars/2020/toyota/rav4
['$28,884', 4, 5, '9.80 sec', '154 hp', 

[nan, nan, nan, '7.07 sec', '200 hp', 'Front wheel drive ', nan, nan, '14.50 gal.', '109.30 cu.ft.', '165.80 in.', '69.30 ', '58.40 in.']
https://www.carspecs.us/cars/2009/volkswagen/rabbit-gti-gti
['$3,916', nan, nan, '5.52 sec', '250 hp', ' All wheel drive  ', nan, 'Gas', '14.50 gal.', nan, '167.20 in.', '69.20 ', '57.70 in.']
https://www.carspecs.us/cars/2008/volkswagen/rabbit-r32
[nan, 4, 7, '7.32 sec', '283 hp', 'Front Wheel Drive ', nan, 'Gas', '20.50 gal.', nan, '202.50 in.', '76.90 ', '68.90 in.']
https://www.carspecs.us/cars/2013/volkswagen/routan
['$24,945', 4, 7, '9.33 sec', '184 hp', 'Front Wheel Drive ', '25 mpg', 'Gas', '15.30 gal.', '123.90 cu.ft.', '185.10 in.', '72.40 ', '66.30 in.']
https://www.carspecs.us/cars/2020/volkswagen/tiguan
['$24,225', 4, 5, '7.59 sec', '200 hp', 'All Wheel Drive ', '22 mpg', 'Gas', '16.90 gal.', nan, '174.50 in.', '71.20 ', '67.10 in.']
https://www.carspecs.us/cars/2018/volkswagen/tiguan-limited
[nan, 4, 5, nan, '280 hp', 'All Wheel Drive '

In [14]:
total_sales_df['Model_Merged'] = total_sales_df['Model'].str.strip()
total_sales_df['Model_Merged'] = total_sales_df['Model_Merged'].str.lower()
total_sales_df['Model_Merged'] = total_sales_df['Model_Merged'].str.replace('-','')
total_sales_df['Model_Merged'] = total_sales_df['Model_Merged'].str.replace('/','')
model_spec_df['Model_Merged'] = model_spec_df['Model'].str.strip()
model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.lower()
model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('-','')
model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('/','')

In [15]:
model_spec_sales_df = total_sales_df.merge(model_spec_df, how = 'left', on = 'Model_Merged')

drop uneeded columns and remove rows with a lot of na values

In [16]:
model_spec_sales_df.drop(labels=['Model_Merged', 'Model_y', 'url'], axis=1, inplace=True)
# model_spec_sales_df.dropna(how = ['price', 'doors', 'passengers', 'speed_sec', 'horsepower_hp', 'drive', 'mpg', 
#                                   'engine', 'tank_gal', 'volume_cuft', 'length_in', 'width_in', 'height_in'], inplace=True)

Clean drive feature

In [17]:
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.lower()
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.lstrip()
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.rstrip()
#model_spec_sales_df = model_spec_sales_df.dropna()
model_spec_sales_df['drive'].value_counts()

front wheel drive                  240
rear wheel drive                   137
all wheel drive                    123
four wheel drive                    27
front-wheel                         11
rear-wheel                           3
automatic full-time all wheel        2
attesa e-ts full-time all wheel      1
full-time all wheel                  1
Name: drive, dtype: int64

In [18]:
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('rear-wheel','rear wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('front-wheel','front wheel drive')
replace = ['full-time all wheel', 'attesa e-ts full-time all wheel', 'automatic full-time all wheel']

model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace(replace[1],'all wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace(replace[2],'all wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace(replace[2],'all wheel drive')
# for s in replace:
#     model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace(s,'all wheel drive')
model_spec_sales_df['drive'].value_counts()

front wheel drive      251
rear wheel drive       140
all wheel drive        126
four wheel drive        27
full-time all wheel      1
Name: drive, dtype: int64

In [19]:
model_spec_sales_df.info()
model_spec_sales_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 680 entries, 0 to 679
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Model_x        680 non-null    object 
 1   Total_Sales    680 non-null    float64
 2   price          460 non-null    object 
 3   doors          425 non-null    object 
 4   passengers     425 non-null    object 
 5   speed_sec      419 non-null    object 
 6   horsepower_hp  539 non-null    object 
 7   drive          545 non-null    object 
 8   mpg            354 non-null    object 
 9   engine         519 non-null    object 
 10  tank_gal       525 non-null    object 
 11  volume_cuft    248 non-null    object 
 12  length_in      518 non-null    object 
 13  width_in       523 non-null    object 
 14  height_in      527 non-null    object 
dtypes: float64(1), object(14)
memory usage: 85.0+ KB


Unnamed: 0,Model_x,Total_Sales,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in
0,Ford F-Series,9147816.0,,,,,,,,,,,,,
1,Chevrolet Silverado,6685079.0,,,,,,,,,,,,,
2,Toyota Camry,6221548.0,"$28,430",4.0,5.0,,208 hp,front wheel drive,52 mpg,Gas,13.20 gal.,99.90 cu.ft.,192.10 in.,72.4,56.90 in.
3,Honda Civic,5009981.0,"$21,750",4.0,5.0,,174 hp,front wheel drive,34 mpg,Gas,12.40 gal.,97.20 cu.ft.,177.90 in.,70.8,56.50 in.
4,Honda Accord,4943796.0,"$24,020",4.0,5.0,,192 hp,front wheel drive,33 mpg,Gas,14.80 gal.,105.60 cu.ft.,192.20 in.,73.3,57.10 in.


In [20]:
replace_list = ['hp', 'mpg', 'gal.', 'cu.ft.', 'in.', ',', 'sec', '$', '$']
for s in replace_list:
    model_spec_sales_df = model_spec_sales_df.replace(s,'', regex=True) 
model_spec_sales_df['price'] = model_spec_sales_df['price'].str.replace('$','')

  model_spec_sales_df['price'] = model_spec_sales_df['price'].str.replace('$','')


In [21]:
num_cols = ['price', 'passengers', 'speed_sec', 'horsepower_hp', 'mpg', 'tank_gal', 'volume_cuft', 
            'width_in', 'length_in','height_in']
for col in num_cols:
    model_spec_sales_df[col] = model_spec_sales_df[col].apply (pd.to_numeric, errors='coerce')
model_spec_sales_df['doors'] = model_spec_sales_df[doors].apply (pd.to_numeric, errors='coerce')

In [22]:
model_spec_sales_df.info()
model_spec_sales_df.sample(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 680 entries, 0 to 679
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Model_x        680 non-null    object 
 1   Total_Sales    680 non-null    float64
 2   price          460 non-null    float64
 3   doors          425 non-null    float64
 4   passengers     425 non-null    float64
 5   speed_sec      419 non-null    float64
 6   horsepower_hp  539 non-null    float64
 7   drive          545 non-null    object 
 8   mpg            354 non-null    float64
 9   engine         519 non-null    object 
 10  tank_gal       525 non-null    float64
 11  volume_cuft    248 non-null    float64
 12  length_in      518 non-null    float64
 13  width_in       523 non-null    float64
 14  height_in      527 non-null    float64
dtypes: float64(12), object(3)
memory usage: 85.0+ KB


Unnamed: 0,Model_x,Total_Sales,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in
502,Infti Q40/G Sedan,15590.0,,,,,,,,,,,,,
522,Lexus CT,13593.0,,4.0,5.0,,134.0,front wheel drive,42.0,Gas,11.9,86.1,171.2,69.5,27.4
3,Honda Civic,5009981.0,21750.0,4.0,5.0,,174.0,front wheel drive,34.0,Gas,12.4,97.2,177.9,70.8,56.5
354,BMW X6,59592.0,66600.0,4.0,5.0,,335.0,all wheel drive,22.0,Gas,22.4,,194.8,78.9,66.3
98,Lexus IS,590505.0,38560.0,4.0,5.0,6.8,241.0,rear wheel drive,24.0,Gas,17.4,90.2,184.3,71.3,56.3
184,Acura TLX,217010.0,39400.0,4.0,5.0,7.21,206.0,front wheel drive,26.0,Gas,17.2,93.3,191.0,73.0,57.0
233,Loln Town Car,154082.0,,,,8.14,239.0,rear wheel drive,,Gas,19.0,120.5,221.4,78.2,58.7
159,Dodge Dakota,265325.0,10415.0,,,8.82,210.0,rear wheel drive,,,22.0,,,,
87,GMC Yukon,660245.0,67700.0,4.0,7.0,,420.0,rear wheel drive,17.0,Gas,26.0,,43.2,80.5,74.4
590,Maserati Quattroporte,3968.0,99490.0,4.0,5.0,4.59,424.0,rear wheel drive,19.0,Gas,21.1,,207.2,76.7,58.3


In [23]:
model_spec_sales_df.to_pickle('../data/model_spec_sales_df.pkl')