In [1]:
from bs4 import BeautifulSoup
import requests
import pandas as pd
import re
import datetime
import time
from fake_useragent import UserAgent 
ua = UserAgent()
headers = {'user-agent': ua.random}

#data = requests.get(url, headers=headers)

#### Get Monthly Sales for 2019 & 2020

In [2]:
def get_monthly_sales_df(url, year_str, tbl_num):
    '''
    Return a time series df of monthly US car sales for every make and model 
    '''
    response = requests.get(url, headers=headers)
    page= response.text
    soup = BeautifulSoup(page, "lxml")
    
    #find car sales data and turn it into a list
    tbl_str = soup.find_all('tbody')[tbl_num].text
    tbl_list = tbl_str.split('\n')
    tbl_list = [i for i in tbl_list if i] 
    
    #create empty time series df for specified year
    date_time_str = year_str + '-01'
    start_date = datetime.datetime.strptime(date_time_str, '%Y-%m')
    index = pd.date_range(start_date, periods=12, freq='m')
    df = pd.DataFrame(index=index)
    
    #fill empty df with monthly sales for all makes and models
    col_name = ''
    idx = 0
    for x in range(0,len(tbl_list)//13):
        col_name = tbl_list[x+idx]
        list_vals = []
        for val in range(x+1,x+13):
            list_vals.append(tbl_list[idx+val])
        df[col_name]=list_vals
        idx = idx+12
    return df

new df called monthly_sales_df

In [3]:
url = 'https://www.goodcarbadcar.net/2019-us-vehicle-sales-figures-by-model/' 
df_sales = get_monthly_sales_df(url, '2019', 2)
url = 'https://www.goodcarbadcar.net/2020-us-vehicle-sales-figures-by-model/'
df = get_monthly_sales_df(url, '2020', 1)
monthly_sales_df = pd.concat((df_sales, df))
#monthly_sales_df.columns

#### Get All Model Yearly Sales for 2005-2020

In [4]:
def get_model_sales_df(url, year):
    '''
    Return a df of yearly US car sales for every make and model 
    columns = Model, Year, and Total_Sales
    '''
    #read url page into list of pandas dfs 
    response = requests.get(url, headers=headers)
    page = response.text
    df_list = pd.read_html(page)
    
    #find correct df based on number of data frames on url page and 
    if len(df_list) == 1 or year == '2019':
        df = pd.DataFrame(df_list[0])
    elif len(df_list) == 2:
        df = pd.DataFrame(df_list[1])
    else:
        df = pd.DataFrame(df_list[len(df_list)-1])
    
    #special case for 2020 data because sales data is in monthly sales format for each model
    if year == '2020':  
        soup = BeautifulSoup(page, "lxml")
    
        #find car sales data and turn it into a list
        tbl_str = soup.find_all('tbody')[1].text
        tbl_list = tbl_str.split('\n')
        tbl_list = [i for i in tbl_list if i] 
        
        columns = ('Model', 'Year', 'Total_Sales')
        df = pd.DataFrame(columns=columns)

        #fill empty df with monthly sales for all makes and models
        model_name = ''
        idx = 0
        index = 0
        for x in range(0,len(tbl_list)//13):
            model_name = tbl_list[x+idx]
            year_sum = 0
            for val in range(x+1,x+13):
                month_total = tbl_list[idx+val].replace(',','')
                month_total = int(month_total)
                year_sum = year_sum + month_total
            #df1 = pd.DataFrame([model_name, year_sum], columns = ['Model', year])
            df = df.append(pd.DataFrame({'Model': model_name, 'Year': year, 'Total_Sales': year_sum}, index=[index]), 
                           ignore_index=True)
            index = index+1
            #df.append(df1)
            idx = idx+12
        return df
    
    if year == '2012':
        df = df.iloc[:, [2,3]]
    elif year == '2005':
        df = df.iloc[:, [1,3]]
    elif year in ['2017','2018', '2019', '2020']:
        df = df.iloc[:, [0,4]]
    else:
        df = df.iloc[:, [1,2]]
    df.columns = ['Model', 'Total_Sales']
    df['Year'] = year
    
    #* indicate further breakdown of sum totals, overall totals be removed further down when duplicates are removed
    #other symbols refer to subnotes in the tables and are not apart of model names
    symbols = ['*', '²', '¹', '^', '†', '‡']
    
    for s in symbols:
        df['Model'] = df['Model'].str.replace(s,'')

    df.dropna(subset=['Model'], inplace=True)
    
    df['Model'] = df['Model'].str.rstrip()
    df['Model'] = df['Model'].str.lstrip()
    
    #remove first in set of duplicates b/c first is a sum of a car and the hybrid model
    
    df.drop_duplicates(subset='Model', keep='last', inplace=True)
    
    df['Total_Sales'] = df['Total_Sales'].apply(pd.to_numeric, errors='coerce')
    
    return df

In [5]:
url = 'https://www.goodcarbadcar.net/2006-usa-auto-sales-rankings-by-mode/'
yearly_sales_df= get_model_sales_df(url = url, year = '2005')

  df['Model'] = df['Model'].str.replace(s,'')


In [6]:
url_dict = {'2006': 'https://www.goodcarbadcar.net/2006-usa-auto-sales-rankings-by-mode/',
            '2007': 'https://www.goodcarbadcar.net/usa-2007-vehicle-sales-rankings-by-mode/',
            '2008': 'https://www.goodcarbadcar.net/2008-america-auto-sales-rankings-by-mode/',
            '2009': 'https://www.goodcarbadcar.net/usa-auto-sales-rankings-by-model-2009/',
            '2010': 'https://www.goodcarbadcar.net/2010-america-auto-sales-rankings-by-mode/',
            '2011': 'https://www.goodcarbadcar.net/top-268-best-selling-vehicles-2011-year/',
            '2012': 'https://www.goodcarbadcar.net/2012-usa-auto-sales-rankings-by-model7/',
            '2013': 'https://www.goodcarbadcar.net/usa-vehicle-sales-rankings-by-model-december-2013-year-end/',
            '2014': 'https://www.goodcarbadcar.net/usa-all-cars-sales-figures-2014-december-year-end/',
            '2015': 'https://www.goodcarbadcar.net/usa-car-sales-by-model-2015-year-end-december/',
            '2016': 'https://www.goodcarbadcar.net/usa-2016-vehicle-sales-by-model-manufacturer-brand/',
            '2017': 'https://www.goodcarbadcar.net/december-2017-year-end-u-s-passenger-car-sales-rankings-top-171-best-selling-cars-america-every-car-ranked/',
            '2018': 'https://www.goodcarbadcar.net/december-2018-the-best-selling-vehicles-in-america-every-vehicle-ranked/',
            '2019': 'https://www.goodcarbadcar.net/2019-us-vehicle-sales-figures-by-model/',
            '2020': 'https://www.goodcarbadcar.net/2020-us-vehicle-sales-figures-by-model/'
           }

years = ['2006']
for key, value in url_dict.items():
    #print(key)
    years.append(key)
    df= get_model_sales_df(url = value, year = key)
    #print(df)
    yearly_sales_df = pd.concat([yearly_sales_df, df], axis=0)
    #yearly_sales_df = pd.merge(yearly_sales_df, df, on='Model', how='outer')
    #print(yearly_sales_df[yearly_sales_df.Model.str.contains("Toyota")])

print(yearly_sales_df)

  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')
  df['Model'] = df['Model'].str.replace(s,'')


                     Model Total_Sales  Year
0            Ford F-Series    901463.0  2005
1      Chevrolet Silverado    705980.0  2005
2             Toyota Camry    431703.0  2005
3    Toyota Corolla/Matrix    341290.0  2005
4                Dodge Ram    400543.0  2005
..                     ...         ...   ...
298        Volvo 60-Series       15729  2020
299        Volvo 90-Series        3195  2020
300             Volvo XC40       23778  2020
301             Volvo XC60       32078  2020
302             Volvo XC90       34251  2020

[4518 rows x 3 columns]


In [7]:
# columns = ('Model','Total_Sales')
# total_sales_df = pd.DataFrame(columns=columns)
# total_sales_df = total_sales_df.fillna(0)
# total_sales_df['Model'] = yearly_sales_df['Model']

# total_sales_df['Total_Sales'] = yearly_sales_df[years].sum(axis=1)
# #print(total_sales_df)

In [8]:
# total_sales_df.sort_values('Total_Sales', ascending=False, inplace=True)
yearly_sales_df = yearly_sales_df[yearly_sales_df.Total_Sales != 0]

remove_strings = ["Market",'Total','Family','Brand','Passenger Cars, SUVs, Crossovers','Minivans','Pickup Trucks',
                 'Commercial Vans', 'COMPANY', 'MOTOR', 'GROUP', 'AMERICAN', 'AUTOMOBILES', 'JAGUAR', 'DAIMLER']

for string in remove_strings:
    yearly_sales_df = yearly_sales_df[~yearly_sales_df.Model.str.contains(string)]

#total_sales_df = total_sales_df[total_sales_df.Model != '*Market*']
yearly_sales_df = yearly_sales_df.reset_index(drop=True)
yearly_sales_df

Unnamed: 0,Model,Total_Sales,Year
0,Ford F-Series,901463.0,2005
1,Chevrolet Silverado,705980.0,2005
2,Toyota Camry,431703.0,2005
3,Toyota Corolla/Matrix,341290.0,2005
4,Dodge Ram,400543.0,2005
...,...,...,...
4388,Volvo 60-Series,15729,2020
4389,Volvo 90-Series,3195,2020
4390,Volvo XC40,23778,2020
4391,Volvo XC60,32078,2020


In [13]:
def get_brand_links(org_url):
    '''
    Return a df of yearly US car sales for every make and model 
    columns = Model, Year, and Total_Sales
    '''
    #org_url = 'https://www.carspecs.us/'
    response = requests.get(org_url, headers=headers)
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    #tbl = soup.find_all(class_='modelnamesandmakes_item')
    #link_list = soup.find_all(id_ = 'homepage-browsemakes')
    brand_link_dict = dict()
    link_list = soup.find_all('li')
    for x in link_list[7:]:
        brand = x.text
        link = 'https://www.carspecs.us' + x.find('a')['href']
        brand_link_dict[brand] = link
        time.sleep(0.5)
    return brand_link_dict

In [21]:
def get_model_links(brand, url):
    '''
    Return a df of yearly US car sales for every make and model 
    columns = Model, Year, and Total_Sales
    '''
    #load url page with all car models for specified brand url
    response = requests.get(url, headers=headers) #random user agent
    page = response.text
    soup = BeautifulSoup(page, "lxml")
    
    #find first div with all model links
    div_list = soup.find_all('div', class_='pure-u-1 pure-u-md-1-2')
    
    #new df to store all makes and models for all years past 2004
#     columns = ('Model', 'Year', 'Model_url')
#     df = pd.DataFrame(columns=columns)
    model_link_dict = dict()
    
    #loop through all model links for specified brand 
    for x in div_list[1].find_all('li'):
        model = brand + ' ' + x.text 
        link = org_url + x.find('a')['href'] #model url
        
        #load model url to get list of model year urls
        response = requests.get(link)
        page = response.text
        soup = BeautifulSoup(page, "lxml")
        year_links = soup.find_all('li') #list of all year urls for specified model
        
        #loop through all year links starting at index 7
        idx = 7
#         index = 0
        for y in year_links[7:]:
            year = year_links[idx].text
            if year in years: #only add year links that are 2005 to 2020
                print(year)
                model_link = 'https://www.carspecs.us' + year_links[idx].find('a')['href']
#                 df = df.append(pd.DataFrame({'Model': model, 'Year': year, 'Model_url': model_link}, index=[index]), 
#                                ignore_index=True)
#                 index = index+1
                model_link_dict[model] = (year, model_link)
                idx = idx+1
        time.sleep(0.5) #pause 
            

    return model_link_dict

In [22]:
def get_model_specs(url):
#url = 'https://www.carspecs.us//cars/2020/acura/mdx'
    response = requests.get(url, headers=headers)
    page = response.text
    soup = BeautifulSoup(page, "lxml")

    doors =passengers =speed =horsepower =drive =engine =tank =volume =length =width =height = float('NaN')
    mpg = ''
    try:
        price = soup.find(text=re.compile('starting from'))
        price = price.findNext().text
    
    except:
        price = float('NaN')
    
    div_list = soup.find('div', class_='car-details').find_all('div')
    #print(div_list)

    for div in div_list[1:]:
        spec_list = div.text.split('\n')
        spec_list = [i for i in spec_list if i]
        #print(spec_list)
        if spec_list:
            if 'RPM' not in spec_list[0]:
                if 'Passenger Doors' in spec_list[0]:
                    doors = int(spec_list[-1])
                if 'Passenger Capacity' in spec_list[0]:
                    passengers = int(spec_list[-1])
                if 'mph' in spec_list[0]:
                    speed = spec_list[-1]
                if 'Horsepower' in spec_list[0]:
                    horsepower = spec_list[-1]
                if 'Drive type' in spec_list[0]:
                    drive = spec_list[-1]
                if 'combined' in spec_list[0]:
                    mpg = spec_list[-1]
                if 'Combined' in spec_list[0]:
                    mpg = spec_list[-1]
                if 'Engine type' in spec_list[0]:
                    engine = spec_list[-1]
                    engine = engine.replace('\t', '')
                if 'tank capacity' in spec_list[0]:
                    tank = spec_list[-1]
                if 'EPA interior' in spec_list[0]:
                    volume = spec_list[-1]
                if 'Length' in spec_list[0]:
                    length = spec_list[-1]
                if 'Width' in spec_list[0]:
                    width = spec_list[-1]
                if 'Height' in spec_list[0]:
                    height = spec_list[-1]
        
        if mpg == '':
            mpg = soup.find(text=re.compile('highway mpg'))
            try:
                mpg_list = mpg.split('/ ')
                city = mpg_list[0].lstrip('\r\n ')
                highway = mpg_list[1]
                mpg = (int(city[0:2])+int(highway[0:2]))/2
            except:
                mpg = 0
    
    return [price, doors, passengers, speed, horsepower, drive, mpg, engine, tank, volume, length, width, height]

In [24]:
org_url = 'https://www.carspecs.us/'
brand_links = get_brand_links(org_url)
#print(brand_links)


In [25]:
all_model_links = dict()
for key, value in brand_links.items():
    all_model_links.update(get_model_links(key, brand_links[key]))
    #print(all_model_links)
#rint(all_model_links)

In [17]:
columns = ('Model', 'url', 'price', 'doors', 'passengers', 'speed_sec', 'horsepower_hp', 'drive', 'mpg', 'engine', 
           'tank_gal', 'volume_cuft', 'length_in', 'width_in', 'height_in')
model_spec_df = pd.DataFrame(columns=columns) 
idx = 0
for key, value in all_model_links.items():
    print(value)
    specs = get_model_specs(value[1])
    print(specs)
    print(value[0])
    print(value[1])
    model_spec_df = model_spec_df.append(pd.DataFrame({'Model': key,'Year':value[0], 'url': value[1], 'price': specs[0],'doors': specs[1],
                                                      'passengers': specs[2],'speed_sec': specs[3], 'horsepower_hp': specs[4],
                                                       'drive': specs[5],'mpg': specs[6],'engine': specs[7],
                                                       'tank_gal': specs[8], 'volume_cuft': specs[9], 'length_in': specs[10],
                                                      'width_in': specs[11],'height_in': specs[12]}, index=[idx]), ignore_index=True)
    idx = idx +1
    #print(model_spec_df)

(2013, 'https://www.carspecs.us/cars/2013/acura/ilx')
[nan, nan, nan, '8.98 sec', '150 hp', ' Front wheel drive  ', 29.5, 'Gas', '13.20 gal.', '101.70 cu.ft.', '179.10 in.', '70.60 ', '55.60 in.']
2013
https://www.carspecs.us/cars/2013/acura/ilx
(2014, 'https://www.carspecs.us/cars/2014/acura/ilx-hybrid')
[nan, nan, nan, '11.40 sec', '111 hp', ' Front wheel drive  ', 38.5, 'Hybrid', '13.20 gal.', '99.30 cu.ft.', '179.10 in.', '70.60 ', '55.60 in.']
2014
https://www.carspecs.us/cars/2014/acura/ilx-hybrid
(2005, 'https://www.carspecs.us/cars/2005/acura/mdx')
['$5,535', nan, nan, '7.16 sec', '265 hp', ' All wheel drive  ', 18.0, 'Gas', '20.40 gal.', nan, '188.70 in.', '77.00 ', '68.70 in.']
2005
https://www.carspecs.us/cars/2005/acura/mdx
(2005, 'https://www.carspecs.us/cars/2005/acura/nsx')
[nan, nan, nan, '5.01 sec', '290 hp', ' Rear wheel drive  ', 19.0, 'Gas', '18.50 gal.', '53.90 cu.ft.', '174.20 in.', '71.30 ', '46.10 in.']
2005
https://www.carspecs.us/cars/2005/acura/nsx
(2007, 'ht

['$3,958', nan, nan, '7.34 sec', '200 hp', ' Front wheel drive  ', 25.0, 'Gas', '14.50 gal.', nan, '168.70 in.', '69.50 ', '56.00 in.']
2006
https://www.carspecs.us/cars/2006/audi/a3
(2017, 'https://www.carspecs.us/cars/2017/audi/a3-cabriolet')
['$37,600', 2, 4, nan, '186 hp', 'Front Wheel Drive ', '28 mpg', 'Gas', '13.20 gal.', nan, '174.10 in.', '7.50 in.', '55.50 in.']
2017
https://www.carspecs.us/cars/2017/audi/a3-cabriolet
(2016, 'https://www.carspecs.us/cars/2016/audi/a3-e-tron')
['$30,699', 4, 5, '8.40 sec', '204 hp', 'Front Wheel Drive ', '83 mpg', 'Gas', '10.60 gal.', nan, '169.80 in.', '70.30 ', '56.00 in.']
2016
https://www.carspecs.us/cars/2016/audi/a3-e-tron
(2017, 'https://www.carspecs.us/cars/2017/audi/a3-sedan')
['$31,200', 4, 5, nan, '186 hp', 'Front Wheel Drive ', '29 mpg', 'Gas', '13.20 gal.', nan, '175.50 in.', '70.70 ', '55.70 in.']
2017
https://www.carspecs.us/cars/2017/audi/a3-sedan
(2016, 'https://www.carspecs.us/cars/2016/audi/a3-sportback-e-tron')
[nan, nan, n

['$53,100', 2, 4, '5.03 sec', '333 hp', 'All Wheel Drive ', '20 mpg', 'Gas', '16.10 gal.', nan, '182.70 in.', '73.00 ', '53.90 in.']
2017
https://www.carspecs.us/cars/2017/audi/s5-coupe
(2018, 'https://www.carspecs.us/cars/2018/audi/s5-sportback')
['$54,400', 4, 5, '5.23 sec', '354 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '15.30 gal.', nan, '187.10 in.', '72.60 ', '54.50 in.']
2018
https://www.carspecs.us/cars/2018/audi/s5-sportback
(2007, 'https://www.carspecs.us/cars/2007/audi/s6')
['$12,960', 4, 5, '4.95 sec', '435 hp', 'All Wheel Drive ', 18.0, 'Gas', '21.10 gal.', nan, '193.50 in.', '79.20 ', '57.00 in.']
2007
https://www.carspecs.us/cars/2007/audi/s6
(2013, 'https://www.carspecs.us/cars/2013/audi/s7')
['$38,612', 4, 5, '5.10 sec', '420 hp', 'All Wheel Drive ', '20 mpg', 'Gas', '19.80 gal.', nan, '195.60 in.', '75.20 ', '55.90 in.']
2013
https://www.carspecs.us/cars/2013/audi/s7
(2007, 'https://www.carspecs.us/cars/2007/audi/s8')
['$16,560', 4, 5, '4.66 sec', '450 hp', 'All Wheel

['$96,900', 4, 5, '5.53 sec', '505 hp', 'Rear Wheel Drive ', '13 mpg', 'Gas', '25.40 gal.', nan, '219.50 in.', '75.80 ', '59.90 in.']
2011
https://www.carspecs.us/cars/2011/bentley/mulsanne
(2008, 'https://www.carspecs.us/cars/2008/bmw/1-series')
['$6,951', nan, nan, '6.10 sec', '230 hp', ' Rear wheel drive  ', 23.0, 'Gas', '14.00 gal.', nan, '171.70 in.', '68.80 ', '56.00 in.']
2008
https://www.carspecs.us/cars/2008/bmw/1-series
(2011, 'https://www.carspecs.us/cars/2011/bmw/1-series-m')
['$15,685', 2, 4, '4.64 sec', '335 hp', 'Rear Wheel Drive ', 0, 'Gas', '14.00 gal.', nan, '172.20 in.', '71.00 ', '55.90 in.']
2011
https://www.carspecs.us/cars/2011/bmw/1-series-m
(2008, 'https://www.carspecs.us/cars/2008/bmw/128')
['$8,211', nan, nan, '6.43 sec', '230 hp', 'rear-wheel ', 23.0, nan, '14.00 gal.', nan, nan, nan, nan]
2008
https://www.carspecs.us/cars/2008/bmw/128
(2008, 'https://www.carspecs.us/cars/2008/bmw/135')
['$8,211', nan, nan, '5.46 sec', '300 hp', 'rear-wheel ', 21.5, nan, '14

(2005, 'https://www.carspecs.us/cars/2005/bmw/7-series')
['$10,485', nan, nan, '6.31 sec', '325 hp', ' Rear wheel drive  ', 20.0, 'Gas', '23.30 gal.', '122.00 cu.ft.', '198.00 in.', '74.90 ', '58.70 in.']
2005
https://www.carspecs.us/cars/2005/bmw/7-series
(2014, 'https://www.carspecs.us/cars/2014/bmw/7-series-gran-coupe')
['$66,700', nan, nan, '5.03 sec', '445 hp', ' Rear wheel drive  ', 21.0, 'Gas', '18.50 gal.', '109.00 cu.ft.', '197.20 in.', '74.60 ', '54.80 in.']
2014
https://www.carspecs.us/cars/2014/bmw/7-series-gran-coupe
(2011, 'https://www.carspecs.us/cars/2011/bmw/740')
[nan, nan, nan, '5.73 sec', '315 hp', 'rear-wheel ', 21.0, nan, '21.70 gal.', nan, nan, nan, nan]
2011
https://www.carspecs.us/cars/2011/bmw/740
(2005, 'https://www.carspecs.us/cars/2005/bmw/745')
[nan, nan, nan, '6.31 sec', '325 hp', 'rear-wheel ', 22.0, nan, '23.30 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/bmw/745
(2006, 'https://www.carspecs.us/cars/2006/bmw/750')
[nan, nan, nan, '6

['$29,212', 2, 4, '9.15 sec', '200 hp', 'Front Wheel Drive ', '23 mpg', 'Gas', '14.30 gal.', nan, '184.90 in.', '72.40 ', '56.80 in.']
2016
https://www.carspecs.us/cars/2016/buick/cascada
(2005, 'https://www.carspecs.us/cars/2005/buick/century')
['$3,306', 4, 6, '8.88 sec', '175 hp', 'Front Wheel Drive ', 25.0, 'Gas', '17.00 gal.', nan, '194.60 in.', '72.70 ', '56.60 in.']
2005
https://www.carspecs.us/cars/2005/buick/century
(2008, 'https://www.carspecs.us/cars/2008/buick/enclave')
['$7,309', 4, 7, '7.56 sec', '275 hp', 'All Wheel Drive ', 19.0, 'Gas', '22.00 gal.', nan, '55.20 in.', '79.00 ', '72.20 in.']
2008
https://www.carspecs.us/cars/2008/buick/enclave
(2013, 'https://www.carspecs.us/cars/2013/buick/encore')
['$14,548', 4, 5, '9.32 sec', '138 hp', 'All Wheel Drive ', '26 mpg', 'Gas', '14.00 gal.', nan, '168.40 in.', '69.90 ', '65.20 in.']
2013
https://www.carspecs.us/cars/2013/buick/encore
(2016, 'https://www.carspecs.us/cars/2016/buick/envision')
['$36,215', 4, 5, '6.90 sec', '2

['$8,460', 4, 7, nan, '345 hp', 'All Wheel Drive ', 15.0, 'Gas', '26.00 gal.', nan, nan, '49.20 in.', '76.50 in.']
2005
https://www.carspecs.us/cars/2005/cadillac/escalade
(2005, 'https://www.carspecs.us/cars/2005/cadillac/escalade-esv')
['$8,820', nan, nan, '7.16 sec', '345 hp', ' All wheel drive  ', 14.0, 'Gas', '31.00 gal.', nan, '221.40 in.', '79.50 ', '75.70 in.']
2005
https://www.carspecs.us/cars/2005/cadillac/escalade-esv
(2005, 'https://www.carspecs.us/cars/2005/cadillac/escalade-ext')
['$8,000', 4, 5, nan, '345 hp', 'All Wheel Drive ', 15.0, 'Gas', '31.00 gal.', nan, nan, '79.50 ', '75.60 in.']
2005
https://www.carspecs.us/cars/2005/cadillac/escalade-ext
(2009, 'https://www.carspecs.us/cars/2009/cadillac/escalade-hybrid')
['$21,858', nan, nan, '7.80 sec', '332 hp', ' Rear wheel drive  ', 20.5, 'Hybrid', '26.00 gal.', nan, '202.50 in.', '79.00 ', '75.90 in.']
2009
https://www.carspecs.us/cars/2009/cadillac/escalade-hybrid
(2005, 'https://www.carspecs.us/cars/2005/cadillac/srx')

['$33,060', nan, nan, '3.69 sec', '455 hp', ' Rear wheel drive  ', 23.0, 'Gas', '18.50 gal.', nan, '176.90 in.', '73.90 ', '48.80 in.']
2014
https://www.carspecs.us/cars/2014/chevrolet/corvette-stingray
(2011, 'https://www.carspecs.us/cars/2011/chevrolet/cruze')
['$6,264', 4, 5, nan, '138 hp', 'Front Wheel Drive ', '33 mpg', 'Gas', '12.60 gal.', nan, '181.00 in.', '70.70 ', '58.10 in.']
2011
https://www.carspecs.us/cars/2011/chevrolet/cruze
(2016, 'https://www.carspecs.us/cars/2016/chevrolet/cruze-limited')
[nan, nan, nan, '9.30 sec', '138 hp', ' Front wheel drive  ', 30.5, 'Gas', '15.60 gal.', '109.60 cu.ft.', '181.00 in.', '70.70 ', '58.10 in.']
2016
https://www.carspecs.us/cars/2016/chevrolet/cruze-limited
(2005, 'https://www.carspecs.us/cars/2005/chevrolet/equinox')
['$3,494', 4, 5, '8.26 sec', '185 hp', 'All Wheel Drive ', 22.0, 'Gas', '16.70 gal.', nan, '188.80 in.', '71.40 ', '69.30 in.']
2005
https://www.carspecs.us/cars/2005/chevrolet/equinox
(2005, 'https://www.carspecs.us/ca

['$4,614', nan, nan, nan, '300 hp', ' Rear wheel drive  ', 0, 'Gas', '34.00 gal.', nan, '256.20 in.', '96.10 ', '77.30 in.']
2005
https://www.carspecs.us/cars/2005/chevrolet/silverado-3500
(2007, 'https://www.carspecs.us/cars/2007/chevrolet/silverado-3500-classic')
[nan, nan, nan, nan, '300 hp', ' Four wheel drive  ', 0, 'Gas', '34.00 gal.', nan, '224.50 in.', '96.10 ', '76.70 in.']
2007
https://www.carspecs.us/cars/2007/chevrolet/silverado-3500-classic
(2008, 'https://www.carspecs.us/cars/2008/chevrolet/silverado-3500hd')
['$6,822', nan, nan, nan, '312 hp', ' Four wheel drive  ', 0, 'Gas', '34.00 gal.', nan, '258.70 in.', '95.90 ', '76.10 in.']
2008
https://www.carspecs.us/cars/2008/chevrolet/silverado-3500hd
(2015, 'https://www.carspecs.us/cars/2015/chevrolet/silverado-3500hd-built-after-aug-14')
['$29,300', 4, 6, nan, '360 hp', 'Rear Wheel Drive ', 0, 'Gas', '36.00 gal.', nan, nan, '51.02 in.', '78.35 in.']
2015
https://www.carspecs.us/cars/2015/chevrolet/silverado-3500hd-built-afte

['$4,054', 2, 4, '8.00 sec', '200 hp', 'Front Wheel Drive ', 24.5, 'Gas', '16.00 gal.', nan, '193.70 in.', '69.40 ', '55.00 in.']
2005
https://www.carspecs.us/cars/2005/chrysler/sebring-convertible
(2005, 'https://www.carspecs.us/cars/2005/chrysler/sebring-coupe')
['$3,694', 2, 5, '7.89 sec', '200 hp', 'Front Wheel Drive ', 24.0, 'Gas', '16.30 gal.', nan, '191.90 in.', '69.90 ', '53.90 in.']
2005
https://www.carspecs.us/cars/2005/chrysler/sebring-coupe
(2005, 'https://www.carspecs.us/cars/2005/chrysler/sebring-sedan')
['$3,375', 4, 5, '7.71 sec', '200 hp', 'Front Wheel Drive ', 24.5, 'Gas', '16.00 gal.', nan, '190.70 in.', '70.60 ', '54.90 in.']
2005
https://www.carspecs.us/cars/2005/chrysler/sebring-sedan
(2005, 'https://www.carspecs.us/cars/2005/chrysler/town--country')
['$3,138', nan, nan, '9.93 sec', '180 hp', 'front-wheel ', 22.5, nan, '20.00 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/chrysler/town--country
(2005, 'https://www.carspecs.us/cars/2005/chrysler/

[nan, nan, nan, '10.17 sec', '154 hp', 'rear-wheel ', 0, nan, '26.40 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/dodge/sprinter-wagon-2500
(2013, 'https://www.carspecs.us/cars/2013/dodge/srt-viper')
['$58,993', 2, 2, '2.95 sec', '640 hp', 'Rear Wheel Drive ', '15 mpg', 'Gas', '16.00 gal.', nan, '175.70 in.', '76.40 ', '49.10 in.']
2013
https://www.carspecs.us/cars/2013/dodge/srt-viper
(2005, 'https://www.carspecs.us/cars/2005/dodge/srt4')
[nan, nan, nan, '6.05 sec', '230 hp', 'front-wheel ', 26.0, nan, '12.50 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/dodge/srt4
(2005, 'https://www.carspecs.us/cars/2005/dodge/stratus')
['$3,053', nan, nan, '9.61 sec', '150 hp', ' Front wheel drive  ', 22.5, 'Gas', '16.00 gal.', '110.00 cu.ft.', '191.20 in.', '70.60 ', '54.90 in.']
2005
https://www.carspecs.us/cars/2005/dodge/stratus
(2005, 'https://www.carspecs.us/cars/2005/dodge/stratus-coupe')
['$3,447', 2, 5, '7.25 sec', '200 hp', 'Front Wheel Drive ', 25

['$19,954', 4, 5, '7.04 sec', '180 hp', 'All Wheel Drive ', '24 mpg', 'Gas', '13.20 gal.', nan, '167.20 in.', '70.70 ', '29.80 in.']
2016
https://www.carspecs.us/cars/2016/fiat/500x
(2012, 'https://www.carspecs.us/cars/2012/fisker/karma')
['$46,000', 4, 4, nan, nan, 'Rear Wheel Drive ', '20 mpg', 'Gas', '9.00 gal.', nan, '196.80 in.', '78.10 ', '52.40 in.']
2012
https://www.carspecs.us/cars/2012/fisker/karma
(2017, 'https://www.carspecs.us/cars/2017/ford/c-max')
[nan, 4, 5, nan, '188 hp', 'Front Wheel Drive ', '40 mpg', 'Gas', '13.50 gal.', '99.70 cu.ft.', '173.60 in.', '72.00 ', '63.90 in.']
2017
https://www.carspecs.us/cars/2017/ford/c-max
(2013, 'https://www.carspecs.us/cars/2013/ford/c-max-energi')
['$16,145', 4, 5, nan, nan, 'Front Wheel Drive ', '38 mpg', 'Gas', '14.00 gal.', nan, '173.60 in.', '72.00 ', '63.80 in.']
2013
https://www.carspecs.us/cars/2013/ford/c-max-energi
(2013, 'https://www.carspecs.us/cars/2013/ford/c-max-hybrid')
['$12,348', 4, 5, nan, nan, 'Front Wheel Drive

[nan, nan, nan, '6.12 sec', '252 hp', ' Front wheel drive  ', 27.5, nan, '12.10 gal.', nan, '171.70 in.', '71.80 ', '58.40 in.']
2013
https://www.carspecs.us/cars/2013/ford/focus-st
(2005, 'https://www.carspecs.us/cars/2005/ford/freestar')
['$4,913', 4, 7, '9.83 sec', '201 hp', 'Front Wheel Drive ', 20.0, 'Gas', '26.00 gal.', '206.80 cu.ft.', '90.90 in.', '76.40 ', '70.60 in.']
2005
https://www.carspecs.us/cars/2005/ford/freestar
(2005, 'https://www.carspecs.us/cars/2005/ford/freestyle')
['$4,266', 4, 6, nan, '203 hp', 'Front Wheel Drive ', 23.5, 'Gas', '19.00 gal.', nan, '199.80 in.', '74.40 ', '65.90 in.']
2005
https://www.carspecs.us/cars/2005/ford/freestyle
(2006, 'https://www.carspecs.us/cars/2006/ford/fusion')
['$3,300', 4, 5, '7.47 sec', '221 hp', 'Front Wheel Drive ', 25.0, 'Gas', '17.50 gal.', nan, '190.20 in.', '72.20 ', '28.30 in.']
2006
https://www.carspecs.us/cars/2006/ford/fusion
(2013, 'https://www.carspecs.us/cars/2013/ford/fusion-energi')
['$18,963', nan, nan, '9.47 se

['$6,340', 4, 7, '7.50 sec', '275 hp', 'All Wheel Drive ', 20.5, 'Gas', '22.00 gal.', nan, '200.70 in.', '78.20 ', '69.90 in.']
2007
https://www.carspecs.us/cars/2007/gmc/acadia
(2005, 'https://www.carspecs.us/cars/2005/gmc/canyon')
[nan, nan, nan, nan, '175 hp', ' Four wheel drive  ', 19.5, 'Gas', '19.00 gal.', nan, '192.80 in.', '67.60 ', '67.60 in.']
2005
https://www.carspecs.us/cars/2005/gmc/canyon
(2005, 'https://www.carspecs.us/cars/2005/gmc/envoy')
['$4,899', nan, nan, '7.11 sec', '275 hp', ' Four wheel drive  ', 16.5, 'Gas', '22.00 gal.', nan, '191.60 in.', '74.70 ', '71.90 in.']
2005
https://www.carspecs.us/cars/2005/gmc/envoy
(2005, 'https://www.carspecs.us/cars/2005/gmc/envoy-xl')
['$4,454', 4, 7, nan, '275 hp', 'Rear Wheel Drive ', 17.0, 'Gas', '25.00 gal.', nan, '58.60 in.', '45.40 in.', '75.50 in.']
2005
https://www.carspecs.us/cars/2005/gmc/envoy-xl
(2005, 'https://www.carspecs.us/cars/2005/gmc/envoy-xuv')
['$4,467', 4, 5, nan, '275 hp', 'Rear Wheel Drive ', 17.0, 'Gas',

['$6,323', nan, nan, '7.80 sec', '295 hp', 'rear-wheel ', 16.0, nan, '31.00 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/gmc/yukon-xl-1500
(2005, 'https://www.carspecs.us/cars/2005/gmc/yukon-xl-2500')
[nan, nan, nan, '7.56 sec', '335 hp', 'AutoTrac automatic full-time four-wheel ', 0, nan, '37.00 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/gmc/yukon-xl-2500
(2005, 'https://www.carspecs.us/cars/2005/gmc/yukon-xl-denali')
['$7,790', 4, 8, nan, '335 hp', 'All Wheel Drive ', 15.0, 'Gas', '31.00 gal.', nan, '49.80 in.', '49.20 in.', '75.70 in.']
2005
https://www.carspecs.us/cars/2005/gmc/yukon-xl-denali
(2005, 'https://www.carspecs.us/cars/2005/honda/accord')
['$3,330', nan, nan, '9.20 sec', '160 hp', ' Front wheel drive  ', 26.0, 'Gas', '17.10 gal.', '111.70 cu.ft.', '189.50 in.', '71.50 ', '57.10 in.']
2005
https://www.carspecs.us/cars/2005/honda/accord
(2005, 'https://www.carspecs.us/cars/2005/honda/accord-coupe')
['$3,487', 2, 5, '9.03 sec', '1

['$4,972', nan, nan, '5.33 sec', '240 hp', ' Rear wheel drive  ', 20.0, 'Gas', '13.20 gal.', '50.10 cu.ft.', '162.20 in.', '68.90 ', '50.00 in.']
2005
https://www.carspecs.us/cars/2005/honda/s2000
(2006, 'https://www.carspecs.us/cars/2006/hummer/h1')
['$22,363', 4, 4, nan, '300 hp', 'Four Wheel Drive ', 0, 'Gas', '28.50 gal.', nan, '44.00 in.', '86.50 ', '77.00 in.']
2006
https://www.carspecs.us/cars/2006/hummer/h1
(2006, 'https://www.carspecs.us/cars/2006/hummer/h1-alpha')
[nan, nan, nan, nan, '300 hp', ' Four wheel drive  ', 0, nan, '51.50 gal.', nan, '184.50 in.', '86.50 ', '79.00 in.']
2006
https://www.carspecs.us/cars/2006/hummer/h1-alpha
(2005, 'https://www.carspecs.us/cars/2005/hummer/h2')
['$7,864', 4, 5, nan, '325 hp', 'Four Wheel Drive ', 0, 'Gas', '32.00 gal.', nan, '34.70 in.', '47.00 in.', '79.20 in.']
2005
https://www.carspecs.us/cars/2005/hummer/h2
(2005, 'https://www.carspecs.us/cars/2005/hummer/h2-sut')
[nan, nan, nan, nan, '325 hp', ' Four wheel drive  ', 0, 'Gas', '3

['$8,770', nan, nan, '8.10 sec', '206 hp', ' Front wheel drive  ', 36.5, 'Hybrid', '17.20 gal.', '114.50 cu.ft.', '189.80 in.', '72.20 ', '57.70 in.']
2011
https://www.carspecs.us/cars/2011/hyundai/sonata-hybrid
(2016, 'https://www.carspecs.us/cars/2016/hyundai/sonata-plug-in-hybrid')
[nan, nan, nan, '8.76 sec', '202 hp', ' Front wheel drive  ', 0, 'Hybrid', '14.50 gal.', '116.00 cu.ft.', '191.10 in.', '73.40 ', '57.90 in.']
2016
https://www.carspecs.us/cars/2016/hyundai/sonata-plug-in-hybrid
(2005, 'https://www.carspecs.us/cars/2005/hyundai/tiburon')
['$2,699', nan, nan, '7.75 sec', '172 hp', ' Front wheel drive  ', 20.0, 'Gas', '14.50 gal.', '81.90 cu.ft.', '173.00 in.', '69.30 ', '52.40 in.']
2005
https://www.carspecs.us/cars/2005/hyundai/tiburon
(2005, 'https://www.carspecs.us/cars/2005/hyundai/tucson')
['$2,849', 4, 5, '8.81 sec', '140 hp', 'Four Wheel Drive ', 23.5, 'Gas', '15.30 gal.', nan, '32.00 in.', '39.40 in.', '68.10 in.']
2005
https://www.carspecs.us/cars/2005/hyundai/tuc

(2012, 'https://www.carspecs.us/cars/2012/infiniti/m35h')
['$21,480', 4, 5, nan, nan, 'Rear Wheel Drive ', '29 mpg', 'Gas', '17.80 gal.', nan, '194.70 in.', '72.60 ', '59.10 in.']
2012
https://www.carspecs.us/cars/2012/infiniti/m35h
(2006, 'https://www.carspecs.us/cars/2006/infiniti/m35x')
[nan, nan, nan, '5.56 sec', '280 hp', 'ATTESA E-TS full-time all wheel ', 20.5, nan, '20.00 gal.', nan, nan, nan, nan]
2006
https://www.carspecs.us/cars/2006/infiniti/m35x
(2011, 'https://www.carspecs.us/cars/2011/infiniti/m37')
['$16,728', 4, 5, '5.26 sec', '330 hp', 'All Wheel Drive ', '20 mpg', 'Gas', '20.00 gal.', nan, '194.70 in.', '72.60 ', '59.60 in.']
2011
https://www.carspecs.us/cars/2011/infiniti/m37
(2011, 'https://www.carspecs.us/cars/2011/infiniti/m37x')
[nan, nan, nan, '4.97 sec', '330 hp', 'ATTESA E-TS full-time all wheel ', 20.5, nan, '20.00 gal.', nan, nan, nan, nan]
2011
https://www.carspecs.us/cars/2011/infiniti/m37x
(2006, 'https://www.carspecs.us/cars/2006/infiniti/m45')
['$6,534

(2019, 'https://www.carspecs.us/cars/2019/jaguar/i-pace')
['$69,500', 4, 5, '5.60 sec', '394 hp', 'All Wheel Drive ', '76 mpg', 'Electric', nan, nan, '184.30 in.', '74.60 ', '61.30 in.']
2019
https://www.carspecs.us/cars/2019/jaguar/i-pace
(2005, 'https://www.carspecs.us/cars/2005/jaguar/s-type')
['$8,899', 4, 5, '5.12 sec', '390 hp', 'Rear Wheel Drive ', 20.5, 'Gas', '18.40 gal.', '98.00 cu.ft.', '193.10 in.', '71.60 ', '56.00 in.']
2005
https://www.carspecs.us/cars/2005/jaguar/s-type
(2017, 'https://www.carspecs.us/cars/2017/jaguar/xe')
[nan, 4, 5, '5.25 sec', '340 hp', 'All Wheel Drive ', '23 mpg', 'Gas', '16.60 gal.', '91.00 cu.ft.', '183.90 in.', '72.80 ', '55.70 in.']
2017
https://www.carspecs.us/cars/2017/jaguar/xe
(2009, 'https://www.carspecs.us/cars/2009/jaguar/xf')
['$13,800', 4, 5, '5.52 sec', '300 hp', 'Rear Wheel Drive ', '19 mpg', 'Gas', '18.40 gal.', nan, '195.30 in.', '73.90 ', '57.50 in.']
2009
https://www.carspecs.us/cars/2009/jaguar/xf
(2005, 'https://www.carspecs.us

[nan, nan, nan, '9.54 sec', '166 hp', 'front-wheel ', 36.5, nan, '17.20 gal.', nan, nan, nan, nan]
2011
https://www.carspecs.us/cars/2011/kia/optima-hybrid
(2017, 'https://www.carspecs.us/cars/2017/kia/optima-plug-in-hybrid')
['$35,210', 4, 5, '8.76 sec', '202 hp', 'Front Wheel Drive ', '42 mpg', 'Gas', '14.50 gal.', nan, '191.10 in.', '73.20 ', '57.50 in.']
2017
https://www.carspecs.us/cars/2017/kia/optima-plug-in-hybrid
(2005, 'https://www.carspecs.us/cars/2005/kia/rio')
[nan, nan, nan, '9.53 sec', '104 hp', ' Front wheel drive  ', 25.5, 'Gas', '11.90 gal.', '97.00 cu.ft.', '166.90 in.', '66.10 ', '56.70 in.']
2005
https://www.carspecs.us/cars/2005/kia/rio
(2017, 'https://www.carspecs.us/cars/2017/kia/rio-5-door')
['$17,905', 4, 5, '8.70 sec', '138 hp', 'Front Wheel Drive ', '30 mpg', 'Gas', '11.40 gal.', nan, '159.40 in.', '67.70 ', '57.30 in.']
2017
https://www.carspecs.us/cars/2017/kia/rio-5-door
(2006, 'https://www.carspecs.us/cars/2006/kia/rio5')
[nan, nan, nan, '9.24 sec', '110

[nan, nan, nan, '7.57 sec', '225 hp', 'front-wheel ', 25.0, nan, '18.50 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/lexus/es-330
(2007, 'https://www.carspecs.us/cars/2007/lexus/es-350')
[nan, nan, nan, '5.90 sec', '272 hp', 'front-wheel ', 25.5, nan, '18.50 gal.', nan, nan, nan, nan]
2007
https://www.carspecs.us/cars/2007/lexus/es-350
(2005, 'https://www.carspecs.us/cars/2005/lexus/gs')
['$5,831', nan, nan, '7.38 sec', '220 hp', ' Rear wheel drive  ', 19.5, 'Gas', '19.80 gal.', '100.00 cu.ft.', '189.20 in.', '70.90 ', '55.90 in.']
2005
https://www.carspecs.us/cars/2005/lexus/gs
(2016, 'https://www.carspecs.us/cars/2016/lexus/gs-200t')
['$36,948', 4, 5, '7.11 sec', '241 hp', 'Rear Wheel Drive ', '24 mpg', 'Gas', '17.40 gal.', nan, '192.10 in.', '72.40 ', '57.30 in.']
2016
https://www.carspecs.us/cars/2016/lexus/gs-200t
(2005, 'https://www.carspecs.us/cars/2005/lexus/gs-300')
['$5,831', 4, 5, '7.38 sec', '220 hp', 'Rear Wheel Drive ', 21.5, 'Gas', '19.80 gal.', nan,

['$31,067', 2, 4, '5.79 sec', '306 hp', 'All Wheel Drive ', '21 mpg', 'Gas', '17.40 gal.', nan, '184.80 in.', '72.40 ', '55.10 in.']
2015
https://www.carspecs.us/cars/2015/lexus/rc
(2016, 'https://www.carspecs.us/cars/2016/lexus/rc-200t')
['$32,395', 2, 4, '7.02 sec', '241 hp', 'Rear Wheel Drive ', '26 mpg', 'Gas', '17.40 gal.', nan, '184.80 in.', '72.40 ', '54.90 in.']
2016
https://www.carspecs.us/cars/2016/lexus/rc-200t
(2016, 'https://www.carspecs.us/cars/2016/lexus/rc-300')
['$34,514', 2, 4, '6.64 sec', '255 hp', 'All Wheel Drive ', '21 mpg', 'Gas', '17.40 gal.', nan, '184.80 in.', '72.40 ', '55.10 in.']
2016
https://www.carspecs.us/cars/2016/lexus/rc-300
(2015, 'https://www.carspecs.us/cars/2015/lexus/rc-f')
['$43,056', 2, 4, '4.15 sec', '467 hp', 'Rear Wheel Drive ', '19 mpg', 'Gas', '17.40 gal.', nan, '185.20 in.', '72.60 ', '54.70 in.']
2015
https://www.carspecs.us/cars/2015/lexus/rc-f
(2005, 'https://www.carspecs.us/cars/2005/lexus/rx')
['$5,403', nan, nan, '8.06 sec', '230 hp

['$14,725', 2, 4, '4.46 sec', '396 hp', 'Rear Wheel Drive ', 13.0, 'Gas', '23.20 gal.', nan, '178.00 in.', '71.70 ', '50.70 in.']
2005
https://www.carspecs.us/cars/2005/maserati/gransport
(2008, 'https://www.carspecs.us/cars/2008/maserati/granturismo')
['$23,100', 2, 4, '5.14 sec', '405 hp', 'Rear Wheel Drive ', 16.0, 'Gas', nan, nan, '192.00 in.', '72.00 ', '53.00 in.']
2008
https://www.carspecs.us/cars/2008/maserati/granturismo
(2010, 'https://www.carspecs.us/cars/2010/maserati/granturismo-convertible')
[nan, nan, nan, '5.08 sec', '433 hp', ' Rear wheel drive  ', 15.5, 'Gas', '19.80 gal.', '90.50 cu.ft.', '192.20 in.', '72.70 ', '53.30 in.']
2010
https://www.carspecs.us/cars/2010/maserati/granturismo-convertible
(2017, 'https://www.carspecs.us/cars/2017/maserati/levante')
[nan, 4, 5, '6.05 sec', '345 hp', 'All Wheel Drive ', '16 mpg', 'Gas', '21.10 gal.', '108.00 cu.ft.', '197.00 in.', '77.50 ', '66.10 in.']
2017
https://www.carspecs.us/cars/2017/maserati/levante
(2005, 'https://www.

['$2,789', 4, 6, '8.95 sec', '157 hp', 'Front Wheel Drive ', 24.5, 'Gas', '15.90 gal.', nan, '181.50 in.', '69.10 ', '64.20 in.']
2006
https://www.carspecs.us/cars/2006/mazda/mazda5
(2005, 'https://www.carspecs.us/cars/2005/mazda/mazda6')
['$2,849', 4, 5, '8.36 sec', '160 hp', 'Front Wheel Drive ', 27.0, 'Gas', '18.00 gal.', nan, '186.80 in.', '70.10 ', '56.70 in.']
2005
https://www.carspecs.us/cars/2005/mazda/mazda6
(2005, 'https://www.carspecs.us/cars/2005/mazda/mazda6')
['$2,849', 4, 5, '8.36 sec', '160 hp', 'Front Wheel Drive ', 27.0, 'Gas', '18.00 gal.', nan, '186.80 in.', '70.10 ', '56.70 in.']
2005
https://www.carspecs.us/cars/2005/mazda/mazda6
(2007, 'https://www.carspecs.us/cars/2007/mazda/mazdaspeed-3')
[nan, nan, nan, '5.83 sec', '263 hp', ' Front wheel drive  ', 22.0, 'Gas', '14.50 gal.', nan, '176.80 in.', '69.50 ', '57.70 in.']
2007
https://www.carspecs.us/cars/2007/mazda/mazdaspeed-3
(2006, 'https://www.carspecs.us/cars/2006/mazda/mazdaspeed-6')
['$4,433', nan, nan, '5.5

[nan, nan, nan, '4.36 sec', '362 hp', 'rear-wheel ', 19.5, nan, '16.40 gal.', nan, nan, nan, nan]
2006
https://www.carspecs.us/cars/2006/mercedes-benz/c5-amg
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/c55-amg')
[nan, nan, nan, '4.36 sec', '362 hp', 'rear-wheel ', 19.0, nan, '16.40 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/mercedes-benz/c55-amg
(2008, 'https://www.carspecs.us/cars/2008/mercedes-benz/c63-amg')
['$11,298', 4, 5, '3.78 sec', '451 hp', 'Rear Wheel Drive ', 15.5, 'Gas', '17.40 gal.', nan, '186.00 in.', '70.70 ', '56.60 in.']
2008
https://www.carspecs.us/cars/2008/mercedes-benz/c63-amg
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/c-class')
['$4,237', nan, nan, '7.06 sec', '189 hp', ' Rear wheel drive  ', 24.5, 'Gas', '16.40 gal.', nan, '171.00 in.', '68.00 ', '54.30 in.']
2005
https://www.carspecs.us/cars/2005/mercedes-benz/c-class
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/cl-class')
['$26,865', nan, nan, '4.15 sec

[nan, nan, nan, '4.00 sec', '469 hp', 'rear-wheel ', 17.5, nan, '20.60 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/mercedes-benz/e55-amg
(2007, 'https://www.carspecs.us/cars/2007/mercedes-benz/e550')
[nan, nan, nan, '4.83 sec', '382 hp', 'full-time 4MATIC all wheel ', 18.5, nan, '20.60 gal.', nan, nan, nan, nan]
2007
https://www.carspecs.us/cars/2007/mercedes-benz/e550
(2007, 'https://www.carspecs.us/cars/2007/mercedes-benz/e63-amg')
[nan, nan, nan, '3.71 sec', '507 hp', 'rear-wheel ', 17.0, nan, '20.60 gal.', nan, nan, nan, nan]
2007
https://www.carspecs.us/cars/2007/mercedes-benz/e63-amg
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/e-class')
['$7,552', nan, nan, '8.20 sec', '201 hp', ' Rear wheel drive  ', 28.0, 'Diesel', '21.10 gal.', '113.10 cu.ft.', '189.70 in.', '71.30 ', '57.20 in.']
2005
https://www.carspecs.us/cars/2005/mercedes-benz/e-class
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/g-class')
[nan, nan, nan, '7.83 sec', '292 hp', 

['$7,862', nan, nan, '8.86 sec', '215 hp', 'full-time four-wheel ', 24.0, nan, '25.10 gal.', nan, nan, nan, nan]
2007
https://www.carspecs.us/cars/2007/mercedes-benz/ml320
(2009, 'https://www.carspecs.us/cars/2009/mercedes-benz/ml320-bluetec')
[nan, nan, nan, '8.11 sec', '210 hp', 'full-time 4MATIC four-wheel ', 21.0, nan, '25.10 gal.', nan, nan, nan, nan]
2009
https://www.carspecs.us/cars/2009/mercedes-benz/ml320-bluetec
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/ml350')
[nan, nan, nan, '8.37 sec', '232 hp', 'full-time four-wheel ', 16.5, nan, '22.60 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/mercedes-benz/ml350
(2010, 'https://www.carspecs.us/cars/2010/mercedes-benz/ml350-bluetec')
[nan, nan, nan, '8.11 sec', '210 hp', 'full-time 4MATIC four-wheel ', 21.5, nan, '25.10 gal.', nan, nan, nan, nan]
2010
https://www.carspecs.us/cars/2010/mercedes-benz/ml350-bluetec
(2010, 'https://www.carspecs.us/cars/2010/mercedes-benz/ml450-hybrid')
[nan, nan, nan, '6

[nan, nan, nan, '6.39 sec', '241 hp', ' Rear wheel drive  ', 28.5, 'Gas', '15.90 gal.', nan, '162.80 in.', '71.50 ', '51.30 in.']
2017
https://www.carspecs.us/cars/2017/mercedes-benz/slc-class
(2017, 'https://www.carspecs.us/cars/2017/mercedes-benz/slc-class')
[nan, nan, nan, '6.39 sec', '241 hp', ' Rear wheel drive  ', 28.5, 'Gas', '15.90 gal.', nan, '162.80 in.', '71.50 ', '51.30 in.']
2017
https://www.carspecs.us/cars/2017/mercedes-benz/slc-class
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/sl-class')
['$13,695', nan, nan, '6.31 sec', '302 hp', ' Rear wheel drive  ', 17.5, 'Gas', nan, '68.00 cu.ft.', '178.50 in.', '71.50 ', '51.00 in.']
2005
https://www.carspecs.us/cars/2005/mercedes-benz/sl-class
(2005, 'https://www.carspecs.us/cars/2005/mercedes-benz/slk-class')
['$6,937', nan, nan, '5.41 sec', '268 hp', ' Rear wheel drive  ', 19.5, 'Gas', nan, '43.80 cu.ft.', '160.70 in.', '70.40 ', '51.10 in.']
2005
https://www.carspecs.us/cars/2005/mercedes-benz/slk-class
(2012, 'htt

['$11,368', nan, nan, '9.90 sec', '121 hp', ' Front wheel drive  ', 31.0, 'Gas', '12.40 gal.', '103.00 cu.ft.', '162.00 in.', '70.30 ', '59.90 in.']
2013
https://www.carspecs.us/cars/2013/mini/cooper-paceman
(2012, 'https://www.carspecs.us/cars/2012/mini/cooper-roadster')
[nan, nan, nan, '9.12 sec', '121 hp', ' Front wheel drive  ', 31.0, 'Gas', '13.20 gal.', nan, '146.80 in.', '66.30 ', '54.50 in.']
2012
https://www.carspecs.us/cars/2012/mini/cooper-roadster
(2011, 'https://www.carspecs.us/cars/2011/mini/countryman')
['$7,361', nan, nan, '6.92 sec', '181 hp', 'full-time ALL4 all wheel ', 28.0, nan, '12.40 gal.', nan, nan, nan, nan]
2011
https://www.carspecs.us/cars/2011/mini/countryman
(2012, 'https://www.carspecs.us/cars/2012/mini/coupe')
[nan, nan, nan, '8.92 sec', '121 hp', 'front-wheel ', 33.0, nan, '13.20 gal.', nan, nan, nan, nan]
2012
https://www.carspecs.us/cars/2012/mini/coupe
(2005, 'https://www.carspecs.us/cars/2005/mini/hardtop')
['$3,217', nan, nan, '9.17 sec', '115 hp', 

['$5,865', 4, 7, '7.59 sec', '305 hp', 'Rear Wheel Drive ', 16.0, 'Gas', '28.00 gal.', nan, '206.90 in.', '78.80 ', '77.80 in.']
2005
https://www.carspecs.us/cars/2005/nissan/armada
(2009, 'https://www.carspecs.us/cars/2009/nissan/cube')
['$4,197', 4, 5, nan, '122 hp', 'Front Wheel Drive ', '29 mpg', 'Gas', '13.20 gal.', nan, '156.70 in.', '66.70 ', '28.60 in.']
2009
https://www.carspecs.us/cars/2009/nissan/cube
(2005, 'https://www.carspecs.us/cars/2005/nissan/frontier')
['$3,562', nan, nan, '7.09 sec', '265 hp', ' Rear wheel drive  ', 16.5, 'Gas', '21.10 gal.', nan, '205.50 in.', '72.80 ', '69.70 in.']
2005
https://www.carspecs.us/cars/2005/nissan/frontier
(2005, 'https://www.carspecs.us/cars/2005/nissan/frontier-2wd')
['$3,600', 4, 5, nan, '265 hp', 'Rear Wheel Drive ', 19.0, 'Gas', '21.00 gal.', nan, nan, '44.40 in.', '73.90 in.']
2005
https://www.carspecs.us/cars/2005/nissan/frontier-2wd
(2005, 'https://www.carspecs.us/cars/2005/nissan/frontier-4wd')
['$3,997', 4, 5, nan, '265 hp',

['$4,400', 4, 5, '6.22 sec', '303 hp', 'Front Wheel Drive ', 22.5, 'Gas', '17.00 gal.', nan, '198.30 in.', '71.60 ', '55.90 in.']
2005
https://www.carspecs.us/cars/2005/pontiac/grand-prix
(2005, 'https://www.carspecs.us/cars/2005/pontiac/gto')
['$4,844', 2, 4, '4.79 sec', '400 hp', 'Rear Wheel Drive ', 18.5, 'Gas', '18.00 gal.', nan, '189.80 in.', '72.50 ', '54.90 in.']
2005
https://www.carspecs.us/cars/2005/pontiac/gto
(2005, 'https://www.carspecs.us/cars/2005/pontiac/montana')
['$4,563', 4, 7, '9.64 sec', '185 hp', 'Front Wheel Drive ', 22.5, 'Gas', '25.00 gal.', nan, '62.40 in.', '48.30 in.', '22.80 in.']
2005
https://www.carspecs.us/cars/2005/pontiac/montana
(2005, 'https://www.carspecs.us/cars/2005/pontiac/montana-sv6')
['$4,531', nan, nan, '9.08 sec', '200 hp', 'VersaTrak automatic full-time all wheel ', 20.0, nan, '25.00 gal.', nan, nan, nan, nan]
2005
https://www.carspecs.us/cars/2005/pontiac/montana-sv6
(2006, 'https://www.carspecs.us/cars/2006/pontiac/solstice')
['$3,186', 2,

['$17,011', 4, 2, nan, '178 hp', 'Front Wheel Drive ', '24 mpg', 'Gas', '16.10 gal.', nan, '87.20 in.', '48.40 in.', '74.00 in.']
2015
https://www.carspecs.us/cars/2015/ram/promaster-city
(2015, 'https://www.carspecs.us/cars/2015/ram/promaster-city')
['$17,011', 4, 2, nan, '178 hp', 'Front Wheel Drive ', '24 mpg', 'Gas', '16.10 gal.', nan, '87.20 in.', '48.40 in.', '74.00 in.']
2015
https://www.carspecs.us/cars/2015/ram/promaster-city
(2014, 'https://www.carspecs.us/cars/2014/ram/promaster-window-van')
['$19,287', nan, nan, '8.94 sec', '280 hp', ' Front wheel drive  ', 0, 'Gas', '24.00 gal.', nan, '236.00 in.', '82.70 ', '101.00 in.']
2014
https://www.carspecs.us/cars/2014/ram/promaster-window-van
(2019, 'https://www.carspecs.us/cars/2019/rolls-royce/cullinan')
['$325,000', 4, 5, nan, '563 hp', 'All Wheel Drive ', '14 mpg', 'Gas', nan, nan, '88.40 in.', '85.00 ', '72.30 in.']
2019
https://www.carspecs.us/cars/2019/rolls-royce/cullinan
(2016, 'https://www.carspecs.us/cars/2016/rolls-roy

[nan, nan, nan, '5.98 sec', '200 hp', ' Rear wheel drive  ', 26.0, 'Gas', '13.20 gal.', '83.40 cu.ft.', '166.70 in.', '69.90 ', '50.60 in.']
2013
https://www.carspecs.us/cars/2013/scion/fr-s
(2016, 'https://www.carspecs.us/cars/2016/scion/ia')
['$13,608', 4, 5, '10.14 sec', '106 hp', 'Front Wheel Drive ', '37 mpg', 'Gas', '11.60 gal.', nan, '171.70 in.', '66.70 ', '58.50 in.']
2016
https://www.carspecs.us/cars/2016/scion/ia
(2016, 'https://www.carspecs.us/cars/2016/scion/im')
['$15,552', 4, 5, nan, '137 hp', 'Front Wheel Drive ', '32 mpg', 'Gas', '14.00 gal.', nan, '170.50 in.', '69.30 ', '55.30 in.']
2016
https://www.carspecs.us/cars/2016/scion/im
(2012, 'https://www.carspecs.us/cars/2012/scion/iq')
['$6,106', nan, nan, '10.08 sec', '94 hp', ' Front wheel drive  ', 36.5, 'Gas', '8.50 gal.', '77.30 cu.ft.', '120.10 in.', '66.10 ', '59.10 in.']
2012
https://www.carspecs.us/cars/2012/scion/iq
(2005, 'https://www.carspecs.us/cars/2005/scion/tc')
['$2,400', 2, 5, '7.96 sec', '160 hp', 'Fro

['$2,759', 4, 5, '7.56 sec', '165 hp', 'Rear Wheel Drive ', 20.5, 'Gas', '16.90 gal.', nan, '47.60 in.', '43.50 in.', '67.30 in.']
2005
https://www.carspecs.us/cars/2005/suzuki/grand-vitara
(2010, 'https://www.carspecs.us/cars/2010/suzuki/kizashi')
[nan, nan, nan, '7.75 sec', '185 hp', 'front-wheel ', 24.5, nan, '16.60 gal.', nan, nan, nan, nan]
2010
https://www.carspecs.us/cars/2010/suzuki/kizashi
(2005, 'https://www.carspecs.us/cars/2005/suzuki/reno')
['$2,152', 4, 5, '9.90 sec', '126 hp', 'Front Wheel Drive ', 26.0, 'Gas', '14.50 gal.', nan, '169.10 in.', '67.90 ', '56.90 in.']
2005
https://www.carspecs.us/cars/2005/suzuki/reno
(2007, 'https://www.carspecs.us/cars/2007/suzuki/sx4')
['$2,897', 4, 5, '8.17 sec', '143 hp', 'All Wheel Drive ', 29.5, 'Gas', '11.00 gal.', nan, '162.80 in.', '69.10 ', '63.20 in.']
2007
https://www.carspecs.us/cars/2007/suzuki/sx4
(2005, 'https://www.carspecs.us/cars/2005/suzuki/verona')
['$2,902', 4, 5, '9.81 sec', '155 hp', 'Front Wheel Drive ', 24.0, 'Ga

['$7,580', 4, 5, nan, '73 hp', 'Front Wheel Drive ', '50 mpg', 'Gas', '9.50 gal.', nan, '157.30 in.', '66.70 ', '56.90 in.']
2012
https://www.carspecs.us/cars/2012/toyota/prius-c
(2012, 'https://www.carspecs.us/cars/2012/toyota/prius-plug-in')
['$12,800', nan, nan, '10.41 sec', '134 hp', ' Front wheel drive  ', 0, 'Hybrid', '10.60 gal.', '115.30 cu.ft.', '176.40 in.', '68.70 ', '58.70 in.']
2012
https://www.carspecs.us/cars/2012/toyota/prius-plug-in
(2017, 'https://www.carspecs.us/cars/2017/toyota/prius-prime')
[nan, nan, nan, '11.77 sec', '121 hp', ' Front wheel drive  ', 54.0, 'Hybrid', '11.30 gal.', '111.30 cu.ft.', '182.90 in.', '69.30 ', '57.90 in.']
2017
https://www.carspecs.us/cars/2017/toyota/prius-prime
(2012, 'https://www.carspecs.us/cars/2012/toyota/prius-v')
['$10,620', 4, 5, nan, '98 hp', 'Front Wheel Drive ', '42 mpg', 'Gas', '11.90 gal.', nan, '181.70 in.', '69.90 ', '62.00 in.']
2012
https://www.carspecs.us/cars/2012/toyota/prius-v
(2005, 'https://www.carspecs.us/cars/2

[nan, nan, nan, '7.39 sec', '200 hp', ' Front wheel drive  ', 24.5, 'Gas', '14.50 gal.', '107.00 cu.ft.', '179.30 in.', '70.10 ', '57.40 in.']
2008
https://www.carspecs.us/cars/2008/volkswagen/jetta-gli
(2013, 'https://www.carspecs.us/cars/2013/volkswagen/jetta-hybrid')
[nan, nan, nan, '7.91 sec', '170 hp', ' Front wheel drive  ', 45.0, nan, '11.90 gal.', '105.40 cu.ft.', '182.80 in.', '70.00 ', '57.20 in.']
2013
https://www.carspecs.us/cars/2013/volkswagen/jetta-hybrid
(2005, 'https://www.carspecs.us/cars/2005/volkswagen/jetta-sedan')
['$2,783', 4, 5, '11.06 sec', '115 hp', 'Front Wheel Drive ', 27.0, 'Gas', '14.50 gal.', nan, '172.30 in.', '68.30 ', '56.70 in.']
2005
https://www.carspecs.us/cars/2005/volkswagen/jetta-sedan
(2011, 'https://www.carspecs.us/cars/2011/volkswagen/jetta-sportwagen')
['$6,798', nan, nan, '8.23 sec', '170 hp', ' Front wheel drive  ', 28.0, 'Gas', '14.50 gal.', '124.50 cu.ft.', '179.40 in.', '70.10 ', '59.20 in.']
2011
https://www.carspecs.us/cars/2011/volksw

['$35,700', 4, 5, '6.57 sec', '248 hp', 'All Wheel Drive ', '26 mpg', 'Gas', '14.20 gal.', nan, '34.90 in.', '37.30 in.', '65.30 in.']
2019
https://www.carspecs.us/cars/2019/volvo/xc40
(2010, 'https://www.carspecs.us/cars/2010/volvo/xc60')
['$9,394', nan, nan, '8.16 sec', '235 hp', ' Front wheel drive  ', 22.5, 'Gas', '18.50 gal.', nan, '182.20 in.', '74.40 ', '67.40 in.']
2010
https://www.carspecs.us/cars/2010/volvo/xc60
(2005, 'https://www.carspecs.us/cars/2005/volvo/xc70')
['$5,307', 4, 5, '7.45 sec', '208 hp', 'All Wheel Drive ', 21.0, 'Gas', '18.00 gal.', nan, '33.30 in.', '73.20 ', '61.50 in.']
2005
https://www.carspecs.us/cars/2005/volvo/xc70
(2005, 'https://www.carspecs.us/cars/2005/volvo/xc90')
['$5,293', 4, 5, '9.67 sec', '208 hp', 'Front Wheel Drive ', 20.5, 'Gas', '21.10 gal.', nan, '41.20 in.', '74.70 ', '70.20 in.']
2005
https://www.carspecs.us/cars/2005/volvo/xc90


In [27]:
#model_spec_df['Year'] = model_spec_df['Year'].astype(int)
#model_spec_df['Year'] = model_spec_df['Year'].str.strip()
#yearly_sales_df['Year'] = yearly_sales_df['Year'].astype(int)
#yearly_sales_df['Year'] = yearly_sales_df['Year'].str.strip()
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model'].str.strip() + yearly_sales_df['Year'].str.strip()
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.lower()
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.replace('-','')
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.replace('/','')
# model_spec_df['Model_Merged'] = model_spec_df['Model'].str.strip() + model_spec_df['Year'].str.strip()
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.lower()
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('-','')
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('/','')
model_spec_df.info()
# model_spec_df['Year'] = model_spec_df['Year'].astype(str)
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model'].str.strip() + yearly_sales_df['Year'].str.strip()
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.lower()
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.replace('-','')
# yearly_sales_df['Model_Merged'] = yearly_sales_df['Model_Merged'].str.replace('/','')
# model_spec_df['Model_Merged'] = model_spec_df['Model'].str.strip() + model_spec_df['Year'].str.strip()
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.lower()
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('-','')
# model_spec_df['Model_Merged'] = model_spec_df['Model_Merged'].str.replace('/','')
model_spec_df.head()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1240 entries, 0 to 1239
Data columns (total 16 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Model          1240 non-null   object 
 1   url            1240 non-null   object 
 2   price          905 non-null    object 
 3   doors          637 non-null    float64
 4   passengers     637 non-null    float64
 5   speed_sec      1007 non-null   object 
 6   horsepower_hp  1219 non-null   object 
 7   drive          1238 non-null   object 
 8   mpg            1240 non-null   object 
 9   engine         955 non-null    object 
 10  tank_gal       1188 non-null   object 
 11  volume_cuft    220 non-null    object 
 12  length_in      911 non-null    object 
 13  width_in       968 non-null    object 
 14  height_in      970 non-null    object 
 15  Year           1240 non-null   object 
dtypes: float64(2), object(14)
memory usage: 155.1+ KB


Unnamed: 0,Model,url,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in,Year
0,Acura ILX,https://www.carspecs.us/cars/2013/acura/ilx,,,,8.98 sec,150 hp,Front wheel drive,29.5,Gas,13.20 gal.,101.70 cu.ft.,179.10 in.,70.6,55.60 in.,2013.0
1,Acura ILX Hybrid,https://www.carspecs.us/cars/2014/acura/ilx-hy...,,,,11.40 sec,111 hp,Front wheel drive,38.5,Hybrid,13.20 gal.,99.30 cu.ft.,179.10 in.,70.6,55.60 in.,2014.0
2,Acura MDX,https://www.carspecs.us/cars/2005/acura/mdx,"$5,535",,,7.16 sec,265 hp,All wheel drive,18.0,Gas,20.40 gal.,,188.70 in.,77.0,68.70 in.,2005.0
3,Acura NSX,https://www.carspecs.us/cars/2005/acura/nsx,,,,5.01 sec,290 hp,Rear wheel drive,19.0,Gas,18.50 gal.,53.90 cu.ft.,174.20 in.,71.3,46.10 in.,2005.0
4,Acura RDX,https://www.carspecs.us/cars/2007/acura/rdx,"$5,939",,,7.05 sec,240 hp,All wheel drive,19.5,Gas,18.00 gal.,,180.70 in.,73.6,65.20 in.,2007.0


In [28]:
#model_spec_df['Year'] = model_spec_df['Year'].astype(str)
#model_spec_sales_df = yearly_sales_df.merge(model_spec_df, how = 'left', on = ['Model_Merged'])
model_spec_sales_df = yearly_sales_df.merge(model_spec_df, how = 'left',on=["Model"])
model_spec_sales_df.info()
model_spec_sales_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4393 entries, 0 to 4392
Data columns (total 18 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Model          4393 non-null   object 
 1   Total_Sales    4310 non-null   object 
 2   Year_x         4393 non-null   object 
 3   url            3841 non-null   object 
 4   price          3455 non-null   object 
 5   doors          2404 non-null   float64
 6   passengers     2404 non-null   float64
 7   speed_sec      3350 non-null   object 
 8   horsepower_hp  3811 non-null   object 
 9   drive          3835 non-null   object 
 10  mpg            3841 non-null   object 
 11  engine         3654 non-null   object 
 12  tank_gal       3730 non-null   object 
 13  volume_cuft    1006 non-null   object 
 14  length_in      3632 non-null   object 
 15  width_in       3740 non-null   object 
 16  height_in      3740 non-null   object 
 17  Year_y         3841 non-null   object 
dtypes: float

Unnamed: 0,Model,Total_Sales,Year_x,url,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in,Year_y
0,Ford F-Series,901463.0,2005,,,,,,,,,,,,,,,
1,Chevrolet Silverado,705980.0,2005,,,,,,,,,,,,,,,
2,Toyota Camry,431703.0,2005,https://www.carspecs.us/cars/2005/toyota/camry,"$3,018",4.0,5.0,8.48 sec,160 hp,Front Wheel Drive,28.5,Gas,18.50 gal.,101.80 cu.ft.,189.20 in.,70.70,58.70 in.,2005.0
3,Toyota Corolla/Matrix,341290.0,2005,,,,,,,,,,,,,,,
4,Dodge Ram,400543.0,2005,https://www.carspecs.us/cars/2005/dodge/ram,"$4,061",2.0,3.0,,235 hp,Four Wheel Drive,16.0,Gas,26.00 gal.,,,51.00 in.,76.60 in.,2005.0


drop uneeded columns and remove rows with a lot of na values

In [30]:
model_spec_sales_df.drop(labels=['Model', 'Year_y', 'url'], axis=1, inplace=True)
# model_spec_sales_df.dropna(how = ['price', 'doors', 'passengers', 'speed_sec', 'horsepower_hp', 'drive', 'mpg', 
#                                   'engine', 'tank_gal', 'volume_cuft', 'length_in', 'width_in', 'height_in'], inplace=True)

Clean drive feature

In [31]:
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.lower()
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.lstrip()
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.rstrip()
#model_spec_sales_df = model_spec_sales_df.dropna()
model_spec_sales_df['drive'].value_counts()

front wheel drive                           1597
rear wheel drive                             970
all wheel drive                              794
four wheel drive                             389
rear-wheel                                    39
front-wheel                                   20
automatic full-time all wheel                  9
full-time all wheel                            7
real time automatic full-time four-wheel       6
versatrak automatic full-time all wheel        4
Name: drive, dtype: int64

In [37]:
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('rear-wheel','rear wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('front-wheel','front wheel drive')
#replace = ['full-time all wheel', 'attesa e-ts full-time all wheel', 'automatic full-time all wheel']

model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('automatic full-time all wheel',
                                                                        'all wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('versatrak ','')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('full-time all wheel','all wheel drive')
model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace('real time automatic full-time four-wheel',
                                                                        'all wheel drive')
# for s in replace:
#     model_spec_sales_df['drive'] = model_spec_sales_df['drive'].str.replace(s,'all wheel drive')
model_spec_sales_df['drive'].value_counts()

front wheel drive    1617
rear wheel drive     1009
all wheel drive       820
four wheel drive      389
Name: drive, dtype: int64

In [38]:
model_spec_sales_df.info()
model_spec_sales_df.head()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4393 entries, 0 to 4392
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Total_Sales    4310 non-null   object 
 1   Year_x         4393 non-null   object 
 2   price          3455 non-null   object 
 3   doors          2404 non-null   float64
 4   passengers     2404 non-null   float64
 5   speed_sec      3350 non-null   object 
 6   horsepower_hp  3811 non-null   object 
 7   drive          3835 non-null   object 
 8   mpg            3841 non-null   object 
 9   engine         3654 non-null   object 
 10  tank_gal       3730 non-null   object 
 11  volume_cuft    1006 non-null   object 
 12  length_in      3632 non-null   object 
 13  width_in       3740 non-null   object 
 14  height_in      3740 non-null   object 
dtypes: float64(2), object(13)
memory usage: 549.1+ KB


Unnamed: 0,Total_Sales,Year_x,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in
0,901463.0,2005,,,,,,,,,,,,,
1,705980.0,2005,,,,,,,,,,,,,
2,431703.0,2005,"$3,018",4.0,5.0,8.48 sec,160 hp,front wheel drive,28.5,Gas,18.50 gal.,101.80 cu.ft.,189.20 in.,70.70,58.70 in.
3,341290.0,2005,,,,,,,,,,,,,
4,400543.0,2005,"$4,061",2.0,3.0,,235 hp,four wheel drive,16.0,Gas,26.00 gal.,,,51.00 in.,76.60 in.


In [39]:
replace_list = ['hp', 'mpg', 'gal.', 'cu.ft.', 'in.', ',', 'sec', '$', '$']
for s in replace_list:
    model_spec_sales_df = model_spec_sales_df.replace(s,'', regex=True) 
model_spec_sales_df['price'] = model_spec_sales_df['price'].str.replace('$','')

  model_spec_sales_df['price'] = model_spec_sales_df['price'].str.replace('$','')


In [40]:
num_cols = ['price',  'speed_sec', 'horsepower_hp', 'mpg', 'tank_gal', 'volume_cuft', 
            'width_in', 'length_in','height_in']
for col in num_cols:
    model_spec_sales_df[col] = model_spec_sales_df[col].apply (pd.to_numeric, errors='coerce')
model_spec_sales_df['doors'] = model_spec_sales_df['doors'].astype(str)
model_spec_sales_df['passengers'] = model_spec_sales_df['passengers'].astype(str)

In [41]:
model_spec_sales_df.info()
model_spec_sales_df.sample(10)

<class 'pandas.core.frame.DataFrame'>
Int64Index: 4393 entries, 0 to 4392
Data columns (total 15 columns):
 #   Column         Non-Null Count  Dtype  
---  ------         --------------  -----  
 0   Total_Sales    4310 non-null   float64
 1   Year_x         4393 non-null   object 
 2   price          3455 non-null   float64
 3   doors          4393 non-null   object 
 4   passengers     4393 non-null   object 
 5   speed_sec      3350 non-null   float64
 6   horsepower_hp  3811 non-null   float64
 7   drive          3835 non-null   object 
 8   mpg            3841 non-null   float64
 9   engine         3654 non-null   object 
 10  tank_gal       3730 non-null   float64
 11  volume_cuft    1006 non-null   float64
 12  length_in      3632 non-null   float64
 13  width_in       3740 non-null   float64
 14  height_in      3740 non-null   float64
dtypes: float64(10), object(5)
memory usage: 549.1+ KB


Unnamed: 0,Total_Sales,Year_x,price,doors,passengers,speed_sec,horsepower_hp,drive,mpg,engine,tank_gal,volume_cuft,length_in,width_in,height_in
3638,173600.0,2018,3300.0,4.0,5.0,7.47,221.0,front wheel drive,25.0,Gas,17.5,,190.2,72.2,28.3
2435,66146.0,2013,3971.0,4.0,5.0,7.69,156.0,front wheel drive,28.0,Gas,13.7,,178.3,69.9,57.5
2617,1598.0,2013,4911.0,4.0,5.0,7.08,250.0,all wheel drive,20.5,Gas,16.9,,44.4,73.9,66.5
3346,59.0,2016,23425.0,,,5.44,328.0,rear wheel drive,23.0,Gas,20.0,110.0,187.9,69.8,57.2
3114,78565.0,2016,14548.0,4.0,5.0,9.32,138.0,all wheel drive,26.0,Gas,14.0,,168.4,69.9,65.2
1961,9674.0,2011,,,,,,,,,,,,,
3903,5369.0,2019,,,,,,,,,,,,,
3271,7019.0,2016,15148.0,,,10.81,131.0,front wheel drive,25.0,Gas,14.5,,186.3,68.1,73.7
181,20503.0,2005,6285.0,,,6.73,255.0,all wheel drive,20.5,Gas,21.1,113.8,193.5,71.3,57.5
3198,27812.0,2016,15092.0,,,,240.0,rear wheel drive,29.0,Gas,16.6,,176.5,70.8,60.8


In [42]:
model_spec_sales_df.to_pickle('../data/model_spec_sales_df.pkl')