In [1]:
# Necessary imports
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
import patsy
import glob
import re

import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import RidgeCV
%matplotlib inline

print('Libraries imported')

Libraries imported


In [2]:
path = r'C:/Metis_Bootcamp/Regression_project/data/joined-data/' # data path
all_files = glob.glob(path + "/*.csv")

li = []

for filename in all_files:
    data = pd.read_csv(filename, index_col=0, header=0)
    li.append(data)

df = pd.concat(li, axis=0, ignore_index=True)

print(df.shape)

df.head()

(27174, 15)


Unnamed: 0,Name,Mileage,Address,Rating,Fuel Type,City MPG,Highway MPG,Drivetrain,Engine,Exterior Color,Interior Color,Transmission,Entertainment,Safety,Price
0,2012 Volvo S60 T5,91476,"Kansas City, MO 64111",5.0,Gasoline,20,30,FWD,2.5L I5 20V MPFI DOHC Turbo,Black Stone,Off Black,6-Speed Automatic,"['Bluetooth', 'Premium Sound System']","['Brake Assist', 'Stability Control']","$9,995"
1,2019 Ford Escape SE,56976,"North Kansas City, MO 64116",4.7,Gasoline,23,30,FWD,1.5L I4 16V GDI DOHC Turbo,Blue,Chromite Gray,6-Speed Automatic,"['Bluetooth', 'Apple CarPlay/Android Auto']","['Backup Camera', 'Brake Assist', 'Stability C...","$16,500"
2,2017 Dodge Grand Caravan SXT,45892,"Kansas City, MO 64116",4.8,E85 Flex Fuel,17,25,FWD,3.6L V6 24V MPFI DOHC Flexible Fuel,Billet Clearcoat,Black / Light Graystone,Automatic,['Bluetooth'],"['Backup Camera', 'Brake Assist', 'Stability C...","$17,500"
3,2020 Volvo XC40 T5 R-Design,11103,"KCMO, MO 64105",4.2,Gasoline,22,30,AWD,2.0L I4 16V GDI DOHC Turbo,White,Black,8-Speed Automatic,"['Bluetooth', 'Premium Sound System']","['Backup Camera', 'Brake Assist', 'Stability C...","$41,590"
4,2020 INFINITI QX60 Luxe,33903,"KCMO, MO 64105",4.2,Gasoline,19,26,AWD,3.5L V6 24V GDI DOHC,Black,Beige,Automatic CVT,"['Bluetooth', 'Premium Sound System']","['Backup Camera', 'Brake Assist', 'Stability C...","$29,990"


In [3]:
def model_year_extractor(name):
    
    name_year = name.replace('Certified ','').strip()
    
    year = int(name_year.split(' ')[0])
    
    return year

In [4]:
df['Model Year'] = df['Name'].apply(model_year_extractor)

In [5]:
df = df[df['Model Year'] > 2000]
df['Model Year'].value_counts().sort_index()

2001      31
2002      32
2003      52
2004      65
2005      85
2006     115
2007     128
2008     144
2009      96
2010     156
2011     237
2012     479
2013     622
2014     759
2015    1133
2016    1464
2017    3473
2018    7583
2019    3905
2020    4240
2021    2273
2022       3
Name: Model Year, dtype: int64

In [6]:
def make_extractor(name):
    
    if 'Land' in name:
        return 'Land Rover'
    
    elif 'Alfa' in name:
        return 'Alfa Romeo'
    
    elif 'MINI' in name:
        return 'MINI Cooper'
    
    elif 'Aston' in name:
        return 'Aston Martin'
    
    else:
        return name.split(' ')[2]

In [7]:
df['Make'] = df['Name'].apply(make_extractor)

In [8]:
df['Make'].value_counts()

Ford             2846
Toyota           2725
Mercedes-Benz    1789
Chevrolet        1677
Honda            1671
BMW              1664
Nissan           1532
Jeep             1432
Audi             1136
Lexus            1126
Hyundai           903
Volkswagen        807
GMC               685
Kia               674
Subaru            651
Acura             619
Mazda             582
Dodge             542
INFINITI          499
RAM               456
Land Rover        414
Cadillac          354
Porsche           296
Volvo             265
Lincoln           252
Buick             230
Chrysler          206
MINI Cooper       180
Jaguar            155
Mitsubishi        152
Alfa Romeo        146
Tesla             142
Maserati           83
Genesis            51
Scion              31
FIAT               27
Bentley            14
Ferrari            10
Rolls-Royce        10
McLaren             9
Pontiac             8
Aston Martin        5
smart               5
Hummer              5
Lamborghini         4
Saturn    

In [9]:
other_make = df['Make'].value_counts()[df['Make'].value_counts() <= df.shape[0]*0.001].index
df['Make'] = df['Make'].replace(other_make, 'Other')
df['Make'].value_counts()

Ford             2846
Toyota           2725
Mercedes-Benz    1789
Chevrolet        1677
Honda            1671
BMW              1664
Nissan           1532
Jeep             1432
Audi             1136
Lexus            1126
Hyundai           903
Volkswagen        807
GMC               685
Kia               674
Subaru            651
Acura             619
Mazda             582
Dodge             542
INFINITI          499
RAM               456
Land Rover        414
Cadillac          354
Porsche           296
Volvo             265
Lincoln           252
Buick             230
Chrysler          206
MINI Cooper       180
Jaguar            155
Mitsubishi        152
Alfa Romeo        146
Tesla             142
Other             102
Maserati           83
Genesis            51
Scion              31
Name: Make, dtype: int64

In [10]:
def mileage_to_int(mileage):
    
    return int(mileage.replace(',',''))

def price_to_int(price):
    
    return int(price.replace('$','').replace(',',''))

In [11]:
#Converse mileage and price to int

df['Mileage'] = df['Mileage'].apply(mileage_to_int)

df['Price'] = df['Price'].apply(price_to_int)

df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27075 entries, 0 to 27173
Data columns (total 17 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Name            27075 non-null  object 
 1   Mileage         27075 non-null  int64  
 2   Address         27075 non-null  object 
 3   Rating          27057 non-null  float64
 4   Fuel Type       27075 non-null  object 
 5   City MPG        27075 non-null  int64  
 6   Highway MPG     27075 non-null  int64  
 7   Drivetrain      27075 non-null  object 
 8   Engine          27075 non-null  object 
 9   Exterior Color  27075 non-null  object 
 10  Interior Color  27075 non-null  object 
 11  Transmission    27075 non-null  object 
 12  Entertainment   27075 non-null  object 
 13  Safety          27075 non-null  object 
 14  Price           27075 non-null  int64  
 15  Model Year      27075 non-null  int64  
 16  Make            27075 non-null  object 
dtypes: float64(1), int64(5), object

In [12]:
df.Price.isna().sum()

0

In [13]:
df.describe()

Unnamed: 0,Mileage,Rating,City MPG,Highway MPG,Price,Model Year
count,27075.0,27057.0,27075.0,27075.0,27075.0,27075.0
mean,37797.355088,4.152474,22.155716,30.186371,29949.31,2017.577581
std,34493.301044,0.85986,14.566293,25.478897,19933.39,2.932219
min,1.0,1.0,-1.0,-1.0,1999.0,2001.0
25%,16594.0,3.7,18.0,24.0,19980.5,2017.0
50%,29553.0,4.4,21.0,28.0,26980.0,2018.0
75%,46221.0,4.8,25.0,33.0,35998.0,2019.0
max,337414.0,5.0,224.0,384.0,1450000.0,2022.0


In [14]:
def num_ent_fea(entertainment):
    
    return len(entertainment.split(','))

def num_safe_fea(safety):
    
    return len(safety.split(','))

In [15]:
df['Entertainment'] = df['Entertainment'].apply(num_ent_fea)
df['Safety'] = df['Safety'].apply(num_safe_fea)

In [16]:
df.rename(columns={'Entertainment': 'Num_ent_features', 'Safety': 'Num_safe_features'}, inplace=True)

In [17]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 27075 entries, 0 to 27173
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               27075 non-null  object 
 1   Mileage            27075 non-null  int64  
 2   Address            27075 non-null  object 
 3   Rating             27057 non-null  float64
 4   Fuel Type          27075 non-null  object 
 5   City MPG           27075 non-null  int64  
 6   Highway MPG        27075 non-null  int64  
 7   Drivetrain         27075 non-null  object 
 8   Engine             27075 non-null  object 
 9   Exterior Color     27075 non-null  object 
 10  Interior Color     27075 non-null  object 
 11  Transmission       27075 non-null  object 
 12  Num_ent_features   27075 non-null  int64  
 13  Num_safe_features  27075 non-null  int64  
 14  Price              27075 non-null  int64  
 15  Model Year         27075 non-null  int64  
 16  Make               270

In [18]:
df.describe()

Unnamed: 0,Mileage,Rating,City MPG,Highway MPG,Num_ent_features,Num_safe_features,Price,Model Year
count,27075.0,27057.0,27075.0,27075.0,27075.0,27075.0,27075.0,27075.0
mean,37797.355088,4.152474,22.155716,30.186371,1.756491,3.671099,29949.31,2017.577581
std,34493.301044,0.85986,14.566293,25.478897,0.705814,1.45449,19933.39,2.932219
min,1.0,1.0,-1.0,-1.0,1.0,1.0,1999.0,2001.0
25%,16594.0,3.7,18.0,24.0,1.0,3.0,19980.5,2017.0
50%,29553.0,4.4,21.0,28.0,2.0,3.0,26980.0,2018.0
75%,46221.0,4.8,25.0,33.0,2.0,5.0,35998.0,2019.0
max,337414.0,5.0,224.0,384.0,4.0,7.0,1450000.0,2022.0


In [19]:
def define_transmission(transmission):
    
    if 'automatic' in transmission.lower():
        return 'Automatic'
    
    elif 'manual' in transmission.lower():
        return 'Manual'
    
    else:
        return 'Unknown'

In [20]:
df['Transmission'] = df['Transmission'].apply(define_transmission)

In [21]:
df['Transmission'].value_counts()

Automatic    26203
Manual         503
Unknown        369
Name: Transmission, dtype: int64

In [22]:
df['Transmission'].isna().sum()

0

In [23]:
df.head()

Unnamed: 0,Name,Mileage,Address,Rating,Fuel Type,City MPG,Highway MPG,Drivetrain,Engine,Exterior Color,Interior Color,Transmission,Num_ent_features,Num_safe_features,Price,Model Year,Make
0,2012 Volvo S60 T5,91476,"Kansas City, MO 64111",5.0,Gasoline,20,30,FWD,2.5L I5 20V MPFI DOHC Turbo,Black Stone,Off Black,Automatic,2,2,9995,2012,Volvo
1,2019 Ford Escape SE,56976,"North Kansas City, MO 64116",4.7,Gasoline,23,30,FWD,1.5L I4 16V GDI DOHC Turbo,Blue,Chromite Gray,Automatic,2,3,16500,2019,Ford
2,2017 Dodge Grand Caravan SXT,45892,"Kansas City, MO 64116",4.8,E85 Flex Fuel,17,25,FWD,3.6L V6 24V MPFI DOHC Flexible Fuel,Billet Clearcoat,Black / Light Graystone,Automatic,1,3,17500,2017,Dodge
3,2020 Volvo XC40 T5 R-Design,11103,"KCMO, MO 64105",4.2,Gasoline,22,30,AWD,2.0L I4 16V GDI DOHC Turbo,White,Black,Automatic,2,4,41590,2020,Volvo
4,2020 INFINITI QX60 Luxe,33903,"KCMO, MO 64105",4.2,Gasoline,19,26,AWD,3.5L V6 24V GDI DOHC,Black,Beige,Automatic,2,4,29990,2020,INFINITI


In [24]:
df.Engine.value_counts().head(50)

 2.0L I4 16V GDI DOHC Turbo                          3696
 1.5L I4 16V GDI DOHC Turbo                          1032
 3.6L V6 24V MPFI DOHC                                925
 3.5L V6 24V MPFI DOHC                                786
 2.0L I4 16V MPFI DOHC                                712
 2.5L I4 16V GDI DOHC                                 681
 2.4L I4 16V GDI DOHC                                 681
 5.3L V8 16V GDI OHV                                  662
 2.5L I4 16V MPFI DOHC                                654
 3.6L V6 24V GDI DOHC                                 647
 3.5L V6 24V GDI SOHC                                 548
 3.5L V6 24V GDI DOHC                                 538
 3.5L V6 24V PDI DOHC                                 494
 Electric                                             468
 3.0L V6 24V GDI DOHC Twin Turbo                      466
 2.5L I4 16V PDI DOHC                                 454
 1.8L I4 16V MPFI DOHC                                454
 2.4L I4 16V M

In [25]:
def define_engine(engine):
    
    if 'intercooled' in engine.lower() or 'regular' in engine.lower() or 'electric' in engine.lower():
        return engine.strip()
    
    else:
        return engine.strip().split(' ')[0]

In [26]:
df['Engine'] = df['Engine'].apply(define_engine)

In [27]:
other_engine = df['Engine'].value_counts()[df['Engine'].value_counts() <= df.shape[0]*0.001].index
df['Engine'] = df['Engine'].replace(other_engine, 'Other')
df['Engine'].value_counts()

2.0L                                                       6093
3.5L                                                       3112
2.5L                                                       2687
3.6L                                                       1785
3.0L                                                       1468
1.5L                                                       1391
2.4L                                                       1367
1.8L                                                        925
Other                                                       752
5.3L                                                        739
1.6L                                                        522
Electric                                                    468
5.7L                                                        389
4.0L                                                        322
1.4L                                                        307
Intercooled Turbo Premium Unleaded I-4 2

In [28]:
df['Exterior Color'].value_counts().head(30)

 Black                                 2302
 White                                 1887
 Gray                                  1292
 Silver                                 942
 Blue                                   607
 Summit White                           399
 Red                                    348
 Polar White                            317
 Oxford White                           310
 Gun Metallic                           306
 Bright White Clearcoat                 303
 Super White                            291
-1                                      263
 Crystal Black Pearl                    263
 Alpine White                           234
 Shadow Black                           231
 Modern Steel Metallic                  223
 Granite Crystal Clearcoat Metallic     215
 Brilliant Silver Metallic              203
 Magnetic Metallic                      196
 Silver Ice Metallic                    191
 Black Sapphire Metallic                176
 Billet Silver Metallic Clearcoa

In [29]:
def define_exterior_color(exterior_color):
    
    if 'black' in exterior_color.lower():
        return 'Black'
    
    elif 'white' in exterior_color.lower():
        return 'White'
    
    elif 'gray' in exterior_color.lower():
        return 'Gray'
    
    elif 'silver' in exterior_color.lower():
        return 'Silver'
    
    elif 'blue' in exterior_color.lower():
        return 'Blue'
    
    elif 'red' in exterior_color.lower():
        return 'Red'
    
    elif 'metallic' in exterior_color.lower():
        return 'Metallic'
    
    elif '-1' in exterior_color.lower():
        return np.nan
    
    else:
        return 'Other'

In [30]:
df['Exterior Color'] = df['Exterior Color'].apply(define_exterior_color)

In [31]:
df['Exterior Color'].value_counts()

White       6350
Black       5929
Silver      3558
Gray        3062
Blue        2292
Other       2177
Metallic    1938
Red         1506
Name: Exterior Color, dtype: int64

In [32]:
df['Interior Color'].value_counts().head(30)

 Black                  10085
-1                       4509
 Gray                    1742
 Jet Black               1118
 Ebony                   1114
 Charcoal                 879
 Graphite                 488
 Beige                    484
 Titan Black              321
 Charcoal Black           284
 Ash                      225
 Ebony Black              207
 Tan                      156
 Parchment                156
 Sport                    154
 Ivory                    141
 Red                      134
 Brown                    130
 Medium Earth Gray        113
 Light Gray               107
 Carbon Black              96
 Medium Light Stone        80
 Diesel Gray / Black       73
 Black / Red               73
 Ebony / Ebony             71
 Almond                    68
 Medium Ash Gray           61
 Wheat                     61
 Mocha                     58
 Dark Gray                 57
Name: Interior Color, dtype: int64

In [33]:
def define_interior_color(interior_color):

    if 'ebony' in interior_color.lower() or 'ebony black' in interior_color.lower():
        return 'Ebony'
    
    elif 'black' in interior_color.lower():
        return 'Black'
    
    elif 'gray' in interior_color.lower():
        return 'Gray'
    
    elif 'charcoal' in interior_color.lower():
        return 'Charcoal'
    
    elif 'graphite' in interior_color.lower():
        return 'Graphite'
    
    elif '-1' in interior_color.lower():
        return np.nan
    
    else:
        return 'Other'

In [34]:
df['Interior Color'] = df['Interior Color'].apply(define_interior_color)

In [35]:
df['Interior Color'].value_counts()

Black       12853
Other        4194
Gray         2537
Ebony        1562
Charcoal      908
Graphite      512
Name: Interior Color, dtype: int64

In [36]:
def define_drivetrain(drivetrain):
    
    if 'fwd' in drivetrain.lower() or 'front wheel drive' in drivetrain.lower() or 'front-wheel drive' in drivetrain.lower():
        return 'FWD'
    
    elif 'awd' in drivetrain.lower() or 'all wheel drive' in drivetrain.lower() or 'all-wheel drive' in drivetrain.lower():
        return 'AWD'
    
    elif '4wd' in drivetrain.lower() or 'four wheel drive' in drivetrain.lower() or 'four-wheel drive' in drivetrain.lower():
        return '4WD'
    
    elif 'rwd' in drivetrain.lower() or 'rear wheel drive' in drivetrain.lower() or 'rear-wheel drive' in drivetrain.lower():
        return 'RWD'
    
    else:
        return np.nan

In [37]:
df['Drivetrain'] = df['Drivetrain'].apply(define_drivetrain)

In [38]:
df['Drivetrain'].value_counts()

FWD    10667
AWD     7837
4WD     4571
RWD     3937
Name: Drivetrain, dtype: int64

In [39]:
      
df['City MPG'] = df['City MPG'].apply(lambda x: np.nan if x == -1 else x)

df['City MPG'].fillna(df['City MPG'].median(), inplace=True)

df['Highway MPG'] = df['Highway MPG'].apply(lambda x: np.nan if x == -1 else x)

df['Highway MPG'].fillna(df['Highway MPG'].median(), inplace=True)

In [40]:
df.dropna(inplace=True)

In [41]:
df.sample(10)

Unnamed: 0,Name,Mileage,Address,Rating,Fuel Type,City MPG,Highway MPG,Drivetrain,Engine,Exterior Color,Interior Color,Transmission,Num_ent_features,Num_safe_features,Price,Model Year,Make
25227,Certified 2016 Honda Civic LX,38275,"Everett, WA 98204",4.0,Gasoline,31.0,41.0,FWD,2.0L,Black,Black,Automatic,1,3,16425,2016,Honda
21763,2018 Audi Q7 3.0T Prestige,36468,"Lynnwood, WA 98037",4.4,Gasoline,19.0,25.0,AWD,3.0L,White,Black,Automatic,2,3,45499,2018,Audi
18808,2009 Chevrolet HHR LS,200200,"Roseville, CA 95678",2.9,E85 Flex Fuel,22.0,32.0,FWD,Other,White,Gray,Automatic,1,1,3995,2009,Chevrolet
10239,2012 Ford Escape XLT,120897,"Los Angeles, CA 90022",4.2,Gasoline,21.0,28.0,FWD,2.5L,Gray,Gray,Automatic,1,2,7950,2012,Ford
7097,2017 Lexus NX 200t Base,34853,"Whittier, CA 90602",4.0,Gasoline,22.0,28.0,FWD,2.0L,White,Other,Automatic,2,3,26700,2017,Lexus
5354,2019 Dodge Charger GT,36850,"Huntington Beach, CA 92647",1.0,Gasoline,19.0,30.0,RWD,3.6L,Metallic,Black,Automatic,3,3,25993,2019,Dodge
1671,2020 Subaru Forester Limited,12009,"Lawrence, KS 66046",4.8,Gasoline,26.0,33.0,AWD,2.5L,Silver,Black,Automatic,1,4,31988,2020,Subaru
16585,2021 Audi Q3 45 S line Premium,3152,"Sacramento, CA 95821",2.3,Gasoline,20.0,28.0,AWD,2.0L,Blue,Black,Automatic,2,7,42944,2021,Audi
3646,2020 Ford Fusion SE,2,"Leavenworth, KS 66048",4.9,Gasoline,23.0,34.0,FWD,1.5L,Black,Other,Automatic,2,7,22991,2020,Ford
23501,2009 Jeep Wrangler Unlimited X,144175,"Moses Lake, WA 98837",5.0,Gasoline,15.0,194.0,4WD,3.8L,Red,Gray,Automatic,2,2,18000,2009,Jeep


In [42]:
#df.to_csv('data/cleaned_10000.csv')

In [43]:
df.describe()

Unnamed: 0,Mileage,Rating,City MPG,Highway MPG,Num_ent_features,Num_safe_features,Price,Model Year
count,22441.0,22441.0,22441.0,22441.0,22441.0,22441.0,22441.0,22441.0
mean,37204.185687,4.171659,23.475023,32.001069,1.78107,3.721091,30042.59,2017.644445
std,34229.010511,0.850958,13.474079,24.509938,0.707214,1.447127,20819.82,2.91462
min,1.0,1.0,0.0,0.0,1.0,1.0,1999.0,2001.0
25%,15366.0,3.8,18.0,25.0,1.0,3.0,19900.0,2017.0
50%,29444.0,4.4,21.0,29.0,2.0,3.0,26800.0,2018.0
75%,46469.0,4.8,25.0,33.0,2.0,5.0,35995.0,2020.0
max,337414.0,5.0,224.0,384.0,4.0,7.0,1450000.0,2022.0


In [44]:
df.shape

(22441, 17)

In [45]:
df.head()

Unnamed: 0,Name,Mileage,Address,Rating,Fuel Type,City MPG,Highway MPG,Drivetrain,Engine,Exterior Color,Interior Color,Transmission,Num_ent_features,Num_safe_features,Price,Model Year,Make
0,2012 Volvo S60 T5,91476,"Kansas City, MO 64111",5.0,Gasoline,20.0,30.0,FWD,2.5L,Black,Black,Automatic,2,2,9995,2012,Volvo
1,2019 Ford Escape SE,56976,"North Kansas City, MO 64116",4.7,Gasoline,23.0,30.0,FWD,1.5L,Blue,Gray,Automatic,2,3,16500,2019,Ford
2,2017 Dodge Grand Caravan SXT,45892,"Kansas City, MO 64116",4.8,E85 Flex Fuel,17.0,25.0,FWD,3.6L,Other,Black,Automatic,1,3,17500,2017,Dodge
3,2020 Volvo XC40 T5 R-Design,11103,"KCMO, MO 64105",4.2,Gasoline,22.0,30.0,AWD,2.0L,White,Black,Automatic,2,4,41590,2020,Volvo
4,2020 INFINITI QX60 Luxe,33903,"KCMO, MO 64105",4.2,Gasoline,19.0,26.0,AWD,3.5L,Black,Other,Automatic,2,4,29990,2020,INFINITI


In [46]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 22441 entries, 0 to 27173
Data columns (total 17 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Name               22441 non-null  object 
 1   Mileage            22441 non-null  int64  
 2   Address            22441 non-null  object 
 3   Rating             22441 non-null  float64
 4   Fuel Type          22441 non-null  object 
 5   City MPG           22441 non-null  float64
 6   Highway MPG        22441 non-null  float64
 7   Drivetrain         22441 non-null  object 
 8   Engine             22441 non-null  object 
 9   Exterior Color     22441 non-null  object 
 10  Interior Color     22441 non-null  object 
 11  Transmission       22441 non-null  object 
 12  Num_ent_features   22441 non-null  int64  
 13  Num_safe_features  22441 non-null  int64  
 14  Price              22441 non-null  int64  
 15  Model Year         22441 non-null  int64  
 16  Make               224

In [48]:
df.to_csv('data/clean_20000.csv')

In [49]:
df['Model Year'].value_counts()

2018    6153
2020    3615
2019    3133
2017    2797
2021    2151
2016    1211
2015     936
2014     623
2013     526
2012     389
2011     192
2010     129
2008     118
2007     110
2006      94
2009      67
2004      52
2005      52
2003      41
2002      25
2001      24
2022       3
Name: Model Year, dtype: int64