# Project - Used Car Prices - Predictor

## Team Background

- Project Grp 08

### Team Members

- Team member 1
    - Name: Richard Anton
    - Email: [rna63@drexel.edu](mailto:rna63@drexel.edu)
    

In [14]:
from IPython.display import display, HTML

In [1]:
import joblib
from datetime import datetime

def save_model(model_name, model, use_ts=True):
    """Serialize ``model`` with joblib and return the file name that was written.

    The file is named ``model_<model_name>.pkl``. When ``use_ts`` is true, an
    underscore followed by the current local time (``YYYY_MM_DD_HH_MM_SS``) is
    inserted before the extension, so repeated saves do not overwrite each other.
    The path is relative, so the file lands in the current working directory.
    """
    # Optional timestamp suffix; empty when the caller wants a stable name.
    suffix = '_' + datetime.now().strftime("%Y_%m_%d_%H_%M_%S") if use_ts else ''

    dest_path = f"model_{model_name}{suffix}.pkl"
    print(f"Saving model to {dest_path}")
    joblib.dump(model, dest_path)

    return dest_path

def load_model(model_path):
    """Load and return the object stored at ``model_path`` via ``joblib.load``.

    Prints the path being read before loading. joblib files are pickle-based,
    so only load artifacts from a trusted source.
    """
    print(f"Loading model from {model_path}")
    return joblib.load(model_path)


In [39]:
# TODO: put in final model path

# Relative path of the serialized model artifact (presumably the XGBoost model
# trained in the companion notebook — confirm before swapping in the final one).
model_path = 'model_xgboost_s_2023_03_13_07_53_37.pkl'
# `model` is read as a notebook global by xgboost_predict() further down.
model = load_model(model_path)

Loading model from model_xgboost_s_2023_03_13_07_53_37.pkl


In [27]:
# load dataset
import pandas as pd

# TODO: use full dataset.
dataset_path = "craigslist_sampled_cleaned_2023_03_05_19_07_36.csv"
target_col = 'price'

# Keep the frame exactly as read in `orig_df`; all further work happens on a copy.
orig_df = pd.read_csv(dataset_path)
df = orig_df.copy()
# Show a sample for sanity check. Only the LAST expression of a cell is rendered
# automatically, so a bare `df.head()` in the middle of the cell is silently
# discarded; it has to go through display() to actually appear.
display(df.head())


# split into input data and output values
X_all = df.drop(columns=[target_col])  # every column except the target
y_all = df[target_col]                 # the target column on its own

print("X.shape", X_all.shape)
print("y.shape", y_all.shape)

X.shape (4034, 14)
y.shape (4034,)


In [28]:
from sklearn import preprocessing
import category_encoders as ce
from sklearn.preprocessing import StandardScaler

In [29]:
# Cast every non-numeric feature column to the pandas `category` dtype.
import numpy as np

# Names of the columns whose dtype is not numeric.
non_numeric = X_all.select_dtypes(exclude=np.number)
cats = list(non_numeric.columns)
print("cats:")
display(cats)
# One astype call with a column -> dtype mapping converts all of them at once.
X_all = X_all.astype({name: 'category' for name in cats})

display(X_all.dtypes)
display(X_all.head())

cats:


['make',
 'model',
 'condition',
 'cylinders',
 'fuel',
 'title_status',
 'transmission',
 'drive',
 'size',
 'type',
 'paint_color',
 'state']

year             float64
make            category
model           category
condition       category
cylinders       category
fuel            category
odometer         float64
title_status    category
transmission    category
drive           category
size            category
type            category
paint_color     category
state           category
dtype: object

Unnamed: 0,year,make,model,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
0,2016.0,chevrolet,silverado 1500 double,good,6 cylinders,gas,29499.0,clean,other,4wd,full-size,pickup,silver,al
1,2013.0,lincoln,mkz sedan 4d,good,6 cylinders,gas,61087.0,clean,other,fwd,full-size,sedan,red,al
2,2005.0,honda,accord ex-l,excellent,4 cylinders,gas,155000.0,clean,manual,4wd,full-size,sedan,white,al
3,2012.0,ford,f250 super duty,good,6 cylinders,gas,47380.0,clean,automatic,4wd,full-size,sedan,white,al
4,2021.0,ford,SPECIAL FINANCE PROGRAM 2020,fair,other,other,1400.0,clean,other,4wd,full-size,bus,green,al


In [30]:
# Train test split
from sklearn.model_selection import train_test_split

# It's important that the random_state matches the other notebook.
# Only the test partition is needed here, so the two training outputs are
# discarded into `_`; test_size=0.2 holds out 20% of the rows.
# NOTE(review): assigning to `_` shadows IPython's "last output" variable for
# the rest of the session — harmless here, but worth knowing.
_, X_test, _, y_test = train_test_split(X_all, y_all, test_size=0.2, random_state=193)
# just keep test data

# Report the size of the held-out split and render its feature rows.
print("X_test.shape", X_test.shape)
display(X_test)
print("y_test.shape", y_test.shape)

X_test.shape (807, 14)


Unnamed: 0,year,make,model,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
2211,2016.0,ram,1500,excellent,6 cylinders,gas,44853.0,clean,automatic,4wd,full-size,sedan,silver,mt
2720,2016.0,dodge,grand caravan sxt,good,6 cylinders,gas,48000.0,clean,automatic,fwd,full-size,mini-van,black,ny
2153,1999.0,dodge,f-150,good,10 cylinders,diesel,56000.0,clean,manual,4wd,full-size,pickup,blue,mo
3969,2009.0,honda,accord exl v6,good,8 cylinders,gas,180000.0,clean,automatic,fwd,full-size,sedan,white,wi
3363,1994.0,toyota,land cruiser landcruiser,good,6 cylinders,gas,208000.0,clean,automatic,4wd,mid-size,sedan,red,tn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,2018.0,toyota,rav4,good,4 cylinders,gas,42537.0,clean,automatic,4wd,full-size,SUV,white,wa
1639,2012.0,lincoln,mkx,excellent,6 cylinders,gas,85925.0,clean,automatic,fwd,full-size,SUV,white,ky
35,2016.0,chevrolet,colorado extended cab,good,6 cylinders,gas,17302.0,clean,other,4wd,full-size,pickup,red,al
2674,2003.0,toyota,camry solara,good,6 cylinders,gas,228000.0,clean,automatic,4wd,mid-size,convertible,white,ny


y_test.shape (807,)


In [31]:
from sklearn.compose import ColumnTransformer 
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline

In [32]:
import xgboost as xgb

In [33]:
# Feature columns grouped by kind: numeric vs. categorical.
# NOTE(review): neither list is referenced by the cells visible in this
# notebook; presumably they mirror the training notebook's definitions — confirm
# whether they are still needed.
numeric_cols = ['year', 'odometer']
cat_cols = ['make', 'model', 'condition', 'cylinders', 'fuel', 'title_status',
       'transmission', 'drive', 'size', 'type', 'paint_color', 'state']


In [34]:
# Relative path of the serialized preprocessor (presumably fitted in the
# training notebook — confirm it matches the model artifact loaded above).
preprocessor_path = 'model_preprocessor_s_2023_03_13_07_28_32.pkl'
# `preprocess` is read as a notebook global by xgboost_predict(), which calls
# its .transform() on the raw feature rows.
preprocess = load_model(preprocessor_path)

Loading model from model_preprocessor_s_2023_03_13_07_28_32.pkl


In [40]:
def xgboost_predict(X_test, verbose=True):
    """Run the loaded preprocessor and model on ``X_test`` and return the predictions.

    Relies on three notebook globals that must already be defined:
    ``preprocess`` (object with a ``transform`` method), ``model`` (object with
    ``predict`` and ``feature_names``) and the ``xgb`` module.

    Parameters
    ----------
    X_test
        Raw feature rows in the form accepted by ``preprocess.transform``.
    verbose : bool, default True
        When true (the original behaviour), the input, the transformed matrix
        and both ``feature_names`` attributes are displayed for debugging.
        Pass ``False`` to get the predictions without any notebook output.

    Returns
    -------
    Whatever ``model.predict`` returns for the transformed rows.
    """
    if verbose:
        print("X_test:")
        display(X_test)

    X_pre_test = preprocess.transform(X_test)
    if verbose:
        display(X_pre_test)

    # Convert to the format XGBoost lib expects.
    dtest_reg = xgb.DMatrix(X_pre_test)

    if verbose:
        # Show both feature-name lists side by side so a mismatch between the
        # matrix and the model is easy to spot.
        print("dtest_reg.feature_names:")
        display(dtest_reg.feature_names)

        print("model.feature_names:")
        display(model.feature_names)

    return model.predict(dtest_reg)

In [41]:
# Predict on the held-out rows; as the cell's last expression, the returned
# prediction array is rendered as the cell output.
xgboost_predict(X_test)

X_test:


Unnamed: 0,year,make,model,condition,cylinders,fuel,odometer,title_status,transmission,drive,size,type,paint_color,state
2211,2016.0,ram,1500,excellent,6 cylinders,gas,44853.0,clean,automatic,4wd,full-size,sedan,silver,mt
2720,2016.0,dodge,grand caravan sxt,good,6 cylinders,gas,48000.0,clean,automatic,fwd,full-size,mini-van,black,ny
2153,1999.0,dodge,f-150,good,10 cylinders,diesel,56000.0,clean,manual,4wd,full-size,pickup,blue,mo
3969,2009.0,honda,accord exl v6,good,8 cylinders,gas,180000.0,clean,automatic,fwd,full-size,sedan,white,wi
3363,1994.0,toyota,land cruiser landcruiser,good,6 cylinders,gas,208000.0,clean,automatic,4wd,mid-size,sedan,red,tn
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3914,2018.0,toyota,rav4,good,4 cylinders,gas,42537.0,clean,automatic,4wd,full-size,SUV,white,wa
1639,2012.0,lincoln,mkx,excellent,6 cylinders,gas,85925.0,clean,automatic,fwd,full-size,SUV,white,ky
35,2016.0,chevrolet,colorado extended cab,good,6 cylinders,gas,17302.0,clean,other,4wd,full-size,pickup,red,al
2674,2003.0,toyota,camry solara,good,6 cylinders,gas,228000.0,clean,automatic,4wd,mid-size,convertible,white,ny


array([[ 9.51317794e+03, -8.43026441e+01,  9.39591750e+03, ...,
        -7.42492889e+02,  6.09181523e-01, -7.68922040e-01],
       [-7.93702799e+03, -3.50500620e+03,  3.86941266e+03, ...,
        -1.05755802e+03,  6.09181523e-01, -7.18763828e-01],
       [ 5.11743655e+03,  4.86853434e+03, -4.12756049e+03, ...,
        -1.69795557e+03, -1.61286230e+00, -5.91256457e-01],
       ...,
       [ 1.36729694e+04, -9.61737207e+02, -7.37402871e+03, ...,
        -8.29213862e+00,  6.09181523e-01, -1.20804149e+00],
       [-9.15646559e+00, -5.22777126e+01, -2.68044400e+03, ...,
         6.16023291e+03, -1.09002846e+00,  2.15015203e+00],
       [ 1.92966138e+03,  3.35172695e+03, -4.66323844e+03, ...,
        -1.91198275e+03,  3.47764602e-01, -8.10999473e-01]])

dtest_reg.feature_names:


None

model.feature_names:


None

array([33322.77  , 18468.045 , 24534.773 ,  6127.7773,  3982.3354,
        5760.7485, 24269.848 , 11013.122 , 51453.035 , 30991.752 ,
       11993.205 ,  9072.685 , 18105.912 , 20910.09  , 35258.863 ,
        9789.386 , 14011.304 , 16420.73  ,  7568.2524, 10044.384 ,
       36601.574 , 29452.215 ,  7964.365 , 37717.29  ,  5261.262 ,
        7127.125 , 45406.504 , 42893.234 , 14911.084 , 19037.494 ,
        8876.463 ,  6242.1997, 35349.82  , 16557.324 , 30283.543 ,
       31239.256 , 34459.973 ,  9751.36  , 24084.186 , 48088.277 ,
        8125.1396, 11128.938 , 10698.406 , 21485.521 , 19631.354 ,
       10864.613 , 20544.533 ,  4507.309 , 18338.436 ,  4956.081 ,
        5452.4136, 14071.38  ,  5011.088 ,  6361.079 , 18406.025 ,
       11664.484 , 23014.898 ,  5880.305 ,  5125.1523, 23671.348 ,
        8242.54  , 22810.078 ,  5811.646 ,  5776.875 , 14648.722 ,
        6057.874 ,  8772.128 , 38202.715 ,  9428.322 , 12057.88  ,
        6206.5415, 16918.81  , 23539.484 , 19956.555 , 25354.0