## Building the model and storing it as a pickle file

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, mean_squared_error
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.ensemble import RandomForestRegressor

In [None]:
# Record the scikit-learn version used in this run; the fitted pipeline is pickled further down.
# NOTE(review): pickled estimators are generally tied to the library version that
# produced them — confirm the serving environment uses the same version.
import sklearn
sklearn.__version__

'0.22.2.post1'

## Loading the dataset: Used Car Price Prediction

In [None]:
# Download the used-car listings CSV from the shared Google Drive link into a DataFrame.
cars_df = pd.read_csv(
    filepath_or_buffer="https://drive.google.com/uc?export=download&id=1V_VBbyjGj6vvD0A90S5Lk0DG90djz28B"
)

In [None]:
# Preview the first five rows to sanity-check the loaded columns.
cars_df.head(n=5)

Unnamed: 0,Name,Location,Year,Kilometers_Driven,Fuel_Type,Transmission,Owner_Type,Mileage,Engine,Power,Seats,New_Price,Price,mileage_new,engine_new,power_new,age,make,model,KM_Driven
0,Honda Jazz V,Chennai,2011,46000,Petrol,Manual,First,18.2 kmpl,1199 CC,88.7 bhp,5.0,8.61 Lakh,4.5,18.2,1199,88.7,9,honda,jazz,46
1,Maruti Ertiga VDI,Chennai,2012,87000,Diesel,Manual,First,20.77 kmpl,1248 CC,88.76 bhp,7.0,,6.0,20.77,1248,88.76,8,maruti,ertiga,87
2,Nissan Micra Diesel XV,Jaipur,2013,86999,Diesel,Manual,First,23.08 kmpl,1461 CC,63.1 bhp,5.0,,3.5,23.08,1461,63.1,7,nissan,micra,86
3,Tata Indica Vista Quadrajet LS,Chennai,2012,65932,Diesel,Manual,Second,22.3 kmpl,1248 CC,74 bhp,5.0,,1.95,22.3,1248,74.0,8,tata,indica,65
4,Maruti Swift VDI BSIV,Jaipur,2015,64424,Diesel,Manual,First,25.2 kmpl,1248 CC,74 bhp,5.0,,5.6,25.2,1248,74.0,5,maruti,swift,64


In [None]:
# Input features used to predict Price.
# Note: the car 'model' column IS part of this feature set (it is also listed
# among the categorical features that get one-hot encoded later on).
x_columns = ['KM_Driven', 'Fuel_Type', 'age',
             'Transmission', 'Owner_Type', 'Seats',
             'make', 'mileage_new', 'engine_new', 'model',
             'power_new', 'Location']

In [None]:
# (rows, columns) of the frame as loaded, before column selection and dropna.
cars_df.shape

(3092, 20)

In [None]:
# Keep only the model inputs plus the target, then discard every row that has
# a missing value in any of those columns.
cars_df = (
    cars_df
    .loc[:, [*x_columns, 'Price']]
    .dropna(axis=0, how='any')
)

In [None]:
# (rows, columns) after keeping only the model columns and dropping incomplete rows.
cars_df.shape

(3091, 13)

## Identifying numerical and categorical features

In [None]:
# Columns treated as categorical; these are one-hot encoded by the pipeline.
cat_features = [
    'Fuel_Type',
    'Transmission',
    'Owner_Type',
    'model',
    'make',
    'Location',
]

In [None]:
# Numerical features are every input column that is not declared categorical.
# Built by filtering x_columns instead of taking a set difference: set iteration
# order for strings can change between interpreter runs, which would change the
# column order fed to the pipeline and stored alongside the pickled model.
num_features = [col for col in x_columns if col not in cat_features]

## Split the dataset

In [None]:
# Hold out 20% of the rows for evaluation; the fixed seed keeps the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    cars_df.loc[:, x_columns],
    cars_df['Price'],
    train_size=0.8,
    random_state=100,
)

## Creating the pipeline for the deployment

In [None]:
# Numeric columns are standardised.
numeric_transformer = Pipeline(steps=[('scaler', StandardScaler())])

# Categorical columns are one-hot encoded; handle_unknown='ignore' lets the
# pipeline score rows whose category was not present in the training split.
categorical_transformer = Pipeline(steps=[('onehot',
                                           OneHotEncoder(handle_unknown='ignore'))])

# Route each group of columns to its own transformer.
preprocessor = ColumnTransformer(
    transformers=[
        ('num', numeric_transformer, num_features),
        ('cat', categorical_transformer, cat_features),
    ])

# random_state pins the forest's bootstrap sampling so that the fitted model,
# the reported RMSE and the pickled artefact are reproducible across re-runs.
params = { "n_estimators": 400,
           "max_depth": 4,
           "random_state": 100 }

rf_regressor = RandomForestRegressor(**params)

# Single deployable object: preprocessing followed by the regressor.
reg = Pipeline(steps=[('preprocessor', preprocessor),
                      ('regressor', rf_regressor)])

reg.fit(x_train,
        y_train)

# Root-mean-squared error on the held-out test split.
rmse = np.sqrt(mean_squared_error(y_test,
                                  reg.predict(x_test)))

In [None]:
# Show the RMSE on the held-out test split, as computed in the training cell above.
print(rmse)

0.6096850479179529


## Creating the Pickle File and Storing It

In [None]:
class CarPredictionModel:
    """Bundle a fitted pipeline with the metadata needed to use it.

    Attributes:
        pipeline: the object used for prediction (this notebook passes the
            fitted preprocessing + regressor pipeline).
        all_features: names of every input column the pipeline expects.
        cat_features: subset of input columns treated as categorical.
        num_features: subset of input columns treated as numerical.
        rmse: error metric recorded for the pipeline (this notebook passes
            the test-split RMSE).

    NOTE(review): instances are serialised with joblib further down; pickle
    stores classes by reference, so the loading process presumably needs this
    class definition available under the same name — confirm for deployment.
    """

    def __init__(self, pipeline, all_features, cat_features, num_features, rmse):
        # Plain attribute storage; no copying or validation is performed.
        self.pipeline, self.all_features = pipeline, all_features
        self.cat_features, self.num_features = cat_features, num_features
        self.rmse = rmse

In [None]:
# Wrap the fitted pipeline together with its feature lists and test RMSE.
car_model = CarPredictionModel(
    pipeline=reg,
    all_features=x_columns,
    cat_features=cat_features,
    num_features=num_features,
    rmse=rmse,
)

In [None]:
from joblib import dump, load

In [None]:
# Serialise the wrapped model to carmodel.pkl in the current working directory.
dump(car_model, "carmodel.pkl")

['carmodel.pkl']

In [None]:
# Read the file back to check that the saved artefact round-trips.
# Only load pickle files you created or trust: unpickling can execute arbitrary code.
loaded_car_model = load("carmodel.pkl")

In [None]:
# Smoke test: score the first held-out row with the reloaded pipeline.
loaded_car_model.pipeline.predict(x_test.iloc[:1])

array([2.7118423], dtype=float32)