In [1]:
import pandas as pd
import numpy as np

In [2]:
cars=pd.read_csv('train.csv')
cars.columns

Index(['v.id', 'on road old', 'on road now', 'years', 'km', 'rating',
       'condition', 'economy', 'top speed', 'hp', 'torque', 'current price'],
      dtype='object')

In [3]:
cars.dtypes

v.id               int64
on road old        int64
on road now        int64
years              int64
km                 int64
rating             int64
condition          int64
economy            int64
top speed          int64
hp                 int64
torque             int64
current price    float64
dtype: object

In [4]:
cars.isnull().sum()

v.id             0
on road old      0
on road now      0
years            0
km               0
rating           0
condition        0
economy          0
top speed        0
hp               0
torque           0
current price    0
dtype: int64

In [5]:
cars[cars.duplicated]

Unnamed: 0,v.id,on road old,on road now,years,km,rating,condition,economy,top speed,hp,torque,current price


In [6]:
cars['on_road_diff']=cars['on road now']-cars['on road old']

In [7]:
cars.head()

Unnamed: 0,v.id,on road old,on road now,years,km,rating,condition,economy,top speed,hp,torque,current price,on_road_diff
0,1,535651,798186,3,78945,1,2,14,177,73,123,351318.0,262535
1,2,591911,861056,6,117220,5,9,9,148,74,95,285001.5,269145
2,3,686990,770762,2,132538,2,8,15,181,53,97,215386.0,83772
3,4,573999,722381,4,101065,4,3,11,197,54,116,244295.5,148382
4,5,691388,811335,6,61559,3,9,12,160,53,105,531114.5,119947


In [11]:
cars.columns

Index(['v.id', 'on road old', 'on road now', 'years', 'km', 'rating',
       'condition', 'economy', 'top speed', 'hp', 'torque', 'current price',
       'on_road_diff'],
      dtype='object')

In [10]:
from sklearn.model_selection import train_test_split

In [18]:
x=cars[['years', 'km', 'rating',
       'condition', 'economy', 'top speed', 'hp', 'torque', 
       'on_road_diff']]
y=cars['current price']

In [13]:
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=22)

In [22]:
print(x_train.shape)
print(x_test.shape)
print(y_train.shape)
print(y_test.shape)

(800, 9)
(200, 9)
(800,)
(200,)


In [24]:
from sklearn.linear_model import LinearRegression
lr=LinearRegression()
lr.fit(x_train,y_train)
lr_pred=lr.predict(x_test)

In [25]:
from sklearn.metrics import mean_absolute_error, mean_squared_error,r2_score

In [26]:
print('MAE_lr:',mean_absolute_error(lr_pred,y_test))
print('RMSE_lr:',np.sqrt(mean_squared_error(lr_pred,y_test)))
print('r2_score:',r2_score(lr_pred,y_test))      

MAE_lr: 36191.1687966407
RMSE_lr: 44137.57412052631
r2_score: 0.8663361482329506


In [30]:
from sklearn.tree import DecisionTreeRegressor
dt=DecisionTreeRegressor(random_state=42)
dt.fit(x_train,y_train)
dt_pred=dt.predict(x_test)

In [31]:
print('MAE_dt:',mean_absolute_error(dt_pred,y_test))
print('RMSE_dt:',np.sqrt(mean_squared_error(dt_pred,y_test)))
print('r2_score_dt:',r2_score(dt_pred,y_test))      

MAE_dt: 50987.3525
RMSE_dt: 62236.25673304308
r2_score_dt: 0.7524706819158915


In [34]:
from sklearn.ensemble import RandomForestRegressor
rf=RandomForestRegressor(n_estimators=200,random_state=42)
rf.fit(x_train,y_train)
rf_pred=rf.predict(x_test)

In [35]:
print('MAE_rf:',mean_absolute_error(rf_pred,y_test))
print('RMSE_rf:',np.sqrt(mean_squared_error(rf_pred,y_test)))
print('r2_score_rf:',r2_score(rf_pred,y_test))      

MAE_rf: 37729.9134125
RMSE_rf: 45681.27745344832
r2_score_rf: 0.8536451932442939


In [37]:
import joblib
joblib.dump(rf,'final_model.pkl')

['final_model.pkl']

In [39]:
used_car_price=joblib.load('final_model.pkl')
used_car_price.predict(x_test)

array([225577.3875, 410560.05  , 429737.59  , 131060.1075, 303554.915 ,
       234304.785 , 131853.175 , 244468.97  , 248382.735 , 302230.725 ,
       285783.675 , 315888.9525, 347569.7575, 452194.325 , 264194.2875,
       437794.6525, 109519.25  , 173294.125 , 236576.5025, 195726.1725,
       219216.575 , 436019.99  , 181036.3125, 270994.9   , 103794.675 ,
       235625.715 , 242079.4   , 315084.1625, 430823.405 , 439259.1825,
       281295.99  , 327765.7425, 122654.7   , 306199.09  , 446323.1375,
       156415.6725, 155190.3775, 242421.9275, 252299.2875, 439955.3   ,
       406737.3075, 452365.6425, 240350.1125, 428312.8225, 437137.7875,
       125405.57  , 433285.0475, 167427.235 , 438191.1175, 257606.71  ,
       385358.79  , 462061.2825, 164600.49  , 187612.205 , 239586.065 ,
       447573.3725, 261910.955 , 257972.6975, 324706.8375, 285084.8925,
       316299.2875, 449415.15  , 229785.255 , 418758.8875, 403288.38  ,
       452287.3075, 149072.6175, 241173.6425, 136662.165 , 28805