# Importing the libraries

In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Importing the dataset

In [2]:
# Load the car listings; the path is resolved relative to the current working directory.
dataset = pd.read_csv('data.csv')

# Features: every column except the first and the last one.
feature_frame = dataset.iloc[:, 1:-1]
# Target: the last column of the frame.
target_series = dataset.iloc[:, -1]

# Downstream cells work on plain NumPy arrays rather than pandas objects.
x = feature_frame.values
y = target_series.values

# Leave the preview as the cell's final expression so it is rendered as a table.
dataset.head()

Unnamed: 0,name,year,km_driven,fuel,seller_type,transmission,owner,seats,torque_rpm,mil_kmpl,engine_cc,max_power_bhp,selling_price
0,Maruti Swift Dzire VDI,2014,145500,Diesel,Individual,Manual,First Owner,5.0,2000,23.4,1248.0,74.0,450000
1,Skoda Rapid 1.5 TDI Ambition,2014,120000,Diesel,Individual,Manual,Second Owner,5.0,2500,21.14,1498.0,103.52,370000
2,Honda City 2017-2020 EXi,2006,140000,Petrol,Individual,Manual,Third Owner,5.0,2700,17.7,1497.0,78.0,158000
3,Hyundai i20 Sportz Diesel,2010,127000,Diesel,Individual,Manual,First Owner,5.0,2750,23.0,1396.0,90.0,225000
4,Maruti Swift VXI BSIII,2007,120000,Petrol,Individual,Manual,First Owner,5.0,4500,16.1,1298.0,88.2,130000


# Encoding categorical variables

In [3]:
# Peek at the first two raw feature rows to see which positions hold string categories.
sample_rows = x[:2]
print(sample_rows)

[[2014 145500 'Diesel' 'Individual' 'Manual' 'First Owner' 5.0 2000 23.4
  1248.0 74.0]
 [2014 120000 'Diesel' 'Individual' 'Manual' 'Second Owner' 5.0 2500
  21.14 1498.0 103.52]]


In [4]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
# Positions 2-5 of x hold the string-valued columns (fuel, seller type,
# transmission, owner); one-hot encode them and pass every other column through.
categorical_columns = [2, 3, 4, 5]
ct = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
    # Always return a dense array: if the transformer handed back a SciPy sparse
    # matrix, np.array() would wrap it in a 0-d object array instead of producing
    # the 2-D feature matrix the following cells index into.
    sparse_threshold=0,
)
# The encoded columns come first in the output, followed by the passthrough columns.
x = np.array(ct.fit_transform(x))

# Splitting the dataset into training and test set

In [5]:
from sklearn.model_selection import train_test_split
# Keep 80% of the rows for training and hold out the rest for testing;
# the fixed random_state makes the shuffle repeatable across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    train_size=0.8,
    random_state=0,
)

In [6]:
# Print two encoded training rows, then show the matrix dimensions as the cell result.
first_two_train_rows = x_train[:2]
print(first_two_train_rows)
x_train.shape

[[0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 2017 60000 5.0
  3500 23.84 1199.0 84.0]
 [0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0 2017 30000 7.0
  2500 17.6 2179.0 153.86]]


(6324, 21)

# Feature Scaling

In [7]:
from sklearn.preprocessing import StandardScaler
sc_x = StandardScaler()
sc_y = StandardScaler()
x_train[:,14:-1] = sc_x.fit_transform(x_train[:,14:-1])
y_train = sc_y.fit_transform(y_train.reshape(len(y_train),1))

In [8]:
print(x_train.shape)
print(x_train[:2])

(6324, 21)
[[0.0 0.0 0.0 1.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0
  0.7812132191955661 -0.1606507221044623 -0.43418109875384236
  0.4654992150575925 1.0967528609021615 -0.5164148246220253 84.0]
 [0.0 1.0 0.0 0.0 0.0 1.0 0.0 0.0 1.0 1.0 0.0 0.0 0.0 0.0
  0.7812132191955661 -0.6726079107458584 1.653056657627229
  -0.593838555496678 -0.44925237497311166 1.433774743642715 153.86]]


# Training the SVR model on the training set

In [9]:
from sklearn.svm import SVR

In [10]:
regressor = SVR(kernel='rbf')
regressor.fit(x_train, y_train.ravel())

SVR()

In [11]:
x_test[:,14:-1] = sc_x.transform(x_test[:,14:-1])

# Predicting the test set results

In [12]:
# SVR.predict returns a 1-D array, while StandardScaler.inverse_transform expects
# a 2-D (n_samples, n_features) array. Reshape to a single column before undoing
# the target scaling, then flatten so `predictions` stays 1-D for later cells.
scaled_predictions = regressor.predict(x_test)
predictions = sc_y.inverse_transform(scaled_predictions.reshape(-1, 1)).ravel()

In [13]:
# Show two decimals so the price columns are easy to compare by eye.
np.set_printoptions(precision=2)

# Put actual prices (left) and predicted prices (right) side by side as columns.
actual_column = y_test.reshape(-1, 1)
predicted_column = predictions.reshape(-1, 1)
np.concatenate((actual_column, predicted_column), axis=1)

array([[430000.  , 531852.69],
       [779000.  , 697391.92],
       [200000.  , 240076.24],
       ...,
       [120000.  ,   9020.21],
       [340000.  , 386811.71],
       [620000.  , 619826.94]])

# Evaluating the model performance

In [14]:
from sklearn.metrics import r2_score
# Coefficient of determination of the predictions against the held-out prices.
r2_score(y_true=y_test, y_pred=predictions)

0.7436549137494886