# #1 Using pandas `get_dummies`

In [6]:
import pandas as pd
from sklearn.linear_model import LinearRegression

In [4]:
# Load the raw car listings; the file is resolved relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer="car_prices.csv")

In [5]:
# One-hot encode the car model: one boolean column per distinct value of 'Car Model'.
dummy_var = pd.get_dummies(data=df['Car Model'])
dummy_var

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,False,True,False
1,False,True,False
2,False,True,False
3,False,True,False
4,False,True,False
5,True,False,False
6,True,False,False
7,True,False,False
8,True,False,False
9,False,False,True


In [13]:
# Attach the dummy columns, then drop the text column and one dummy column.
#
# Exactly one indicator is True per row, so the third is implied by the other
# two. Keeping all three would add a redundant, perfectly collinear feature
# (the "dummy variable trap"). After the drop, a row with Audi A5 == False and
# BMW X5 == False is a Mercedez Benz C class.

modified_df = pd.concat([df, dummy_var], axis=1)
final_df = modified_df.drop(columns=['Car Model', 'Mercedez Benz C class'])
final_df

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5
0,69000,18000,6,False,True
1,35000,34000,3,False,True
2,57000,26100,5,False,True
3,22500,40000,2,False,True
4,46000,31500,4,False,True
5,59000,29400,5,True,False
6,52000,32000,5,True,False
7,72000,19300,6,True,False
8,91000,12000,8,True,False
9,67000,22000,6,False,False


In [21]:
# Fit a linear model for the sale price from all remaining columns

# Target is the price column; predictors are every other column of final_df.
y = final_df['Sell Price($)']
X = final_df.drop(columns=['Sell Price($)'])

reg = LinearRegression().fit(X, y)

# NOTE: score() is the R^2 coefficient of determination, and it is evaluated on
# the same rows the model was fitted on. The printed percentage therefore
# measures goodness of fit on the training data, not held-out accuracy.
print("Model is", f"{reg.score(X, y) * 100:.3f}", '% Accurate')

Model is 94.171 % Accurate


In [22]:
# Query the model with a DataFrame whose columns carry the training feature
# names, instead of bare positional lists. `reg` was fitted on a DataFrame, so
# scikit-learn can check the names against the ones seen during fit. This
# catches a mis-ordered or missing column and avoids the "X does not have
# valid feature names" warning that positional lists trigger.
#
# Both dummy columns at 0 denote the dropped baseline category (Mercedez Benz C class).
queries = pd.DataFrame(
    [
        [45000, 4, 0, 0],  # Mercedez Benz C class, 4 yrs, 45000 mileage
        [86000, 7, 0, 1],  # BMW X5, 7 yrs, 86000 mileage
    ],
    columns=['Mileage', 'Age(yrs)', 'Audi A5', 'BMW X5'],
)
mercedez_price, bmw_price = reg.predict(queries)

print("Mercedez Benz 4 yrs 45000 Mileage -> Price:", "{:.2f}".format(mercedez_price))
print("BMW X5 7 yrs 86000 Mileage -> Price:", "{:.2f}".format(bmw_price))

Mercedez Benz 4 yrs 45000 Mileage -> Price: 36991.32
BMW X5 7 yrs 86000 Mileage -> Price: 11080.74




# #2 Using scikit-learn's `OneHotEncoder`

In [1]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
from sklearn.compose import ColumnTransformer

In [5]:
# Read the raw listings again so this section starts from the unmodified CSV.
dataFrame = pd.read_csv(filepath_or_buffer="car_prices.csv")
dataFrame

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [6]:
# Replace the model names with integer codes, overwriting the column in place.
# In the displayed result the codes are Audi A5 -> 0, BMW X5 -> 1,
# Mercedez Benz C class -> 2.
dataFrame['Car Model'] = (
    LabelEncoder()
    .fit(dataFrame['Car Model'])
    .transform(dataFrame['Car Model'])
)
dataFrame

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


In [8]:
# Split the frame into a target vector and a predictor matrix (NumPy arrays).
# 'Car Model' is deliberately the first predictor column: the encoder step
# refers to it by position 0.

dependent = dataFrame['Sell Price($)'].to_numpy()
features = dataFrame[['Car Model', 'Mileage', 'Age(yrs)']].to_numpy()

In [10]:
# One-hot encode column 0 (the car model codes) and pass every other column
# through unchanged.

ct = ColumnTransformer(
    transformers=[('Car Model', OneHotEncoder(), [0])],
    remainder='passthrough',
)
ct

In [11]:
# Learn the categories and encode in one pass. The one-hot columns come first,
# followed by the passthrough columns (mileage, age).
features = ct.fit(features).transform(features)
features   # Audi , BMW , Mercedez

array([[0.00e+00, 1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [0.00e+00, 1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [12]:
# Drop the first one-hot column so the indicators are not redundant: Audi
# becomes the baseline, represented by both remaining indicator columns being 0.
# NOTE: `features` is reassigned from itself, so re-running this cell on its own
# strips another column. Re-run from the encoding cell instead.
features = features[:, 1:]  # Eliminating Audi
features

array([[1.00e+00, 0.00e+00, 6.90e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 3.50e+04, 3.00e+00],
       [1.00e+00, 0.00e+00, 5.70e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 2.25e+04, 2.00e+00],
       [1.00e+00, 0.00e+00, 4.60e+04, 4.00e+00],
       [0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [0.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 1.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 1.00e+00, 5.90e+04, 5.00e+00]])

In [16]:
# Fit the regression on the encoded feature matrix

car_model = LinearRegression().fit(features, dependent)

# NOTE: score() is R^2, computed here on the training rows themselves, so the
# percentage describes fit quality rather than accuracy on unseen data.
print("Car Model is", format(car_model.score(features, dependent) * 100, ".3f"), '% Accurate')

Car Model is 94.171 % Accurate


In [18]:
# Each query row follows the trained column order: [BMW, Mercedez, mileage, age].
# Audi is the dropped baseline (both indicators 0).
for label, row in (
    ("Mercedez 4 yrs 45k Mileage -> Price:", [0, 1, 45000, 4]),
    ("BMW X5 7 yrs 86k Mileage -> Price:", [1, 0, 86000, 7]),
):
    print(label, f"{car_model.predict([row])[0]:.2f}")

Mercedez 4 yrs 45k Mileage -> Price: 36991.32
BMW X5 7 yrs 86k Mileage -> Price: 11080.74
