## Using Dummy Variables

In [46]:
import pandas as pd

In [47]:
# Read the car price table from the CSV file into a DataFrame and display it.
data = pd.read_csv(filepath_or_buffer='/content/carprices.csv')
data

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [48]:
# Build one indicator column per distinct car model and cast the indicators
# to integers so they display as 0/1.
dummies = pd.get_dummies(data['Car Model']).astype(int)

dummies

Unnamed: 0,Audi A5,BMW X5,Mercedez Benz C class
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [49]:
# Place the indicator columns to the right of the original columns
# (column-wise concatenation).
merged = pd.concat(objs=[data, dummies], axis=1)
merged

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5,Mercedez Benz C class
0,BMW X5,69000,18000,6,0,1,0
1,BMW X5,35000,34000,3,0,1,0
2,BMW X5,57000,26100,5,0,1,0
3,BMW X5,22500,40000,2,0,1,0
4,BMW X5,46000,31500,4,0,1,0
5,Audi A5,59000,29400,5,1,0,0
6,Audi A5,52000,32000,5,1,0,0
7,Audi A5,72000,19300,6,1,0,0
8,Audi A5,91000,12000,8,1,0,0
9,Mercedez Benz C class,67000,22000,6,0,0,1


In [50]:
# Remove the raw text column and one of the three indicator columns.
# A 'Mercedez Benz C class' row is then the one with 0 in both remaining
# indicator columns.
final = merged.drop(columns=['Car Model', 'Mercedez Benz C class'])
final

Unnamed: 0,Mileage,Sell Price($),Age(yrs),Audi A5,BMW X5
0,69000,18000,6,0,1
1,35000,34000,3,0,1
2,57000,26100,5,0,1
3,22500,40000,2,0,1
4,46000,31500,4,0,1
5,59000,29400,5,1,0
6,52000,32000,5,1,0
7,72000,19300,6,1,0
8,91000,12000,8,1,0
9,67000,22000,6,0,0


In [51]:
from sklearn.linear_model import LinearRegression

# Create the (still unfitted) linear regression estimator; it is fitted in a later cell.
model = LinearRegression()

In [52]:
# Feature matrix: every column of `final` except the selling price.
X = final.drop(columns=['Sell Price($)'])
X

Unnamed: 0,Mileage,Age(yrs),Audi A5,BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1
5,59000,5,1,0
6,52000,5,1,0
7,72000,6,1,0
8,91000,8,1,0
9,67000,6,0,0


In [53]:
# Target vector: the selling price column of `final`.
y = final.loc[:, 'Sell Price($)']
y

0     18000
1     34000
2     26100
3     40000
4     31500
5     29400
6     32000
7     19300
8     12000
9     22000
10    20000
11    21000
12    33000
Name: Sell Price($), dtype: int64

In [54]:
# Fit the regression using the feature columns in X to explain the selling price y.
model.fit(X, y)

In [55]:
# Predicting price for Mercedez Benz C class that is 4 yr old with mileage 45000.
# Both indicator columns are 0 because the Mercedez column was dropped from the features.
# The sample is wrapped in a DataFrame that reuses the training column names, so
# the values are matched to the right features and scikit-learn does not warn
# that the input lacks the feature names the model was fitted with.

model.predict(pd.DataFrame([[45000, 4, 0, 0]], columns=X.columns))



array([36991.31721061])

In [56]:
# Price of BMW X5 that is 7 yr old with mileage 86000.
# The sample is wrapped in a DataFrame that reuses the training column names, so
# the values are matched to the right features and scikit-learn does not warn
# that the input lacks the feature names the model was fitted with.

model.predict(pd.DataFrame([[86000, 7, 0, 1]], columns=X.columns))



array([11080.74313219])

In [57]:
# Score the model on the same X and y it was fitted on (no held-out data is used here).
model.score(X,y)

0.9417050937281082

## Using Sklearn's One Hot Encoding

In [58]:
from sklearn.preprocessing import LabelEncoder, OneHotEncoder

In [59]:
# Label encoder used in a later cell to turn the car-model strings into integer codes.
le = LabelEncoder()

In [60]:
# Load the CSV again into a separate frame for the scikit-learn encoding section.
df = pd.read_csv(filepath_or_buffer='/content/carprices.csv')
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [68]:
# Replace the car-model strings in `df` with integer codes.
# The encoder is fitted on df's own column: the name `dfle` used here before is
# not defined anywhere in this notebook, so the cell raised NameError on a
# fresh kernel (Restart & Run All).
df['Car Model'] = le.fit_transform(df['Car Model'])
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,1,69000,18000,6
1,1,35000,34000,3
2,1,57000,26100,5
3,1,22500,40000,2
4,1,46000,31500,4
5,0,59000,29400,5
6,0,52000,32000,5
7,0,72000,19300,6
8,0,91000,12000,8
9,2,67000,22000,6


In [69]:
# Feature array built from the integer-coded car model, the mileage and the age.
X = df.loc[:, ['Car Model', 'Mileage', 'Age(yrs)']].to_numpy()
X

array([[    1, 69000,     6],
       [    1, 35000,     3],
       [    1, 57000,     5],
       [    1, 22500,     2],
       [    1, 46000,     4],
       [    0, 59000,     5],
       [    0, 52000,     5],
       [    0, 72000,     6],
       [    0, 91000,     8],
       [    2, 67000,     6],
       [    2, 83000,     7],
       [    2, 79000,     7],
       [    2, 59000,     5]])

In [72]:
# Target array: the selling prices as a NumPy array.
y = df['Sell Price($)'].to_numpy()
y

array([18000, 34000, 26100, 40000, 31500, 29400, 32000, 19300, 12000,
       22000, 20000, 21000, 33000])

In [73]:
# One-hot encode the car model names.
# `sparse_output=False` (scikit-learn >= 1.2) asks for a dense array; it replaces
# the `sparse=False` keyword, which was deprecated in 1.2 and removed in 1.4.
# The string labels are read from `data`, because df['Car Model'] was overwritten
# with integer codes in an earlier cell.
ohe = OneHotEncoder(sparse_output=False, dtype=int)
X_ohe = ohe.fit_transform(data[['Car Model']])

# Assemble the feature frame: one indicator column per car model first,
# followed by Mileage and Age(yrs). Both parts use a fresh 0..n-1 index so the
# rows line up during the column-wise concatenation.
X = pd.concat(
    [
        pd.DataFrame(X_ohe, columns=ohe.get_feature_names_out(['Car Model'])),
        df[['Mileage', 'Age(yrs)']].reset_index(drop=True),
    ],
    axis=1,
)

X



Unnamed: 0,Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class,Mileage,Age(yrs)
0,0,1,0,69000,6
1,0,1,0,35000,3
2,0,1,0,57000,5
3,0,1,0,22500,2
4,0,1,0,46000,4
5,1,0,0,59000,5
6,1,0,0,52000,5
7,1,0,0,72000,6
8,1,0,0,91000,8
9,0,0,1,67000,6


In [74]:
# Create a fresh linear regression, fit it on the one-hot encoded features,
# and display the fitted estimator.
model = LinearRegression().fit(X, y)
model

In [78]:
# Predicting price for Mercedez Benz C class that is 4 yr old with mileage 45000.
# Value order follows X's columns: Audi, BMW and Mercedez indicators, then mileage, then age.
# The sample is wrapped in a DataFrame that reuses the training column names, so
# the values are matched to the right features and scikit-learn does not warn
# that the input lacks the feature names the model was fitted with.

model.predict(pd.DataFrame([[0, 0, 1, 45000, 4]], columns=X.columns))



array([36991.31721061])

In [79]:
# Price of BMW X5 that is 7 yr old with mileage 86000.
# Value order follows X's columns: Audi, BMW and Mercedez indicators, then mileage, then age.
# The sample is wrapped in a DataFrame that reuses the training column names, so
# the values are matched to the right features and scikit-learn does not warn
# that the input lacks the feature names the model was fitted with.

model.predict(pd.DataFrame([[0, 1, 0, 86000, 7]], columns=X.columns))



array([11080.74313219])

In [77]:
# Score the refitted model on the same X and y it was fitted on (training data only).
model.score(X, y)

0.9417050937281083