In [155]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import OneHotEncoder
from sklearn.linear_model import LinearRegression

In [156]:
# Load the car listings (model, mileage, sell price, age) into a DataFrame.
car_prices_csv = 'carprices.csv'
df = pd.read_csv(car_prices_csv)
df

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [157]:
# One-hot encode the categorical 'Car Model' column. Dense output is requested
# so the result can be displayed and stacked with the numeric features.
car_model_column = df[['Car Model']].to_numpy()  # 2-D (n_samples, 1) for the encoder
encoder = OneHotEncoder(sparse_output=False, handle_unknown='error')
encoded_car_model = encoder.fit_transform(car_model_column)
encoded_car_model

array([[0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [0., 1., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [1., 0., 0.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.],
       [0., 0., 1.]])

In [158]:
# Place the one-hot columns (labelled 0, 1, 2) next to the numeric features.
car_model_dummies = pd.DataFrame(encoded_car_model)
numeric_features = df[['Mileage', 'Age(yrs)']]
X_train = pd.concat([car_model_dummies, numeric_features], axis=1)
X_train

Unnamed: 0,0,1,2,Mileage,Age(yrs)
0,0.0,1.0,0.0,69000,6
1,0.0,1.0,0.0,35000,3
2,0.0,1.0,0.0,57000,5
3,0.0,1.0,0.0,22500,2
4,0.0,1.0,0.0,46000,4
5,1.0,0.0,0.0,59000,5
6,1.0,0.0,0.0,52000,5
7,1.0,0.0,0.0,72000,6
8,1.0,0.0,0.0,91000,8
9,0.0,0.0,1.0,67000,6


In [159]:
# Final training matrix: [one-hot car model minus its last column, mileage, age].
#
# The last indicator column is dropped because, together with the regression
# intercept, the full set of indicators is perfectly collinear (the usual
# dummy-variable-trap precaution). The same `[:, :-1]` slice is applied to the
# test rows later on, so train and test share one column layout.
#
# The matrix is rebuilt from `encoded_car_model` and `df` rather than dropping a
# column from `X_train` in place: this cell can then be re-run safely (an
# ndarray has no `.drop`) and no previously displayed DataFrame is mutated.
X_train = np.column_stack([
    encoded_car_model[:, :-1],
    df[['Mileage', 'Age(yrs)']].to_numpy(),
])
X_train

array([[0.00e+00, 1.00e+00, 6.90e+04, 6.00e+00],
       [0.00e+00, 1.00e+00, 3.50e+04, 3.00e+00],
       [0.00e+00, 1.00e+00, 5.70e+04, 5.00e+00],
       [0.00e+00, 1.00e+00, 2.25e+04, 2.00e+00],
       [0.00e+00, 1.00e+00, 4.60e+04, 4.00e+00],
       [1.00e+00, 0.00e+00, 5.90e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 5.20e+04, 5.00e+00],
       [1.00e+00, 0.00e+00, 7.20e+04, 6.00e+00],
       [1.00e+00, 0.00e+00, 9.10e+04, 8.00e+00],
       [0.00e+00, 0.00e+00, 6.70e+04, 6.00e+00],
       [0.00e+00, 0.00e+00, 8.30e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 7.90e+04, 7.00e+00],
       [0.00e+00, 0.00e+00, 5.90e+04, 5.00e+00]])

In [160]:
# Regression target: the sell price of every listing as a plain NumPy array.
sell_price = df['Sell Price($)']
y_train = sell_price.to_numpy()
y_train

array([18000, 34000, 26100, 40000, 31500, 29400, 32000, 19300, 12000,
       22000, 20000, 21000, 33000], dtype=int64)

In [161]:
# Fit a linear regression of sell price on the training matrix.
# `fit` returns the estimator itself, so the call can be chained; the trailing
# expression keeps the fitted model as the cell's displayed output.
model = LinearRegression().fit(X_train, y_train)
model

In [162]:
# R^2 of the fitted model, measured on the same data it was trained on
# (so this is a goodness-of-fit figure, not a held-out estimate).
train_r2 = model.score(X_train, y_train)
train_r2

0.9417050937281082

In [163]:
# New examples to price, one row each: [car model, mileage, age in years].
# dtype=object keeps mileage and age as numbers; without it NumPy coerces the
# whole mixed str/int literal to a string array ('45000', '4', ...).
X_test = np.array(
    [
        ['Mercedez Benz C class', 45000, 4],
        ['BMW X5', 86000, 7],
    ],
    dtype=object,
)

In [164]:
# Encode the car-model column of the test rows with the already-fitted encoder,
# then drop the last indicator column so the layout matches the training matrix.
test_car_models = X_test[:, :1]  # slice (not index) to keep the 2-D shape
test_one_hot = encoder.transform(test_car_models)
X_test_encoded = test_one_hot[:, :-1]
X_test_encoded

array([[0., 0.],
       [0., 1.]])

In [165]:
# Assemble the numeric test matrix: [one-hot car model, mileage, age].
# The raw mileage/age columns are converted to float *before* concatenation so
# the one-hot floats are never promoted to a non-numeric dtype and parsed back.
# NOTE: this rebinds `X_test` to the numeric matrix; re-run the cell that
# defines the raw `X_test` before re-running the encoding cell.
test_numeric = X_test[:, 1:].astype(np.float64)
X_test = np.concatenate([X_test_encoded, test_numeric], axis=1)
X_test

array([[0.0e+00, 0.0e+00, 4.5e+04, 4.0e+00],
       [0.0e+00, 1.0e+00, 8.6e+04, 7.0e+00]])

In [166]:
# Predicted sell price for each test row.
predicted_prices = model.predict(X_test)
predicted_prices

array([36991.31721062, 11080.74313218])