In [3]:
import numpy as np
import pandas as pd
from sklearn import linear_model

# Load the training data: one row per car with its model name (Car), Mileage, Price and Age.
df = pd.read_csv('GermanCarPrices.csv')
df

Unnamed: 0,Car,Mileage,Price,Age
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,AUDI Q7,59000,29400,5
6,AUDI Q7,52000,32000,5
7,AUDI Q7,72000,19300,6
8,AUDI Q7,91000,12000,8
9,MERCEDES GLC,67000,22000,6


In [4]:
# (number of rows, number of columns) of the training frame.
df.shape

(13, 4)

In [5]:
# Summary statistics for the numeric columns (Mileage, Price, Age).
df.describe()

Unnamed: 0,Mileage,Price,Age
count,13.0,13.0,13.0
mean,60884.615385,26023.076923,5.307692
std,19185.665055,8003.661021,1.652504
min,22500.0,12000.0,2.0
25%,52000.0,20000.0,5.0
50%,59000.0,26100.0,5.0
75%,72000.0,32000.0,6.0
max,91000.0,40000.0,8.0


In [6]:
# One indicator column per distinct value found in the Car column.
dummies = pd.get_dummies(df['Car'])
dummies

Unnamed: 0,AUDI Q7,BMW X5,MERCEDES GLC
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,0,0,1


In [7]:
# Place the indicator columns to the right of the original columns,
# aligned row-by-row on the shared index.
merged = pd.concat((df, dummies), axis=1)
merged

Unnamed: 0,Car,Mileage,Price,Age,AUDI Q7,BMW X5,MERCEDES GLC
0,BMW X5,69000,18000,6,0,1,0
1,BMW X5,35000,34000,3,0,1,0
2,BMW X5,57000,26100,5,0,1,0
3,BMW X5,22500,40000,2,0,1,0
4,BMW X5,46000,31500,4,0,1,0
5,AUDI Q7,59000,29400,5,1,0,0
6,AUDI Q7,52000,32000,5,1,0,0
7,AUDI Q7,72000,19300,6,1,0,0
8,AUDI Q7,91000,12000,8,1,0,0
9,MERCEDES GLC,67000,22000,6,0,0,1


In [8]:
# Remove the raw text column and one of the indicators. With its indicator
# gone, MERCEDES GLC is represented by both remaining indicators being 0
# (presumably done to avoid the dummy-variable trap).
final = merged.drop(columns=['Car', 'MERCEDES GLC'])
final

Unnamed: 0,Mileage,Price,Age,AUDI Q7,BMW X5
0,69000,18000,6,0,1
1,35000,34000,3,0,1
2,57000,26100,5,0,1
3,22500,40000,2,0,1
4,46000,31500,4,0,1
5,59000,29400,5,1,0
6,52000,32000,5,1,0
7,72000,19300,6,1,0
8,91000,12000,8,1,0
9,67000,22000,6,0,0


In [9]:
# Feature matrix: everything in `final` except the target column.
X = final.drop(columns='Price')
X

Unnamed: 0,Mileage,Age,AUDI Q7,BMW X5
0,69000,6,0,1
1,35000,3,0,1
2,57000,5,0,1
3,22500,2,0,1
4,46000,4,0,1
5,59000,5,1,0
6,52000,5,1,0
7,72000,6,1,0
8,91000,8,1,0
9,67000,6,0,0


In [10]:
# Target vector: the Price column of `final`.
y = final.Price
y

0     18000
1     34000
2     26100
3     40000
4     31500
5     29400
6     32000
7     19300
8     12000
9     22000
10    20000
11    21000
12    33000
Name: Price, dtype: int64

In [11]:
# Ordinary least-squares fit of Price on the feature matrix.
# `fit` returns the estimator itself, so construction and fitting can be chained;
# the bare name on the last line displays the fitted estimator.
model = linear_model.LinearRegression().fit(X, y)
model

LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
         normalize=False)

In [12]:
# R^2 of the fitted model, evaluated on the same X, y it was trained on.
# This is an in-sample figure, not an estimate of performance on unseen cars.
model.score(X,y)

0.9417050937281083

In [13]:
# Fitted coefficients, one per feature in X's column order:
# Mileage, Age, AUDI Q7, BMW X5.
model.coef_

array([-3.70122094e-01, -1.33245363e+03, -2.45354074e+03, -6.73820733e+03])

In [14]:
# Fitted intercept term of the linear model.
model.intercept_

58976.625968537235

In [15]:
# Ad-hoc prediction for a single car. Values follow X's column order
# (Mileage, Age, AUDI Q7, BMW X5); both indicators are 0, which is the
# MERCEDES GLC baseline. The row is wrapped in a DataFrame that reuses
# X's column labels so the input carries the same feature names the model
# was fitted with (newer scikit-learn releases warn when they are missing).
model.predict(pd.DataFrame([[45000, 4, 0, 0]], columns=X.columns))

array([36991.31721061])

In [16]:
# Ad-hoc prediction for a single car. Values follow X's column order
# (Mileage, Age, AUDI Q7, BMW X5); the last indicator is 1, i.e. a BMW X5.
# The row is wrapped in a DataFrame that reuses X's column labels so the
# input carries the same feature names the model was fitted with
# (newer scikit-learn releases warn when they are missing).
model.predict(pd.DataFrame([[86000, 7, 0, 1]], columns=X.columns))

array([11080.74313219])

In [17]:
# Load the cars to be priced: same Car / Mileage / Age columns as the training
# data, but without a Price column.
car_df = pd.read_csv('PredictGermanCarPrices.csv')
car_df

Unnamed: 0,Car,Mileage,Age
0,BMW X5,55000,5
1,BMW X5,15000,2
2,BMW X5,63000,7
3,BMW X5,44000,5
4,BMW X5,75000,9
5,AUDI Q7,16000,3
6,AUDI Q7,45000,6
7,AUDI Q7,33000,4
8,AUDI Q7,29000,5
9,AUDI Q7,77000,8


In [20]:
# One-hot encode the prediction data against the *training* categories.
# get_dummies only creates columns for values present in this particular file,
# so a brand missing here would otherwise leave a column out (and break the
# later drop of 'MERCEDES GLC'), while a brand never seen in training would add
# a feature the model does not know. Fail early and clearly in the second case.
unseen_cars = set(car_df.Car.dropna()) - set(dummies.columns)
assert not unseen_cars, f"Car models not present in the training data: {sorted(unseen_cars)}"

# Reindexing to the training dummy columns fixes both the set and the order of
# the indicator columns; brands absent from this file become all-zero columns.
car_dummies = pd.get_dummies(car_df.Car).reindex(columns=dummies.columns, fill_value=0)
car_dummies

Unnamed: 0,AUDI Q7,BMW X5,MERCEDES GLC
0,0,1,0
1,0,1,0
2,0,1,0
3,0,1,0
4,0,1,0
5,1,0,0
6,1,0,0
7,1,0,0
8,1,0,0
9,1,0,0


In [22]:
# Place the indicator columns to the right of the prediction frame's own columns,
# aligned row-by-row on the shared index.
merged_car = pd.concat((car_df, car_dummies), axis=1)
merged_car

Unnamed: 0,Car,Mileage,Age,AUDI Q7,BMW X5,MERCEDES GLC
0,BMW X5,55000,5,0,1,0
1,BMW X5,15000,2,0,1,0
2,BMW X5,63000,7,0,1,0
3,BMW X5,44000,5,0,1,0
4,BMW X5,75000,9,0,1,0
5,AUDI Q7,16000,3,1,0,0
6,AUDI Q7,45000,6,1,0,0
7,AUDI Q7,33000,4,1,0,0
8,AUDI Q7,29000,5,1,0,0
9,AUDI Q7,77000,8,1,0,0


In [23]:
# Build the prediction features by selecting exactly the columns the model was
# trained on, in the same order (X's columns: Mileage, Age, AUDI Q7, BMW X5).
# Selecting by X.columns instead of dropping a hard-coded list means:
#   * the feature set/order always matches the training matrix,
#   * a missing expected column raises a clear KeyError here,
#   * extra columns in merged_car (e.g. a Price column added to car_df by a
#     later cell and picked up on re-run) are never passed to the model.
final_car = merged_car[list(X.columns)]
final_car

Unnamed: 0,Mileage,Age,AUDI Q7,BMW X5
0,55000,5,0,1
1,15000,2,0,1
2,63000,7,0,1
3,44000,5,0,1
4,75000,9,0,1
5,16000,3,1,0
6,45000,6,1,0
7,33000,4,1,0
8,29000,5,1,0
9,77000,8,1,0


In [24]:
# Predicted price for every row of the prepared prediction features.
price = model.predict(final_car)
price

array([25219.43531345, 44021.67997182, 19593.55130312, 29290.77835173,
       12487.17891524, 46603.77083822, 31872.86921813, 38979.24160601,
       39127.27635595, 17364.05494247, 24259.0608124 , 37879.59614933,
       31809.60788918, 37583.52664945, 41358.72974877])

In [25]:
# Attach the predictions to the loaded frame as a new Price column.
# NOTE(review): this mutates car_df in place. If the earlier cells that derive
# merged_car / final_car from car_df are re-run afterwards, they will see this
# extra Price column — re-run from the read_csv cell to get a clean frame.
car_df['Price'] = price
car_df

Unnamed: 0,Car,Mileage,Age,Price
0,BMW X5,55000,5,25219.435313
1,BMW X5,15000,2,44021.679972
2,BMW X5,63000,7,19593.551303
3,BMW X5,44000,5,29290.778352
4,BMW X5,75000,9,12487.178915
5,AUDI Q7,16000,3,46603.770838
6,AUDI Q7,45000,6,31872.869218
7,AUDI Q7,33000,4,38979.241606
8,AUDI Q7,29000,5,39127.276356
9,AUDI Q7,77000,8,17364.054942


In [26]:
# Persist the priced cars. index=False keeps the row index out of the file;
# without it the CSV gains an unnamed leading column (read back as "Unnamed: 0")
# that the input files do not have.
car_df.to_csv('GERMAN CAR PRICES PREDICTION.csv', index=False)

In [27]:
# Save the fitted model to disk.
# `sklearn.externals.joblib` was deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone `joblib` package (a scikit-learn dependency)
# and only fall back to the vendored copy on very old installations.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib

joblib.dump(model, 'GermanCarPredictingModel_joblib')

['GermanCarPredictingModel_joblib']