In [1]:
import math
import pickle  # dump or save your model

import numpy as np
import pandas as pd
from joblib import dump, load
from sklearn import linear_model
from sklearn.linear_model import LinearRegression

In [2]:
# Load the raw car listings from a CSV (path is relative to the working
# directory) and show which columns are available.
filename = "carprices.csv"
dataframe = pd.read_csv(filename)
dataframe.columns

Index(['Car name', 'Fuel type', 'Number of doors', 'Car body', 'Driver wheel',
       'Car length', 'Car width', 'Number of cylinders', 'Horse power',
       'Price'],
      dtype='object')

In [3]:
# Convert the spelled-out counts ("four", "six", ...) in 'Number of cylinders'
# and 'Number of doors' into integers so they can be used as numeric features.
CYLINDER_WORDS = {
    'four': 4,
    'five': 5,
    'six': 6,
    'eight': 8
}
DOOR_WORDS = {
    'two': 2,
    'four': 4
}


def words_to_int(column, word_to_number):
    """Return `column` with number words replaced by their integer value.

    Parameters
    ----------
    column : pd.Series
        Values spelled as words (e.g. 'four'), or already numeric.
    word_to_number : dict
        Lookup from word to integer.

    Returns
    -------
    pd.Series
        Same index as `column`, dtype int64.

    Raises
    ------
    ValueError
        If a value is neither a key of `word_to_number` nor numeric. A plain
        `.map` would silently turn such values into NaN.
    """
    converted = column.map(word_to_number)
    # Values that are already numeric (e.g. this cell was run a second time)
    # are kept as they are instead of being mapped to NaN.
    converted = converted.fillna(pd.to_numeric(column, errors='coerce'))
    unmapped = column[converted.isna()].drop_duplicates().tolist()
    if unmapped:
        raise ValueError(
            "No integer mapping for values in column '{}': {}".format(column.name, unmapped)
        )
    return converted.astype('int64')


dataframe['Number of cylinders'] = words_to_int(
    dataframe['Number of cylinders'], CYLINDER_WORDS
)
dataframe['Number of doors'] = words_to_int(
    dataframe['Number of doors'], DOOR_WORDS
)
dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Car body,Driver wheel,Car length,Car width,Number of cylinders,Horse power,Price
0,audi fox,gas,2,sedan,fwd,177.3,66.3,5,110,15250
1,bmw x1,gas,2,sedan,rwd,176.8,64.8,6,121,20970
2,mazda glc deluxe,diesel,4,sedan,fwd,177.8,66.5,4,120,10795
3,buick century luxus (sw),diesel,4,wagon,rwd,190.9,70.3,5,123,28248
4,buick skylark,gas,2,convertible,rwd,180.3,70.5,8,155,35056
5,mitsubishi g4,gas,2,hatchback,fwd,157.3,63.8,4,116,9959
6,peugeot 504,gas,4,sedan,rwd,186.7,68.4,4,120,11900
7,peugeot 304,diesel,4,sedan,rwd,186.7,68.4,4,152,13200
8,porsche macan,gas,2,hatchback,rwd,168.9,68.3,4,143,22018
9,porsche cayenne,gas,2,hatchback,rwd,66.5,55.2,8,90,9295


In [4]:
# List each distinct 'Car body' value once (first occurrence kept) to see
# which categories the column contains before encoding it.
dataframe['Car body'].drop_duplicates()

0          sedan
3          wagon
4    convertible
5      hatchback
Name: Car body, dtype: object

In [5]:
# One-hot encode 'Car body': build one indicator column per body style and
# append those columns to the right of the original frame.
body_dummies = pd.get_dummies(dataframe['Car body'])
merged_dataframe = pd.concat([dataframe, body_dummies], axis=1)

In [6]:
# The text column is no longer needed once its indicator columns exist.
merged_dataframe = merged_dataframe.drop(columns=['Car body'])
merged_dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Driver wheel,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon
0,audi fox,gas,2,fwd,177.3,66.3,5,110,15250,0,0,1,0
1,bmw x1,gas,2,rwd,176.8,64.8,6,121,20970,0,0,1,0
2,mazda glc deluxe,diesel,4,fwd,177.8,66.5,4,120,10795,0,0,1,0
3,buick century luxus (sw),diesel,4,rwd,190.9,70.3,5,123,28248,0,0,0,1
4,buick skylark,gas,2,rwd,180.3,70.5,8,155,35056,1,0,0,0
5,mitsubishi g4,gas,2,fwd,157.3,63.8,4,116,9959,0,1,0,0
6,peugeot 504,gas,4,rwd,186.7,68.4,4,120,11900,0,0,1,0
7,peugeot 304,diesel,4,rwd,186.7,68.4,4,152,13200,0,0,1,0
8,porsche macan,gas,2,rwd,168.9,68.3,4,143,22018,0,1,0,0
9,porsche cayenne,gas,2,rwd,66.5,55.2,8,90,9295,0,1,0,0


In [8]:
# List each distinct 'Driver wheel' value once (first occurrence kept) to see
# which categories the column contains before encoding it.
merged_dataframe['Driver wheel'].drop_duplicates()

0     fwd
1     rwd
14    4wd
Name: Driver wheel, dtype: object

In [9]:
# One-hot encode 'Driver wheel': replace the text column with one indicator
# column per drive type, appended on the right.
# The column is read from merged_dataframe (the frame being transformed), so
# re-running this cell raises a KeyError on the first line instead of first
# appending a second copy of the indicator columns.
drive_dummies = pd.get_dummies(merged_dataframe['Driver wheel'])
merged_dataframe = pd.concat(
    [merged_dataframe.drop(columns=['Driver wheel']), drive_dummies],
    axis='columns'
)
merged_dataframe

Unnamed: 0,Car name,Fuel type,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,4wd,fwd,rwd
0,audi fox,gas,2,177.3,66.3,5,110,15250,0,0,1,0,0,1,0
1,bmw x1,gas,2,176.8,64.8,6,121,20970,0,0,1,0,0,0,1
2,mazda glc deluxe,diesel,4,177.8,66.5,4,120,10795,0,0,1,0,0,1,0
3,buick century luxus (sw),diesel,4,190.9,70.3,5,123,28248,0,0,0,1,0,0,1
4,buick skylark,gas,2,180.3,70.5,8,155,35056,1,0,0,0,0,0,1
5,mitsubishi g4,gas,2,157.3,63.8,4,116,9959,0,1,0,0,0,1,0
6,peugeot 504,gas,4,186.7,68.4,4,120,11900,0,0,1,0,0,0,1
7,peugeot 304,diesel,4,186.7,68.4,4,152,13200,0,0,1,0,0,0,1
8,porsche macan,gas,2,168.9,68.3,4,143,22018,0,1,0,0,0,0,1
9,porsche cayenne,gas,2,66.5,55.2,8,90,9295,0,1,0,0,0,0,1


In [10]:
# One-hot encode 'Fuel type': replace the text column with one indicator
# column per fuel, appended on the right.
# The column is read from merged_dataframe (the frame being transformed), so
# re-running this cell raises a KeyError on the first line instead of first
# appending a second copy of the indicator columns.
fuel_dummies = pd.get_dummies(merged_dataframe['Fuel type'])
merged_dataframe = pd.concat(
    [merged_dataframe.drop(columns=['Fuel type']), fuel_dummies],
    axis='columns'
)
merged_dataframe

Unnamed: 0,Car name,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,4wd,fwd,rwd,diesel,gas
0,audi fox,2,177.3,66.3,5,110,15250,0,0,1,0,0,1,0,0,1
1,bmw x1,2,176.8,64.8,6,121,20970,0,0,1,0,0,0,1,0,1
2,mazda glc deluxe,4,177.8,66.5,4,120,10795,0,0,1,0,0,1,0,1,0
3,buick century luxus (sw),4,190.9,70.3,5,123,28248,0,0,0,1,0,0,1,1,0
4,buick skylark,2,180.3,70.5,8,155,35056,1,0,0,0,0,0,1,0,1
5,mitsubishi g4,2,157.3,63.8,4,116,9959,0,1,0,0,0,1,0,0,1
6,peugeot 504,4,186.7,68.4,4,120,11900,0,0,1,0,0,0,1,0,1
7,peugeot 304,4,186.7,68.4,4,152,13200,0,0,1,0,0,0,1,1,0
8,porsche macan,2,168.9,68.3,4,143,22018,0,1,0,0,0,0,1,0,1
9,porsche cayenne,2,66.5,55.2,8,90,9295,0,1,0,0,0,0,1,0,1


In [13]:
# One-hot encode 'Car name': replace the text column with one indicator
# column per distinct name, appended on the right.
# The column is read from merged_dataframe (the frame being transformed), so
# re-running this cell raises a KeyError on the first line instead of first
# appending a second copy of the indicator columns.
# NOTE(review): this adds one feature per distinct car name. If names are
# (nearly) unique per row the model can memorise individual cars rather than
# learn from their attributes - confirm this is intended.
name_dummies = pd.get_dummies(merged_dataframe['Car name'])
merged_dataframe = pd.concat(
    [merged_dataframe.drop(columns=['Car name']), name_dummies],
    axis='columns'
)
merged_dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,176.8,64.8,6,121,20970,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,4,177.8,66.5,4,120,10795,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,190.9,70.3,5,123,28248,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,2,180.3,70.5,8,155,35056,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2,157.3,63.8,4,116,9959,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,4,186.7,68.4,4,120,11900,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
7,4,186.7,68.4,4,152,13200,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
8,2,168.9,68.3,4,143,22018,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
9,2,66.5,55.2,8,90,9295,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [14]:
# Display the fully encoded frame (all text columns replaced by numbers).
merged_dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,176.8,64.8,6,121,20970,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,4,177.8,66.5,4,120,10795,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,190.9,70.3,5,123,28248,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,2,180.3,70.5,8,155,35056,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2,157.3,63.8,4,116,9959,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,4,186.7,68.4,4,120,11900,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
7,4,186.7,68.4,4,152,13200,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
8,2,168.9,68.3,4,143,22018,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
9,2,66.5,55.2,8,90,9295,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0


In [16]:
# Check that every remaining column has a numeric dtype before fitting.
merged_dataframe.dtypes

Number of doors               int64
Car length                  float64
Car width                   float64
Number of cylinders           int64
Horse power                   int64
Price                         int64
convertible                   uint8
hatchback                     uint8
sedan                         uint8
wagon                         uint8
4wd                           uint8
fwd                           uint8
rwd                           uint8
diesel                        uint8
gas                           uint8
audi fox                      uint8
bmw x1                        uint8
buick century luxus (sw)      uint8
buick skylark                 uint8
mazda glc deluxe              uint8
mitsubishi g4                 uint8
peugeot 304                   uint8
peugeot 504                   uint8
porsche cayenne               uint8
porsche macan                 uint8
renault 5 gtl                 uint8
subaru                        uint8
subaru trezia               

In [27]:
# Create the (not yet fitted) linear regression estimator.
# LinearRegression is imported in the notebook's import cell so that all
# dependencies are declared in one place.
linear_regression = LinearRegression()
linear_regression

LinearRegression()

In [28]:
# y: the label the model should output (the car's price).
y = merged_dataframe['Price']
# X: the features, i.e. every column except the label.
X = merged_dataframe.drop(columns=['Price'])


In [29]:
# Train the model: linear regression with multiple variables, using every
# column of X as a feature and y (Price) as the target.
linear_regression.fit(X, y)

LinearRegression()

In [35]:
# Predict the price of a single hand-written car.
# The values must follow the column order of X exactly (the numeric columns
# first, then the indicator columns in the order they were appended).
sample_values = [[2, 164.5, 62, 5, 60, 1,0,0,0,0,1,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0]]
# Wrapping the row in a DataFrame with X's column labels gives the model the
# same feature names it was fitted with, and raises immediately if the number
# of values does not match the number of feature columns.
sample = pd.DataFrame(sample_values, columns=X.columns)
linear_regression.predict(sample)



array([9849.73505852])

In [37]:
# R^2 of the fitted model, computed on the same X and y it was trained on.
# This measures fit to the training rows only, not accuracy on unseen cars.
# NOTE(review): consider scoring on a held-out split for a realistic estimate.
linear_regression.score(X, y)

1.0

In [39]:
# From here on `dataframe` refers to the encoded frame. This is a plain
# assignment, so both names point at the same object (no copy is made) and the
# originally loaded frame is no longer reachable through `dataframe`.
dataframe = merged_dataframe
dataframe

Unnamed: 0,Number of doors,Car length,Car width,Number of cylinders,Horse power,Price,convertible,hatchback,sedan,wagon,...,porsche macan,renault 5 gtl,subaru,subaru trezia,toyota corolla 1200,toyota corona mark ii,toyota mark ii,volkswagen 411 (sw),volkswagen super beetle,volvo 145e (sw)
0,2,177.3,66.3,5,110,15250,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
1,2,176.8,64.8,6,121,20970,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
2,4,177.8,66.5,4,120,10795,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
3,4,190.9,70.3,5,123,28248,0,0,0,1,...,0,0,0,0,0,0,0,0,0,0
4,2,180.3,70.5,8,155,35056,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
5,2,157.3,63.8,4,116,9959,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
6,4,186.7,68.4,4,120,11900,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
7,4,186.7,68.4,4,152,13200,0,0,1,0,...,0,0,0,0,0,0,0,0,0,0
8,2,168.9,68.3,4,143,22018,0,1,0,0,...,1,0,0,0,0,0,0,0,0,0
9,2,66.5,55.2,8,90,9295,0,1,0,0,...,0,0,0,0,0,0,0,0,0,0
