### Importing Libraries

In [5]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression

In [6]:
# Load the car listings; the path is relative to the current working directory.
csv_path = "carprices.csv"
car = pd.read_csv(csv_path)

In [7]:
car

Unnamed: 0,Car Model,Mileage,Sell Price($),Age(yrs)
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


### Renaming the columns to keep them simple

In [8]:
# Strip the unit suffixes from the column labels so they are easier to type later.
# The result is reassigned instead of using inplace=True, so the statement does
# not mutate the frame object that the previous cell displayed.
car = car.rename(columns = {"Sell Price($)" : "Sell Price", "Age(yrs)" : "Age"})
car

Unnamed: 0,Car Model,Mileage,Sell Price,Age
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [9]:
## y = m1 * Car model + m2 * Mileage + m3 * Age

In [10]:
## but the car model is nominal categorical data

In [11]:
## so it needs to be converted into numbers

### Use get_dummies method from pandas

Some columns hold categorical data stored as text. That information may still be important to the model, so leaving those columns out could be a bad idea. In that case we use dummy variables (one-hot encoding): each category gets its own column, which holds 1 for rows that belong to that category and 0 for all other rows.

In [12]:
# One-hot encode "Car Model": each category becomes its own indicator column.
# dtype=int pins the indicators to 0/1 integers; without it, newer pandas
# versions return True/False columns instead.
# NOTE(review): all three indicator columns are kept, so in every row they sum
# to 1 and are collinear with the model intercept (the "dummy trap" mentioned
# further down). drop_first=True would remove that redundancy, but the predict
# cells below pass five feature values and would have to change with it.
car1 = pd.get_dummies(car, columns = ["Car Model"], dtype = int)

In [13]:
car1

Unnamed: 0,Mileage,Sell Price,Age,Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
0,69000,18000,6,0,1,0
1,35000,34000,3,0,1,0
2,57000,26100,5,0,1,0
3,22500,40000,2,0,1,0
4,46000,31500,4,0,1,0
5,59000,29400,5,1,0,0
6,52000,32000,5,1,0,0
7,72000,19300,6,1,0,0
8,91000,12000,8,1,0,0
9,67000,22000,6,0,0,1


In [14]:
# Feature matrix: every column of the encoded frame except the target.
X = car1.drop(columns = ["Sell Price"])

In [15]:
X

Unnamed: 0,Mileage,Age,Car Model_Audi A5,Car Model_BMW X5,Car Model_Mercedez Benz C class
0,69000,6,0,1,0
1,35000,3,0,1,0
2,57000,5,0,1,0
3,22500,2,0,1,0
4,46000,4,0,1,0
5,59000,5,1,0,0
6,52000,5,1,0,0
7,72000,6,1,0,0
8,91000,8,1,0,0
9,67000,6,0,0,1


In [16]:
# Target vector: the selling price column of the encoded frame.
y = car1.loc[:, "Sell Price"]

In [17]:
# dummies = pd.get_dummies(data = car["Car Model"])
# dummies

In [18]:
## concat both dataframes

In [19]:
# new_car_df = pd.concat([car, dummies], axis = "columns")

# new_car_df


In [20]:
## divide X and y
## before that, drop the original Car Model column (no longer needed) and one of the dummy columns to avoid the dummy variable trap

In [21]:
# new_car_df.drop(["Car Model", "Mercedez Benz C class"], axis = "columns", inplace = True)

In [22]:
# new_car_df

In [23]:
# X = new_car_df[["Mileage" , "Age", "Audi A5", "BMW X5"]].copy()
# X

In [24]:
# y = new_car_df["Sell Price"]
# y

In [25]:
## Fit a linear regression of Sell Price on the encoded feature columns
reg = LinearRegression()
reg.fit(X = X, y = y)

LinearRegression()

In [26]:
reg.coef_

array([-3.70122094e-01, -1.33245363e+03,  6.10375284e+02, -3.67429130e+03,
        3.06391602e+03])

In [27]:
reg.intercept_

55912.70994756205

In [28]:
## Sell Price = 55912.70994756205 + (-3.70122094e-01)*Mileage + (-1.33245363e+03)*Age + (6.10375284e+02)*Audi A5 + (-3.67429130e+03)*BMW X5 + (3.06391602e+03)*Mercedez Benz C class

## Predicted price of a BMW X5 with 86000 mileage and 7 years of age

In [29]:
# Query row: 86000 mileage, 7 years old, BMW X5 indicator set.
# The values are wrapped in a DataFrame that reuses X's column labels so the
# model receives the same feature names (and column order) it was fitted with;
# a bare nested list carries no names and makes recent scikit-learn versions
# emit a "does not have valid feature names" warning.
bmw_x5_query = pd.DataFrame([[86000, 7, 0, 1, 0]], columns = X.columns)
reg.predict(bmw_x5_query)

array([11080.74313219])

## Predicted price of a Mercedez Benz C class with 45000 mileage and 4 years of age

In [30]:
# Query row: 45000 mileage, 4 years old, Mercedez Benz C class indicator set.
# The values are wrapped in a DataFrame that reuses X's column labels so the
# model receives the same feature names (and column order) it was fitted with;
# a bare nested list carries no names and makes recent scikit-learn versions
# emit a "does not have valid feature names" warning.
mercedez_query = pd.DataFrame([[45000, 4, 0, 0, 1]], columns = X.columns)
reg.predict(mercedez_query)

array([36991.31721061])

## Accuracy (R² score on the training data)

In [31]:
# LinearRegression.score returns the coefficient of determination (R^2).
# It is evaluated here on the same X and y the model was fitted on, then
# scaled to a percentage.
Accuracy = 100 * reg.score(X, y)

In [32]:
Accuracy

94.17050937281083

### Another way to convert the categorical column: mapping categories to integer codes

In [33]:
# Work on an independent copy so the encoding below leaves `car` untouched.
df = car.copy(deep = True)

In [34]:
df

Unnamed: 0,Car Model,Mileage,Sell Price,Age
0,BMW X5,69000,18000,6
1,BMW X5,35000,34000,3
2,BMW X5,57000,26100,5
3,BMW X5,22500,40000,2
4,BMW X5,46000,31500,4
5,Audi A5,59000,29400,5
6,Audi A5,52000,32000,5
7,Audi A5,72000,19300,6
8,Audi A5,91000,12000,8
9,Mercedez Benz C class,67000,22000,6


In [35]:
# Replace each car model name with an integer code.
# The codes are computed from the untouched `car` column rather than from `df`
# itself, so re-running this cell gives the same result; mapping df's own
# column a second time would look up the integer codes in a dict keyed by
# names and turn every value into NaN.
# NOTE(review): integer codes imply an ordering that nominal categories do not
# have; keep that in mind before feeding this column to a linear model.
model_codes = {"BMW X5" : 0, "Audi A5" : 1 , "Mercedez Benz C class" : 2}
df["Car Model"] = car["Car Model"].map(model_codes)

In [36]:
df

Unnamed: 0,Car Model,Mileage,Sell Price,Age
0,0,69000,18000,6
1,0,35000,34000,3
2,0,57000,26100,5
3,0,22500,40000,2
4,0,46000,31500,4
5,1,59000,29400,5
6,1,52000,32000,5
7,1,72000,19300,6
8,1,91000,12000,8
9,2,67000,22000,6
